In [None]:
%matplotlib inline
import numpy as np
import plotly.io as pio
from vida.plotfunc.interplot_vida import *
from vida.plotfunc.interplot_cg import *
import plotly.express as px

## Import Preprocessed Data

In [None]:
def loadhelixdata(SEQ, fname):
    """Load saved trajectory data from two ``.npz`` archives into global variables.

    Every array stored in ``./data/vida_data/{fname}.npz`` and then in
    ``./data/pretraining/pretraining_{SEQ}.npz`` is bound to a module-level
    global named after its archive key (if a key exists in both archives, the
    value from the second one wins). The ``*_uniq`` arrays are then indexed
    with ``coord_id_S`` to build ``SIMS_adj``, ``SIMS_scar``, ``SIMS_G`` and
    ``SIMS_pair``; the four arrays returned by ``hover_addon`` are expanded the
    same way, and ``not_infinalstructure`` is derived from them. Finally the
    shapes of the loaded and derived arrays are printed.

    Args:
        SEQ: label used to build the pretraining archive file name.
        fname: base name (without extension) of the archive in
            ``./data/vida_data/``.

    Returns:
        None. All results are published as global variables.
    """
    global SIMS_adj, SIMS_scar, SIMS_G, SIMS_pair
    global correct_interpair, intrapair_top, intrapair_bot, all_interpair
    global not_infinalstructure

    # Load the plot data first, then the pretraining data. Each archive is
    # opened in a ``with`` block so its file handle is closed once the arrays
    # have been copied out.
    # NOTE(review): allow_pickle=True can execute arbitrary code while
    # unpickling; only load archives from a trusted source.
    archive_paths = (
        f"./data/vida_data/{fname}.npz",
        f"./data/pretraining/pretraining_{SEQ}.npz",
    )
    for npz_path in archive_paths:
        with np.load(npz_path, allow_pickle=True) as data_npz:
            # assign every stored array to a global of the same name
            for var in data_npz.files:
                globals()[var] = data_npz[var]

    # recover full data based on coord_id_S and the unique data
    SIMS_adj = SIMS_adj_uniq[coord_id_S]
    SIMS_scar = SIMS_scar_uniq[coord_id_S]
    SIMS_G = SIMS_G_uniq[coord_id_S]
    SIMS_pair = SIMS_pair_uniq[coord_id_S]

    # hover_addon works on the unique arrays; expand its outputs the same way
    correct_interpair_uniq, intrapair_top_uniq, intrapair_bot_uniq, all_interpair_uniq = hover_addon(SIMS_adj_uniq,SIMS_dict_uniq)
    correct_interpair = correct_interpair_uniq[coord_id_S]
    intrapair_top = intrapair_top_uniq[coord_id_S]
    intrapair_bot = intrapair_bot_uniq[coord_id_S]
    all_interpair = all_interpair_uniq[coord_id_S]

    not_infinalstructure = (all_interpair-correct_interpair) + (intrapair_top+intrapair_bot)

    # shape report, used as a quick sanity check of what was loaded
    print(SIMS_T.shape,SIMS_HT.shape,SIMS_HT_uniq.shape)
    print(SIMS_adj.shape,SIMS_scar.shape,SIMS_G.shape,SIMS_HT.shape,SIMS_pair.shape)
    print(SIMS_adj_uniq.shape,SIMS_scar_uniq.shape,SIMS_G_uniq.shape,SIMS_pair_uniq.shape)
    print(SIMS_dict.shape,SIMS_dict_uniq.shape)
    print(coord_id_S.shape,indices_S.shape,trj_id.shape,data_embed.shape,occ_density_S.shape)
    print(pca_coords.shape,pca_all_coords.shape)
    print(phate_coords.shape,phate_all_coords.shape)
    # print(umap_coord_2d.shape,umap_all_coord_2d.shape)
    print(correct_interpair_uniq.shape, intrapair_top_uniq.shape, intrapair_bot_uniq.shape, all_interpair_uniq.shape)
    print(correct_interpair.shape, intrapair_top.shape, intrapair_bot.shape, all_interpair.shape)

In [None]:
# Choose the saved run to visualise and load its arrays into the notebook namespace.
# (Earlier runs used the bare sequence label, e.g. "PT4" or "PT3_hairpin", as fname.)

fname = "PT4_0823-0138"
# The sequence label is the file name without its trailing "_<run tag>" part
SEQ = fname.rsplit("_", 1)[0]
print(SEQ)

# loadhelixdata publishes the loaded arrays as globals
loadhelixdata(SEQ, fname)

In [None]:
# load MDS data
mds_data = "./data/vida_data/PT4_mds.npz"
# Open the archive once (it was previously opened twice and never closed) and
# release the file handle as soon as both arrays have been read.
with np.load(mds_data) as mds_npz:
    mds_coords = mds_npz["mds_coords"]
    mds_all_coords = mds_npz["mds_all_coords"]
mds_coords.shape, mds_all_coords.shape

## Sort Trajectories

In [None]:
# Cut every full-length array into one piece per trajectory.

# Arrays that all share the same trajectory boundaries
arrays_to_split = [
    SIMS_dict[:,0], SIMS_T, SIMS_HT, SIMS_G, SIMS_pair,
    pca_all_coords, phate_all_coords, mds_all_coords,
]

# Split points for np.split: every trj_id entry except the last, shifted by one
subtrj_id = trj_id[:-1] + 1
sub_arrays = [np.split(full_arr, subtrj_id) for full_arr in arrays_to_split]

# Name the per-trajectory lists individually for the cells below
(
    sub_SIMS_dp_og,
    sub_SIMS_T,
    sub_SIMS_HT,
    sub_SIMS_G,
    sub_SIMS_pair,
    sub_pca_all_coords,
    sub_phate_all_coords,
    sub_mds_all_coords,
) = sub_arrays

# Every array must have been cut into the same number of pieces
assert len({len(pieces) for pieces in sub_arrays}) == 1

# Number of trajectories
print(len(sub_SIMS_T))


In [None]:
# Sort the successful and fail trajectories by reaction time:
# each trajectory is ranked by its last SIMS_T value, largest first.
sorted_indices = np.argsort([sub_array[-1] for sub_array in sub_SIMS_T])[::-1]

# List of arrays you want to sort in descending order
arrays_to_sort = [sub_SIMS_dp_og, sub_SIMS_T, sub_SIMS_HT, sub_SIMS_G, sub_SIMS_pair, sub_pca_all_coords, sub_phate_all_coords, sub_mds_all_coords]

# Apply the same permutation to every list; dtype=object keeps the
# per-trajectory pieces (which may differ in length) as separate elements.
sorted_arrays = [np.array(arr,dtype=object)[sorted_indices] for arr in arrays_to_sort]

# Unpack the sorted arrays into separate variables
sorted_sub_SIMS_dp_og, sorted_sub_SIMS_T, sorted_sub_SIMS_HT, sorted_sub_SIMS_G, sorted_sub_SIMS_pair, sorted_sub_pca_all_coords, sorted_sub_phate_all_coords, sorted_sub_mds_all_coords = sorted_arrays

# Show the shapes of two different sorted arrays (the same array used to be
# displayed twice, which could not reveal a mismatch).
sorted_sub_SIMS_dp_og.shape, sorted_sub_SIMS_T.shape

In [None]:
# Background frame: one row per entry of the *_uniq arrays, with its 2-D coordinates
df = pd.DataFrame({
    "Energy": SIMS_G_uniq,
    "Pair": SIMS_pair_uniq,
    "DP": SIMS_dict_uniq[:,0],
    "HT": SIMS_HT_uniq,
    "PCA 1": pca_coords[:,0],
    "PCA 2": pca_coords[:,1],
    "PHATE 1": phate_coords[:,0],
    "PHATE 2": phate_coords[:,1],
    "MDS 1": mds_coords[:,0],
    "MDS 2": mds_coords[:,1],
})

# Trajectory frame: one row per sorted trajectory; each cell holds that trajectory's array
dfall = pd.DataFrame({
    "Energy": sorted_sub_SIMS_G,
    "Pair": sorted_sub_SIMS_pair,
    "DP": sorted_sub_SIMS_dp_og,
    "HT": sorted_sub_SIMS_HT,
    "TotalT": sorted_sub_SIMS_T,
    "PCA": sorted_sub_pca_all_coords,
    "PHATE": sorted_sub_phate_all_coords,
    "MDS": sorted_sub_mds_all_coords,
    # position of each trajectory before sorting
    "IDX": sorted_indices,
})


## Visualize

### Interactive plots 2D

In [None]:
# Re-import the plotting helpers so that edits made to the module on disk are
# picked up without restarting the kernel. importlib.reload replaces the
# deprecated imp.reload (the imp module was removed in Python 3.12).
import importlib
import vida.plotfunc.interplot_vida
importlib.reload(vida.plotfunc.interplot_vida)
from vida.plotfunc.interplot_vida import *

In [None]:
# Plot
# VIS_METHOD = ["PCA", "PHATE"]
VIS_METHOD = ["MDS"]


for vis in VIS_METHOD:
    fig = interactive_plotly_2D_sort(SEQ,df,dfall,vis)

    # Build the file name from SEQ and vis so each method gets its own file.
    # With SEQ == "PT4" and vis == "MDS" this is the former hard-coded
    # "PT4_MDS.html"; previously every iteration wrote to that one path.
    pio.write_html(fig, file=f"../output_files/saved_ViDa_plots/plot_mlcb/{SEQ}_{vis}.html", auto_open=True)

    print("DONE: ", vis)

### Direct PCA/PHATE

In [None]:
def loadhelixdata(SEQ, fname):
    """Load the direct PCA/PHATE data and the pretraining data into globals.

    This definition replaces the ``loadhelixdata`` defined earlier in the
    notebook. It reads the fixed archive
    ``../output_files/saved_ViDa_plots/plot_dna29/dir_PCA_PHATE.npz`` and then
    ``./data/pretraining/pretraining_{SEQ}.npz``; every stored array is bound
    to a module-level global named after its archive key (if a key exists in
    both archives, the value from the second one wins). The ``*_uniq`` arrays
    are then indexed with ``coord_id_S`` to build ``SIMS_adj``, ``SIMS_scar``,
    ``SIMS_G`` and ``SIMS_pair``; the four arrays returned by ``hover_addon``
    are expanded the same way, ``not_infinalstructure`` is derived from them,
    and the shapes of the arrays are printed.

    Args:
        SEQ: label used to build the pretraining archive file name.
        fname: accepted for call compatibility but NOT used here; the first
            archive path is fixed (the ``./data/vida_data/{fname}.npz`` path
            is kept below as a commented-out alternative).

    Returns:
        None. All results are published as global variables.
    """
    global SIMS_adj, SIMS_scar, SIMS_G, SIMS_pair
    global correct_interpair, intrapair_top, intrapair_bot, all_interpair
    global not_infinalstructure

    # Load the plot data first, then the pretraining data. Each archive is
    # opened in a ``with`` block so its file handle is closed once the arrays
    # have been copied out.
    # NOTE(review): allow_pickle=True can execute arbitrary code while
    # unpickling; only load archives from a trusted source.
    archive_paths = (
        # f"./data/vida_data/{fname}.npz",
        "../output_files/saved_ViDa_plots/plot_dna29/dir_PCA_PHATE.npz",
        f"./data/pretraining/pretraining_{SEQ}.npz",
    )
    for npz_path in archive_paths:
        with np.load(npz_path, allow_pickle=True) as data_npz:
            # assign every stored array to a global of the same name
            for var in data_npz.files:
                globals()[var] = data_npz[var]

    # recover full data based on coord_id_S and the unique data
    SIMS_adj = SIMS_adj_uniq[coord_id_S]
    SIMS_scar = SIMS_scar_uniq[coord_id_S]
    SIMS_G = SIMS_G_uniq[coord_id_S]
    SIMS_pair = SIMS_pair_uniq[coord_id_S]

    # hover_addon works on the unique arrays; expand its outputs the same way
    correct_interpair_uniq, intrapair_top_uniq, intrapair_bot_uniq, all_interpair_uniq = hover_addon(SIMS_adj_uniq,SIMS_dict_uniq)
    correct_interpair = correct_interpair_uniq[coord_id_S]
    intrapair_top = intrapair_top_uniq[coord_id_S]
    intrapair_bot = intrapair_bot_uniq[coord_id_S]
    all_interpair = all_interpair_uniq[coord_id_S]

    not_infinalstructure = (all_interpair-correct_interpair) + (intrapair_top+intrapair_bot)

    # shape report, used as a quick sanity check of what was loaded
    print(SIMS_T.shape,SIMS_HT.shape,SIMS_HT_uniq.shape)
    print(SIMS_adj.shape,SIMS_scar.shape,SIMS_G.shape,SIMS_HT.shape,SIMS_pair.shape)
    print(SIMS_adj_uniq.shape,SIMS_scar_uniq.shape,SIMS_G_uniq.shape,SIMS_pair_uniq.shape)
    print(SIMS_dict.shape,SIMS_dict_uniq.shape)
    print(coord_id_S.shape,indices_S.shape,trj_id.shape,data_embed.shape,occ_density_S.shape)
    print(pca_coords.shape,pca_all_coords.shape)
    print(phate_coords.shape,phate_all_coords.shape)
    # print(umap_coord_2d.shape,umap_all_coord_2d.shape)
    print(correct_interpair_uniq.shape, intrapair_top_uniq.shape, intrapair_bot_uniq.shape, all_interpair_uniq.shape)
    print(correct_interpair.shape, intrapair_top.shape, intrapair_bot.shape, all_interpair.shape)


# Run the loader defined above for the PT4 run.
fname = "PT4_0823-0138"
# The sequence label is the file name without its trailing "_<run tag>" part
SEQ = fname.rsplit("_", 1)[0]
print(SEQ)

# publishes the loaded arrays as globals
loadhelixdata(SEQ, fname)

In [None]:
# Write one interactive HTML figure per embedding method.
# NOTE(review): no cell between the loader call above and this loop rebuilds
# df / dfall, so they still hold whatever an earlier cell put in them —
# confirm they contain the direct PCA/PHATE coordinates before trusting the output.
VIS_METHOD = ["PCA", "PHATE"]

for vis in VIS_METHOD:
    fig = interactive_plotly_2D_sort(SEQ, df, dfall, vis)
    html_path = "../output_files/saved_ViDa_plots/plot_dna29/direct_{}_{}.html".format(SEQ, vis)
    pio.write_html(fig, file=html_path, auto_open=True)
    print("DONE: ", vis)

In [None]:
%%script false --no-raise-error

"""Direct PCA and PHATE
"""
# NOTE: this cell starts with `%%script false --no-raise-error`, so its body is
# handed to the shell command `false` instead of being executed as Python;
# nothing defined here takes effect while that magic line is present.
def loadhelixdata(SEQ, fname):
    """Load two .npz archives into globals and build the plotting DataFrames.

    Reads ``./data/vida_data/{fname}.npz`` and
    ``./data/pretraining/pretraining_{SEQ}.npz``, binds every stored array to a
    global named after its key, expands the ``*_uniq`` arrays with
    ``coord_id_S``, and then builds the global frames ``df``, ``dfall`` and
    their ``Pair == 0`` / ``Pair == 1`` subsets (``df0``, ``df1``, ``dfall0``,
    ``dfall1``) from the ``*_direct`` coordinate arrays.

    Args:
        SEQ: label used to build the pretraining archive file name.
        fname: base name (without extension) of the archive in
            ``./data/vida_data/``.

    Returns:
        None. All results are published as global variables.
    """
    # load plot data 
    fnpz_data1 = f"./data/vida_data/{fname}.npz"
    data_npz1 = np.load(fnpz_data1,allow_pickle=True)
    # assign every stored array to a global of the same name
    for var in data_npz1.files:
        globals()[var] = data_npz1[var]
        
    # load pretraining data 
    fnpz_data2 = f"./data/pretraining/pretraining_{SEQ}.npz"
    data_npz2 = np.load(fnpz_data2,allow_pickle=True)
    # assign every stored array to a global of the same name
    for var in data_npz2.files:
        globals()[var] = data_npz2[var]
         
    # recover full data based on coord_id_S and the unique data
    global SIMS_adj, SIMS_scar, SIMS_G, SIMS_pair
    SIMS_adj = SIMS_adj_uniq[coord_id_S]
    SIMS_scar = SIMS_scar_uniq[coord_id_S]
    SIMS_G = SIMS_G_uniq[coord_id_S]
    SIMS_pair = SIMS_pair_uniq[coord_id_S]

    # hover_addon works on the unique arrays; its outputs are expanded the same way
    global correct_interpair, intrapair_top, intrapair_bot, all_interpair
    correct_interpair_uniq, intrapair_top_uniq, intrapair_bot_uniq, all_interpair_uniq = hover_addon(SIMS_adj_uniq,SIMS_dict_uniq)
    correct_interpair = correct_interpair_uniq[coord_id_S]
    intrapair_top = intrapair_top_uniq[coord_id_S]
    intrapair_bot = intrapair_bot_uniq[coord_id_S]
    all_interpair = all_interpair_uniq[coord_id_S]
    
    global not_infinalstructure
    not_infinalstructure = (all_interpair-correct_interpair) + (intrapair_top+intrapair_bot)
    
    # shape report (disabled in this variant)
    # print(SIMS_T.shape,SIMS_HT.shape,SIMS_HT_uniq.shape)
    # print(SIMS_adj.shape,SIMS_scar.shape,SIMS_G.shape,SIMS_HT.shape,SIMS_pair.shape)
    # print(SIMS_adj_uniq.shape,SIMS_scar_uniq.shape,SIMS_G_uniq.shape,SIMS_pair_uniq.shape) 
    # print(SIMS_dict.shape,SIMS_dict_uniq.shape)
    # print(coord_id_S.shape,indices_S.shape,trj_id.shape,data_embed.shape,occ_density_S.shape)
    # print(pca_coords.shape,pca_all_coords.shape)
    # print(phate_coords.shape,phate_all_coords.shape)
    # # print(umap_coord_2d.shape,umap_all_coord_2d.shape)
    # print(correct_interpair_uniq.shape, intrapair_top_uniq.shape, intrapair_bot_uniq.shape, all_interpair_uniq.shape)
    # print(correct_interpair.shape, intrapair_top.shape, intrapair_bot.shape, all_interpair.shape)
    
    
    """make pd dataframe for plotting
    """
    global df, dfall, df0, df1, dfall0, dfall1
    # Frame built from the *_uniq arrays and the *_coords_direct coordinates.
    # NOTE(review): pca_coords_direct / phate_coords_direct are not defined in
    # this function; presumably they are keys of one of the archives — confirm.
    df = pd.DataFrame(data={
            "Energy": SIMS_G_uniq, "Occp": occ_density_S, "Pair": SIMS_pair_uniq, "DP": SIMS_dict_uniq[:,0],"HT": SIMS_HT_uniq,
            "correct_interpair": correct_interpair_uniq, "intrapair_top": intrapair_top_uniq, "intrapair_bot": intrapair_bot_uniq, "all_interpair": all_interpair_uniq,
            "PCA 1": pca_coords_direct[:,0], "PCA 2": pca_coords_direct[:,1],
            "PCA X": pca_coords_direct[:,0], "PCA Y": pca_coords_direct[:,1], "PCA Z": pca_coords_direct[:,2],
            "PHATE 1": phate_coords_direct[:,0], "PHATE 2": phate_coords_direct[:,1],
            # "MDS 1": mds_coords[:,0], "MDS 2": mds_coords[:,1],
            # "UMAP 1": umap_coord_2d[:,0],"UMAP 2": umap_coord_2d[:,1],
            }
            )

    # Frame built from the coord_id_S-expanded arrays and the *_all_coords_direct coordinates
    dfall = pd.DataFrame(data={
            "Energy": SIMS_G, "Pair": SIMS_pair,"DP": SIMS_dict[:,0],"HT": SIMS_HT, "TotalT": SIMS_T,
            "correct_interpair": correct_interpair, "intrapair_top": intrapair_top, "intrapair_bot":intrapair_bot, "all_interpair": all_interpair,
            "PCA 1": pca_all_coords_direct[:,0], "PCA 2": pca_all_coords_direct[:,1],
            "PCA X": pca_all_coords_direct[:,0], "PCA Y": pca_all_coords_direct[:,1], "PCA Z": pca_all_coords_direct[:,2],
            "PHATE 1": phate_all_coords_direct[:,0], "PHATE 2": phate_all_coords_direct[:,1],
            # "MDS 1": mds_all_coords[:,0], "MDS 2": mds_all_coords[:,1],
            # "UMAP 1": umap_all_coord_2d[:,0],"UMAP 2": umap_all_coord_2d[:,1],
            }
            )

    # Row subsets selected by the value of the "Pair" column
    df0 = df[df["Pair"] == 0]
    df1 = df[df["Pair"] == 1]

    dfall0 = dfall[dfall["Pair"] == 0]
    dfall1 = dfall[dfall["Pair"] == 1]
    
    return


# make interactive plotly 2D plot for different visualization methods and samples
# (part of a cell disabled with `%%script false --no-raise-error`)
# fname contains a sub-directory, so it selects
# ./data/vida_data/noViDa-noEnergy/PT4_hairpin_noViDa.npz in loadhelixdata and
# is also embedded in the HTML output path below.
fname = "./noViDa-noEnergy/PT4_hairpin_noViDa"
SEQ = "PT4_hairpin"
VIS_METHOD = ["PCA","PHATE"]
print(SEQ)

# load data
loadhelixdata(SEQ, fname)

# n_trace is the number of entries in trj_id
n_trace = len(trj_id)

# one HTML file per visualization method
for vis in VIS_METHOD: 
    fig = interactive_plotly_2D(SEQ,n_trace,df,dfall,trj_id,vis)
    
    pio.write_html(fig, file=f"../output_files/saved_newmodel_plots/energylandscape/{fname}_{vis}.html", auto_open=True)
    
    print("DONE: ", vis)
    


In [None]:
%%script false --no-raise-error

"""load saved trajectories data for npz file
"""
# NOTE: this cell starts with `%%script false --no-raise-error`, so its body is
# handed to the shell command `false` instead of being executed as Python;
# nothing defined here takes effect while that magic line is present.
def loadhelixdata(SEQ, fname):
    """Load two .npz archives into globals and build the plotting DataFrames.

    Reads ``./data/vida_data/{fname}.npz`` and
    ``./data/pretraining/pretraining_{SEQ}.npz``, binds every stored array to a
    global named after its key, expands the ``*_uniq`` arrays with
    ``coord_id_S``, prints the array shapes, and then builds the global frames
    ``df``, ``dfall`` and their ``Pair == 0`` / ``Pair == 1`` subsets
    (``df0``, ``df1``, ``dfall0``, ``dfall1``) from ``pca_coords`` /
    ``phate_coords`` and their ``*_all_coords`` counterparts.

    Args:
        SEQ: label used to build the pretraining archive file name.
        fname: base name (without extension) of the archive in
            ``./data/vida_data/``.

    Returns:
        None. All results are published as global variables.
    """
    # load plot data 
    fnpz_data1 = f"./data/vida_data/{fname}.npz"
    data_npz1 = np.load(fnpz_data1,allow_pickle=True)
    # assign every stored array to a global of the same name
    for var in data_npz1.files:
        globals()[var] = data_npz1[var]
        
    # load pretraining data 
    fnpz_data2 = f"./data/pretraining/pretraining_{SEQ}.npz"
    data_npz2 = np.load(fnpz_data2,allow_pickle=True)
    # assign every stored array to a global of the same name
    for var in data_npz2.files:
        globals()[var] = data_npz2[var]
         
    # recover full data based on coord_id_S and the unique data
    global SIMS_adj, SIMS_scar, SIMS_G, SIMS_pair
    SIMS_adj = SIMS_adj_uniq[coord_id_S]
    SIMS_scar = SIMS_scar_uniq[coord_id_S]
    SIMS_G = SIMS_G_uniq[coord_id_S]
    SIMS_pair = SIMS_pair_uniq[coord_id_S]

    # hover_addon works on the unique arrays; its outputs are expanded the same way
    global correct_interpair, intrapair_top, intrapair_bot, all_interpair
    correct_interpair_uniq, intrapair_top_uniq, intrapair_bot_uniq, all_interpair_uniq = hover_addon(SIMS_adj_uniq,SIMS_dict_uniq)
    correct_interpair = correct_interpair_uniq[coord_id_S]
    intrapair_top = intrapair_top_uniq[coord_id_S]
    intrapair_bot = intrapair_bot_uniq[coord_id_S]
    all_interpair = all_interpair_uniq[coord_id_S]
    
    global not_infinalstructure
    not_infinalstructure = (all_interpair-correct_interpair) + (intrapair_top+intrapair_bot)
    
    # shape report, used as a quick sanity check of what was loaded
    print(SIMS_T.shape,SIMS_HT.shape,SIMS_HT_uniq.shape)
    print(SIMS_adj.shape,SIMS_scar.shape,SIMS_G.shape,SIMS_HT.shape,SIMS_pair.shape)
    print(SIMS_adj_uniq.shape,SIMS_scar_uniq.shape,SIMS_G_uniq.shape,SIMS_pair_uniq.shape) 
    print(SIMS_dict.shape,SIMS_dict_uniq.shape)
    print(coord_id_S.shape,indices_S.shape,trj_id.shape,data_embed.shape,occ_density_S.shape)
    print(pca_coords.shape,pca_all_coords.shape)
    print(phate_coords.shape,phate_all_coords.shape)
    # print(umap_coord_2d.shape,umap_all_coord_2d.shape)
    print(correct_interpair_uniq.shape, intrapair_top_uniq.shape, intrapair_bot_uniq.shape, all_interpair_uniq.shape)
    print(correct_interpair.shape, intrapair_top.shape, intrapair_bot.shape, all_interpair.shape)
    
    
    """make pd dataframe for plotting
    """
    global df, dfall, df0, df1, dfall0, dfall1
    # Frame built from the *_uniq arrays and the pca_coords / phate_coords coordinates.
    # "PCA Z" reads column 2, so pca_coords needs at least three columns here.
    df = pd.DataFrame(data={
            "Energy": SIMS_G_uniq, "Occp": occ_density_S, "Pair": SIMS_pair_uniq, "DP": SIMS_dict_uniq[:,0],"HT": SIMS_HT_uniq,
            "correct_interpair": correct_interpair_uniq, "intrapair_top": intrapair_top_uniq, "intrapair_bot": intrapair_bot_uniq, "all_interpair": all_interpair_uniq,
            "PCA 1": pca_coords[:,0], "PCA 2": pca_coords[:,1],
            "PCA X": pca_coords[:,0], "PCA Y": pca_coords[:,1], "PCA Z": pca_coords[:,2],
            "PHATE 1": phate_coords[:,0], "PHATE 2": phate_coords[:,1],
            # "MDS 1": mds_coords[:,0], "MDS 2": mds_coords[:,1],
            # "UMAP 1": umap_coord_2d[:,0],"UMAP 2": umap_coord_2d[:,1],
            }
            )

    # Frame built from the coord_id_S-expanded arrays and the *_all_coords coordinates
    dfall = pd.DataFrame(data={
            "Energy": SIMS_G, "Pair": SIMS_pair,"DP": SIMS_dict[:,0],"HT": SIMS_HT, "TotalT": SIMS_T,
            "correct_interpair": correct_interpair, "intrapair_top": intrapair_top, "intrapair_bot":intrapair_bot, "all_interpair": all_interpair,
            "PCA 1": pca_all_coords[:,0], "PCA 2": pca_all_coords[:,1],
            "PCA X": pca_all_coords[:,0], "PCA Y": pca_all_coords[:,1], "PCA Z": pca_all_coords[:,2],
            "PHATE 1": phate_all_coords[:,0], "PHATE 2": phate_all_coords[:,1],
            # "MDS 1": mds_all_coords[:,0], "MDS 2": mds_all_coords[:,1],
            # "UMAP 1": umap_all_coord_2d[:,0],"UMAP 2": umap_all_coord_2d[:,1],
            }
            )

    # Row subsets selected by the value of the "Pair" column
    df0 = df[df["Pair"] == 0]
    df1 = df[df["Pair"] == 1]

    dfall0 = dfall[dfall["Pair"] == 0]
    dfall1 = dfall[dfall["Pair"] == 1]
    
    return


# Plot (part of a cell disabled with `%%script false --no-raise-error`)
# NOTE(review): this cell defines loadhelixdata but never calls it, and SEQ,
# fname, n_trace and trj_id are not set here — they must come from earlier cells.
VIS_METHOD = ["PCA", "PHATE"]
# VIS_METHOD = ["MDS"]


# one HTML file per visualization method
for vis in VIS_METHOD: 
    # NOTE(review): the enabled cells of this notebook call
    # interactive_plotly_2D_sort(SEQ, df, dfall, vis); this six-argument call
    # may target an older signature — confirm before re-enabling the cell.
    fig = interactive_plotly_2D_sort(SEQ,n_trace,df,dfall,trj_id,vis)
    
    pio.write_html(fig, file=f"../output_files/saved_newmodel_plots/energylandscape/{fname}_{vis}.html", auto_open=True)
    
    print("DONE: ", vis)

### Coarse-grained plot

In [None]:
# Re-import the coarse-grained plotting helpers so that edits made to the
# module on disk are picked up without restarting the kernel. importlib.reload
# replaces the deprecated imp.reload (the imp module was removed in Python 3.12).
import importlib
import interplot
importlib.reload(interplot)
from interplot import *

In [None]:
# # make interactive cg plot for different samples 
# for SEQ in SEQ_list:
    
#     loadhelixdata(SEQ)  # load data
#     n_trace = len(trj_id)
#     grid_G=grid_energy(correct_interpair, not_infinalstructure, SIMS_G)
#     fig = interactive_cgplot(SEQ,n_trace,grid_G,trj_id,correct_interpair,not_infinalstructure)
    
#     if SEQ in ["PT3", "PT4", "PT3_hairpin"]:
#         pio.write_html(fig, file="..output_files/saved_interactive_plots/coarse_grained/{}_100epoch_coarsegrained.html".format(SEQ), auto_open=True)
#     elif SEQ in ["PT0", "PT4_hairpin"]:
#         pio.write_html(fig, file="..output_files/saved_interactive_plots/coarse_grained/{}_60epoch_coarsegrained.html".format(SEQ), auto_open=True)
#     print("DONE: ", SEQ)


seq = "PT3_hairpin"
# loadhelixdata takes (SEQ, fname); the one-argument call raised TypeError.
# NOTE(review): the bare sequence label is passed as fname, following the
# commented-out `fname = SEQ` convention at the top of the notebook — confirm
# that this is the archive you want for PT3_hairpin.
loadhelixdata(seq, seq)
# n_trace was never set on a fresh run; use the trajectory count, as in the
# commented-out batch version above.
n_trace = len(trj_id)
grid_g=grid_energy(correct_interpair, not_infinalstructure, SIMS_G)
fig = interactive_cgplot(seq,n_trace,grid_g,trj_id,correct_interpair,not_infinalstructure)
pio.write_html(fig, file="{}_cg.html".format(seq), auto_open=True)
# fig.show()

In [None]:
# Display the two globals set by loadhelixdata: correct_interpair and
# not_infinalstructure = (all_interpair - correct_interpair) + (intrapair_top + intrapair_bot).
correct_interpair, not_infinalstructure

### Separate paired and unpaired

In [None]:
# Paired vs. unpaired view: load the PT4_hairpin run and write one HTML figure per method.
fname = "PT4_hairpin_0312-0118"
# fname = "./noViDa-noEnergy/PT4_hairpin_noViDa"
# The sequence label is the file name without its trailing "_<run tag>" part
SEQ = fname.rsplit("_", 1)[0]
# SEQ = "PT4_hairpin"
print(SEQ)

# publishes the loaded arrays as globals
loadhelixdata(SEQ, fname)

n_trace = len(trj_id)

# NOTE(review): df0 / df1 are only assigned inside cells disabled with
# `%%script false`, and VIS_METHOD is whatever an earlier cell left behind —
# confirm both are defined before running this cell on a fresh kernel.
for vis in VIS_METHOD:
    fig = interactive_plotly_2D_parivsunpair(SEQ, n_trace, df0, df1, dfall, trj_id, vis)
    html_path = "../output_files/saved_newmodel_plots/pair_vs_unpair/{}_{}.html".format(fname, vis)
    pio.write_html(fig, file=html_path, auto_open=True)
    print("DONE: ", vis)

## Report trace plots

In [None]:
###############################################################################
# Figure traces
###############################################################################

# from re import X

def interactive_plotly_2D_reportold(SEQ,n_trace,df,dfall,trj_id,vis):
    """Build the report figure: energy-landscape scatter with trajectories on top.

    Args:
        SEQ: accepted for call compatibility but not used; the figure title
            is fixed to "ViDa with {vis}: P4+T4".
        n_trace: iterable of trajectory indices handed to ``plot_trj``.
            Trajectory ``i`` is labelled "trace {i+1}" and the line colours
            red/blue/black are cycled, so any number of trajectories works.
        df: frame providing the columns "{vis} 1", "{vis} 2", "HT", "Energy",
            "Pair", "Occp" and "DP" for the background scatter.
        dfall: passed through to ``plot_trj``.
        trj_id: passed through to ``plot_trj``.
        vis: prefix of the coordinate columns, also used in title/axis labels.

    Returns:
        The assembled ``go.Figure``.
    """
    def _padded_range(values, frac=0.1):
        # Widen [min, max] by `frac` of each bound's magnitude. When the
        # minimum is negative and the maximum positive this equals scaling
        # both bounds by 1.1; unlike that scaling, it also widens (instead of
        # cropping) the range when the minimum is positive or the maximum
        # is negative.
        lo, hi = min(values), max(values)
        return [lo - frac*abs(lo), hi + frac*abs(hi)]

    x_col = "{} 1".format(vis)
    y_col = "{} 2".format(vis)

    fig = go.Figure()

    # plot energy landscape background
    fig.add_trace(go.Scattergl(
            x=df[x_col],
            y=df[y_col],
            mode='markers',
            marker=dict(
                sizemode='area',
                size=df["HT"],
                sizeref=5e-11,
                color=df["Energy"],
                colorscale="Plasma",
                showscale=True,
                colorbar=dict(
                    # title.side replaces the deprecated `titleside` attribute
                    title=dict(text="Free energy (kcal/mol)", side="right"),
                    x=-0.3,
                ),
                line=dict(width=0.2),
            ),
            customdata=np.stack((df['Pair'],np.log(df["Occp"])),axis=-1),
            text=df['DP'],
            hovertemplate=
                "DP notation: <br> <b>%{text}</b><br>" +
                "X: %{x}   " + "   Y: %{y} <br>"+
                "Energy:  %{marker.color:.3f} kcal/mol<br>"+
                "Pair (0/1->unpaired/paired,resp):  %{customdata[0]}<br><br>"+
                "Average holding time:  %{marker.size:.5g} s<br>"+
                "Occupancy density (logscale):  %{customdata[1]:.3f}<br>",
            name="background",
        )
    )

    # layout trajectory on top of energy landscape
    colors = ["red", "blue", "black"]
    for idx, i in enumerate(n_trace):
        subdf = plot_trj(trj_id,dfall,i,vis,dim="2D")[0]
        fig.add_trace(
            go.Scattergl(
                x=subdf["sub X"],
                y=subdf["sub Y"],
                mode='lines+markers',
                line=dict(
                    # cycle the palette so more than three traces do not raise IndexError
                    color=colors[idx % len(colors)],
                    width=3,
                ),
                marker=dict(
                    sizemode='area',
                    size=subdf["HT"],
                    sizeref=9e-11,
                    color=subdf["Energy"],
                    colorscale="Plasma",
                    showscale=False,
                    colorbar=dict(
                        x=-0.2,
                        tickvals=[],
                        y=0.5,
                        len=1,
                    ),
                ),
                text=subdf["Step"],
                customdata=np.stack((subdf['Pair'],subdf["TotalT"],subdf["DP"]),axis=-1),
                hovertemplate=
                    "Step:  <b>%{text}</b><br><br>"+
                    "DP notation: <br> <b>%{customdata[2]}</b><br>" +
                    "X: %{x}   " + "   Y: %{y} <br>"+
                    "Energy:  %{marker.color:.3f} kcal/mol<br>"+
                    "Pair (0/1->unpaired/paired,resp):  %{customdata[0]} <br><br>"+
                    "Holding time for last step:  %{marker.size:.5g} s<br>",
                    # "Total Time:  %{customdata[1]:.5e} s<br>",
                # trajectories start hidden; click the legend entry to show one
                visible='legendonly',
                # 1-based label derived from the index (0, 4, 6 -> trace 1, 5, 7)
                name="trace {}".format(i + 1),
            )
        )

    # label initial and final states (taken from trajectory 0; computed once)
    endpoints = plot_trj(trj_id,dfall,0,vis,dim="2D")
    fig.add_trace(
        go.Scattergl(
            x=endpoints[1],
            y=endpoints[2],
            mode='markers+text',
            marker_color="lime",
            marker_size=15,
            text=["I", "F"],
            textposition="middle center",
            textfont=dict(
                family="sans serif",
                size=16,
                color="black",
            ),
            hoverinfo='skip',
            showlegend=False,
        )
    )

    fig.update_xaxes(range=_padded_range(df[x_col]))
    fig.update_yaxes(range=_padded_range(df[y_col]))

    fig.update_layout(
        title=dict(
            text="ViDa with {}: P4+T4".format(vis),
            x=0.20,
            # y=0.87,
        ),
        xaxis=dict(title=x_col),
        yaxis=dict(title=y_col),
        legend=dict(
            title="Single Trajectory",
            title_font=dict(size=10),
            font=dict(
                # family="Courier",
                size=10,
                color="black",
            ),
        ),
        font=dict(size=26),
    )

    return fig



# loadhelixdata takes (SEQ, fname); the one-argument call raised TypeError.
# Use the same fname/SEQ pairing as the other PT4 cells of this notebook.
fname = "PT4_0823-0138"
seq = fname[:-10]
loadhelixdata(seq, fname)

print(seq)

# n_trace = [0]
# VIS_METHOD = ["PCA", "PHATE", "t-SNE"]
# vis = VIS_METHOD[0]
# print(vis)
# fig = interactive_plotly_2D_reportold(seq,n_trace,df,dfall,trj_id,vis)
# pio.write_html(fig, file="{}_{}_2D_report.html".format(seq,vis), auto_open=True)

# VIS_METHOD = ["PCA", "PHATE", "t-SNE", "UMAP"]
VIS_METHOD = ["PHATE"]

# NOTE(review): interactive_plotly_2D_reportold reads df["Occp"], which the
# "make dataframe for plotting" cell does not create — confirm df has that
# column before running this cell.
n_trace = [0,4,6]
for vis in VIS_METHOD:
    print(vis)
    # Call the six-argument function defined in this cell; the former target,
    # interactive_plotly_2D_report, is not defined with this signature.
    fig = interactive_plotly_2D_reportold(seq,n_trace,df,dfall,trj_id,vis)
    pio.write_html(fig, file="{}_{}_2D_report.html".format(seq,vis), auto_open=True)


In [None]:
###############################################################################
# Figure ViDa traces
###############################################################################

def interactive_plotly_2D_reportold(SEQ,n_trace,df,dfall,trj_id,vis,trace_a,trace_b,rxn_name):
    """Build the ViDa report figure with selected trajectories shown on top.

    Note: this redefines the six-argument function of the same name from the
    previous cell; after this cell runs, only this nine-argument version exists.

    Args:
        SEQ: accepted for call compatibility but not used in the figure.
        n_trace: iterable of trajectory indices handed to ``plot_trj``.
        df: frame providing the columns "{vis} 1", "{vis} 2", "HT" and
            "Energy" for the background scatter.
        dfall: passed through to ``plot_trj``.
        trj_id: passed through to ``plot_trj``.
        vis: prefix of the coordinate columns, also used in title/axis labels.
        trace_a: number shown in the legend label of the first trajectory.
        trace_b: number shown in the legend label of the second trajectory.
        rxn_name: text appended to the title after "ViDa with {vis}: ".

    Returns:
        The assembled ``go.Figure``.
    """
    def _padded_range(values, frac=0.1):
        # Widen [min, max] by `frac` of each bound's magnitude. When the
        # minimum is negative and the maximum positive this equals scaling
        # both bounds by 1.1; unlike that scaling, it also widens (instead of
        # cropping) the range when the minimum is positive or the maximum
        # is negative.
        lo, hi = min(values), max(values)
        return [lo - frac*abs(lo), hi + frac*abs(hi)]

    x_col = "{} 1".format(vis)
    y_col = "{} 2".format(vis)

    fig = go.Figure()

    # plot energy landscape background; no custom hover template or trace
    # name is set on the background in this version
    fig.add_trace(go.Scattergl(
            x=df[x_col],
            y=df[y_col],
            mode='markers',
            marker=dict(
                sizemode='area',
                size=df["HT"],
                sizeref=5e-11,
                color=df["Energy"],
                colorscale="Plasma",
                showscale=True,
                colorbar=dict(
                    # title.side replaces the deprecated `titleside` attribute
                    title=dict(text="Free energy (kcal/mol)", side="right"),
                    x=-0.3,
                ),
                line=dict(width=0.2),
            ),
        )
    )

    # layout trajectory on top of energy landscape
    names = [f"trace {trace_a}", f"trace {trace_b}"]
    colors = ["black", "blue"]
    for idx, i in enumerate(n_trace):
        subdf = plot_trj(trj_id,dfall,i,vis,dim="2D")[0]
        # The first two trajectories keep the caller-supplied labels; any
        # further ones get a 1-based label instead of raising IndexError.
        label = names[idx] if idx < len(names) else f"trace {i + 1}"
        fig.add_trace(
            go.Scattergl(
                x=subdf["sub X"],
                y=subdf["sub Y"],
                mode='lines+markers',
                line=dict(
                    # cycle the palette so extra traces do not raise IndexError
                    color=colors[idx % len(colors)],
                    width=3,
                ),
                marker=dict(
                    sizemode='area',
                    size=subdf["HT"],
                    # sizeref=8e-11,
                    sizeref=11e-11,
                    color=subdf["Energy"],
                    colorscale="Plasma",
                    showscale=False,
                    colorbar=dict(
                        x=-0.2,
                        tickvals=[],
                        y=0.5,
                        len=1,
                    ),
                ),
                visible=True,
                name=label,
            )
        )

    # label initial and final states (taken from trajectory 0; computed once)
    endpoints = plot_trj(trj_id,dfall,0,vis,dim="2D")
    fig.add_trace(
        go.Scattergl(
            x=endpoints[1],
            y=endpoints[2],
            mode='markers+text',
            marker_color="lime",
            marker_size=15,
            text=["I", "F"],
            textposition="middle center",
            textfont=dict(
                family="sans serif",
                size=16,
                color="black",
            ),
            hoverinfo='skip',
            showlegend=False,
        )
    )

    fig.update_xaxes(range=_padded_range(df[x_col]))
    fig.update_yaxes(range=_padded_range(df[y_col]))

    fig.update_layout(
        title=dict(
            text="ViDa with {}: {}".format(vis,rxn_name),
            x=0.20,
            # y=0.87,
        ),
        xaxis=dict(title=x_col),
        yaxis=dict(title=y_col),
        legend=dict(
            title="Single Trajectory",
            title_font=dict(size=10),
            font=dict(
                # family="Courier",
                size=10,
                color="black",
            ),
        ),
        font=dict(size=26),
    )
    return fig


########## MAIN ##########
# fname = "PT0_usePT4_03040216"
# rxn_name = "Pretrained P0+T0"

fname = "PT4_0823-0138"
SEQ = fname[:-10]
# rxn_name was only set in the commented-out PT0 configuration above, so the
# call below raised NameError. "P4+T4" is the title text the earlier report
# function hard-codes for this sequence.
rxn_name = "P4+T4"

loadhelixdata(SEQ,fname)
print(SEQ)

# 1-based trajectory numbers to highlight
trace_a = 38
trace_b = 84

VIS_METHOD = ["PHATE"]
# plot_trj is given 0-based indices
n_trace = [trace_a-1,trace_b-1]
for vis in VIS_METHOD:
    print(vis)
    # Call the nine-argument function defined in this cell and pass this
    # cell's SEQ (the lowercase `seq` was a leftover from an earlier cell).
    fig = interactive_plotly_2D_reportold(SEQ,n_trace,df,dfall,trj_id,vis,trace_a,trace_b,rxn_name)
    pio.write_html(fig, file=f"../output_files/saved_ViDa_plots/plot_dna29/{fname}_{vis}_dna29.html")


In [None]:
def _padded_axis_range(values):
    """Return ``[low, high]`` axis limits pushed 10 % of their magnitude outwards.

    Each bound is scaled *away* from the data: a negative minimum or a
    positive maximum is multiplied by 1.1, while a positive minimum or a
    negative maximum is multiplied by 0.9. Scaling both bounds by 1.1
    regardless of sign would move a positive minimum (or a negative maximum)
    into the data and clip points.
    """
    low, high = min(values), max(values)
    return [low * (1.1 if low < 0 else 0.9),
            high * (1.1 if high > 0 else 0.9)]


def interactive_plotly_2D_report(SEQ,df,dfall,vis,trace_rows=(2,7),trace_colors=("cyan","red")):
    """Build the interactive 2-D landscape figure with selected trajectories.

    NOTE(review): this notebook also calls a function of the same name with
    nine arguments; defining this one replaces whatever was bound to the name
    before, so the two cannot be used side by side.

    Parameters
    ----------
    SEQ :
        Not used by this function; kept so existing calls keep working.
    df :
        Mapping/DataFrame indexed by column name. Columns read:
        ``"<vis> 1"``, ``"<vis> 2"`` (marker coordinates), ``"HT"`` (marker
        area), ``"Energy"`` (marker colour) and ``"DP"`` (hover text).
    dfall :
        Mapping/DataFrame of per-trajectory data, indexed first by key and
        then by trajectory row. Keys read: ``"<vis>"`` (2-D array, columns 0
        and 1 are x and y), ``"DP"``, ``"HT"``, ``"Energy"``, ``"Pair"``,
        ``"TotalT"`` and ``"IDX"`` (used in the legend name).
    vis :
        Name of the embedding, e.g. ``"MDS"``; selects the columns above.
    trace_rows :
        Rows of ``dfall`` drawn as individual trajectories. Defaults to the
        two rows that were previously hard-coded.
    trace_colors :
        Non-empty sequence of line colours, reused cyclically when there are
        more rows than colours.

    Returns
    -------
    The assembled ``go.Figure``.
    """
    fig = go.Figure()
    x_col = "{} 1".format(vis)
    y_col = "{} 2".format(vis)
    emb_key = f"{vis}"

    # Energy landscape background: one marker per row of df.
    fig.add_trace(go.Scattergl(
            x=df[x_col],
            y=df[y_col],
            mode='markers',
            marker=dict(
                sizemode='area',
                size=df["HT"],
                sizeref=5e-11,
                color=df["Energy"],
                colorscale="Plasma",
                showscale=True,
                colorbar=dict(
                    # `title.side` replaces the deprecated `titleside`
                    # attribute, which recent plotly releases reject.
                    title=dict(text="Free energy (kcal/mol)", side="right"),
                    orientation='h',
                    tickfont=dict(size=20)
                ),
                line=dict(width=0.2),
            ),
            text=df['DP'],
            hovertemplate=
                "DP notation: <br> <b>%{text}</b><br>" +
                "X: %{x}   " + "   Y: %{y} <br>"+
                "Energy:  %{marker.color:.3f} kcal/mol<br><br>"+
                "Average holding time:  %{marker.size:.5g} s<br>",
            name="Energy landscape",
        )
    )

    # Selected trajectories, hidden until toggled from the legend.
    for position, row in enumerate(trace_rows):
        n_steps = len(dfall["DP"][row])
        # Trajectories of 2000 steps or more get None instead of step numbers
        # in the hover text.
        if n_steps < 2000:
            step_labels = np.arange(n_steps)
        else:
            step_labels = np.full(n_steps, None, dtype=object)

        coords = dfall[emb_key][row]
        fig.add_trace(
            go.Scattergl(
                x=coords[:,0],
                y=coords[:,1],
                mode='lines+markers',
                line=dict(
                    color=trace_colors[position % len(trace_colors)],
                    width=2,
                ),
                marker=dict(
                    sizemode='area',
                    size=dfall["HT"][row],
                    sizeref=3e-10,
                    color=dfall["Energy"][row],
                    colorscale="Plasma",
                    showscale=False,
                    colorbar=dict(
                        x=-0.2,
                        y=0.5,
                        tickvals=[],
                        len=1,
                    ),
                ),
                text=step_labels,
                # Column order matters: the hover template addresses
                # customdata[1] (total time) and customdata[2] (DP string).
                customdata=np.stack((dfall['Pair'][row],
                                     dfall['TotalT'][row],
                                     dfall['DP'][row],
                                     ),axis=-1),
                hovertemplate=
                    "Step:  <b>%{text}</b><br><br>"+
                    "DP notation: <br> <b>%{customdata[2]}</b><br>" +
                    "X: %{x}   " + "   Y: %{y} <br>"+
                    "Energy:  %{marker.color:.3f} kcal/mol<br><br>"+
                    "Holding time for last step:  %{marker.size:.5g} s<br>"+
                    "Total time until current state:  %{customdata[1]:.5e} s<br>",
                visible='legendonly',
                name = "Trace {}".format(dfall["IDX"][row]),
            )
        )

    # Label the initial ("I") and final ("F") states using the first and last
    # point of trajectory 0.
    # NOTE(review): assumes every trajectory shares these end points - confirm.
    reference = dfall[emb_key][0]
    fig.add_trace(
        go.Scattergl(
            x=[reference[0,0],reference[-1,0]],
            y=[reference[0,1],reference[-1,1]],
            mode='markers+text',
            marker_color="lime",
            marker_size=25,
            text=["I", "F"],
            textposition="middle center",
            textfont=dict(
                family="sans serif",
                size=16,
                color="black"
            ),
            hoverinfo='skip',
            showlegend=False,
        )
    )

    fig.update_xaxes(range=_padded_axis_range(df[x_col]))
    fig.update_yaxes(range=_padded_axis_range(df[y_col]))

    return fig


## Plotting
# Build the MDS view with the four-argument interactive_plotly_2D_report
# defined above and save it as a standalone HTML file.
# NOTE(review): `df` and `dfall` are not created in this cell; they must
# already exist in the kernel from an earlier cell.
fname = "PT4_0823-0138"
# Drop the 10-character suffix "_0823-0138", leaving the sequence tag "PT4".
SEQ = fname[:-10]
vis = "MDS"

fig = interactive_plotly_2D_report(SEQ,df,dfall,vis)
# auto_open=True is passed so the written file is opened once it is saved.
pio.write_html(fig, file=f"../output_files/saved_ViDa_plots/plot_dna29/{fname}_{vis}_dna29.html", auto_open=True)


### Coarse-Grained (CG) Plot

In [None]:
def interactive_cgplot_report(SEQ,n_trace,grid_G,trj_id,correct_interpair,not_infinalstructure,trace_a,trace_b,rxn_name):
    """Build the coarse-grained heatmap figure with two overlaid trajectories.

    Parameters
    ----------
    SEQ :
        Not used by this function; kept so existing calls keep working.
    n_trace :
        Trajectory indices handed one by one to ``plot_trj_cg``. At most two
        entries are supported because only two legend names/colours exist.
    grid_G :
        2-D array of energies; it is transposed before plotting, so its first
        axis runs along x ("# bp in final" in the hover text) and its second
        along y ("# bp NOT in final").
    trj_id, correct_interpair, not_infinalstructure :
        Forwarded unchanged to ``plot_trj_cg``. The first and last entries of
        the latter two also position the "I" and "F" markers.
    trace_a, trace_b :
        Numbers shown in the legend for the first and second entry of
        ``n_trace`` respectively.
    rxn_name :
        Reaction label appended to the figure title.

    Returns
    -------
    The assembled ``go.Figure``.
    """
    fig = go.Figure()

    # Background heatmap.
    # NOTE(review): the axes are fixed to 0..25, which presumably matches the
    # size of grid_G - confirm against grid_energy.
    grid_ticks = np.arange(26)
    fig.add_trace(go.Heatmap(
        x=grid_ticks,
        y=grid_ticks,
        z=grid_G.T,
        showscale=True,
        colorbar=dict(
            # `title.side` replaces the deprecated `titleside` attribute,
            # which recent plotly releases reject.
            title=dict(text="Free energy (kcal/mol)", side="right"),
            x=-0.24,
        ),
        colorscale = 'plasma',
        hovertemplate=
                "# bp in final:  %{x}<br>" + 
                "# bp NOT in final:  %{y}<br>" + 
                "Average energy:  %{z:.5f} kcal/mol<br>",
        name="background",
        showlegend=True,
    ))

    # One line per requested trajectory, hidden until toggled from the legend.
    names = [f"trace {trace_a}", f"trace {trace_b}"]
    colors = ["red", "cyan"]
    for position, trace_index in enumerate(n_trace):
        X, Y = plot_trj_cg(trace_index, trj_id, correct_interpair, not_infinalstructure)
        fig.add_trace(go.Scatter(
            x=X,
            y=Y,
            mode='lines',
            line=dict(color=colors[position], width=3),
            showlegend=True,
            hoverinfo='all',
            visible='legendonly',
            name=names[position],
        ))

    # Label the initial ("I") and final ("F") states with the first and last
    # entries of the two count arrays.
    fig.add_trace(
        go.Scattergl(
            x=[correct_interpair[0], correct_interpair[-1]],
            y=[not_infinalstructure[0], not_infinalstructure[-1]],
            mode='markers+text',
            marker_color="lime",
            marker_size=35,
            text=["I", "F"],
            textposition="middle center",
            textfont=dict(
                family="sans serif",
                size=25,
                color="black"
            ),
            hoverinfo='skip',
            showlegend=False,
        )
    )

    fig.update_xaxes(range=[-1,26])
    fig.update_yaxes(range=[-1,15.8])

    fig.update_layout(
        title=dict(
            # Spelling corrected from "Coarsed-grained".
            text= f"Coarse-grained: {rxn_name}",
            x=0.2,
        ),
        xaxis=dict(
            title="Number of base pairs in the final structure",
        ),
        yaxis=dict(
            title="Number of base pairs  NOT in the final structure",
        ),
        autosize=True,
        legend=dict(
            title="Single Trajectory",
            title_font=dict(size=10),
            font=dict(
                size=10,
                color="black"
            )
        ),
        font=dict(
            size=26,
        )
    )

    return fig

# ########## MAIN ##########
# Driver for the coarse-grained plot: build the energy grid, draw it with two
# trajectories overlaid and save the figure as HTML.
seq = "PT4"
rxn_name = "P4+T4"
# Data loading is disabled here, so this cell relies on `correct_interpair`,
# `not_infinalstructure`, `SIMS_G` and `trj_id` already being present as
# globals (loadhelixdata publishes them when it is run in another cell).
# loadhelixdata(seq,seq)
# print(seq)

# Trace numbers shown in the legend; n_trace holds the same traces shifted
# down by one (presumably 0-based indices - TODO confirm).
trace_a = 84
trace_b = 38
n_trace=[trace_a-1,trace_b-1]
grid_g=grid_energy(correct_interpair, not_infinalstructure, SIMS_G)
# Cells that are exactly 0 become None, which turns the result into an object
# array - presumably so cells without data are left blank in the heatmap
# (TODO confirm what grid_energy returns for unvisited cells).
grid_G = np.where(grid_g == 0, None, grid_g)
# Put the value 0 back into cell (0, 0), which the line above set to None.
grid_G[0,0] = 0
fig = interactive_cgplot_report(seq,n_trace,grid_G,trj_id,correct_interpair,not_infinalstructure, trace_a,trace_b,rxn_name)
# The file name is relative, so "<seq>_cg.html" is written to the current
# working directory; auto_open=True is passed so the file is opened afterwards.
pio.write_html(fig, file="{}_cg.html".format(seq), auto_open=True)
