In [None]:
%matplotlib inline
import numpy as np
import plotly.io as pio
from misc import *
from interplot import *
import plotly.express as px

### Load data

In [None]:
def _embedding_columns(arrays, infix=""):
    """Return the ordered embedding columns shared by both plotting frames.

    ``infix`` selects the array family: "" reads ``pca_coords``,
    ``tsne_coord_2d``, ...; "_all" reads ``pca_all_coords``,
    ``tsne_all_coord_2d``, ...
    """
    pca = arrays["pca{}_coords".format(infix)]
    phate = arrays["phate{}_coords".format(infix)]
    tsne_2d = arrays["tsne{}_coord_2d".format(infix)]
    tsne_3d = arrays["tsne{}_coord_3d".format(infix)]
    umap_2d = arrays["umap{}_coord_2d".format(infix)]
    umap_3d = arrays["umap{}_coord_3d".format(infix)]
    return {
        "PCA 1": pca[:, 0], "PCA 2": pca[:, 1],
        "PCA X": pca[:, 0], "PCA Y": pca[:, 1], "PCA Z": pca[:, 2],
        "PHATE 1": phate[:, 0], "PHATE 2": phate[:, 1],
        "t-SNE 1": tsne_2d[:, 0], "t-SNE 2": tsne_2d[:, 1],
        "t-SNE X": tsne_3d[:, 0], "t-SNE Y": tsne_3d[:, 1], "t-SNE Z": tsne_3d[:, 2],
        "UMAP 1": umap_2d[:, 0], "UMAP 2": umap_2d[:, 1],
        "UMAP X": umap_3d[:, 0], "UMAP Y": umap_3d[:, 1], "UMAP Z": umap_3d[:, 2],
    }


def load_data(seq):
    """Load the saved trajectory data for one strand from its npz file.

    Parameters
    ----------
    seq : str
        Strand name. "PT3", "PT4" and "PT3_hairpin" are read from the
        ``..._multrj_100epoch_py.npz`` files; "PT0" and "PT4_hairpin" from
        the ``..._multrj_60epoch_py.npz`` files.

    Returns
    -------
    df : pandas.DataFrame
        Built from the ``*_uniq`` arrays, ``occ_density_S`` and the
        embedding coordinate arrays without the ``_all`` infix.
    dfall : pandas.DataFrame
        Built from ``SIMS_G_uniq`` / ``SIMS_pair_uniq`` expanded through
        ``coord_id_S``, from ``SIMS_dict``, ``SIMS_HT``, ``SIMS_T`` and from
        the ``*_all_*`` embedding coordinate arrays.
    trj_id : numpy.ndarray
        The ``trj_id`` array stored in the file, returned unchanged.

    Raises
    ------
    ValueError
        If ``seq`` is not one of the known strand names.

    Notes
    -----
    Every array in the file is also copied into the module's global
    namespace under its stored name. This is kept only so that existing
    cells relying on those globals continue to work; each call overwrites
    the values left by the previous call.
    """
    # Number in the file name for each strand (the "<N>epoch" part).
    epochs_by_seq = {
        "PT3": 100, "PT4": 100, "PT3_hairpin": 100,
        "PT0": 60, "PT4_hairpin": 60,
    }
    if seq not in epochs_by_seq:
        raise ValueError(
            "unknown seq {!r}; expected one of {}".format(seq, sorted(epochs_by_seq))
        )
    fnpz_data = "data/helix_assoc/helix_assoc_{}_multrj_{}epoch_py.npz".format(
        seq, epochs_by_seq[seq]
    )

    # Read every array while the archive is open, then close the file handle.
    with np.load(fnpz_data) as data_npz:
        arrays = {var: data_npz[var] for var in data_npz.files}

    # Backward compatibility: expose the raw arrays as globals (see Notes).
    globals().update(arrays)

    # Recover the full-length data by indexing the unique arrays with coord_id_S.
    coord_id_S = arrays["coord_id_S"]
    SIMS_G = arrays["SIMS_G_uniq"][coord_id_S]
    SIMS_pair = arrays["SIMS_pair_uniq"][coord_id_S]

    # Data frame over the "*_uniq" arrays.
    df_columns = {
        "Energy": arrays["SIMS_G_uniq"],
        "Occp": arrays["occ_density_S"],
        "Pair": arrays["SIMS_pair_uniq"],
        "DP": arrays["SIMS_dict_uniq"][:, 0],
        "HT": arrays["SIMS_HT_uniq"],
    }
    df_columns.update(_embedding_columns(arrays))
    df = pd.DataFrame(data=df_columns)

    # Data frame over the full-length arrays.
    dfall_columns = {
        "Energy": SIMS_G,
        "Pair": SIMS_pair,
        "DP": arrays["SIMS_dict"][:, 0],
        "HT": arrays["SIMS_HT"],
        "TotalT": arrays["SIMS_T"],
    }
    dfall_columns.update(_embedding_columns(arrays, "_all"))
    dfall = pd.DataFrame(data=dfall_columns)

    return df, dfall, arrays["trj_id"]

In [None]:
# Load each strand once. Every call returns (df, dfall, trj_id) and the
# results are kept under per-strand names because later cells refer to them.
# NOTE: load_data also writes the raw arrays of the most recently loaded file
# into globals, so after this cell those globals belong to "PT4_hairpin".
df_PT0,dfall_PT0,trj_id_PT0 = load_data(seq="PT0")
df_PT3,dfall_PT3,trj_id_PT3 = load_data(seq="PT3")
df_PT4,dfall_PT4,trj_id_PT4 = load_data(seq="PT4")
df_PT3_hairpin,dfall_PT3_hairpin,trj_id_PT3_hairpin = load_data(seq="PT3_hairpin")
df_PT4_hairpin,dfall_PT4_hairpin,trj_id_PT4_hairpin = load_data(seq="PT4_hairpin")


### PT0 vs. PT3 vs. PT4 (and hairpin variants)

In [None]:
def trj_time(trj_id, dfall, n_trj=100):
    """Compute, per trajectory, the unpaired/paired holding-time ratio and the final time.

    Rows of ``dfall`` are split into consecutive trajectories: trajectory
    ``i`` covers rows ``trj_id[i-1]+1`` (or 0 for the first one) up to and
    including ``trj_id[i]``. If ``trj_id`` has one entry fewer than
    ``n_trj``, the last trajectory runs to the end of ``dfall``.

    Parameters
    ----------
    trj_id : array-like of int
        Row index of the last row of each trajectory.
    dfall : pandas.DataFrame
        Must contain the columns "Pair", "HT" and "TotalT".
    n_trj : int, optional
        Number of trajectories to process. Defaults to 100, the value that
        was previously hard-coded.

    Returns
    -------
    ratio : numpy.ndarray
        For each trajectory, sum of "HT" where "Pair" == 0 divided by the
        sum of "HT" where "Pair" == 1. The value is inf or nan when the
        trajectory has no "Pair" == 1 holding time.
    time_TOT : numpy.ndarray
        For each trajectory, the "TotalT" value of its last row.

    Raises
    ------
    ValueError
        If ``trj_id`` holds too few boundaries for ``n_trj`` trajectories.
    """
    # Exclusive end row of each trajectory; computed once instead of per iteration.
    stops = np.asarray(trj_id) + 1
    if n_trj > len(stops) + 1:
        raise ValueError(
            "trj_id has {} boundaries, which is too few for {} trajectories".format(
                len(stops), n_trj
            )
        )

    ratio = np.empty(n_trj)
    time_TOT = np.empty(n_trj)

    for i in range(n_trj):
        start = 0 if i == 0 else stops[i - 1]
        # Past the last stored boundary, the trajectory extends to the final row.
        stop = stops[i] if i < len(stops) else len(dfall)

        # Positional row slice holding this trajectory's Pair, HT and TotalT.
        segment = dfall.iloc[start:stop]
        holding = segment["HT"]
        pair = segment["Pair"]

        ratio[i] = holding[pair == 0].sum() / holding[pair == 1].sum()
        time_TOT[i] = segment["TotalT"].iloc[-1]

    return ratio, time_TOT

In [None]:
# Per-strand results of trj_time: the per-trajectory unpaired/paired
# holding-time ratio (u2pratio_*) and the per-trajectory final time
# (time_TOT_*). The plotting cells below read these names directly.
u2pratio_PT0, time_TOT_PT0 = trj_time(trj_id_PT0,dfall_PT0)
u2pratio_PT3, time_TOT_PT3 = trj_time(trj_id_PT3,dfall_PT3)
u2pratio_PT4, time_TOT_PT4 = trj_time(trj_id_PT4,dfall_PT4)
u2pratio_PT3_hairpin, time_TOT_PT3_hairpin = trj_time(trj_id_PT3_hairpin,dfall_PT3_hairpin)
u2pratio_PT4_hairpin, time_TOT_PT4_hairpin = trj_time(trj_id_PT4_hairpin,dfall_PT4_hairpin)


In [None]:
# First line: mean final time per strand; second line: mean unpaired/paired
# ratio per strand. Order is PT0, PT3, PT4, PT3_hairpin, PT4_hairpin.
print(*(
    per_trj.mean()
    for per_trj in (time_TOT_PT0, time_TOT_PT3, time_TOT_PT4,
                    time_TOT_PT3_hairpin, time_TOT_PT4_hairpin)
))
print(*(
    per_trj.mean()
    for per_trj in (u2pratio_PT0, u2pratio_PT3, u2pratio_PT4,
                    u2pratio_PT3_hairpin, u2pratio_PT4_hairpin)
))


In [None]:
# average time: PT0 vs. PT3 vs. PT4 (and the hairpin variants)
# Each mean is computed once and divided by 1e-6 to match the microsecond
# axis label (assumes the stored times are in seconds -- TODO confirm).
strand_names = ["PT0", "PT3", "PT3_hairpin", "PT4", "PT4_hairpin"]
mean_time_us = [
    time_TOT_PT0.mean()/1e-6,
    time_TOT_PT3.mean()/1e-6,
    time_TOT_PT3_hairpin.mean()/1e-6,
    time_TOT_PT4.mean()/1e-6,
    time_TOT_PT4_hairpin.mean()/1e-6,
]

fig = px.bar(
    pd.DataFrame({"x": strand_names, "height": mean_time_us}),
    x="x",
    y="height",
    # Bar labels reuse the plotted values so text and bar height cannot drift apart.
    text=['{:.3f} \u03bcs'.format(value) for value in mean_time_us],
)

fig.update_layout(bargap=0.3,
                  title={"text": "Average reaction time over 100 simulations for different strands", "x": 0.5},
                  xaxis_title="Sample name",
                  yaxis_title="Average time (\u03bcs)"
)
fig.update_traces(textfont_size=12, textangle=0, textposition="outside", cliponaxis=False)

fig.show()

In [None]:
# Grouped bars per strand: mean of the trj_time ratio ("Unpaired") and one
# minus that mean ("Paired").
PT = ["PT0", "PT3", "PT3_hairpin", "PT4", "PT4_hairpin"]
Y_unp = [u2pratio_PT0.mean(), u2pratio_PT3.mean(), u2pratio_PT3_hairpin.mean(), u2pratio_PT4.mean(), u2pratio_PT4_hairpin.mean()]
# NOTE(review): trj_time returns (unpaired HT) / (paired HT), which is not
# bounded by 1, so "1 - ratio" is not a paired share of the time in general.
# The plotted values are kept as they were -- confirm the intended quantity.
Y_p = [1 - y for y in Y_unp]

fig = go.Figure()
fig.add_trace(go.Bar(x=PT,
                y=Y_unp,
                name='Unpaired',
                text=['{:.3f}'.format(y) for y in Y_unp],
                textposition="auto",
                ))
fig.add_trace(go.Bar(x=PT,
                y=Y_p,
                name='Paired',
                text=['{:.3f}'.format(y) for y in Y_p],
                textposition="auto",
                ))
fig.update_layout(
    # title.font / axis.title.font replace the deprecated "titlefont" shorthand,
    # which recent Plotly releases no longer accept.
    title=dict(
        text='Ratio of reaction time of unpaired and paired for different strands over 100 simulations',
        font=dict(size=15),
    ),
    xaxis=dict(
        title=dict(text='Sample name'),
        tickfont=dict(size=14),
    ),
    yaxis=dict(
        title=dict(text='Ratio of unpaired and paired', font=dict(size=14)),
    ),
    legend=dict(
        bgcolor='rgba(255, 255, 255, 0)',
        bordercolor='rgba(255, 255, 255, 0)'
    ),
    barmode='group',
    bargap=0.15, # gap between bars of adjacent location coordinates.
    bargroupgap=0.1 # gap between bars of the same location coordinate.
)
fig.show()

In [None]:
# Bar chart of the final time of every individual simulation, one trace per strand.
fig = go.Figure()

for strand_label, strand_times in [
    ("PT0", time_TOT_PT0),
    ("PT3", time_TOT_PT3),
    ("PT3-hairpin", time_TOT_PT3_hairpin),
    ("PT4", time_TOT_PT4),
    ("PT4-hairpin", time_TOT_PT4_hairpin),
]:
    fig.add_trace(
        go.Bar(
            # 1-based simulation number, sized from the data instead of a fixed 100.
            x=np.arange(1, len(strand_times) + 1),
            # Divided by 1e-6 to match the microsecond axis label.
            y=strand_times/1e-6,
            name=strand_label,
            width=1,
        ))

fig.update_layout(
    # title.font / axis.title.font replace the deprecated "titlefont" shorthand,
    # which recent Plotly releases no longer accept.
    title=dict(
        text='Individual reaction time over 100 simulations for different strands',
        font=dict(size=16),
    ),
    xaxis=dict(
        title=dict(text='Simulations'),
        tickfont=dict(size=14),
    ),
    yaxis=dict(
        title=dict(text='Reaction time (\u03bcs)', font=dict(size=14)),
    ),
    bargap=0.15, # gap between bars of adjacent location coordinates.
    bargroupgap=0.1 # gap between bars of the same location coordinate.
)

fig.show()

In [None]:
# Line chart of the final time of every individual simulation, one trace per strand.
fig = go.Figure()

for strand_label, strand_times, strand_color in [
    ("PT0", time_TOT_PT0, 'black'),
    ("PT3", time_TOT_PT3, 'red'),
    ("PT3_hairpin", time_TOT_PT3_hairpin, 'blue'),
    ("PT4", time_TOT_PT4, 'green'),
    ("PT4_hairpin", time_TOT_PT4_hairpin, 'yellow'),
]:
    fig.add_trace(
        go.Scatter(
            # Divided by 1e-6 to match the microsecond axis label.
            y=strand_times/1e-6,
            name=strand_label,
            line_color=strand_color,
        ))

fig.update_layout(
    # title.font / axis.title.font replace the deprecated "titlefont" shorthand,
    # which recent Plotly releases no longer accept.
    title=dict(
        text='Individual reaction time over 100 simulations for different strands',
        font=dict(size=16),
    ),
    xaxis=dict(
        title=dict(text='Simulations'),
        tickfont=dict(size=14),
    ),
    yaxis=dict(
        # The traces are per-simulation times, not averages.
        title=dict(text='Reaction time (\u03bcs)', font=dict(size=14)),
    ),
)

fig.show()

## Draw helix structure

In [None]:
import networkx as nx
from networkx.drawing.nx_pylab import draw_networkx
from networkx.drawing.layout import *
import matplotlib.pyplot as plt

In [None]:
PT3 = "TGACGATCATGTCTGCGTGACTAGA"  # PT3 sequence

# Dot-bracket strings assembled from (symbol, run length) segments.
# top_P3: "(" at positions 11,12,13 and ")" at 19,20,21 (1-based)
top_dp = "".join(
    symbol * run
    for symbol, run in ((".", 10), ("(", 3), (".", 5), (")", 3), (".", 4))
)
# bot_T3: "(" at positions 5,6,7 and ")" at 13,14,15 (1-based)
bot_dp = "".join(
    symbol * run
    for symbol, run in ((".", 4), ("(", 3), (".", 5), (")", 3), (".", 10))
)

top_dp, bot_dp

In [None]:
# Draw the graph built from the PT3 top-strand dot-bracket string.
P3_dp = top_dp

# dot2adj comes from a star import; presumably it returns a square adjacency
# matrix for the dot-bracket string -- TODO confirm.
adj_P3 = dot2adj(P3_dp,hairpin=True, helix=False)

# from_numpy_matrix was removed in NetworkX 3.0; from_numpy_array is its
# replacement. np.asarray guards against dot2adj returning an np.matrix.
g3 = nx.from_numpy_array(np.asarray(adj_P3))

nx.draw(g3, node_size=90, pos=kamada_kawai_layout(g3),with_labels=True,font_size=15)

In [None]:
# Draw the graph built from a PT4 dot-bracket string.
PT4 = "ACACGATCATGTCTGCGTGACTAGA"  # PT4
P4_dp = "."*1+"("*4+"."*10+")"*4+"."*6

# dot2adj comes from a star import; presumably it returns a square adjacency
# matrix for the dot-bracket string -- TODO confirm.
adj_P4 = dot2adj(P4_dp,hairpin=True, helix=False)

# from_numpy_matrix was removed in NetworkX 3.0; from_numpy_array is its
# replacement. np.asarray guards against dot2adj returning an np.matrix.
g4 = nx.from_numpy_array(np.asarray(adj_P4))

nx.draw(g4, node_size=90, pos=kamada_kawai_layout(g4),with_labels=True,font_size=15)

In [None]:
# Display the PT4 dot-bracket string built in the previous cell.
P4_dp

In [None]:
# Draw the graph built from a single-strand dot-bracket string.
dp = "..(((.(.......))))......."

# dot2adj comes from a star import; presumably it returns a square adjacency
# matrix for the dot-bracket string -- TODO confirm.
adj_dp= dot2adj(dp,hairpin=True, helix=False)

# from_numpy_matrix was removed in NetworkX 3.0; from_numpy_array is its
# replacement. np.asarray guards against dot2adj returning an np.matrix.
g_dp = nx.from_numpy_array(np.asarray(adj_dp))

nx.draw(g_dp, node_size=90, pos=kamada_kawai_layout(g_dp),with_labels=True,font_size=15)

In [None]:
# Two 25-character dot-bracket strings written back to back:
# ..(((.(.......)))).....(. + .(..(((.....)))..).....). #

dp = "..(((.(.......)))).....(..(..(((.....)))..).....)."

# dot2adj comes from a star import; presumably it returns a square adjacency
# matrix for the dot-bracket string -- TODO confirm.
adj_dp= dot2adj(dp,helix=True)

# from_numpy_matrix was removed in NetworkX 3.0; from_numpy_array is its
# replacement. np.asarray guards against dot2adj returning an np.matrix.
g_dp = nx.from_numpy_array(np.asarray(adj_dp))

nx.draw(g_dp, node_size=90, pos=kamada_kawai_layout(g_dp),with_labels=True,font_size=8)