In [None]:
import numpy as np
import matplotlib.pyplot as plt
import pickle
import h5py
from misc import *

from sklearn.decomposition import PCA
import phate

## 1. Import/generate Data

### 1.1 Load multiple simulated trajectories from Multistrand

In [None]:
# load multiple trajectories from multiple files
# `load_multitrj` is not defined in this notebook; presumably it comes from `from misc import *`.
folder_name = "data/helix_assoc_PT3_new/assoc_PT3_1sim_20C"
# folder_name = "data/helix_assoc_PT0_new/assoc_PT0_1sim_20C"

# define absorbing (final) state structure
FINAL_STRUCTURE = "(((((((((((((((((((((((((+)))))))))))))))))))))))))"
num_files = 100  # presumably the number of trajectory files to read -- TODO confirm in load_multitrj

# Three views of the same simulations are returned; `SIMS_retrieve` is array-like
# (its `.shape` is printed below) and `SIMS_concat` feeds the adjacency conversion in section 2.
SIMS,SIMS_retrieve,SIMS_concat = load_multitrj(folder_name,FINAL_STRUCTURE,num_files)

# Quick size report of what was loaded.
print("SIMS: ", len(SIMS))
print("SIMS_retrieve: ", SIMS_retrieve.shape)
print("SIMS_concat: ", len(SIMS_concat))

### 1.2 Load a single simulated trajectory from Multistrand

In [None]:
# # load text file
# # f = open('./data/helix_assos/assos_PT3_1sim_20C_21.txt', 'r') # PT3 
# # STRAND_NAME = "assos_PT3_1sim_20C_21"

# f = open('./data/helix_assos/assos_PT0_1sim_20C_51.txt', 'r') # PT0
# STRAND_NAME = "assos_PT0_1sim_20C_51"

# """ Dimenstions of SIM list 
# SIM: [[sim1], [sim2], ...]
# sim: [[state1], [state2], ...]
# state: [structure, time, energy]
# """
# # define absorbing (final) state structure
# FINAL_STRUCTURE = "(((((((((((((((((((((((((+)))))))))))))))))))))))))"

# SIM = loadtrj(f,FINAL_STRUCTURE,type="Multiple")
# SIM_retrieve = np.array(SIM)
# SIM_concat = concat_helix_structures(SIM) 

# print("SIM: ", len(SIM))
# print("SIM_retrieve: ", SIM_retrieve.shape)
# print("SIM_concat: ", len(SIM_concat))

### 2. Convert dot-paren to adjacency matrix

In [None]:
""" Dimenstions of SIM_adj list 
SIM_adj: N*m*m
    N: number of states in the trajectory
    m: number of nucleotides in the state (strand)
"""
# # get single trajectory's data
# # get adjacency matrix, energy, and holding time for each state
# SIM_adj,SIM_G,SIM_T,SIM_HT = sim_adj(SIM_concat)
# print(SIM_adj.shape,SIM_G.shape,SIM_T.shape,SIM_HT.shape)


# get multiple trajectories' data
# sim_adj returns four arrays (each exposes `.shape`); going by the names these are
# adjacency matrices, energies, times and holding times per state -- confirm in `sim_adj`.
SIMS_adj, SIMS_G, SIMS_T, SIMS_HT = sim_adj(SIMS_concat)
print(SIMS_adj.shape,SIMS_G.shape,SIMS_T.shape,SIMS_HT.shape)

In [None]:
# get unique states adjacency matrix with their occupancy density, get unique energy, and time;
# and their corresponding indices

# # single trajectory
# indices,occ_density,SIM_adj_uniq,SIM_G_uniq,SIM_T_uniq,SIM_HT_uniq \
#      = get_unique(SIM_concat,SIM_adj,SIM_G,SIM_T,SIM_HT)
# print(indices.shape, occ_density.shape, SIM_adj_uniq.shape,SIM_G_uniq.shape,SIM_T_uniq.shape,SIM_HT_uniq.shape)


# multiple trajectories
# get_unique returns, in this order: indices, occupancy density and the
# de-duplicated adjacency / energy / time / holding-time arrays.
# NOTE(review): whether `indices_S` points into the concatenated state list is
# assumed from its later use (`SIMS_retrieve[indices_S[...]]`) -- confirm.
indices_S,occ_density_S,SIMS_adj_uniq,SIMS_G_uniq,SIMS_T_uniq,SIMS_HT_uniq \
     = get_unique(SIMS_concat,SIMS_adj,SIMS_G,SIMS_T,SIMS_HT)
print(indices_S.shape, occ_density_S.shape, SIMS_adj_uniq.shape,SIMS_G_uniq.shape,SIMS_T_uniq.shape,SIMS_HT_uniq.shape)

In [None]:
# Sanity check: de-duplicating the "unique" adjacency stack once more should leave its shape unchanged.
SIMS_adj_uniq.shape, np.unique(SIMS_adj_uniq, axis=0).shape

### 3. Get labeled trajectory data

In [None]:
# # single trajectory
# # get trajectory data with its corresponding labels 
# SIM_dict = label_structures(SIM_concat,indices) 
# coord_id = SIM_dict[:,3].astype(int)
# print(SIM_dict.shape, coord_id.shape)
# # find the structure having the largest occupancy density
# print(SIM_retrieve[indices[occ_density.argmax()]])


# multiple trajectories
SIMS_dict = label_structures(SIMS_concat,indices_S)
# Column 3 of SIMS_dict is cast to int; it is used further down to index the
# per-unique-state embeddings (e.g. `pca_coords[coord_id_S]`).
coord_id_S = SIMS_dict[:,3].astype(int)
print(SIMS_dict.shape, coord_id_S.shape)
# find the structure having the largest occupancy density
print(SIMS_retrieve[indices_S[occ_density_S.argmax()]])

In [None]:
# # save SIMS_dict
# fname_dict = "data/helix_assoc_new/helix_assoc_new_PT3_multrj_100epoch_SIM_dict.npz"
# with open(fname_dict, 'wb') as f:
#     np.savez(f,SIMS_dict = SIMS_dict)

# # save SIMS
# fname_sims = "data/helix_assoc_new/helix_assoc_new_PT3_multrj_100epoch_SIMSpartial.npz"
# with open(fname_sims, 'wb') as f:
#     np.savez(f,
#             SIMS_adj=SIMS_adj,SIMS_G=SIMS_G,SIMS_HT=SIMS_HT,
#             SIMS_adj_uniq=SIMS_adj_uniq,SIMS_G_uniq=SIMS_G_uniq, SIMS_HT_uniq=SIMS_HT_uniq,
#             occp_S=occ_density_S, coord_id_S=coord_id_S,SIMS_dict = SIMS_dict
#             )
    
    
# Reload the cached arrays from the .npz archive (see the commented-out np.savez call above).
# NOTE: this rebinds SIMS_adj, SIMS_G, ... that the earlier cells may already have computed.
fname_sims = "data/helix_assoc_new/helix_assoc_new_PT3_multrj_100epoch_SIMSpartial.npz"

# np.load on an .npz returns a lazily-read NpzFile that holds the file open;
# read every member inside the `with` block so the archive is closed afterwards.
with np.load(fname_sims) as npzfile:
    SIMS_adj = npzfile["SIMS_adj"]
    SIMS_G = npzfile["SIMS_G"]
    SIMS_HT = npzfile["SIMS_HT"]

    SIMS_adj_uniq = npzfile["SIMS_adj_uniq"]
    SIMS_G_uniq = npzfile["SIMS_G_uniq"]
    SIMS_HT_uniq = npzfile["SIMS_HT_uniq"]

    occ_density_S = npzfile["occp_S"]
    coord_id_S = npzfile["coord_id_S"]
    SIMS_dict = npzfile["SIMS_dict"]

### 4. Convert adjacency matrices to scattering coefficients

#### SIMS_scar

In [None]:
# # Single trajectory
# # convert all states
# scat_coeff_array = transform_dataset(SIM_adj)
# SIM_scar = get_normalized_moments(scat_coeff_array).squeeze()
# print(SIM_scar.shape)

# # convert only unique states to get unique scattering
# scat_coeff_array = transform_dataset(SIM_adj_uniq)
# SIM_scar_uniq = get_normalized_moments(scat_coeff_array).squeeze()
# print(SIM_scar_uniq.shape)

In [None]:
##############
# # Multiple trajectories
# Transform every state's adjacency matrix in one pass, then take the
# normalized moments; `.squeeze()` drops singleton axes from the result.
# NOTE(review): the following cells recompute SIMS_scar chunk by chunk and
# overwrite this value -- presumably only one of the two variants needs to run.
scat_coeff_array_S = transform_dataset(SIMS_adj)
SIMS_scar = get_normalized_moments(scat_coeff_array_S).squeeze()
print(SIMS_scar.shape)
##############

In [None]:
# For large trajectories states: compute the scattering moments chunk by chunk
# (presumably to limit peak memory) and stitch the pieces back together.
# A single loop replaces nine copy-pasted slices, so it adapts to any number of
# states and never passes an empty slice to the helpers.
# NOTE(review): this assumes get_normalized_moments works per state, so that
# chunk boundaries do not influence the result -- confirm in `misc`.
# NOTE(review): a chunk holding exactly one state would lose its leading axis
# in `.squeeze()` and break the concatenation; adjust CHUNK_SIZE if that happens.
CHUNK_SIZE = 100000

SIMS_scar_chunks = []
for chunk_start in range(0, len(SIMS_adj), CHUNK_SIZE):
    adj_chunk = SIMS_adj[chunk_start:chunk_start + CHUNK_SIZE]
    SIMS_scar_chunks.append(
        get_normalized_moments(transform_dataset(adj_chunk)).squeeze()
    )

SIMS_scar = np.concatenate(SIMS_scar_chunks)

In [None]:
# # save sim_scar data
# fname_data = "data/helix_assoc_new/helix_assoc_PT3_multrj_100epoch_SIMS_scar.npz"
# with open(fname_data, 'wb') as f:
#     np.savez(f,
#             SIMS_scar=SIMS_scar,
#             )

In [None]:
# Compare the number of states with the number of distinct scattering-feature rows.
SIMS_scar.shape, np.unique(SIMS_scar, axis=0).shape

#### SIMS_scar_uniq

In [None]:
#############
# convert only unique states to get unique scattering
# Same two-step pipeline as for SIMS_scar, applied to the de-duplicated
# adjacency matrices. Note that `scat_coeff_array_S` is rebound here.
scat_coeff_array_S = transform_dataset(SIMS_adj_uniq)
SIMS_scar_uniq = get_normalized_moments(scat_coeff_array_S).squeeze()
print(SIMS_scar_uniq.shape)
#############

In [None]:
# # save sim_scar data
# fname_data = "data/helix_assoc_new/helix_assoc_PT3_multrj_100epoch_SIMS_scar_uniq.npz"
# with open(fname_data, 'wb') as f:
#     np.savez(f,
#             SIMS_scar_uniq=SIMS_scar_uniq,
#             )

In [None]:
# Do distinct structures also map to distinct scattering features? Compare row counts.
SIMS_scar_uniq.shape, np.unique(SIMS_scar_uniq, axis=0).shape

In [None]:
# # save sim_scar data
# fname_data = "data/helix_assoc_new/helix_assoc_PT3_multrj_100epoch_SIMS_scarall.npz"
# with open(fname_data, 'wb') as f:
#     np.savez(f,
#             SIMS_scar=SIMS_scar,
#             SIMS_scar_uniq=SIMS_scar_uniq,
#             )

# # fname_data = "data/helix_assos/helix_assos_PT0_multrj_100epoch__newSIMS.npz"
# # npzfile = np.load(fname_data)
# # SIMS_adj=npzfile["SIMS_adj"];SIMS_scar=npzfile["SIMS_scar"];SIMS_G=npzfile["SIMS_G"];SIMS_HT=npzfile["SIMS_HT"];
# # SIMS_adj_uniq=npzfile["SIMS_adj_uniq"];SIMS_scar_uniq=npzfile["SIMS_scar_uniq"];SIMS_G_uniq=npzfile["SIMS_G_uniq"];SIMS_HT_uniq=npzfile["SIMS_HT_uniq"];
# # occp_S=npzfile["occp_S"];coord_id_S=npzfile["coord_id_S"]


### 5. Split data into training and test sets

In [None]:
# """load saved trajectories data
# """
# # # single trajectory
# # fname_data = "/Users/chenwei/Desktop/Github/RPE/code/data/helix_assos/assos_PT0_1sim_20C_51_1985epoch.npz"

# # multiple trajectories
# fname_data = "/Users/chenwei/Desktop/Github/RPE/code/data/helix_assos/helix_assos_PT3_multrj_100epoch.h5" 

# h5file = h5py.File(fname_data,'r') 
# h5file.keys()

# SIMS_adj = h5file["SIM_adj"][()] #
# SIMS_scar = h5file["SIM_scar"][()] #
# SIMS_G = h5file["SIM_G"][()] #
# SIMS_HT = h5file["SIM_HT"][()] #

# SIMS_adj_uniq = h5file["SIM_adj_uniq"][()] #
# SIMS_scar_uniq = h5file["SIM_scar_uniq"][()] #
# SIMS_G_uniq = h5file["SIM_G_uniq"][()] #
# SIMS_HT_uniq = h5file["SIM_HT_uniq"][()] #

# coord_id_S = h5file["coord_id"][()] #
# data_embed = h5file["data_embed"][()] #
# occ_density_S = h5file["occp"][()] #

# indices_S = h5file["indices"][()] 
# pca_coords = h5file["pca_coords"][()]
# pca_all_coords = h5file["pca_all_coords"][()]

# phate_coords = h5file["phate_coords"][()] #
# phate_all_coords = h5file["phate_all_coords"][()] #

# print(SIMS_adj.shape,SIMS_scar.shape,SIMS_G.shape,SIMS_HT.shape)
# print(SIMS_adj_uniq.shape,SIMS_scar_uniq.shape,SIMS_G_uniq.shape,SIMS_HT_uniq.shape)   
# print(coord_id_S.shape,data_embed.shape,occ_density_S.shape,indices_S.shape)
# print(pca_coords.shape,pca_all_coords.shape,phate_coords.shape,phate_all_coords.shape)

In [None]:
"""Shape of split data
    train_data: [tr_adjs, tr_coeffs, tr_energies]
    test_data: [te_adjs, te_coeffs, te_energies]
"""
# Split the unique-state arrays (adjacency, scattering coefficients, energies)
# into train and test parts; the layout of the result is described in the string above.
# train_data,test_data = split_data(SIM_adj_uniq,SIM_scar_uniq,SIM_G_uniq)  # single trj
train_data,test_data = split_data(SIMS_adj_uniq,SIMS_scar_uniq,SIMS_G_uniq)  # multiple trj


### 6. Train and test dataloader

In [None]:
"""Structure of train_tup when gnn=False
    train_tup: [train_coeffs,train_energy] 
"""
# Build the loaders; load_trte also hands back the raw train/test tuples and an
# early-stopping callback (currently not passed to the trainer below).
train_loader, train_tup, test_tup, valid_loader,early_stop_callback = load_trte(train_data,test_data,
                                              batch_size=64)
# Cell output: shapes of the first element of each tuple and the loader's batch size.
train_tup[0].shape, test_tup[0].shape, train_loader.batch_size

## 2.1 Load Model

In [None]:
# set up hyperparameters

# Width of the model input = size of the last axis of the training coefficients.
input_dim = train_tup[0].shape[-1]
# Number of batches the training loader yields per epoch.
len_epoch = len(train_loader)

# Build the namespace directly from the settings mapping.
hparams = argparse.Namespace(**{
    # architecture
    'input_dim':  input_dim,
    'bottle_dim': 25,
    'hidden_dim': 400,  # not used in model

    # optimisation / schedule
    'len_epoch': len_epoch,
    'learning_rate': 0.0001,
    'max_epochs': 100,  # PT0 --> 1985 epochs; PT3 --> 60, 100, 150, 756 (overfit) epochs
    'n_gpus': 0,
    'batch_size': 64,  # not used in model

    # loss weights
    'alpha': 1.0,
    'beta': 0.0001,
})

model = GSAE(hparams)
print(model)

## 2.2 Train Model

In [None]:
# Build the Lightning trainer from the hyperparameter namespace and fit the model
# on the training loader, validating on `valid_loader`.
# NOTE(review): `from_argparse_args`, `gpus=` and `train_dataloader=` look like
# PyTorch Lightning 1.x-era spellings -- confirm against the installed version.
trainer = pl.Trainer.from_argparse_args(hparams,
                                        max_epochs=hparams.max_epochs,
                                        gpus=hparams.n_gpus,
                                        # callbacks=[early_stop_callback],
                                        )
trainer.fit(model=model,
            train_dataloader=train_loader,
            val_dataloaders=valid_loader,)

In [None]:
# Display the model's repr as the cell output.
model

In [None]:
# Start TensorBoard inside the notebook, pointed at the `lightning_logs/` directory.
%load_ext tensorboard
%tensorboard --logdir lightning_logs/ --host localhost --port 8000
#  http://localhost:8000

In [None]:
# save the trained model
# fname_model = "models/{}_model_{}epoch.pickle".format(STRAND_NAME,hparams.max_epochs) # single trj
fname_model = "models/helix_assoc_new_PT3_multrj_model_{}epoch.pickle".format(hparams.max_epochs) # multiple trj

# `with` guarantees the file is flushed and closed even if pickling raises;
# the bare `open(...)` used previously left the handle to the garbage collector.
with open(fname_model, 'wb') as model_file:
    pickle.dump(model, model_file)
print('Trained model saved.')

## 3. Load Pretrained Models

In [None]:
# fname_model = "models/helix_assoc_new_PT0_multrj_model_100epoch.pickle"
fname_model = "models/helix_assoc_new_PT3_multrj_model_100epoch.pickle"

# SECURITY: unpickling executes arbitrary code -- only load model files you produced yourself.
# The context manager closes the file handle once the model has been read.
with open(fname_model, 'rb') as model_file:
    model = pickle.load(model_file)
model

## 4. Get Embeddings

In [None]:
# # single trajectory
# with torch.no_grad():
#         data_embed = model.embed(torch.Tensor(SIM_scar_uniq))[0]

# # multiple trajectories
# Put the network in inference mode before embedding so that train-time-only
# layers (dropout, batch-norm statistics), if the model has any, do not
# perturb the embedding or get updated by this forward pass.
model.eval()
with torch.no_grad():
    # `embed` returns a tuple; its first element is kept as the embedding.
    data_embed = model.embed(torch.Tensor(SIMS_scar_uniq))[0]

In [None]:
# # do PCA for GSAE embeded data
# Reduce the embedding of the unique states to three principal components.
# NOTE(review): no random_state is set; depending on the solver scikit-learn
# picks, repeated runs may differ slightly -- consider fixing a seed.
pca_coords = PCA(n_components=3).fit_transform(data_embed)

# # get all pca embedded states coordinates
# Expand from unique states to every visited state by indexing with the
# per-state id array.
# pca_all_coords = pca_coords[coord_id]  # single trj
pca_all_coords = pca_coords[coord_id_S]  # multiple trj

pca_coords.shape, pca_all_coords.shape

In [None]:
# # do PHATE for GSAE embeded data
# n_jobs=-2 asks PHATE to use all CPUs but one.
# NOTE(review): no random_state is set, so the layout may vary between runs.
phate_operator = phate.PHATE(n_jobs=-2)
phate_coords = phate_operator.fit_transform(data_embed)

# # get all phate embedded states coordinates
# Same unique-state -> all-states expansion as for the PCA coordinates.
# phate_all_coords = phate_coords[coord_id]  # single trj
phate_all_coords = phate_coords[coord_id_S]  # multiple trj

phate_coords.shape, phate_all_coords.shape

In [None]:
# Number of distinct PCA points among the unique states vs. among all states.
np.unique(pca_coords, axis=0).shape, np.unique(pca_all_coords, axis=0).shape

In [None]:
# Number of distinct PHATE points among the unique states vs. among all states.
np.unique(phate_coords, axis=0).shape, np.unique(phate_all_coords, axis=0).shape

### Save all data to h5 / npz

In [None]:
""" NPZ file
    Save all obtained data to npz file,
    Single trajectory
"""
# fname_data = "data/helix_assos/{}_{}epoch.npz".format(STRAND_NAME,hparams.max_epochs)
# with open(fname_data, 'wb') as f:
#     np.savez(f,
#             SIM_adj=SIM_adj,SIM_scar=SIM_scar,SIM_G=SIM_G,SIM_HT=SIM_HT,
#             SIM_adj_uniq=SIM_adj_uniq, SIM_scar_uniq=SIM_scar_uniq,
#             SIM_G_uniq=SIM_G_uniq, SIM_HT_uniq=SIM_HT_uniq,
#             # SIM_dict=SIM_dict, 
#             occp=occ_density,
#             data_embed=data_embed, coord_id=coord_id,
#             pca_coords=pca_coords, pca_all_coords=pca_all_coords,
#             phate_coords=phate_coords, phate_all_coords=phate_all_coords,
#             )


""" Save all obtained data to npz file,
    Multiple trajectories
"""
# # save for python
# fname_data = "data/helix_assos/helix_assos_PT0_multrj_100epoch_py.npz"
# with open(fname_data, 'wb') as f:
#     np.savez(f,
#             SIMS_adj=SIMS_adj,SIMS_scar=SIMS_scar,SIMS_G=SIMS_G,SIMS_HT=SIMS_HT,
#             SIMS_adj_uniq=SIMS_adj_uniq, SIMS_scar_uniq=SIMS_scar_uniq,
#             SIMS_G_uniq=SIMS_G_uniq, SIMS_HT_uniq=SIMS_HT_uniq,
#             occp_S=occ_density_S, coord_id_S=coord_id_S,
#             data_embed=data_embed, 
#             pca_coords=pca_coords, pca_all_coords=pca_all_coords,
#             phate_coords=phate_coords, phate_all_coords=phate_all_coords,
#             )

# # save for julia
# fname_data = "data/helix_assos/helix_assos_PT0_multrj_100epoch_jl.npz"
# with open(fname_data, 'wb') as f:
#     np.savez(f,
#             # SIMS_adj=SIMS_adj,  # no good for julia, too large
#             # SIMS_scar=SIMS_scar,  # no good for julia, too large
#             SIMS_G=SIMS_G,
#             SIMS_HT=SIMS_HT,
#             # SIMS_adj_uniq=SIMS_adj_uniq,  # no good for julia, too large
#             SIMS_scar_uniq=SIMS_scar_uniq,
#             SIMS_G_uniq=SIMS_G_uniq, 
#             SIMS_HT_uniq=SIMS_HT_uniq,
#             occp_S=occ_density_S, coord_id_S=coord_id_S,
#             data_embed=data_embed, 
#             pca_coords=pca_coords, pca_all_coords=pca_all_coords,
#             phate_coords=phate_coords, phate_all_coords=phate_all_coords,
#             )


# print(npyfile["SIMS_adj"].shape, npyfile["SIMS_scar"].shape, npyfile["SIMS_G"].shape, npyfile["SIMS_HT"].shape,"\n",
# npyfile["SIMS_adj_uniq"].shape, npyfile["SIMS_scar_uniq"].shape, npyfile["SIMS_G_uniq"].shape, npyfile["SIMS_HT_uniq"].shape, "\n",
# npyfile["occp_S"].shape, npyfile["data_embed"].shape, npyfile["coord_id_S"].shape, "\n",
# npyfile["pca_coords"].shape, npyfile["pca_all_coords"].shape, npyfile["phate_coords"].shape, npyfile["phate_all_coords"].shape)


In [None]:
""" HDF5 file
    Single trajectory
"""
# fname_data_h5 = "data/helix_assos/assos_PT3_1sim_20C_21_60epoch.h5"
# save_h5(fname_data_h5,
#             SIM_adj, SIM_scar, SIM_G, SIM_HT,
#             SIM_adj_uniq, SIM_scar_uniq, SIM_G_uniq, SIM_HT_uniq,
#             occ_density, data_embed, coord_id,
#             pca_coords, pca_all_coords,
#             phate_coords, phate_all_coords)

""" 
    Multiple trajectories
"""
# # fname_data_h5 = "data/helix_assoc_new/helix_assoc_new_PT0_multrj_100epoch.h5"
# fname_data_h5 = "data/helix_assoc_new/helix_assoc_new_PT3_multrj_100epoch.h5"

# save_h5(fname_data_h5,
#             SIMS_adj, SIMS_scar, SIMS_G, SIMS_HT,
#             SIMS_adj_uniq, SIMS_scar_uniq, SIMS_G_uniq, SIMS_HT_uniq,
#             occ_density_S, data_embed, coord_id_S, indices_S,
#             pca_coords, pca_all_coords,
#             phate_coords, phate_all_coords)


## 5. Visualize

In [None]:
# Open the saved multi-trajectory results. The path is relative to the notebook
# directory, like every other data path in this notebook, instead of pointing
# into one particular user's home directory.
fname_data = "data/helix_assoc_new/helix_assoc_new_PT3_multrj_100epoch.h5"
# fname_data = "data/helix_assoc_new/helix_assoc_new_PT0_multrj_100epoch.h5"

# Intentionally left open: the plotting cells below read datasets from `h5file`.
h5file = h5py.File(fname_data,'r')
# Structure with the largest occupancy density (needs SIMS_retrieve from section 1.1).
print(SIMS_retrieve[h5file["indices"][()][h5file["occp"][()].argmax()]])
h5file.keys()

# print(h5file["SIM_adj"].shape, h5file["SIM_scar"].shape, h5file["SIM_G"].shape, h5file["SIM_HT"].shape,"\n",
# h5file["SIM_adj_uniq"].shape, h5file["SIM_scar_uniq"].shape, h5file["SIM_G_uniq"].shape, h5file["SIM_HT_uniq"].shape, "\n",
# h5file["coord_id"].shape, h5file["data_embed"].shape, h5file["occp"].shape, h5file["indices"].shape,"\n",
# h5file["pca_coords"].shape, h5file["pca_all_coords"].shape, h5file["phate_coords"].shape, h5file["phate_all_coords"].shape)


### 1. PCA Vis

In [None]:
# Pull the three PCA columns of every visited state out of the HDF5 file.
X, Y, Z = (h5file["pca_all_coords"][:, axis] for axis in range(3))

# PCA: 2 components, coloured by the per-state energy array
fig, ax = plt.subplots(figsize=(8, 6))
im = ax.scatter(X, Y, c=h5file["SIM_G"], cmap='plasma')

plt.colorbar(im)

# Highlight the first ("I") and last ("F") point of the array.
annotations = ["I", "F"]
x = [X[0], X[-1]]
y = [Y[0], Y[-1]]
plt.scatter(x, y, s=150, c="green", alpha=1)
for label, px, py in zip(annotations, x, y):
    # small offset so the letter sits next to its marker
    plt.annotate(label, (px - 0.3, py - 0.3), fontsize=15, c="yellow")

In [None]:
# Pull the three PCA columns of the unique states out of the HDF5 file.
X, Y, Z = (h5file["pca_coords"][:, axis] for axis in range(3))

# PCA: 2 components, coloured by the unique-state energy array
fig, ax = plt.subplots(figsize=(8, 6))
im = ax.scatter(X, Y, c=h5file["SIM_G_uniq"], cmap='plasma')

plt.colorbar(im)

# Highlight the first ("I") and last ("F") point of the array.
annotations = ["I", "F"]
x = [X[0], X[-1]]
y = [Y[0], Y[-1]]
plt.scatter(x, y, s=150, c="green", alpha=1)
for label, px, py in zip(annotations, x, y):
    # small offset so the letter sits next to its marker
    plt.annotate(label, (px - 0.3, py - 0.3), fontsize=15, c="yellow")

In [None]:
# 3-D PCA view of the unique states, coloured by the unique-state energy array.
X = h5file["pca_coords"][:,0]
Y = h5file["pca_coords"][:,1]
Z = h5file["pca_coords"][:,2]

# PCA: 3 components
# Create the figure with a single 3-D axes. The previous plt.subplots() +
# plt.axes(projection="3d") combination left an unused 2-D axes in the figure.
fig = plt.figure(figsize=(8,6))
ax = fig.add_subplot(projection="3d")

im = ax.scatter3D(X,Y,Z,
          c=h5file["SIM_G_uniq"],
          cmap='plasma')
ax.set_xlabel("X")
ax.set_ylabel("Y")
ax.set_zlabel("Z")
plt.colorbar(im)

# Mark the first ("I") and last ("F") point of the array.
annotations=["I","F"]
x = [X[0],X[-1]]
y = [Y[0],Y[-1]]
z = [Z[0], Z[-1]]
ax.scatter(x,y,z,s=100,c="green",alpha=1)
# Draw the labels as the 2-D plots do; `annotations` was defined here but never used.
for label, px, py, pz in zip(annotations, x, y, z):
    ax.text(px, py, pz, label, fontsize=15)

#### Try PCA directly, without the autoencoder (AE)

In [None]:
# Baseline: PCA straight on the scattering features of the unique states.
# pca_coords1 = PCA(n_components=3).fit_transform(npyfile["SIM_scar_uniq"])  # single trj
pca_coords1 = PCA(n_components=3).fit_transform(h5file["SIM_scar_uniq"])   # multiple trj

# One array per principal component.
X, Y, Z = pca_coords1.T

# PCA: 2 components
fig, ax = plt.subplots(figsize=(8, 6))
im = ax.scatter(X, Y, c=h5file["SIM_G_uniq"], cmap='plasma')

plt.colorbar(im)

# Highlight the first ("I") and last ("F") point of the array.
annotations = ["I", "F"]
x = [X[0], X[-1]]
y = [Y[0], Y[-1]]
plt.scatter(x, y, s=150, c="green", alpha=1)
for label, px, py in zip(annotations, x, y):
    plt.annotate(label, (px - 0.3, py - 0.3), fontsize=15, c="black")

### 2. PHATE Vis

In [None]:
# PHATE coordinates of every visited state.
X_phate, Y_phate = (h5file["phate_all_coords"][:, axis] for axis in range(2))

fig, ax = plt.subplots(figsize=(8, 6))
im = ax.scatter(
    X_phate, Y_phate,
    c=h5file["SIM_G"],   # multiple trj
    cmap='plasma',
)

plt.colorbar(im)

# Highlight the first ("I") and last ("F") point of the array.
annotations = ["I", "F"]
x = [X_phate[0], X_phate[-1]]
y = [Y_phate[0], Y_phate[-1]]
plt.scatter(x, y, s=50, c="green", alpha=1)
for label, px, py in zip(annotations, x, y):
    plt.annotate(label, (px, py), fontsize=30, c="black")

In [None]:
# PHATE coordinates of the unique states only.
X_phate, Y_phate = (h5file["phate_coords"][:, axis] for axis in range(2))

fig, ax = plt.subplots(figsize=(8, 6))
im = ax.scatter(
    X_phate, Y_phate,
    c=h5file["SIM_G_uniq"],
    cmap='plasma',
)

plt.colorbar(im)

# Highlight the first ("I") and last ("F") point of the array.
annotations = ["I", "F"]
x = [X_phate[0], X_phate[-1]]
y = [Y_phate[0], Y_phate[-1]]
plt.scatter(x, y, s=50, c="green", alpha=1)
for label, px, py in zip(annotations, x, y):
    plt.annotate(label, (px, py), fontsize=30, c="black")

#### PHATE without AE

In [None]:
# Baseline: PHATE straight on the scattering features of the unique states.
phate_operator = phate.PHATE(n_jobs=-2)
# phate1 = phate_operator.fit_transform(npyfile["SIM_scar_uniq"])   # single trj
phate1 = phate_operator.fit_transform(h5file["SIM_scar_uniq"])   # multiple trj

fig, ax = plt.subplots(figsize=(8, 6))
im = ax.scatter(
    phate1[:, 0],
    phate1[:, 1],
    c=h5file["SIM_G_uniq"],
    cmap='plasma',
)

plt.colorbar(im)

# Highlight the first ("I") and last ("F") row of the embedding.
annotations = ["I", "F"]
x = [phate1[0, 0], phate1[-1, 0]]
y = [phate1[0, 1], phate1[-1, 1]]
plt.scatter(x, y, s=50, c="green", alpha=1)
for label, px, py in zip(annotations, x, y):
    plt.annotate(label, (px, py), fontsize=20, c="black")

## Draw helix structure

In [None]:
import networkx as nx
import numpy as np
from networkx.drawing.nx_pylab import draw_networkx
from networkx.drawing.layout import *

import matplotlib.pyplot as plt
from misc import *

STRAND_NAME = "assos_PT0_1sim_20C_51"

# define absorbing (final) state structure
FINAL_STRUCTURE = "(((((((((((((((((((((((((+)))))))))))))))))))))))))"

# The trajectory file is only needed while loadtrj parses it; the context
# manager closes it afterwards (it used to stay open for the kernel's lifetime).
with open('./data/helix_assos/assos_PT0_1sim_20C_51.txt', 'r') as f: # PT0
    SIM = loadtrj(f,FINAL_STRUCTURE,type="Multiple")
SIM_retrieve = np.array(SIM)
SIM_concat = concat_helix_structures(SIM)

print("SIM: ", len(SIM))
print("SIM_retrieve: ", SIM_retrieve.shape)
print("SIM_concat: ", len(SIM_concat))

# get single trajectory's data
# get adjacency matrix, energy, and holding time for each state
SIM_adj,SIM_G,SIM_T,SIM_HT = sim_adj(SIM_concat)
print(SIM_adj.shape,SIM_G.shape,SIM_T.shape,SIM_HT.shape)

# single trajectory: unique states, their occupancy density and source indices
indices,occ_density,SIM_adj_uniq,SIM_G_uniq,SIM_T_uniq,SIM_HT_uniq \
     = get_unique(SIM_concat,SIM_adj,SIM_G,SIM_T,SIM_HT)
print(indices.shape, occ_density.shape, SIM_adj_uniq.shape,SIM_G_uniq.shape,SIM_T_uniq.shape,SIM_HT_uniq.shape)

# sanity check: the "unique" stack should already be duplicate-free
print(np.unique(SIM_adj_uniq,axis=0).shape, SIM_adj_uniq.shape)

In [None]:
# Draw the first five states of the trajectory as graphs, one per subplot.
# nx.from_numpy_array replaces nx.convert_matrix.from_numpy_matrix, which was
# removed in NetworkX 3.0.
ex_neighborhood_graphs = [nx.from_numpy_array(x) for x in SIM_adj[:5]]

fig, ax = plt.subplots(1,5, figsize=(20,5))

for i, g in enumerate(ex_neighborhood_graphs):
    ppos = nx.kamada_kawai_layout(g)
    nx.draw(g, ax=ax[i], node_size=5, pos=ppos)

In [None]:
# Draw the last three states of the trajectory as graphs.
# nx.from_numpy_array replaces the from_numpy_matrix helper removed in NetworkX 3.0.
ex_neighborhood_graphs = [nx.from_numpy_array(x) for x in SIM_adj[-3:]]

fig, ax = plt.subplots(1,3, figsize=(20,5))

for i, g in enumerate(ex_neighborhood_graphs):
    ppos = nx.kamada_kawai_layout(g)
    nx.draw(g, ax=ax[i], node_size=5, pos=ppos)

In [None]:
# Draw states -20 .. -16 (counted from the end of the trajectory).
# nx.from_numpy_array replaces the from_numpy_matrix helper removed in NetworkX 3.0.
ex_neighborhood_graphs = [nx.from_numpy_array(x) for x in SIM_adj[-20:-15]]

fig, ax = plt.subplots(1,5, figsize=(20,5))

for i, g in enumerate(ex_neighborhood_graphs):
    ppos = nx.kamada_kawai_layout(g)
    nx.draw(g, ax=ax[i], node_size=5, pos=ppos)

In [None]:
# Draw states -16 .. -12 (counted from the end of the trajectory).
# nx.from_numpy_array replaces the from_numpy_matrix helper removed in NetworkX 3.0.
ex_neighborhood_graphs = [nx.from_numpy_array(x) for x in SIM_adj[-16:-11]]

fig, ax = plt.subplots(1,5, figsize=(20,5))

for i, g in enumerate(ex_neighborhood_graphs):
    ppos = nx.kamada_kawai_layout(g)
    nx.draw(g, ax=ax[i], node_size=5, pos=ppos)

In [None]:

# Draw the last 20 states, five per figure row.
for j in range(-20,0,5):
    # For the final window j + 5 == 0, and SIM_adj[j:0] would be empty;
    # `or None` turns that stop into "up to the end" so the list is built once.
    stop = (j + 5) or None
    # nx.from_numpy_array replaces the from_numpy_matrix helper removed in NetworkX 3.0.
    ex_neighborhood_graphs = [nx.from_numpy_array(x) for x in SIM_adj[j:stop]]

    fig, ax = plt.subplots(1,5, figsize=(20,5))
    for i, g in enumerate(ex_neighborhood_graphs):
        ppos = nx.kamada_kawai_layout(g)
        nx.draw(g, ax=ax[i], node_size=5, pos=ppos)



In [None]:
# Hand-written example: first strand with two nested base pairs, second strand unpaired.
# s1 = '...(..............)......'   # alternative with one pair (was immediately overwritten)
s1 = '...((............))......'
s2 = '.........................'

s12 = s1+s2
print(s12,len(s12))

d_a2 = dot2adj(s12)
print(d_a2, d_a2.shape)

# nx.from_numpy_array replaces the from_numpy_matrix helper removed in NetworkX 3.0.
g = nx.from_numpy_array(d_a2)

nx.draw(g, node_size=5, pos=nx.kamada_kawai_layout(g))

In [None]:
# Two-strand example drawn with node labels (Kamada-Kawai layout).
s1 = "....(((.....)))..((...))."
s2 = ".(.(..............).)...."
s12 = s1+s2

print(s12,len(s12))

d_a2 = dot2adj(s12)
print(d_a2, d_a2.shape)

# nx.from_numpy_array replaces the from_numpy_matrix helper removed in NetworkX 3.0.
g = nx.from_numpy_array(d_a2)

nx.draw(g, node_size=50, pos=nx.kamada_kawai_layout(g),with_labels=True,font_size=10)

# 24, 32 -- adjacency entries [23,31] and [31,23]
print(d_a2[23,31], d_a2[31,23])

In [None]:
# Same two-strand example as before, drawn on a circular layout.
s1 = "....(((.....)))..((...))."
s2 = ".(.(..............).)...."
s12 = s1+s2

print(s12,len(s12))

d_a2 = dot2adj(s12)
print(d_a2, d_a2.shape)

# nx.from_numpy_array replaces the from_numpy_matrix helper removed in NetworkX 3.0.
g = nx.from_numpy_array(d_a2)

nx.draw(g, node_size=50, pos=nx.circular_layout(g),with_labels=True,font_size=10)

# 24, 32 -- adjacency entries [23,31] and [31,23]
print(d_a2[23,31], d_a2[31,23])

In [None]:
# Third-to-last state of the trajectory on a shell layout.
# nx.from_numpy_array replaces the from_numpy_matrix helper removed in NetworkX 3.0.
g = nx.from_numpy_array(SIM_adj[-3])
nx.draw(g, node_size=50, pos=nx.shell_layout(g),with_labels=True,font_size=8)

In [None]:
# State number 11 of the trajectory on a shell layout.
# nx.from_numpy_array replaces the from_numpy_matrix helper removed in NetworkX 3.0.
g = nx.from_numpy_array(SIM_adj[11])
nx.draw(g, node_size=50, pos=nx.shell_layout(g),with_labels=True,font_size=8)

In [None]:
# Last state of the trajectory on a shell layout.
# nx.from_numpy_array replaces the from_numpy_matrix helper removed in NetworkX 3.0.
g = nx.from_numpy_array(SIM_adj[-1])
nx.draw(g, node_size=50, pos=nx.shell_layout(g),with_labels=True,font_size=8)

In [None]:
# Hand-written example: each strand carries its own intra-strand pairs.
s1 = '....(.....)..............'
s2 = '...((............))......'
s12 = s1+s2

print(s12,len(s12))

d_a2 = dot2adj(s12)
print(d_a2, d_a2.shape)

# nx.from_numpy_array replaces the from_numpy_matrix helper removed in NetworkX 3.0.
g = nx.from_numpy_array(d_a2)

nx.draw(g, node_size=5, pos=nx.kamada_kawai_layout(g))

In [None]:
# 50-character dot-paren example, Kamada-Kawai layout without labels.
s12 = "....(((.....))).(((...))..(.(..............).).).."

print(s12,len(s12))

d_a2 = dot2adj(s12)
# print(d_a2, d_a2.shape)

# nx.from_numpy_array replaces the from_numpy_matrix helper removed in NetworkX 3.0.
g = nx.from_numpy_array(d_a2)

nx.draw(g, node_size=5, pos=nx.kamada_kawai_layout(g))

In [None]:
# Same 50-character example, drawn on a shell layout with node labels.
s12 = "....(((.....))).(((...))..(.(..............).).).."

print(s12,len(s12))

d_a2 = dot2adj(s12)
# print(d_a2, d_a2.shape)

# nx.from_numpy_array replaces the from_numpy_matrix helper removed in NetworkX 3.0.
g = nx.from_numpy_array(d_a2)

nx.draw(g, node_size=50, pos=nx.shell_layout(g),with_labels=True,font_size=8)

In [None]:
# Short single-strand example converted with hairpin=True, helix=False.
s12 = '.((..))..(..).'

print(s12,len(s12))

d_a2 = dot2adj(s12,hairpin=True,helix=False)
# print(d_a2, d_a2.shape)

# nx.from_numpy_array replaces the from_numpy_matrix helper removed in NetworkX 3.0.
g = nx.from_numpy_array(d_a2)

nx.draw(g, node_size=50, pos=nx.shell_layout(g),with_labels=True,font_size=8)

In [None]:
# 50-character example converted with helix=True; also print the adjacency
# entry between positions 24 and 25 (the two middle positions of the string).
s12 = "....((((.....(.....).....))..))..(...........)...."
print(s12,len(s12))

d_a2 = dot2adj(s12,helix=True)
print(s12,len(s12),d_a2[24,25])

# print(d_a2, d_a2.shape)

# nx.from_numpy_array replaces the from_numpy_matrix helper removed in NetworkX 3.0.
g = nx.from_numpy_array(d_a2)

nx.draw(g, node_size=90, pos=nx.kamada_kawai_layout(g),with_labels=True,font_size=10)

In [None]:
# Minimal 6-character example converted with helix=True.
s12 = "(.)..."

# d_a2 = dot2adj(s12,helix=False,hairpin=True)
d_a2 = dot2adj(s12,helix=True)

# Adjacency entry between the two middle positions of the string
# (integer division gives the same indices as the old int(len/2) form here).
mid = len(s12) // 2
print(s12,len(s12),d_a2[mid - 1,mid])

# print(d_a2, d_a2.shape)

# nx.from_numpy_array replaces the from_numpy_matrix helper removed in NetworkX 3.0.
g = nx.from_numpy_array(d_a2)

nx.draw(g, node_size=90, pos=nx.kamada_kawai_layout(g),with_labels=True,font_size=10)