# Chr-NL-Optimization

Import the necessary packages

In [1]:
import numpy as np
import os
import subprocess
import sys
import copy
import sklearn.cluster as sk

We define the important parameters in the next block.

In [2]:
import MDAnalysis as mda

# Iteration index: inputs are read from the directories of iteration
# run_number-1 and results are written to the directories of run_number.
run_number = 1

# Parameters of the tanh contact function used when computing the
# DamID and TSASeq signals: sigma_tanh*(rc_tanh - distance)
sigma_tanh = 4
rc_tanh = 0.75

# Number of chromosome particles
N_chr_beads = 60642

# Index of the first trajectory frame included in the analysis
# (0 means every frame of the trajectory is used)
first_frame = 0
# Number of nucleolus particles
N_nucleolus_particles = 300
# Number of speckle particles
N_speckles_particles = 1600
# Number of lamina particles
N_lamina_particles = 8000
# Radius of the cell nucleus, 13.0 (LJ unit) = 5.0 µm
radius_nucleus = 13.0

# Info files
# gLength: the difference between each two neighboring values in the file
# represents the length of the chromosome
gLength = np.loadtxt("mol_info/gLengthFile.txt", dtype=int)
# Index of each maternal chromosome
maternalIdx = np.loadtxt("mol_info/maternalIdxFile.txt", dtype=int)
# Index of each paternal chromosome
paternalIdx = np.loadtxt("mol_info/paternalIdxFile.txt", dtype=int)

Compute the simulated DamID and TSASeq

The function we use here is the same as the one defined in `chr_chr_optimization.ipynb`.

In [3]:
def _speckle_cluster_geometry(speckles_data):
    """Cluster the speckle particles with DBSCAN and summarise every cluster.

    Returns
    -------
    centers : ndarray, shape (n_clusters, 3)
        Mean position of the particles of each cluster.
    radii : ndarray, shape (n_clusters,)
        Root-mean-square distance of the members of each cluster from its
        center.

    Particles labelled as noise by DBSCAN (label -1) are ignored.
    """
    # Adapted from the scikit-learn DBSCAN example:
    # https://scikit-learn.org/stable/auto_examples/cluster/plot_dbscan.html
    labels = sk.DBSCAN(eps=1.0).fit(speckles_data).labels_
    # Number of clusters in labels, ignoring noise if present.
    n_clusters = len(set(labels)) - (1 if -1 in labels else 0)

    centers = np.zeros((n_clusters, 3))
    radii = np.zeros(n_clusters)
    for k in range(n_clusters):
        members = speckles_data[labels == k]
        center = np.mean(members, axis=0)
        centers[k] = center
        radii[k] = (np.sum((members - center)**2, axis=None) / len(members))**0.5
    return centers, radii


def _tanh_contact_sum(bead_positions, target_positions, target_radii=None, chunk_size=256):
    """Sum the smoothed contact indicator of every bead over all targets.

    For each bead the value is
        sum_t 0.5*(1 + tanh(sigma_tanh*(rc_tanh - d_t)))
    where d_t is the Euclidean distance to target t, reduced by
    target_radii[t] when radii are given.

    Beads are processed in chunks of ``chunk_size`` so that the temporary
    (chunk, n_targets, 3) array stays small even for thousands of targets.
    """
    totals = np.zeros(len(bead_positions))
    for start in range(0, len(bead_positions), chunk_size):
        block = bead_positions[start:start + chunk_size]
        delta = target_positions[np.newaxis, :, :] - block[:, np.newaxis, :]
        distances = np.sum(delta**2, axis=2)**0.5
        if target_radii is not None:
            distances = distances - target_radii
        totals[start:start + chunk_size] = np.sum(
            0.5*(1.0 + np.tanh(sigma_tanh*(rc_tanh - distances))), axis=1)
    return totals


def DamID_TSASeq_calculation(traj_data):
    """Compute the simulated, haploid-averaged DamID and TSASeq signals.

    The trajectory is read from frame ``first_frame`` to the end. In every
    frame the particles are split by index into chromosome beads (the first
    ``N_chr_beads``), nucleolus particles, speckle particles and lamina
    particles (everything after the speckles). Relies on the notebook-level
    settings ``first_frame``, ``N_chr_beads``, ``N_nucleolus_particles``,
    ``N_speckles_particles``, ``sigma_tanh``, ``rc_tanh`` and on the index
    tables ``gLength``, ``maternalIdx`` and ``paternalIdx``.

    Parameters
    ----------
    traj_data : str
        Path of the DCD trajectory, opened with the MDAnalysis LAMMPS
        DCDReader.

    Returns
    -------
    damid : ndarray, shape (N_chr_beads // 2,)
        Frame-averaged sum of tanh contacts between each bead and all lamina
        particles, averaged over the maternal and paternal copies.
    tsaseq : ndarray, shape (N_chr_beads // 2,)
        Frame-averaged sum of tanh contacts between each bead and the speckle
        clusters (distance measured to the cluster surface, i.e. center
        distance minus cluster radius), divided by the number of clusters and
        averaged over the maternal and paternal copies. A frame in which
        DBSCAN finds no cluster contributes zeros instead of NaN.
    N_speckle : list of int
        Number of speckle clusters found in each analysed frame.

    Raises
    ------
    ValueError
        If the trajectory has no frame at or after ``first_frame``.
    """
    reader = mda.coordinates.LAMMPS.DCDReader(traj_data)

    n_frames = len(reader) - first_frame
    if n_frames <= 0:
        raise ValueError("Trajectory %r has %d frame(s); nothing to analyse from first_frame=%d"
                         % (traj_data, len(reader), first_frame))

    speckle_start = N_chr_beads + N_nucleolus_particles
    lamina_start = speckle_start + N_speckles_particles

    exp_tsa_seq_all_frames = np.zeros((n_frames, N_chr_beads))
    exp_damid_all_frames = np.zeros((n_frames, N_chr_beads))
    N_speckle = []

    for row, frame_number in enumerate(range(first_frame, len(reader))):
        # Read the frame once and slice it, instead of re-indexing the
        # trajectory for every particle group.
        positions = reader.trajectory[frame_number].positions
        chr_I_data = positions[:N_chr_beads]
        speckles_data = positions[speckle_start:lamina_start]
        lamina_data = positions[lamina_start:]

        # Speckles: contacts with the surface of every speckle cluster,
        # normalised by the number of clusters in this frame.
        cluster_centers, cluster_radii = _speckle_cluster_geometry(speckles_data)
        n_clusters = len(cluster_centers)
        N_speckle.append(n_clusters)
        if n_clusters > 0:
            # With zero clusters the row stays 0 (previously 0/0 -> NaN).
            exp_tsa_seq_all_frames[row, :len(chr_I_data)] = _tanh_contact_sum(
                chr_I_data, cluster_centers, cluster_radii) / float(n_clusters)

        # Lamina: contacts with every individual lamina particle.
        exp_damid_all_frames[row, :len(chr_I_data)] = _tanh_contact_sum(
            chr_I_data, lamina_data)

    # Fold the diploid genome onto a haploid one by averaging the maternal
    # and paternal copy of each of the 23 chromosomes.
    damid_all_frames_haploid = np.zeros((n_frames, N_chr_beads // 2))
    tsaseq_all_frames_haploid = np.zeros((n_frames, N_chr_beads // 2))
    for i in range(23):
        haploid = slice(gLength[i], gLength[i+1])
        # The index files are 1-based and inclusive, hence the "-1" on the start.
        maternal = slice(maternalIdx[i][0]-1, maternalIdx[i][1])
        paternal = slice(paternalIdx[i][0]-1, paternalIdx[i][1])
        damid_all_frames_haploid[:, haploid] = 0.5*(
            exp_damid_all_frames[:, maternal] + exp_damid_all_frames[:, paternal])
        tsaseq_all_frames_haploid[:, haploid] = 0.5*(
            exp_tsa_seq_all_frames[:, maternal] + exp_tsa_seq_all_frames[:, paternal])

    return (np.mean(damid_all_frames_haploid, axis=0), np.mean(tsaseq_all_frames_haploid, axis=0), N_speckle)

First, we compute the simulated DamID and TSASeq signals.

In [4]:
# Simulated haploid DamID / TSASeq profiles and per-frame speckle cluster counts
(damid_simulated,
 tsaseq_simulated,
 n_clusters) = DamID_TSASeq_calculation("frame_10.dcd")

# Genome-wide averages of the simulated signals
gw_lamina = damid_simulated.mean()
gw_speckles = tsaseq_simulated.mean()

We load the experimental DamID and TSASeq

In [5]:
# Experimental signals, one value per bead (second column of each file)
damid_data_low_res  = np.loadtxt("DamID-OE.txt",usecols=[1])
tsa_data_low_res    = np.loadtxt("TSA-Seq-OE.txt",usecols=[1])

# Haploid arrays hold half of the chromosome beads; derive the size from
# N_chr_beads (as DamID_TSASeq_calculation does) instead of hard-coding 30321.
n_haploid_beads = N_chr_beads // 2
damid_data_low_res_haploid  = np.zeros(n_haploid_beads) #Haploid results
tsa_data_low_res_haploid    = np.zeros(n_haploid_beads) #Haploid results
for i in range(23):
    haploid  = slice(gLength[i], gLength[i+1])
    # The index files are 1-based and inclusive, hence the "-1" on the start.
    maternal = slice(maternalIdx[i][0]-1, maternalIdx[i][1])
    paternal = slice(paternalIdx[i][0]-1, paternalIdx[i][1])
    # Average the maternal and paternal copy of chromosome i
    damid_data_low_res_haploid[haploid] = 0.5*(damid_data_low_res[maternal] + damid_data_low_res[paternal])
    tsa_data_low_res_haploid[haploid]   = 0.5*(tsa_data_low_res[maternal] + tsa_data_low_res[paternal])

# Scale the experimental profiles by the genome-wide average of the simulated signals
expt_dam            = damid_data_low_res_haploid*gw_lamina
expt_tsa            = tsa_data_low_res_haploid*gw_speckles

# 0-based indices of the chromosomes whose parameters are updated
# (the first 22 of the 23 chromosomes)
update_chr_list     = np.arange(22)

We start the Adam optimization.

In [6]:
## DamID part: restore the Adam state written by the previous iteration

prev_iteration = run_number - 1

# Adam hyper-parameters
beta1_dam   = 0.9      # decay rate of the first-moment estimate
beta2_dam   = 0.999    # decay rate of the second-moment estimate
epsilon_dam = 1e-8     # added to the denominator of the update
eta_dam     = 0.01     # learning rate

# Moment estimates ("w" and "b" sets) and step counter of the previous iteration
m_dw_dam = np.loadtxt('adam_chr_NL_param/%02d/mdw_dam.txt' % prev_iteration)
v_dw_dam = np.loadtxt('adam_chr_NL_param/%02d/vdw_dam.txt' % prev_iteration)
m_db_dam = np.loadtxt('adam_chr_NL_param/%02d/mdb_dam.txt' % prev_iteration)
v_db_dam = np.loadtxt('adam_chr_NL_param/%02d/vdb_dam.txt' % prev_iteration)
t_dam    = int(np.loadtxt('adam_chr_NL_param/%02d/t_dam.txt' % prev_iteration))

# Gradient: experimental minus simulated DamID signal
grad_dam = expt_dam - damid_simulated

In [7]:
# START TO DO THE ADAM OPTIMIZATION
#
# NOTE: this cell updates the moment estimates in place. Re-run the previous
# cell (which reloads them from disk) before re-running this one, otherwise
# the same gradient is accumulated twice.

# The bias correction below divides by (1 - beta**t), which is zero for t = 0
# and would silently turn every parameter into NaN.
if t_dam < 1:
    raise ValueError("Adam step counter t_dam must be >= 1, got %d" % t_dam)

# momentum beta 1 (first-moment estimates)
# *** weights *** #
m_dw_dam        = beta1_dam*m_dw_dam + (1-beta1_dam)*grad_dam
# *** biases *** #
m_db_dam        = beta1_dam*m_db_dam + (1-beta1_dam)*grad_dam
# rms beta 2 (second-moment estimates, built from the SQUARED gradient)
# *** weights *** #
v_dw_dam        = beta2_dam*v_dw_dam + (1-beta2_dam)*(grad_dam**2)
# *** biases *** #
# Fixed: this previously accumulated grad_dam instead of grad_dam**2.
v_db_dam        = beta2_dam*v_db_dam + (1-beta2_dam)*(grad_dam**2)

# We save the updated parameters in Adam for the next iteration

os.makedirs('adam_chr_NL_param/%02d'%run_number, exist_ok=True)
np.savetxt('adam_chr_NL_param/%02d/mdw_dam.txt'%(run_number), m_dw_dam.reshape((-1,1)), fmt='%15.12e')
np.savetxt('adam_chr_NL_param/%02d/vdw_dam.txt'%(run_number), v_dw_dam.reshape((-1,1)), fmt='%15.12e')
np.savetxt('adam_chr_NL_param/%02d/mdb_dam.txt'%(run_number), m_db_dam.reshape((-1,1)), fmt='%15.12e')
np.savetxt('adam_chr_NL_param/%02d/vdb_dam.txt'%(run_number), v_db_dam.reshape((-1,1)), fmt='%15.12e')
# The step counter stored for the next iteration is incremented by one
np.savetxt('adam_chr_NL_param/%02d/t_dam.txt'%(run_number), np.array([t_dam+1]).reshape((-1,1)), fmt='%d')

## bias correction
m_dw_corr_dam   = m_dw_dam/(1-beta1_dam**t_dam)
m_db_corr_dam   = m_db_dam/(1-beta1_dam**t_dam)
v_dw_corr_dam   = v_dw_dam/(1-beta2_dam**t_dam)
v_db_corr_dam   = v_db_dam/(1-beta2_dam**t_dam)

# Normalised Adam step direction; only the "w" moments enter the update
dalpha_dam      = m_dw_corr_dam/(np.sqrt(v_dw_corr_dam)+epsilon_dam)

In [8]:
# Load the old parameters and update them
# Only the chromosome-bead entries are kept; the bead count comes from
# N_chr_beads instead of a hard-coded 60642.

damid = np.loadtxt("potential/%02d/chr_lam_param.txt"%(run_number-1))[:N_chr_beads]

for i in update_chr_list:
    # The haploid Adam step of chromosome i is applied to both of its copies
    step = eta_dam*dalpha_dam[gLength[i]:gLength[i+1]]
    damid[maternalIdx[i][0]-1:maternalIdx[i][1]] -= step
    damid[paternalIdx[i][0]-1:paternalIdx[i][1]] -= step

In [9]:
# TSASeq part, very similar to the above:
# restore the Adam state written by the previous iteration

prev_iteration = run_number - 1

# Adam hyper-parameters
beta1_tsa   = 0.9      # decay rate of the first-moment estimate
beta2_tsa   = 0.999    # decay rate of the second-moment estimate
epsilon_tsa = 1e-8     # added to the denominator of the update
eta_tsa     = 0.01     # learning rate

# Moment estimates ("w" and "b" sets) and step counter of the previous iteration
m_dw_tsa = np.loadtxt('adam_chr_NL_param/%02d/mdw_tsa.txt' % prev_iteration)
v_dw_tsa = np.loadtxt('adam_chr_NL_param/%02d/vdw_tsa.txt' % prev_iteration)
m_db_tsa = np.loadtxt('adam_chr_NL_param/%02d/mdb_tsa.txt' % prev_iteration)
v_db_tsa = np.loadtxt('adam_chr_NL_param/%02d/vdb_tsa.txt' % prev_iteration)
t_tsa    = int(np.loadtxt('adam_chr_NL_param/%02d/t_tsa.txt' % prev_iteration))

# Gradient: experimental minus simulated TSASeq signal
grad_tsa = expt_tsa - tsaseq_simulated

In [10]:
# START TO DO THE ADAM TRAINING
#
# NOTE: this cell updates the moment estimates in place. Re-run the previous
# cell (which reloads them from disk) before re-running this one, otherwise
# the same gradient is accumulated twice.

# The bias correction below divides by (1 - beta**t), which is zero for t = 0
# and would silently turn every parameter into NaN.
if t_tsa < 1:
    raise ValueError("Adam step counter t_tsa must be >= 1, got %d" % t_tsa)

# momentum beta 1 (first-moment estimates)
# *** weights *** #
m_dw_tsa        = beta1_tsa*m_dw_tsa + (1-beta1_tsa)*grad_tsa
# *** biases *** #
m_db_tsa        = beta1_tsa*m_db_tsa + (1-beta1_tsa)*grad_tsa
# rms beta 2 (second-moment estimates, built from the SQUARED gradient)
# *** weights *** #
v_dw_tsa        = beta2_tsa*v_dw_tsa + (1-beta2_tsa)*(grad_tsa**2)
# *** biases *** #
# Fixed: this previously accumulated grad_tsa instead of grad_tsa**2.
v_db_tsa        = beta2_tsa*v_db_tsa + (1-beta2_tsa)*(grad_tsa**2)

# We save the updated parameters in Adam for the next iteration.
# Create the directory here as well so this cell does not depend on the
# DamID cell having been run first.
os.makedirs('adam_chr_NL_param/%02d'%run_number, exist_ok=True)
np.savetxt('adam_chr_NL_param/%02d/mdw_tsa.txt'%(run_number), m_dw_tsa.reshape((-1,1)), fmt='%15.12e')
np.savetxt('adam_chr_NL_param/%02d/vdw_tsa.txt'%(run_number), v_dw_tsa.reshape((-1,1)), fmt='%15.12e')
np.savetxt('adam_chr_NL_param/%02d/mdb_tsa.txt'%(run_number), m_db_tsa.reshape((-1,1)), fmt='%15.12e')
np.savetxt('adam_chr_NL_param/%02d/vdb_tsa.txt'%(run_number), v_db_tsa.reshape((-1,1)), fmt='%15.12e')
# The step counter stored for the next iteration is incremented by one
np.savetxt('adam_chr_NL_param/%02d/t_tsa.txt'%(run_number), np.array([t_tsa+1]).reshape((-1,1)), fmt='%d')

## bias correction
m_dw_corr_tsa   = m_dw_tsa/(1-beta1_tsa**t_tsa)
m_db_corr_tsa   = m_db_tsa/(1-beta1_tsa**t_tsa)
v_dw_corr_tsa   = v_dw_tsa/(1-beta2_tsa**t_tsa)
v_db_corr_tsa   = v_db_tsa/(1-beta2_tsa**t_tsa)

# Normalised Adam step direction; only the "w" moments enter the update
dalpha_tsa     = m_dw_corr_tsa/(np.sqrt(v_dw_corr_tsa)+epsilon_tsa)

In [12]:
# Load the old parameters and update them
# Only the chromosome-bead entries are kept; the bead count comes from
# N_chr_beads instead of a hard-coded 60642.

tsaseq = np.loadtxt("potential/%02d/chr_spec_param.txt"%(run_number-1))[:N_chr_beads]

for i in update_chr_list:
    # The haploid Adam step of chromosome i is applied to both of its copies
    step = eta_tsa*dalpha_tsa[gLength[i]:gLength[i+1]]
    tsaseq[maternalIdx[i][0]-1:maternalIdx[i][1]] -= step
    tsaseq[paternalIdx[i][0]-1:paternalIdx[i][1]] -= step

In [13]:
# Override: wherever the experimental track is exactly zero, force the
# corresponding parameter to 0.0.
# NOTE(review): the masks are built from the full experimental arrays, so
# they must have the same length as damid / tsaseq - confirm the input files.

zero_signal_damid = damid_data_low_res == 0.0
damid[zero_signal_damid] = 0.0

zero_signal_tsa = tsa_data_low_res == 0.0
tsaseq[zero_signal_tsa] = 0.0

We save the new parameters

In [14]:
# The parameter files are padded with zeros after the chromosome entries.
# The pad length is derived from the particle counts (300 + 1600 + 8000 =
# 9900 with the current settings) instead of being hard-coded.
# NOTE(review): assumes the 9900 padding stood for the nucleolus, speckle and
# lamina particles - confirm against the consumer of these files.
n_non_chr_particles = N_nucleolus_particles + N_speckles_particles + N_lamina_particles
zero_padding = np.zeros(n_non_chr_particles)

# np.savetxt does not create missing directories
os.makedirs("potential/%02d"%run_number, exist_ok=True)
np.savetxt("potential/%02d/chr_spec_param.txt"%run_number, np.append(tsaseq,zero_padding).reshape((-1,1)), fmt='%.6f')
np.savetxt("potential/%02d/chr_lam_param.txt"%run_number, np.append(damid,zero_padding).reshape((-1,1)), fmt='%.6f')