added visualize_MCchain() function to visualization module

changed init.ipy to import visualization module added script to process output of MC simulations scripts for senior thesis / presentation figures script for group meeting plots
brunobeltran · Jun 3, 2019 · d2a4dd3 · d2a4dd3
1 parent 4fb8776
commit d2a4dd3
Show file tree

Hide file tree

Showing 7 changed files with 1,931 additions and 30 deletions.
diff --git a/init.ipy b/init.ipy
@@ -31,5 +31,6 @@ from nuc_chain import rotations as ncr
 from nuc_chain import wignerD as wd
 from nuc_chain import fluctuations as wlc
 from nuc_chain import linkers as ncl
+from nuc_chain import visualization as vis
 from nuc_chain import math as ncm
 from MultiPoint import propagator
diff --git a/nuc_chain/visualization.py b/nuc_chain/visualization.py
@@ -114,10 +114,12 @@ def visualize_chain(entry_rots, entry_pos, links, w_ins=default_w_in,
                 color=colors[i], **kwargs)
     return mfig
 
-def visualize_MLC_chain(entry_pos, r_dna=dna_params['r_dna'],
-        mfig=None, palette="husl", nucleosome_color=None,
-        unwraps=None, plot_entry=False, plot_exit=False, plot_spheres=True, entry_rots=None, **kwargs):
-    """Visualize output of :py:func:`minimum_energy_no_sterics`.
+def visualize_MC_chain(entry_rots, entry_pos, w_ins=default_w_in,
+        w_outs=default_w_out, lpb=dna_params['lpb'], r_dna=dna_params['r_dna'],
+        helix_params=helix_params_best,
+        mfig=None, palette="husl", nucleosome_color=None, translation=False,
+        unwraps=None, plot_entry=False, plot_exit=False, **kwargs):
+    """Visualize output of MC simulation structure with a list of positions for each nucleosome.
 
     Parameters
     ----------
@@ -147,22 +149,32 @@ def visualize_MLC_chain(entry_pos, r_dna=dna_params['r_dna'],
     if mfig is None:
         mfig = mlab.figure()
     num_nucleosomes = len(entry_pos)
-    #assert(np.all(entry_rots.shape[:2] == entry_pos.shape))
+    w_ins, w_outs = convert.resolve_wrapping_params(unwraps, w_ins, w_outs, num_nucleosomes)
     colors = sns.color_palette(palette, num_nucleosomes)
     if nucleosome_color is not None:
         colors = [nucleosome_color]*num_nucleosomes
-    if plot_spheres:
+    if translation:
+        #plot nucleosome
+        nuc, mfig = plot_nucleosome(entry_rot=entry_rots[0], entry_pos=entry_pos[0],
+            Lw=w_ins[0]+w_outs[0], helix_params=helix_params,
+            mfig=mfig, color=colors[0], **kwargs)
+    else:
+        #plot sphere
         mlab.points3d(entry_pos[0, 0], entry_pos[0, 1], entry_pos[0, 2], scale_factor=5, figure=mfig,
                 color=colors[0], **kwargs)
-    # if plot_entry:
-    #     mlab.quiver3d(*entry_pos[0], *entry_rots[0][:,0], mode='arrow',
-    #                   scale_factor=5, color=(1,0,0))
-    #     mlab.quiver3d(*entry_pos[0], *entry_rots[0][:,1], mode='arrow',
-    #                   scale_factor=5, color=(0,1,0))
-    #     mlab.quiver3d(*entry_pos[0], *entry_rots[0][:,2], mode='arrow',
-    #                   scale_factor=5, color=(0,0,1))
+
+    if plot_entry:
+        mlab.quiver3d(*entry_pos[0], *entry_rots[0][:,0], mode='arrow',
+                      scale_factor=5, color=(1,0,0))
+        mlab.quiver3d(*entry_pos[0], *entry_rots[0][:,1], mode='arrow',
+                      scale_factor=5, color=(0,1,0))
+        mlab.quiver3d(*entry_pos[0], *entry_rots[0][:,2], mode='arrow',
+                      scale_factor=5, color=(0,0,1))
     for i in range(1,num_nucleosomes):
-        prev_exit_pos = entry_pos[i-1]
+        if translation:
+            prev_exit_pos = nuc[:, -1]
+        else:
+            prev_exit_pos = entry_pos[i-1]
         mlab.plot3d([prev_exit_pos[0], entry_pos[i][0]],
                     [prev_exit_pos[1], entry_pos[i][1]],
                     [prev_exit_pos[2], entry_pos[i][2]],
@@ -175,7 +187,11 @@ def visualize_MLC_chain(entry_pos, r_dna=dna_params['r_dna'],
                           scale_factor=5, color=(0,1,0))
             mlab.quiver3d(*entry_pos[i], *entry_rots[i][:,2], mode='arrow',
                           scale_factor=5, color=(0,0,1))
-        if plot_spheres:
+        if translation:
+            nuc, mfig = plot_nucleosome(entry_rot=entry_rots[i], entry_pos=entry_pos[i],
+                Lw=w_ins[i]+w_outs[i], helix_params=helix_params,
+                mfig=mfig, color=colors[i], **kwargs)
+        else:
             mlab.points3d(entry_pos[i, 0], entry_pos[i, 1], entry_pos[i, 2], scale_factor=5, figure=mfig,
                 color=colors[i], **kwargs)
     return mfig

diff --git a/scripts/MCsimulations_042519.py b/scripts/MCsimulations_042519.py
@@ -0,0 +1,115 @@
+"""Script to analyze output of MC simulations of nucleosome chains."""
+import re
+from pathlib import Path
+import pickle
+
+import matplotlib.cm as cm
+import numpy as np
+import seaborn as sns
+import pandas as pd
+from matplotlib import pyplot as plt
+import scipy
+from scipy import stats
+from scipy.optimize import curve_fit
+
+from nuc_chain import geometry as ncg
+from nuc_chain import linkers as ncl
+from MultiPoint import propagator
+from nuc_chain import fluctuations as wlc
+from nuc_chain.linkers import convert
+from nuc_chain import visualization as vis
+from mpl_toolkits.axes_grid1 import make_axes_locatable
+
+#These follow Andy's plotting preferences for
+params = {'axes.edgecolor': 'black', 'axes.grid': True, 'axes.titlesize': 20.0,
+'axes.linewidth': 0.75, 'backend': 'pdf','axes.labelsize': 18,'legend.fontsize': 18,
+'xtick.labelsize': 18,'ytick.labelsize': 18,'text.usetex': False,'figure.figsize': [7, 5],
+'mathtext.fontset': 'stixsans', 'savefig.format': 'pdf', 'xtick.bottom':True, 'xtick.major.pad': 5, 'xtick.major.size': 5, 'xtick.major.width': 0.5,
+'ytick.left':True, 'ytick.right':False, 'ytick.major.pad': 5, 'ytick.major.size': 5, 'ytick.major.width': 0.5, 'ytick.minor.right':False, 'lines.linewidth':2}
+
+plt.rcParams.update(params)
+
+teal_flucts = '#387780'
+red_geom = '#E83151'
+dull_purple = '#755F80'
+rich_purple = '#e830e8'
+
+dir_default='csvs/MC/41bp_notrans'
+
+def extract_MC(dir=dir_default, timestep=55, v=1):
+    """Extract the positions and orientation traids of each nucleosome in a MC simulation."""
+    dir = Path(dir)
+    if dir.is_dir() is False:
+        raise ValueError(f'The director {dir} does not exist')
+    entry_pos = np.loadtxt(dir/f'r{timestep}v{v}')
+    entry_us = np.loadtxt(dir/f'u{timestep}v{v}')
+    entry_t3 = entry_us[:, 0:3] #U
+    entry_t1 = entry_us[:, 3:] #V
+    entry_t2 = np.cross(entry_t3, entry_t1, axis=1) #UxV
+    num_nucs = entry_pos.shape[0]
+    entry_rots = []
+    for i in range(num_nucs):
+        #t1, t2, t3 as columns
+        rot = np.eye(3)
+        rot[:, 0] = entry_t1[i, :] #V
+        rot[:, 1] = entry_t2[i, :] #UxV
+        rot[:, 2] = entry_t3[i, :] #U
+        entry_rots.append(rot)
+    entry_rots = np.array(entry_rots)
+    return entry_rots, entry_pos
+    #^above files saved in csvs/MLC in npy format
+
+def check_equilibration(maxtimestep=55):
+    """Plot the end-to-end distance of the simulated structure vs. time to see if it's equilibrated"""
+    MC_sqrtR2 = np.zeros((maxtimestep+1,))
+
+    for i in range(maxtimestep):
+        rots, pos = extract_MC(timestep=i, v=1)
+        pos = pos - pos[0, :]
+        r2 = np.array([pos[i, :].T@pos[i, :] for i in range(pos.shape[0])])
+        MC_sqrtR2[i] = np.sqrt(r2[-1])
+
+    fig, ax = plt.subplots()
+    ax.plot(np.arange(0, maxtimestep+1), MC_sqrtR2)
+    plt.xlabel('Time step')
+    plt.ylabel('End-to-end distance (nm)')
+
+
+def plot_R2_MC_vs_theory(links=np.tile(36, 6999), unwrap=0, num_sims=10, maxtimestep=55, plot_sims=False, simdir='csvs/MC/36bp_notrans'):
+    """Plot the end-to-end distance predicted by the theory against the R^2 of the simulated structure."""
+    link = links[0] #linker length (36bp)
+    R2, Rmax, kuhn = wlc.R2_kinked_WLC_no_translation(links, plotfig=False, unwraps=unwrap)
+    r2 = np.concatenate(([0], R2))
+    rmax = np.concatenate(([0], Rmax))
+    fig, ax = plt.subplots()
+
+    MCr_allsim = np.ones((rmax.size, num_sims))
+    #plot results from the 55th time step of 10 different simulations
+    for i in range(1, num_sims):
+        rots, pos = extract_MC(dir=simdir, timestep=maxtimestep, v=i)
+        #shift MC positions so beginning of polymer coincides with the origin
+        pos = pos - pos[0, :]
+        #calculate end-to-end distance
+        MCr2 = np.array([pos[i, :].T@pos[i, :] for i in range(pos.shape[0])])
+        MCr_allsim[:, i-1] = np.sqrt(MCr2)
+        #plot each of the individual simulated end-to-end distances
+        if plot_sims:
+            if i==1:
+                mylabel = 'Simulation'
+            else:
+                mylabel = None
+            plt.loglog(rmax, np.sqrt(MCr2), color=dull_purple, alpha=0.4, label=mylabel)
+
+    plt.loglog(rmax, np.mean(MCr_allsim, axis=1), color=dull_purple, lw=3, label='Simulation')
+    plt.loglog(rmax, np.sqrt(r2), color='k', lw=3, label='Theory')
+    plt.legend()
+    plt.xlabel(r'Total linker length (nm)')
+    plt.ylabel(r'$\sqrt{\langle R^2 \rangle}$')
+    plt.title('Homogeneous Chain 36bp linkers')
+    plt.subplots_adjust(left=0.15, bottom=0.16, top=0.93, right=0.98)
+    #plt.savefig('plots/end-to-end-distance-theory-vs-simulation_36bp_0unwraps.png')
+
+
+
+
+
diff --git a/scripts/PRLfigures.py b/scripts/PRLfigures.py
@@ -24,6 +24,7 @@
 from MultiPoint import propagator
 from nuc_chain import fluctuations as wlc
 from nuc_chain.linkers import convert
+from nuc_chain import visualization as vis
 
 # Plotting parameters
 # PRL Font preference: computer modern roman (cmr), medium weight (m), normal shape
@@ -87,7 +88,7 @@ def render_chain(linkers, unwraps=0, **kwargs):
         # to use "off-screen rendering" and fullscreen your window before
         # saving (this is actually required if you're using a tiling window
         # manager like e.g. i3 or xmonad).
-        ncg.visualize_chain(entry_rots, entry_pos, linkers, unwraps=unwraps, plot_spheres=True)
+        vis.visualize_chain(entry_rots, entry_pos, linkers, unwraps=unwraps, plot_spheres=True)
 
 def draw_triangle(alpha, x0, width, orientation, base=10,
                             **kwargs):
@@ -219,9 +220,9 @@ def plot_fig2a(lis=default_lis, colors=None):
     plt.fill_between(x, ymin, ymax, where=x>=250, color=[0.9, 0.9, 0.91])
 
     # power law triangle for the two extremal regimes
-    corner = draw_power_law_triangle(1, [2, 3], 0.5, 'up')
+    corner = draw_power_law_triangle(1, [np.log10(2), np.log10(3)], 0.5, 'up')
     plt.text(3, 11, '$L^1$')
-    corner = draw_power_law_triangle(1/2, [350, 30], 0.8, 'down')
+    corner = draw_power_law_triangle(1/2, [np.log10(350), np.log10(30)], 0.8, 'down')
     plt.text(700, 16, '$L^{1/2}$')
 
     plt.xlim([xmin, xmax])
@@ -264,37 +265,37 @@ def plot_fig3(mu=41):
     all_kuhns = pd.read_csv('./csvs/kuhns_so_far.csv', index_col=np.arange(5))
     kg = all_kuhns.loc['box', 'geometrical', mu].reset_index()
     kg = kg.sort_values('variance')
-    ax.plot(kg['variance'].values, kg['b'].values, '--^', markersize=3, label='Zero-temperature',
+    ax.plot(kg['variance'].values, kg['b'].values, '--^', markersize=3, label='0T, uniform',
             color=red_geom)
     kf = all_kuhns.loc['box', 'fluctuations', mu].reset_index()
     kf = kf.sort_values('variance')
-    ax.plot(kf['variance'].values, kf['b'].values, '-o', markersize=3, label='Fluctuating',
+    ax.plot(kf['variance'].values, kf['b'].values, '-o', markersize=3, label='Fluctuating, uniform',
             color=teal_flucts)
     rdf = pd.read_csv('./csvs/r2/r2-fluctuations-exponential-link-mu_41-0unwraps.csv')
     b = rdf['kuhn'].mean()
     xlim = plt.xlim()
-    plt.plot([-10, 50], [b, b], 'k-.', label='Maximum Entropy')
+    plt.plot([-10, 50], [b, b], 'k-.', label='Fluctuating, exponential')
     plt.xlim(xlim)
     ax.set_ylim([0, 100])
     plt.xlabel('Linker length variability $\pm\sigma$ (bp)')
     plt.ylabel('Kuhn length (nm)')
     plt.legend()
-    fig.text(1.3, 0, r'$\pm 0 bp$', size=9)
-    fig.text(1.6, 0, r'$\pm 2 bp$', size=9)
-    fig.text(1.9, 0, r'$\pm 6 bp$', size=9)
-    # plt.subplots_adjust(left=0.07, bottom=0.15, top=0.92, right=0.97)
+    #fig.text(0, 1.3, r'$\pm 0 bp$', size=9)
+    #fig.text(0.1, 1.1, r'$\pm 2 bp$', size=9)
+    #fig.text(0.7, 1.2, r'$\pm 6 bp$', size=9)
+    #plt.subplots_adjust(left=0.07, bottom=0.15, top=0.92, right=0.97)
     plt.tight_layout()
     plt.savefig('./plots/PRL/fig-3-kuhn_length_vs_window_size_41_sigma0to40.pdf',
                bbox_inches='tight')
 
-def render_fig3_chains(mu=41, sigmas=[0, 2, 6]):
+def render_fig3_chains(mu=41, sigmas=[0, 2, 6], N=50):
     for sigma in sigmas:
         sign_bit = 2*np.round(np.random.rand(N)) - 1
         render_chain(mu + sign_bit*np.random.randint(sigma+1), size=(N,))
 
 def plot_fig4b():
     fig, ax = plt.subplots(figsize=(default_width, default_height))
-    kuhns = pd.read_csv('csvs/kuhns_so_far.csv')
+    kuhns = pd.read_csv('csvs/kuhns_so_far.csv') #note this is the old version (not in new repo)
     kuhns = kuhns.set_index(['variance_type', 'type', 'mu', 'variance'])
     mu_max = 100
     # dotted line at 100 nm
@@ -309,7 +310,7 @@ def make_plottable(df):
     ax.plot(exp_fluct['mu'], exp_fluct['b'], label='Exponential', color=teal_flucts)
     homo_fluct = kuhns.loc['homogenous', 'fluctuations']
     homo_fluct = make_plottable(homo_fluct)
-    ax.plot(homo_fluct['mu'], homo_fluct['b'], color=dull_purple, alpha=0.5, lw=0.75, label='Homogenous')
+    ax.plot(homo_fluct['mu'], homo_fluct['b'], color=dull_purple, alpha=0.5, lw=0.75, label='Homogeneous')
 
 
     #lines for yeast, mice, human
@@ -334,7 +335,8 @@ def make_plottable(df):
     plt.tight_layout()
     plt.savefig('plots/PRL/fig4b_kuhn_exponential.pdf', bbox_inches='tight')
 
-def plot_fig5(df=None, rmax_or_ldna='rmax', named_sim='mu56'):
+def plot_fig5(df=None, rmax_or_ldna='ldna', named_sim='mu56'):
+    #TODO: change units to be in inverse nanometers cubed
     fig, ax = plt.subplots(figsize=(default_width, default_height))
     n = rmax_or_ldna
     # first set sim-specific parameters, draw scaling triangles at manually
@@ -480,7 +482,7 @@ def plot_fig5(df=None, rmax_or_ldna='rmax', named_sim='mu56'):
         plt.xlabel('Total linker length (bp)')
     elif rmax_or_ldna == 'ldna':
         plt.xlabel('Genomic distance (bp)')
-    plt.ylabel(r'$P_\mathrm{loop}\;\;\;(\mathrm{nm}^{-3})$')
+    plt.ylabel(r'$P_\mathrm{loop}\;\;\;(\mathrm{bp}^{-3})$')
 
     # plt.legend([fill, l100, lnormed], ['Average $\pm$ 95\%',
     #         'Straight chain, b=100nm', f'Straight chain, b={b:0.2f}nm'],

diff --git a/scripts/group_meeting_031919.py b/scripts/group_meeting_031919.py
@@ -0,0 +1,81 @@
+#plotting parameters
+import matplotlib.cm as cm
+import numpy as np
+import seaborn as sns
+import pandas as pd
+from matplotlib import pyplot as plt
+import scipy
+from scipy import stats
+from scipy.optimize import curve_fit
+from nuc_chain import geometry as ncg
+from nuc_chain import linkers as ncl
+from MultiPoint import propagator
+from nuc_chain import fluctuations as wlc
+from nuc_chain.linkers import convert
+from mpl_toolkits.axes_grid1 import make_axes_locatable
+
+#These follow Andy's plotting preferences for
+params = {'axes.edgecolor': 'black', 'axes.grid': False, 'axes.facecolor': 'white', 'axes.titlesize': 20.0,
+'axes.linewidth': 0.75, 'backend': 'pdf','axes.labelsize': 18,'legend.fontsize': 18,
+'xtick.labelsize': 18,'ytick.labelsize': 18,'text.usetex': False,'figure.figsize': [7, 5],
+'mathtext.fontset': 'stixsans', 'savefig.format': 'pdf', 'xtick.bottom':True, 'xtick.major.pad': 5, 'xtick.major.size': 5, 'xtick.major.width': 0.5,
+'ytick.left':True, 'ytick.right':False, 'ytick.major.pad': 5, 'ytick.major.size': 5, 'ytick.major.width': 0.5, 'ytick.minor.right':False, 'lines.linewidth':2}
+
+plt.rcParams.update(params)
+
+#Mouse data
+def plot_looping_within_contact_radius(a=10, lp=50, loglog=True):
+    """Plot probability that 2 ends will form a loop within contact radius a, in nm,
+    as a function of dimensionless chain length N=Rmax/2lp, where lp is in nm.
+    Plots kinked model vs. bare WLC looping probabilities for both Rlinks and Llinks."""
+
+    #convert a and lp to basepairs
+    a_in_bp = a / ncg.dna_params['lpb']
+    lp_in_bp = lp / ncg.dna_params['lpb']
+
+    #load in linker lengths used to simulate nucleosome positions in mice (mu=45bp)
+    links = np.load('csvs/Bprops/0unwraps/heterogenous/Sarah/mice2/linker_lengths_101nucs.npy')
+    #Rlinks -- linkers right of TSS, starting with 60bp between -1 and +1 nuc
+    Rlinks = links[50:]
+    #Llinks -- linkers left of TSS, starting with 15bp between -2 and -1 nuc
+    Llinks = links[0:50]
+    #reverse so linker from -1 to -2 nuc is first
+    Llinks_rev = Llinks[::-1]
+    total_links = np.concatenate((Llinks_rev, Rlinks))
+
+    #cumulative chain length including burried basepairs
+    unwrap = 0
+    #plot as positive distance from TSS in bp
+    ldna_Rlinks = convert.genomic_length_from_links_unwraps(Rlinks, unwraps=unwrap) #max WLC chain length in bp
+    #plot as negative distance from TSS in bp
+    ldna_Llinks = -1*convert.genomic_length_from_links_unwraps(Llinks_rev, unwraps=unwrap) #max WLC chain length in bp
+
+    #load in calculated looping probabilities
+    loops = pd.read_csv('../../data_for_Sarah/mice2_looping_probs_a=10nm_102nucs.csv')
+
+    #50th nucleosome or row 49, corresponds to -2nuc
+    loops_to_plot = loops['49']
+    #only interested in plotting looping with nucleation site (-2 nuc)
+    Lloops = np.concatenate((loops_to_plot[0:49], [loops_to_plot[50]]))
+    Lloops = Lloops[::-1]
+    #linkers left of -2 nuc
+    #ldna_leftnuc = ldna_Llinks[1:]
+
+    #linkers right of -2 nuc
+    Rloops = loops_to_plot[51:]
+    #ldna_rightnuc = np.concatenate(([ldna_Llinks[0]], ldna_Rlinks))
+
+
+    fig, ax = plt.subplots(figsize=(6.25, 4.89))
+    colors = sns.color_palette("BrBG", 9)
+    ax.plot(ldna_Rlinks, Rloops, '-o', lw=2, markersize=4, color=colors[1], label='Right of TSS')
+    ax.plot(ldna_Llinks, Lloops, '-o', lw=2, markersize=4, color=colors[-2], label='Left of TSS')
+    plt.xlabel(r'Distance from TSS (bp)')
+    plt.ylabel(f'Looping probability, a={a}nm')
+    plt.subplots_adjust(left=0.16, bottom=0.15, top=0.98, right=0.98)
+    plt.yscale('log')
+    plt.legend(loc=0)
+    #plt.ylim([10**-4.5, 10**-1.8])
+    plt.savefig(f'plots/lp{lp}nm_a{a}nm_left_right_contact_probability_mice2.png')
+
+