# Born to slide: mobile origin licensing factors confer resistance to conflicts with RNA polymerase

<img style="float: left;" src="https://icons.iconarchive.com/icons/dan-wiersma/solar-system/48/Mars-icon.png">

## Jupyter notebooks 

#### Figures:
+ 1B
+ S1D

#### Archives:
+ Transcription_licensed-DNA_ATP_LS.yama
+ Transcription_licensed-DNA_ATP_HS.yama
+ Transcription_licensed-DNA_ATPgS_LS.yama
+ Transcription_licensed-DNA_ATPgS_HS.yama
+ Transcription_low-chromatin-licensed-DNA_ATP_1.yama
+ Transcription_high-chromatin-licensed-DNA_ATP.yama
+ Transcription_low-chromatin-licensed-DNA_ATP_2.yama
+ Transcription_low-chromatin-licensed-DNA_ATPgS.yama
+ Photobleaching_licensed-DNA_ATP.yama
+ Photobleaching_licensed-DNA_ATPgS.yama
+ Transcription_licensed-DNA_T7T_ATP.yama
+ Transcription_licensed-DNA_T7T_ATPgS.yama
+ Transcription_ORC-DNA_ATP.yama
+ Transcription_ORC-DNA_T7T_ATP.yama

#### Additional data:
+ none

### Initialize ImageJ

In [1]:
import imagej
# Initialise ImageJ from a local Fiji installation. The path is specific to
# the machine this notebook was run on; adjust it to wherever Fiji lives.
ij = imagej.init('/Applications/Fiji.app')
# Last expression of the cell: the notebook echoes the ImageJ version string.
ij.getVersion()

'2.1.0/1.53c'

### Imports

In [2]:
import sys
sys.path.insert(0, '..')
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import seaborn as sns
import re
import awesome_data
from marspy.convert.archive import DnaMoleculeArchive, instantiate_archive, describe_archives
from tqdm.notebook import tqdm
from functools import reduce

marspy initialized.


### Select archives

In [3]:
# Archive files (.yama) analysed in this notebook. Each name is passed to
# instantiate_archive() in the "Instantiate selected archives" cell.
selected_archives = [
    "Transcription_licensed-DNA_ATP_LS.yama",
    "Transcription_licensed-DNA_ATP_HS.yama",
    "Transcription_licensed-DNA_ATPgS_LS.yama",
    "Transcription_licensed-DNA_ATPgS_HS.yama",
    "Transcription_low-chromatin-licensed-DNA_ATP_1.yama",
    "Transcription_high-chromatin-licensed-DNA_ATP.yama",
    "Transcription_low-chromatin-licensed-DNA_ATP_2.yama",
    "Transcription_low-chromatin-licensed-DNA_ATPgS.yama",
    "Photobleaching_licensed-DNA_ATP.yama",
    "Photobleaching_licensed-DNA_ATPgS.yama",
    "Transcription_licensed-DNA_T7T_ATP.yama",
    "Transcription_licensed-DNA_T7T_ATPgS.yama",
    "Transcription_ORC-DNA_ATP.yama",
    "Transcription_ORC-DNA_T7T_ATP.yama",
]

### Figure style and output directory

In [4]:
from figure_style import *

In [5]:
# Apply the plotting style for the figures below. set_style_paper is not
# defined in this notebook; presumably it comes from the figure_style star
# import above — check that module for what exactly it configures.
set_style_paper()

In [6]:
# change to desired output directory
# NOTE: keep the trailing slash — the figure cells build their file names by
# plain string concatenation (dir_out + '<figure name>.svg').
dir_out = '/Volumes/pool-duderstadt/Matthias/Manuscript/Figures/SVGs/Figure1/'
# Echo the configured path in the cell output.
dir_out

'/Volumes/pool-duderstadt/Matthias/Manuscript/Figures/SVGs/Figure1/'

### Instantiate selected archives

In [None]:
# Instantiate every selected archive (with a progress bar). The resulting
# objects are read back later through DnaMoleculeArchive.instances.
for archive_name in tqdm(selected_archives):
    instantiate_archive(archive_name, awesome_data.datasets)

HBox(children=(FloatProgress(value=0.0, max=14.0), HTML(value='')))

### Archives Overview

In [None]:
# Overview of all archives instantiated so far (helper imported from
# marspy.convert.archive; see there for the exact contents of the summary).
describe_archives(DnaMoleculeArchive.instances)

### Load segment tables, apply filter & detect pauses

In [None]:
# For every instantiated archive, in this order: attach the segment tables,
# filter the segments, then run pause detection.
for archive in tqdm(DnaMoleculeArchive.instances):
    archive.add_segments_tables()
    # NOTE(review): the meaning/units of b_min and sigma_b_max are defined by
    # marspy's filter_segments — confirm there before changing the thresholds.
    archive.filter_segments(b_min=-10, sigma_b_max=10)
    archive.detect_pauses()


### Generate main df

In [None]:
def setup_pandas_df(archive_instances):
    '''
    Create the empty main table with a three-level column MultiIndex.

    The column levels are named 'molecule', 'properties' and
    'properties_details'. The first level holds the sorted union of the
    ``prefixes`` of all objects in ``archive_instances``; every prefix gets
    the same fixed set of property columns and an empty details level.
    The returned DataFrame has no rows. The collected prefixes are printed.
    '''
    # union of the prefixes found across all archives
    prefixes = set()
    for instance in archive_instances:
        prefixes.update(instance.prefixes)

    levels = [
        sorted(prefixes),
        ['initial_intensity', 'lifetime', 'start_site', 'stop_site', 'number_pauses'],
        [''],
    ]
    level_names = ['molecule', 'properties', 'properties_details']
    columns = pd.MultiIndex.from_product(levels, names=level_names)

    print(prefixes)
    return pd.DataFrame(columns=columns)
    

In [None]:
# Build the empty main table from all instantiated archives and sort its
# MultiIndex columns straight away (sorted columns are kept for performance).
df = setup_pandas_df(DnaMoleculeArchive.instances).sort_index(axis=1)
df.head()


In [None]:
# Fill the main table: one row per molecule (indexed by its UID), with
# archive-level metadata, per-protein counts/labels and per-prefix properties.
for archive in tqdm(DnaMoleculeArchive.instances):

    for molecule in archive.molecules:
        _uid = molecule.uid
        _trace = molecule.df

        # general columns first
        df.loc[_uid, 'dna_length'] = molecule.calc_length_dna()
        df.loc[_uid, 'dna_count_valid'] = archive.dna_count_valid
        df.loc[_uid, 'hs_wash'] = archive.highsalt_wash
        df.loc[_uid, 'nucleotide'] = archive.nucleotide
        # Tags are stored as one comma-separated string and split back into a
        # list after the loop. str.join also works for an empty tag
        # collection, where reduce() without an initial value raises TypeError.
        df.loc[_uid, 'tags'] = ','.join(molecule.tags)

        # general proteins
        for protein in molecule.proteins:
            # store protein number
            df.loc[_uid, 'number_' + protein] = molecule.params['Number_' + protein]
            # assign label for each protein
            df.loc[_uid, 'label_' + protein] = archive.labels[protein]

        # every specific protein
        for prefix in molecule.prefixes:
            _positions = _trace[prefix + 'Position_on_DNA']

            # mean intensity over the first 5 rows of the trace
            # NOTE(review): the previous comment said "first 10 frames" while
            # the slice has length 5 — confirm which window is intended.
            df.loc[_uid, (prefix, 'initial_intensity')] = _trace.iloc[:5][prefix + 'Intensity'].mean()

            # lifetime (in frames) defined on tracking length
            df.loc[_uid, (prefix, 'lifetime')] = len(_trace[prefix + 'y'].dropna())

            # start site: minimum position for prefixes starting with 'T7',
            # otherwise the mean position over the first 5 rows
            if prefix.startswith('T7'):
                _start_site = _positions.min()
            else:
                _start_site = _positions.iloc[:5].mean()
            df.loc[_uid, (prefix, 'start_site')] = _start_site

            # stop site: maximum position, for every prefix
            df.loc[_uid, (prefix, 'stop_site')] = _positions.max()


# convert tags back to list (an empty tag string becomes an empty list)
df['tags'] = df['tags'].apply(lambda tags: tags.split(',') if tags else [])

# again sort MultiIndexCols for performance
df.sort_index(axis=1, inplace=True)

# fix data types
df = df.infer_objects()


In [None]:
# Echo the assembled table in the cell output for visual inspection.
df

## Figures

### Figure 1B: ORC-MCM loading sites

In [None]:
# Figure 1B: distribution of start sites for ORC and MCM. One stacked panel
# per protein (histogram + KDE), each with a transparent inset that shows the
# raw counts for the 8-21 kb window.
# NOTE(review): sns.distplot is deprecated in recent seaborn releases; it is
# kept here so the figure stays identical to the published one.
proteins = ['ORC','MCM']
pal_pos = 1
# 50 bins of width 0.5 spanning 0-25 on the kb axis; shared by all histograms
_bins = np.linspace(0, 25, 51)


def _start_sites_kb(protein):
    '''
    Return the 'start_site' columns of df whose column label contains
    '<protein>_', divided by 1000 (plotted on the kb axis).
    '''
    return df.filter(regex=(protein+"_"),axis=1).xs('start_site',level='properties',axis=1)/1000


def _add_count_inset(fig, rect, protein, y_max, y_ticks):
    '''
    Add an inset axes at rect (figure coordinates) holding the count
    histogram (no KDE) of the start sites of protein, and return the axes.
    '''
    inset = fig.add_axes(rect)
    sns.distplot(_start_sites_kb(protein),
                 color=palettes[protein][pal_pos],bins=_bins, kde=False, ax=inset)

    inset.set_xlim(8,21)
    inset.set_ylim(0,y_max)
    inset.set_ylabel('Counts')
    inset.set_xticklabels([])
    inset.set_xticks([10,15,20])
    inset.set_yticks(y_ticks)
    # transparent background so the main panel stays visible underneath
    inset.patch.set_alpha(0)
    # counts axis on the right-hand side of the inset
    inset.yaxis.tick_right()
    inset.yaxis.set_label_position("right")
    inset.spines['left'].set_visible(False)
    inset.spines['top'].set_visible(False)
    return inset


fig,axes = plt.subplots(len(proteins),1,sharex=True,sharey=True)

for ax, protein in zip(axes, proteins):
    _sites = _start_sites_kb(protein)
    sns.distplot(_sites,
                 label=f'{protein}',color=palettes[protein][pal_pos],bins=_bins, ax=ax)

    # n = number of non-missing start sites over all matching columns
    # (annotated once; it used to be drawn twice at the same position)
    ax.text(0.95, 0.95, f"n = {int(_sites.count().sum())}",
            verticalalignment='top', horizontalalignment='right',
            transform=ax.transAxes)

for ax in axes:
    ax.set_xlim(0,24)
    ax.set_ylim(0,0.75)
    ax.set_ylabel(None)
    ax.legend(frameon=False,loc=9)

# one call is enough: despine() acts on all axes of the current figure
sns.despine()

axes2 = _add_count_inset(fig, [.383, .62, .42, .2], proteins[0], y_max=12.5, y_ticks=[0,5,10])
axes3 = _add_count_inset(fig, [.383, .21, .42, .2], proteins[1], y_max=25, y_ticks=[0,10,20])

# shared axis labels for the stacked panels
fig.text(0.5, -0.04, 'Position on DNA (kb)', ha='center', rotation='horizontal')
fig.text(-0.04, 0.5, 'Probability density', va='center', rotation='vertical')


#plt.tight_layout()

plt.savefig(dir_out+'1B_ORC-MCM_loading_sites_inset.svg',transparent=True)

### Figure S1D: ORC-MCM loading sites - hue nucleotide

In [None]:
# Figure S1D: start-site distributions of ORC and MCM as split violins, with
# the nucleotide condition as hue (one panel per protein).
proteins = ['ORC','MCM']

# Long-format table with one row per observed start site. The per-condition
# frames are collected first and concatenated once, instead of growing an
# initially empty DataFrame inside the loop.
_frames = []
for protein in proteins:
    for nuc in df['nucleotide'].unique():
        _sites = df[df['nucleotide']==nuc].filter(regex=(protein+"_"),axis=1).xs('start_site',level='properties',axis=1)/1000
        _temp_df = pd.DataFrame({'start_site': pd.melt(_sites)['value'].dropna()})
        _temp_df['nucleotide'] = nuc
        _temp_df['protein'] = protein
        _frames.append(_temp_df)

if _frames:
    _plot_df = pd.concat(_frames)
else:
    _plot_df = pd.DataFrame(columns=['start_site','nucleotide','protein'])

# rows belonging to each protein, computed once and reused below
_subsets = {protein: _plot_df[_plot_df['protein']==protein] for protein in proteins}

# two hue colours per panel, in the order the nucleotides appear in the data
_violin_palettes = {'ORC': [palettes['ORC'][1],palettes['ORC'][3]],
                    'MCM': [palettes['MCM'][1],palettes['OCCM'][1]]}

fig,axes = plt.subplots(1,2,sharex=True,sharey=True)

# draw both violins first; axis cosmetics follow in a second pass because the
# y axis is shared between the panels
for ax, protein in zip(axes, proteins):
    _subset = _subsets[protein]
    # a constant dummy category on y puts both hue levels on one violin,
    # which is what split=True needs
    sns.violinplot(x='start_site',y=['dummy_label']*len(_subset),hue='nucleotide',data=_subset,
                   split=True,inner='quartile', scale='area',ax=ax, palette=_violin_palettes[protein])

for ax, protein in zip(axes, proteins):
    _subset = _subsets[protein]
    ax.set_xlabel(None)
    ax.set_yticklabels([])
    ax.set_yticks([])
    # number of observations per nucleotide condition
    ax.text(0.95, 0.75, f"n-ATP = {len(_subset[_subset['nucleotide']=='ATP'])}",
            verticalalignment='top', horizontalalignment='right',
            transform=ax.transAxes)
    ax.text(0.95, 0.65, f"n-ATPgS = {len(_subset[_subset['nucleotide']=='ATPgS'])}",
            verticalalignment='top', horizontalalignment='right',
            transform=ax.transAxes)

    ax.legend(frameon=False)

sns.despine()
fig.text(0.5, -0.04, 'Position on DNA (kb)', ha='center', rotation='horizontal')
fig.text(0.04, 0.5, 'Relative probability density', va='center', rotation='vertical')


#plt.tight_layout()

plt.savefig(dir_out+'S1D_ORC-MCM_loading_sites_hueNuc.svg',transparent=True)
