In [None]:
# Notebook-level description, bound to __doc__ so it is available at runtime.
# It states that this notebook only draws figures and that its single input is
# all_ligation_eff_estimates.xlsx.
# NOTE(review): the "RNA shift efficiencies are the same" sentence is ambiguous
# (same values, or same method of calculation?) and the "Oligo shift" sentence
# presumably names the notebook that computes those estimates — confirm and
# reword the text if so.
__doc__ = """
This notebook makes figures from the ligation efficiency calculations done elsewhere.
This notebook does not do any of the work to estimate ligation efficiencies.

It outputs figures and takes all_ligation_eff_estimates.xlsx as the only input.

Protein shift efficiencies are calculated in excel from the images.
RNA shift efficiencies are the same (in this version).
Oligo shift efficiencies are calculated in L5,L3 by aL5,aL3 estimation.ipynb.

"""

In [None]:
import os, sys, re, pandas, collections
import matplotlib.pyplot as plt
import numpy as np
import seaborn as sns

import __init__
sys.path = __init__.add_paths(sys.path)

# Directory that receives every figure written by this notebook.
figs_dir = 'figs/'

# Create it from Python instead of shelling out to `mkdir`: this works on every
# platform, raises on real failures, and exist_ok=True makes re-running the
# cell a no-op when the directory is already there.
os.makedirs(figs_dir, exist_ok=True)

In [None]:
# Read the estimates table and number its rows; the 'line' column becomes the
# third index level below, so every row keeps a distinct index entry.
_df = pandas.read_excel('all_ligation_eff_estimates.xlsx')
_df['line'] = range(len(_df))

print(_df['Exp'].value_counts())

# set_index returns a new frame, so _df keeps its flat column layout.
df = _df.set_index(['Method', 'Exp', 'line'])
print(df.head(2))

# Exp28 rows of the two methods being compared, each re-keyed by replicate.
nc, nylon = (
    df.loc[method, 'Exp28'].set_index(['Replicate'])
    for method in ('Protein shift', 'RNA shift')
)

def rows_with_ligations(_slice):
    """Return the rows that have a '% ligation' value, indexed by 'Ligation'.

    Parameters
    ----------
    _slice : pandas.DataFrame
        Frame with at least the columns '% ligation' and 'Ligation'.

    Returns
    -------
    pandas.DataFrame
        A new frame containing only the rows whose '% ligation' is not
        missing, with the 'Ligation' column moved into the index. The input
        frame is left unmodified.
    """
    # notna() recognises NaN/None for any column dtype, whereas np.isnan raises
    # TypeError on object-dtype columns (e.g. spreadsheet columns with mixed cells).
    has_value = _slice['% ligation'].notna()
    return _slice.loc[has_value].set_index('Ligation')

# For every replicate, collect the L5 and L3 ligation efficiencies reported by
# the two methods; each replicate contributes one row to scatter_df.
rows = []
# Index.unique() keeps first-appearance order, so the row order (and therefore
# the printed table) is the same on every run; iterating a set() gives an
# arbitrary order that can change between runs for string labels.
for replicate in nc.index.unique():
    print(replicate)

    # rows_with_ligations builds a new frame from its argument, so the slices
    # can be passed directly without copying them first.
    nc_slice = rows_with_ligations(nc.loc[replicate])
    nylon_slice = rows_with_ligations(nylon.loc[replicate])

    rows.append({
        'Protein shift L5 ligation eff': nc_slice.loc['L5', 'L5 ligation eff'],
        'Protein shift L3 ligation eff': nc_slice.loc['L3', 'L3 ligation eff'],
        'RNA shift L5 ligation eff': nylon_slice.loc['L5', 'L5 ligation eff'],
        'RNA shift L3 ligation eff': nylon_slice.loc['L3', 'L3 ligation eff'],
    })

# Scale by 100 so the values match the 0-100 (%) axes used when plotting.
scatter_df = 100 * pandas.DataFrame(rows)
print(scatter_df)
print('---')

# Scatter plot comparing the two methods: x is the protein shift estimate, y the
# RNA shift estimate, one point per row of scatter_df and per ligation (L5/L3).
# Both ligations share the same axes, so the axis labels name only the method
# and the legend tells the L5 and L3 series apart.
fig, ax = plt.subplots(figsize=(4, 4))
ax.scatter(
    scatter_df['Protein shift L5 ligation eff'],
    scatter_df['RNA shift L5 ligation eff'],
    c='#DF587A',
    label='L5',
)
ax.scatter(
    scatter_df['Protein shift L3 ligation eff'],
    scatter_df['RNA shift L3 ligation eff'],
    c='#74C6A0',
    label='L3',
)
ax.set_xlim(0, 100)
ax.set_ylim(0, 100)
ax.set_xlabel('Protein shift method ligation efficiency (%)')
ax.set_ylabel('RNA shift method ligation efficiency (%)')
ax.legend(title='Ligation')

fig.savefig(figs_dir + '/scatterplot_ligation_eff_by_nc_vs_intact_nylon.pdf')
plt.show()
# Close this figure explicitly; plt.clf() after show() can create and leave
# behind a new empty figure.
plt.close(fig)

# Disabled scratch code: the print statements below live inside a string
# literal bound to `nah`, so they are not executed here.
# NOTE(review): the 'NC' / 'Nylon, intact' / 'Adapter shift' method labels do
# not match the 'Protein shift' / 'RNA shift' labels used by the live code
# above — presumably stale; confirm before re-enabling any of these lines.
nah = """
print(df.loc['NC', 'Exp9']['L5 ligation eff'])
print(df.loc['NC', 'Exp22']['L5 ligation eff'])
print(df.loc['NC', 'Exp28']['L5 ligation eff'])
print(df.loc['Nylon, intact', 'Exp28']['L5 ligation eff'])
print(df.loc['Adapter shift', 'Exp35']['L5 ligation eff'])
"""

# Colour lookup passed to seaborn as the palette for the 'Ligation' hue.
# NOTE(review): the 700/800 keys are not used by the code visible here —
# presumably they serve another plot; confirm before removing.
obj_color = {
    'L5': '#DF587A',
    'L3': '#74C6A0',
    700: '#DF587A',
    800: '#74C6A0',
}

# Both L5 and L3 ligation efficiencies
# Drop Exp22 and rescale '% ligation' by 100 to match the 0-100 y axis. The
# explicit .copy() makes the filtered frame independent, so the column
# assignment does not write into a slice (SettingWithCopyWarning).
_df = _df[_df['Exp'] != 'Exp22'].copy()
_df['% ligation'] = 100 * _df['% ligation']

# One box per method and ligation.
fig, ax = plt.subplots()
sns.boxplot(data=_df, x='Method', y='% ligation', hue='Ligation', palette=obj_color, ax=ax)

plt.xticks(rotation='vertical')
sns.despine(ax=ax)
fig.set_figwidth(2.5)
ax.set_ylim(0, 100)
# The file name says "barchart" although this is a box plot; it is kept
# unchanged so anything that refers to the existing file keeps working.
fig.savefig(figs_dir + '/barchart_L5_and_L3_ligation_eff_by_method.pdf')
plt.show()
# Close this figure explicitly; plt.clf() after show() can create and leave
# behind a new empty figure.
plt.close(fig)
print('--')
