In [None]:
import os, re, matplotlib, pandas, collections
import matplotlib.pyplot as plt
import numpy as np
import pandas, re
import seaborn as sns
# Notebook-level description, stored in the kernel's global `__doc__`.
# NOTE(review): exp22 is only referenced from a disabled loader string in the
# loading cell, and the exp23 (RQCD1) and exp28 (SF3B1) spreadsheets read by
# the later cells are not listed under "Experiments" below.
__doc__ = """

Working notebook to analyze percent crosslinking:
percentCrosslinking.ipynb

This is an old notebook that analyzes percent crosslinking.

Both FBL and hnRNP C purified over a pmol of crosslinked RNA, 
while the negative control AURKA did not purify a detectable amount of crosslinked RNA (Figure 5C). 

Approximately 10% of hnRNP C protein molecules were crosslinked to RNA, compared with 2% of FBL proteins, 
and an undetectably low fraction of AURKA proteins (Figure 5D). 

Experiments:
Exp22: hnRNPC, FBL. Old method without hnRNP C standard.
Exp29: hnRNPC, FBL, AURKA. Old method without hnRNP C standard.
Exp53: hnRNPC. 
"""

In [None]:
# Load the western-blot quantification tables used for Figures 5C/5D.
# `sheet_name` is the current spelling of the worksheet selector: pandas
# deprecated `sheetname` in 0.21 and later removed it, so the old keyword
# raises TypeError on current pandas.
fbl = pandas.read_excel('/Users/dfporter/pma/dataAndScripts/clip/experiments/exp29 hnRNPC FBL AURKA/exp29.xlsx',
                        sheet_name='FBL WB')

# Superseded exp22 loader for the hnRNP C table, kept verbatim as an inert
# string (it is never executed); Exp53 below is the source actually used.
old = """
hnrnpc = pandas.read_excel('/Users/dfporter/pma/clip/experiments/exp22 FBL hnRNPC/exp22.xlsx',
                       sheetname='hnRNPC wb')
"""
hnrnpc = pandas.read_excel('/Users/dfporter/pma/dataAndScripts/clip/experiments/Exp53/Exp53.xlsx',
                           sheet_name='For figure')
print(hnrnpc)

In [None]:

# Figures 5C and 5D: fmol of crosslinked RNA, and percent of protein
# crosslinked to RNA, per purified protein. Uses the `fbl` and `hnrnpc`
# tables read by the loading cell.
sns.set(font_scale = 1.2)
sns.set_style('ticks')

# Convert the crosslinked fraction to a percentage, in place, on every table
# that carries a 'Fraction XL' column.
# NOTE(review): the original loop also ran `df = df[df['% XL']>-1].copy()`,
# but that only rebound the loop variable, so no rows were ever removed from
# `fbl`/`hnrnpc`. The dead statement is dropped here so the plotted rows stay
# exactly as before; if excluding missing/negative values was intended, apply
# the filter to the concatenated frame below.
for table in [fbl, hnrnpc]:
    if 'Fraction XL' in table.columns:
        table['% XL'] = 100 * table['Fraction XL']

df = pandas.concat([hnrnpc, fbl], axis=0)
df['Protein crosslinked to RNA (%)'] = df['% XL']

# Index rows by object name. Selecting with .loc and a list keeps only the
# listed objects (every row of each) in display order, which makes a separate
# membership filter unnecessary.
object_order = ['Endogenous hnRNP C', 'FH-hnRNP C', 'FH-hnRNP C F54A', 'FBL', 'AURKA']
df.index = df['Object'].tolist()
df = df.loc[object_order]

# Shared plot options. The palette is generated with twice as many shades as
# there are objects, so the bars draw from the first half of the ramp.
_kwargs = {'kind': 'bar',
           'data': df,
           'palette': sns.cubehelix_palette(2*len(set(df['Object'])), start=0, rot=-0.2)}

# Figure 5C.
# catplot is the current name of factorplot (renamed in seaborn 0.9, old name
# later removed). It builds its own figure, so no plt.figure() is needed; the
# original one only produced an extra empty figure.
rna_grid = sns.catplot(y='Object', x='fmol RNA', orient='horizontal', **_kwargs)
rna_grid.set_xlabels('fmol crosslinked RNA')
rna_grid.fig.set_figwidth(12)
rna_grid.fig.savefig('/Users/dfporter/pma/dataAndScripts/clip/figs/fmol crosslinked RNA barchart.pdf')
plt.show()
# Close explicitly: plt.clf() after show() would create a new blank figure
# when no figure is current.
plt.close(rna_grid.fig)

# Figure 5D.
g = sns.catplot(y='Object', x='Protein crosslinked to RNA (%)', orient='horizontal', **_kwargs)
g.fig.set_figwidth(12)
g.fig.savefig('/Users/dfporter/pma/dataAndScripts/clip/figs/Crosslink rate barchart.pdf')
plt.show()
plt.close(g.fig)


In [None]:
# RQCD1 (exp23): bar charts from the excision-gel and lysate western-blot sheets.
# NOTE(review): this path has no 'dataAndScripts' component, unlike the
# exp29/Exp53 paths in the loading cell — confirm the workbook still lives here.
exp23_xlsx = '/Users/dfporter/pma/clip/experiments/exp23 RQCD1 WT and mut/exp23.xlsx'

# `sheet_name` replaces the removed `sheetname` keyword of pandas.read_excel.
rqcd = pandas.read_excel(exp23_xlsx, sheet_name='Excision gel')
print(rqcd)
#plt.rcParams["axes.labelsize"] = 15
sns.set(font_scale = 1.5)
sns.set_style('ticks')

# Friendlier axis names for two of the spreadsheet columns.
rqcd['fmol RNA purified'] = rqcd['Assumed fmol on beads']
rqcd['pmol protein purified'] = rqcd['total pmol protein on beads']

rqcd_lysate = pandas.read_excel(exp23_xlsx, sheet_name='lysate WB')

# One bar chart per (table, y-column) pair, in the original display order.
panels = [
    (rqcd, '% Crosslinked'),
    (rqcd, 'pmol protein purified'),
    (rqcd, 'fmol RNA purified'),
    (rqcd_lysate, '% Total protein'),
]
for table, y_column in panels:
    # The palette is sized from the table being plotted. The original read
    # `df['Object']`, a variable left over from the Figure 5 cell, so this
    # cell could not run on its own and its colours depended on unrelated data.
    shades = sns.cubehelix_palette(2*len(set(table['Object'])), start=0, rot=-0.2)
    # catplot is the current name of factorplot (renamed in seaborn 0.9).
    grid = sns.catplot(x='Object', y=y_column, data=table, kind='bar',
                       palette=shades,
                       aspect=0.7,
                       )
    grid.set_xlabels('')
    plt.show()
    # Close instead of plt.clf(), which creates a blank figure when none is current.
    plt.close(grid.fig)

# Keep the original cell's final binding: `rqcd` ends as the lysate WB table.
rqcd = rqcd_lysate


In [None]:
# SF3B1 (exp28): crosslinked RNA from the nylon sheet, then FLAG/actin
# expression ratio from the gel sheet.
# NOTE(review): this path has no 'dataAndScripts' component, unlike the
# exp29/Exp53 paths in the loading cell — confirm the workbook still lives here.
exp28_xlsx = '/Users/dfporter/pma/clip/experiments/exp28/exp28 SF3B1.xlsx'

# `sheet_name` replaces the removed `sheetname` keyword of pandas.read_excel.
sf3b1 = pandas.read_excel(exp28_xlsx, sheet_name='Nylon SF3B1')

#plt.rcParams["axes.labelsize"] = 15
sns.set(font_scale = 1.5)
sns.set_style('ticks')

# Palettes below are sized from the table being plotted. The original read
# `df['Object']`, a variable left over from the Figure 5 cell, so this cell
# could not run on its own. catplot is the current name of factorplot.
grid = sns.catplot(x='Object', y='fmol RNA', data=sf3b1, kind='bar',
                   palette=sns.cubehelix_palette(2*len(set(sf3b1['Object'])), start=0, rot=-0.2),
                   aspect=0.7,
                   )
grid.set_xlabels('')
plt.show()
# Close instead of plt.clf(), which creates a blank figure when none is current.
plt.close(grid.fig)

sf3b1 = pandas.read_excel(exp28_xlsx, sheet_name='Gel')
print(sf3b1)
print(sf3b1['anti-FLAG signal'])

# Flag lanes whose object name mentions SF3B1 or 700E. Casting to str first
# means blank/non-text cells simply do not match instead of raising TypeError
# as re.search did.
sf3b1['keep'] = sf3b1['Object'].astype(str).str.contains('SF3B1|700E').astype(int)
# .copy() makes the subset an independent frame, so adding the ratio column
# below does not write into a slice of the full table (SettingWithCopyWarning).
sf3b1 = sf3b1[sf3b1['keep']==1].copy()
print(sf3b1)

# Vectorised ratio; a zero actin signal now yields inf rather than raising
# ZeroDivisionError.
sf3b1['SF3B1/Actin protein expression'] = sf3b1['anti-FLAG signal'] / sf3b1['anti-Actin signal']

grid = sns.catplot(x='Object', y='SF3B1/Actin protein expression', data=sf3b1, kind='bar',
                   palette=sns.cubehelix_palette(2*len(set(sf3b1['Object'])), start=0, rot=-0.2),
                   aspect=0.7,
                   )
grid.set_xlabels('')
plt.show()
plt.close(grid.fig)