In [None]:
import os, sys, re, pandas, collections
import matplotlib.pyplot as plt
import numpy as np
import seaborn as sns
import scipy.stats as scs

"""
Exp35 hnRNPC: L5-BC17, L3-BC12. See 180201.
The input L3 oligo tested here is L3-BC12 (correct).
The input L5 oligo tested on 4/13 was L5-17.
"""

def load_sheet(
    fname='/Users/dfporter/pma/dataAndScripts/nonclip/adapter_quant/Shift based quantification protocol and results.xlsx',
    sheet_name='180513_pre_CLIP'):
    """Read one worksheet of quantification data and add derived columns.

    Args:
        fname: Path of the Excel workbook passed to ``pandas.read_excel``.
        sheet_name: Name of the worksheet to read.

    Returns:
        A DataFrame holding the sheet contents plus:

        * ``FU`` -- a copy of the ``Signal`` column.
        * ``log_Signal`` -- log2 of ``Signal``.
        * ``Signal/fmol`` and ``FU/fmol`` -- ``Signal`` divided by ``fmols``
          (both columns hold the same values).
        * ``log_fmols`` -- log2 of ``fmols``.

        The three fmol-based columns are added only when all of them can be
        computed (for example, they are omitted when the sheet has no
        ``fmols`` column); in that case a message is printed instead.

    Rows whose optional ``Skip`` column equals ``'Yes'`` are dropped.
    """
    df = pandas.read_excel(fname, sheet_name=sheet_name)

    # Only filter when the 'Skip' column actually holds text; an entirely
    # empty column (all NaN) is left alone.
    if 'Skip' in df.columns and any(isinstance(x, str) for x in df['Skip']):
        df = df[df['Skip'] != 'Yes'].copy()

    df['FU'] = df['Signal']
    df['log_Signal'] = [np.log2(x) for x in df.Signal]

    try:
        # Compute everything first so that a failure part-way through never
        # leaves the frame with only some of the fmol columns.
        signal_per_fmol = [x / y for x, y in zip(df.Signal, df.fmols)]
        log_fmols = [np.log2(x) for x in df.fmols]
        df['Signal/fmol'] = signal_per_fmol
        df['FU/fmol'] = signal_per_fmol
        df['log_fmols'] = log_fmols
    except Exception:
        # `Exception` (not a bare `except:`) so that KeyboardInterrupt and
        # SystemExit still propagate to the caller.
        print("Tried to make some fmol estimates but failed.")
    return df

def get_Ka(df):
    """Estimate signal-per-fmol constants from 'Staple' rows and apply them.

    The mean ``Signal`` of the rows whose ``Complex`` is ``'Staple'`` is taken
    per ``Object``; the means for ``'αL5'`` and ``'αL3'`` are divided by 50 to
    give one constant each (the ``'αL3'`` constant is additionally multiplied
    by 4/3). Every row's ``FU`` is then divided by the constant for its
    ``Object`` and stored in a new ``'Est. fmols'`` column; rows whose
    ``Object`` has no constant get 0.

    Args:
        df: DataFrame with ``Complex``, ``Object``, ``Signal`` and ``FU``
            columns. It is modified in place (``'Est. fmols'`` is added).

    Returns:
        ``(df, K)`` where ``df`` is the same object that was passed in and
        ``K`` maps ``'αL3'`` and ``'αL5'`` to their constants.

    Raises:
        KeyError: if the 'Staple' rows contain no ``'αL5'`` or no ``'αL3'``.
    """
    staples = df[df['Complex'] == 'Staple']
    # numeric_only=True: text columns (such as 'Complex') cannot be averaged,
    # and recent pandas raises instead of silently dropping them.
    means = staples.groupby(by='Object').mean(numeric_only=True)
    print(means)

    # NOTE(review): 50 is presumably the fmol of staple loaded per lane --
    # confirm against the protocol.
    Ka5 = means.loc['αL5', 'Signal'] / 50

    # 4/3 correction for incomplete label.
    Ka3 = (4 / 3) * means.loc['αL3', 'Signal'] / 50

    K = {'αL3': Ka3, 'αL5': Ka5}

    def to_est_fmols(fu, obj):
        # Objects without a calibration constant are reported as 0.
        return fu / K[obj] if obj in K else 0

    df['Est. fmols'] = [
        to_est_fmols(fu, obj) for fu, obj in zip(df['FU'], df['Object'])]

    return df, K

In [None]:
# Colour lookup keyed by label. Keys are strings plus the integers 700 and
# 800; values are hex colour strings, except 'Staple', which maps to 'k'.
# Presumably consumed as matplotlib colours (where 'k' is black) -- the
# mapping is not used in the visible cells.
obj_color = dict([
    ('αL3', '#DF587A'),
    ('αL5', '#74C6A0'),
    (700, '#DF587A'),
    (800, '#74C6A0'),
    ('L5', '#DF587A'),
    ('L3', '#74C6A0'),
    ("5'", '#DF587A'),
    ("3'", '#74C6A0'),
    ('Hairpin', '#74C6A0'),
    ('Staple', 'k'),
])

In [None]:


def get_L_over_antiL(df):
    """Per-lane ratio of linker signal to anti-linker signal, for L3 and L5.

    ``Signal`` is averaged per (``Lane``, ``Complex``, ``Object``). For every
    lane other than 0 the mean for ``'L3'`` is divided by the mean for
    ``'αL3'``, and likewise ``'L5'`` by ``'αL5'``. If an object occurs under
    more than one ``Complex`` in the same lane, the group that comes last in
    the sorted group index is the one used.

    The ratios, the intermediate per-lane values and a separator line are
    printed.

    Args:
        df: DataFrame with ``Lane``, ``Complex``, ``Object`` and ``Signal``
            columns. ``Object`` may only contain 'L3', 'L5', 'αL3' and 'αL5'
            in lanes other than 0.

    Returns:
        ``(L3_over_αL3, L5_over_αL5)``: two lists of ratios, one entry per
        lane, in ascending lane order.

    Raises:
        KeyError: if a lane other than 0 has an ``Object`` outside the four
            listed above, or has a linker value without the matching
            anti-linker value.
    """
    # numeric_only=True: text columns (e.g. a string 'Replicate') cannot be
    # averaged, and recent pandas raises instead of silently dropping them.
    _sep = df.groupby(by=['Lane', 'Complex', 'Object']).mean(numeric_only=True)

    L3_vals_by_lane = {}
    L5_vals_by_lane = {}
    αL3_vals_by_lane = {}
    αL5_vals_by_lane = {}

    to_dict = {
        'αL3': αL3_vals_by_lane,
        'αL5': αL5_vals_by_lane,
        'L5': L5_vals_by_lane,
        'L3': L3_vals_by_lane,
    }

    for (lane, complx, obj), signal in zip(_sep.index, _sep['Signal'].values):
        # Lane 0 is excluded from the ratios.
        # NOTE(review): presumably a reference/standards lane -- confirm.
        if lane == 0:
            continue
        to_dict[obj][lane] = signal

    L3_over_αL3 = [
        L3_vals_by_lane[lane] / αL3_vals_by_lane[lane]
        for lane in L3_vals_by_lane]
    L5_over_αL5 = [
        L5_vals_by_lane[lane] / αL5_vals_by_lane[lane]
        for lane in L5_vals_by_lane]

    print(L3_over_αL3, L5_over_αL5)
    print(to_dict)
    print('---' * 140)

    return L3_over_αL3, L5_over_αL5

# Build a long-format table of linker/anti-linker ratios for exp35, labelled
# 'post-CLIP' (exp35 workbook) and 'pre-CLIP' (sheet '180513_pre_CLIP').
#
# This cell expects get_L_over_antiL to return two lists (L3 and L5). A later
# cell redefines that name to return a single list, so run this cell before
# that one (or after re-running the cell that defines the two-list version).

# Alternative post-CLIP input kept for reference:
#df = load_sheet(sheet_name='180501_1')
df = load_sheet(
    sheet_name='180501 hnRNPC R1,2,3 qRNA',
    fname='/Users/dfporter/pma/dataAndScripts/clip//experiments/exp35 hnRNPC FBL AURKA RPS3/exp35.xlsx')
# Replicate label: 'R' followed by the first character of the sheet value.
df['Replicate'] = ['R' + str(rep)[:1] for rep in df.Replicate]

df, K = get_Ka(df)
L3aL3_clip, L5aL5_clip = get_L_over_antiL(df)

df = load_sheet(sheet_name='180513_pre_CLIP')
L3aL3_in, L5aL5_in = get_L_over_antiL(df)

print(L3aL3_in, L5aL5_in)

# One record per ratio, in the order: L3 pre, L3 post, L5 pre, L5 post.
arr = [
    {"Linker/αLinker FU": ratio, "CLIP": stage, "Adapter": adapter}
    for adapter, stage, ratios in (
        ('L3', "pre-CLIP", L3aL3_in),
        ('L3', "post-CLIP", L3aL3_clip),
        ('L5', "pre-CLIP", L5aL5_in),
        ('L5', "post-CLIP", L5aL5_clip),
    )
    for ratio in ratios
]

# From here on `df` is the ratio table, no longer the raw sheet.
df = pandas.DataFrame(arr)

In [None]:
# Express every ratio relative to the mean pre-CLIP ratio of its adapter, so
# that the pre-CLIP group of each adapter averages 1.
pre_mean = df.groupby(by=['Adapter', 'CLIP']).mean()
to_mean_in = {
    adapter: pre_mean.loc[(adapter, 'pre-CLIP')]['Linker/αLinker FU']
    for adapter in ('L3', 'L5')
}
df['Linker/αLinker FU'] = [
    ratio / to_mean_in[adapter]
    for adapter, ratio in zip(df['Adapter'], df['Linker/αLinker FU'])
]
# Bar plot of the ratios in `df`, one bar group per adapter, split by CLIP
# stage, saved as a PDF.

# Clear pyplot's current figure before starting.
# NOTE(review): when no figure is open, clf() operates on a freshly created
# current figure, which may show up as an extra empty figure -- confirm
# whether this call is needed.
plt.clf()

fig = plt.figure()
# No `ax` is passed, so seaborn draws on pyplot's current axes (in `fig`).
# The palette is asked for one more colour than there are CLIP categories.
b = sns.barplot(
    data=df, y='Linker/αLinker FU', x='Adapter', hue='CLIP',
    palette=sns.cubehelix_palette(start=-.4, rot=-.14, n_colors=1+len(set(df['CLIP']))),
    #width=0.1
           )


# Final figure size: 1.5 wide by 3 high (inches).
fig.set_figwidth(1.5)
fig.set_figheight(3)

# Output path is relative to the kernel's working directory.
fig.savefig('../../clip/figs/Linker_fluorescence_before_and_after_CLIP.pdf')
plt.show()

plt.clf()

In [None]:
def get_L_over_antiL(df):
    """Per-lane ratio of L5 signal to αL5 signal.

    Note: this definition replaces the function of the same name defined in
    an earlier cell, which handled L3 as well and returned two lists. Cells
    that unpack two return values must not be run after this one.

    ``Signal`` is averaged per (``Lane``, ``Complex``, ``Object``). For every
    lane other than 0 the mean for ``'L5'`` is divided by the mean for
    ``'αL5'``. If an object occurs under more than one ``Complex`` in the
    same lane, the group that comes last in the sorted group index is the
    one used.

    The input frame, the intermediate per-lane values, a separator line and
    the ratios are printed.

    Args:
        df: DataFrame with ``Lane``, ``Complex``, ``Object`` and ``Signal``
            columns. ``Object`` may only contain 'L5' and 'αL5' in lanes
            other than 0.

    Returns:
        List of L5/αL5 ratios, one entry per lane, in ascending lane order.

    Raises:
        KeyError: if a lane other than 0 has an ``Object`` other than 'L5' or
            'αL5', or has an 'L5' value without an 'αL5' value.
    """
    # numeric_only=True: text columns (e.g. 'CLIP', 'Replicate') cannot be
    # averaged, and recent pandas raises instead of silently dropping them.
    _sep = df.groupby(by=['Lane', 'Complex', 'Object']).mean(numeric_only=True)

    L5_vals_by_lane = {}
    αL5_vals_by_lane = {}

    to_dict = {
        'αL5': αL5_vals_by_lane,
        'L5': L5_vals_by_lane,
    }
    # Debug output retained from the original: dumps the whole input frame.
    print('lllll', df)

    for (lane, complx, obj), signal in zip(_sep.index, _sep['Signal'].values):
        # Lane 0 is excluded from the ratios.
        # NOTE(review): presumably a reference/standards lane -- confirm.
        if lane == 0:
            continue
        to_dict[obj][lane] = signal

    L5_over_αL5 = [
        L5_vals_by_lane[lane] / αL5_vals_by_lane[lane]
        for lane in L5_vals_by_lane]

    print(to_dict)
    print('---' * 140)
    print(L5_over_αL5)

    return L5_over_αL5

# Build a table of L5/αL5 ratios for exp70, split by the sheet's 'CLIP'
# column, and express each ratio as a percentage of the mean pre-CLIP ratio.
#
# This cell expects get_L_over_antiL to return a single list (the L5-only
# version defined in this notebook's later definition).
df = load_sheet(fname='/Users/dfporter/pma/dataAndScripts/clip//experiments/Exp70_stau1_fluor_loss/exp70_stau1_fluor_loss.xlsx',
               sheet_name='Fluor loss')
print(df)
print('----')
# Replicate label: 'R' followed by the first character of the sheet value.
df['Replicate'] = ['R' + str(x)[:1] for x in df.Replicate]

L5aL5_clip = get_L_over_antiL(df[df['CLIP']=='post-CLIP'])

# Filter the pre-CLIP rows once and reuse the copy.
_pre = df[df['CLIP']=='pre-CLIP'].copy()
L5aL5_in = get_L_over_antiL(_pre)

mean_input = np.mean(L5aL5_in)

# One record per ratio: all pre-CLIP values first, then all post-CLIP values.
arr = [{"Linker/αLinker FU": x, "CLIP": "pre-CLIP", "Adapter": 'L5'} for x in L5aL5_in]
arr.extend(
    [{"Linker/αLinker FU": x, "CLIP": "post-CLIP", "Adapter": 'L5'} for x in L5aL5_clip])

# From here on `df` is the ratio table, no longer the raw sheet.
df = pandas.DataFrame(arr)
# Percent of the mean pre-CLIP ratio (the pre-CLIP group averages 100).
df["Linker/αLinker FU"] = 100 * df["Linker/αLinker FU"] / mean_input
# Box plot of the percentage values in `df`, one box per CLIP stage, saved
# as a PDF.

# Clear pyplot's current figure before starting.
# NOTE(review): when no figure is open, clf() operates on a freshly created
# current figure, which may show up as an extra empty figure -- confirm
# whether this call is needed.
plt.clf()

fig = plt.figure()
# No `ax` is passed, so seaborn draws on pyplot's current axes (in `fig`).
# NOTE(review): recent seaborn releases reportedly warn when `palette` is
# given without `hue` -- confirm against the installed version.
b = sns.boxplot(
    data=df, y='Linker/αLinker FU', x='CLIP',
    #color='k',
    palette='Greys',
    #palette=sns.cubehelix_palette(start=-.4, rot=-.14, n_colors=1+len(set(df['CLIP']))),
    #width=0.1
           )
# Disabled overlay of the individual points:
#sns.stripplot(
#    data=df, y='Linker/αLinker FU', x='CLIP',
#    color='k', alpha=1, size=4
    #palette=sns.cubehelix_palette(start=-.4, rot=-.14, n_colors=1+len(set(df['CLIP']))),
    #width=0.1
#           )
# Fixed y-range of 0-110; any value above 110 falls outside the visible axis.
plt.ylim(0, 110)
plt.ylabel('L5 linker fluorescence (%)', fontdict={'fontsize': 12})
# Rotate the x tick labels to vertical.
plt.xticks(rotation=90)
# Final figure size: 1.5 wide by 3 high (inches).
fig.set_figwidth(1.5)
fig.set_figheight(3)

# Output path is relative to the kernel's working directory.
fig.savefig('../../clip/figs/Linker_fluorescence_change_from_preNC_membrane_CLIP_steps.pdf')
plt.show()

plt.clf()