# Import libraries 

In [1]:
%pylab inline
import scipy as sp
import numpy as np
import pandas as pd
import seaborn as sns
import matplotlib as mpl
import matplotlib.pyplot as plt
import matplotlib.lines as mlines
import matplotlib.colors as mcolors
import pylab as pl

from matplotlib.colors import LogNorm, Normalize
from mpl_toolkits import mplot3d
from sklearn.metrics import mean_squared_error
from statannot import add_stat_annotation

import warnings
import itertools



Populating the interactive namespace from numpy and matplotlib


In [3]:
# Load the strain table and build a lookup: sample name -> identifier.
phylo = pd.read_excel('Data/Strains.xlsx')

phylo_dict = {
    sample_name: identifier
    for sample_name, identifier in zip(phylo['Sample Name'], phylo['Identifier'])
}
# RP1_A and RP1_B are entered by hand and share a single identifier
# (presumably they are not listed in the spreadsheet -- TODO confirm).
phylo_dict.update({'RP1_A': 'R_planticola1', 'RP1_B': 'R_planticola1'})


# K2: singles and pair effect experiment
## Setup
### Create lists of sample types for later use  

In [4]:
# Sample-name lists used to select rows/columns in later cells.
# Entries are grouped by name prefix for readability; order is significant
# only in that it is preserved from the original definition.
isolates = [
    'CB2',
    'EC1', 'EC10', 'EC11', 'EC12', 'EC14', 'EC16', 'EC17', 'EC18', 'EC9',
    'Gh11', 'Gh12', 'Gh16', 'Gh22', 'Gh24', 'Gh28', 'Gh29', 'Gh30', 'Gh35',
    'Gh36', 'Gh38', 'Gh40', 'Gh43', 'Gh44', 'Gh50', 'Gh54', 'Gh59', 'Gh60',
    'Gh61', 'Gh62', 'Gh67', 'Gh68', 'Gh9',
    'KA', 'LA',
    'OGI19-L', 'OGI33-L',
    'Og122', 'Og84', 'Og85', 'Og87', 'Og90', 'Og92',
    'PAg3', 'PAl', 'PH', 'PK', 'PP', 'ParPR2',
    'RP1_A', 'RP1_B', 'RP2',
    'SF1', 'SF2',
    'Ut12', 'Ut14', 'Ut22',
]

# Focal strains as they appear with the `_gfp` suffix ...
focals = [
    'EColi_gfp',
    'EA_gfp',
    'RP1_gfp',
    'PAg1_gfp',
    'BI_gfp',
    'CF_gfp',
]

# ... and the matching names without the suffix (note: 'Ecoli', not 'EColi').
unlabled_focals = [
    'Ecoli',
    'EA',
    'RP1',
    'PAg1',
    'BI',
    'CF',
]

# Blank-well labels, numbered and un-numbered.
blanks = [
    'Blank1',
    'Blank2',
    'Blank3',
    'Blank4',
    'Blank5',
    'Blank',
]

# Column names of the two measurement timepoints.
timepoints = ['t0', 't1']

### Load and organize data

In [13]:
# Read one CSV per chip, keep only the columns used downstream, and give the
# label / normalized-signal columns the short names used in the rest of the
# notebook (sample1, sample2, t0, t1).
chip_files = {
    'c1': 'Data/Autofluoresence_assay/chip1.csv',
    'c2': 'Data/Autofluoresence_assay/chip2.csv',
}
kept_columns = ['Hash', 'Label_left', 'Label_right', 't0_norm2', 't1_norm2',
                't0_Area', 't1_Area', 'Total']
short_names = {'Label_left': 'sample1', 'Label_right': 'sample2',
               't0_norm2': 't0', 't1_norm2': 't1'}

chips_dict = {}
for chip_id, csv_path in chip_files.items():
    chip_table = pd.read_csv(csv_path)
    chips_dict[chip_id] = chip_table[kept_columns].rename(columns=short_names)

## Normalize Data
- Subtract each well's own t0 value from its t1 value
    - so that t1 measures the biomass added since t0


In [6]:
# Build a normalized copy of every chip table; the originals in chips_dict
# are left untouched.
n_chips_dict = {}
for chip, raw_table in chips_dict.items():
    # t1 becomes the change relative to the same well's t0 reading.
    # assign() returns a new frame, so raw_table is not modified.
    df = raw_table.assign(t1=lambda table: table['t1'] - table['t0'])

    # Floor both timepoint columns at 1: zero and negative values would
    # produce inf values later on.
    df[timepoints] = df[timepoints].clip(lower=1)

    n_chips_dict[chip] = df

## Calculate effects

In [7]:
# Silence all warnings from here on (this installs a process-wide filter,
# so it also affects every cell executed after this one).
warnings.filterwarnings("ignore")

# The A/B variants of RP1 and the numbered blanks are collapsed onto one
# label each, so that they fall into the same (sample1, sample2) group.
label_merge = {'RP1_A': 'RP1', 'RP1_B': 'RP1',
               'Blank1': 'Blank', 'Blank2': 'Blank',
               'Blank3': 'Blank', 'Blank4': 'Blank',
               'Blank5': 'Blank'}
pair_cols = ['sample1', 'sample2']
tp = 't1'  # timepoint column that is summarized

# Per chip: one row per (sample1, sample2) pair with the median of `tp`,
# the number of non-null wells ('Count') and the standard error of the
# mean ('STE').
k2_effects_dict = {}
for chip in n_chips_dict:
    data = n_chips_dict[chip].copy()
    data[pair_cols] = data[pair_cols].replace(label_merge)

    # A single grouped pass computes all three statistics; the original
    # rebuilt the same groupby three times.
    effect_data = (
        data.groupby(pair_cols)[tp]
        .agg(['median', 'count', 'sem'])
        .rename(columns={'median': tp, 'count': 'Count', 'sem': 'STE'})
        .reset_index()
    )

    k2_effects_dict[chip] = effect_data


In [8]:
# Stack the per-pair summaries of both chips.
# pd.concat replaces DataFrame.append, which was removed in pandas 2.0;
# like append, it keeps each frame's original row index.
all_data = pd.concat([k2_effects_dict['c1'], k2_effects_dict['c2']])

# Square-ish table of the t1 summary: rows = sample1, columns = sample2.
# Pairs that occur more than once are averaged (pivot_table's default
# aggfunc); pairs that never occur are filled with 0.
ulf_df = all_data.pivot_table(index='sample1', columns='sample2',
                              values='t1', fill_value=0)

# Mirror values across the diagonal: a pair recorded only as (a, b) is
# copied into (b, a) and vice versa. 0 is used as the "missing" marker,
# which is safe because t1 was clipped to a minimum of 1 during
# normalization. Every column label must also be a row label (and the
# reverse), otherwise the .at lookups raise KeyError.
for i in ulf_df.index:
    for ii in ulf_df.columns:
        if ulf_df.at[i, ii] == 0:
            ulf_df.at[i, ii] = ulf_df.at[ii, i]
        elif ulf_df.at[ii, i] == 0:
            ulf_df.at[ii, i] = ulf_df.at[i, ii]

# Keep only the unlabeled-focal columns, remove the *_gfp focal rows and
# Og85, and spell the E. coli column 'EColi'.
# NOTE(review): the reason for excluding Og85 is not recorded here.
ulf_df = (
    ulf_df[unlabled_focals]
    .drop(index=focals + ['Og85'])
    .rename(columns={'Ecoli': 'EColi'})
)

In [9]:
# For each chip keep only the wells where both label columns name the same
# sample (sample1 == sample2).
monos_dic = {}
for chip, chip_table in n_chips_dict.items():
    same_sample = chip_table['sample1'] == chip_table['sample2']
    monos_dic[chip] = chip_table[same_sample]

In [11]:
# Median t1 of each focal strain paired with itself, keyed by focal name.
# The 't1' column is selected *before* the reduction: calling median() on
# the whole frame fails on pandas >= 2.0 because of the string columns.
is_focal_mono = (all_data.sample1.isin(focals) &
                 (all_data.sample1 == all_data.sample2))
focal_monos = all_data[is_focal_mono].groupby('sample1')['t1'].median().to_dict()

# Median t1 of every same-sample well across both chips, keyed by sample1.
# pd.concat replaces DataFrame.append, which was removed in pandas 2.0.
iso_monos = (pd.concat([monos_dic['c1'], monos_dic['c2']])
             .groupby('sample1')['t1'].median().to_dict())

# For each focal, list the samples whose own median t1 is below one fifth of
# the focal's median t1. Raises KeyError if a focal has no entry in
# focal_monos.
usable_isos_dic = {}
for f in focals:
    usable_isos = [iso for iso in iso_monos
                   if (focal_monos[f] / 5) > iso_monos[iso]]
    usable_isos_dic[f] = usable_isos

In [12]:
# Write one column per focal listing its usable samples. Each list is wrapped
# in a Series so that lists of different lengths can share one DataFrame
# (shorter columns are padded with NaN).
usable_isos_columns = {
    focal: pd.Series(sample_names)
    for focal, sample_names in usable_isos_dic.items()
}
pd.DataFrame(usable_isos_columns).to_csv('usable_isos_per_target.csv')