In [1]:
import numpy as np
import pandas as pd

import os
import glob

In [3]:
# Plotting setup: import the plotting libraries and apply global seaborn/matplotlib
# styling. Everything below changes process-wide rc settings, so the order of the
# calls matters (later calls override earlier ones).

# Disabled IPython magic; `%matplotlib qt5` would select the interactive Qt5 backend.
#%matplotlib qt5
import matplotlib.pyplot as plt
import matplotlib as mpl
import seaborn as sns

from cycler import cycler


# All of Anandh's customized seaborn/matplotlib settings

# "talk" context with fonts scaled by 1.5 and a default line width of 1.5
sns.set_context("talk", font_scale=1.5, rc={"lines.linewidth": 1.5})
sns.set_style("ticks")
# draw the tick marks pointing into the axes
sns.set_style({"xtick.direction": "in","ytick.direction": "in"})

# Disabled IPython magic that would switch inline figures to SVG
# (the option name was previously mistyped as `figure_f.ormats`).
#%config InlineBackend.figure_formats=['svg']

# default color cycle: red, black, blue, green, yellow, magenta, cyan
mpl.rc('axes', prop_cycle=(cycler('color', ['r', 'k', 'b','g','y','m','c']) ))

# fonttype 42 embeds TrueType fonts in PDF/PS output (matplotlib's default is Type 3)
mpl.rcParams['pdf.fonttype'] = 42
mpl.rcParams['ps.fonttype'] = 42

# Disabled: LaTeX text rendering with a sans-serif (Helvetica) preamble
#mpl.rc('text', usetex=False)
#mpl.rc('text.latex', preamble=r'\usepackage{helvet}
#\renewcommand\familydefault{\sfdefault}\usepackage{sansmath}\sansmath')

# If you want to use a different font, uncomment:
# mpl.rc('font',**{'family':'sans-serif','sans-serif':['Helvetica'],
#                  'serif': ['Helvetica']})

# tick line width shared by major and minor ticks on both axes
tw = 1.5
sns.set_style({"xtick.major.size": 3, "ytick.major.size": 3,
               "xtick.minor.size": 2, "ytick.minor.size": 2,
               'axes.labelsize': 16, 'axes.titlesize': 16,
               'xtick.major.width': tw, 'xtick.minor.width': tw,
               'ytick.major.width': tw, 'ytick.minor.width': tw})

# these mpl.rc calls run after the seaborn calls, so they take precedence
mpl.rc('xtick', labelsize=14) 
mpl.rc('ytick', labelsize=14)
mpl.rc('axes', linewidth=1.5)
mpl.rc('legend', fontsize=14)
mpl.rc('figure', figsize=(9,8))

## Get data

In [4]:
# Absolute path (drive Z:) of the folder holding this experiment's files.
# NOTE: it ends with '/' because other cells build full paths by appending a file
# name to it with `+`.
directory = 'Z:/Reed/Projects/micro_consortia/DARPA_biocon/Task 1.1/A=B/20190214 A=B mar cfp yfp small screen 1/'

# Name of the Excel workbook inside `directory`
filename = '20190214 A=B mar 1.xlsx'

In [5]:
# Read every sheet of the workbook; sheet_name=None makes read_excel return a
# dict mapping sheet name -> DataFrame.
# os.path.join yields the same path as `directory + filename` while `directory`
# ends with '/', and keeps working if that trailing separator is ever dropped.
dd = pd.read_excel(os.path.join(directory, filename), sheet_name=None)

# show which sheets were loaded
dd.keys()

odict_keys(['OD700_raw', 'CFP_raw', 'YFP_raw', 'OD700', 'CFP', 'YFP', 'OD700_tidy', 'CFP_tidy', 'YFP_tidy', 'dense_norm_cumsum_tidy', 'IDs', 'Exp'])

## Create condensed data

In [100]:
def join_fluor_and_od700 (dict_of_data):
    """
    Join the OD700 tidy sheet and every other '*_tidy' sheet into one wide DataFrame.

    Every entry of ``dict_of_data`` whose name contains '_tidy' is used. In each of
    those sheets the 'value' column is renamed after the sheet (lower-cased, with
    '_tidy' removed, e.g. 'CFP_tidy' -> 'cfp'). The sorted unique 'Time' values of
    each non-OD sheet are mapped, in rank order, onto the sorted unique 'Time'
    values of the OD700 sheet so that all sheets share the OD time stamps.
    The sheets are then concatenated column-wise (rows are aligned on the
    DataFrame index) and, for column names that occur more than once, only the
    first occurrence is kept. Finally every "OVRFLW" entry is replaced by 99999.

    The input dict and its DataFrames are left unmodified.

    Parameters
    ----------
    dict_of_data : dict of {str: pandas.DataFrame}
        Sheet name -> sheet contents. Each '*_tidy' sheet must have a 'Time'
        column; presumably they also share the same row order and identifier
        columns -- TODO confirm against the workbook layout.

    Returns
    -------
    pandas.DataFrame
        The joined data, one renamed value column per '*_tidy' sheet.

    Raises
    ------
    ValueError
        If the number of '*_tidy' sheets whose name contains 'OD700' is not
        exactly one.
    """
    import warnings

    #get the data sheets you want to join (from the argument, not a notebook global)
    useful_sheets = [x for x in dict_of_data.keys() if '_tidy' in x]

    #find the od sheet, which will be the master one
    od_sheets = [x for x in useful_sheets if 'OD700' in x]
    if len(od_sheets) != 1:
        raise ValueError(
            "looking for the od_sheet with 'OD700' expected exactly one data sheet, "
            "found {}: {}".format(len(od_sheets), od_sheets))
    od_sheet = od_sheets[0]

    #everything else gets its time column aligned to the od sheet
    others = [x for x in useful_sheets if x != od_sheet]

    def _value_name(sheet):
        #'CFP_tidy' -> 'cfp'
        return sheet.lower().replace('_tidy', '')

    #get the od time column (np.unique already returns sorted values)
    ot = np.unique(dict_of_data[od_sheet]['Time'])

    #renamed/realigned frames are collected here so the caller's dict stays untouched
    prepared = {}

    #rename od stuff (rename returns a new frame)
    prepared[od_sheet] = dict_of_data[od_sheet].rename(
        {'value' : _value_name(od_sheet)}, axis='columns')

    #for the remaining sheets of data you want to join
    for sheet in others:
        #work on a copy so the Time replacement does not edit the caller's frame
        frame = dict_of_data[sheet].copy()

        #get this one's time values
        ft = np.unique(frame['Time'])

        if len(ft) != len(ot):
            #zip below pairs values by rank and stops at the shorter sequence;
            #unpaired times in this sheet end up as NaN after the map
            warnings.warn(
                "sheet {!r} has {} unique time points but {!r} has {}; "
                "times are paired in sorted order and unpaired ones become NaN"
                .format(sheet, len(ft), od_sheet, len(ot)))

        #every time you see (fluor time entry), replace it with (od time entry)
        time_replacement_dict = dict(zip(ft, ot))

        #replace the time identifier column in the fluor df
        frame['Time'] = frame['Time'].map(time_replacement_dict)

        #do some renaming
        prepared[sheet] = frame.rename({'value' : _value_name(sheet)}, axis='columns')

    #load all data for joining, in the original sheet order
    dataframes = [prepared[sheet] for sheet in useful_sheets]

    all_data_joined = pd.concat(dataframes, axis='columns')

    #shared columns (e.g. 'Time') appear once per sheet; keep the first occurrence
    all_data_joined = all_data_joined.loc[:,~all_data_joined.columns.duplicated()]

    #fix overflow errors: "OVRFLW" entries are replaced by the sentinel 99999
    all_data_joined = all_data_joined.replace("OVRFLW", 99999)

    return all_data_joined

In [101]:
# join all '*_tidy' sheets of the loaded workbook into one wide table
dense = join_fluor_and_od700(dd)

## Create OD-normalized fluorescence

In [102]:
# OD-normalized fluorescence: each fluorescence reading divided by the OD700
# reading of the same row. `assign` returns a new frame, so `dense` is unchanged.
dense_n = dense.assign(
    cfp_norm=dense['cfp'] / dense['od700'],
    yfp_norm=dense['yfp'] / dense['od700'],
)

## Create cumulative fluorescence

In [103]:
def cumsum_fluor (df):
    """
    Append per-well cumulative sums of every fluorescence column to df.

    Every column whose name contains 'fp' is summed cumulatively in 'Time'
    order, separately for each distinct value of the 'well' column. The
    results are returned next to the original columns under the names
    '<column>_sum'; rows are matched back to df through the index.
    """
    #columns to accumulate -- relies on 'fp' appearing only in fluorescence column names
    fluor_cols = [name for name in df.columns if 'fp' in name]

    #one frame of running totals per well
    per_well_totals = []
    for well_id in np.unique(df['well']):
        well_rows = df.loc[df['well'] == well_id]

        #accumulate in time order
        time_ordered = well_rows.sort_values('Time')
        running = time_ordered.loc[:, fluor_cols].cumsum()

        running.columns = running.columns + '_sum'
        per_well_totals.append(running)

    #stack the wells on top of each other, then attach them to df as new columns
    stacked_totals = pd.concat(per_well_totals, axis='index')

    return pd.concat([df, stacked_totals], axis='columns')

In [104]:
# add per-well cumulative sums ('<column>_sum') for every fluorescence column
dense_n_sum = cumsum_fluor(dense_n)

['cfp', 'yfp', 'cfp_norm', 'yfp_norm']


In [107]:
# Save the condensed table next to the source workbook.
# os.path.join yields the same path as `directory + name` while `directory`
# ends with '/', and keeps working if that trailing separator is ever dropped.
dense_n_sum.to_csv(os.path.join(directory, 'condensed_normed_cumsum_data.csv'))

## Create a two-stage, non-cumulative CFP/YFP trace