In [1]:
#workhorses
import numpy as np
import pandas as pd


## Get the data

In [2]:
#sheet_name=None asks pandas for every sheet in the workbook, returned as a dict keyed by sheet name
data_dict = pd.read_excel(
    '../../Local Data/20181009 top 4 A B cell variants A=B sampling exp 1.xlsx',
    sheet_name=None,
)

In [3]:
#show the sheet names that were read from the workbook
data_dict.keys()

odict_keys(['OD700', 'YFP', 'BFP', 'IDs', 'Exp-btek'])

## Munge the data

### Functions

In [4]:
def get_sec (datetimeTime_obj):
    """Return the whole seconds elapsed since 00:00:00 for a time-like object.

    Only the ``hour``, ``minute`` and ``second`` attributes of
    ``datetimeTime_obj`` are read; any finer-grained component is ignored.
    """
    t = datetimeTime_obj

    return 3600 * t.hour + 60 * t.minute + t.second

In [5]:
def replace_time (data_sheet):
    """Return a copy of ``data_sheet`` with its 'Time' column expressed in hours.

    Each 'Time' entry is converted to seconds with ``get_sec`` and then
    divided by 3600. The input dataframe is left untouched.
    """
    #convert first, from the untouched input, then store on the copy
    time_in_hours = data_sheet['Time'].apply(get_sec).divide(3600)

    converted = data_sheet.copy()
    converted['Time'] = time_in_hours

    return converted

In [6]:
def replace_temp (data_sheet):
    """Return a copy of ``data_sheet`` with temperature columns renamed to 'Temp C'.

    Any column whose name contains the degree sign is renamed; every other
    column keeps its name and position. The input dataframe is left untouched.
    """
    renamed = data_sheet.copy()

    new_names = []
    for name in renamed.columns:
        if '°' in name:
            new_names.append('Temp C')
        else:
            new_names.append(name)

    renamed.columns = new_names

    return renamed

In [7]:
def add_id_info (data_sheet_melted, df_id):
    """Return a copy of the melted data annotated with per-well ID information.

    Parameters
    ----------
    data_sheet_melted : pandas.DataFrame
        Long-format data with a 'well' column.
    df_id : pandas.DataFrame
        One row per well: a 'well' column plus any number of ID columns.

    Returns
    -------
    pandas.DataFrame
        Copy of ``data_sheet_melted`` (same rows, same index) with one extra
        column per ID column of ``df_id``, in the order they appear in
        ``df_id``. Rows whose well is not listed in ``df_id`` get a missing
        value in the ID columns.

    Raises
    ------
    ValueError
        If a well appears more than once in ``df_id``, since it would be
        ambiguous which ID row to use.
    """
    #make a copy of the dataframe so you can return the new one and set whatever name you want
    df = data_sheet_melted.copy()

    #every column except 'well' carries ID info; compare names exactly
    #('col not in "well"' would be a substring test and drop columns such as 'l' or 'we')
    all_id_columns = [col for col in df_id.columns if col != 'well']

    #rows without a well name cannot be matched to any data, so leave them out of the lookup
    ids = df_id.dropna(subset=['well'])

    #each well must map to exactly one row of ID info
    is_repeat = ids['well'].duplicated()
    if is_repeat.any():
        raise ValueError(
            "df_id has more than one row for well(s): {}".format(
                list(ids.loc[is_repeat, 'well'].unique())
            )
        )

    #index the ID table by well so each ID column can be looked up by well name
    lookup = ids.set_index('well')

    #fill each ID column in one pass by mapping the well of every data row onto the lookup table
    for c in all_id_columns:
        df[c] = df['well'].map(lookup[c])

    return df

## Do the munging

In [8]:
keys_to_munge = ['OD700', 'YFP', 'BFP']

#replace time and temp by overwriting the original data
for key in keys_to_munge:

    sheet = data_dict[key]

    #replace_time reads .hour/.minute/.second from every 'Time' entry, so it fails on a column
    #that has already been converted to hours; skip it once 'Time' is numeric so this cell
    #can safely be re-run
    if not pd.api.types.is_numeric_dtype(sheet['Time']):
        sheet = replace_time(sheet)

    #renaming the temperature column is harmless to repeat
    data_dict[key] = replace_temp(sheet)

### Assign well IDs

In [9]:
#the 'IDs' sheet carries the well identifying information
ids = data_dict['IDs']

#pull out the data sheets you want, one raw table per measurement
od_raw, bfp_raw, yfp_raw = (data_dict[sheet_name] for sheet_name in ('OD700', 'BFP', 'YFP'))

In [10]:
#independently melt the data so you can check and control the ID vars and value vars since that might differ per expt
#var_name is given as a plain string: the columns are a single level, so the new column gets one name
od = pd.melt(od_raw, id_vars=['Time', 'Temp C'], var_name='well')

bfp = pd.melt(bfp_raw, id_vars=['Time', 'Temp C'], var_name='well')

yfp = pd.melt(yfp_raw, id_vars=['Time', 'Temp C'], var_name='well')

In [11]:
#annotate each melted table with the well ID information, in the order od, bfp, yfp
od_final, bfp_final, yfp_final = (add_id_info(melted, ids) for melted in (od, bfp, yfp))

## Write the data to a new file

In [14]:
#one csv per tidy table; index=False keeps the dataframe index out of the file
tidy_outputs = {
    '../../Local Data/od700_tidy.csv': od_final,
    '../../Local Data/bfp_tidy.csv': bfp_final,
    '../../Local Data/yfp_tidy.csv': yfp_final,
}

for out_path, tidy_table in tidy_outputs.items():
    tidy_table.to_csv(out_path, index=False)