In [1]:
import pandas as pd
import numpy as np

import itertools

## open data

In [4]:
# Where the Excel workbook lives. `directory` keeps its trailing slash because
# the full path is built by plain string concatenation.
directory = "Z:/Reed/Projects/micro_consortia/DARPA_biocon/Task 1.1/A+pTet-ccdA/20200107 capnresc 4 count/echo cfu/"
filename = "20200121 RM t0 capnresc 4.xlsx"

# sheet_name=None reads every sheet; the result maps sheet name -> DataFrame
workbook_path = directory + filename
d = pd.read_excel(workbook_path, sheet_name=None)

In [5]:
# list the keys of the loaded workbook dict (one key per sheet)
d.keys()

odict_keys(['plate1 no seal', 'plate2 with seal', 'plate2 no seal', 'plate3 with seal', 'plate3 no seal', 'Exp'])

## get columns and rows named correctly

In [4]:
# Variant for workbooks with more than a few plates: keep the sheets whose
# name contains '-' and leave out any whose name contains 'tidy'.
plates = [
    sheet_name
    for sheet_name in d.keys()
    if '-' in sheet_name and 'tidy' not in sheet_name
]

In [6]:
# every sheet whose name does not contain 'Exp' is treated as a plate
plates = [sheet_name for sheet_name in d if 'Exp' not in sheet_name]

In [8]:
# work on the first plate in the list
plate = plates[0]

# reset_index() moves the existing index into an 'index' column and leaves a
# plain numerical index behind for reference
df = d[plate].reset_index()

# Collect the auto-named ("Unnamed...") columns by inspecting the labels rather
# than hard-coding them, so this stays generic for different plate types.
# Labels that are not strings are skipped.
unnamed_cols = [
    label
    for label in df.columns.tolist()
    if isinstance(label, str) and "Unnamed" in label
]

# rename for clarity: first unnamed column -> 'channel', old index -> 'row'
df = df.rename(columns={unnamed_cols[0]: 'channel', 'index': 'row'})

# reorder columns: bring the last column round to the front
column_order = df.columns.tolist()
df = df[column_order[-1:] + column_order[:-1]]

In [9]:
# Rows aren't all named with the row letter, so carry the last seen letter
# forward into the NaNs. Series.ffill() is used because the
# fillna(method='ffill') spelling is deprecated in recent pandas releases.
df['row'] = df['row'].ffill()

# make the channel names easier: split each label on ':' and keep the first piece
split_channel_names = df['channel'].str.split(':', expand=True)

df['ch'] = split_channel_names[0]

# put the channel names where they're easy to see (last column -> first)
cols = df.columns.tolist()
cols = cols[-1:] + cols[:-1]

df = df[cols]

# if there are overflow values ('OVRFLW'), replace them with the number 99999

df = df.replace({'OVRFLW': 99999})

## break it into long form

In [11]:
# preview the first rows of the table
df.head()

Unnamed: 0,ch,channel,row,1,2,3,4,5,6,7,...,39,40,41,42,43,44,45,46,47,48
0,OD700,OD700:700,A,0.401,0.385,0.402,0.383,0.398,0.388,0.533,...,0.367,0.358,0.374,0.364,0.373,0.352,0.368,0.377,0.373,0.369
1,OD700,OD700:700,B,0.43,0.381,0.401,0.379,0.415,0.384,0.417,...,0.391,0.353,0.391,0.358,0.383,0.353,0.401,0.373,0.399,0.365
2,OD700,OD700:700,C,0.431,0.379,0.428,0.384,0.419,0.372,0.421,...,0.405,0.346,0.389,0.352,0.388,0.355,0.384,0.365,0.4,0.357
3,OD700,OD700:700,D,0.44,0.373,0.437,0.374,0.416,0.365,0.415,...,0.403,0.345,0.399,0.341,0.389,0.34,0.387,0.348,0.396,0.345
4,OD700,OD700:700,E,0.418,0.358,0.438,0.362,0.653,0.341,0.646,...,0.383,0.312,0.373,0.329,0.368,0.311,0.363,0.317,0.378,0.322


In [12]:
# Reshape from wide to long: 'ch' and 'row' stay as identifiers, every other
# column label goes into 'col' and its reading into 'value'. The full
# 'channel' label is dropped first so it is not melted.
long_form = df.drop(columns='channel').melt(id_vars=['ch', 'row'], var_name='col')

df = long_form.sort_values(by=['row', 'col'])

In [14]:
# distinct row and column labels, in order of first appearance
rs = df['row'].unique()
cs = df['col'].unique()

# every (row, col) pairing, with the row label varying slowest
combos = [(r, c) for r in rs for c in cs]

In [15]:
# Build one single-row frame per (row, col) pair, with one column per channel.
put_together = []
for well_row, well_col in combos:
    # all long-form records that belong to this well
    in_well = (df['row'] == well_row) & (df['col'] == well_col)
    well = df.loc[in_well]

    record = {'row': well_row, 'col': well_col}

    # one entry per channel; if a channel appears more than once for the
    # well, only its first value is kept
    record.update({
        chan: well.loc[well['ch'] == chan, 'value'].values[0]
        for chan in well['ch'].unique()
    })

    put_together.append(pd.DataFrame(record, index=[0]))

In [16]:
# stack the per-well frames into one table with a fresh 0..n-1 index
df = pd.concat(put_together, ignore_index=True)

In [78]:
# df.to_csv(directory + '{}_tidy.csv'.format(plate), index=False)

# STOP: move the generated csv files into the master data file, then delete them. Also add any easy-to-generate information you need to the files.

## make a single table from all tidy ones

In [79]:
#reopen the workbook now that the sheets have been added (reads every sheet again)
workbook_path = directory + filename
d = pd.read_excel(workbook_path, sheet_name=None)

In [80]:
# names of the sheets that hold tidy data (name contains 'tidy')
tidy = [sheet_name for sheet_name in d if 'tidy' in sheet_name]

In [82]:
# collect the tidy sheets' tables, in the same order as `tidy`
data_list = [d[name] for name in tidy]

In [83]:
# pd.concat(data_list).to_csv(directory + 'tall_tidy.csv', index=False)

# STOP, move the generated csv file into the master data file, then delete it

## associate the IDs

In [84]:
#reopen the workbook now that the sheets have been added (reads every sheet again)
workbook_path = directory + filename
d = pd.read_excel(workbook_path, sheet_name=None)

In [85]:
# the combined measurement table and the ID table, both taken from the workbook
df, ids = d['tall_tidy'], d['IDs']

In [86]:
# preview the first rows of the combined table
df.head()

Unnamed: 0,row,col,time,OD700,CFP,YFP,RFP
0,A,1,0,0.401,2536,89,1406
1,A,2,0,0.378,1838,91,1390
2,A,3,0,0.634,32354,139,1662
3,A,4,0,0.609,31674,135,1582
4,A,5,0,0.593,28726,141,1729


In [87]:
# preview the first rows of the ID table
ids.head()

Unnamed: 0,row,cell,dil0,dile
0,A,2c1r,10,10000
1,B,2c1r,10,10000
2,C,2c1r,100,100000
3,D,2c1r,100,100000
4,E,2c1r,1000,1000000


In [88]:
# Columns to copy from `ids` into `df`, and the key column(s) used to decide
# which rows of `df` each `ids` record applies to.
stuff_to_add = ['cell', 'dil0', 'dile']
stuff_to_check = ['row']

for id_label in ids.index:
    check = ids.loc[id_label, stuff_to_check]

    add = ids.loc[id_label, stuff_to_add]

    # Rows of df that agree with this ids record on every key column.
    # Any number of keys is supported (previously keys beyond the second
    # were silently ignored).
    match = df[stuff_to_check[0]] == check[stuff_to_check[0]]
    for key in stuff_to_check[1:]:
        match = match & (df[key] == check[key])

    # write each requested value into the matching rows; the column is
    # created on first assignment if df does not have it yet
    for a in add.index.tolist():
        df.loc[match, a] = add[a]

In [89]:
# preview the table again to check the added columns
df.head()

Unnamed: 0,row,col,time,OD700,CFP,YFP,RFP,cell,dil0,dile
0,A,1,0,0.401,2536,89,1406,2c1r,10.0,10000.0
1,A,2,0,0.378,1838,91,1390,2c1r,10.0,10000.0
2,A,3,0,0.634,32354,139,1662,2c1r,10.0,10000.0
3,A,4,0,0.609,31674,135,1582,2c1r,10.0,10000.0
4,A,5,0,0.593,28726,141,1729,2c1r,10.0,10000.0


In [90]:
# Fill 'dil' in one vectorised step instead of looping over time points:
# rows where time == 0 take their value from 'dil0', all other rows from 'dile'.
at_start = df['time'] == 0

df['dil'] = np.where(at_start, df['dil0'], df['dile'])

# rows without a recorded time get no value, as they did with the loop
df.loc[df['time'].isna(), 'dil'] = np.nan

In [92]:
# 'dil0' and 'dile' are no longer needed once 'dil' has been filled in
df = df.drop(['dil0', 'dile'], axis=1)

In [93]:
# df.to_csv(directory + 'tall_tidy_master.csv'.format(plate), index=False)

# You're done. You can delete the existing tall_tidy sheet and replace it with this master sheet, then rename it back to tall_tidy if you like.