In [1]:
import pandas as pd
import numpy as np

import itertools

## open data

In [2]:
# Folder and file name of the plate-reader workbook
directory = "Z:/Reed/Projects/lab misc/echo_cfu_count/"
filename = "20200123 1536 echu cfu test.xlsx"

# sheet_name=None reads every sheet; `d` maps sheet name -> DataFrame
d = pd.read_excel(f"{directory}{filename}", sheet_name=None)

In [3]:
# List the sheet names that were read from the workbook
d.keys()

odict_keys(['Plate 1', 'Plate 2', 'Plate 3', 'Plate 4', 't0 seal', 't0', 'te', 'tef', 't0_tidy', 'te_tidy', 'tef_tidy', 'tall_tidy', 'cfu_counts', 'IDs', 'Exp'])

## get columns and rows named correctly

In [4]:
# For workbooks with more than a few plates: select plate sheets by name.
# A sheet qualifies when its name contains '-' and does not contain 'tidy'.
plates = [
    sheet
    for sheet in d.keys()
    if '-' in sheet and 'tidy' not in sheet
]

In [6]:
# Explicit list of the plate sheets to process; this assignment replaces
# whatever value `plates` held before this cell ran
plates = ['t0', 'te', 'tef']

In [7]:
# Work on a single plate sheet; change the position to process another one
plate = plates[0]

# Pull the sheet and move its index into an ordinary 'index' column so the
# frame is left with a plain numerical index for reference
df = d[plate].reset_index()

# Generic across plate types: collect the string headers that contain
# "Unnamed" (non-string headers such as the numeric plate columns are skipped)
rename = [
    label
    for label in df.columns.tolist()
    if isinstance(label, str) and "Unnamed" in label
]

# Rename for clarity: the first "Unnamed" column becomes 'channel' and the
# former index becomes 'row'
df = df.rename(columns={rename[0]: 'channel', 'index': 'row'})

# Reorder columns: rotate the last column to the front
cols = df.columns.tolist()
cols = [*cols[-1:], *cols[:-1]]
df = df[cols]

In [8]:
# Rows aren't all named with the row letter; carry the last seen letter
# forward into the NaNs. `.ffill()` replaces `fillna(method='ffill')`, whose
# `method` argument is deprecated in pandas 2.x.
df['row'] = df['row'].ffill()

# Make the channel names easier: keep only the text before the first ':'
# (e.g. "CFP:430,491" -> "CFP")
split_channel_names = df['channel'].str.split(':', expand=True)

df['ch'] = split_channel_names[0]

# Put the short channel name where it's easy to see: rotate the newly added
# last column ('ch') to the front
cols = df.columns.tolist()
cols = cols[-1:] + cols[:-1]

df = df[cols]

# If there are overflow readings, replace the 'OVRFLW' marker with a numeric
# sentinel so the values stay numeric
df = df.replace({'OVRFLW': 99999})

## break it into long form

In [72]:
# Preview the first rows of the wide table (one row per channel per plate row)
df.head()

Unnamed: 0,ch,channel,row,1,2,3,4,5,6,7,...,39,40,41,42,43,44,45,46,47,48
0,OD700,OD700:700,A,0.385,0.376,0.591,0.385,0.389,0.597,0.4,...,0.375,0.356,0.553,0.369,0.375,0.35,0.384,0.366,0.368,0.354
1,CFP,"CFP:430,491",A,1974.0,1902.0,27791.0,1878.0,1927.0,27823.0,1813.0,...,1777.0,1786.0,21630.0,1674.0,1758.0,1879.0,1803.0,1771.0,1780.0,1860.0
2,YFP,"YFP:500,541",A,96.0,95.0,137.0,93.0,96.0,139.0,89.0,...,88.0,90.0,119.0,83.0,86.0,93.0,89.0,87.0,87.0,91.0
3,RFP,"RFP:545,591",A,1511.0,1456.0,1639.0,4382.0,1428.0,1721.0,3061.0,...,1375.0,1391.0,1486.0,1328.0,2086.0,1498.0,1364.0,1403.0,1341.0,1435.0
4,OD700,OD700:700,B,0.415,0.372,0.401,0.572,0.407,0.384,0.415,...,0.58,0.361,0.395,0.364,0.396,0.594,0.584,0.361,0.6,0.364


In [73]:
# Wide -> long: drop the full channel label, keep 'ch' and 'row' as
# identifiers, and turn every remaining column header into a 'col' entry with
# its reading in 'value'; finally order the readings by well.
df = (
    df.drop(columns='channel')
    .melt(id_vars=['ch', 'row'], var_name='col')
    .sort_values(by=['row', 'col'])
)

In [74]:
# Display the long-form table produced by the melt
df

Unnamed: 0,ch,row,col,value
0,OD700,A,1,0.385
1,CFP,A,1,1974.000
2,YFP,A,1,96.000
3,RFP,A,1,1511.000
128,OD700,A,2,0.376
129,CFP,A,2,1902.000
130,YFP,A,2,95.000
131,RFP,A,2,1456.000
256,OD700,A,3,0.591
257,CFP,A,3,27791.000


In [75]:
# Distinct plate rows and columns, in order of first appearance
rs, cs = df['row'].unique(), df['col'].unique()

# Every (row, col) pairing, with the row varying slowest
combos = [(r, c) for r in rs for c in cs]

In [76]:
# Build one single-row frame per well, with one column per channel
put_together = []
for row, col in combos:
    # All long-form readings that belong to this well
    part = df.loc[(df['row'] == row) & (df['col'] == col)]

    one_row = {'row': row, 'col': col}
    # For each channel present in the well, keep its first reading
    for chan in part['ch'].unique():
        one_row[chan] = part.loc[part['ch'] == chan, 'value'].values[0]

    put_together.append(pd.DataFrame(one_row, index=[0]))

In [77]:
# Stack the per-well frames into one table with a fresh 0..n-1 index
df = pd.concat(put_together, ignore_index=True)

In [78]:
# df.to_csv(directory + '{}_tidy.csv'.format(plate), index=False)

# STOP: move the generated csv files into the master data file as new sheets, then delete the csv files. Also add any easy-to-generate information you need to those sheets.

## make a single table from all tidy ones

In [79]:
# Re-read the workbook from disk so `d` includes the tidy sheets that were
# added to the file by hand after the previous step
d = pd.read_excel(directory + filename, sheet_name=None)

In [80]:
# Per-plate tidy sheets only. Sheets whose name starts with 'tall_tidy' are
# the combined table itself; including them would stack the same data twice
# when this is re-run on a workbook that already contains that sheet.
tidy = [
    x for x in d.keys()
    if 'tidy' in x and not x.startswith('tall_tidy')
]

In [82]:
# Gather the tidy sheets' frames, in the same order as the names in `tidy`
data_list = [d[name] for name in tidy]

In [83]:
# pd.concat(data_list).to_csv(directory + 'tall_tidy.csv', index=False)

# STOP, move the generated csv file into the master data file, then delete it

## associate the ID's

In [84]:
# Re-read the workbook from disk so `d` includes the combined sheet that was
# added to the file by hand after the previous step
d = pd.read_excel(directory + filename, sheet_name=None)

In [85]:
# Measurements table and the ID lookup table, both taken from the workbook
df, ids = d['tall_tidy'], d['IDs']

In [86]:
# Preview the combined measurements before the ID columns are attached
df.head()

Unnamed: 0,row,col,time,OD700,CFP,YFP,RFP
0,A,1,0,0.401,2536,89,1406
1,A,2,0,0.378,1838,91,1390
2,A,3,0,0.634,32354,139,1662
3,A,4,0,0.609,31674,135,1582
4,A,5,0,0.593,28726,141,1729


In [87]:
# Preview the ID table that will be matched onto the measurements
ids.head()

Unnamed: 0,row,cell,dil0,dile
0,A,2c1r,10,10000
1,B,2c1r,10,10000
2,C,2c1r,100,100000
3,D,2c1r,100,100000
4,E,2c1r,1000,1000000


In [88]:
# Columns to copy from the IDs sheet onto the measurements, and the key
# column(s) that decide which measurement rows each IDs row applies to
stuff_to_add = ['cell', 'dil0', 'dile']
stuff_to_check = ['row']

for id_idx in ids.index:
    check = ids.loc[id_idx, stuff_to_check]
    add = ids.loc[id_idx, stuff_to_add]

    # A measurement row matches only when it agrees with this IDs row on
    # every key column. This works for any number of keys; previously keys
    # beyond the second were silently ignored.
    match = pd.Series(True, index=df.index)
    for key in stuff_to_check:
        match &= df[key] == check[key]

    # Write each ID value into the matching measurement rows (the column is
    # created on first assignment)
    for a in add.index.tolist():
        df.loc[match, a] = add[a]

In [89]:
# Check that the ID columns were attached to the measurements
df.head()

Unnamed: 0,row,col,time,OD700,CFP,YFP,RFP,cell,dil0,dile
0,A,1,0,0.401,2536,89,1406,2c1r,10.0,10000.0
1,A,2,0,0.378,1838,91,1390,2c1r,10.0,10000.0
2,A,3,0,0.634,32354,139,1662,2c1r,10.0,10000.0
3,A,4,0,0.609,31674,135,1582,2c1r,10.0,10000.0
4,A,5,0,0.593,28726,141,1729,2c1r,10.0,10000.0


In [90]:
# Pick the dilution that applies to each reading in one vectorised step
# instead of looping over the time points: time 0 uses 'dil0', every other
# time uses 'dile'. Rows with a missing time are left as NaN, which is what
# the loop-based version produced because NaN never compares equal.
is_t0 = df['time'] == 0
has_time = df['time'].notna()

df['dil'] = df['dil0'].where(is_t0, df['dile']).where(has_time)

In [92]:
# The per-timepoint dilution columns are no longer needed once 'dil' exists
df = df.drop(['dil0', 'dile'], axis='columns')

In [93]:
# df.to_csv(directory + 'tall_tidy_master.csv', index=False)

# you're done, you can delete the existing tall_tidy sheet and replace it with this master sheet, then edit the name back to tall_tidy if you like