In [50]:
import pandas as pd
import numpy as np

## open data

In [51]:
# folder holding the raw plate-reader export; it ends with a slash because the
# file name is appended to it directly
directory = "C:/Users/geeze/Box/biocircuits/Reed/projects/DARPA_biocon/Task 1.1/A+pTet-ccdA/20200710 capnresc 4 small ind sets/ind set 1/raw t5 spots files/"

# workbook to tidy
filename = "20200902 cap n resc 4 set 1 v2 t5 spots.xlsx"

# sheet_name=None reads every sheet and returns a dict of DataFrames keyed by sheet name
workbook_path = directory + filename
data = pd.read_excel(workbook_path, sheet_name=None)

In [52]:
# list the sheet names that were read from the workbook
data.keys()

dict_keys(['1', '2', '3', '4', '5', '6', 'IDs'])

## get columns and rows named correctly

In [53]:
# every sheet except the ones whose name contains 'IDs' is treated as a plate
plates = [sheet_name for sheet_name in data if 'IDs' not in sheet_name]

In [54]:
# number written in place of the plate reader's 'OVRFLW' (overflow) marker
OVERFLOW_VALUE = 99999

processed_plates = []
for pid in plates:
    #get plate
    df = data[pid]

    #find the channel column generically so different plate types work:
    #it is the column whose name contains "Unnamed" (pandas' placeholder for a
    #column without a header)
    rename = [col for col in df.columns if isinstance(col, str) and "Unnamed" in col]
    if not rename:
        #fail with a readable message instead of a bare IndexError on rename[0]
        raise ValueError(
            "sheet {!r} has no 'Unnamed' column to use as the channel column".format(pid)
        )

    #rename for clarity ('index' -> 'row' only applies if the sheet has an 'index' column)
    df = df.rename(columns={rename[0]: 'channel', 'index': 'row'})

    #reorder columns: move the last column to the front
    #(presumably the channel column -- confirm against the sheet layout)
    cols = df.columns.tolist()
    cols = cols[-1:] + cols[:-1]

    #take an explicit copy so the column assignments below act on an
    #independent frame and do not raise SettingWithCopyWarning
    df = df[cols].copy()

    # rows aren't all named with the row letter, fill the NaNs with the letter
    # (.ffill() replaces the deprecated fillna(method='ffill'))
    df['row'] = df['row'].ffill()

    #make the channel names easier: keep only the text before the first ':'
    split_channel_names = df['channel'].str.split(':', expand=True)

    df['ch'] = split_channel_names[0]

    #put the channel names where they're easy to see (move 'ch' to the front)
    cols = df.columns.tolist()
    cols = cols[-1:] + cols[:-1]

    df = df[cols]

    #if there are overflow values, replace them; infer_objects() then turns any
    #column that is now purely numeric back into a numeric dtype, which newer
    #pandas no longer does implicitly inside replace()
    df = df.replace({'OVRFLW': OVERFLOW_VALUE}).infer_objects()

    processed_plates.append(df)

In [55]:
# preview the first processed plate to check the renamed and reordered columns
processed_plates[0]

Unnamed: 0,ch,channel,row,1,2,3,4,5,6,7,...,15,16,17,18,19,20,21,22,23,24
0,OD,OD:700,A,0.06,0.049,0.053,0.043,0.04,0.414,0.051,...,0.538,0.883,0.042,0.041,0.039,0.59,0.442,0.07,0.04,0.046
1,OD,OD:600,A,0.069,0.057,0.061,0.05,0.047,0.514,0.061,...,0.66,1.054,0.05,0.049,0.047,0.716,0.549,0.079,0.048,0.056
2,OD,OD:700,B,0.461,0.052,0.44,0.371,0.04,0.56,0.802,...,0.039,0.039,0.502,0.046,0.054,0.039,0.042,0.418,0.053,0.706
3,OD,OD:600,B,0.57,0.059,0.534,0.453,0.047,0.691,0.969,...,0.047,0.046,0.605,0.055,0.065,0.046,0.049,0.518,0.062,0.852
4,OD,OD:700,C,1.084,0.04,0.04,0.04,0.413,0.072,0.049,...,0.04,0.039,0.039,0.044,0.04,0.04,0.041,0.043,0.453,0.04
5,OD,OD:600,C,1.246,0.048,0.048,0.046,0.507,0.084,0.058,...,0.048,0.047,0.046,0.053,0.048,0.047,0.048,0.051,0.555,0.048
6,OD,OD:700,D,0.043,0.04,0.89,0.04,0.445,0.403,0.039,...,0.041,0.04,0.045,0.041,0.044,0.04,0.041,0.464,0.052,0.039
7,OD,OD:600,D,0.049,0.048,1.062,0.047,0.549,0.497,0.047,...,0.05,0.047,0.052,0.048,0.052,0.047,0.048,0.561,0.064,0.046
8,OD,OD:700,E,0.041,0.04,0.041,0.039,0.04,0.04,0.04,...,0.039,0.063,0.469,0.442,0.042,0.04,0.786,0.077,0.041,0.054
9,OD,OD:600,E,0.048,0.047,0.048,0.046,0.047,0.049,0.047,...,0.047,0.071,0.579,0.541,0.05,0.047,0.961,0.087,0.049,0.066


## break it into long form

In [56]:
long_formed = []
for plate in processed_plates:
    #melt the data into long form: one row per (row, col, channel) reading,
    #ordered so that the readings of a well sit next to each other
    melted = pd.melt(
        plate.drop(columns='ch'), id_vars=['channel', 'row'], var_name='col'
    ).sort_values(by=['row', 'col'])

    #channel names in order of first appearance; this becomes the column order
    channels = list(pd.unique(melted['channel']))

    #pivot the channels up into their own columns: each channel column holds the
    #reading on the rows that belong to that channel and NaN everywhere else
    df = melted[['row', 'col']].copy()
    for ch in channels:
        df[ch] = melted['value'].where(melted['channel'] == ch)

    #within each well, fill the NaNs left by the pivot from the well's other rows
    #(forward then backward) so every row of a well carries all of its channels.
    #this assumes that each channel column will have just a single value among
    #NaNs per well, which should always be true
    df[channels] = df.groupby(['row', 'col'], sort=False)[channels].transform(
        lambda readings: readings.ffill().bfill()
    )

    #the rows of a well are now identical; keep the first one to get a single
    #row per well. copy() makes the result an independent frame so adding
    #columns to it later does not warn
    df = df.loc[~df.duplicated()].copy()

    long_formed.append(df)

In [57]:
#associate plate numbers to the new tables
#plates are numbered 1..N in the order they were processed; deriving N from the
#tables themselves means a workbook with more or fewer plates is still fully labelled
plate_nums = list(range(1, len(long_formed) + 1))

#assign() returns a new frame with the 'plate' column set, so re-running this
#cell is harmless and no slice is modified in place
long_formed = [
    plate.assign(plate=num) for plate, num in zip(long_formed, plate_nums)
]

## make a single table from all tidy ones

In [58]:
# stack the per-plate tables row-wise; the index is not reset, so index labels repeat across plates
df = pd.concat(long_formed)

## associate the IDs

In [59]:
# the 'IDs' sheet of the workbook, kept separate from the plate sheets
ids = data['IDs']

In [60]:
# columns that identify which wells an ID row applies to, and the columns to copy onto them
stuff_to_check = ['row', 'plate']
stuff_to_add = ['dil', 'sample dil', 'well']

# split the ID table once into the matching keys and the values to copy over
id_keys = ids[stuff_to_check]
id_values = ids[stuff_to_add]

for i in ids.index:
    # one boolean column per key; a data row matches only where every key is equal
    matches = (df[stuff_to_check] == id_keys.loc[i].values).all(axis='columns')

    # write each piece of ID information onto the matching rows
    # (the column is created on first use)
    for name, value in id_values.loc[i].items():
        df.loc[matches, name] = value

In [61]:
# display the combined table with the ID columns attached
df

Unnamed: 0,row,col,OD:700,OD:600,plate,dil,sample dil,well
0,A,1,0.060,0.069,1,128000.0,4.0,A4
32,A,2,0.049,0.057,1,128000.0,4.0,A4
64,A,3,0.053,0.061,1,128000.0,4.0,A4
96,A,4,0.043,0.050,1,128000.0,4.0,A4
128,A,5,0.040,0.047,1,128000.0,4.0,A4
...,...,...,...,...,...,...,...,...
638,P,20,0.041,0.048,6,1280000.0,4.0,D6
670,P,21,0.042,0.049,6,1280000.0,4.0,D6
702,P,22,0.041,0.048,6,1280000.0,4.0,D6
734,P,23,0.075,0.091,6,1280000.0,4.0,D6


In [62]:
#see if some of the channels you processed are OD, these don't get made into columns with good column names, fix them
od_cols = [x for x in df.columns if 'OD' in x]

#checks if its empty, returns false if od_cols is empty
if od_cols:
    
    rename_dict = {x : x.replace(':', '') for x in od_cols}
    
    df.rename(columns = rename_dict, inplace=True)

In [63]:
from openpyxl import load_workbook

path = directory + filename

# Add the tidy table to the source workbook as the 'all_tidy' sheet.
# mode='a' opens the existing workbook so its other sheets are kept, and
# if_sheet_exists='replace' overwrites 'all_tidy' when the cell is re-run
# (requires pandas >= 1.3). The context manager saves and closes the file, which
# replaces the removed `writer.book = ...` / `writer.save()` calls and makes sure
# the file handle is released even if the write fails.
with pd.ExcelWriter(
    path, engine='openpyxl', mode='a', if_sheet_exists='replace'
) as writer:
    df.to_excel(writer, sheet_name='all_tidy', index=False)