In [1]:
import glob
import os
import os.path as osp
import re

import numpy as np
import pandas as pd
%load_ext dotenv
%dotenv env.sh
%run -m cytokit_nb.keyence

In [2]:
# Load the base experiment table from the CSV file in the working directory
df = pd.read_csv('experiments-base.csv')
df

Unnamed: 0,name,dir
0,0000kT-0uM-np,untreated
1,0000kT-4uM-np,4uMsta
2,0250kT-0uM-np,025mioTcells-nopeptide
3,0500kT-0uM-np,05mioTcells-nopeptide
4,1000kT-0uM-np,1mioTcells-nopeptide
5,2000kT-0uM-np,2mioTcells-nopeptide
6,4000kT-0uM-np,4mioTcells-nopeptide
7,0250kT-0uM-wp,025mioTcells-peptide
8,0500kT-0uM-wp,05mioTcells-peptide
9,1000kT-0uM-wp,1mioTcells-peptide


In [3]:
def get_grids(r):
    """Return the names of the ``XY*`` entries in an experiment's raw directory.

    Args:
        r: Mapping-like row (e.g. a pandas Series) with a ``'dir'`` entry naming
            the experiment directory relative to ``$EXP_GROUP_RAW_DIR``.

    Returns:
        Sorted list of the base names of all paths matching ``XY*`` inside that
        directory; an empty list if nothing matches.

    Raises:
        KeyError: If the ``EXP_GROUP_RAW_DIR`` environment variable is not set.
    """
    pattern = osp.join(os.environ['EXP_GROUP_RAW_DIR'], r['dir'], 'XY*')
    # glob returns matches in arbitrary (filesystem) order; sort so that the
    # rows derived from this list come out in the same order on every run
    return sorted(osp.basename(path) for path in glob.glob(pattern))
    
# Attach to every experiment the list of grid names found for it
dfg = df.copy()
dfg['grid'] = df.apply(get_grids, axis=1)

# Stack grid list into rows: expand each list into columns, then stack the
# columns so that every (name, dir, grid) combination becomes its own row
grid_lists = dfg.set_index(['name', 'dir'])['grid']
grid_rows = grid_lists.apply(pd.Series).stack().rename('grid')
dfg = grid_rows.reset_index(level=-1, drop=True).reset_index()


def _assign_rep(group):
    """Add a 1-based ``rep`` column following the sorted order of ``grid``."""
    ordered_grids = pd.Categorical(
        group['grid'], ordered=True, categories=group['grid'].sort_values()
    )
    return group.assign(rep=ordered_grids.codes + 1)


# Assign replicate number within each experiment
dfg = dfg.groupby(['name', 'dir'], group_keys=False).apply(_assign_rep)
dfg.head()

Unnamed: 0,name,dir,grid,rep
0,0000kT-0uM-np,untreated,XY01,1
1,0000kT-0uM-np,untreated,XY02,2
2,0000kT-0uM-np,untreated,XY03,3
3,0000kT-4uM-np,4uMsta,XY01,1
4,0000kT-4uM-np,4uMsta,XY02,2


In [4]:
# List the distinct grid names present for each experiment
dfg.groupby(['name', 'dir'])['grid'].unique()

name           dir                   
0000kT-0uM-np  untreated                 [XY01, XY02, XY03]
0000kT-4uM-np  4uMsta                    [XY01, XY02, XY03]
0250kT-0uM-np  025mioTcells-nopeptide    [XY01, XY02, XY03]
0250kT-0uM-wp  025mioTcells-peptide      [XY01, XY02, XY03]
0500kT-0uM-np  05mioTcells-nopeptide     [XY01, XY02, XY03]
0500kT-0uM-wp  05mioTcells-peptide       [XY01, XY02, XY03]
1000kT-0uM-np  1mioTcells-nopeptide      [XY01, XY02, XY03]
1000kT-0uM-wp  1mioTcells-peptide        [XY01, XY02, XY03]
2000kT-0uM-np  2mioTcells-nopeptide      [XY01, XY02, XY03]
2000kT-0uM-wp  2mioTcells-peptide        [XY01, XY02, XY03]
4000kT-0uM-np  4mioTcells-nopeptide      [XY01, XY02, XY03]
4000kT-0uM-wp  4mioTcells-peptide        [XY01, XY02, XY03]
Name: grid, dtype: object

In [5]:
# List the distinct replicate numbers assigned within each experiment
dfg.groupby(['name', 'dir'])['rep'].unique()

name           dir                   
0000kT-0uM-np  untreated                 [1, 2, 3]
0000kT-4uM-np  4uMsta                    [1, 2, 3]
0250kT-0uM-np  025mioTcells-nopeptide    [1, 2, 3]
0250kT-0uM-wp  025mioTcells-peptide      [1, 2, 3]
0500kT-0uM-np  05mioTcells-nopeptide     [1, 2, 3]
0500kT-0uM-wp  05mioTcells-peptide       [1, 2, 3]
1000kT-0uM-np  1mioTcells-nopeptide      [1, 2, 3]
1000kT-0uM-wp  1mioTcells-peptide        [1, 2, 3]
2000kT-0uM-np  2mioTcells-nopeptide      [1, 2, 3]
2000kT-0uM-wp  2mioTcells-peptide        [1, 2, 3]
4000kT-0uM-np  4mioTcells-nopeptide      [1, 2, 3]
4000kT-0uM-wp  4mioTcells-peptide        [1, 2, 3]
Name: rep, dtype: object

In [6]:

def get_row(r, info, base_conf):
    """Extend an experiment row with metadata summarized from ``info``.

    Args:
        r: pandas Series describing one experiment/grid row.
        info: DataFrame with (at least) the columns ``z_pitch``, ``tile``,
            ``z`` and ``ch``.
        base_conf: Value stored in the ``conf`` entry of the result.

    Returns:
        A new Series with the entries of ``r`` plus ``z_pitch`` (taken from the
        first row of ``info``), ``n_tiles``, ``n_z`` and ``n_ch`` (distinct
        counts), ``chs`` (sorted tuple of distinct channels) and ``conf``,
        sorted by index label.
    """
    summary = pd.Series({
        'z_pitch': info['z_pitch'].iloc[0],
        'n_tiles': info['tile'].nunique(),
        'n_z': info['z'].nunique(),
        'n_ch': info['ch'].nunique(),
        'chs': tuple(sorted(info['ch'].unique())),
        # Use the caller-supplied configuration instead of a hard-coded name
        'conf': base_conf
    })
    # pd.concat replaces Series.append, which was removed in pandas 2.0
    return pd.concat([r, summary]).sort_index()

def add_info(r):
    """Analyze the raw dataset of one grid row and return the row with its metadata.

    The dataset path is ``$EXP_GROUP_RAW_DIR/<dir>/<grid>``; it is passed to
    ``analyze_keyence_dataset`` and the result is merged into a copy of ``r``
    by ``get_row`` using the ``experiment.yaml`` configuration name.
    """
    row = r.copy()
    dataset_path = osp.join(os.environ['EXP_GROUP_RAW_DIR'], row['dir'], row['grid'])
    return get_row(row, analyze_keyence_dataset(dataset_path), 'experiment.yaml')

# Collect the metadata-augmented row for every (experiment, grid) pair
info_rows = [add_info(row) for _, row in dfg.iterrows()]
dfi = pd.DataFrame(info_rows)
dfi.head()

Unnamed: 0,chs,conf,dir,grid,n_ch,n_tiles,n_z,name,rep,z_pitch
0,"(1, 2, 3)",experiment.yaml,untreated,XY01,3,33,9,0000kT-0uM-np,1,200
1,"(1, 2, 3)",experiment.yaml,untreated,XY02,3,33,9,0000kT-0uM-np,2,200
2,"(1, 2, 3)",experiment.yaml,untreated,XY03,3,33,9,0000kT-0uM-np,3,200
3,"(1, 2, 3)",experiment.yaml,4uMsta,XY01,3,33,8,0000kT-4uM-np,1,200
4,"(1, 2, 3)",experiment.yaml,4uMsta,XY02,3,33,7,0000kT-4uM-np,2,200


In [7]:
# Distinct tile counts observed across all rows
dfi['n_tiles'].unique()

array([33])

In [8]:
# Distinct z-pitch values observed across all rows
dfi['z_pitch'].unique()

array([200])

In [9]:
# Distinct channel tuples observed across all rows
dfi['chs'].unique()

array([('1', '2', '3')], dtype=object)

In [10]:
# Every row must share a single value for each of these columns
for col in ('chs', 'n_ch', 'z_pitch', 'n_tiles'):
    assert dfi[col].nunique() == 1

In [11]:
# Keep the condition label under 'cond' and discard the channel tuple
dfe = dfi.rename(columns={'name': 'cond'}).drop('chs', axis=1)

# Build the experiment name from condition, grid and replicate number,
# and place it in the first column
experiment_names = dfe.apply(
    lambda r: '{}-{}-{}'.format(r['cond'], r['grid'], r['rep']),
    axis=1,
)
dfe.insert(0, 'name', experiment_names)
dfe.head()

Unnamed: 0,name,conf,dir,grid,n_ch,n_tiles,n_z,cond,rep,z_pitch
0,0000kT-0uM-np-XY01-1,experiment.yaml,untreated,XY01,3,33,9,0000kT-0uM-np,1,200
1,0000kT-0uM-np-XY02-2,experiment.yaml,untreated,XY02,3,33,9,0000kT-0uM-np,2,200
2,0000kT-0uM-np-XY03-3,experiment.yaml,untreated,XY03,3,33,9,0000kT-0uM-np,3,200
3,0000kT-4uM-np-XY01-1,experiment.yaml,4uMsta,XY01,3,33,8,0000kT-4uM-np,1,200
4,0000kT-4uM-np-XY02-2,experiment.yaml,4uMsta,XY02,3,33,7,0000kT-4uM-np,2,200


In [12]:
# Display the experiment table
dfe

Unnamed: 0,name,conf,dir,grid,n_ch,n_tiles,n_z,cond,rep,z_pitch
0,0000kT-0uM-np-XY01-1,experiment.yaml,untreated,XY01,3,33,9,0000kT-0uM-np,1,200
1,0000kT-0uM-np-XY02-2,experiment.yaml,untreated,XY02,3,33,9,0000kT-0uM-np,2,200
2,0000kT-0uM-np-XY03-3,experiment.yaml,untreated,XY03,3,33,9,0000kT-0uM-np,3,200
3,0000kT-4uM-np-XY01-1,experiment.yaml,4uMsta,XY01,3,33,8,0000kT-4uM-np,1,200
4,0000kT-4uM-np-XY02-2,experiment.yaml,4uMsta,XY02,3,33,7,0000kT-4uM-np,2,200
5,0000kT-4uM-np-XY03-3,experiment.yaml,4uMsta,XY03,3,33,8,0000kT-4uM-np,3,200
6,0250kT-0uM-np-XY01-1,experiment.yaml,025mioTcells-nopeptide,XY01,3,33,7,0250kT-0uM-np,1,200
7,0250kT-0uM-np-XY02-2,experiment.yaml,025mioTcells-nopeptide,XY02,3,33,8,0250kT-0uM-np,2,200
8,0250kT-0uM-np-XY03-3,experiment.yaml,025mioTcells-nopeptide,XY03,3,33,8,0250kT-0uM-np,3,200
9,0250kT-0uM-wp-XY01-1,experiment.yaml,025mioTcells-peptide,XY01,3,33,7,0250kT-0uM-wp,1,200


In [13]:
# Show the last entries of the per-name counts sorted ascending (largest counts come last)
dfe['name'].value_counts().sort_values().tail()

1000kT-0uM-np-XY01-1    1
0000kT-0uM-np-XY02-2    1
0000kT-0uM-np-XY03-3    1
0000kT-4uM-np-XY01-1    1
2000kT-0uM-wp-XY03-3    1
Name: name, dtype: int64

In [14]:
# Experiment names must not repeat
assert dfe['name'].is_unique

In [15]:
# Write the experiment table to CSV without the index column
dfe.to_csv('experiments.csv', index=False)