In [1]:
import os
import re
import json
from tqdm.notebook import tqdm

import numpy as np
import pandas as pd
from scipy import ndimage
from scipy import optimize 
from sklearn.preprocessing import MinMaxScaler, PowerTransformer

import matplotlib.pyplot as plt
import matplotlib.patches as mpatches

import utils

def scatter(w, k):
    """Evaluate an inverse-fourth-power curve scaled by ``k``.

    NOTE(review): presumably models a scattering baseline as a function of
    wavelength ``w`` -- confirm against the call sites.

    Args:
        w: Independent variable; a scalar or anything supporting ``**`` and
            division (e.g. a NumPy array).
        k: Multiplicative scale factor.

    Returns:
        ``k`` times the reciprocal of ``w`` raised to the fourth power.
    """
    inverse_fourth_power = 1 / w**4
    return k * inverse_fourth_power

def r_squared(y, y_hat):
    """Return the coefficient of determination of ``y_hat`` against ``y``.

    Computed as ``1 - SS_res / SS_tot``, where ``SS_res`` is the sum of
    squared differences between ``y`` and ``y_hat`` and ``SS_tot`` is the sum
    of squared deviations of ``y`` from its own mean.

    Args:
        y: Observed values (must support elementwise subtraction, e.g. a
            NumPy array).
        y_hat: Predicted values, compatible in shape with ``y``.

    Returns:
        The R-squared score.
    """
    ss_res = np.sum((y - y_hat) ** 2)
    deviations_from_mean = y - np.mean(y)
    ss_tot = np.sum(deviations_from_mean ** 2)
    return 1 - (ss_res / ss_tot)


# Walk the parent directory, collect the per-well summary CSV of every
# numbered experiment directory, then stack them into a single DataFrame.
root_dir_walk = os.walk('..')

all_traces_summaries = []

for dirpath, dirnames, filenames in root_dir_walk:
    # Raw strings keep the regex escape ``\.`` away from Python's own string
    # escape handling (a plain '\.' literal is an invalid escape sequence).
    # NOTE(review): the pattern hard-codes '/' as the path separator.
    if (e := re.search(r'\.\./([0-9]+)', dirpath)) is None:
        continue
    experiment_number = int(e.group(1))
    # Only experiments numbered 1 through 22 are loaded.
    if not 1 <= experiment_number <= 22:
        continue
    for filename in filenames:
        if re.search(r'experiment-[0-9]+-per-well-summary\.csv', filename) is not None:
            all_traces_summaries.append(os.path.join(dirpath, filename))

# pd.concat's default outer join keeps the union of all columns, so a column
# that is absent from a given file is filled with NaN for that file's rows.
df = pd.concat([pd.read_csv(path) for path in all_traces_summaries]).reset_index(drop=True)
df

Unnamed: 0,220,221,222,223,224,225,226,227,228,229,...,percent_dmso,protein_volume,concentrations,Kpi /mM,Kcl/mM,BSA,well_volume,bulk_dispensing,experiments,column
0,1.768,2.018,3.474,2.598,2.163,2.348,2.801,2.712,2.877,2.718,...,,,,,,,,,,
1,2.050,2.108,2.286,3.136,2.672,2.698,2.888,2.613,2.821,2.892,...,,,,,,,,,,
2,1.986,2.156,2.665,2.758,2.092,2.319,2.543,2.630,2.809,2.725,...,,,,,,,,,,
3,2.476,2.908,2.548,2.767,2.409,2.532,2.926,2.864,3.415,3.472,...,,,,,,,,,,
4,2.014,3.480,3.480,3.017,2.454,2.746,3.480,3.480,3.480,3.348,...,,,,,,,,,,
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
13003,2.211,2.366,2.422,2.159,2.193,2.359,2.651,3.478,2.860,2.968,...,,,,,,,,,,
13004,2.190,3.049,2.650,2.433,2.372,3.235,2.877,2.839,3.480,3.119,...,,,,,,,,,,
13005,2.247,2.470,3.483,2.897,2.539,3.483,3.320,3.248,3.483,3.483,...,,,,,,,,,,
13006,2.384,2.677,3.478,3.171,3.165,2.887,3.079,3.478,2.653,2.842,...,,,,,,,,,,


In [5]:
# Show only the rows that have a protein_volume value recorded.
df.loc[:, 'protein_volume'].dropna()

2128     20.0
2129     20.0
2130     20.0
2131     20.0
2132     20.0
         ... 
12859    40.0
12860    40.0
12861    40.0
12862    40.0
12863    40.0
Name: protein_volume, Length: 7040, dtype: float64

In [4]:
# Keep only the columns whose names are not purely numeric strings
# (the numerically named columns are left out of this view).
df.loc[:, [name for name in df.columns if not name.isnumeric()]]

Unnamed: 0,experiment_number,protein_concentration,plate_type,file,ligand,concentration,control,address,protein_days_thawed,hours_before_reading,...,percent_dmso,protein_volume,concentrations,Kpi /mM,Kcl/mM,BSA,well_volume,bulk_dispensing,experiments,column
0,5,4.01,Corning 3660,20190619_boi.CSV,,500.0000,False,A1,,,...,,,,,,,,,,
1,5,4.01,Corning 3660,20190619_boi.CSV,,250.0000,False,C1,,,...,,,,,,,,,,
2,5,4.01,Corning 3660,20190619_boi.CSV,,125.0000,False,E1,,,...,,,,,,,,,,
3,5,4.01,Corning 3660,20190619_boi.CSV,,62.5000,False,G1,,,...,,,,,,,,,,
4,5,4.01,Corning 3660,20190619_boi.CSV,,31.2500,False,I1,,,...,,,,,,,,,,
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
13003,9,15.14,Corning 3660,20190701.CSV,4-Phenylimidazole,62.5000,True,H9,5.0,0.0,...,,,,,,,,,,
13004,9,15.14,Corning 3660,20190701.CSV,4-Phenylimidazole,31.2500,True,J9,5.0,0.0,...,,,,,,,,,,
13005,9,15.14,Corning 3660,20190701.CSV,4-Phenylimidazole,15.6250,True,L9,5.0,0.0,...,,,,,,,,,,
13006,9,15.14,Corning 3660,20190701.CSV,4-Phenylimidazole,7.8125,True,N9,5.0,0.0,...,,,,,,,,,,


In [6]:
# Fraction of rows with a non-missing value, first for protein_volume and
# then for well_volume. The spec is ``.2%`` (two decimal places); ``2%``
# would only set a minimum field width and print six decimals.
print(f"{df['protein_volume'].notna().mean():.2%}")
print(f"{df['well_volume'].notna().mean():.2%}")

54.120541%
11.808118%
