# Input EDA
Exploratory Data Analysis of the Input Files

In [None]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import os
import glob
import regex

print(pd.__version__)

In [None]:
# TODO: find a way to automatically pair input with output folder

inputdir_0 = 'data/input/'

sampledf_files = glob.glob(os.path.join(inputdir_0, '**/sampledataframe*.csv'), recursive=True)
solution_files = glob.glob(os.path.join(inputdir_0, '**/stocksolutions*.csv'), recursive=True)
print(sampledf_files)
print(solution_files)

# place all sample input file data into one dataframe
raw_sampledf = None
for f in sampledf_files:
    filename = f[len(inputdir_0):]
    batchname = os.path.dirname(f)[len(inputdir_0):]
    file_df = pd.read_csv(f)
    file_df.insert(0, 'file', filename, allow_duplicates=True) # record the file this data came from
    file_df.insert(1, 'batch', batchname, allow_duplicates=True) # record the batch this data came from
    raw_sampledf = pd.concat([raw_sampledf, file_df])

# place all solution input file data into one dataframe
raw_solutiondf = None
for f in solution_files:
    filename = f[len(inputdir_0):]
    batchname = os.path.dirname(f)[len(inputdir_0):]
    file_df = pd.read_csv(f)
    file_df.insert(0, 'file', filename, allow_duplicates=True) # record the file this data came from
    file_df.insert(1, 'batch', batchname, allow_duplicates=True) # record the batch this data came from
    raw_solutiondf = pd.concat([raw_solutiondf, file_df])

# Analyze SampleDataframe

In [None]:
raw_sampledf

In [None]:
# first, reset index and fix columns
sample_input_df = ( raw_sampledf.reset_index()
    .rename(columns={'index': 'sample_num'})
    .drop(columns='Unnamed: 0')
)
sample_input_df

In [None]:
sample_input_df.columns

In [None]:
# see number of samples in each batch
pd.set_option('max_columns', None)
sample_input_df.groupby('batch').apply(lambda group: group.count())

In [None]:
# now see number of unique parameters over all batches
pd.set_option('max_rows', 100)
sample_input_df.apply(lambda group: group.unique()).to_dict()

In [None]:
# get each batch's unique spincoating parameters
x = sample_input_df.filter(like='spincoat')
x.loc[:, 'batch'] = sample_input_df['batch']
pd.DataFrame(x.groupby('batch').apply(lambda group: group.apply(
    lambda col: col.unique()
))[0])

Some batches have multiple spincoats.

We create a column to count how many spincoats a sample undergoes.

In [None]:
# determine number of spincoats for each batch/task
# if a row does not have spincoat num i, it will have an na in the column "spincoat{i}_drop0_solvent_dict"
spin_cols = [c for c in sample_input_df.columns if 'spincoat' in c and '_drop0_solvent_dict' in c]
print(spin_cols)
sample_input_df['spincoat_count'] = sample_input_df.apply(
    lambda row : row[spin_cols].notna().sum(), axis=1)
pd.reset_option('max_rows')
sample_input_df

In [None]:
sample_input_df.groupby('batch').apply(lambda group: group['spincoat_count'].unique())

In [None]:
# group by number of spincoats and first solute/solvent used
pd.set_option('max_rows', None)
pd.DataFrame(sample_input_df.groupby(
    ['spincoat_count', # outer group by number of spincoats
    'spincoat0_drop1_solvent_dict', # group by antisolvent (not many different ones)
    'spincoat0_drop0_solvent_dict', 'spincoat0_drop0_solutes_dict', # then group by first solution
    'batch'], #then give batch name
    dropna=False
    ).size().rename('num_samples'))

In [None]:
# for batches with more than one spincoat, show them individually
# 2 spincoat batches 

spins_2 = pd.DataFrame(sample_input_df[sample_input_df['spincoat_count'] == 2]
    .groupby(['spincoat1_drop1_solvent_dict', # first group by antisolvent
        'spincoat1_drop0_solvent_dict', 'spincoat1_drop0_solutes_dict', # then by actual film solution
        'spincoat0_drop0_solvent_dict', 'spincoat0_drop0_solutes_dict',
        'batch'],
        dropna=False)
    .size().rename('num_samples')
)
spins_2

In [None]:
# 3 spincoat batches 

spins_3 = sample_input_df[sample_input_df['spincoat_count'] == 3]
count = spins_3.groupby('batch').size().rename('num_samples')
spins_3 = spins_3.join(count, on='batch')
spins_3 = spins_3.drop_duplicates(spins_3.filter(like='spincoat').columns.to_list())
spins_3

In [None]:
# 5 spincoat batches 

spins_5 = sample_input_df[sample_input_df['spincoat_count'] == 5]
count = spins_5.groupby('batch').size().rename('num_samples')
spins_5 = spins_5.join(count, on='batch')
spins_5 = spins_5.drop_duplicates(spins_5.filter(like='spincoat').columns.to_list())
spins_5

# Analyze StockSolutions

In [None]:
raw_solutiondf.head()

In [None]:
solution_input_df = raw_solutiondf.copy()

In [None]:
# see number of unique parameters over all batches
solution_input_df.apply(lambda col: col.unique()).to_dict()

In [None]:
# see the solutions used for each batch
solution_input_df.set_index(['Solvent', 'Solutes', 'batch']).sort_index().drop(columns='file')

# Analyze Maestro Netlist

In [None]:
import json

maestro_files = glob.glob(os.path.join(inputdir_0, '**/maestronetlist*.json'), recursive=True)
print(maestro_files)

# place all maestro input file data into one dataframe
raw_maestrodf = None
for f in maestro_files:
    filename = f[len(inputdir_0):]
    batchname = os.path.dirname(f)[len(inputdir_0):]
    file_df = pd.read_json(f)
    file_df.insert(0, 'file', filename, allow_duplicates=True) # record the file this data came from
    file_df.insert(1, 'batch', batchname, allow_duplicates=True) # record the batch this data came from
    raw_maestrodf = pd.concat([raw_maestrodf, file_df])


In [117]:
pd.reset_option('max_rows')
raw_maestrodf.head(10)

Unnamed: 0,file,batch,baselines_required,description,hotplate_setpoints,name,samples
Brightfield,20220414_Film_PL_Check/maestronetlist_WBG Mole...,20220414_Film_PL_Check,[0.05],Moses prepped same nominal solution with bead ...,,WBG Molecular Sieve Dried Solvent,
Darkfield,20220414_Film_PL_Check/maestronetlist_WBG Mole...,20220414_Film_PL_Check,[0.05],Moses prepped same nominal solution with bead ...,,WBG Molecular Sieve Dried Solvent,
PL_635nm,20220414_Film_PL_Check/maestronetlist_WBG Mole...,20220414_Film_PL_Check,"[0.1, 20, 5]",Moses prepped same nominal solution with bead ...,,WBG Molecular Sieve Dried Solvent,
Transmission,20220414_Film_PL_Check/maestronetlist_WBG Mole...,20220414_Film_PL_Check,"[0.2, 1, 0.05, 5, 15, 0.02]",Moses prepped same nominal solution with bead ...,,WBG Molecular Sieve Dried Solvent,
Hotplate1,20220414_Film_PL_Check/maestronetlist_WBG Mole...,20220414_Film_PL_Check,,Moses prepped same nominal solution with bead ...,100.0,WBG Molecular Sieve Dried Solvent,
sample0,20220414_Film_PL_Check/maestronetlist_WBG Mole...,20220414_Film_PL_Check,,Moses prepped same nominal solution with bead ...,,WBG Molecular Sieve Dried Solvent,"{'name': 'sample0', 'storage_slot': {'slot': '..."
sample1,20220414_Film_PL_Check/maestronetlist_WBG Mole...,20220414_Film_PL_Check,,Moses prepped same nominal solution with bead ...,,WBG Molecular Sieve Dried Solvent,"{'name': 'sample1', 'storage_slot': {'slot': '..."
sample2,20220414_Film_PL_Check/maestronetlist_WBG Mole...,20220414_Film_PL_Check,,Moses prepped same nominal solution with bead ...,,WBG Molecular Sieve Dried Solvent,"{'name': 'sample2', 'storage_slot': {'slot': '..."
sample3,20220414_Film_PL_Check/maestronetlist_WBG Mole...,20220414_Film_PL_Check,,Moses prepped same nominal solution with bead ...,,WBG Molecular Sieve Dried Solvent,"{'name': 'sample3', 'storage_slot': {'slot': '..."
sample4,20220414_Film_PL_Check/maestronetlist_WBG Mole...,20220414_Film_PL_Check,,Moses prepped same nominal solution with bead ...,,WBG Molecular Sieve Dried Solvent,"{'name': 'sample4', 'storage_slot': {'slot': '..."


In [118]:
maestro_input_df = raw_maestrodf.reset_index().rename(columns={'index': 'task'})
maestro_input_df.head(10)

Unnamed: 0,task,file,batch,baselines_required,description,hotplate_setpoints,name,samples
0,Brightfield,20220414_Film_PL_Check/maestronetlist_WBG Mole...,20220414_Film_PL_Check,[0.05],Moses prepped same nominal solution with bead ...,,WBG Molecular Sieve Dried Solvent,
1,Darkfield,20220414_Film_PL_Check/maestronetlist_WBG Mole...,20220414_Film_PL_Check,[0.05],Moses prepped same nominal solution with bead ...,,WBG Molecular Sieve Dried Solvent,
2,PL_635nm,20220414_Film_PL_Check/maestronetlist_WBG Mole...,20220414_Film_PL_Check,"[0.1, 20, 5]",Moses prepped same nominal solution with bead ...,,WBG Molecular Sieve Dried Solvent,
3,Transmission,20220414_Film_PL_Check/maestronetlist_WBG Mole...,20220414_Film_PL_Check,"[0.2, 1, 0.05, 5, 15, 0.02]",Moses prepped same nominal solution with bead ...,,WBG Molecular Sieve Dried Solvent,
4,Hotplate1,20220414_Film_PL_Check/maestronetlist_WBG Mole...,20220414_Film_PL_Check,,Moses prepped same nominal solution with bead ...,100.0,WBG Molecular Sieve Dried Solvent,
5,sample0,20220414_Film_PL_Check/maestronetlist_WBG Mole...,20220414_Film_PL_Check,,Moses prepped same nominal solution with bead ...,,WBG Molecular Sieve Dried Solvent,"{'name': 'sample0', 'storage_slot': {'slot': '..."
6,sample1,20220414_Film_PL_Check/maestronetlist_WBG Mole...,20220414_Film_PL_Check,,Moses prepped same nominal solution with bead ...,,WBG Molecular Sieve Dried Solvent,"{'name': 'sample1', 'storage_slot': {'slot': '..."
7,sample2,20220414_Film_PL_Check/maestronetlist_WBG Mole...,20220414_Film_PL_Check,,Moses prepped same nominal solution with bead ...,,WBG Molecular Sieve Dried Solvent,"{'name': 'sample2', 'storage_slot': {'slot': '..."
8,sample3,20220414_Film_PL_Check/maestronetlist_WBG Mole...,20220414_Film_PL_Check,,Moses prepped same nominal solution with bead ...,,WBG Molecular Sieve Dried Solvent,"{'name': 'sample3', 'storage_slot': {'slot': '..."
9,sample4,20220414_Film_PL_Check/maestronetlist_WBG Mole...,20220414_Film_PL_Check,,Moses prepped same nominal solution with bead ...,,WBG Molecular Sieve Dried Solvent,"{'name': 'sample4', 'storage_slot': {'slot': '..."


In [119]:
maestro_input_df.columns

Index(['task', 'file', 'batch', 'baselines_required', 'description',
       'hotplate_setpoints', 'name', 'samples'],
      dtype='object')

In [120]:
# see unique parameters
#need to cast to prevent unhashable type errors for pd.unique
maestro_input_df.apply(lambda col: col.astype(str).unique()).to_dict()

{'task': array(['Brightfield', 'Darkfield', 'PL_635nm', 'Transmission',
        'Hotplate1', 'sample0', 'sample1', 'sample2', 'sample3', 'sample4',
        'sample5', 'sample10', 'sample11', 'sample12', 'sample13',
        'sample14', 'sample15', 'sample16', 'sample17', 'sample18',
        'sample19', 'sample20', 'sample21', 'sample22', 'sample23',
        'sample24', 'sample25', 'sample26', 'sample27', 'sample28',
        'sample29', 'sample30', 'sample31', 'sample32', 'sample33',
        'sample34', 'sample35', 'sample36', 'sample37', 'sample38',
        'sample39', 'sample40', 'sample41', 'sample6', 'sample7',
        'sample8', 'sample9', 'PLImaging', 'Hotplate2', 'Hotplate3',
        'sample42', 'sample43', 'sample44', 'Photostability_405nm',
        'sample45', 'sample46', 'sample47', 'sample48', 'sample49',
        'sample50', 'sample51', 'sample52', 'sample53', 'sample54',
        'sample55', 'sample56', 'sample57', 'sample58', 'sample59'],
       dtype=object),
 'file': array(

In [121]:
# see hotplates
maestro_input_df[maestro_input_df['hotplate_setpoints'].notna()].set_index('batch')

Unnamed: 0_level_0,task,file,baselines_required,description,hotplate_setpoints,name,samples
batch,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1
20220414_Film_PL_Check,Hotplate1,20220414_Film_PL_Check/maestronetlist_WBG Mole...,,Moses prepped same nominal solution with bead ...,100.0,WBG Molecular Sieve Dried Solvent,
20220428_Excess_PbI2,Hotplate1,20220428_Excess_PbI2/maestronetlist_PbX2 Loadi...,,Purpose of this experiment is to optimize the ...,100.0,PbX2 Loading Optimization,
20220502_Excess_PbI2,Hotplate1,20220502_Excess_PbI2/maestronetlist_PbX2 Loadi...,,Drop conditions,100.0,PbX2 Loading Optimization,
20220503_Drop_optimization,Hotplate1,20220503_Drop_optimization/maestronetlist_PbX2...,,Drop conditions,100.0,PbX2 Loading Optimization,
20220504_PIN_Half_Cells,Hotplate1,20220504_PIN_Half_Cells/maestronetlist_2022050...,,Half-cells for Apoorva,100.0,20220504_PIN_Half_Cells,
20220505_Annealing_Optimization,Hotplate1,20220505_Annealing_Optimization/maestronetlist...,,Annealing optimization on thin films,100.0,20220505_Annealing_Optimization,
20220505_Annealing_Optimization,Hotplate2,20220505_Annealing_Optimization/maestronetlist...,,Annealing optimization on thin films,115.0,20220505_Annealing_Optimization,
20220505_Annealing_Optimization,Hotplate3,20220505_Annealing_Optimization/maestronetlist...,,Annealing optimization on thin films,130.0,20220505_Annealing_Optimization,
20220510_3xHalide_MACl_films,Hotplate1,20220510_3xHalide_MACl_films/maestronetlist_20...,,Substituting MAI with MACl,100.0,20220510_3xHalide_MACl_films,
20220510_3xHalide_MACl_films,Hotplate2,20220510_3xHalide_MACl_films/maestronetlist_20...,,Substituting MAI with MACl,115.0,20220510_3xHalide_MACl_films,


In [122]:
# see characterization
a = maestro_input_df.set_index(['batch']).sort_index()
(
    a[a['task'].isin(['Brightfield', 'Darkfield', 'PL_635nm', 'Transmission'])]
)

Unnamed: 0_level_0,task,file,baselines_required,description,hotplate_setpoints,name,samples
batch,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1
20220414_Film_PL_Check,Brightfield,20220414_Film_PL_Check/maestronetlist_WBG Mole...,[0.05],Moses prepped same nominal solution with bead ...,,WBG Molecular Sieve Dried Solvent,
20220414_Film_PL_Check,Darkfield,20220414_Film_PL_Check/maestronetlist_WBG Mole...,[0.05],Moses prepped same nominal solution with bead ...,,WBG Molecular Sieve Dried Solvent,
20220414_Film_PL_Check,PL_635nm,20220414_Film_PL_Check/maestronetlist_WBG Mole...,"[0.1, 20, 5]",Moses prepped same nominal solution with bead ...,,WBG Molecular Sieve Dried Solvent,
20220414_Film_PL_Check,Transmission,20220414_Film_PL_Check/maestronetlist_WBG Mole...,"[0.2, 1, 0.05, 5, 15, 0.02]",Moses prepped same nominal solution with bead ...,,WBG Molecular Sieve Dried Solvent,
20220428_Excess_PbI2,Brightfield,20220428_Excess_PbI2/maestronetlist_PbX2 Loadi...,[0.05],Purpose of this experiment is to optimize the ...,,PbX2 Loading Optimization,
...,...,...,...,...,...,...,...
20221020_3A3X_B11/B11_2_Char,Transmission,20221020_3A3X_B11/B11_2_Char/maestronetlist_B1...,"[0.2, 1, 0.05, 5, 15, 0.02]",B11,,B11-char,
20221021_XEOL_Rerun/XEOL_rerun_inputs,Brightfield,20221021_XEOL_Rerun/XEOL_rerun_inputs/maestron...,[0.05],XEOL_rerun_inputs,,XEOL_rerun_inputs,
20221021_XEOL_Rerun/XEOL_rerun_inputs,Darkfield,20221021_XEOL_Rerun/XEOL_rerun_inputs/maestron...,[0.05],XEOL_rerun_inputs,,XEOL_rerun_inputs,
20221021_XEOL_Rerun/XEOL_rerun_inputs,PL_635nm,20221021_XEOL_Rerun/XEOL_rerun_inputs/maestron...,"[0.1, 20, 5]",XEOL_rerun_inputs,,XEOL_rerun_inputs,


In [123]:
maestro_input_df[maestro_input_df['samples'].notna()]

Unnamed: 0,task,file,batch,baselines_required,description,hotplate_setpoints,name,samples
5,sample0,20220414_Film_PL_Check/maestronetlist_WBG Mole...,20220414_Film_PL_Check,,Moses prepped same nominal solution with bead ...,,WBG Molecular Sieve Dried Solvent,"{'name': 'sample0', 'storage_slot': {'slot': '..."
6,sample1,20220414_Film_PL_Check/maestronetlist_WBG Mole...,20220414_Film_PL_Check,,Moses prepped same nominal solution with bead ...,,WBG Molecular Sieve Dried Solvent,"{'name': 'sample1', 'storage_slot': {'slot': '..."
7,sample2,20220414_Film_PL_Check/maestronetlist_WBG Mole...,20220414_Film_PL_Check,,Moses prepped same nominal solution with bead ...,,WBG Molecular Sieve Dried Solvent,"{'name': 'sample2', 'storage_slot': {'slot': '..."
8,sample3,20220414_Film_PL_Check/maestronetlist_WBG Mole...,20220414_Film_PL_Check,,Moses prepped same nominal solution with bead ...,,WBG Molecular Sieve Dried Solvent,"{'name': 'sample3', 'storage_slot': {'slot': '..."
9,sample4,20220414_Film_PL_Check/maestronetlist_WBG Mole...,20220414_Film_PL_Check,,Moses prepped same nominal solution with bead ...,,WBG Molecular Sieve Dried Solvent,"{'name': 'sample4', 'storage_slot': {'slot': '..."
...,...,...,...,...,...,...,...,...
1546,sample5,20221021_XEOL_Rerun/XEOL_rerun_inputs/maestron...,20221021_XEOL_Rerun/XEOL_rerun_inputs,,XEOL_rerun_inputs,,XEOL_rerun_inputs,"{'name': 'sample5', 'storage_slot': {'slot': '..."
1547,sample6,20221021_XEOL_Rerun/XEOL_rerun_inputs/maestron...,20221021_XEOL_Rerun/XEOL_rerun_inputs,,XEOL_rerun_inputs,,XEOL_rerun_inputs,"{'name': 'sample6', 'storage_slot': {'slot': '..."
1548,sample7,20221021_XEOL_Rerun/XEOL_rerun_inputs/maestron...,20221021_XEOL_Rerun/XEOL_rerun_inputs,,XEOL_rerun_inputs,,XEOL_rerun_inputs,"{'name': 'sample7', 'storage_slot': {'slot': '..."
1549,sample8,20221021_XEOL_Rerun/XEOL_rerun_inputs/maestron...,20221021_XEOL_Rerun/XEOL_rerun_inputs,,XEOL_rerun_inputs,,XEOL_rerun_inputs,"{'name': 'sample8', 'storage_slot': {'slot': '..."


In [124]:
# view samples json
maestro_samples = maestro_input_df.dropna(subset=['samples'])
index = maestro_samples.index
maestro_samples = pd.json_normalize(maestro_samples['samples'])
maestro_samples.index = index # correct the index
maestro_samples['batch'] = maestro_input_df['batch']
maestro_samples

Unnamed: 0,name,substrate,worklist,storage_slot.slot,storage_slot.tray,batch
5,sample0,glass,[{'details': {'destination': 'SpincoaterLiquid...,A1,Tray1,20220414_Film_PL_Check
6,sample1,glass,[{'details': {'destination': 'SpincoaterLiquid...,A2,Tray1,20220414_Film_PL_Check
7,sample2,glass,[{'details': {'destination': 'SpincoaterLiquid...,A3,Tray1,20220414_Film_PL_Check
8,sample3,glass,[{'details': {'destination': 'SpincoaterLiquid...,A4,Tray1,20220414_Film_PL_Check
9,sample4,glass,[{'details': {'destination': 'SpincoaterLiquid...,A5,Tray1,20220414_Film_PL_Check
...,...,...,...,...,...,...
1546,sample5,Glass,[{'details': {'destination': 'SpincoaterLiquid...,B1,Tray2,20221021_XEOL_Rerun/XEOL_rerun_inputs
1547,sample6,Glass,[{'details': {'destination': 'SpincoaterLiquid...,B2,Tray2,20221021_XEOL_Rerun/XEOL_rerun_inputs
1548,sample7,Glass,[{'details': {'destination': 'SpincoaterLiquid...,B3,Tray2,20221021_XEOL_Rerun/XEOL_rerun_inputs
1549,sample8,Glass,[{'details': {'destination': 'SpincoaterLiquid...,B4,Tray2,20221021_XEOL_Rerun/XEOL_rerun_inputs


In [125]:
# view unique elements
maestro_samples.apply(lambda col: col.astype(str).unique()).to_dict()

{'name': array(['sample0', 'sample1', 'sample2', 'sample3', 'sample4', 'sample5',
        'sample10', 'sample11', 'sample12', 'sample13', 'sample14',
        'sample15', 'sample16', 'sample17', 'sample18', 'sample19',
        'sample20', 'sample21', 'sample22', 'sample23', 'sample24',
        'sample25', 'sample26', 'sample27', 'sample28', 'sample29',
        'sample30', 'sample31', 'sample32', 'sample33', 'sample34',
        'sample35', 'sample36', 'sample37', 'sample38', 'sample39',
        'sample40', 'sample41', 'sample6', 'sample7', 'sample8', 'sample9',
        'sample42', 'sample43', 'sample44', 'sample45', 'sample46',
        'sample47', 'sample48', 'sample49', 'sample50', 'sample51',
        'sample52', 'sample53', 'sample54', 'sample55', 'sample56',
        'sample57', 'sample58', 'sample59'], dtype=object),
 'substrate': array(['glass', 'FTO', 'Glass', 'ITO', 'Si', 'Half-Cells', 'Char only',
        'HTL', 'PSK-pre', 'PSK-post', 'PSK'], dtype=object),
 'worklist': array(["[{

In [126]:
# view number of steps in a worklist
num_steps = maestro_samples['worklist'].apply(lambda row: len(row))
num_steps.name = 'num_steps'
maestro_samples['num_steps'] = num_steps
maestro_samples

Unnamed: 0,name,substrate,worklist,storage_slot.slot,storage_slot.tray,batch,num_steps
5,sample0,glass,[{'details': {'destination': 'SpincoaterLiquid...,A1,Tray1,20220414_Film_PL_Check,9
6,sample1,glass,[{'details': {'destination': 'SpincoaterLiquid...,A2,Tray1,20220414_Film_PL_Check,9
7,sample2,glass,[{'details': {'destination': 'SpincoaterLiquid...,A3,Tray1,20220414_Film_PL_Check,9
8,sample3,glass,[{'details': {'destination': 'SpincoaterLiquid...,A4,Tray1,20220414_Film_PL_Check,9
9,sample4,glass,[{'details': {'destination': 'SpincoaterLiquid...,A5,Tray1,20220414_Film_PL_Check,9
...,...,...,...,...,...,...,...
1546,sample5,Glass,[{'details': {'destination': 'SpincoaterLiquid...,B1,Tray2,20221021_XEOL_Rerun/XEOL_rerun_inputs,9
1547,sample6,Glass,[{'details': {'destination': 'SpincoaterLiquid...,B2,Tray2,20221021_XEOL_Rerun/XEOL_rerun_inputs,9
1548,sample7,Glass,[{'details': {'destination': 'SpincoaterLiquid...,B3,Tray2,20221021_XEOL_Rerun/XEOL_rerun_inputs,9
1549,sample8,Glass,[{'details': {'destination': 'SpincoaterLiquid...,B4,Tray2,20221021_XEOL_Rerun/XEOL_rerun_inputs,6


In [127]:
maestro_worklists = pd.DataFrame([maestro_samples['worklist'], num_steps]).T
maestro_worklists

Unnamed: 0,worklist,num_steps
5,[{'details': {'destination': 'SpincoaterLiquid...,9
6,[{'details': {'destination': 'SpincoaterLiquid...,9
7,[{'details': {'destination': 'SpincoaterLiquid...,9
8,[{'details': {'destination': 'SpincoaterLiquid...,9
9,[{'details': {'destination': 'SpincoaterLiquid...,9
...,...,...
1546,[{'details': {'destination': 'SpincoaterLiquid...,9
1547,[{'details': {'destination': 'SpincoaterLiquid...,9
1548,[{'details': {'destination': 'SpincoaterLiquid...,9
1549,[{'details': {'destination': 'SpincoaterLiquid...,6


In [128]:
# see number of steps
maestro_worklists['num_steps'].unique()

array([9, 15, 7, 3, 33, 21, 5, 13, 6], dtype=object)

In [129]:
# see names of steps
step_names = pd.json_normalize(maestro_worklists['worklist']).apply(lambda row: row.apply(
    lambda step: step['name'] if step is not None else None))
step_names.index = maestro_worklists.index
step_names

Unnamed: 0,0,1,2,3,4,5,6,7,8,9,10,11,12,13,14,15,16,17,18,19,20,21,22,23,24,25,26,27,28,29,30,31,32
5,storage_to_spincoater,spincoat,spincoater_to_hotplate,anneal,hotplate_to_storage,rest,storage_to_characterization,characterize,characterization_to_storage,,,,,,,,,,,,,,,,,,,,,,,,
6,storage_to_spincoater,spincoat,spincoater_to_hotplate,anneal,hotplate_to_storage,rest,storage_to_characterization,characterize,characterization_to_storage,,,,,,,,,,,,,,,,,,,,,,,,
7,storage_to_spincoater,spincoat,spincoater_to_hotplate,anneal,hotplate_to_storage,rest,storage_to_characterization,characterize,characterization_to_storage,,,,,,,,,,,,,,,,,,,,,,,,
8,storage_to_spincoater,spincoat,spincoater_to_hotplate,anneal,hotplate_to_storage,rest,storage_to_characterization,characterize,characterization_to_storage,,,,,,,,,,,,,,,,,,,,,,,,
9,storage_to_spincoater,spincoat,spincoater_to_hotplate,anneal,hotplate_to_storage,rest,storage_to_characterization,characterize,characterization_to_storage,,,,,,,,,,,,,,,,,,,,,,,,
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
1546,storage_to_spincoater,spincoat,spincoater_to_hotplate,anneal,hotplate_to_storage,rest,storage_to_characterization,characterize,characterization_to_storage,,,,,,,,,,,,,,,,,,,,,,,,
1547,storage_to_spincoater,spincoat,spincoater_to_hotplate,anneal,hotplate_to_storage,rest,storage_to_characterization,characterize,characterization_to_storage,,,,,,,,,,,,,,,,,,,,,,,,
1548,storage_to_spincoater,spincoat,spincoater_to_hotplate,anneal,hotplate_to_storage,rest,storage_to_characterization,characterize,characterization_to_storage,,,,,,,,,,,,,,,,,,,,,,,,
1549,storage_to_spincoater,spincoat,spincoater_to_hotplate,anneal,hotplate_to_storage,rest,,,,,,,,,,,,,,,,,,,,,,,,,,,


In [130]:
# see what steps are in what order
pd.DataFrame([step_names.apply(lambda col: col.unique()),
    step_names.apply(lambda col: col.nunique())]).T.rename(columns={0:'unique steps', 1:'num unique'})

Unnamed: 0,unique steps,num unique
0,"[storage_to_spincoater, storage_to_characteriz...",2
1,"[spincoat, characterize]",2
2,"[spincoater_to_hotplate, spincoater_to_storage...",3
3,"[anneal, rest, None]",2
4,"[hotplate_to_storage, storage_to_characterizat...",3
5,"[rest, characterize, None, spincoat]",3
6,"[storage_to_characterization, storage_to_spinc...",4
7,"[characterize, spincoat, None, anneal]",3
8,"[characterization_to_storage, spincoater_to_ho...",3
9,"[None, anneal, rest]",2


In [131]:
# get unique combos of steps
steps_combos = pd.DataFrame(step_names.value_counts(dropna=False).sort_index())
steps_combos

Unnamed: 0_level_0,Unnamed: 1_level_0,Unnamed: 2_level_0,Unnamed: 3_level_0,Unnamed: 4_level_0,Unnamed: 5_level_0,Unnamed: 6_level_0,Unnamed: 7_level_0,Unnamed: 8_level_0,Unnamed: 9_level_0,Unnamed: 10_level_0,Unnamed: 11_level_0,Unnamed: 12_level_0,Unnamed: 13_level_0,Unnamed: 14_level_0,Unnamed: 15_level_0,Unnamed: 16_level_0,Unnamed: 17_level_0,Unnamed: 18_level_0,Unnamed: 19_level_0,Unnamed: 20_level_0,Unnamed: 21_level_0,Unnamed: 22_level_0,Unnamed: 23_level_0,Unnamed: 24_level_0,Unnamed: 25_level_0,Unnamed: 26_level_0,Unnamed: 27_level_0,Unnamed: 28_level_0,Unnamed: 29_level_0,Unnamed: 30_level_0,Unnamed: 31_level_0,Unnamed: 32_level_0,0
0,1,2,3,4,5,6,7,8,9,10,11,12,13,14,15,16,17,18,19,20,21,22,23,24,25,26,27,28,29,30,31,32,Unnamed: 33_level_1
storage_to_characterization,characterize,characterization_to_storage,rest,storage_to_spincoater,spincoat,spincoater_to_hotplate,anneal,hotplate_to_storage,rest,storage_to_characterization,characterize,characterization_to_storage,,,,,,,,,,,,,,,,,,,,,48
storage_to_characterization,characterize,characterization_to_storage,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,262
storage_to_spincoater,spincoat,spincoater_to_hotplate,anneal,hotplate_to_storage,rest,storage_to_characterization,characterize,characterization_to_storage,,,,,,,,,,,,,,,,,,,,,,,,,370
storage_to_spincoater,spincoat,spincoater_to_hotplate,anneal,hotplate_to_storage,rest,storage_to_spincoater,spincoat,spincoater_to_hotplate,anneal,hotplate_to_storage,rest,storage_to_characterization,characterize,characterization_to_storage,,,,,,,,,,,,,,,,,,,250
storage_to_spincoater,spincoat,spincoater_to_hotplate,anneal,hotplate_to_storage,rest,storage_to_spincoater,spincoat,spincoater_to_hotplate,anneal,hotplate_to_storage,rest,storage_to_spincoater,spincoat,spincoater_to_hotplate,anneal,hotplate_to_storage,rest,storage_to_characterization,characterize,characterization_to_storage,,,,,,,,,,,,,32
storage_to_spincoater,spincoat,spincoater_to_hotplate,anneal,hotplate_to_storage,rest,storage_to_spincoater,spincoat,spincoater_to_hotplate,anneal,hotplate_to_storage,rest,storage_to_spincoater,spincoat,spincoater_to_hotplate,anneal,hotplate_to_storage,rest,storage_to_spincoater,spincoat,spincoater_to_hotplate,anneal,hotplate_to_storage,rest,storage_to_spincoater,spincoat,spincoater_to_hotplate,anneal,hotplate_to_storage,rest,storage_to_characterization,characterize,characterization_to_storage,32
storage_to_spincoater,spincoat,spincoater_to_hotplate,anneal,hotplate_to_storage,rest,,,,,,,,,,,,,,,,,,,,,,,,,,,,280
storage_to_spincoater,spincoat,spincoater_to_hotplate,anneal,hotplate_to_storage,,,,,,,,,,,,,,,,,,,,,,,,,,,,,26
storage_to_spincoater,spincoat,spincoater_to_storage,rest,storage_to_characterization,characterize,characterization_to_storage,,,,,,,,,,,,,,,,,,,,,,,,,,,32


Seems there are only 5 different tasks:
- Spincoating
- Annealing
- Characterization
- Rest
- Transport

### Analyze 1 netlist

In [215]:
idx = 0
sample = maestro_samples['worklist'].iloc[idx]
print(maestro_samples.iloc[idx])
sample

name                                                           sample0
substrate                                                        glass
worklist             [{'details': {'destination': 'SpincoaterLiquid...
storage_slot.slot                                                   A1
storage_slot.tray                                                Tray1
batch                                           20220414_Film_PL_Check
num_steps                                                            9
Name: 5, dtype: object


[{'details': {'destination': 'SpincoaterLiquidhandler', 'source': 'Tray1'},
  'id': 'storage_to_spincoater--19f96bbb-017f-4dcf-9523-12c6863023b2',
  'name': 'storage_to_spincoater',
  'precedent': None,
  'sample': 'sample0',
  'start': 175},
 {'details': {'drops': [{'air_gap': True,
     'blow_out': True,
     'height': 2,
     'pre_mix': [3, 50],
     'rate': 80,
     'reuse_tip': False,
     'slow_retract': True,
     'slow_travel': False,
     'solution': {'molarity': 1.2,
      'solutes': 'FA0.78_Cs0.17_MA0.05_Pb1.09_I2.62_Br0.491_Cl0.164',
      'solvent': 'DMF0.75_DMSO0.25',
      'well': {'labware': '4mL_Tray1', 'well': 'A1'}},
     'time': -1,
     'touch_tip': True,
     'volume': 30},
    {'air_gap': True,
     'blow_out': True,
     'height': 2,
     'pre_mix': [5, 100],
     'rate': 80,
     'reuse_tip': True,
     'slow_retract': True,
     'slow_travel': True,
     'solution': {'molarity': 0,
      'solutes': '',
      'solvent': 'MethylAcetate',
      'well': {'labware'

In [216]:
# the number of steps in the netlist
print(f'Number steps: {len(sample)}')

Number steps: 9


In [217]:
# see structure of netlist
for i, step in enumerate(sample):
    print(i, step.keys())

0 dict_keys(['details', 'id', 'name', 'precedent', 'sample', 'start'])
1 dict_keys(['details', 'id', 'name', 'precedent', 'sample', 'start'])
2 dict_keys(['details', 'id', 'name', 'precedent', 'sample', 'start'])
3 dict_keys(['details', 'id', 'name', 'precedent', 'sample', 'start'])
4 dict_keys(['details', 'id', 'name', 'precedent', 'sample', 'start'])
5 dict_keys(['details', 'id', 'name', 'precedent', 'sample', 'start'])
6 dict_keys(['details', 'id', 'name', 'precedent', 'sample', 'start'])
7 dict_keys(['details', 'duration', 'id', 'name', 'precedent', 'sample', 'start'])
8 dict_keys(['details', 'id', 'name', 'precedent', 'sample', 'start'])


In [218]:
from pprint import pprint # pretty print

for i, step in enumerate(sample):
    print(i, step['name']) # step number and name
    print(f"{step['precedent']}\n=> {step['id']}") # the step id before to current step id
    print(f"Sample: {step['sample']}")
    print(f"Start: {step['start']}") # start time
    pprint(step['details']) # details dictionary
    print('-' * 50)

0 storage_to_spincoater
None
=> storage_to_spincoater--19f96bbb-017f-4dcf-9523-12c6863023b2
Sample: sample0
Start: 175
{'destination': 'SpincoaterLiquidhandler', 'source': 'Tray1'}
--------------------------------------------------
1 spincoat
storage_to_spincoater--19f96bbb-017f-4dcf-9523-12c6863023b2
=> spincoat-d8f53558-2f4c-4c87-bf0b-d2f5f29de642
Sample: sample0
Start: 208
{'drops': [{'air_gap': True,
            'blow_out': True,
            'height': 2,
            'pre_mix': [3, 50],
            'rate': 80,
            'reuse_tip': False,
            'slow_retract': True,
            'slow_travel': False,
            'solution': {'molarity': 1.2,
                         'solutes': 'FA0.78_Cs0.17_MA0.05_Pb1.09_I2.62_Br0.491_Cl0.164',
                         'solvent': 'DMF0.75_DMSO0.25',
                         'well': {'labware': '4mL_Tray1', 'well': 'A1'}},
            'time': -1,
            'touch_tip': True,
            'volume': 30},
           {'air_gap': True,
         

#### Parse Spincoating Step

In [219]:
# now try parsing spincoat step details
# NOTE: seems like index 1270 confirms that "steps" array does not correspond to "drops" array
spincoat = sample[1]
details_df = pd.DataFrame({k:v for k,v in spincoat['details'].items() if k not in ['drops']})
details_df

Unnamed: 0,duration,start_times,steps
0,115,1.0,"{'acceleration': 200.0, 'duration': 5.0, 'rpm'..."
1,115,6.0,"{'acceleration': 2000.0, 'duration': 50.0, 'rp..."


In [220]:
# figure out if each step corresponds to a drop or if that's separate
print(f"Num Drops: {len(spincoat['details']['drops'])}")
drops_df = pd.DataFrame(spincoat['details']['drops'])
drops_df

Num Drops: 2


Unnamed: 0,air_gap,blow_out,height,pre_mix,rate,reuse_tip,slow_retract,slow_travel,solution,time,touch_tip,volume
0,True,True,2,"[3, 50]",80,False,True,False,"{'molarity': 1.2, 'solutes': 'FA0.78_Cs0.17_MA...",-1,True,30
1,True,True,2,"[5, 100]",80,True,True,True,"{'molarity': 0, 'solutes': '', 'solvent': 'Met...",33,False,100


#### Parse Annealing Step

In [221]:
anneal = sample[3]
anneal

{'details': {'duration': 1800, 'hotplate': 'Hotplate1', 'temperature': 100},
 'id': 'anneal-898ed138-93e4-4353-8a00-cb41d89c924d',
 'name': 'anneal',
 'precedent': 'spincoater_to_hotplate--56a24987-c104-4af0-a9d6-6bdbbd7b076d',
 'sample': 'sample0',
 'start': 350}

In [222]:
anneal_df = pd.DataFrame(anneal['details'], index=[0])
anneal_df

Unnamed: 0,duration,hotplate,temperature
0,1800,Hotplate1,100


#### Parse Characterization Step

In [227]:
char = sample[7]
char

{'details': {'characterization_tasks': [{'details': {'exposure_times': [0.1,
      5,
      20],
     'num_scans': 1},
    'duration': 25.32,
    'name': 'PL_635nm',
    'position': 287,
    'station': 'pl_red'},
   {'details': {'exposure_times': [0.02, 0.05, 0.2, 1, 5, 15], 'num_scans': 2},
    'duration': 46.54,
    'name': 'Transmission',
    'position': 287,
    'station': 'transmission'},
   {'details': {'exposure_time': 0.05, 'num_frames': 50},
    'duration': 2.5,
    'name': 'Darkfield',
    'position': 74.2,
    'station': 'darkfield'},
   {'details': {'exposure_time': 0.05, 'num_frames': 1},
    'duration': 0.05,
    'name': 'Brightfield',
    'position': 164.4,
    'station': 'brightfield'}]},
 'duration': 124,
 'id': 'characterize-0d452bf3-b792-496e-9926-7ef2a2488abe',
 'name': 'characterize',
 'precedent': 'storage_to_characterization--2008847a-40a8-4726-9a09-ec3dc5cfd41b',
 'sample': 'sample0',
 'start': 2483}

In [228]:
# seems spectra characterization has "num_scans"
# while image characterization has "num_frames"

tasks = char['details']['characterization_tasks']
print(f'Num Characterization Tasks: {len(tasks)}')
tasks_df = pd.DataFrame(tasks)
tasks_df

Num Characterization Tasks: 4


Unnamed: 0,details,duration,name,position,station
0,"{'exposure_times': [0.1, 5, 20], 'num_scans': 1}",25.32,PL_635nm,287.0,pl_red
1,"{'exposure_times': [0.02, 0.05, 0.2, 1, 5, 15]...",46.54,Transmission,287.0,transmission
2,"{'exposure_time': 0.05, 'num_frames': 50}",2.5,Darkfield,74.2,darkfield
3,"{'exposure_time': 0.05, 'num_frames': 1}",0.05,Brightfield,164.4,brightfield


#### Parse Rest Step

In [230]:
rest = sample[5]
rest

{'details': {'duration': 300},
 'id': 'rest-a781ff5d-4550-4312-a05e-18bb8df54729',
 'name': 'rest',
 'precedent': 'hotplate_to_storage--1157a2b7-3872-4c32-b72b-e0e00f87be7d',
 'sample': 'sample0',
 'start': 2168}

In [233]:
rest_df = pd.DataFrame(rest)
rest_df

Unnamed: 0,details,id,name,precedent,sample,start
duration,300,rest-a781ff5d-4550-4312-a05e-18bb8df54729,rest,hotplate_to_storage--1157a2b7-3872-4c32-b72b-e...,sample0,2168


#### Parse Transport Step

In [235]:
transport = sample[0]
transport

{'details': {'destination': 'SpincoaterLiquidhandler', 'source': 'Tray1'},
 'id': 'storage_to_spincoater--19f96bbb-017f-4dcf-9523-12c6863023b2',
 'name': 'storage_to_spincoater',
 'precedent': None,
 'sample': 'sample0',
 'start': 175}

In [238]:
trans_df = pd.DataFrame(transport)
trans_df

Unnamed: 0,details,id,name,precedent,sample,start
destination,SpincoaterLiquidhandler,storage_to_spincoater--19f96bbb-017f-4dcf-9523...,storage_to_spincoater,,sample0,175
source,Tray1,storage_to_spincoater--19f96bbb-017f-4dcf-9523...,storage_to_spincoater,,sample0,175
