In [1]:
import pandas as pd
import numpy as np
import psycopg2 

In [2]:
def remove_emptyrows(df):
    """Return only the rows of ``df`` whose second column holds a value.

    Rows where the second column (position 1) is NaN/None are dropped; the
    input frame itself is left untouched.
    """
    second_column = df.iloc[:, 1]
    has_value = second_column.notna()
    return df.loc[has_value]

def read_sheet(filepath, sheet_name, ordering=False, usecols="A,B,D", meas=False):
    """Parse one worksheet of the Excel template into (nested) dictionaries.

    Parameters
    ----------
    filepath : str or path-like
        Workbook location, handed straight to ``pd.read_excel``.
    sheet_name : str
        Name of the worksheet to read.
    ordering : bool, default False
        When truthy, the sheet is cut into sections with ``split_df`` and each
        section is converted separately; the result is then keyed by the
        section's position (0, 1, 2, ...).
    usecols : str, default "A,B,D"
        Excel column selection forwarded to ``pd.read_excel``.
    meas : bool, default False
        Currently unused; kept so existing calls that pass it keep working.

    Returns
    -------
    dict
        ``table_to_dict`` output for the whole sheet, or a dict mapping
        section position -> ``table_to_dict`` output when ``ordering`` is set.
    """
    # TODO: add an argument to decide whether or not to bracket the sheet

    # Read the raw sheet content
    raw_sheet = pd.read_excel(
        filepath,
        sheet_name=sheet_name,
        usecols=usecols,
    )

    # Rows without a value in the second column carry no information
    populated = remove_emptyrows(raw_sheet)

    if ordering:
        # Distinct loop name: the original reused ``df`` and shadowed the raw frame
        return {
            position: table_to_dict(section)
            for position, section in enumerate(split_df(populated))
        }

    return table_to_dict(populated)

def split_df(df_):
    """Split a sheet into the row groups lying between '#' marker rows.

    A row is a marker when the text of its first column contains ``'#'``.
    Each returned frame holds the rows strictly between two consecutive
    markers. Rows before the first marker and rows after the last marker are
    not returned, so a sheet needs a closing marker row for its final section
    to be included.

    Parameters
    ----------
    df_ : pandas.DataFrame
        Sheet content; only the first column is inspected for markers.

    Returns
    -------
    list of pandas.DataFrame
        One frame per pair of consecutive markers (empty list when fewer than
        two markers are present).
    """
    # Cast to text and use na=False so blank or numeric cells count as
    # "not a marker" instead of producing NaN in the mask (which would make
    # boolean indexing raise).
    first_col_text = df_.iloc[:, 0].astype(str)
    is_marker = first_col_text.str.contains('#', regex=False, na=False)
    marker_positions = [pos for pos, hit in enumerate(is_marker) if hit]

    # Slice by position rather than by label arithmetic so the result does not
    # depend on the index being a sorted integer index.
    return [
        df_.iloc[start + 1:stop, :]
        for start, stop in zip(marker_positions[:-1], marker_positions[1:])
    ]

def table_to_dict(df_):
    """Convert a key/value table into a dictionary, nesting rows by JSON tag.

    The first two columns of ``df_`` are read as key and value. Rows whose
    ``JSON`` column is empty become top-level entries; every other row is
    placed in a nested dict stored under its ``JSON`` tag (for example
    ``'meta'`` or ``'data'``).

    Parameters
    ----------
    df_ : pandas.DataFrame
        Table with at least two columns plus a column named ``JSON``.

    Returns
    -------
    dict
        Top-level key/value pairs followed by one nested dict per JSON tag, in
        order of first appearance.
    """
    json_tags = df_['JSON']

    # Untagged rows go directly to the top level
    untagged = pd.isna(json_tags)
    step_dict = dict(df_[untagged].iloc[:, :2].values)

    for json_field in pd.unique(json_tags):
        if pd.isna(json_field):
            continue
        # 'data' rows used to be matched and then dropped; they are now nested
        # like every other tag.
        # TODO: lump 'data' key:value pairs into a second nested dict if a
        # richer structure is needed.
        json_mask = json_tags == json_field
        step_dict[json_field] = dict(df_[json_mask].iloc[:, :2].values)

    return step_dict

# f = pd.ExcelFile(fpath)


In [13]:
# Workbook to ingest. Forward slashes resolve on Windows and POSIX alike,
# whereas the previous backslash form only worked on Windows.
fpath = 'db_feed/new_template_v5.xlsx'

# One read_sheet call per worksheet; ordering=True splits a sheet into its
# '#'-delimited sections. Commented-out sheets are currently not loaded.
exp_info = read_sheet(fpath, 'Data Origin')
solution_makeup = read_sheet(fpath, 'Solution Makeup', ordering=True)
# solution_processing = read_sheet(fpath, 'Solution Processing', ordering=True)
# device_fab = read_sheet(fpath, 'Device Fabrication')
substrate_pretreat = read_sheet(fpath, 'Substrate Pretreat', ordering=True)
# coating_process = read_sheet(fpath, 'Coating Process')
# post_process = read_sheet(fpath, 'Post-Processing', ordering=True)
# The measurement sheets read columns A:G instead of the default "A,B,D"
device_meas = read_sheet(fpath, 'Device Measurement', usecols="A:G", ordering=True)
other_meas = read_sheet(fpath, 'Other Measurements', usecols="A:G", ordering=True)

In [14]:
# Inspect the dictionary parsed from the 'Data Origin' sheet.
exp_info

{'citation_type': 'literature',
 'meta': {'first_name': 'Rahul',
  'last_name': 'Venkatesh',
  'email': 'rvenkatesh6@gatech.edu',
  'DOI': '10.1038/srep24476 '}}

In [15]:
# Inspect the per-section dictionaries parsed from the 'Solution Makeup' sheet (read with ordering=True).
solution_makeup

{0: {'entity_type': 'solution', 'concentration': 4},
 1: {'entity_type': 'solvent', 'iupac_name': 'toluene'},
 2: {'entity_type': 'polymer',
  'common_name': 'DPP-DTT',
  'iupac_name': 'poly[2,5-(2-octyldodecyl)-3,6-diketopyrrolopyrrole-alt-5,5-(2,5-di(thien-2-yl)thieno [3,2-b]thiophene)]'}}

In [16]:
# Inspect the per-section dictionaries parsed from the 'Device Measurement' sheet.
device_meas

{0: {'measurement_type': 'transfer_curve',
  'data': {'hole_mobility': 0.000202},
  'meta': {'mobility_regime': 'linear',
   'environment': 'air',
   'Vds': -3,
   'equipment_description': 'Agilent 4155C'}}}

In [17]:
# Inspect the per-section dictionaries parsed from the 'Other Measurements' sheet.
other_meas

{0: {'measurement_type': 'uv_vis_film',
  'meta': {'equipment_description': 'Cary 60 UV-vis'},
  'data': {'A00_A01': 0.54, 'exciton_bandwidth': 50}},
 1: {'measurement_type': 'giwaxs', 'data': {'100_d_spacing': 27}}}

In [18]:
# Repeat display of device_meas; the recorded output matches the earlier device_meas cell.
device_meas

{0: {'measurement_type': 'transfer_curve',
  'data': {'hole_mobility': 0.000202},
  'meta': {'mobility_regime': 'linear',
   'environment': 'air',
   'Vds': -3,
   'equipment_description': 'Agilent 4155C'}}}

In [19]:
# Repeat display of exp_info; the recorded output matches the earlier exp_info cell.
exp_info

{'citation_type': 'literature',
 'meta': {'first_name': 'Rahul',
  'last_name': 'Venkatesh',
  'email': 'rvenkatesh6@gatech.edu',
  'DOI': '10.1038/srep24476 '}}

In [108]:
# NOTE(review): df_list and sheet_dict are not assigned at notebook level in
# any visible cell (they only appear as locals inside read_sheet) — presumably
# leftover kernel state from an earlier session; TODO confirm or remove.
df = df_list[0]
# Keep the rows whose 'Error' column is populated.
df_error = df[~pd.isna(df.Error)]
# Entries of the 'Device Performance' column for those rows.
keys = df_error['Device Performance'].values

sheet_dict[0]['data']['mobility']

0.000202

In [77]:
# NOTE(review): sheet_dict is not assigned at notebook level in any visible
# cell — presumably leftover kernel state; TODO confirm or remove.
sheet_dict

{'measurement_type': 'hole_transfer_curve',
 'PASTE ABOVE THIS LINE #': '^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^',
 0: {'measurement_type': 'hole_transfer_curve',
  'data': {'mobility': 0.000202},
  'meta': {'mobility_regime': 'linear',
   'measurement_environment': 'Ambient',
   'Vds': -3,
   'equipment_model': 'Agilent 4155C'}}}

In [183]:
# NOTE(review): the recorded output below uses flat 'Solvent 1 - ...' keys
# rather than the indexed sections shown by the earlier solution_makeup cell —
# presumably from a run against a different template/code version; TODO confirm.
solution_makeup

{'Solvent 1 - Chemical name': '1,2-dichlorobenzene',
 'Solvent 1 - volume %': 100,
 'Polymer 1 - Name': 'DPP-DTT',
 'Polymer 1 - IUPAC Name': 'Poly[2,5-(2-octyldodecyl)-3,6-diketopyrrolopyrrole-alt-5,5-(2,5-di(thien-2-yl)thieno [3,2-b]thiophene)]',
 'Polymer 1 - wt% in solution': 80,
 'Polymer 1 - Molecular Weight (Mn)': 55,
 'Polymer 1 - Molecular Weight (Mw)': 199,
 'Polymer 1 - Polydispersity Index (PDI)': 3.62,
 'Polymer 2 - Name': 'PS',
 'Polymer 2 - IUPAC Name': 'polystyrene',
 'Polymer 2 - wt% in solution': 20,
 'Polymer 2 - Molecular Weight (Mn)': 2.18,
 'Polymer 2 - Molecular Weight (Mw)': 2.2,
 'Polymer 2 - Polydispersity Index (PDI)': 1.01,
 'Total Polymer Concentration': 4,
 'meta': {'Polymer 1 - supplier': 'Ossila'}}

In [184]:
# NOTE(review): the only visible assignment of solution_processing is commented
# out in the loading cell, so this display depends on earlier kernel state.
solution_processing

{0: {'treatment_type': 'mixing',
  'process_step': 1,
  'meta': {'mixing speed': 250, 'temperature': 60, 'time': 1}}}

In [1]:
# NOTE(review): raises the NameError recorded below — the only visible
# assignment of device_fab is commented out in the loading cell.
device_fab

NameError: name 'device_fab' is not defined

In [17]:
# NOTE(review): this import sits mid-notebook; consider moving it to the import cell.
import pprint as pp

# Pretty-print the sections parsed from the 'Substrate Pretreat' sheet.
pp.pp(substrate_pretreat)

{0: {'treatment_type': 'chemical_treat',
     'process_step': 1,
     'params': {'environment': 'Ambient',
                'chemical name': 'methanol',
                'temperature': 25,
                'exposure_time': 0.25}},
 1: {'treatment_type': 'uv_ozone',
     'process_step': 2,
     'params': {'exposure time': 30},
     'meta': {'equipment_model': 'Entela T20'}},
 2: {'treatment_type': 'sam',
     'process_step': 3,
     'params': {'environment': 'Nitrogen',
                'SAM name': 'octadecyltrichlorosilane (OTS-18)',
                'temperature': 25,
                'exposure_time': 8}}}


In [187]:
# NOTE(review): the only visible assignment of coating_process is commented
# out in the loading cell, so this display depends on earlier kernel state.
coating_process

{'deposition_type': 'spin',
 'meta': {'coating_environment': 'Ambient',
  'spin_rate': 1500,
  'spin_time': 60}}

In [180]:
# NOTE(review): the only visible assignment of post_process is commented
# out in the loading cell, so this display depends on earlier kernel state.
post_process

{0: {'treatment_type': 'annealing', 'meta': {}}}