## Python Code to Extract Data From Template and Transfer to PostGRE SQL
#### Authors : Aaron Liu, Rahul Venkatesh, Jessica Bonsu, Myeongyeon Lee 
##### Date Edited : 06-28-2023

to do :
* add if statements to code blocks to account for empty metas
* fix solvent issue
* finish measurement issue 

In [2]:
## Required Packages

import pandas as pd
import numpy as np
import psycopg2 as pg

import os
from psycopg2.extras import Json
from psycopg2.extensions import AsIs
import functools
import json
import sys

import requests
# import bibtexparser
import pprint

In [3]:
## Required Functions To Extract Information from Template

# Function to remove rows that have no value (NaN) in the second column
def remove_emptyrows(df):
    nan_mask = ~df.iloc[:,1].isna() 
    return df[nan_mask]

# Function to convert a sheet into dictionary data type
def read_sheet(filepath, sheet_name, ordering=False, usecols="A,B,D", meas=False):

    ## NOTE: ADD AN ARGUMENT TO DECIDE WHETHER OR NOT TO BRACKET THE SHEET
    ## NOTE : The argument "ordering" is used for sheets like solution processing or substrate pretreatmant where the order of the processing step matters
    ## NOTE : The argument "usecols" is to store information from particular columns in the excel sheet
    ## NOTE : The argument "meas" is used to 
    
    ## Read Sheet Information
    df = pd.read_excel(
        filepath,
        sheet_name=sheet_name,
        usecols=usecols
    )
    
    # Call Function To Remove empty rows
    df_ = remove_emptyrows(df)
    
    # Create an empty dictionary
    sheet_dict = dict()

    # To account for sheets where processing order is important
    if ordering==True:
        df_list = split_df(df_) #calls function split_df
        for i, df in enumerate(df_list):
            sheet_dict[i] = table_to_dict(df) #adds each table to the dictionary
    else:
        sheet_dict = table_to_dict(df_)
    
    return sheet_dict #returns a dataframe

def split_df(df_):
    #For sheets where processing order is important, this function finds tables with '#' in the name of the first column title and turns it into a df
    
    split_idx_mask = df_.iloc[:,0].str.contains('#') #Find the object splits
    w = df_[split_idx_mask].index.values
    
    df_list = []
    
    for i in range(len(w)-1):
        next_df = df_.loc[w[i]+1:w[i+1]-1,:]
        df_list.append(next_df)    
    
    return df_list

def table_to_dict(df_):
    
    main_mask = pd.isna(df_.JSON) # it flags rows that dont have a value for JSON column
    step_dict = dict(df_[main_mask].iloc[:,:2].values) # Stores rows that have "NaN" for JSON column in df_ as dict

    
    
    for json_field in pd.unique(df_.JSON): #read through unique JSON types (e.g. NaN, meta or data)

        if pd.isna(json_field): #ignore fields with JSON type as NaN
            continue
            
        # dictionary to store information with JSON type "data"
        elif json_field=='data':
            data_mask = df_.JSON=='data'
            
            # lump key:value pairs into a second nested data dict
            step_dict['data'] = dict()
            
            for i, s in df_[data_mask].iterrows():
                step_dict['data'][s[s.index[0]]] = s['value':'error_type'].dropna().to_dict()
        else:
            json_mask = df_.JSON==json_field
            step_dict[json_field] = dict(df_[json_mask].iloc[:,:2].values) # creates a new key for JSON types like meta and params and adds its corresponding values to it 

    return step_dict

# f = pd.ExcelFile(fpath)


### Reading and Extracting Data From Sheets in Template

In [4]:
#Reading Data From Sheets in Template

fpath = r'..\db_feed\v6_example.xlsx' #Add path for template file
#fpath = r'..\db_feed\v6_example_blend.xlsx' #Add path for template file
#fpath = r'..\db_feed\v6_example_4.xlsx' #Add path for template file

#Storing each sheet in the template file as a dictionary
exp_info = read_sheet(fpath, 'Data Origin')
solution_makeup = read_sheet(fpath, 'Solution Makeup', ordering=True)
solution_processing = read_sheet(fpath, 'Solution Treatment', ordering=True)
device_fab = read_sheet(fpath, 'Device Fabrication')
substrate_pretreat = read_sheet(fpath, 'Substrate Pretreat', ordering=True)
coating_process = read_sheet(fpath, 'Coating Process')
post_process = read_sheet(fpath, 'Post-Processing', ordering=True)
device_meas = read_sheet(fpath, 'Device Measurement', usecols="A:G", ordering=True)
other_meas = read_sheet(fpath, 'Other Measurements', usecols="A:G", ordering=True)

  warn(msg)
  warn(msg)
  warn(msg)
  warn(msg)
  warn(msg)
  warn(msg)
  warn(msg)
  warn(msg)
  warn(msg)


In [5]:
#Use this code block to check how each sheet has been converted to a dictionary
other_meas

{0: {'measurement_type': 'uv_vis_film',
  'meta': {'equipment_description': 'Cary 60 UV-vis'},
  'data': {'A00_A01': {'value': 0.54},
   'exciton_bandwidth': {'value': 50, 'unit': 'meV'}}},
 1: {'measurement_type': 'giwaxs',
  'data': {'100_d_spacing': {'value': 27, 'unit': 'A'}}}}

### Transferring Information From Template To PostgreSQL

In [6]:
# Postgres python
from psycopg2.extras import Json 

# Adapters necessary for converting python data types to PostgreSQL compatible data types 
def addapt_numpy_float64(numpy_float64):
    return AsIs(numpy_float64)

def addapt_numpy_int64(numpy_int64):
    return AsIs(numpy_int64)

def nan_to_null(f,
        _NULL=AsIs('NULL'),
        _Float=pg.extensions.Float):
    if not np.isnan(f):
        return _Float(f)
    return _NULL

pg.extensions.register_adapter(np.float64, addapt_numpy_float64)
pg.extensions.register_adapter(np.int64, addapt_numpy_int64)
pg.extensions.register_adapter(float, nan_to_null)

param_dict = {
    "host"      : "127.0.0.1",
    "database"  : "ofetdb_testenv_ML",
    "user"      : "postgres",
    "password"  : "myL220q1W@",
    "port"      : "5432",
}

def connect(params_dict):
    """ Connect to the PostgreSQL database server """
    conn = None
    try:
        # connect to the PostgreSQL server
        print('Connecting to the PostgreSQL database...')
        conn = pg.connect(**params_dict)
    except (Exception, pg.DatabaseError) as error:
        print(error)
        sys.exit(1) 
    print("Connection successful")
    return conn

def pg_query(sql, tup):
    
    try:
        # Database connection
        conn = connect(param_dict)
        cur = conn.cursor()
        
        
        
        # Pass SQL query, using string and placeholders
        cur.execute(sql, tup)
        
        # Fetch result
        fetched = cur.fetchone()[0]
        
        # Commit result
        conn.commit()
        print("Operation Successful")

        cur.close()
        conn.close()
        
    except (Exception, pg.DatabaseError) as error:
        # If database connection unsuccessful, then close connection 
        print("Error: %s" % error)
        conn.rollback()
        cur.close()
        conn.close()
    
    return fetched #return query result

In [7]:
from psycopg2.extras import Json

def convert_entry(entry_dict):
    
    #This function reads a dictionary and extracts the column names and values from it
    
    pg_entry = entry_dict
    for key in pg_entry.keys():
        if type(pg_entry[key])==dict:
            pg_entry[key]=Json(pg_entry[key])
    columns = pg_entry.keys()
    values = [pg_entry[column] for column in columns]
    
    return pg_entry, columns, values


###### Doubt 1 : 

I made a new database. we were not able to add any new records to the old database

- might have to manually fix this

### 1.Checking and Storing Experiment Information

In [8]:
import psycopg2
from psycopg2 import _json

exp_pg_entry, exp_columns, exp_values = convert_entry(exp_info)

#print(type(pg_entry))
#print(type(columns))
#print(exp_columns)
#print(type(values))
#print(values)

exp_columns

dict_keys(['citation_type', 'meta'])

In [9]:
sql = '''
    INSERT INTO experiment_info (%s) 
    VALUES %s
    ON CONFLICT (citation_type, meta) DO UPDATE
    SET (%s) = %s
    RETURNING exp_id
    
    '''
tup = (AsIs(','.join(exp_columns)), tuple(exp_values), AsIs(','.join(exp_columns)), tuple(exp_values))



exp_id = pg_query(sql, tup)
exp_id


Connecting to the PostgreSQL database...
Connection successful
Operation Successful


1

### 2.Checking and Storing Solution Information (Polymer, Solvent, Solution)

In [10]:
import psycopg2
from psycopg2 import _json

pg_entry_solution_makeup, columns_solution_makeup, values_solution_makeup = convert_entry(solution_makeup)

print(values_solution_makeup)
print(type(values_solution_makeup))

[<psycopg2._json.Json object at 0x00000238F1056D30>, <psycopg2._json.Json object at 0x00000238F4B09040>, <psycopg2._json.Json object at 0x00000238F5D799D0>]
<class 'list'>


In [11]:
# Storing Solution data

solution_data = values_solution_makeup[0].adapted

solution_data

{'entity_type': 'solution', 'concentration': 4}

In [12]:
# Storing Solvent data - accounting for multiple solvents
solvent_data_filtered = [json_obj for json_obj in values_solution_makeup if json_obj.adapted.get("entity_type") == "solvent"]

# Convert psycopg2._json.Json objects to JSON strings
solvent_data = [json_obj.adapted for json_obj in solvent_data_filtered]

solvent_data

[{'entity_type': 'solvent',
  'iupac_name': 'toluene',
  'pubchem_cid': 1140,
  'vol_frac': 1,
  'meta': {'supplier': 'Sigma Aldrich', 'batch_number': 's1234'}}]

In [13]:
# Storing Polymer data - accounting for multiple polymers
polymer_data_filtered = [json_obj for json_obj in values_solution_makeup if json_obj.adapted.get("entity_type") == "polymer"]

# Convert psycopg2._json.Json objects to JSON strings
polymer_data = [json_obj.adapted for json_obj in polymer_data_filtered]

polymer_data

[{'entity_type': 'polymer',
  'common_name': 'DPP-DTT',
  'iupac_name': 'poly[2,5-(2-octyldodecyl)-3,6-diketopyrrolopyrrole-alt-5,5-(2,5-di(thien-2-yl)thieno [3,2-b]thiophene)]',
  'mn': 55,
  'mw': 199,
  'dispersity': 3.62,
  'wt_frac': 1,
  'meta': {'supplier': 'Ossila', 'batch_number': 'M0311A2'}}]

In [17]:
# Storing Solution Makeup data

solution_makeup_data = []
solution_makeup_data.append(solution_data)
solution_makeup_data.append(solvent_data)
solution_makeup_data.append(polymer_data)
solution_makeup_data

[{'entity_type': 'solution', 'concentration': 4},
 [{'entity_type': 'solvent',
   'iupac_name': 'toluene',
   'pubchem_cid': 1140,
   'vol_frac': 1,
   'meta': {'supplier': 'Sigma Aldrich', 'batch_number': 's1234'}}],
 [{'entity_type': 'polymer',
   'common_name': 'DPP-DTT',
   'iupac_name': 'poly[2,5-(2-octyldodecyl)-3,6-diketopyrrolopyrrole-alt-5,5-(2,5-di(thien-2-yl)thieno [3,2-b]thiophene)]',
   'mn': 55,
   'mw': 199,
   'dispersity': 3.62,
   'wt_frac': 1,
   'meta': {'supplier': 'Ossila', 'batch_number': 'M0311A2'}}]]

##### Inserting into POLYMER, SOLVENT, SOLUTION, SOLUTION_MAKEUP_POLYMER, SOLUTION_MAKEUP_SOLVENT tables

This code should handle multiple solvents each with a vol_frac and multiple polymers each with a wt_frac, and it will check for the existence of a unique combination of concentration, polymer IDs, solvent IDs, wt_fracs, and vol_fracs. If the combination exists, it will assign the existing solution_id in all tables; otherwise, it will create a new solution_id.

In [15]:
import json
import psycopg2

# Establish a connection to the database
connection = psycopg2.connect(**param_dict)

# Create a cursor object to execute SQL commands
cursor = connection.cursor()

# Extract solution information
solution_data = solution_makeup_data[0]
concentration = solution_data['concentration']

# Extract solvent information
solvent_data = solution_makeup_data[1]
solvent_ids = []
vol_fracs = []
for solvent in solvent_data:
    pubchem_cid = solvent['pubchem_cid']
    iupac_name = solvent['iupac_name']
    vol_frac = solvent['vol_frac']
    solvent_ids.append((pubchem_cid, iupac_name))
    vol_fracs.append(vol_frac)

# Extract polymer information
polymer_data = solution_makeup_data[2]
polymer_ids = []
wt_fracs = []
for polymer in polymer_data:
    common_name = polymer['common_name']
    iupac_name = polymer['iupac_name']
    mn = polymer['mn']
    mw = polymer['mw']
    dispersity = polymer['dispersity']
    wt_frac = polymer['wt_frac']
    meta = json.dumps(polymer['meta'])
    polymer_ids.append((common_name, iupac_name, mn, mw, dispersity, meta))
    wt_fracs.append(wt_frac)

# Start transaction
with connection:
    with connection.cursor() as cursor:
        try:
            # Check if the unique combination exists
            select_solution_id_sql = '''
                SELECT sm.solution_id
                FROM SOLUTION_MAKEUP_SOLVENT sms
                JOIN SOLUTION_MAKEUP_POLYMER smp ON sms.solution_id = smp.solution_id
                JOIN SOLVENT s ON sms.solvent_id = s.pubchem_cid
                JOIN POLYMER p ON smp.polymer_id = p.polymer_id
                JOIN SOLUTION sm ON sms.solution_id = sm.solution_id
                WHERE sm.concentration = %s
                AND (s.pubchem_cid, s.iupac_name) IN %s
                AND (p.common_name, p.iupac_name, p.mn, p.mw, p.dispersity, p.meta) IN %s
                GROUP BY sm.solution_id
                HAVING COUNT(DISTINCT smp.polymer_id) = %s
                AND COUNT(DISTINCT sms.solvent_id) = %s
                AND ARRAY_AGG(sms.vol_frac) = %s::double precision[]
                AND ARRAY_AGG(smp.wt_frac) = %s::double precision[]
            '''
# it seems sm = solution_makeup, sms=solusion_makeup_solvent, smp=solution_makeup_polymer
# But there is no code to instruct these. How does this code work? is there no conflict?

            cursor.execute(select_solution_id_sql, (concentration, tuple(solvent_ids), tuple(polymer_ids), len(polymer_ids), len(solvent_ids), vol_fracs, wt_fracs))
            existing_solution = cursor.fetchone()
            if existing_solution:
                solution_id = existing_solution[0]
            else:
                # Insert into SOLUTION table
                insert_solution_sql = '''
                    INSERT INTO SOLUTION (concentration)
                    VALUES (%s)
                    RETURNING solution_id
                '''
                cursor.execute(insert_solution_sql, (concentration,))
                solution_id = cursor.fetchone()[0]

            for solvent_id, vol_frac in zip(solvent_ids, vol_fracs):
                pubchem_cid, iupac_name = solvent_id

                # Insert into SOLVENT table
                insert_solvent_sql = '''
                    INSERT INTO SOLVENT (pubchem_cid, iupac_name)
                    VALUES (%s, %s)
                    ON CONFLICT (iupac_name) DO UPDATE
                    SET (pubchem_cid, iupac_name) = (%s, %s)
                    RETURNING pubchem_cid
                '''
                cursor.execute(insert_solvent_sql, (pubchem_cid, iupac_name, pubchem_cid, iupac_name))
                solvent_id = cursor.fetchone()[0]

                # Insert into SOLUTION_MAKEUP_SOLVENT table
                insert_solution_makeup_solvent_sql = '''
                    INSERT INTO SOLUTION_MAKEUP_SOLVENT (solution_id, solvent_id, vol_frac)
                    VALUES (%s, %s, %s)
                '''
                cursor.execute(insert_solution_makeup_solvent_sql, (solution_id, solvent_id, vol_frac))

            for polymer_id, wt_frac in zip(polymer_ids, wt_fracs):
                common_name, iupac_name, mn, mw, dispersity, meta = polymer_id

                # Check if the polymer exists
                select_polymer_id_sql = '''
                    SELECT polymer_id
                    FROM POLYMER
                    WHERE common_name = %s
                    AND iupac_name = %s
                    AND mn = %s
                    AND mw = %s
                    AND dispersity = %s
                    AND meta = %s::jsonb
                '''
                cursor.execute(select_polymer_id_sql, (common_name, iupac_name, mn, mw, dispersity, meta))
                existing_polymer = cursor.fetchone()

                if existing_polymer:
                    polymer_id = existing_polymer[0]
                else:
                    # Insert into POLYMER table
                    insert_polymer_sql = '''
                        INSERT INTO POLYMER (common_name, iupac_name, mn, mw, dispersity, meta)
                        VALUES (%s, %s, %s, %s, %s, %s::jsonb)
                        RETURNING polymer_id
                    '''
                    cursor.execute(insert_polymer_sql, (common_name, iupac_name, mn, mw, dispersity, meta))
                    polymer_id = cursor.fetchone()[0]

                # Insert into SOLUTION_MAKEUP_POLYMER table
                insert_solution_makeup_polymer_sql = '''
                    INSERT INTO SOLUTION_MAKEUP_POLYMER (solution_id, polymer_id, wt_frac)
                    VALUES (%s, %s, %s)
                '''
                cursor.execute(insert_solution_makeup_polymer_sql, (solution_id, polymer_id, wt_frac))

            connection.commit()

            print("Solution makeup saved successfully!")
            print(solution_id)
        except Exception as e:
            connection.rollback()
            print("An error occurred:", str(e))

# Close the database connection
connection.close()


Solution makeup saved successfully!
1


### 3. Checking and Storing Device Information

In [16]:
import psycopg2
from psycopg2 import _json

device_fab_pg_entry, device_fab_columns, device_fab_values = convert_entry(device_fab)

#print(type(device_fab_pg_entry))
#print(type(device_fab_columns))
print(device_fab_columns)
print(type(device_fab_values))
print(device_fab_values)

dict_keys(['params', 'meta'])
<class 'list'>
[<psycopg2._json.Json object at 0x00000161CB9F1A60>, <psycopg2._json.Json object at 0x00000161CB9F1E50>]


In [17]:
sql = '''
    INSERT INTO DEVICE_FABRICATION (%s) 
    VALUES %s
    ON CONFLICT (params, meta) DO UPDATE
    SET (%s) = %s
    RETURNING device_fab_id
    
    '''
tup = (AsIs(','.join(device_fab_columns)), tuple(device_fab_values), AsIs(','.join(device_fab_columns)), tuple(device_fab_values))



device_fab_id = pg_query(sql, tup)
device_fab_id

Connecting to the PostgreSQL database...
Connection successful
Operation Successful


1

### 4. Checking and Storing Film Deposition Information 

In [18]:
import psycopg2
from psycopg2 import _json

coating_process_pg_entry, coating_process_columns, coating_process_values = convert_entry(coating_process)

#print(type(coating_process_pg_entry))
print(type(coating_process_columns))
print(coating_process_columns)
print(type(coating_process_values))
print(coating_process_values)

<class 'dict_keys'>
dict_keys(['deposition_type', 'params'])
<class 'list'>
['spin', <psycopg2._json.Json object at 0x00000161CD09FE80>]


In [19]:

sql = '''
    INSERT INTO FILM_DEPOSITION (%s) 
    VALUES %s
    ON CONFLICT (deposition_type, params, meta) DO UPDATE
    SET (%s) = %s
    RETURNING film_deposition_id
    
    '''
tup = (AsIs(','.join(coating_process_columns)), tuple(coating_process_values), AsIs(','.join(coating_process_columns)), tuple(coating_process_values))



film_deposition_id = pg_query(sql, tup)
film_deposition_id

Connecting to the PostgreSQL database...
Connection successful
Operation Successful


1

### 5. Checking and Storing the subprocess recipes (Solution Treatment, Substrate Pretreatment, Post Process)

###### 5.1 SOLUTION TREATMENT

In [20]:
solution_processing

{0: {'treatment_type': 'mixing',
  'process_step': 1,
  'params': {'mixing_speed': 250, 'temperature': 60, 'time': 1}},
 1: {'treatment_type': 'poor_solvent',
  'process_step': 2,
  'params': {'environment': 'air',
   'iupac_name': 'acetone',
   'pubchem_cid': 180,
   'vol_frac_added': 0.05}}}

In [21]:
import psycopg2
import json

# Function to insert data into SOLUTION_TREATMENT_STEP table
def insert_into_solution_treatment_step(cur, treatment_type, params, meta):
    # Check if the record already exists
    cur.execute(
        "SELECT solution_treatment_step_id FROM SOLUTION_TREATMENT_STEP WHERE treatment_type = %s AND params = %s::jsonb AND meta = %s::jsonb",
        (treatment_type, params, meta)
    )
    existing_id = cur.fetchone()

    if existing_id:
        solution_treatment_step_id = existing_id[0]
    else:
        # Insert data into SOLUTION_TREATMENT_STEP table
        cur.execute(
            "INSERT INTO SOLUTION_TREATMENT_STEP (treatment_type, params, meta) VALUES (%s, %s::jsonb, %s::jsonb) RETURNING solution_treatment_step_id",
            (treatment_type, params, meta)
        )
        solution_treatment_step_id = cur.fetchone()[0]

    return solution_treatment_step_id

# Function to insert data into SOLUTION_TREATMENT_ORDER table
def insert_into_solution_treatment_order(cur, solution_treatment_id, process_order, solution_treatment_step_id):
    # Check if the record already exists
    cur.execute(
        """
        SELECT solution_treatment_id
        FROM SOLUTION_TREATMENT_ORDER
        WHERE solution_treatment_id = %s
        AND process_order = %s
        AND solution_treatment_step_id = %s
        """,
        (solution_treatment_id, process_order, solution_treatment_step_id)
    )
    existing_combination = cur.fetchone()

    if not existing_combination:
        # Insert new record into SOLUTION_TREATMENT_ORDER table
        cur.execute(
            "INSERT INTO SOLUTION_TREATMENT_ORDER (solution_treatment_id, process_order, solution_treatment_step_id) VALUES (%s, %s, %s)",
            (solution_treatment_id, process_order, solution_treatment_step_id)
        )

    return solution_treatment_id

# Establish a connection to the PostgreSQL database
conn = psycopg2.connect(**param_dict)

# Create a cursor object to interact with the database
cur = conn.cursor()

solution_treatment_id = None

for treatment in solution_processing.values():
    # Convert params and meta to JSON format
    params_json = json.dumps(treatment.get('params', {}))
    meta_json = json.dumps(treatment.get('meta', {}))

    # Insert data into SOLUTION_TREATMENT_STEP table
    solution_treatment_step_id = insert_into_solution_treatment_step(cur, treatment['treatment_type'], params_json, meta_json)

    if solution_treatment_id is None:
        # Check if the record already exists in SOLUTION_TREATMENT table
        cur.execute(
            "SELECT solution_treatment_id FROM SOLUTION_TREATMENT WHERE solution_treatment_id IN (SELECT solution_treatment_id FROM SOLUTION_TREATMENT_ORDER WHERE solution_treatment_step_id = %s)",
            (solution_treatment_step_id,)
        )
        existing_id = cur.fetchone()

        if existing_id:
            solution_treatment_id = existing_id[0]
        else:
            # Insert data into SOLUTION_TREATMENT table
            cur.execute(
                "INSERT INTO SOLUTION_TREATMENT (solution_treatment_id) VALUES (DEFAULT) RETURNING solution_treatment_id"
            )
            solution_treatment_id = cur.fetchone()[0]

    # Insert data into SOLUTION_TREATMENT_ORDER table
    solution_treatment_id = insert_into_solution_treatment_order(cur, solution_treatment_id, treatment['process_step'], solution_treatment_step_id)

# Commit the changes to the database
print("Solution treatment saved successfully!")
print(solution_treatment_id)
conn.commit()

# Close the cursor and connection
cur.close()
conn.close()


Solution treatment saved successfully!
1


###### 5.2 SUBSTRATE PRETREATMENT

In [22]:
substrate_pretreat

{0: {'treatment_type': 'chemical_treat',
  'process_step': 1,
  'params': {'environment': 'air',
   'iupac_name': 'methanol',
   'temperature': 25,
   'time': 15},
  'meta': {'description': 'sonication'}},
 1: {'treatment_type': 'uv_ozone',
  'process_step': 2,
  'params': {'time': 30},
  'meta': {'equipment_model': 'Entela T20'}},
 2: {'treatment_type': 'sam',
  'process_step': 3,
  'params': {'sam_name': 'OTS-8',
   'iupac_name': 'octyltrichlorosilane',
   'pubchem_cid': 21354}}}

In [23]:
import psycopg2
import json

# Function to insert data into SUBSTRATE_PRETREAT_STEP table
def insert_into_substrate_pretreat_step(cur, treatment_type, params, meta):
    # Check if the record already exists
    cur.execute(
        "SELECT substrate_pretreat_step_id FROM SUBSTRATE_PRETREAT_STEP WHERE treatment_type = %s AND params = %s::jsonb AND meta = %s::jsonb",
        (treatment_type, params, meta)
    )
    existing_id = cur.fetchone()
# What is meaning of :: ?
    if existing_id:
        substrate_pretreat_step_id = existing_id[0]
    else:
        # Insert data into SUBSTRATE_PRETREAT_STEP table
        cur.execute(
            "INSERT INTO SUBSTRATE_PRETREAT_STEP (treatment_type, params, meta) VALUES (%s, %s::jsonb, %s::jsonb) RETURNING substrate_pretreat_step_id",
            (treatment_type, params, meta)
        )
        substrate_pretreat_step_id = cur.fetchone()[0]

    return substrate_pretreat_step_id

# Function to insert data into SUBSTRATE_PRETREAT_ORDER table
def insert_into_substrate_pretreat_order(cur, substrate_pretreat_id, process_order, substrate_pretreat_step_id):
    # Check if the record already exists
    cur.execute(
        """
        SELECT substrate_pretreat_id
        FROM SUBSTRATE_PRETREAT_ORDER
        WHERE substrate_pretreat_id = %s
        AND process_order = %s
        AND substrate_pretreat_step_id = %s
        """,
        (substrate_pretreat_id, process_order, substrate_pretreat_step_id)
    )
    existing_combination = cur.fetchone()

    if not existing_combination:
        # Insert new record into SUBSTRATE_PRETREAT_ORDER table
        cur.execute(
            "INSERT INTO SUBSTRATE_PRETREAT_ORDER (substrate_pretreat_id, process_order, substrate_pretreat_step_id) VALUES (%s, %s, %s)",
            (substrate_pretreat_id, process_order, substrate_pretreat_step_id)
        )

    return substrate_pretreat_id

# Establish a connection to the PostgreSQL database
conn = psycopg2.connect(**param_dict)

# Create a cursor object to interact with the database
cur = conn.cursor()

substrate_pretreat_id = None

for treatment in substrate_pretreat.values():
    # Convert params and meta to JSON format
    params_json = json.dumps(treatment.get('params', {}))
    meta_json = json.dumps(treatment.get('meta', {}))

    # Insert data into SUBSTRATE_PRETREAT_STEP table
    substrate_pretreat_step_id = insert_into_substrate_pretreat_step(cur, treatment['treatment_type'], params_json, meta_json)

    if substrate_pretreat_id is None:
        # Check if the record already exists in SUBSTRATE_PRETREAT table
        cur.execute(
            "SELECT substrate_pretreat_id FROM SUBSTRATE_PRETREAT WHERE substrate_pretreat_id IN (SELECT substrate_pretreat_id FROM SUBSTRATE_PRETREAT_ORDER WHERE substrate_pretreat_step_id = %s)",
            (substrate_pretreat_step_id,)
        )
        existing_id = cur.fetchone()

        if existing_id:
            substrate_pretreat_id = existing_id[0]
        else:
            # Insert data into SUBSTRATE_PRETREAT table
            cur.execute(
                "INSERT INTO SUBSTRATE_PRETREAT (substrate_pretreat_id) VALUES (DEFAULT) RETURNING substrate_pretreat_id"
            )
            substrate_pretreat_id = cur.fetchone()[0]

    # Insert data into SUBSTRATE_PRETREAT_ORDER table
    substrate_pretreat_id = insert_into_substrate_pretreat_order(cur, substrate_pretreat_id, treatment['process_step'], substrate_pretreat_step_id)

# Commit the changes to the database
print("Substrate pretreatment saved successfully!")
print(substrate_pretreat_id)
conn.commit()

# Close the cursor and connection
cur.close()
conn.close()


Substrate pretreatment saved successfully!
1


###### 5.3 POST PROCESSING TREATMENT

In [24]:
post_process

{0: {'treatment_type': 'annealing',
  'process_step': 1,
  'params': {'environment': 'air', 'temperature': 56, 'time': 0.16}}}

In [25]:
import psycopg2
import json

# Function to insert data into POSTPROCESS_STEP table
def insert_into_postprocess_step(cur, treatment_type, params, meta):
    # Check if the record already exists
    cur.execute(
        "SELECT postprocess_step_id FROM POSTPROCESS_STEP WHERE treatment_type = %s AND params = %s::jsonb AND meta = %s::jsonb",
        (treatment_type, params, meta)
    )
    existing_id = cur.fetchone()

    if existing_id:
        postprocess_step_id = existing_id[0]
    else:
        # Insert data into POSTPROCESS_STEP table
        cur.execute(
            "INSERT INTO POSTPROCESS_STEP (treatment_type, params, meta) VALUES (%s, %s::jsonb, %s::jsonb) RETURNING postprocess_step_id",
            (treatment_type, params, meta)
        )
        postprocess_step_id = cur.fetchone()[0]

    return postprocess_step_id

# Function to insert data into POSTPROCESS_ORDER table
def insert_into_postprocess_order(cur, postprocess_id, process_order, postprocess_step_id):
    # Check if the record already exists
    cur.execute(
        """
        SELECT postprocess_id
        FROM POSTPROCESS_ORDER
        WHERE postprocess_id = %s
        AND process_order = %s
        AND postprocess_step_id = %s
        """,
        (postprocess_id, process_order, postprocess_step_id)
    )
    existing_combination = cur.fetchone()

    if not existing_combination:
        # Insert new record into POSTPROCESS_ORDER table
        cur.execute(
            "INSERT INTO POSTPROCESS_ORDER (postprocess_id, process_order, postprocess_step_id) VALUES (%s, %s, %s)",
            (postprocess_id, process_order, postprocess_step_id)
        )

    return postprocess_id

# Establish a connection to the PostgreSQL database
conn = psycopg2.connect(**param_dict)

# Create a cursor object to interact with the database
cur = conn.cursor()

postprocess_id = None

for treatment in post_process.values():
    # Convert params and meta to JSON format
    params_json = json.dumps(treatment.get('params', {}))
    meta_json = json.dumps(treatment.get('meta', {}))

    # Insert data into POSTPROCESS_STEP table
    postprocess_step_id = insert_into_postprocess_step(cur, treatment['treatment_type'], params_json, meta_json)

    if postprocess_id is None:
        # Check if the record already exists in POSTPROCESS table
        cur.execute(
            "SELECT postprocess_id FROM POSTPROCESS WHERE postprocess_id IN (SELECT postprocess_id FROM POSTPROCESS_ORDER WHERE postprocess_step_id = %s)",
            (postprocess_step_id,)
        )
        existing_id = cur.fetchone()

        if existing_id:
            postprocess_id = existing_id[0]
        else:
            # Insert data into POSTPROCESS table
            cur.execute(
                "INSERT INTO POSTPROCESS (postprocess_id) VALUES (DEFAULT) RETURNING postprocess_id"
            )
            postprocess_id = cur.fetchone()[0]

    # Insert data into POSTPROCESS_ORDER table
    postprocess_id = insert_into_postprocess_order(cur, postprocess_id, treatment['process_step'], postprocess_step_id)

# Commit the changes to the database
print("Post Process treatment saved successfully!")
print(postprocess_id)
conn.commit()

# Close the cursor and connection
cur.close()
conn.close()


Post Process treatment saved successfully!
1


### 6. Checking and Storing information to the OFET_PROCESS TABLE and generating process_id

In [26]:
#printing the id's of attributes in OFET_PROCESS
print("solution_id is : {}".format(solution_id))
print("device_fab_id is : {}".format(device_fab_id))
print("solution_treatment_id is : {}".format(solution_treatment_id))
print("substrate_pretreat_id is : {}".format(substrate_pretreat_id))
print("film_deposition_id is : {}".format(film_deposition_id))
print("postprocess_id is : {}".format(postprocess_id))

ofet_process_columns = ['solution_id','solution_treatment_id','device_fab_id','substrate_pretreat_id','film_deposition_id','postprocess_id']
ofet_process_values = [solution_id,solution_treatment_id,device_fab_id,substrate_pretreat_id,film_deposition_id,postprocess_id]



solution_id is : 1
device_fab_id is : 1
solution_treatment_id is : 1
substrate_pretreat_id is : 1
film_deposition_id is : 1
postprocess_id is : 1


In [27]:
sql = '''
    INSERT INTO ofet_process (%s) 
    VALUES %s
    ON CONFLICT (solution_id, solution_treatment_id, device_fab_id, substrate_pretreat_id, film_deposition_id, postprocess_id) DO UPDATE
    SET (%s) = %s
    RETURNING process_id
    
    '''
tup = (AsIs(','.join(ofet_process_columns)), tuple(ofet_process_values), AsIs(','.join(ofet_process_columns)), tuple(ofet_process_values))

process_id = pg_query(sql, tup)
process_id


Connecting to the PostgreSQL database...
Connection successful
Operation Successful


1

### 7. Checking and Storing information to the SAMPLE TABLE and generating sample_id

In [28]:
#printing the id's of attributes in SAMPLE
print("exp_id is : {}".format(exp_id))
print("process_id is : {}".format(process_id))


sample_columns = ['exp_id','process_id']
sample_values = [exp_id,process_id]



exp_id is : 1
process_id is : 1


In [29]:
sql = '''
    INSERT INTO sample (%s) 
    VALUES %s
    ON CONFLICT (exp_id, process_id) DO UPDATE
    SET (%s) = %s
    RETURNING sample_id
    
    '''
tup = (AsIs(','.join(sample_columns)), tuple(sample_values), AsIs(','.join(sample_columns)), tuple(sample_values))

sample_id = pg_query(sql, tup)
sample_id

Connecting to the PostgreSQL database...
Connection successful
Operation Successful


1

### 8. Checking and Storing the measurement information 

#### 8.1 Storing Device Measurement Information 

In [30]:
device_meas

{0: {'measurement_type': 'transfer_curve',
  'data': {'hole_mobility': {'value': 0.000202,
    'unit': 'cm2/V-s',
    'replicates': 12,
    'error': 6e-05,
    'error_type': 'ci_95'},
   'hole_threshold_voltage': {'value': 2.5,
    'unit': 'V',
    'error': 0.1,
    'error_type': 'ci_95'}},
  'meta': {'mobility_regime': 'linear',
   'environment': 'air',
   'Vds': -3,
   'equipment_description': 'Agilent 4155C'}}}

In [31]:
device_meas_columns = ['measurement_type', 'data', 'meta']
device_meas_values = []

if device_meas[0]['meta'] is None:
    device_meas_values = [device_meas[0]['measurement_type'], device_meas[0]['data'], {}]
else:
    device_meas_values = [device_meas[0]['measurement_type'], device_meas[0]['data'], device_meas[0]['meta']]
#print(device_meas_values)

device_meas_columns = ['sample_id','measurement_type', 'data', 'meta']

device_meas_values.insert(0, sample_id)

device_meas_values[2] = json.dumps(device_meas_values[2])  # Convert 'data' to JSON
device_meas_values[3] = json.dumps(device_meas_values[3])  # Convert 'data' to JSON

print(device_meas_values)

[1, 'transfer_curve', '{"hole_mobility": {"value": 0.000202, "unit": "cm2/V-s", "replicates": 12, "error": 6e-05, "error_type": "ci_95"}, "hole_threshold_voltage": {"value": 2.5, "unit": "V", "error": 0.1, "error_type": "ci_95"}}', '{"mobility_regime": "linear", "environment": "air", "Vds": -3, "equipment_description": "Agilent 4155C"}']


In [32]:
sql = '''
    INSERT INTO measurement (%s) 
    VALUES %s
    ON CONFLICT (measurement_id,sample_id,measurement_type,data,meta) DO UPDATE
    SET (%s) = %s
    RETURNING measurement_id
    
    '''
tup = (AsIs(','.join(device_meas_columns)), tuple(device_meas_values), AsIs(','.join(device_meas_columns)), tuple(device_meas_values))

measurement_id = pg_query(sql, tup)
measurement_id

Connecting to the PostgreSQL database...
Connection successful
Error: 오류:  ON CONFLICT 절을 사용하는 경우, unique 나 exclude 제약 조건이 있어야 함



UnboundLocalError: local variable 'fetched' referenced before assignment

#### 8.1 Storing Other Measurement Information 

In [None]:
other_meas

In [None]:
for items in other_meas:
    #print(other_meas[items])
    other_meas_columns = ['measurement_type', 'data', 'meta']
    other_meas_values = []
    other_meas_values = [other_meas[items]['measurement_type'], other_meas[items]['data'], other_meas[items]['meta']]
    

    other_meas_columns = ['sample_id','measurement_type', 'data', 'meta']

    other_meas_values.insert(0, sample_id)

    other_meas_values[2] = json.dumps(other_meas_values[2])  # Convert 'data' to JSON
    other_meas_values[3] = json.dumps(other_meas_values[3])  # Convert 'data' to JSON

    print(device_meas_values)
    
    sql = '''
    INSERT INTO measurement (%s) 
    VALUES %s
    ON CONFLICT (measurement_id,sample_id,measurement_type,data,meta) DO UPDATE
    SET (%s) = %s
    RETURNING measurement_id
    
    '''
    tup = (AsIs(','.join(other_meas_columns)), tuple(other_meas_values), AsIs(','.join(other_meas_columns)), tuple(other_meas_values))

    measurement_id = pg_query(sql, tup)
    print(measurement_id)

In [None]:
other_meas[1]['data']