## Python Code to Extract Data From Template and Transfer to PostgreSQL
#### Authors : Aaron Liu, Rahul Venkatesh, Jessica Bonsu, Myeongyeon Lee 
##### Date Edited : 07-19-2023

In [None]:
## Required Packages 
import pandas as pd
import numpy as np
import psycopg2 as pg
import os
from psycopg2.extras import Json
from psycopg2.extensions import AsIs
import functools
import json
import sys
import requests
import pprint
import bibtexparser
from psycopg2 import _json

#pass variables from run file
global fpath,param_dict,warning

In [None]:

# Function to remove rows that have no value (NaN) in the second column
def remove_emptyrows(df):
    """Return only the rows of ``df`` whose second column holds a value.

    Rows with NaN in the column at position 1 are dropped; the surviving
    rows keep their original index labels.
    """
    has_value = df.iloc[:, 1].notna()
    return df[has_value]

# Function to convert a sheet into dictionary data type
def read_sheet(filepath, sheet_name, ordering=False, usecols="A,B,D", meas=False):
    """Read one sheet of the Excel template and return it as a dictionary.

    Parameters
    ----------
    filepath : str or path-like
        Location of the Excel workbook, handed to ``pandas.read_excel``.
    sheet_name : str
        Name of the sheet to load.
    ordering : bool, optional
        When truthy, the sheet is cut into separate tables with ``split_df``
        and the result maps each table's 0-based position to the dictionary
        built from it. Otherwise the whole sheet becomes one dictionary.
    usecols : str, optional
        Excel column selection forwarded to ``pandas.read_excel``.
    meas : bool, optional
        Not used by this function; kept so existing calls keep working.

    Returns
    -------
    dict
        The output of ``table_to_dict`` for the sheet, or a dict of such
        outputs keyed by table position when ``ordering`` is set.
    """
    sheet_df = pd.read_excel(filepath, sheet_name=sheet_name, usecols=usecols)

    # Rows without a value in the second column carry no information.
    filled_df = remove_emptyrows(sheet_df)

    if ordering:
        # Processing order matters: keep one dictionary per table, in order.
        return {
            position: table_to_dict(table)
            for position, table in enumerate(split_df(filled_df))
        }

    return table_to_dict(filled_df)

def split_df(df_):
    """Split a sheet into the tables delimited by '#' marker rows.

    A marker row is any row whose first-column value contains '#'. Each
    returned dataframe holds the rows strictly between two consecutive
    marker rows (selected by index label), in sheet order.

    Cells in the first column that are not strings (for example NaN) are
    treated as ordinary, non-marker rows; without ``na=False`` they turn
    the mask into a non-boolean array and the selection raises.

    NOTE(review): rows after the last marker are not returned, so the sheet
    presumably ends with a closing '#' row - confirm against the template.
    """
    is_marker = df_.iloc[:, 0].str.contains('#', na=False)
    marker_labels = df_[is_marker].index.values

    # Pair every marker with the next one; the rows in between form a table.
    return [
        df_.loc[start + 1:stop - 1, :]
        for start, stop in zip(marker_labels[:-1], marker_labels[1:])
    ]

def table_to_dict(df_):
    """Turn one table of the template into a nested dictionary.

    The table must have a ``JSON`` column. Rows whose ``JSON`` cell is empty
    become top-level ``first column -> second column`` entries. Rows tagged
    ``'data'`` are gathered under the ``'data'`` key, one entry per row,
    holding the non-empty cells from column ``value`` through column
    ``error_type``. Rows with any other tag (e.g. ``meta``) are gathered
    under that tag as ``first column -> second column`` entries.
    """
    tags = df_.JSON

    # Untagged rows are stored flat at the top level.
    result = dict(df_[pd.isna(tags)].iloc[:, :2].values)

    for tag in pd.unique(tags):
        if pd.isna(tag):
            continue

        tagged_rows = df_[tags == tag]

        if tag == 'data':
            # One nested dict per measurement row, empty cells left out.
            result['data'] = {
                row[row.index[0]]: row['value':'error_type'].dropna().to_dict()
                for _, row in tagged_rows.iterrows()
            }
        else:
            result[tag] = dict(tagged_rows.iloc[:, :2].values)

    return result

In [None]:

# Adapters necessary for converting python data types to PostgreSQL compatible data types 
def register_pg_adapters():
    """Register the psycopg2 adapters this script relies on.

    ``numpy.float64`` and ``numpy.int64`` values are passed into SQL through
    ``AsIs``. Plain Python floats go through psycopg2's ``Float`` adapter,
    except NaN, which is sent as SQL ``NULL``.
    """
    null_literal = AsIs('NULL')
    float_adapter = pg.extensions.Float

    def _adapt_numpy_scalar(value):
        return AsIs(value)

    def _adapt_float(value):
        return null_literal if np.isnan(value) else float_adapter(value)

    pg.extensions.register_adapter(np.float64, _adapt_numpy_scalar)
    pg.extensions.register_adapter(np.int64, _adapt_numpy_scalar)
    pg.extensions.register_adapter(float, _adapt_float)


def connect(params_dict):
    """Open a connection to the PostgreSQL server.

    ``params_dict`` is expanded into keyword arguments for
    ``psycopg2.connect``. If the connection cannot be opened, the error is
    printed and the interpreter exits with status 1.
    """
    try:
        return pg.connect(**params_dict)
    except (Exception, pg.DatabaseError) as error:
        print(error)
        sys.exit(1)

def pg_query(sql, tup):
    """Run one statement on a fresh connection and return its first value.

    The connection is opened with the module-level ``param_dict``. After
    executing ``sql`` with the parameters ``tup``, the first column of the
    first result row is fetched and the transaction is committed.

    Parameters
    ----------
    sql : str
        Statement with psycopg2 placeholders; it must return a row
        (e.g. ``INSERT ... RETURNING id``).
    tup : tuple
        Values bound to the placeholders.

    Returns
    -------
    The fetched value, or ``None`` if the statement failed before a value
    was fetched (the error is printed and the transaction rolled back).
    """
    fetched = None
    conn = None
    cur = None
    try:
        conn = connect(param_dict)
        cur = conn.cursor()

        # Pass SQL query, using string and placeholders
        cur.execute(sql, tup)
        fetched = cur.fetchone()[0]

        conn.commit()
    except (Exception, pg.DatabaseError) as error:
        print("Error: %s" % error)
        # conn is still None if the failure happened while connecting.
        if conn is not None:
            conn.rollback()
    finally:
        # Release the cursor and connection on every path; either may be
        # unset when the failure happened before it was created.
        if cur is not None:
            cur.close()
        if conn is not None:
            conn.close()

    return fetched

def convert_entry(entry_dict):
    """Prepare a dictionary for insertion and list its columns and values.

    Every value that is a plain ``dict`` is replaced, in ``entry_dict``
    itself, by a psycopg2 ``Json`` wrapper. Returns a 3-tuple of the same
    (now modified) dictionary, its keys view, and a list of its values in
    key order.
    """
    for name, item in entry_dict.items():
        if type(item) is dict:
            entry_dict[name] = Json(item)

    column_names = entry_dict.keys()
    column_values = [entry_dict[name] for name in column_names]

    return entry_dict, column_names, column_values

# Given a valid doi string, return a dictionary of digital object information.
def doi2dict(doi):
    """Fetch the BibTeX record of a DOI and return it as a dictionary.

    The DOI resolver is asked for ``application/x-bibtex``; the response
    text is parsed with ``bibtexparser`` and the first entry is returned.

    Parameters
    ----------
    doi : str
        DOI string, appended to the resolver URL as-is.

    Raises
    ------
    IndexError
        If the response contains no BibTeX entry.
    requests.exceptions.RequestException
        If the request fails or does not complete within the timeout.
    """
    url = "http://dx.doi.org/" + doi
    headers = {"accept": "application/x-bibtex"}
    # Without a timeout, requests waits forever on an unresponsive server.
    response_text = requests.get(url, headers=headers, timeout=30).text
    bibdata = bibtexparser.bparser.BibTexParser().parse(response_text)
    return bibdata.entries[0]




## Update filepath and param_dict 

In [None]:
# Install the NumPy / NaN adapters before any value is sent to PostgreSQL.
register_pg_adapters()

# Store each sheet of the template file (fpath) as a dictionary.
# Sheets read with ordering=True are split into separate tables and keyed
# by table position; the others become a single dictionary.
exp_info = read_sheet(fpath, 'Data Origin')
solution_makeup = read_sheet(fpath, 'Solution Makeup', ordering=True)
solution_processing = read_sheet(fpath, 'Solution Treatment', ordering=True)
device_fab = read_sheet(fpath, 'Device Fabrication')
substrate_pretreat = read_sheet(fpath, 'Substrate Pretreat', ordering=True)
coating_process = read_sheet(fpath, 'Coating Process')
post_process = read_sheet(fpath, 'Post-Processing', ordering=True)
# The measurement sheets are read with the wider column range A:G.
device_meas = read_sheet(fpath, 'Device Measurement', usecols="A:G", ordering=True)
other_meas = read_sheet(fpath, 'Other Measurements', usecols="A:G", ordering=True)


In [None]:
import psycopg2

# Define a list of tables and their primary key columns (only considering tables that have a primary key _id as type SERIAL)
tables = [
    {'name': 'EXPERIMENT_INFO', 'pk_column': 'exp_id'},
    {'name': 'SOLUTION', 'pk_column': 'solution_id'},
    {'name': 'POLYMER', 'pk_column': 'polymer_id'},
    {'name': 'SOLUTION_TREATMENT', 'pk_column': 'solution_treatment_id'},
    {'name': 'SOLUTION_TREATMENT_STEP', 'pk_column': 'solution_treatment_step_id'},
    {'name': 'DEVICE_FABRICATION', 'pk_column': 'device_fab_id'},
    {'name': 'SUBSTRATE_PRETREAT', 'pk_column': 'substrate_pretreat_id'},
    {'name': 'SUBSTRATE_PRETREAT_STEP', 'pk_column': 'substrate_pretreat_step_id'},
    {'name': 'FILM_DEPOSITION', 'pk_column': 'film_deposition_id'},
    {'name': 'POSTPROCESS', 'pk_column': 'postprocess_id'},
    {'name': 'POSTPROCESS_STEP', 'pk_column': 'postprocess_step_id'},
    {'name': 'OFET_PROCESS', 'pk_column': 'process_id'},
    {'name': 'SAMPLE', 'pk_column': 'sample_id'},
    {'name': 'MEASUREMENT', 'pk_column': 'measurement_id'}
]

# Connect to the PostgreSQL database
conn = pg.connect(**param_dict)

try:
    # Point every serial sequence just past the largest id already stored,
    # so the next insert does not collide with an existing primary key.
    for table in tables:
        table_name = table['name']
        pk_column = table['pk_column']

        with conn.cursor() as cursor:
            # Identifiers come from the fixed list above, not from user input.
            cursor.execute(f"SELECT MAX({pk_column}) FROM {table_name}")
            max_id = cursor.fetchone()[0]

            # MAX() is NULL for an empty table; start that sequence at 1.
            next_id = (max_id or 0) + 1

            # Update the serial before entering the next record
            cursor.execute(f"ALTER SEQUENCE {table_name.lower()}_{pk_column}_seq RESTART WITH {next_id}")

        conn.commit()
finally:
    # Close the database connection even if one of the statements failed
    conn.close()


### Reading and Extracting Data From Sheets in Template

### Transferring Information From Template To PostgreSQL

### 1. Checking and Storing Experiment Information

In [None]:
def store_experimental_info(exp_info):
    """Insert (or update on conflict) the experiment record and return its id.

    ``exp_info`` is first normalised through a JSON round trip: datetime
    values become ``MM/DD/YYYY`` strings and psycopg2 ``Json`` wrappers are
    unwrapped to the object they hold. When ``citation_type`` is
    ``'literature'``, the bibliographic fields looked up from
    ``exp_info['meta']['doi']`` are merged into ``meta``.

    Parameters
    ----------
    exp_info : dict
        Contents of the 'Data Origin' sheet; must contain ``citation_type``
        and, for literature entries, ``meta`` with a ``doi``.

    Returns
    -------
    The ``exp_id`` returned by the database, or ``None`` when ``pg_query``
    reports a failure.
    """
    # Imported here because the module-level imports do not provide datetime.
    import datetime

    def custom_encoder(obj):
        # Dates read from the template are stored as MM/DD/YYYY strings.
        if isinstance(obj, datetime.datetime):
            return obj.strftime('%m/%d/%Y')
        # Values already wrapped by convert_entry are unwrapped again.
        if isinstance(obj, Json):
            return obj.adapted
        raise TypeError("Object of type {} is not JSON serializable".format(type(obj)))

    # Round trip through JSON to get a dictionary of plain, serialisable values.
    exp_info = json.loads(json.dumps(exp_info, default=custom_encoder))

    if exp_info['citation_type'] == 'literature':
        doi = exp_info['meta']['doi']
        doi_info = doi2dict(doi)  # Fetch additional information using DOI

        # Add the additional information to the existing dictionary
        exp_info['meta'].update(doi_info)

    exp_pg_entry, exp_columns, exp_values = convert_entry(exp_info)
    sql = '''
        INSERT INTO experiment_info (%s) 
        VALUES %s
        ON CONFLICT (citation_type, meta) DO UPDATE
        SET (%s) = %s
        RETURNING exp_id
        '''
    tup = (AsIs(','.join(exp_columns)), tuple(exp_values), AsIs(','.join(exp_columns)), tuple(exp_values))
    exp_id = pg_query(sql, tup)

    print("Experiment information stored successfully with id : {}".format(exp_id))

    return exp_id


### 2. Checking and Storing Solution Information (Polymer, Solvent, Solution) including blends



In [None]:
def store_solution_info(solution_makeup):
    """Unpack the 'Solution Makeup' sheet into the values needed for storage.

    The sheet dictionary is passed through ``convert_entry``; the first
    table is read as the solution itself, and the tables whose
    ``entity_type`` is ``'solvent'`` or ``'polymer'`` are collected in
    order. Polymers lacking ``mn`` or ``dispersity`` get ``None`` for them.

    Returns
    -------
    tuple
        ``(concentration, solvent_ids, vol_fracs, polymer_ids, wt_fracs)``
        where ``solvent_ids`` holds ``(pubchem_cid, iupac_name)`` tuples and
        ``polymer_ids`` holds ``(common_name, iupac_name, mn, mw,
        dispersity, meta)`` tuples with ``meta`` serialised as a JSON string.
    """
    _entry, _columns, makeup_values = convert_entry(solution_makeup)

    # The first table of the sheet describes the solution.
    solution_record = makeup_values[0].adapted

    # Unwrap the Json objects, keeping solvents and polymers apart
    # (several of each are allowed, e.g. for blends).
    solvent_records = [
        item.adapted
        for item in makeup_values
        if item.adapted.get("entity_type") == "solvent"
    ]
    polymer_records = [
        item.adapted
        for item in makeup_values
        if item.adapted.get("entity_type") == "polymer"
    ]

    # Mn and dispersity may be absent (e.g. for an insulating polymer).
    for record in polymer_records:
        record.setdefault('mn', None)
        record.setdefault('dispersity', None)

    concentration = solution_record['concentration']

    solvent_ids, vol_fracs = [], []
    for record in solvent_records:
        solvent_key = (record['pubchem_cid'], record['iupac_name'])
        fraction = record['vol_frac']
        solvent_ids.append(solvent_key)
        vol_fracs.append(fraction)

    polymer_ids, wt_fracs = [], []
    for record in polymer_records:
        descriptors = (
            record['common_name'],
            record['iupac_name'],
            record['mn'],
            record['mw'],
            record['dispersity'],
        )
        fraction = record['wt_frac']
        meta_json = json.dumps(record['meta'])
        polymer_ids.append(descriptors + (meta_json,))
        wt_fracs.append(fraction)

    return concentration, solvent_ids, vol_fracs, polymer_ids, wt_fracs


In [None]:
def insert_solution_info(param_dict, solution_makeup):
    """Store a solution with its solvents and polymers; return the solution id.

    The values come from ``store_solution_info(solution_makeup)``. An
    existing solution with the same concentration, solvents, polymers and
    fractions is reused; otherwise a new SOLUTION row is inserted. Solvents
    and polymers are looked up and inserted when missing, and the link
    tables SOLUTION_MAKEUP_SOLVENT / SOLUTION_MAKEUP_POLYMER are filled.

    Parameters
    ----------
    param_dict : dict
        Keyword arguments for ``psycopg2.connect``.
    solution_makeup : dict
        Contents of the 'Solution Makeup' sheet.

    Returns
    -------
    The ``solution_id``, or ``None`` if a database error occurred (the error
    is printed and the transaction rolled back).
    """
    concentration, solvent_ids, vol_fracs, polymer_ids, wt_fracs = store_solution_info(solution_makeup)

    conn = pg.connect(**param_dict)

    # "with conn" only ends the transaction; the finally clause is what
    # actually closes the connection, on the success path as well.
    try:
        with conn:
            with conn.cursor() as cursor:
                try:
                    # Check if the unique combination exists
                    # NOTE(review): the row-wise IN comparison below cannot
                    # match polymers whose mn/mw/dispersity is NULL, so such
                    # solutions are always treated as new.
                    select_solution_id_sql = '''
                        SELECT sm.solution_id
                        FROM SOLUTION_MAKEUP_SOLVENT sms
                        JOIN SOLUTION_MAKEUP_POLYMER smp ON sms.solution_id = smp.solution_id
                        JOIN SOLVENT s ON sms.solvent_id = s.pubchem_cid
                        JOIN POLYMER p ON smp.polymer_id = p.polymer_id
                        JOIN SOLUTION sm ON sms.solution_id = sm.solution_id
                        WHERE sm.concentration = %s
                        AND (s.pubchem_cid, s.iupac_name) IN %s
                        AND (p.common_name, p.iupac_name, p.mn, p.mw, p.dispersity, p.meta) IN %s
                        GROUP BY sm.solution_id
                        HAVING COUNT(DISTINCT smp.polymer_id) = %s
                        AND COUNT(DISTINCT sms.solvent_id) = %s
                        AND ARRAY_AGG(sms.vol_frac) = %s::double precision[]
                        AND ARRAY_AGG(smp.wt_frac) = %s::double precision[]
                    '''

                    cursor.execute(select_solution_id_sql, (concentration, tuple(solvent_ids), tuple(polymer_ids), len(polymer_ids), len(solvent_ids), vol_fracs, wt_fracs))
                    existing_solution = cursor.fetchone()

                    # Checking if there is an existing solution
                    if existing_solution:
                        solution_id = existing_solution[0]
                    else:
                        # Insert into SOLUTION table
                        insert_solution_sql = '''
                            INSERT INTO SOLUTION (concentration)
                            VALUES (%s)
                            RETURNING solution_id
                        '''
                        cursor.execute(insert_solution_sql, (concentration,))
                        solution_id = cursor.fetchone()[0]

                    # Reading Solvent data
                    for solvent_id, vol_frac in zip(solvent_ids, vol_fracs):
                        pubchem_cid, iupac_name = solvent_id

                        # Check if the solvent exists
                        select_solvent_id_sql = '''
                            SELECT pubchem_cid
                            FROM SOLVENT
                            WHERE iupac_name = %s
                        '''
                        cursor.execute(select_solvent_id_sql, (iupac_name,))
                        existing_solvent = cursor.fetchone()

                        if existing_solvent:
                            solvent_id = existing_solvent[0]
                        else:
                            # Insert into SOLVENT table
                            insert_solvent_sql = '''
                                INSERT INTO SOLVENT (pubchem_cid, iupac_name)
                                VALUES (%s, %s)
                                RETURNING pubchem_cid
                            '''
                            cursor.execute(insert_solvent_sql, (pubchem_cid, iupac_name))
                            solvent_id = cursor.fetchone()[0]

                        # Insert or update SOLUTION_MAKEUP_SOLVENT table
                        insert_solution_makeup_solvent_sql = '''
                            INSERT INTO SOLUTION_MAKEUP_SOLVENT (solution_id, solvent_id, vol_frac)
                            VALUES (%s, %s, %s)
                            ON CONFLICT (solution_id, solvent_id, vol_frac) DO UPDATE
                            SET solution_id = SOLUTION_MAKEUP_SOLVENT.solution_id,
                                solvent_id = SOLUTION_MAKEUP_SOLVENT.solvent_id,
                                vol_frac = SOLUTION_MAKEUP_SOLVENT.vol_frac
                        '''
                        cursor.execute(insert_solution_makeup_solvent_sql, (solution_id, solvent_id, vol_frac))

                    # Reading the polymer data
                    for polymer_id, wt_frac in zip(polymer_ids, wt_fracs):
                        common_name, iupac_name, mn, mw, dispersity, meta = polymer_id

                        # Check if the polymer exists. mn and dispersity may
                        # be None; "= NULL" never matches in SQL, so the
                        # NULL-safe comparison is needed to find such rows
                        # instead of inserting a duplicate every run.
                        select_polymer_id_sql = '''
                            SELECT polymer_id
                            FROM POLYMER
                            WHERE common_name = %s
                            AND iupac_name = %s
                            AND mn IS NOT DISTINCT FROM %s
                            AND mw IS NOT DISTINCT FROM %s
                            AND dispersity IS NOT DISTINCT FROM %s
                            AND meta = %s::jsonb
                        '''
                        cursor.execute(select_polymer_id_sql, (common_name, iupac_name, mn, mw, dispersity, meta))
                        existing_polymer = cursor.fetchone()

                        if existing_polymer:
                            polymer_id = existing_polymer[0]
                        else:
                            # Insert into POLYMER table
                            insert_polymer_sql = '''
                                INSERT INTO POLYMER (common_name, iupac_name, mn, mw, dispersity, meta)
                                VALUES (%s, %s, %s, %s, %s, %s::jsonb)
                                RETURNING polymer_id
                            '''
                            cursor.execute(insert_polymer_sql, (common_name, iupac_name, mn, mw, dispersity, meta))
                            polymer_id = cursor.fetchone()[0]

                        # Insert or update SOLUTION_MAKEUP_POLYMER table
                        insert_solution_makeup_polymer_sql = '''
                            INSERT INTO SOLUTION_MAKEUP_POLYMER (solution_id, polymer_id, wt_frac)
                            VALUES (%s, %s, %s)
                            ON CONFLICT (solution_id, polymer_id, wt_frac) DO UPDATE
                            SET solution_id = SOLUTION_MAKEUP_POLYMER.solution_id,
                                polymer_id = SOLUTION_MAKEUP_POLYMER.polymer_id,
                                wt_frac = SOLUTION_MAKEUP_POLYMER.wt_frac
                        '''
                        cursor.execute(insert_solution_makeup_polymer_sql, (solution_id, polymer_id, wt_frac))

                    conn.commit()
                    print("Solution makeup information saved successfully with id: {}".format(solution_id))

                    return solution_id

                except Exception as e:
                    conn.rollback()
                    print("An error occurred:", str(e))
                    return None
    finally:
        # Close the database connection
        conn.close()


### 3. Checking and Storing Device Information


In [None]:
def store_device_fabrication(device_fab, param_dict):
    """Insert (or update on conflict) the device fabrication record.

    Parameters
    ----------
    device_fab : dict
        Contents of the 'Device Fabrication' sheet. It is passed through
        ``convert_entry``; a ``meta`` column with an empty JSON object is
        added when the sheet provides none.
    param_dict : dict
        Keyword arguments for ``psycopg2.connect``.

    Returns
    -------
    The ``device_fab_id`` returned by the database, or ``None`` if the
    insert failed (the error is printed).
    """
    # Defined up front so a failed insert returns None instead of raising
    # UnboundLocalError at the return statement.
    device_fab_id = None

    # Start transaction
    conn = pg.connect(**param_dict)

    try:
        with conn:
            with conn.cursor() as cursor:
                # Convert device fabrication entry to columns and values
                device_fab_pg_entry, device_fab_columns, device_fab_values = convert_entry(device_fab)

                # If meta information is missing, add it as an empty dictionary
                device_fab_columns_list = list(device_fab_columns)
                if 'meta' not in device_fab_columns_list:
                    device_fab_columns_list.append('meta')
                    device_fab_values.append({})

                # Convert values to JSON strings if they are dictionaries
                device_fab_values = [json.dumps(value) if isinstance(value, dict) else value for value in device_fab_values]

                # Insert into DEVICE_FABRICATION table
                sql = '''
                    INSERT INTO DEVICE_FABRICATION (%s) 
                    VALUES %s
                    ON CONFLICT (params, meta) DO UPDATE
                    SET (%s) = %s
                    RETURNING device_fab_id
                '''
                tup = (AsIs(','.join(device_fab_columns_list)), tuple(device_fab_values), AsIs(','.join(device_fab_columns_list)), tuple(device_fab_values))
                cursor.execute(sql, tup)
                device_fab_id = cursor.fetchone()[0]

                print("Device fabrication information saved successfully with id: {}".format(device_fab_id))

    except Exception as e:
        print("An error occurred:", str(e))
    finally:
        # Close the database connection
        conn.close()

    return device_fab_id


### 4. Checking and Storing Film Deposition Information


In [None]:
def store_film_deposition(coating_process, param_dict):
    """Insert (or update on conflict) the film deposition record.

    Parameters
    ----------
    coating_process : dict
        Contents of the 'Coating Process' sheet. It is passed through
        ``convert_entry``; a ``meta`` column with an empty JSON object is
        added when the sheet provides none.
    param_dict : dict
        Keyword arguments for ``psycopg2.connect``.

    Returns
    -------
    The ``film_deposition_id`` returned by the database, or ``None`` if the
    insert failed (the error is printed).
    """
    # Defined up front so a failed insert returns None instead of raising
    # UnboundLocalError at the return statement.
    film_deposition_id = None

    # Start transaction
    conn = pg.connect(**param_dict)

    try:
        with conn:
            with conn.cursor() as cursor:
                # Convert coating process entry to columns and values
                coating_process_pg_entry, coating_process_columns, coating_process_values = convert_entry(coating_process)

                # If meta information is missing, add it as an empty dictionary
                coating_process_columns_list = list(coating_process_columns)
                if 'meta' not in coating_process_columns_list:
                    coating_process_columns_list.append('meta')
                    coating_process_values.append({})

                # Convert values to JSON strings if they are dictionaries
                coating_process_values = [json.dumps(value) if isinstance(value, dict) else value for value in coating_process_values]

                # Insert into FILM_DEPOSITION table
                sql = '''
                    INSERT INTO FILM_DEPOSITION (%s) 
                    VALUES %s
                    ON CONFLICT (deposition_type, params, meta) DO UPDATE
                    SET (%s) = %s
                    RETURNING film_deposition_id
                '''
                tup = (AsIs(','.join(coating_process_columns_list)), tuple(coating_process_values), AsIs(','.join(coating_process_columns_list)), tuple(coating_process_values))
                cursor.execute(sql, tup)
                film_deposition_id = cursor.fetchone()[0]

                print("Film deposition information saved successfully with id: {}".format(film_deposition_id))

    except Exception as e:
        print("An error occurred:", str(e))
    finally:
        # Close the database connection
        conn.close()

    return film_deposition_id


### 5. Checking and Storing the subprocess recipes (Solution Treatment, Substrate Pretreatment, Post Process)


###### 5.1 SOLUTION TREATMENT

In [None]:
def store_solution_treatment(param_dict, solution_processing):
    """Store the ordered solution treatment steps and return the recipe id.

    For every treatment in ``solution_processing`` the matching
    SOLUTION_TREATMENT_STEP row is looked up (or inserted) and linked to the
    recipe in SOLUTION_TREATMENT_ORDER at position ``process_step``. The
    recipe id is determined once, from the first treatment: an existing
    SOLUTION_TREATMENT that already uses that step is reused, otherwise a
    new one is created.

    Parameters
    ----------
    param_dict : dict
        Keyword arguments for ``psycopg2.connect``.
    solution_processing : dict
        Treatments from the 'Solution Treatment' sheet. Each value must
        provide ``treatment_type`` and ``process_step``; ``params`` and
        ``meta`` default to empty objects.

    Returns
    -------
    The ``solution_treatment_id``, or ``None`` when there are no treatments.

    Raises
    ------
    Any exception raised while talking to the database; the connection is
    closed without committing in that case.
    """

    def insert_into_solution_treatment_step(cur, treatment_type, params, meta):
        """Return the id of the matching step row, inserting it if absent."""
        cur.execute(
            "SELECT solution_treatment_step_id FROM SOLUTION_TREATMENT_STEP WHERE treatment_type = %s AND params = %s::jsonb AND meta = %s::jsonb",
            (treatment_type, params, meta)
        )
        existing_id = cur.fetchone()

        if existing_id:
            return existing_id[0]

        cur.execute(
            "INSERT INTO SOLUTION_TREATMENT_STEP (treatment_type, params, meta) VALUES (%s, %s::jsonb, %s::jsonb) RETURNING solution_treatment_step_id",
            (treatment_type, params, meta)
        )
        return cur.fetchone()[0]

    def insert_into_solution_treatment_order(cur, solution_treatment_id, process_order, solution_treatment_step_id):
        """Link a step to the recipe at ``process_order`` unless already linked."""
        cur.execute(
            """
            SELECT solution_treatment_id
            FROM SOLUTION_TREATMENT_ORDER
            WHERE solution_treatment_id = %s
            AND process_order = %s
            AND solution_treatment_step_id = %s
            """,
            (solution_treatment_id, process_order, solution_treatment_step_id)
        )
        existing_combination = cur.fetchone()

        if not existing_combination:
            cur.execute(
                "INSERT INTO SOLUTION_TREATMENT_ORDER (solution_treatment_id, process_order, solution_treatment_step_id) VALUES (%s, %s, %s)",
                (solution_treatment_id, process_order, solution_treatment_step_id)
            )

        return solution_treatment_id

    # Establish a connection to the PostgreSQL database
    conn = pg.connect(**param_dict)

    solution_treatment_id = None

    # try/finally so the connection is released even when a statement fails.
    try:
        with conn.cursor() as cur:
            for treatment in solution_processing.values():
                # Convert params and meta to JSON format
                params_json = json.dumps(treatment.get('params', {}))
                meta_json = json.dumps(treatment.get('meta', {}))

                # Insert data into SOLUTION_TREATMENT_STEP table
                solution_treatment_step_id = insert_into_solution_treatment_step(cur, treatment['treatment_type'], params_json, meta_json)

                if solution_treatment_id is None:
                    # Check if the record already exists in SOLUTION_TREATMENT table
                    cur.execute(
                        "SELECT solution_treatment_id FROM SOLUTION_TREATMENT WHERE solution_treatment_id IN (SELECT solution_treatment_id FROM SOLUTION_TREATMENT_ORDER WHERE solution_treatment_step_id = %s)",
                        (solution_treatment_step_id,)
                    )
                    existing_id = cur.fetchone()

                    if existing_id:
                        solution_treatment_id = existing_id[0]
                    else:
                        # Insert data into SOLUTION_TREATMENT table
                        cur.execute(
                            "INSERT INTO SOLUTION_TREATMENT (solution_treatment_id) VALUES (DEFAULT) RETURNING solution_treatment_id"
                        )
                        solution_treatment_id = cur.fetchone()[0]

                # Insert data into SOLUTION_TREATMENT_ORDER table
                solution_treatment_id = insert_into_solution_treatment_order(cur, solution_treatment_id, treatment['process_step'], solution_treatment_step_id)

            # Commit the changes to the database
            print("Solution treatment saved successfully with id : {}".format(solution_treatment_id))
            conn.commit()
    finally:
        conn.close()

    return solution_treatment_id


###### 5.2 SUBSTRATE PRETREATMENT


In [None]:

def store_substrate_pretreatment(param_dict, substrate_pretreat):
    """Store the ordered substrate pretreatment steps and return the recipe id.

    For every treatment in ``substrate_pretreat`` the matching
    SUBSTRATE_PRETREAT_STEP row is looked up (or inserted) and linked to the
    recipe in SUBSTRATE_PRETREAT_ORDER at position ``process_step``. The
    recipe id is determined once, from the first treatment: an existing
    SUBSTRATE_PRETREAT that already uses that step is reused, otherwise a
    new one is created.

    Parameters
    ----------
    param_dict : dict
        Keyword arguments for ``psycopg2.connect``.
    substrate_pretreat : dict
        Treatments from the 'Substrate Pretreat' sheet. Each value must
        provide ``treatment_type`` and ``process_step``; ``params`` and
        ``meta`` default to empty objects.

    Returns
    -------
    The ``substrate_pretreat_id``, or ``None`` when there are no treatments.

    Raises
    ------
    Any exception raised while talking to the database; the connection is
    closed without committing in that case.
    """

    def insert_into_substrate_pretreat_step(cur, treatment_type, params, meta):
        """Return the id of the matching step row, inserting it if absent."""
        cur.execute(
            "SELECT substrate_pretreat_step_id FROM SUBSTRATE_PRETREAT_STEP WHERE treatment_type = %s AND params = %s::jsonb AND meta = %s::jsonb",
            (treatment_type, params, meta)
        )
        existing_id = cur.fetchone()

        if existing_id:
            return existing_id[0]

        cur.execute(
            "INSERT INTO SUBSTRATE_PRETREAT_STEP (treatment_type, params, meta) VALUES (%s, %s::jsonb, %s::jsonb) RETURNING substrate_pretreat_step_id",
            (treatment_type, params, meta)
        )
        return cur.fetchone()[0]

    def insert_into_substrate_pretreat_order(cur, substrate_pretreat_id, process_order, substrate_pretreat_step_id):
        """Link a step to the recipe at ``process_order`` unless already linked."""
        cur.execute(
            """
            SELECT substrate_pretreat_id
            FROM SUBSTRATE_PRETREAT_ORDER
            WHERE substrate_pretreat_id = %s
            AND process_order = %s
            AND substrate_pretreat_step_id = %s
            """,
            (substrate_pretreat_id, process_order, substrate_pretreat_step_id)
        )
        existing_combination = cur.fetchone()

        if not existing_combination:
            cur.execute(
                "INSERT INTO SUBSTRATE_PRETREAT_ORDER (substrate_pretreat_id, process_order, substrate_pretreat_step_id) VALUES (%s, %s, %s)",
                (substrate_pretreat_id, process_order, substrate_pretreat_step_id)
            )

        return substrate_pretreat_id

    # Establish a connection to the PostgreSQL database
    conn = pg.connect(**param_dict)

    substrate_pretreat_id = None

    # try/finally so the connection is released even when a statement fails.
    try:
        with conn.cursor() as cur:
            for treatment in substrate_pretreat.values():
                # Convert params and meta to JSON format
                params_json = json.dumps(treatment.get('params', {}))
                meta_json = json.dumps(treatment.get('meta', {}))

                # Insert data into SUBSTRATE_PRETREAT_STEP table
                substrate_pretreat_step_id = insert_into_substrate_pretreat_step(cur, treatment['treatment_type'], params_json, meta_json)

                if substrate_pretreat_id is None:
                    # Check if the record already exists in SUBSTRATE_PRETREAT table
                    cur.execute(
                        "SELECT substrate_pretreat_id FROM SUBSTRATE_PRETREAT WHERE substrate_pretreat_id IN (SELECT substrate_pretreat_id FROM SUBSTRATE_PRETREAT_ORDER WHERE substrate_pretreat_step_id = %s)",
                        (substrate_pretreat_step_id,)
                    )
                    existing_id = cur.fetchone()

                    if existing_id:
                        substrate_pretreat_id = existing_id[0]
                    else:
                        # Insert data into SUBSTRATE_PRETREAT table
                        cur.execute(
                            "INSERT INTO SUBSTRATE_PRETREAT (substrate_pretreat_id) VALUES (DEFAULT) RETURNING substrate_pretreat_id"
                        )
                        substrate_pretreat_id = cur.fetchone()[0]

                # Insert data into SUBSTRATE_PRETREAT_ORDER table
                substrate_pretreat_id = insert_into_substrate_pretreat_order(cur, substrate_pretreat_id, treatment['process_step'], substrate_pretreat_step_id)

            # Commit the changes to the database
            print("Substrate pretreatment saved successfully with id : {}".format(substrate_pretreat_id))
            conn.commit()
    finally:
        conn.close()

    return substrate_pretreat_id


###### 5.3 POST PROCESSING TREATMENT

In [None]:
def store_post_process_treatment(param_dict, post_process):
    """Store an ordered set of post-processing steps and return its id.

    Each value of ``post_process`` is read as a mapping with the keys
    ``treatment_type`` and ``process_step`` (required) and ``params`` /
    ``meta`` (optional, defaulting to ``{}``).  For every entry the matching
    POSTPROCESS_STEP row is looked up or created, and the step is linked to
    a POSTPROCESS header row through POSTPROCESS_ORDER.

    Args:
        param_dict: Keyword arguments forwarded to ``pg.connect``.
        post_process: Mapping whose values describe the individual steps.

    Returns:
        The ``postprocess_id`` the steps were linked to, or ``None`` when
        ``post_process`` has no entries.

    Raises:
        Exception: Any error raised while talking to the database is
            re-raised after the transaction has been rolled back.
    """

    def _get_or_create_step(cursor, treatment_type, params, meta):
        """Return the id of the POSTPROCESS_STEP row with these values, inserting it if absent."""
        step_values = (treatment_type, params, meta)
        cursor.execute(
            "SELECT postprocess_step_id FROM POSTPROCESS_STEP WHERE treatment_type = %s AND params = %s::jsonb AND meta = %s::jsonb",
            step_values
        )
        found = cursor.fetchone()
        if found:
            return found[0]

        cursor.execute(
            "INSERT INTO POSTPROCESS_STEP (treatment_type, params, meta) VALUES (%s, %s::jsonb, %s::jsonb) RETURNING postprocess_step_id",
            step_values
        )
        return cursor.fetchone()[0]

    def _get_or_create_header(cursor, step_id):
        """Return a POSTPROCESS id already linked to ``step_id``, or create a new header row."""
        # The lookup has no ORDER BY, so when several headers reference this
        # step the one that comes back is whichever row the database returns first.
        cursor.execute(
            "SELECT postprocess_id FROM POSTPROCESS WHERE postprocess_id IN (SELECT postprocess_id FROM POSTPROCESS_ORDER WHERE postprocess_step_id = %s)",
            (step_id,)
        )
        found = cursor.fetchone()
        if found:
            return found[0]

        cursor.execute(
            "INSERT INTO POSTPROCESS (postprocess_id) VALUES (DEFAULT) RETURNING postprocess_id"
        )
        return cursor.fetchone()[0]

    def _link_step(cursor, header_id, process_order, step_id):
        """Point slot (header_id, process_order) of POSTPROCESS_ORDER at ``step_id``."""
        cursor.execute(
            "SELECT postprocess_id FROM POSTPROCESS_ORDER WHERE postprocess_id = %s AND process_order = %s",
            (header_id, process_order)
        )
        if cursor.fetchone():
            # The slot is already occupied: overwrite the step it refers to.
            statement = "UPDATE POSTPROCESS_ORDER SET postprocess_step_id = %s WHERE postprocess_id = %s AND process_order = %s"
            values = (step_id, header_id, process_order)
        else:
            statement = "INSERT INTO POSTPROCESS_ORDER (postprocess_id, process_order, postprocess_step_id) VALUES (%s, %s, %s)"
            values = (header_id, process_order, step_id)
        cursor.execute(statement, values)

    connection = pg.connect(**param_dict)

    try:
        with connection.cursor() as cursor:
            postprocess_id = None

            for step in post_process.values():
                # The jsonb columns are compared and written as JSON text.
                params_json = json.dumps(step.get('params', {}))
                meta_json = json.dumps(step.get('meta', {}))

                step_id = _get_or_create_step(cursor, step['treatment_type'], params_json, meta_json)

                # Only the first step decides which header row is used;
                # every later step is attached to that same header.
                if postprocess_id is None:
                    postprocess_id = _get_or_create_header(cursor, step_id)

                _link_step(cursor, postprocess_id, step['process_step'], step_id)

        connection.commit()

        print("Post Process treatment saved successfully with id: {}".format(postprocess_id))
        return postprocess_id
    except Exception:
        # Undo any partial writes before handing the error to the caller.
        connection.rollback()
        raise
    finally:
        connection.close()


### 6. Checking and Storing information to the OFET_PROCESS TABLE and generating process_id


In [None]:

# Store every processing stage and keep the id each store function returns.
# The calls are kept in their original order because each one writes to the database.
# Note the argument order is not uniform: store_device_fabrication and
# store_film_deposition are called with the data first and param_dict second.
solution_id = insert_solution_info(param_dict, solution_makeup)
solution_treatment_id = store_solution_treatment(param_dict, solution_processing)
device_fab_id = store_device_fabrication(device_fab, param_dict)
substrate_pretreat_id = store_substrate_pretreatment(param_dict, substrate_pretreat)
film_deposition_id = store_film_deposition(coating_process, param_dict)
postprocess_id = store_post_process_treatment(param_dict, post_process)


In [None]:
#insert information into OFET_Process table 
def insert_ofet_process(solution_id,solution_treatment_id,device_fab_id,substrate_pretreat_id,film_deposition_id,postprocess_id):
    """Upsert one combination of processing ids into ``ofet_process``.

    The six ids are written as a single row.  When a row with the same
    combination already exists, the ``ON CONFLICT ... DO UPDATE`` branch
    rewrites it with the same values (presumably so that ``RETURNING
    process_id`` still yields the existing id).

    Args:
        solution_id: Id stored in the ``solution_id`` column.
        solution_treatment_id: Id stored in the ``solution_treatment_id`` column.
        device_fab_id: Id stored in the ``device_fab_id`` column.
        substrate_pretreat_id: Id stored in the ``substrate_pretreat_id`` column.
        film_deposition_id: Id stored in the ``film_deposition_id`` column.
        postprocess_id: Id stored in the ``postprocess_id`` column.

    Returns:
        Whatever the module-level ``pg_query`` helper returns for the
        statement (expected to be the ``process_id`` -- confirm against
        ``pg_query``).
    """
    ofet_process_columns = ['solution_id','solution_treatment_id','device_fab_id','substrate_pretreat_id','film_deposition_id','postprocess_id']
    ofet_process_values = [solution_id,solution_treatment_id,device_fab_id,substrate_pretreat_id,film_deposition_id,postprocess_id]

    sql = '''
        INSERT INTO ofet_process (%s) 
        VALUES %s
        ON CONFLICT (solution_id, solution_treatment_id, device_fab_id, substrate_pretreat_id, film_deposition_id, postprocess_id) DO UPDATE
        SET (%s) = %s
        RETURNING process_id

        '''
    # Column names are a fixed list defined above, so splicing them in with
    # AsIs is safe; the values themselves are still adapted by psycopg2.
    tup = (AsIs(','.join(ofet_process_columns)), tuple(ofet_process_values), AsIs(','.join(ofet_process_columns)), tuple(ofet_process_values))

    process_id = pg_query(sql, tup)

    # Report before returning: this message used to sit after the return
    # statement and was therefore never printed.
    print("All processing information saved successfully with id : {}".format(process_id))

    return process_id


### 7. Checking and Storing information to the SAMPLE TABLE and generating sample_id


In [None]:
# Store the experiment information, then register the combination of
# processing ids to obtain the process_id.
exp_id = store_experimental_info(exp_info)
process_id = insert_ofet_process(
    solution_id,
    solution_treatment_id,
    device_fab_id,
    substrate_pretreat_id,
    film_deposition_id,
    postprocess_id,
)


In [None]:
#insert information into sample table 
def insert_into_sample_table(pg_query, exp_id, process_id):
    """Upsert the (exp_id, process_id) pair into ``sample``.

    The ``meta`` column is always written as the literal string ``"{}"``.
    When a row with the same ``(exp_id, process_id, meta)`` already exists,
    the ``ON CONFLICT ... DO UPDATE`` branch rewrites it with the same
    values (presumably so that ``RETURNING sample_id`` still yields the
    existing id).

    Args:
        pg_query: Callable invoked as ``pg_query(sql, params)`` to run the
            statement.
        exp_id: Value stored in the ``exp_id`` column.
        process_id: Value stored in the ``process_id`` column.

    Returns:
        Whatever ``pg_query`` returns for the statement (expected to be the
        ``sample_id`` -- confirm against ``pg_query``).
    """
    sample_columns = ['exp_id', 'process_id','meta']
    sample_values = [exp_id, process_id,"{}"] 

    sql = '''
        INSERT INTO sample (%s) 
        VALUES %s
        ON CONFLICT (exp_id, process_id, meta) DO UPDATE        
        SET (%s) = %s
        RETURNING sample_id
        '''

    # Column names are a fixed list defined above, so splicing them in with
    # AsIs is safe; the values themselves are still adapted by psycopg2.
    tup = (AsIs(','.join(sample_columns)), tuple(sample_values), AsIs(','.join(sample_columns)), tuple(sample_values))

    sample_id = pg_query(sql, tup)

    # Report before returning: this message used to sit after the return
    # statement and was therefore never printed.
    print("sample_id is: {}".format(sample_id))

    return sample_id

In [None]:
# Register the sample; insert_measurement later reads this module-level sample_id.
sample_id = insert_into_sample_table(pg_query, exp_id, process_id)

### 8. Checking and Storing the measurement information 

In [None]:
#### Storing  Measurement Information 
def insert_measurement(measurement):
    """Upsert one measurement for the current sample into ``measurement``.

    ``measurement`` is passed through ``convert_entry`` to obtain its column
    names and values.  The module-level ``sample_id`` is put in front as the
    first column, an empty ``meta`` is appended when the entry has none, and
    every dict value is serialised to JSON text before the statement runs.

    Note: the values list returned by ``convert_entry`` is modified in place.

    Args:
        measurement: A single measurement entry accepted by ``convert_entry``.

    Returns:
        Whatever the module-level ``pg_query`` helper returns for the
        statement (expected to be the ``measurement_id`` -- confirm against
        ``pg_query``).
    """
    # The first item returned by convert_entry is not needed here.
    _, entry_columns, entry_values = convert_entry(measurement)

    # sample_id always leads the column list; it is read from module scope.
    column_names = ['sample_id', *entry_columns]
    entry_values.insert(0, sample_id)

    # Entries without meta information get an empty meta object.
    if 'meta' not in column_names:
        column_names.append('meta')
        entry_values.append({})

    # Dict values are sent as JSON text; everything else is passed unchanged.
    sql_values = tuple(
        json.dumps(item) if isinstance(item, dict) else item
        for item in entry_values
    )
    sql_columns = ','.join(column_names)

    sql = '''
            INSERT INTO measurement (%s) 
            VALUES %s
            ON CONFLICT (sample_id, measurement_type, data, meta) DO UPDATE
            SET (%s) = %s
            RETURNING measurement_id
            '''

    # The column list and the value tuple each appear twice: once for the
    # INSERT and once for the DO UPDATE SET clause.
    query_args = (AsIs(sql_columns), sql_values, AsIs(sql_columns), sql_values)

    return pg_query(sql, query_args)
    
def insert_into_measurement_table(pg_query, sample_id, device_meas, other_meas):
    """Store the device measurement and all other measurements, reporting each id.

    Only ``device_meas[0]`` is stored from ``device_meas``; every value of
    ``other_meas`` is stored.  Each entry is handed to ``insert_measurement``.

    Args:
        pg_query: Accepted but not used by this function;
            ``insert_measurement`` is called with the measurement only.
        sample_id: Accepted but not used by this function, for the same reason.
        device_meas: Container whose item at key/index ``0`` is the device
            measurement entry.
        other_meas: Mapping whose values are the remaining measurement entries.

    Returns:
        None.  NOTE(review): the notebook assigns this call's result to
        ``measurement_id``, which will therefore be ``None``.
    """
    device_message = "Device measurement information saved successfully with id: {}"
    other_message = "Other measurement information saved successfully with id: {}"

    print(device_message.format(insert_measurement(device_meas[0])))

    for entry in other_meas.values():
        print(other_message.format(insert_measurement(entry)))

    print("Record stored successfully")

In [None]:
# Store all measurements for the sample. insert_into_measurement_table has no
# return statement, so measurement_id is bound to None here.
measurement_id = insert_into_measurement_table(pg_query, sample_id, device_meas, other_meas)
