In [1]:
import pandas as pd
import numpy as np
import psycopg2 as pg
import os

In [2]:
import requests
import json
import bibtexparser
import pprint

# Postgres python
from psycopg2.extras import Json

param_dict = {
    "host"      : "127.0.0.1",
    "database"  : "ofetdb_v2",
    "user"      : "postgres",
    "password"  : "password",
    "port"      : "5432",
}

def connect(params_dict):
    """ Connect to the PostgreSQL database server """
    conn = None
    try:
        # connect to the PostgreSQL server
        print('Connecting to the PostgreSQL database...')
        conn = pg.connect(**params_dict)
    except (Exception, pg.DatabaseError) as error:
        print(error)
        sys.exit(1) 
    print("Connection successful")
    return conn

def doi2dict(doi):
    #create url
    url = "http://dx.doi.org/" + doi
    
    #create dictionary of http bibtex headers that requests will retrieve from the url
    headers = {"accept": "application/x-bibtex"}
    
    #reqeusts information specified by bibtex from url
    r = requests.get(url, headers = headers).text    
    
    #parse the returned bibtex text to a dictionary
    #NOTE: USE bibtexparser.customization to split strings into list, etc. (https://bibtexparser.readthedocs.io/en/master/bibtexparser.html?highlight=bparser#module-bibtexparser.bparser)
    bibdata = bibtexparser.bparser.BibTexParser().parse(r)
    
    # # print doi metadata
    # pp = pprint.PrettyPrinter(indent=4)
    # pp.pprint(bibdata.entries[0])
    
    #return dict of metadata
    return bibdata.entries[0]

In [5]:
fname = '../db_feed/DPPDTT/DPPDTT_dataset_feed_D6_ALcopy.xlsx'
df = pd.read_excel(fname, sheet_name='sample')

In [7]:
df.head()

Unnamed: 0,lab_notebook_id,doi,polymer_name,mw_kda,mn_kda,dispersity,sidechain,polymer_wt_pct,solvent_iupac_name,boiling_point_C,...,ofet.environment,ofet.SS_V_decade,ofet.transfer_curve.Vds_V,ofet.transfer_curve.Vg_min_V,ofet.transfer_curve.Vg_max_V,ofet.output curve.Vg_min_V,ofet.output_curve .Vg_max_V,ofet.output_curve.Vd_min_V,ofet.output_curve.Vd_max_V,comment
0,,10.1039/C5TC02579F,DPP-DTT,299,90,3.32,C2H3-(C8H17)(C10H21),100,C6H5CL,132.0,...,Ambient,8.3,,-60,30,,,,,
1,,10.1038/srep24476,DPP-DTT,199,55,3.62,C2H3-(C8H17)(C10H21),40,C6H4CL2,180.0,...,Ambient,,,-60,20,-60.0,0.0,-60.0,0.0,
2,,10.1039/c8sm02517g,DPP-DTT,279,77,3.65,C2H3-(C8H17)(C10H21),100,CHCL3,61.2,...,Nitrogen,,,-20,10,-60.0,0.0,-60.0,0.0,
3,,10.1039/c5tc02133b,DPP-DTT,50,20,2.5,C2H3-(C8H17)(C10H21),25,CHCL3,61.2,...,Nitrogen,,-100.0,-100,100,-100.0,0.0,,,
4,RV_1.99_M317_5,10.1021/acsmaterialslett.1c00320,DPP-DTT,292,143,2.03,C2H3-(C8H17)(C10H21),100,C6H5CL,132.0,...,Nitrogen,,-80.0,-100,10,-80.0,0.0,-80.0,0.0,Backward Curve Data used


In [None]:
exp_info = dict()

In [57]:
# https://stackoverflow.com/questions/29461933/insert-python-dictionary-using-psycopg2

exp_info = dict()

for i, row in df.iterrows():
    if i%10==0:
        print(i)
    if pd.isna(row.lab_notebook_id)==False:
        exp_info['source_type'] = 'laboratory'
        s = row.lab_notebook_id
        exp_info['metadata'] = {
            'lab_notebook_id': s.split('_',2)[0]+'_'+s.split('_',2)[1],
            'first_name': 'Rahul',
            'last_name': 'Venkatesh',
            'email': 'rvenkatesh6@gatech.edu'
        }
    if pd.isna(row.doi)==False:
        exp_info['source_type'] = 'literature'
        exp_info['metadata'] = doi2dict(row.doi)
        
    conn = connect(param_dict)
    cur = conn.cursor()
    query = """
    INSERT INTO experiment_info (source_type, metadata) 
    VALUES (%s, %s)
    ON CONFLICT (metadata)
    DO NOTHING;
    """ #use the psycopg2.Json object to convert to a readable json
    data = (exp_info['source_type'], Json(exp_info['metadata']))

    try:
        cur.execute(query, data)
        conn.commit()
        print("Operation Successful")
    except (Exception, pg.DatabaseError) as error:
        print("Error: %s" % error)
        conn.rollback()
        cur.close()

    conn.close()

0
Connecting to the PostgreSQL database...
Connection successful
Operation Successful
Connecting to the PostgreSQL database...
Connection successful
Operation Successful
Connecting to the PostgreSQL database...
Connection successful
Operation Successful
Connecting to the PostgreSQL database...
Connection successful
Operation Successful
Connecting to the PostgreSQL database...
Connection successful
Operation Successful
Connecting to the PostgreSQL database...
Connection successful
Operation Successful
Connecting to the PostgreSQL database...
Connection successful
Operation Successful
Connecting to the PostgreSQL database...
Connection successful
Operation Successful
Connecting to the PostgreSQL database...
Connection successful
Operation Successful
Connecting to the PostgreSQL database...
Connection successful
Operation Successful
10
Connecting to the PostgreSQL database...
Connection successful
Operation Successful
Connecting to the PostgreSQL database...
Connection successful
Operatio

Connecting to the PostgreSQL database...
Connection successful
Operation Successful
Connecting to the PostgreSQL database...
Connection successful
Operation Successful
100
Connecting to the PostgreSQL database...
Connection successful
Operation Successful
Connecting to the PostgreSQL database...
Connection successful
Operation Successful
Connecting to the PostgreSQL database...
Connection successful
Operation Successful
Connecting to the PostgreSQL database...
Connection successful
Operation Successful
Connecting to the PostgreSQL database...
Connection successful
Operation Successful
Connecting to the PostgreSQL database...
Connection successful
Operation Successful
Connecting to the PostgreSQL database...
Connection successful
Operation Successful
Connecting to the PostgreSQL database...
Connection successful
Operation Successful
Connecting to the PostgreSQL database...
Connection successful
Operation Successful
Connecting to the PostgreSQL database...
Connection successful
Operation

In [49]:


conn = connect(param_dict)
cur = conn.cursor()
query = """
INSERT INTO experiment_info (source_type, metadata) 
VALUES (%s, %s)
ON CONFLICT (metadata)
DO NOTHING;
""" #use the psycopg2.Json object to convert to a readable json
data = (exp_info['source_type'], Json(exp_info['metadata']))

try:
    cur.execute(query, data)
    conn.commit()
    print("Operation Successful")
except (Exception, pg.DatabaseError) as error:
    print("Error: %s" % error)
    conn.rollback()
    cur.close()

conn.close()

Connecting to the PostgreSQL database...
Connection successful
Operation Successful


In [44]:
data = (exp_info['source_type'], Json(exp_info['metadata']))
data

('literature', <psycopg2._json.Json at 0x1e331caca90>)