### doi2dict function

Given a DOI string, returns the metadata entry for the corresponding journal article as a dictionary.

In [5]:
import requests
import json
import pandas as pd
import bibtexparser
import pprint

def doi2dict(doi, timeout=10):
    """Fetch the BibTeX metadata for a DOI and return it as a dictionary.

    The DOI is resolved over HTTPS with an ``accept: application/x-bibtex``
    header, and the BibTeX text that comes back is parsed with
    ``bibtexparser``.

    Parameters
    ----------
    doi : str
        The DOI to look up, e.g. ``'10.1021/acscentsci.9b00476'``.
    timeout : float, optional
        Seconds to wait for the resolver before giving up (default 10).
        Without a timeout ``requests.get`` may block indefinitely.

    Returns
    -------
    dict
        The first entry parsed from the returned BibTeX text.

    Raises
    ------
    requests.HTTPError
        If the resolver answers with an HTTP error status.
    requests.RequestException
        For connection problems or timeouts.
    IndexError
        If the response does not contain any BibTeX entry.
    """
    # Create the resolver URL (same https://doi.org/ address that the
    # returned metadata reports in its own 'url' field)
    url = "https://doi.org/" + doi

    # HTTP headers asking the resolver to answer with BibTeX
    headers = {"accept": "application/x-bibtex"}

    # Request the BibTeX record; fail fast on a stalled connection and on
    # HTTP errors instead of handing an error page to the parser
    response = requests.get(url, headers=headers, timeout=timeout)
    response.raise_for_status()

    # Parse the returned BibTeX text
    # NOTE: USE bibtexparser.customization to split strings into list, etc. (https://bibtexparser.readthedocs.io/en/master/bibtexparser.html?highlight=bparser#module-bibtexparser.bparser)
    bibdata = bibtexparser.bparser.BibTexParser().parse(response.text)

    # Keep the original exception type for "nothing parsed", but say why
    if not bibdata.entries:
        raise IndexError("No BibTeX entry was returned for DOI %r" % (doi,))

    # Return dict of metadata
    return bibdata.entries[0]

# Example DOIs; within this section only doi2 is actually looked up
doi = '10.1021/acsami.1c20994'
doi2 = '10.1021/acscentsci.9b00476'

# Fetch and parse the metadata for the second DOI (this performs a network request)
doidict = doi2dict(doi2)

In [6]:
# Show the parsed metadata dictionary as the cell's output
doidict

{'journal': '{ACS} Central Science',
 'title': '{BigSMILES}: A Structurally-Based Line Notation for Describing Macromolecules',
 'author': 'Tzyy-Shyang Lin and Connor W. Coley and Hidenobu Mochigase and Haley K. Beech and Wencong Wang and Zi Wang and Eliot Woods and Stephen L. Craig and Jeremiah A. Johnson and Julia A. Kalow and Klavs F. Jensen and Bradley D. Olsen',
 'pages': '1523--1531',
 'number': '9',
 'volume': '5',
 'publisher': 'American Chemical Society ({ACS})',
 'month': 'sep',
 'year': '2019',
 'url': 'https://doi.org/10.1021%2Facscentsci.9b00476',
 'doi': '10.1021/acscentsci.9b00476',
 'ENTRYTYPE': 'article',
 'ID': 'Lin_2019'}

### Set up connection details for PostgreSQL

In [10]:
# Postgres python
import psycopg2 as pg
import numpy as np
from psycopg2.extras import Json 
import os
from psycopg2.extras import Json
from psycopg2.extensions import AsIs
import functools
import json
import sys

# Adapters necessary for converting python data types to PostgreSQL compatible data types 
def addapt_numpy_float64(numpy_float64):
    """Adapt a ``numpy.float64`` value for use as a psycopg2 query parameter.

    NaN is sent as SQL ``NULL``, matching what ``nan_to_null`` does for plain
    Python floats. Every other value is wrapped in ``AsIs`` so that its string
    form is written into the query unquoted.

    Parameters
    ----------
    numpy_float64 : numpy.float64
        The value to adapt.

    Returns
    -------
    psycopg2.extensions.AsIs
        ``AsIs('NULL')`` for NaN, otherwise ``AsIs(numpy_float64)``.
    """
    # AsIs emits str(value); for NaN that is the bare token `nan`, which is
    # not a valid SQL literal, so translate it to NULL explicitly.
    if np.isnan(numpy_float64):
        return AsIs('NULL')
    return AsIs(numpy_float64)

def addapt_numpy_int64(numpy_int64):
    """Adapt a ``numpy.int64`` value for use as a psycopg2 query parameter.

    The value is wrapped in ``AsIs`` and returned unchanged otherwise.
    """
    wrapped = AsIs(numpy_int64)
    return wrapped

def nan_to_null(f,
        _NULL=AsIs('NULL'),
        _Float=pg.extensions.Float):
    """Adapt a float for psycopg2, sending NaN as SQL ``NULL``.

    ``_NULL`` and ``_Float`` are bound once as default arguments when the
    function is defined; callers are expected to pass only ``f``.

    Returns ``_NULL`` when ``np.isnan(f)`` is true, otherwise ``_Float(f)``.
    """
    return _NULL if np.isnan(f) else _Float(f)

# Register the adapter functions defined above with psycopg2, one per Python type:
# numpy float64 and int64 values go through the AsIs-based adapters, and plain
# Python floats go through nan_to_null.
pg.extensions.register_adapter(np.float64, addapt_numpy_float64)
pg.extensions.register_adapter(np.int64, addapt_numpy_int64)
pg.extensions.register_adapter(float, nan_to_null)

# Connection settings for the PostgreSQL server.
# Each value is taken from the corresponding standard libpq environment
# variable when it is set, so real credentials do not have to be stored in the
# notebook; the fallbacks are the original local test-environment values.
param_dict = {
    "host"      : os.environ.get("PGHOST", "127.0.0.1"),
    "database"  : os.environ.get("PGDATABASE", "ofetdb_testenv"),
    "user"      : os.environ.get("PGUSER", "postgres"),
    "password"  : os.environ.get("PGPASSWORD", "password"),
    "port"      : os.environ.get("PGPORT", "5432"),
}

def connect(params_dict):
    """Open a connection to the PostgreSQL server and return it.

    ``params_dict`` is unpacked as keyword arguments to ``pg.connect``.
    If connecting fails, the error is printed and the process is asked to
    exit with status 1 via ``sys.exit``.
    """
    try:
        # Announce the attempt, then open the connection
        print('Connecting to the PostgreSQL database...')
        connection = pg.connect(**params_dict)
    except (Exception, pg.DatabaseError) as error:
        print(error)
        sys.exit(1)
    else:
        print("Connection successful")
        return connection

In [17]:
# Wrap the metadata dict in psycopg2's Json adapter and print it; the output
# below shows the quoted JSON literal it renders to.
# NOTE(review): presumably this is the form in which the dict would be passed
# for a JSON/JSONB column — confirm against the insert code.
a = Json(doidict)
print(a)

'{"journal": "{ACS} Central Science", "title": "{BigSMILES}: A Structurally-Based Line Notation for Describing Macromolecules", "author": "Tzyy-Shyang Lin and Connor W. Coley and Hidenobu Mochigase and Haley K. Beech and Wencong Wang and Zi Wang and Eliot Woods and Stephen L. Craig and Jeremiah A. Johnson and Julia A. Kalow and Klavs F. Jensen and Bradley D. Olsen", "pages": "1523--1531", "number": "9", "volume": "5", "publisher": "American Chemical Society ({ACS})", "month": "sep", "year": "2019", "url": "https://doi.org/10.1021%2Facscentsci.9b00476", "doi": "10.1021/acscentsci.9b00476", "ENTRYTYPE": "article", "ID": "Lin_2019"}'


In [19]:
import psycopg2

# Connection settings for the database in which the table is created.
# NOTE(review): credentials are hardcoded here; consider loading them from the
# environment instead of storing them in the notebook.
kwargs = {
    'database': 'test',
    'user': 'postgres',
    'password': 'password',
    'host': '127.0.0.1',
    'port': '5432'
}

# %% Create Tables for EXPERIMENT_INFO

# DDL for the EXPERIMENT_INFO table (created only if it does not exist yet).
# NOTE(review): the UNIQUE constraint includes the JSONB `meta` column; such a
# constraint is presumably backed by an index with a per-row size limit —
# confirm that large metadata entries still fit.
create_experiment_info_sql = '''
    CREATE TABLE IF NOT EXISTS EXPERIMENT_INFO (
        exp_id              SERIAL          PRIMARY KEY,
        citation_type       VARCHAR(20),
        meta                JSONB,
        UNIQUE(citation_type, meta)
    );
    '''

conn = psycopg2.connect(**kwargs)

# Close the connection even when table creation or the commit fails, so a
# failed run of this cell does not leave an open connection behind.
try:
    print("Connnection Successful")

    # The cursor is closed automatically when the with-block exits
    with conn.cursor() as cur:
        cur.execute(create_experiment_info_sql)

    print("Table(s) created successfully")
    conn.commit()

    print("Operation successful")
finally:
    conn.close()

Connnection Successful
Table(s) created successfully
Operation successful
