In [2]:
import json
import os

import numpy as np
import pandas as pd
import psycopg2
import requests

In [3]:
# Parameterized INSERT statement for a single row of the ct_business table.
# The %s placeholders are positional: cursor.execute() binds them, in order, to the
# values of the sequence it is given, so that sequence must follow the column list below.
# ON CONFLICT (credentialid) DO NOTHING skips a row whose credentialid conflicts with an
# existing one, so re-running the insert does not fail on duplicates.
q_ct_business_insert = """
INSERT INTO ct_business (
    credentialid,
    name,
    type,
    fullcredentialcode,
    credentialtype,
    credentialnumber,
    credential,
    status,
    active,
    issuedate,
    effectivedate,
    expirationdate,
    address,
    city,
    state,
    zip,
    recordrefreshedon,
    statusreason,
    businessname,
    credentialsubcategory,
    dba
)

VALUES (%s, %s, %s, %s, %s, %s, %s, %s, %s, %s, %s, %s, %s, %s, %s, %s, %s, %s, %s, %s, %s)
ON CONFLICT (credentialid) DO NOTHING;
"""

In [4]:
# Get data from the data.ct.gov endpoint.
# The timeout keeps the cell from hanging indefinitely on a stalled connection, and
# raise_for_status() surfaces an HTTP error here instead of letting an error payload
# flow into the JSON decoding and caching cells below.
response = requests.get("https://data.ct.gov/resource/ngch-56tr.json", timeout=30)
response.raise_for_status()

In [12]:
# Decode the body of the HTTP response as JSON.
response_json = response.json()

In [13]:
# Cache the downloaded payload in data.txt so later cells can reload it from disk
# instead of requesting it from the endpoint again
with open("data.txt", 'w') as cache_file:
    cache_file.write(json.dumps(response_json))

In [14]:
# Load the cached JSON file into a DataFrame.
# NOTE(review): read_json infers column dtypes by default, so numeric-looking strings
# (e.g. zip codes with leading zeros) may be converted to numbers — confirm this is intended.
df = pd.read_json('data.txt')

In [15]:
# Summarize the columns: names, non-null counts and dtypes.
df.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 1000 entries, 0 to 999
Data columns (total 21 columns):
 #   Column                 Non-Null Count  Dtype  
---  ------                 --------------  -----  
 0   credentialid           1000 non-null   int64  
 1   name                   1000 non-null   object 
 2   type                   1000 non-null   object 
 3   fullcredentialcode     1000 non-null   object 
 4   credentialtype         1000 non-null   object 
 5   credentialnumber       996 non-null    object 
 6   credential             1000 non-null   object 
 7   status                 1000 non-null   object 
 8   active                 1000 non-null   int64  
 9   issuedate              988 non-null    object 
 10  effectivedate          986 non-null    object 
 11  expirationdate         983 non-null    object 
 12  address                1000 non-null   object 
 13  city                   1000 non-null   object 
 14  state                  996 non-null    object 
 15  zip  

In [None]:
# Preview the first rows of the DataFrame.
df.head()

In [None]:
# Take the row labelled 0 and unpack its values into a plain Python list
single_business = [*df.loc[0].values]

In [None]:
# Display the values of the single business row (shown as the cell's last expression)
single_business

In [None]:
# Convert the row's values into plain Python objects that map cleanly onto Postgres:
#   * any NumPy integer (np.int64, np.int32, ...) -> built-in int
#   * float NaN (how missing fields typically appear after loading with pandas) -> None,
#     so the column is stored as NULL rather than the float value NaN
def _to_postgres_value(value):
    """Return `value` as a plain Python object suitable for use as a query parameter."""
    if isinstance(value, np.integer):
        return int(value)
    # np.float64 subclasses float, so this also catches NumPy NaN scalars.
    if isinstance(value, float) and np.isnan(value):
        return None
    return value


single_business_converted_int = [_to_postgres_value(value) for value in single_business]

In [None]:
# Pair every converted value with its Python type to verify the conversion
list(zip(single_business_converted_int, map(type, single_business_converted_int)))

## Insert one row into the database

In [None]:
# Connect to database.
# The password is read from the PGPASSWORD environment variable so that no secret is
# stored in the notebook; a missing variable fails fast with a KeyError.
# Keyword arguments are used instead of a hand-built DSN string so that special
# characters in the password need no escaping.
conn = psycopg2.connect(
    host="127.0.0.1",
    dbname="ct_business_db",
    user="postgres",
    password=os.environ["PGPASSWORD"],
)
# Autocommit: each statement is committed immediately, so no explicit commit() is needed.
conn.set_session(autocommit=True)

# Create cursor and insert the single converted row using the parameterized query
cur = conn.cursor()
cur.execute(q_ct_business_insert, single_business_converted_int)

In [None]:
# Read back every row currently stored in ct_business to verify the insert
cur.execute("SELECT * FROM ct_business")
rows = cur.fetchall()
for stored_row in rows:
    print(stored_row)

In [None]:
# Close the database connection now that the insert and read-back are done.
conn.close()