# FDA NTC ETL Pipeline
- API website: https://open.fda.gov/data/downloads/
- Objective: Collect drug information from the FDA NDC (National Drug Code Directory) API

In [1]:
# Requesting the API and getting a status code
import requests
response = requests.get("https://api.fda.gov/drug/ndc.json")
print(response.status_code)

200


In [2]:
import json

# Request the API
response = requests.get("https://api.fda.gov/drug/ndc.json")
data = response.json()

# Checking if the request was successful then returning a sample of the data
if response.status_code == 200:
    data = response.json()
    print(json.dumps(data['meta'], indent=4, sort_keys=True))
else:
    print('Failed to fetch data', response.status_code)

{
    "disclaimer": "Do not rely on openFDA to make decisions regarding medical care. While we make every effort to ensure that data is accurate, you should assume all results are unvalidated. We may limit or otherwise restrict your access to the API in line with our Terms of Service.",
    "last_updated": "2025-02-05",
    "license": "https://open.fda.gov/license/",
    "results": {
        "limit": 1,
        "skip": 0,
        "total": 128582
    },
    "terms": "https://open.fda.gov/terms/"
}


In [3]:
import psycopg2
from dotenv import load_dotenv
import os

# load credentials
load_dotenv()
db_user = os.getenv('db_user')
db_password = os.getenv('db_password')

# connect to the postgres database
conn = psycopg2.connect(
    host = 'localhost',
    database = 'postgres',
    user = db_user,
    password = db_password)

cursor = conn.cursor()

In [4]:
# enable autocommit so CREATE DATABASE can be used
conn.autocommit = True
# create the new database
cursor.execute("DROP DATABASE \"Postgres 16 - Localhost - FDA-NDC-ETL_db\"")

In [5]:
# enable autocommit so CREATE DATABASE can be used
conn.autocommit = True
# create the new database
cursor.execute("CREATE DATABASE \"Postgres 16 - Localhost - FDA-NDC-ETL_db\"")

In [27]:
# reconnect to the access the new database
conn = psycopg2.connect(
    host = 'localhost',
    database = 'Postgres 16 - Localhost - FDA-NDC-ETL_db',
    user = db_user,
    password = db_password)

cursor = conn.cursor()

In [29]:
# create the table
cursor.execute(
'''
CREATE TABLE IF NOT EXISTS FDA_Drugs_db (
    product_ndc TEXT PRIMARY KEY,
    brand_name TEXT,
    marketing_start_date DATE
                                        );
'''
)
# Commit
conn.commit()

In [30]:
# testing the first 
first_result = data['results'][0]

product_ndc = first_result['product_ndc']
brand_name = first_result['brand_name']
marketing_start_date = first_result['marketing_start_date']

In [31]:
# add data to the table
cursor.execute(
    '''
    INSERT INTO FDA_Drugs_db (product_ndc, brand_name, marketing_start_date)
    VALUES (%s, %s, %s)
    ''', 
    (product_ndc, brand_name, marketing_start_date)
)
# Commit the transaction
conn.commit()

In [34]:
# add data to the table
sql_query = '''
    select * from FDA_Drugs_db
    '''

In [35]:
# Execute the query
cursor.execute(sql_query)
# Fetch all the results
results = cursor.fetchall()

In [38]:
import pandas as pd

df = pd. DataFrame(results)

# see results
df

Unnamed: 0,0,1,2
0,79903-485,Liquid Bandage,2012-01-12
