In [1]:
import config
import sqlite3
import requests
import pandas as pd

In [7]:
# get data
EIA_API_KEY = config.EIA_API_KEY
API_ROUTE = "https://api.eia.gov/v2/international/data/?frequency=monthly&data[0]=value&start=2002-12&end=2024-06&sort[0][column]=period&sort[0][direction]=desc&offset=0&length=5000"
data = None
try:
    response = requests.get(API_ROUTE, headers={"X-Api-Key": EIA_API_KEY})
    response.raise_for_status()  # Raise an HTTPError for bad responses
    data = response.json()
    data = data['response']['data']
except requests.exceptions.HTTPError as http_err:
    print(f"HTTP error occurred: {http_err}")
except Exception as err:
    print(f"Other error occurred: {err}")

In [14]:
# put the data into a pandas dataframe
if data is None:
    print("Data is None")
    exit()
df = pd.DataFrame(data)
# Sort by time period
df_sorted = df.sort_values(by='period', ascending=False)
# print(df_sorted.head())

In [15]:
# write to sqlite
conn = sqlite3.connect('energy_data.db')
cursor = conn.cursor()

cursor.execute('''CREATE TABLE IF NOT EXISTS energy_data (
                    country TEXT,
                    period TEXT,
                    energy_source TEXT,
                    value REAL,
                    unit TEXT)''')

for _, row in df.iterrows():
    value = row['value'] 
    
    if value == 'w' or value == '--':
        value = None
        
    cursor.execute('''INSERT INTO energy_data (country, period, energy_source, value, unit) 
                        VALUES (?, ?, ?, ?, ?)''', 
                        (row['countryRegionName'], row['period'], row['productName'], value, row['unit']))

conn.commit()
conn.close()

In [16]:
# query the data for a specific country and energy source
conn = sqlite3.connect('energy_data.db')
cursor = conn.cursor()

# Example query: get all data for a specific country and energy source
country = 'United States'
energy_source = 'Other liquids'
cursor.execute('''SELECT * FROM energy_data WHERE country=? AND energy_source=?''', (country, energy_source))

rows = cursor.fetchall()
for row in rows:
    print(row)

conn.close()

('United States', '2024-06', 'Other liquids', 1608.043, 'TBPD')
('United States', '2024-06', 'Other liquids', 1608.043, 'TBPD')
('United States', '2024-05', 'Other liquids', 1534.33, 'TBPD')
('United States', '2024-05', 'Other liquids', 1534.33, 'TBPD')
('United States', '2024-04', 'Other liquids', 1526.25, 'TBPD')
('United States', '2024-04', 'Other liquids', 1526.25, 'TBPD')
