In [1]:
#Import the required modules
import os
from dotenv import load_dotenv

import requests
import json

import pandas as pd
import numpy as np

import pickle

In [2]:
# Load the secret EIA API key from the local .env file into the environment.
load_dotenv()

api_key = os.getenv('eia_api_key')

# Fail fast with a clear message: os.getenv returns None when the variable is
# missing, and that would otherwise only show up later as an obscure error
# while building the request. The key itself is deliberately never printed so
# it cannot leak into the saved notebook output.
if not api_key:
    raise RuntimeError(
        "Environment variable 'eia_api_key' is not set; "
        "add it to the .env file before running this notebook.")

In [3]:
# EIA features used by this notebook, written as (column name, EIA series ID)
# pairs so that every name sits right next to the series it is fetched from.
EIA_FEATURES = (
    ('WTI_Price', 'STEO.WTIPUUS.M'),
    ('Oil_Production_OPEC', 'STEO.COPR_OPEC.M'),
    ('Oil_Production_nonOPEC', 'STEO.PAPR_NONOPEC.M'),
    ('Oil_Production_World', 'INTL.55-1-WORL-TBPD.M'),
    ('Henry_Hub_NG_Price', 'NG.RNGWHHD.M'),
    ('Oil_Production_US', 'STEO.COPRPUS.M'),
    ('Petrol_Consumption_OECD', 'STEO.PATC_OECD.M'),
    ('Petrol_Consumption_nonOECD', 'STEO.PATC_NON_OECD.M'),
    ('US_CPI', 'STEO.CICPIUS.M'),
    ('US_PPI', 'STEO.WPCPIUS.M'),
    ('US_PPI_Petroleum', 'STEO.WP57IUS.M'),
    ('US_PMI', 'STEO.ZOMNIUS.M'),
    ('Petroleum_Inventory_OECD', 'STEO.PASC_OECD_T3.M'),
    ('Crude_Oil_Inventory_Total', 'STEO.PASXPUS.M'),
    ('Crude_Oil_Inventory_SPR', 'STEO.COSQPUS.M'),
    ('Crude_Oil_Inventory_nonSPR', 'STEO.COSXPUS.M'),
    ('Refiner_Wholesale_Gasoline_Price', 'STEO.MGWHUUS.M'),
    ('Refiner_Wholesale_Diesel_Price', 'STEO.DSWHUUS.M'),
    ('Brent_Price', 'STEO.BREPUUS.M'),
)

# EIA feature names (the DataFrame column names), in fetch order.
FEATURE_NAMES = [feature_name for feature_name, _ in EIA_FEATURES]

# EIA series IDs for the features, index-aligned with FEATURE_NAMES.
FEATURE_KEYS = [series_id for _, series_id in EIA_FEATURES]

In [4]:
# Download every EIA series and collect one single-column DataFrame per
# feature. `read_data` holds the frames in the same order as FEATURE_KEYS.
read_data = []

# NOTE(review): this is the legacy EIA "series" endpoint; confirm it is still
# served before relying on a fresh run of this cell.
EIA_SERIES_URL = 'https://api.eia.gov/series/'

# The two lists are consumed pairwise, so a length mismatch would silently
# mislabel or drop features -- refuse to continue in that case.
if len(FEATURE_KEYS) != len(FEATURE_NAMES):
    raise ValueError('FEATURE_KEYS and FEATURE_NAMES must have the same length')

# Pull in data via EIA API
for series_id, feature_name in zip(FEATURE_KEYS, FEATURE_NAMES):
    # - HTTPS keeps the API key out of clear-text traffic.
    # - `params` lets requests URL-encode the values instead of hand-built
    #   string concatenation.
    # - `timeout` stops a stalled connection from hanging the notebook.
    r = requests.get(
        EIA_SERIES_URL,
        params={'api_key': api_key, 'series_id': series_id},
        timeout=30)

    # Check the status before parsing the body. The message intentionally
    # contains only the series id and status code, never the request URL,
    # because the URL carries the API key.
    if r.status_code != 200:
        raise RuntimeError(
            f'EIA request for {series_id} failed with HTTP status {r.status_code}')

    json_data = r.json()

    # A 200 response without a usable 'series' entry cannot be turned into a
    # DataFrame; report which top-level keys came back instead of failing
    # later on a None subscript.
    series = json_data.get('series') if isinstance(json_data, dict) else None
    if not series:
        returned_keys = sorted(json_data) if isinstance(json_data, dict) else type(json_data).__name__
        raise RuntimeError(
            f"EIA response for {series_id} has no 'series' entry "
            f'(returned: {returned_keys})')

    print('Success: ', series_id)

    # Cast the json pull to pandas dataframe: each record becomes a
    # (Date, value) row, with the value column named after the feature.
    df = pd.DataFrame(series[0].get('data'),
                      columns=['Date', feature_name])
    # Dates are parsed with the year-month format '%Y%m'; anything else raises.
    df['Date'] = pd.to_datetime(df['Date'], format='%Y%m', errors="raise")
    df = df.set_index('Date', drop=True)

    # Keep the frame for the later merge step.
    read_data.append(df)

Success:  STEO.WTIPUUS.M
Success:  STEO.COPR_OPEC.M
Success:  STEO.PAPR_NONOPEC.M
Success:  INTL.55-1-WORL-TBPD.M
Success:  NG.RNGWHHD.M
Success:  STEO.COPRPUS.M
Success:  STEO.PATC_OECD.M
Success:  STEO.PATC_NON_OECD.M
Success:  STEO.CICPIUS.M
Success:  STEO.WPCPIUS.M
Success:  STEO.WP57IUS.M
Success:  STEO.ZOMNIUS.M
Success:  STEO.PASC_OECD_T3.M
Success:  STEO.PASXPUS.M
Success:  STEO.COSQPUS.M
Success:  STEO.COSXPUS.M
Success:  STEO.MGWHUUS.M
Success:  STEO.DSWHUUS.M
Success:  STEO.BREPUUS.M


In [5]:
# Find the longest data series: print one summary line per series and remember
# the position (max_idx) and row count (max_val) of the first longest one.
max_idx, max_val = 0, len(read_data[0])

for pos, frame in enumerate(read_data):
    n_rows = len(frame)

    # Strict '>' keeps the earliest series when several share the maximum.
    if n_rows > max_val:
        max_idx, max_val = pos, n_rows

    print(f'{pos}: {frame.columns[0]} (1*{n_rows})')

0: WTI_Price (1*408)
1: Oil_Production_OPEC (1*372)
2: Oil_Production_nonOPEC (1*372)
3: Oil_Production_World (1*353)
4: Henry_Hub_NG_Price (1*309)
5: Oil_Production_US (1*408)
6: Petrol_Consumption_OECD (1*408)
7: Petrol_Consumption_nonOECD (1*408)
8: US_CPI (1*408)
9: US_PPI (1*408)
10: US_PPI_Petroleum (1*408)
11: US_PMI (1*408)
12: Petroleum_Inventory_OECD (1*252)
13: Crude_Oil_Inventory_Total (1*408)
14: Crude_Oil_Inventory_SPR (1*408)
15: Crude_Oil_Inventory_nonSPR (1*408)
16: Refiner_Wholesale_Gasoline_Price (1*408)
17: Refiner_Wholesale_Diesel_Price (1*408)
18: Brent_Price (1*408)


In [6]:
# Use the longest series (position max_idx) as the starting frame.
longest_series = read_data[max_idx]
# Make sure the Date index is stored as nanosecond-resolution datetimes.
longest_series.index = longest_series.index.astype('datetime64[ns]')
df = longest_series

# Preview the first three rows, transposed so the dates run across the columns.
display(df.head(3).transpose())

Date,2023-12-01,2023-11-01,2023-10-01
WTI_Price,89.0,89.0,89.0


In [7]:
# Merge every series into one frame keyed on 'Date'.
#
# The merge always restarts from the longest series rather than from whatever
# `df` currently holds. That keeps this cell idempotent: re-running it no
# longer merges the same features a second time (which would create suffixed
# duplicate columns).
df = read_data[max_idx]
df.index = df.index.astype('datetime64[ns]')

for i in range(0, len(read_data)):
    # The starting frame is already in `df`; skip it so it is not merged twice.
    if i == max_idx:
        continue

    df_temp = read_data[i]
    # Align the index dtype with the starting frame before merging.
    df_temp.index = df_temp.index.astype('datetime64[ns]')
    # Ordered merge on the dates (pandas' default for merge_ordered is an
    # outer join), so shorter series are padded with NaN.
    df = pd.merge_ordered(df, df_temp, on='Date')

In [8]:
# Preview the last three rows of the merged frame, transposed so that each
# feature is shown as a row.
display(df.tail(3).transpose())

Unnamed: 0,405,406,407
Date,2023-10-01 00:00:00,2023-11-01 00:00:00,2023-12-01 00:00:00
WTI_Price,89.0,89.0,89.0
Oil_Production_OPEC,28.925455,28.924614,28.923774
Oil_Production_nonOPEC,67.160378,67.236176,66.954439
Oil_Production_World,,,
Henry_Hub_NG_Price,,,
Oil_Production_US,12.71779,12.90779,12.97429
Petrol_Consumption_OECD,46.537855,46.509344,47.061164
Petrol_Consumption_nonOECD,53.94027,54.868195,55.696039
US_CPI,3.056389,3.06452,3.073038


In [9]:
# Take the parent of the current working directory as the project root.
ROOT_DIR = os.path.normpath(os.path.join(os.getcwd(), os.pardir))

# Output folder: <ROOT_DIR>/data/interim/. Built with os.path.join so the
# separators are correct on every OS (hard-coded backslashes only work on
# Windows). The trailing separator is kept so `path + <file name>` still works.
path = os.path.join(ROOT_DIR, 'data', 'interim') + os.sep

# Create the folder when it is missing so the save does not fail on a fresh
# checkout.
os.makedirs(path, exist_ok=True)

# Set a file name and write the merged frame as CSV.
# NOTE(review): the frame's integer index is written as the first, unnamed CSV
# column; left unchanged in case downstream readers rely on it -- confirm.
file_save = path + 'eia_data_raw.csv'
df.to_csv(file_save)

print('Save:\n', file_save)

Save:
 D:\gitProjects\springboard_capstone_1\Springboard_Capstone_01\data\interim\eia_data_raw.csv
