In [1]:
#Import the required modules
import os
from dotenv import load_dotenv

import requests
import json

import pandas as pd
import numpy as np

import pickle

In [2]:
# Load the secret EIA API key from the .env file into the process environment
load_dotenv()

api_key = os.getenv('eia_api_key')

# Fail fast with a clear message: os.getenv returns None for a missing variable,
# which would otherwise only surface later as an obscure error while building
# the request. The key itself is deliberately never printed.
if not api_key:
    raise RuntimeError(
        "Environment variable 'eia_api_key' is not set; add it to the .env file.")

In [3]:
# Human-readable feature names; each one becomes the column name of the
# DataFrame built for the series at the same position in FEATURE_KEYS.
FEATURE_NAMES = [
    'WTI_Price',
    'Oil_Production_OPEC',
    'Oil_Production_nonOPEC',
    'Oil_Production_World',
    'Henry_Hub_NG_Price',
    'Oil_Production_US',
    'Petrol_Consumption_OECD',
    'Petrol_Consumption_nonOECD',
    'US_CPI',
    'US_PPI',
    'US_PPI_Petroleum',
    'US_PMI',
    'Petroleum_Inventory_OECD',
    'Crude_Oil_Inventory_Total',
    'Crude_Oil_Inventory_SPR',
    'Crude_Oil_Inventory_nonSPR',
    # No trailing whitespace here: the name is used verbatim as a column label.
    'Refiner_Wholesale_Gasoline_Price',
    'Refiner_Wholesale_Diesel_Price',
    'Brent_Price']

# EIA series IDs for the features, paired by position with FEATURE_NAMES
FEATURE_KEYS = [
    'STEO.WTIPUUS.M',
    'STEO.COPR_OPEC.M',
    'STEO.PAPR_NONOPEC.M',
    'INTL.55-1-WORL-TBPD.M',
    'NG.RNGWHHD.M',
    'STEO.COPRPUS.M',
    'STEO.PATC_OECD.M',
    'STEO.PATC_NON_OECD.M',
    'STEO.CICPIUS.M',
    'STEO.WPCPIUS.M',
    'STEO.WP57IUS.M',
    'STEO.ZOMNIUS.M',
    'STEO.PASC_OECD_T3.M',
    'STEO.PASXPUS.M',
    'STEO.COSQPUS.M',
    'STEO.COSXPUS.M',
    'STEO.MGWHUUS.M',
    'STEO.DSWHUUS.M',
    'STEO.BREPUUS.M']

In [4]:
# Final list holding one single-column DataFrame per EIA series, in the same
# order as FEATURE_KEYS.
final_data = []

# The two lists are paired by position, so a length mismatch would silently
# mislabel or drop series.
if len(FEATURE_NAMES) != len(FEATURE_KEYS):
    raise ValueError('FEATURE_NAMES and FEATURE_KEYS must have the same length')

# NOTE(review): this is the same "series" endpoint the notebook has always
# used, switched to HTTPS so the API key is not sent in plaintext. Confirm the
# endpoint is still served for these series IDs.
series_url = 'https://api.eia.gov/series/'

# Pull in data via EIA API
for feature_name, series_id in zip(FEATURE_NAMES, FEATURE_KEYS):
    # Let requests build and URL-encode the query string; the timeout keeps a
    # stalled connection from hanging the notebook indefinitely.
    r = requests.get(series_url,
                     params={'api_key': api_key, 'series_id': series_id},
                     timeout=30)

    # Check the status BEFORE decoding the body, and stop instead of falling
    # through to the parsing code with an unusable response. The request URL is
    # intentionally left out of the message because it contains the API key.
    if r.status_code != 200:
        print('Error!')
        raise RuntimeError('EIA request for %s failed with HTTP status %s'
                           % (series_id, r.status_code))

    json_data = r.json()

    # A 200 response is only usable if it actually carries a 'series' entry.
    series = json_data.get('series')
    if not series:
        print('Error!')
        raise RuntimeError('EIA response for %s contains no series data'
                           % series_id)

    print('Success: ', series_id)

    # Cast the json pull to a pandas dataframe indexed by the 'Date' column
    df = pd.DataFrame(series[0].get('data'),
                      columns=['Date', feature_name]).set_index('Date')

    # Append the pandas dataframe to the list
    final_data.append(df)

Success:  STEO.WTIPUUS.M
Success:  STEO.COPR_OPEC.M
Success:  STEO.PAPR_NONOPEC.M
Success:  INTL.55-1-WORL-TBPD.M
Success:  NG.RNGWHHD.M
Success:  STEO.COPRPUS.M
Success:  STEO.PATC_OECD.M
Success:  STEO.PATC_NON_OECD.M
Success:  STEO.CICPIUS.M
Success:  STEO.WPCPIUS.M
Success:  STEO.WP57IUS.M
Success:  STEO.ZOMNIUS.M
Success:  STEO.PASC_OECD_T3.M
Success:  STEO.PASXPUS.M
Success:  STEO.COSQPUS.M
Success:  STEO.COSXPUS.M
Success:  STEO.MGWHUUS.M
Success:  STEO.DSWHUUS.M
Success:  STEO.BREPUUS.M


In [35]:
# Preview the first rows of the first and the last dataframe in the list,
# separated by the same blank-line spacer as before.
first_frame, last_frame = final_data[0], final_data[-1]
print(first_frame.head(), '\n', last_frame.head(), sep='\n')

        WTI_Price
Date             
202312       89.0
202311       89.0
202310       89.0
202309       90.0
202308       90.0


        Brent_Price
Date               
202312         95.0
202311         95.0
202310         95.0
202309         96.0
202308         96.0


In [36]:
# Show the Current Working Directory (CWD) the notebook is running from
current_dir = os.getcwd()
print('CWD: ', current_dir)

CWD:  D:\gitProjects\springboard_capstone_1\Springboard_Capstone_01\notebooks


In [37]:
# Project root = parent of the current working directory. Nothing is actually
# chdir'ed; the path is only computed, normalised and stored.
parent_path = os.sep.join((os.getcwd(), os.pardir))
ROOT_DIR = os.path.normpath(parent_path)
print(ROOT_DIR)

D:\gitProjects\springboard_capstone_1\Springboard_Capstone_01


In [38]:
# Build the path to the 'data/interim' folder under the project root.
# os.path.join uses the platform's separator instead of hard-coded Windows
# backslashes; the trailing '' keeps the trailing separator that data_dir has
# always had, so string concatenation onto data_dir keeps working.
data_dir = os.path.join(ROOT_DIR, 'data', 'interim', '')

# Set a file name
file_name = data_dir + 'eia_data_raw.csv'

In [39]:
# Make sure the target folder exists (e.g. on a fresh checkout) before writing.
target_dir = os.path.dirname(file_name)
if target_dir:
    os.makedirs(target_dir, exist_ok=True)

# Dump the list of dataframes into the file in binary mode. The context manager
# closes the file even if pickling raises.
# NOTE(review): the payload is a pickle although file_name ends in '.csv';
# the name is kept unchanged so existing readers of this file keep working.
with open(file_name, "wb") as myfile:
    pickle.dump(final_data, myfile)