# Read Data from EIA API

The U.S. Energy Information Administration is committed to its free and open data by making it available through an Application Programming Interface (API) and its open data tools. EIA's API is multi-facetted and contains the following time-series data sets organized by the main energy categories.

## Libraries

### Installation

In [1]:
!pip install python-dotenv



### Import

In [2]:
#Import the required modules
import os
from dotenv import load_dotenv

import requests
import json

import pandas as pd
import numpy as np

import pickle

import csv

## Load API key from ".env"

In [3]:
#Load secret api_key from .env file
load_dotenv()

api_key = os.getenv('eia_api_key')
#print('eia_api_key: ', api_key)

## Specify EIA features to import

In [4]:
# List EAI Series IDs for the features
SERIES_IDS_DICT = {
    'WTIPUUS':'steo',
    'COPR_OPEC':'steo',
    'PAPR_NONOPEC':'steo',
    'COPRPUS':'steo',
    'PATC_OECD':'steo',
    'PATC_NON_OECD':'steo',
    'CICPIUS':'steo',
    'WPCPIUS':'steo',
    'WP57IUS':'steo',
    'ZOMNIUS':'steo',
    'PASC_OECD_T3':'steo',
    'PASXPUS':'steo',
    'COSQPUS':'steo',
    'COSXPUS':'steo',
    'MGWHUUS':'steo',
    'DSWHUUS':'steo',
    'BREPUUS':'steo',
    'WORL': 'international',
    'RNGWHHD': 'natural-gas'}

In [5]:
FEATURE_NAMES = ['wti',
				 'oil_opec',
				 'oil_nonopec',
				 'oil_world',
				 'henry_hub_ng',
				 'oil_us',
				 'petrol_oecd',
				 'petrol_nonoecd',
				 'us_cpi',
				 'us_ppi',
				 'us_ppi_petrol',
				 'us_pmi',
				 'petroleum_inv_oecd',
				 'crude_inv_total',
				 'crude_inv_spr',
				 'crude_inv_nonspr',
				 'refiner_gas_price',
				 'refiner_diesel_price',
				 'brent']

In [6]:
list(SERIES_IDS_DICT.keys())

['WTIPUUS',
 'COPR_OPEC',
 'PAPR_NONOPEC',
 'COPRPUS',
 'PATC_OECD',
 'PATC_NON_OECD',
 'CICPIUS',
 'WPCPIUS',
 'WP57IUS',
 'ZOMNIUS',
 'PASC_OECD_T3',
 'PASXPUS',
 'COSQPUS',
 'COSXPUS',
 'MGWHUUS',
 'DSWHUUS',
 'BREPUUS',
 'WORL',
 'RNGWHHD']

## Request to EIA API & store respose

In [7]:
def get_url_eia_api(series_dict, feature_id):
    
    if series_dict[feature_id] == 'steo':    
        url = 'https://api.eia.gov/v2/steo/data/?api_key=' + api_key +\
                '&frequency=monthly&data[0]=value&facets[seriesId][]='+ feature_id +\
                '&sort[0][column]=period&sort[0][direction]=desc&offset=0&length=5000'
        description_field = 'seriesDescription'
        
    elif series_dict[feature_id] == 'international':
        url = 'https://api.eia.gov/v2/international/data/?api_key=' + api_key +\
                '&frequency=monthly&data[0]=value&facets[activityId][]=1'+\
                '&facets[productId][]=55&facets[countryRegionId][]=' + feature_id +\
                '&facets[unit][]=TBPD&sort[0][column]=period&sort[0][direction]=desc&offset=0&length=5000'
        description_field = 'productName'
        
    elif series_dict[feature_id] == 'natural-gas':
        url = 'https://api.eia.gov/v2/natural-gas/pri/fut/data/?api_key=' + api_key +\
                '&frequency=monthly&data[0]=value&facets[series][]=' + feature_id +\
                '&sort[0][column]=period&sort[0][direction]=desc&offset=0&length=5000'
        description_field = 'series-description'

    else:
        print('Feature ID not properly defined!')
        
    return url, description_field

In [8]:
# Initialize list; this is the final list that will store all the data from the json pull. 
read_data = []
feature_description = {}
i = 0

# Pull in data via EIA API
for feature_id in list(SERIES_IDS_DICT.keys()):
    
    url, description_field = get_url_eia_api(SERIES_IDS_DICT, feature_id)
               
    r = requests.get(url)
    json_data = r.json()
    
    if r.status_code == 200:
        print('Success: ', feature_id)
    else:
        print('Error!')
    
    
    #get seriesDescription
    feature_description[feature_id] = json_data['response']['data'][0][description_field]
    print(feature_description[feature_id])
    
    # Cast data from the json pull to pandas dataframe
    df = pd.DataFrame(json_data['response']['data'], columns = ['period', 'value'])
    
    #rename columns
    df.columns = ['Date', FEATURE_NAMES[i]]
    i += 1

    # Chage datetime and set as index
    df['Date'] = pd.to_datetime(df['Date'], format='%Y-%m', errors="raise")
    df.set_index('Date', drop=True, inplace=True)
    
    display(df.head(3))
    
    # Concat the pandas dataframe into a list
    read_data.append(df)
    print('\n-------------------------')


Success:  WTIPUUS
West Texas Intermediate Crude Oil Price


Unnamed: 0_level_0,wti
Date,Unnamed: 1_level_1
2024-12-01,80.0
2024-11-01,80.0
2024-10-01,80.0



-------------------------
Success:  COPR_OPEC
Crude Oil Production, OPEC Total


Unnamed: 0_level_0,oil_opec
Date,Unnamed: 1_level_1
2024-12-01,28.069
2024-11-01,28.18
2024-10-01,28.391



-------------------------
Success:  PAPR_NONOPEC
Total non-OPEC liquids


Unnamed: 0_level_0,oil_nonopec
Date,Unnamed: 1_level_1
2024-12-01,69.649661
2024-11-01,69.764144
2024-10-01,69.442809



-------------------------
Success:  COPRPUS
U.S. Crude Oil Production


Unnamed: 0_level_0,oil_world
Date,Unnamed: 1_level_1
2024-12-01,13.10955
2024-11-01,13.01466
2024-10-01,12.86789



-------------------------
Success:  PATC_OECD
Liquid Fuels Consumption, Total OECD


Unnamed: 0_level_0,henry_hub_ng
Date,Unnamed: 1_level_1
2024-12-01,46.985417
2024-11-01,46.350832
2024-10-01,46.419088



-------------------------
Success:  PATC_NON_OECD
Liquid Fuels Consumption, Total non-OECD


Unnamed: 0_level_0,oil_us
Date,Unnamed: 1_level_1
2024-12-01,57.564441
2024-11-01,56.548445
2024-10-01,55.499004



-------------------------
Success:  CICPIUS
Consumer Price Index (all urban consumers)


Unnamed: 0_level_0,petrol_oecd
Date,Unnamed: 1_level_1
2024-12-01,3.132805
2024-11-01,3.1289
2024-10-01,3.124305



-------------------------
Success:  WPCPIUS
Producer Price Index: All Commodities


Unnamed: 0_level_0,petrol_nonoecd
Date,Unnamed: 1_level_1
2024-12-01,2.428705
2024-11-01,2.428541
2024-10-01,2.428635



-------------------------
Success:  WP57IUS
Producer Price Index: Petroleum


Unnamed: 0_level_0,us_cpi
Date,Unnamed: 1_level_1
2024-12-01,2.508673
2024-11-01,2.551265
2024-10-01,2.537718



-------------------------
Success:  ZOMNIUS
Manufacturing Production Index


Unnamed: 0_level_0,us_ppi
Date,Unnamed: 1_level_1
2024-12-01,99.69413
2024-11-01,99.60249
2024-10-01,99.51358



-------------------------
Success:  PASC_OECD_T3
OECD End-of-period Commercial Crude Oil and Other Liquids Inventory


Unnamed: 0_level_0,us_ppi_petrol
Date,Unnamed: 1_level_1
2024-12-01,2828.065644
2024-11-01,2851.490568
2024-10-01,2849.466677



-------------------------
Success:  PASXPUS
Total End-of-period Commercial Crude Oil and Other Liquids Inventory


Unnamed: 0_level_0,us_pmi
Date,Unnamed: 1_level_1
2024-12-01,1239.044
2024-11-01,1254.982
2024-10-01,1259.034



-------------------------
Success:  COSQPUS
Strategic Petroleum Reserve


Unnamed: 0_level_0,petroleum_inv_oecd
Date,Unnamed: 1_level_1
2024-12-01,345.6779
2024-11-01,345.6779
2024-10-01,345.6779



-------------------------
Success:  COSXPUS
Crude Oil Inventory (excluding SPR)


Unnamed: 0_level_0,crude_inv_total
Date,Unnamed: 1_level_1
2024-12-01,441.3223
2024-11-01,450.1912
2024-10-01,445.9828



-------------------------
Success:  MGWHUUS
Refiner Wholesale Gasoline Price


Unnamed: 0_level_0,crude_inv_spr
Date,Unnamed: 1_level_1
2024-12-01,227.8153
2024-11-01,236.5365
2024-10-01,244.194



-------------------------
Success:  DSWHUUS
Diesel Fuel Refiner Wholesale Price


Unnamed: 0_level_0,crude_inv_nonspr
Date,Unnamed: 1_level_1
2024-12-01,270.9054
2024-11-01,276.2665
2024-10-01,259.0308



-------------------------
Success:  BREPUUS
Brent crude oil spot price


Unnamed: 0_level_0,refiner_gas_price
Date,Unnamed: 1_level_1
2024-12-01,85.0
2024-11-01,85.0
2024-10-01,85.0



-------------------------
Success:  WORL
Crude oil, NGPL, and other liquids


Unnamed: 0_level_0,refiner_diesel_price
Date,Unnamed: 1_level_1
2023-02-01,98808.944413
2023-01-01,98233.41669
2022-12-01,97989.14105



-------------------------
Success:  RNGWHHD
Henry Hub Natural Gas Spot Price (Dollars per Million Btu)


Unnamed: 0_level_0,brent
Date,Unnamed: 1_level_1
2023-05-01,2.15
2023-04-01,2.16
2023-03-01,2.31



-------------------------


## Cast all data to single DataFrame

### Find the feature with most records to append all data

In [9]:
#find the longest dataseries
max_val = len(read_data[0])
max_idx = 0

# print field names and size for read_data
for i in range(0, len(read_data)):
    name = read_data[i].columns[0]
    size = len(read_data[i])
    phrase = str(i) + ': '+ name + ' (1*' + str(size) + ')'
    
    if size>max_val:
        max_val = size
        max_idx = i
    
    print(phrase)

0: wti (1*420)
1: oil_opec (1*384)
2: oil_nonopec (1*384)
3: oil_world (1*420)
4: henry_hub_ng (1*420)
5: oil_us (1*420)
6: petrol_oecd (1*420)
7: petrol_nonoecd (1*420)
8: us_cpi (1*420)
9: us_ppi (1*420)
10: us_ppi_petrol (1*264)
11: us_pmi (1*420)
12: petroleum_inv_oecd (1*420)
13: crude_inv_total (1*420)
14: crude_inv_spr (1*420)
15: crude_inv_nonspr (1*420)
16: refiner_gas_price (1*420)
17: refiner_diesel_price (1*362)
18: brent (1*317)


### Initiate a dataframe with biggest size feature
Using the feature with biggest number of records as the first field ensures that all rows can be appended to the the dataframe

In [10]:
#initiate the df with the the longest timeseries
df = read_data[max_idx]
df.index = df.index.astype('datetime64[ns]')
display(df.head(3))

Unnamed: 0_level_0,wti
Date,Unnamed: 1_level_1
2024-12-01,80.0
2024-11-01,80.0
2024-10-01,80.0


### Merge_order all data frames

In [11]:
# Merge_order all data frames on date 
for i in range(0, len(read_data)):
    if i != max_idx:
        df_temp = read_data[i]
        df_temp.index = df_temp.index.astype('datetime64[ns]')
        df = pd.merge_ordered(df, df_temp, on = 'Date')

## Check the final dataframe

In [12]:
display(df.head(10).T)

Unnamed: 0,0,1,2,3,4,5,6,7,8,9
Date,1990-01-01 00:00:00,1990-02-01 00:00:00,1990-03-01 00:00:00,1990-04-01 00:00:00,1990-05-01 00:00:00,1990-06-01 00:00:00,1990-07-01 00:00:00,1990-08-01 00:00:00,1990-09-01 00:00:00,1990-10-01 00:00:00
wti,22.863,22.113,20.388,18.426,18.2,16.695,18.454,27.307,33.508,36.04
oil_opec,,,,,,,,,,
oil_nonopec,,,,,,,,,,
oil_world,7.546173,7.497242,7.433341,7.407173,7.328342,7.105838,7.173263,7.286634,7.22382,7.541889
henry_hub_ng,42.477491,43.172266,42.289847,40.389046,40.404323,40.943268,41.593816,43.196503,40.513194,41.151578
oil_us,24.986265,24.986039,24.985897,24.985671,24.985235,24.985666,24.985875,24.985666,24.985424,24.985462
petrol_oecd,1.275,1.28,1.286,1.289,1.291,1.299,1.305,1.316,1.325,1.334
petrol_nonoecd,1.143341,1.147157,1.148556,1.140791,1.142422,1.146698,1.155263,1.1636,1.173352,1.194494
us_cpi,0.718,0.645,0.633,0.651,0.653,0.639,0.623,0.745,0.867,0.969


In [13]:
display(df.tail(10).T)

Unnamed: 0,410,411,412,413,414,415,416,417,418,419
Date,2024-03-01 00:00:00,2024-04-01 00:00:00,2024-05-01 00:00:00,2024-06-01 00:00:00,2024-07-01 00:00:00,2024-08-01 00:00:00,2024-09-01 00:00:00,2024-10-01 00:00:00,2024-11-01 00:00:00,2024-12-01 00:00:00
wti,78.0,78.0,78.0,78.0,79.0,79.0,79.0,80.0,80.0,80.0
oil_opec,28.369,28.408,28.447,28.536,28.523,28.512,28.502,28.391,28.18,28.069
oil_nonopec,68.130188,68.15356,68.403136,68.823639,69.264973,69.122894,68.97151,69.442809,69.764144,69.649661
oil_world,12.61201,12.63899,12.61096,12.63583,12.72078,12.79398,12.77607,12.86789,13.01466,13.10955
henry_hub_ng,46.186973,45.640922,45.348564,46.223902,46.500507,46.778128,46.393339,46.419088,46.350832,46.985417
oil_us,56.147475,56.119295,56.551216,57.207962,56.563034,56.175368,56.940917,55.499004,56.548445,57.564441
petrol_oecd,3.087879,3.090874,3.095711,3.100957,3.107322,3.112854,3.118263,3.124305,3.1289,3.132805
petrol_nonoecd,2.43452,2.427114,2.424784,2.424332,2.42905,2.429883,2.430122,2.428635,2.428541,2.428705
us_cpi,2.538394,2.556884,2.560309,2.555419,2.552534,2.565398,2.550709,2.537718,2.551265,2.508673


## Store the data

In [14]:
# Change directory one step back and save as the root directory
ROOT_DIR = os.path.normpath(os.getcwd() + os.sep + os.pardir)

# Change direcotry to the 'data\raw' folder
path = ROOT_DIR + '\\data\\interim\\'

In [15]:
# Set a file name
file_data = path + 'eia_data.csv'
df.to_csv(file_data)
print('Save:\n', file_data)

Save:
 d:\gitProjects\WTI_Crude_Oil_Price_Prediction_Using_ML\data\interim\eia_data.csv


In [16]:
#save meta data
file_metadata = path + 'eia_data_description.csv'

# Open a CSV file for writing
with open(file_metadata, "w", newline="") as file:
    # Create a writer object
    writer = csv.writer(file)
    
    # Write the dictionary to the CSV file with two columns
    for key, value in feature_description.items():
        writer.writerow([key, value])
# Close the file
print('Save:\n', file_metadata)

Save:
 d:\gitProjects\WTI_Crude_Oil_Price_Prediction_Using_ML\data\interim\eia_data_description.csv
