# Read Data from EIA API

The U.S. Energy Information Administration (EIA) is committed to free and open data, which it makes available through an Application Programming Interface (API) and its open data tools. EIA's API is multi-faceted and contains time-series data sets organized by the main energy categories.

## Libraries

### Installation

In [1]:
pip install python-dotenv

Note: you may need to restart the kernel to use updated packages.


### Import

In [2]:
#Import the required modules
import os
from dotenv import load_dotenv

import requests
import json

import pandas as pd
import numpy as np

import pickle

import csv

## Load API key from ".env"

In [3]:
# Load the secret API key from the .env file (it must define `eia_api_key`)
load_dotenv()

api_key = os.getenv('eia_api_key')

# Fail fast: os.getenv returns None when the variable is missing, and the
# request URLs are later built by string concatenation with this value.
# The key itself is deliberately never printed.
if not api_key:
    raise RuntimeError(
        "Environment variable 'eia_api_key' is not set; "
        "define it in your .env file before running this notebook."
    )

## Specify EIA features to import

In [4]:
# EIA series IDs to download, mapped to the EIA API route that serves them.
# Descriptions are the ones returned by the API for each series.
_STEO_SERIES_IDS = (
    'WTIPUUS',        # West Texas Intermediate Crude Oil Price
    'COPR_OPEC',      # Crude Oil Production, OPEC Total
    'PAPR_NONOPEC',   # Total non-OPEC liquids
    'COPRPUS',        # U.S. Crude Oil Production
    'PATC_OECD',      # Liquid Fuels Consumption, Total OECD
    'PATC_NON_OECD',  # Liquid Fuels Consumption, Total non-OECD
    'CICPIUS',        # Consumer Price Index (all urban consumers)
    'WPCPIUS',        # Producer Price Index: All Commodities
    'WP57IUS',        # Producer Price Index: Petroleum
    'ZOMNIUS',        # Manufacturing Production Index
    'PASC_OECD_T3',   # OECD End-of-period Commercial Crude Oil and Other Liquids Inventory
    'PASXPUS',        # Total End-of-period Commercial Crude Oil and Other Liquids Inventory
    'COSQPUS',        # Strategic Petroleum Reserve
    'COSXPUS',        # Crude Oil Inventory (excluding SPR)
    'MGWHUUS',        # Refiner Wholesale Gasoline Price
    'DSWHUUS',        # Diesel Fuel Refiner Wholesale Price
    'BREPUUS',        # Brent crude oil spot price
)

# Insertion order matters: it is the order in which the series are requested.
SERIES_IDS_DICT = dict.fromkeys(_STEO_SERIES_IDS, 'steo')
SERIES_IDS_DICT['WORL'] = 'international'    # Crude oil, NGPL, and other liquids
SERIES_IDS_DICT['RNGWHHD'] = 'natural-gas'   # Henry Hub Natural Gas Spot Price

list(SERIES_IDS_DICT)

['WTIPUUS',
 'COPR_OPEC',
 'PAPR_NONOPEC',
 'COPRPUS',
 'PATC_OECD',
 'PATC_NON_OECD',
 'CICPIUS',
 'WPCPIUS',
 'WP57IUS',
 'ZOMNIUS',
 'PASC_OECD_T3',
 'PASXPUS',
 'COSQPUS',
 'COSXPUS',
 'MGWHUUS',
 'DSWHUUS',
 'BREPUUS',
 'WORL',
 'RNGWHHD']

## Request to EIA API & store response

In [5]:
def get_url_eia_api(series_dict, feature_id, key=None):
    """Build the EIA API v2 request URL for one series.

    Parameters
    ----------
    series_dict : dict
        Maps each series ID to the EIA route that serves it: 'steo',
        'international' or 'natural-gas'.
    feature_id : str
        Series ID to request; must be a key of ``series_dict``.
    key : str, optional
        EIA API key. When omitted, the notebook-level ``api_key`` is used.

    Returns
    -------
    tuple of (str, str)
        The request URL and the name of the response field that holds the
        human-readable description of the series.

    Raises
    ------
    KeyError
        If ``feature_id`` is not a key of ``series_dict``.
    ValueError
        If the route is not one of the supported values, or if no API key
        is available.
    """
    route = series_dict[feature_id]

    # Reject unknown routes up front; previously this case only printed a
    # message and then failed on the return statement with unbound locals.
    if route not in ('steo', 'international', 'natural-gas'):
        raise ValueError(
            'Feature ID not properly defined! '
            f'Unsupported route {route!r} for feature {feature_id!r}.'
        )

    if key is None:
        key = api_key  # notebook-level key loaded from .env
    if key is None:
        raise ValueError('EIA API key is not set.')

    # Newest observations first, up to 5000 rows per request.
    paging = '&sort[0][column]=period&sort[0][direction]=desc&offset=0&length=5000'

    if route == 'steo':
        url = ('https://api.eia.gov/v2/steo/data/?api_key=' + key
               + '&frequency=monthly&data[0]=value&facets[seriesId][]=' + feature_id
               + paging)
        description_field = 'seriesDescription'

    elif route == 'international':
        # For this route feature_id is sent as the countryRegionId facet;
        # activity, product and unit are fixed.
        url = ('https://api.eia.gov/v2/international/data/?api_key=' + key
               + '&frequency=monthly&data[0]=value&facets[activityId][]=1'
               + '&facets[productId][]=55&facets[countryRegionId][]=' + feature_id
               + '&facets[unit][]=TBPD' + paging)
        description_field = 'productName'

    else:  # 'natural-gas'
        url = ('https://api.eia.gov/v2/natural-gas/pri/fut/data/?api_key=' + key
               + '&frequency=monthly&data[0]=value&facets[series][]=' + feature_id
               + paging)
        description_field = 'series-description'

    return url, description_field

In [6]:
# Collect one single-column DataFrame per series (indexed by date), plus the
# description the API reports for each series ID.
read_data = []
feature_description = {}

# Pull in data via EIA API
for feature_id in SERIES_IDS_DICT:

    url, description_field = get_url_eia_api(SERIES_IDS_DICT, feature_id)

    # A timeout keeps a stalled connection from hanging the notebook forever.
    r = requests.get(url, timeout=60)

    # Check the status BEFORE parsing the body: a failed request does not
    # carry the expected 'response'/'data' payload. The URL is kept out of
    # the error message on purpose because it embeds the API key.
    if r.status_code != 200:
        print('Error!')
        raise RuntimeError(
            f'EIA API request for {feature_id} failed with HTTP status {r.status_code}'
        )
    print('Success: ', feature_id)

    records = r.json()['response']['data']
    if not records:
        raise ValueError(f'EIA API returned no data for {feature_id}')

    # The description is repeated on every record; read it from the first one
    feature_description[feature_id] = records[0][description_field]
    print(feature_description[feature_id])

    # Keep only the period and value fields and name the value column after the series
    feature_df = pd.DataFrame(records, columns=['period', 'value'])
    feature_df.columns = ['Date', feature_id]

    # Parse the 'YYYY-MM' period strings and use them as the index
    feature_df['Date'] = pd.to_datetime(feature_df['Date'], format='%Y-%m', errors="raise")
    feature_df = feature_df.set_index('Date')

    display(feature_df.head(3))

    # Store the per-series frame; the frames are merged in a later cell
    read_data.append(feature_df)
    print('\n-------------------------')


Success:  WTIPUUS
West Texas Intermediate Crude Oil Price


Unnamed: 0_level_0,WTIPUUS
Date,Unnamed: 1_level_1
2024-12-01,72.0
2024-11-01,72.0
2024-10-01,72.0



-------------------------
Success:  COPR_OPEC
Crude Oil Production, OPEC Total


Unnamed: 0_level_0,COPR_OPEC
Date,Unnamed: 1_level_1
2024-12-01,28.88
2024-11-01,28.991
2024-10-01,29.202



-------------------------
Success:  PAPR_NONOPEC
Total non-OPEC liquids


Unnamed: 0_level_0,PAPR_NONOPEC
Date,Unnamed: 1_level_1
2024-12-01,69.202363
2024-11-01,69.352227
2024-10-01,69.099035



-------------------------
Success:  COPRPUS
U.S. Crude Oil Production


Unnamed: 0_level_0,COPRPUS
Date,Unnamed: 1_level_1
2024-12-01,12.93507
2024-11-01,12.84815
2024-10-01,12.69427



-------------------------
Success:  PATC_OECD
Liquid Fuels Consumption, Total OECD


Unnamed: 0_level_0,PATC_OECD
Date,Unnamed: 1_level_1
2024-12-01,46.983737
2024-11-01,46.382377
2024-10-01,46.39168



-------------------------
Success:  PATC_NON_OECD
Liquid Fuels Consumption, Total non-OECD


Unnamed: 0_level_0,PATC_NON_OECD
Date,Unnamed: 1_level_1
2024-12-01,57.552057
2024-11-01,56.535749
2024-10-01,55.492283



-------------------------
Success:  CICPIUS
Consumer Price Index (all urban consumers)


Unnamed: 0_level_0,CICPIUS
Date,Unnamed: 1_level_1
2024-12-01,3.138122
2024-11-01,3.133612
2024-10-01,3.128954



-------------------------
Success:  WPCPIUS
Producer Price Index: All Commodities


Unnamed: 0_level_0,WPCPIUS
Date,Unnamed: 1_level_1
2024-12-01,2.396435
2024-11-01,2.391997
2024-10-01,2.388834



-------------------------
Success:  WP57IUS
Producer Price Index: Petroleum


Unnamed: 0_level_0,WP57IUS
Date,Unnamed: 1_level_1
2024-12-01,2.308539
2024-11-01,2.359443
2024-10-01,2.401871



-------------------------
Success:  ZOMNIUS
Manufacturing Production Index


Unnamed: 0_level_0,ZOMNIUS
Date,Unnamed: 1_level_1
2024-12-01,102.8307
2024-11-01,102.6802
2024-10-01,102.5079



-------------------------
Success:  PASC_OECD_T3
OECD End-of-period Commercial Crude Oil and Other Liquids Inventory


Unnamed: 0_level_0,PASC_OECD_T3
Date,Unnamed: 1_level_1
2024-12-01,2919.799081
2024-11-01,2940.438514
2024-10-01,2934.096927



-------------------------
Success:  PASXPUS
Total End-of-period Commercial Crude Oil and Other Liquids Inventory


Unnamed: 0_level_0,PASXPUS
Date,Unnamed: 1_level_1
2024-12-01,1267.61
2024-11-01,1284.902
2024-10-01,1287.922



-------------------------
Success:  COSQPUS
Strategic Petroleum Reserve


Unnamed: 0_level_0,COSQPUS
Date,Unnamed: 1_level_1
2024-12-01,345.2105
2024-11-01,345.2105
2024-10-01,345.2105



-------------------------
Success:  COSXPUS
Crude Oil Inventory (excluding SPR)


Unnamed: 0_level_0,COSXPUS
Date,Unnamed: 1_level_1
2024-12-01,469.5096
2024-11-01,477.5722
2024-10-01,472.9599



-------------------------
Success:  MGWHUUS
Refiner Wholesale Gasoline Price


Unnamed: 0_level_0,MGWHUUS
Date,Unnamed: 1_level_1
2024-12-01,209.1272
2024-11-01,217.9178
2024-10-01,225.5785



-------------------------
Success:  DSWHUUS
Diesel Fuel Refiner Wholesale Price


Unnamed: 0_level_0,DSWHUUS
Date,Unnamed: 1_level_1
2024-12-01,245.2129
2024-11-01,249.8924
2024-10-01,252.6826



-------------------------
Success:  BREPUUS
Brent crude oil spot price


Unnamed: 0_level_0,BREPUUS
Date,Unnamed: 1_level_1
2024-12-01,78.0
2024-11-01,78.0
2024-10-01,78.0



-------------------------
Success:  WORL
Crude oil, NGPL, and other liquids


Unnamed: 0_level_0,WORL
Date,Unnamed: 1_level_1
2022-12-01,98009.234232
2022-11-01,99084.249563
2022-10-01,99141.569536



-------------------------
Success:  RNGWHHD
Henry Hub Natural Gas Spot Price (Dollars per Million Btu)


Unnamed: 0_level_0,RNGWHHD
Date,Unnamed: 1_level_1
2023-03-01,2.31
2023-02-01,2.38
2023-01-01,3.27



-------------------------


## Cast all data to single DataFrame

### Find the feature with most records to append all data

In [7]:
# Locate the series with the most rows while listing every series and its size.
# Ties keep the earliest series because only a strictly larger size replaces it.
max_idx, max_val = 0, len(read_data[0])

for position, frame in enumerate(read_data):
    n_rows = len(frame)
    print(f'{position}: {frame.columns[0]} (1*{n_rows})')

    if n_rows > max_val:
        max_idx, max_val = position, n_rows

0: WTIPUUS (1*420)
1: COPR_OPEC (1*384)
2: PAPR_NONOPEC (1*384)
3: COPRPUS (1*420)
4: PATC_OECD (1*420)
5: PATC_NON_OECD (1*420)
6: CICPIUS (1*420)
7: WPCPIUS (1*420)
8: WP57IUS (1*420)
9: ZOMNIUS (1*420)
10: PASC_OECD_T3 (1*264)
11: PASXPUS (1*420)
12: COSQPUS (1*420)
13: COSXPUS (1*420)
14: MGWHUUS (1*420)
15: DSWHUUS (1*420)
16: BREPUUS (1*420)
17: WORL (1*360)
18: RNGWHHD (1*315)


### Initialize a DataFrame with the longest series
Using the feature with the largest number of records as the first field ensures that all rows can be appended to the DataFrame.

In [8]:
# Seed the combined frame with the longest series found above
longest_series = read_data[max_idx]
# Normalize the index dtype so every frame is merged on the same datetime type
longest_series.index = longest_series.index.astype('datetime64[ns]')
df = longest_series
display(df.head(3))

Unnamed: 0_level_0,WTIPUUS
Date,Unnamed: 1_level_1
2024-12-01,72.0
2024-11-01,72.0
2024-10-01,72.0


### Merge_order all data frames

In [9]:
# Merge every series onto the longest one, aligning rows on 'Date'.
# The merge is rebuilt from read_data rather than from the current `df`, so
# re-running this cell never merges an already-merged frame (which would
# duplicate every column with _x/_y suffixes).
merged = read_data[max_idx]
merged.index = merged.index.astype('datetime64[ns]')

for i, df_temp in enumerate(read_data):
    if i == max_idx:
        continue
    # Same index dtype on both sides of the merge
    df_temp.index = df_temp.index.astype('datetime64[ns]')
    merged = pd.merge_ordered(merged, df_temp, on='Date')

df = merged

## Check the final dataframe

In [10]:
# Preview the first 10 merged rows, transposed so each series is a row and each date a column
display(df.head(10).T)

Unnamed: 0,0,1,2,3,4,5,6,7,8,9
Date,1990-01-01 00:00:00,1990-02-01 00:00:00,1990-03-01 00:00:00,1990-04-01 00:00:00,1990-05-01 00:00:00,1990-06-01 00:00:00,1990-07-01 00:00:00,1990-08-01 00:00:00,1990-09-01 00:00:00,1990-10-01 00:00:00
WTIPUUS,22.863,22.113,20.388,18.426,18.2,16.695,18.454,27.307,33.508,36.04
COPR_OPEC,,,,,,,,,,
PAPR_NONOPEC,,,,,,,,,,
COPRPUS,7.546173,7.497242,7.433341,7.407173,7.328342,7.105838,7.173263,7.286634,7.22382,7.541889
PATC_OECD,42.477491,43.172266,42.289847,40.389046,40.404323,40.943268,41.593816,43.196503,40.513194,41.151578
PATC_NON_OECD,24.986265,24.986039,24.985897,24.985671,24.985235,24.985666,24.985875,24.985666,24.985424,24.985462
CICPIUS,1.275,1.28,1.286,1.289,1.291,1.299,1.305,1.316,1.325,1.334
WPCPIUS,1.143341,1.147157,1.148556,1.140791,1.142422,1.146698,1.155263,1.1636,1.173352,1.194494
WP57IUS,0.718,0.645,0.633,0.651,0.653,0.639,0.623,0.745,0.867,0.969


In [11]:
# Preview the last 10 merged rows, transposed so each series is a row and each date a column
display(df.tail(10).T)

Unnamed: 0,410,411,412,413,414,415,416,417,418,419
Date,2024-03-01 00:00:00,2024-04-01 00:00:00,2024-05-01 00:00:00,2024-06-01 00:00:00,2024-07-01 00:00:00,2024-08-01 00:00:00,2024-09-01 00:00:00,2024-10-01 00:00:00,2024-11-01 00:00:00,2024-12-01 00:00:00
WTIPUUS,79.0,76.0,76.0,76.0,74.0,74.0,74.0,72.0,72.0,72.0
COPR_OPEC,29.18,29.219,29.258,29.347,29.334,29.323,29.313,29.202,28.991,28.88
PAPR_NONOPEC,67.913497,68.103074,68.375514,68.674344,69.058307,68.881581,68.764737,69.099035,69.352227,69.202363
COPRPUS,12.7184,12.75319,12.70042,12.68838,12.73281,12.77885,12.7864,12.69427,12.84815,12.93507
PATC_OECD,46.164425,45.678576,45.388119,46.209218,46.513988,46.69804,46.449637,46.39168,46.382377,46.983737
PATC_NON_OECD,56.135418,56.109873,56.54707,57.208484,56.562246,56.174729,56.938336,55.492283,56.535749,57.552057
CICPIUS,3.096431,3.099793,3.1043,3.109017,3.114276,3.119161,3.124007,3.128954,3.133612,3.138122
WPCPIUS,2.408241,2.392627,2.387204,2.38428,2.386159,2.386504,2.38762,2.388834,2.391997,2.396435
WP57IUS,2.57547,2.533719,2.539963,2.538756,2.489058,2.486924,2.464764,2.401871,2.359443,2.308539


## Store the data

In [12]:
# The project root is the parent of the current working directory
ROOT_DIR = os.path.normpath(os.path.join(os.getcwd(), os.pardir))

# Output folder <root>/data/interim/, built with the platform's own separator
# instead of hard-coded backslashes. The trailing '' keeps the trailing
# separator, so file names can still be appended with `path + name`.
path = os.path.join(ROOT_DIR, 'data', 'interim', '')

In [13]:
# Write the merged table to <path>eia_data.csv.
# to_csv is called with its defaults, so the frame's index is written as the first column.
file_data = f'{path}eia_data.csv'
df.to_csv(file_data)
print('Save:\n', file_data)

Save:
 D:\gitProjects\WTI_Crude_Oil_Price_Prediction_Using_ML\data\interim\eia_data.csv


In [14]:
# Save the series descriptions as a header-less two-column CSV: series ID, description
file_metadata = f'{path}eia_data_description.csv'

# The with-block closes the file; newline="" lets the csv module control line endings
with open(file_metadata, "w", newline="") as metadata_file:
    csv.writer(metadata_file).writerows(feature_description.items())

print('Save:\n', file_metadata)

Save:
 D:\gitProjects\WTI_Crude_Oil_Price_Prediction_Using_ML\data\interim\eia_data_description.csv
