# Read Data from EIA API

The U.S. Energy Information Administration (EIA) is committed to free and open data, making it available through an Application Programming Interface (API) and its open data tools. EIA's API is multifaceted and contains the following time-series data sets organized by the main energy categories.

## Libraries

### Installation

In [5]:
#pip install python-dotenv

### Import

In [2]:
#Import the required modules
import os
from dotenv import load_dotenv

import requests
import json

import pandas as pd
import numpy as np

import pickle

## Load API key from ".env"

In [3]:
# Load secret api_key from .env file
load_dotenv()

api_key = os.getenv('eia_api_key')

# Fail fast with a clear message: os.getenv returns None when the variable is
# missing, which would otherwise only surface later as a TypeError when the
# key is used to build the request.
if not api_key:
    raise RuntimeError(
        "'eia_api_key' is not set; define it in the .env file or the environment"
    )
# NOTE: do not print the key here -- notebook output is saved with the file.

## Specify EIA features to import

In [12]:
# Single source of truth: feature name -> EIA series ID.
# Keeping each pair on one line makes it impossible for the two lists below
# to drift out of alignment when a feature is added or removed.
EIA_SERIES_BY_FEATURE = {
    'wti_price': 'STEO.WTIPUUS.M',
    'oil_production_opec': 'STEO.COPR_OPEC.M',
    'oil_production_nonopec': 'STEO.PAPR_NONOPEC.M',
    'oil_production_world': 'INTL.55-1-WORL-TBPD.M',
    'henry_hub_ng_price': 'NG.RNGWHHD.M',
    'oil_production_us': 'STEO.COPRPUS.M',
    'petrol_consumption_oecd': 'STEO.PATC_OECD.M',
    'petrol_consumption_nonoecd': 'STEO.PATC_NON_OECD.M',
    'us_cpi': 'STEO.CICPIUS.M',
    'us_ppi': 'STEO.WPCPIUS.M',
    'us_ppi_petroleum': 'STEO.WP57IUS.M',
    'us_pmi': 'STEO.ZOMNIUS.M',
    'petroleum_inventory_oecd': 'STEO.PASC_OECD_T3.M',
    'crude_oil_inventory_total': 'STEO.PASXPUS.M',
    'crude_oil_inventory_spr': 'STEO.COSQPUS.M',
    'crude_oil_inventory_nonspr': 'STEO.COSXPUS.M',
    'refiner_wholesale_gasoline_price': 'STEO.MGWHUUS.M',
    'refiner_wholesale_diesel_price': 'STEO.DSWHUUS.M',
    'brent_price': 'STEO.BREPUUS.M',
}

# EIA feature names, in insertion order of the mapping above.
FEATURE_NAMES = list(EIA_SERIES_BY_FEATURE)

# EIA Series IDs for the features, index-aligned with FEATURE_NAMES.
FEATURE_KEYS = list(EIA_SERIES_BY_FEATURE.values())

## Request to EIA API & store response

In [13]:
# Initialize list; this is the final list that will store one DataFrame per
# successfully fetched series.
read_data = []

# EIA series endpoint. HTTPS keeps the API key out of plaintext traffic.
# NOTE(review): EIA has been moving clients from this v1 `/series/` endpoint
# to API v2 -- confirm the endpoint is still served before relying on it.
EIA_SERIES_URL = 'https://api.eia.gov/series/'

# Pull in data via EIA API, one request per (series ID, feature name) pair.
# zip() walks the two index-aligned lists together instead of indexing both.
for series_id, feature_name in zip(FEATURE_KEYS, FEATURE_NAMES):
    # Let requests build and URL-encode the query string; the timeout stops a
    # stalled connection from hanging the notebook indefinitely.
    r = requests.get(
        EIA_SERIES_URL,
        params={'api_key': api_key, 'series_id': series_id},
        timeout=30,
    )

    # Check the status BEFORE parsing: an error response is not guaranteed to
    # carry a JSON body with a 'series' entry.
    if r.status_code != 200:
        print('Error! ', series_id, '- HTTP status', r.status_code)
        continue

    json_data = r.json()

    # A 200 response can still lack usable data (e.g. bad key or series ID);
    # report it and skip rather than failing on a missing 'series' list.
    series = json_data.get('series')
    if not series:
        print('Error! ', series_id, '- no series in response:', json_data.get('error'))
        continue

    print('Success: ', series_id)

    # Cast the json pull to pandas dataframe: each record is a
    # [period, value] pair, with the period given as YYYYMM.
    df = pd.DataFrame(series[0].get('data'),
                      columns=['Date', feature_name])
    df['Date'] = pd.to_datetime(df['Date'], format='%Y%m', errors="raise")
    df.set_index('Date', drop=True, inplace=True)

    # Collect the per-feature dataframe
    read_data.append(df)

Success:  STEO.WTIPUUS.M
Success:  STEO.COPR_OPEC.M
Success:  STEO.PAPR_NONOPEC.M
Success:  INTL.55-1-WORL-TBPD.M
Success:  NG.RNGWHHD.M
Success:  STEO.COPRPUS.M
Success:  STEO.PATC_OECD.M
Success:  STEO.PATC_NON_OECD.M
Success:  STEO.CICPIUS.M
Success:  STEO.WPCPIUS.M
Success:  STEO.WP57IUS.M
Success:  STEO.ZOMNIUS.M
Success:  STEO.PASC_OECD_T3.M
Success:  STEO.PASXPUS.M
Success:  STEO.COSQPUS.M
Success:  STEO.COSXPUS.M
Success:  STEO.MGWHUUS.M
Success:  STEO.DSWHUUS.M
Success:  STEO.BREPUUS.M


## Cast all data to single DataFrame

### Find the feature with most records to append all data

In [6]:
# Locate the longest data series, seeding the search with the first frame.
max_val = len(read_data[0])
max_idx = 0

# Report "<position>: <column name> (1*<row count>)" for every frame while
# tracking the first frame that holds the largest row count.
for i, frame in enumerate(read_data):
    name = frame.columns[0]
    size = len(frame)

    if size > max_val:
        max_val, max_idx = size, i

    print(f'{i}: {name} (1*{size})')

0: wti_price (1*408)
1: oil_production_opec (1*372)
2: oil_production_nonopec (1*372)
3: oil_production_world (1*355)
4: henry_hub_ng_price (1*310)
5: oil_production_us (1*408)
6: petrol_consumption_oecd (1*408)
7: petrol_consumption_nonoecd (1*408)
8: us_cpi (1*408)
9: us_ppi (1*408)
10: us_ppi_petroleum (1*408)
11: us_pmi (1*408)
12: petroleum_inventory_oecd (1*252)
13: crude_oil_inventory_total (1*408)
14: crude_oil_inventory_spr (1*408)
15: crude_oil_inventory_nonspr (1*408)
16: refiner_wholesale_gasoline_price (1*408)
17: refiner_wholesale_diesel_price (1*408)
18: brent_price (1*408)


### Initiate a dataframe with biggest size feature
Using the feature with the largest number of records as the first field ensures that all rows can be appended to the dataframe

In [7]:
# Seed the combined dataframe with the longest time series.
seed_frame = read_data[max_idx]
# Normalise the index to nanosecond datetimes so later merges on 'Date' align.
seed_frame.index = seed_frame.index.astype('datetime64[ns]')
df = seed_frame

# Preview the first three records, transposed so dates run across the top.
preview = df.head(3).T
display(preview)

Date,2023-12-01,2023-11-01,2023-10-01
wti_price,92.0,92.0,91.0


### Merge_order all data frames

In [8]:
# Fold every remaining series into df with an ordered merge on 'Date'.
for position, frame in enumerate(read_data):
    # The longest series already seeds df, so it is not merged again.
    if position == max_idx:
        continue

    # Same index dtype as df so the 'Date' keys compare equal.
    frame.index = frame.index.astype('datetime64[ns]')
    df = pd.merge_ordered(df, frame, on='Date')

## Check the final dataframe

In [9]:
# Show the last three merged records, transposed so each feature is a row.
last_rows = df.tail(3)
display(last_rows.T)

Unnamed: 0,405,406,407
Date,2023-10-01 00:00:00,2023-11-01 00:00:00,2023-12-01 00:00:00
wti_price,91.0,92.0,92.0
oil_production_opec,28.724806,28.623965,28.623125
oil_production_nonopec,66.693706,66.887016,66.696367
oil_production_world,,,
henry_hub_ng_price,,,
oil_production_us,12.32337,12.53371,12.57993
petrol_consumption_oecd,45.911357,45.948852,46.613551
petrol_consumption_nonoecd,53.936641,54.878101,55.717633
us_cpi,3.076277,3.084238,3.091933


## Store the data

In [10]:
# Resolve the project root as the parent of the current working directory
ROOT_DIR = os.path.normpath(os.path.join(os.getcwd(), os.pardir))

# Target folder is <root>/data/interim. os.path.join picks the separator for
# the running platform instead of hard-coding Windows backslashes; the
# trailing separator is kept so a file name can be appended to `path` directly.
path = os.path.join(ROOT_DIR, 'data', 'interim') + os.sep

# Set a file name and write the merged dataframe as CSV
file_save = path + 'eia_data_raw.csv'
df.to_csv(file_save)

print('Save:\n', file_save)

Save:
 D:\gitProjects\springboard_capstone_1\Springboard_Capstone_01\data\interim\eia_data_raw.csv
