# Read Data from EIA API

The U.S. Energy Information Administration (EIA) is committed to free and open data, which it makes available through an Application Programming Interface (API) and its open data tools. EIA's API is multifaceted and provides time-series data sets organized by the main energy categories.

## Libraries

### Installation

In [1]:
#!pip install python-dotenv

### Import

In [2]:
#Import the required modules
import os
from dotenv import load_dotenv

import requests
import json

import pandas as pd
import numpy as np

import pickle

import csv

## Load API key from ".env"

In [3]:
# Read the .env file, then look up the secret EIA API key
load_dotenv()

# Stays None when 'eia_api_key' is not defined
api_key = os.environ.get('eia_api_key')

## Specify EIA features to import

In [4]:
# EIA series IDs to download, each mapped to the API route that serves it
SERIES_IDS_DICT = {
    'WTIPUUS': 'steo',
    'COPR_OPEC': 'steo',
    'PAPR_NONOPEC': 'steo',
    'COPRPUS': 'steo',
    'PATC_OECD': 'steo',
    'PATC_NON_OECD': 'steo',
    'CICPIUS': 'steo',
    'WPCPIUS': 'steo',
    'WP57IUS': 'steo',
    'ZOMNIUS': 'steo',
    'PASC_OECD_T3': 'steo',
    'PASXPUS': 'steo',
    'COSQPUS': 'steo',
    'COSXPUS': 'steo',
    'MGWHUUS': 'steo',
    'DSWHUUS': 'steo',
    'BREPUUS': 'steo',
    'WORL': 'international',
    'RNGWHHD': 'natural-gas',
}

In [5]:
# Column names for the downloaded series. The download loop pairs these with
# the keys of SERIES_IDS_DICT by position, so the entries must stay in the
# same order as that dict (the matching series ID is noted on each line).
FEATURE_NAMES = [
    'wti',                   # WTIPUUS
    'oil_opec',              # COPR_OPEC
    'oil_nonopec',           # PAPR_NONOPEC
    'oil_us',                # COPRPUS
    'petrol_oecd',           # PATC_OECD
    'petrol_nonoecd',        # PATC_NON_OECD
    'us_cpi',                # CICPIUS
    'us_ppi',                # WPCPIUS
    'us_ppi_petrol',         # WP57IUS
    'us_pmi',                # ZOMNIUS
    'petroleum_inv_oecd',    # PASC_OECD_T3
    'crude_inv_total',       # PASXPUS
    'crude_inv_spr',         # COSQPUS
    'crude_inv_nonspr',      # COSXPUS
    'refiner_gas_price',     # MGWHUUS
    'refiner_diesel_price',  # DSWHUUS
    'brent',                 # BREPUUS
    'oil_world',             # WORL
    'henry_hub_ng',          # RNGWHHD
]

In [6]:
# Show the series IDs that will be requested (iterating a dict yields its keys)
list(SERIES_IDS_DICT)

## Request to EIA API & store response

In [7]:
def get_url_eia_api(series_dict, feature_id, key=None):
    """Build the EIA API v2 request URL for one series.

    Parameters
    ----------
    series_dict : dict
        Maps each series ID to the API route that serves it: 'steo',
        'international' or 'natural-gas'.
    feature_id : str
        Series ID to request; must be a key of ``series_dict``.
    key : str, optional
        EIA API key. When omitted, the module-level ``api_key`` is used.

    Returns
    -------
    tuple of (str, str)
        The request URL and the name of the response field that holds the
        series description for that route.

    Raises
    ------
    KeyError
        If ``feature_id`` is not in ``series_dict``.
    ValueError
        If the route is not one of the supported ones, or no API key is
        available.
    """
    route = series_dict[feature_id]

    # Fail with a clear message instead of reaching the return statement with
    # unassigned variables.
    if route not in ('steo', 'international', 'natural-gas'):
        raise ValueError(
            f'Feature ID not properly defined: {feature_id!r} has unknown route {route!r}'
        )

    if key is None:
        key = api_key
    if key is None:
        raise ValueError("EIA API key is not set; define 'eia_api_key' in the .env file")

    # Query-string tail shared by all three routes
    paging = '&sort[0][column]=period&sort[0][direction]=desc&offset=0&length=5000'

    if route == 'steo':
        url = (f'https://api.eia.gov/v2/steo/data/?api_key={key}'
               f'&frequency=monthly&data[0]=value&facets[seriesId][]={feature_id}'
               f'{paging}')
        description_field = 'seriesDescription'

    elif route == 'international':
        url = (f'https://api.eia.gov/v2/international/data/?api_key={key}'
               '&frequency=monthly&data[0]=value&facets[activityId][]=1'
               f'&facets[productId][]=55&facets[countryRegionId][]={feature_id}'
               f'&facets[unit][]=TBPD{paging}')
        description_field = 'productName'

    else:  # 'natural-gas'
        url = (f'https://api.eia.gov/v2/natural-gas/pri/fut/data/?api_key={key}'
               f'&frequency=monthly&data[0]=value&facets[series][]={feature_id}'
               f'{paging}')
        description_field = 'series-description'

    return url, description_field

In [8]:
# Collected results of the download loop: one single-column DataFrame per
# series, plus the description text reported for each series ID.
read_data = []
feature_description = {}

# Column names are paired with series IDs by position, so both collections
# must have the same length and be listed in the same order.
if len(FEATURE_NAMES) != len(SERIES_IDS_DICT):
    raise ValueError(
        f'FEATURE_NAMES has {len(FEATURE_NAMES)} entries but '
        f'SERIES_IDS_DICT has {len(SERIES_IDS_DICT)}'
    )

# Pull in data via EIA API
for feature_id, feature_name in zip(SERIES_IDS_DICT, FEATURE_NAMES):

    url, description_field = get_url_eia_api(SERIES_IDS_DICT, feature_id)

    # A timeout keeps a stalled connection from hanging the whole run
    r = requests.get(url, timeout=60)

    # Check the status before reading the body. The URL is deliberately left
    # out of the error message because it contains the API key.
    if r.status_code != 200:
        print('Error!')
        raise RuntimeError(
            f'EIA request for {feature_id} failed with HTTP status {r.status_code}'
        )
    print('Success: ', feature_id)

    json_data = r.json()
    records = json_data['response']['data']
    if not records:
        raise ValueError(f'EIA returned no data for {feature_id}')

    # Get the series description from the first record
    feature_description[feature_id] = records[0][description_field]
    print(feature_description[feature_id])

    # Cast data from the json pull to pandas dataframe, keeping only the
    # period and value fields
    df = pd.DataFrame(records, columns=['period', 'value'])

    # Rename columns
    df.columns = ['Date', feature_name]

    # Change to datetime and set as index
    df['Date'] = pd.to_datetime(df['Date'], format='%Y-%m', errors="raise")
    df.set_index('Date', drop=True, inplace=True)

    display(df.head(3))

    # Append the pandas dataframe to the list
    read_data.append(df)
    print('\n-------------------------')


## Cast all data to single DataFrame

### Find the feature with most records to append all data

In [9]:
# Locate the longest series: track its record count and its position in read_data
max_val = len(read_data[0])
max_idx = 0

for idx, frame in enumerate(read_data):
    n_rows = len(frame)

    # Strict comparison keeps the earliest frame when lengths tie
    if n_rows > max_val:
        max_val, max_idx = n_rows, idx

    # Report position, column name and record count of every frame
    print(f'{idx}: {frame.columns[0]} (1*{n_rows})')

### Initialize a DataFrame with the largest feature
Using the feature with the largest number of records as the first field ensures that all rows can be appended to the DataFrame.

In [10]:
# Start the combined DataFrame from the longest time series.
# NOTE: df is the same object as read_data[max_idx] (no copy is made), so the
# index assignment below also changes that list entry.
df = read_data[max_idx]
# Cast the index to datetime64[ns]
df.index = df.index.astype('datetime64[ns]')
# Preview the first three rows
display(df.head(3))

### Merge_order all data frames

In [11]:
# Fold every remaining series into df with an ordered merge on 'Date'
for idx, frame in enumerate(read_data):
    # The frame at max_idx is the one df was started from
    if idx == max_idx:
        continue

    # Cast the index to datetime64[ns] before merging
    frame.index = frame.index.astype('datetime64[ns]')
    df = pd.merge_ordered(df, frame, on='Date')

## Check the final dataframe

In [12]:
display(df.head(10).T)

In [13]:
display(df.tail(10).T)

## Store the data

In [14]:
# Parent of the current working directory, used as the project root
ROOT_DIR = os.path.normpath(os.path.join(os.getcwd(), os.pardir))

# Output folder '<root>/data/interim/'. os.path.join uses the separator of the
# running platform; hard-coded backslashes only form a directory path on
# Windows. The empty last component keeps the trailing separator, because
# file names are appended to this string directly.
path = os.path.join(ROOT_DIR, 'data', 'interim', '')

In [15]:
# Write the merged DataFrame to a CSV file in the output folder
file_data = f'{path}eia_data.csv'
df.to_csv(file_data)
print(f'Save:\n {file_data}')

In [16]:
# Store the series descriptions in a second CSV file in the output folder
file_metadata = f'{path}eia_data_description.csv'

# newline="" leaves line-ending handling to the csv module
with open(file_metadata, "w", newline="") as file:
    # One two-column row per series: ID, description
    csv.writer(file).writerows(feature_description.items())

print(f'Save:\n {file_metadata}')