# Get EIA Demand Data

Functions to query EIA's (U.S. Energy Information Administration) OpenData API for hourly electricity demand data. This notebook generates a single CSV file for each EIA 1) balancing authority, 2) region, and 3) the contiguous US, containing all available EIA hourly demand data.


Author:
T. Ruggles
14 June 2019

This code was written using `python3.7`.


# EIA API Resources

EIA provides some commands here: https://www.eia.gov/opendata/commands.php


# EIA Electricity Demand

Web interface for EIA electricity demand data: https://www.eia.gov/opendata/qb.php?category=2122628

Web interface for EIA day-ahead forecasted electricity demand data: https://www.eia.gov/opendata/qb.php?category=2122627


A real-time display of the U.S. interconnect is available here: https://www.eia.gov/realtime_grid/


# Details

In the cases where the result of the EIA API query skipped
an hour, the associated row will have a demand value of `MISSING`.
In the cases where the result of the EIA API query returned NONE for
an hour, the associated row will have a demand value of `EMPTY`.
These values are kept distinct to help inform further study of the EIA data set.

Note: the first 5 hours of July 1st 2015 are empty for all BAs. Because data reporting began using local time, the West Coast BAs are missing 8 hours of reporting for July 1st 2015 (UTC time).

In [None]:
import urllib.request
import urllib.parse
import json
import csv
import os
import datetime
from collections import OrderedDict
import subprocess

# Getting an EIA API key

EIA provides open data and an API for accessing them. To use their API you must first get a key here: https://www.eia.gov/opendata/register.php

In [None]:
# Personal EIA OpenData API key. Replace the placeholder with your own key;
# the query functions in this notebook interpolate it into each request URL.
EIA_API_KEY='YOUR_EIA_API_KEY_HERE' # as a string
# Echo the key as a quick sanity check.
# NOTE(review): this leaves the key visible in the cell output -- consider
# clearing the output before sharing or committing the notebook.
print(EIA_API_KEY)

# Function definitions

In [None]:
# Query EIA to get list of regions for which hourly electricity demand data is available
def get_regions_data():
    """Fetch the EIA category listing for hourly electricity demand.

    Requests category 2122628 from the EIA category endpoint, authenticating
    with the module-level ``EIA_API_KEY``.

    Returns:
        The JSON response body parsed with ``json.loads``.

    Raises:
        urllib.error.URLError: if the HTTP request fails.
        json.JSONDecodeError: if the response body is not valid JSON.
    """
    url = 'http://api.eia.gov/category/?api_key={}&category_id=2122628&format=json'.format(EIA_API_KEY)

    # Use the response as a context manager so the connection is closed
    # even if reading or decoding the body raises.
    with urllib.request.urlopen(url) as regions_query:
        regions_response = regions_query.read().decode('utf-8')

    return json.loads(regions_response)



# EIA changed API mapping and now we need to be able to change between
# category_id and series_id
def category_id_to_series_id_demand(category_id):
    """Look up the series_id listed first under an EIA category.

    Args:
        category_id: EIA category identifier, interpolated into the
            category endpoint URL.

    Returns:
        The ``series_id`` of the first entry in the category's
        ``childseries`` list.

    Raises:
        urllib.error.URLError: if the HTTP request fails.
        json.JSONDecodeError: if the response body is not valid JSON.
        KeyError, IndexError: if the response has no
            ``category.childseries[0].series_id`` entry.
    """
    url = 'http://api.eia.gov/category/?api_key={}&category_id={}&format=json'.format(EIA_API_KEY, category_id)

    # Context manager guarantees the HTTP response is closed after reading.
    with urllib.request.urlopen(url) as region_query:
        region_response = region_query.read().decode('utf-8')
    region_data = json.loads(region_response)

    # Only the first child series is used.
    return region_data['category']['childseries'][0]['series_id']



# Query EIA for hourly electric demand data for a given region
def get_regional_data(series_id):
    """Fetch the EIA series identified by ``series_id``.

    Args:
        series_id: EIA series identifier, interpolated into the series
            endpoint URL.

    Returns:
        The JSON response body parsed with ``json.loads``.

    Raises:
        urllib.error.URLError: if the HTTP request fails.
        json.JSONDecodeError: if the response body is not valid JSON.
    """
    url = 'http://api.eia.gov/series/?api_key={}&series_id={}&format=json'.format(EIA_API_KEY, series_id)

    # Context manager guarantees the HTTP response is closed after reading.
    with urllib.request.urlopen(url) as region_query:
        region_response = region_query.read().decode('utf-8')
    region_data = json.loads(region_response)

    # For checking initial raw EIA output
    #with open('data/{}_raw.csv'.format(series_id), 'w', newline='') as csvfile:
    #    csvfile.write(json.dumps(region_data, sort_keys=True, indent=4))

    return region_data



# Query EIA for forecasted hourly electric demand data for a given region
def get_forecast_regional_data(series_id):
    """Fetch the forecast series that corresponds to a demand ``series_id``.

    Args:
        series_id: series identifier of the realized demand. The forecast
            identifier is derived from it by replacing ``-ALL.D.H`` with
            ``-ALL.DF.H``.

    Returns:
        The JSON response body parsed with ``json.loads``.

    Raises:
        urllib.error.URLError: if the HTTP request fails.
        json.JSONDecodeError: if the response body is not valid JSON.
    """
    # The series_id for the forecasted demand is identical to that of the realized demand with a minor string replacement
    forecast_series_id = series_id.replace('-ALL.D.H','-ALL.DF.H')
    url = 'http://api.eia.gov/series/?api_key={}&series_id={}&format=json'.format(EIA_API_KEY, forecast_series_id)

    # Context manager guarantees the HTTP response is closed after reading.
    with urllib.request.urlopen(url) as region_query:
        region_response = region_query.read().decode('utf-8')

    return json.loads(region_response)



# Build the complete hourly timeline: first entry is midnight of start_date,
# last entry is 23:00 of the day before end_date
def generate_full_time_series(start_date, end_date):
    """Return one ``datetime`` per hour from ``start_date`` up to ``end_date``.

    Args:
        start_date: first day of the range (inclusive).
        end_date: day at which the range stops (exclusive).

    Returns:
        A list of ``datetime.datetime`` values, 24 per whole day between the
        two dates, in chronological order. Empty when ``end_date`` is not
        after ``start_date``.
    """
    first_hour = datetime.datetime.combine(start_date, datetime.time(0, 0))
    hour_count = 24 * (end_date - start_date).days

    return [first_hour + datetime.timedelta(hours=offset) for offset in range(hour_count)]


# Copy one EIA series into one column of the hourly table
def _fill_demand_column(hourly_values, series_rows, column):
    """Write EIA ``[timestamp, value, ...]`` rows into ``hourly_values``.

    Args:
        hourly_values: mapping of timestamp string to a two-item list
            ``[demand, forecast]``; modified in place.
        series_rows: iterable of rows whose first item is the timestamp
            string and whose second item is the reported value.
        column: index (0 or 1) of the list slot to overwrite.
    """
    for row in series_rows:
        timestamp = row[0]
        # Skip dates outside the specified range
        if timestamp not in hourly_values:
            continue
        value = row[1]
        # A reported-but-null hour is recorded as EMPTY, distinct from an
        # hour the API never returned (which keeps its MISSING default).
        hourly_values[timestamp][column] = 'EMPTY' if value is None else value


# Save region hourly electric demand data to a format usable by MEM
def save_file(series_id, region_data, region_forecast_data, full_date_range):
    """Write realized and forecast demand for one region to ``./data/<region>.csv``.

    The region name used for the file is ``series_id`` with the ``EBA.``
    and ``-ALL.D.H`` parts removed. The CSV has one row per hour in
    ``full_date_range`` with columns ``date_time``, ``demand (MW)`` and
    ``forecast demand (MW)``. Hours absent from a series are written as
    ``MISSING``; hours whose value is ``None`` are written as ``EMPTY``.
    Series entries whose timestamp is not in ``full_date_range`` are ignored.

    Both series are merged before the output file is opened, so a malformed
    API payload does not leave a truncated CSV behind. The ``./data``
    directory is created if it does not exist.

    Args:
        series_id: EIA series identifier of the realized demand.
        region_data: parsed EIA response; rows are read from
            ``region_data['series'][0]['data']``.
        region_forecast_data: parsed EIA response for the forecast; rows are
            read from ``region_forecast_data['series'][0]['data']``.
        full_date_range: iterable of ``datetime`` values, one per hour to
            include in the output.

    Raises:
        KeyError, IndexError: if either response lacks ``series[0].data``.
        OSError: if the output file cannot be written.
    """
    region_id = series_id.replace('EBA.','').replace('-ALL.D.H','')

    # Every requested hour starts as MISSING for both columns; keys use the
    # same timestamp format as the EIA rows so they can be matched directly.
    hourly_values = OrderedDict()
    for hour in full_date_range:
        hourly_values[hour.strftime("%Y%m%dT%HZ")] = ['MISSING', 'MISSING']

    # Actual realized demand
    _fill_demand_column(hourly_values, region_data['series'][0]['data'], 0)
    # Day ahead forecasted demand
    _fill_demand_column(hourly_values, region_forecast_data['series'][0]['data'], 1)

    os.makedirs('./data', exist_ok=True)
    with open('./data/{}.csv'.format(region_id), 'w', newline='') as csvfile:

        fieldnames = ['date_time', 'demand (MW)', 'forecast demand (MW)']
        writer = csv.DictWriter(csvfile, fieldnames=fieldnames)
        writer.writeheader()

        for timestamp, values in hourly_values.items():
            # From EIA form 930 instructions:
            # "Report all data as hourly integrated values in megawatts by hour ending time."
            writer.writerow({
                'date_time': datetime.datetime.strptime(timestamp, '%Y%m%dT%HZ'),
                'demand (MW)': values[0],
                'forecast demand (MW)': values[1],
            })

# Running the queries

You can adjust the date range of the output CSV files with `start_date` and `end_date`.

In [None]:
# Fetch the list of regions that have hourly demand data
regions_data = get_regions_data()

# Make data directory; exist_ok avoids the check-then-create race and makes
# re-running the cell a no-op when the directory is already there
os.makedirs('./data', exist_ok=True)

# Date range of interest
start_date = datetime.date(2015, 7, 1) # EIA demand data starts in July of 2015
end_date = datetime.date(2020, 2, 1) # Can update this as time progresses
full_date_range = generate_full_time_series(start_date, end_date)

# For each region: resolve its demand series_id, download realized and
# forecast demand, and write one CSV per region into ./data
for region in regions_data['category']['childcategories']:

    series_id = category_id_to_series_id_demand(region['category_id'])
    print("Getting data for: {} with series_id {}".format(region['name'], series_id))
    region_data = get_regional_data(series_id)
    region_forecast_data = get_forecast_regional_data(series_id)
    save_file(series_id, region_data, region_forecast_data, full_date_range)