## Notebook function:
*Connect to the API of the U.S. Energy Information Administration (EIA) and download hourly electric grid data*
https://www.eia.gov

API URL:
https://api.eia.gov/v2/electricity/rto/region-data/data/?frequency=hourly&data[0]=value&start=2015-07-01T00&sort[0][column]=period&sort[0][direction]=desc&offset=0&length=5000

Method:  GET

Series description:
Hourly demand, day-ahead demand forecast, net generation, and interchange by balancing authority. Source: Form EIA-930 Product: Hourly Electric Grid Monitor

API Documentation:  https://www.eia.gov/opendata/documentation.php

*API URL for inventory of operable generators (monthly):*
https://api.eia.gov/v2/electricity/operating-generator-capacity/data/?frequency=monthly&data[0]=county&data[1]=latitude&data[2]=longitude&data[3]=nameplate-capacity-mw&data[4]=net-summer-capacity-mw&data[5]=net-winter-capacity-mw&data[6]=operating-year-month&data[7]=planned-derate-summer-cap-mw&data[8]=planned-derate-year-month&data[9]=planned-retirement-year-month&data[10]=planned-uprate-summer-cap-mw&data[11]=planned-uprate-year-month&start=2018-01&end=2022-12&sort[0][column]=period&sort[0][direction]=desc&offset=0&length=5000


*API URL for electric power operations for individual power plants (monthly):*
https://api.eia.gov/v2/electricity/facility-fuel/data/?frequency=monthly&data[0]=average-heat-content&data[1]=consumption-for-eg&data[2]=consumption-for-eg-btu&data[3]=generation&data[4]=gross-generation&data[5]=total-consumption&data[6]=total-consumption-btu&start=2018-01&end=2022-12&sort[0][column]=period&sort[0][direction]=desc&offset=0&length=5000

Problem:  requests for monthly generation capacity data sometimes fail with {'error': 'Error getting data.', 'code': 400}

Strategy:  when a request fails, repeatedly halve the number of rows requested at the same offset to home in on the row that triggers the error, skip that row, and record it

In [2]:
import requests
import json
import pandas as pd
import time
from datetime import datetime, timedelta
import math

https://api.eia.gov/v2/electricity/operating-generator-capacity/data/?frequency=monthly&data[0]=county&data[1]=latitude&data[2]=longitude&data[3]=nameplate-capacity-mw&data[4]=net-summer-capacity-mw&data[5]=net-winter-capacity-mw&start=2022-11&end=2022-12&sort[0][column]=period&sort[0][direction]=desc&offset=0&length=5000

In [3]:
# URL fragments for the monthly operating-generator-capacity endpoint.
# getEIAdata stitches them together around the API key, start/end period,
# offset and page length, in the order the keys are listed here.
urlParts_gencap_monthly = dict(
    keyprefix='https://api.eia.gov/v2/electricity/operating-generator-capacity/data/?',
    # one query parameter per entry; the trailing 'start=' is completed by the caller
    startprefix='&'.join((
        'frequency=monthly',
        'data[0]=county',
        'data[1]=latitude',
        'data[2]=longitude',
        'data[3]=nameplate-capacity-mw',
        'data[4]=net-summer-capacity-mw',
        'data[5]=net-winter-capacity-mw',
        'start=',
    )),
    endprefix='&end=',
    offsetprefix='&sort[0][column]=period&sort[0][direction]=desc&offset=',
    lengthprefix='&length=',
)

In [4]:
# URL fragments for the monthly facility-fuel (power plant operations) endpoint.
# Same layout as the other urlParts dictionaries so getEIAdata can use either.
urlParts_genops_monthly = dict(
    keyprefix='https://api.eia.gov/v2/electricity/facility-fuel/data/?',
    # one query parameter per entry; the trailing 'start=' is completed by the caller
    startprefix='&'.join((
        'frequency=monthly',
        'data[0]=average-heat-content',
        'data[1]=consumption-for-eg',
        'data[2]=consumption-for-eg-btu',
        'data[3]=generation',
        'data[4]=gross-generation',
        'data[5]=total-consumption',
        'data[6]=total-consumption-btu',
        'start=',
    )),
    endprefix='&end=',
    offsetprefix='&sort[0][column]=period&sort[0][direction]=desc&offset=',
    lengthprefix='&length=',
)

In [5]:
#retrieves data from EIA api, returns json with response, request, and api metadata
def getEIAdata(api_keystring, url_dict, start, end, offset, length, timeout=120):
    """Request one page of rows from the EIA API and return the body as a JSON string.

    Args:
        api_keystring: query fragment of the form ``api_key=<key>&``.
        url_dict: dictionary with the keys ``keyprefix``, ``startprefix``,
            ``endprefix``, ``offsetprefix`` and ``lengthprefix``.
        start: start period, inserted after ``startprefix``.
        end: end period, inserted after ``endprefix``.
        offset: index of the first row to return.
        length: number of rows requested.
        timeout: seconds passed to ``requests.get`` so a stalled connection
            fails instead of hanging forever; ``None`` disables the limit.

    Returns:
        The decoded response re-serialised with ``json.dumps(..., indent=4)``.

    Raises:
        UnboundLocalError: when the request fails or the body is not JSON.
            The callers in this notebook catch exactly this type (it is what
            the earlier implementation raised implicitly), so it is kept as
            the failure signal but is now raised on purpose.
    """
    url = (f"{url_dict['keyprefix']}{api_keystring}{url_dict['startprefix']}{start}{url_dict['endprefix']}"
           f"{end}{url_dict['offsetprefix']}{offset}{url_dict['lengthprefix']}{length}")
    try:
        response = requests.get(url, timeout=timeout)
    except requests.exceptions.RequestException as err:
        # NOTE(review): the URL printed here embeds the API key.
        print(f'no response from {url}')
        raise UnboundLocalError('EIA API request failed') from err
    try:
        payload = response.json()
    except ValueError as err:
        # response.json() raises a ValueError subclass when the body is not JSON
        print('No response')
        raise UnboundLocalError('EIA API response is not valid JSON') from err
    return json.dumps(payload, indent=4)

In [6]:
#returns subset of dictionary containing data items
def extractData(response_dict):
    """Return ``response_dict['response']['data']``.

    Args:
        response_dict: decoded API response.

    Returns:
        Whatever is stored under ``['response']['data']``.

    Raises:
        UnboundLocalError: when the payload has no ``['response']['data']``
            entry (for example an error body such as
            ``{'error': ..., 'code': 400}``) or is not a dictionary at all.
            The callers in this notebook catch exactly this type, so it is
            kept as the failure signal but is now raised explicitly rather
            than by returning an unassigned local.
    """
    try:
        return response_dict['response']['data']
    except (KeyError, TypeError) as err:
        print("No response")
        raise UnboundLocalError("API response has no ['response']['data'] entry") from err

In [7]:
#reads the count of items that matched the API request
def extractTotalRows(response_dict):
    """Return the value stored under ``response_dict['response']['total']``."""
    payload = response_dict['response']
    return payload['total']

In [8]:
#adds a json string to the end of a file
def saveJSON(json_obj, data_file_path):
    """Append the string ``json_obj`` to the file at ``data_file_path``.

    The file is opened in append mode, so it is created when missing and
    existing content is kept. Nothing is written between successive entries.
    """
    sink = open(data_file_path, mode='a')
    with sink:
        sink.write(json_obj)

In [9]:
#appends data to csv file
def saveCSV(data_dict, csv_path, header):
    """Append ``data_dict`` as CSV rows to the file at ``csv_path``.

    Args:
        data_dict: data accepted by ``pd.DataFrame.from_dict``.
        csv_path: file to append to; created when missing.
        header: passed to ``DataFrame.to_csv``; truthy writes the column names
            before the rows.
    """
    df = pd.DataFrame.from_dict(data_dict)
    # newline='' stops the text layer translating '\n' (to '\r\n' on Windows),
    # so the file really gets the line terminator requested below.
    with open(csv_path, 'a', newline='') as fout:
        df.to_csv(fout, header=header, index=False, lineterminator='\n')

In [10]:
# fetches one chunk, waiting out API outages; reports whether the response held data
def _fetchChunk(api_keystring, url_dict, start, end, offset, length, api_chill_time):
    """Request ``length`` rows at ``offset`` and return ``(ok, data)``.

    When getEIAdata raises UnboundLocalError the function sleeps for
    ``api_chill_time`` minutes and repeats the same request, so an outage is
    never mistaken for bad data. ``ok`` is False (and ``data`` None) when
    extractData raises UnboundLocalError for the response that was received.
    """
    while True:
        try:
            response_json = getEIAdata(api_keystring, url_dict, start, end, offset, length)
        except UnboundLocalError:
            resume_time = datetime.now() + timedelta(minutes=api_chill_time)
            print(f"API response error. Lurking until {resume_time}")
            time.sleep(60*api_chill_time)
            continue
        try:
            return True, extractData(json.loads(response_json))
        except UnboundLocalError:
            return False, None


# function to adjust rows requested until error is avoided
def searchAround(offset, row_limit, csv_path, api_keystring, url_dict, start, end, api_chill_time):
    """Shrink the request at ``offset`` until it succeeds, then grow it again.

    Phase 1 (zoom in): request ``ceil(row_limit / zoom)`` rows at ``offset``
    with zoom = 2, 4, 8, ... until a request yields data or a single-row
    request has failed. A failed single-row request is recorded as a bad row
    and skipped.

    Phase 2 (zoom out): after a success, keep requesting at the advancing
    offset while halving ``zoom`` until it reaches 1 or a request fails.
    Every successful chunk is appended to ``csv_path`` without a header.

    Args:
        offset: index of the first row of the request that failed.
        row_limit: number of rows that the failed request asked for.
        csv_path: CSV file that receives the rows retrieved here.
        api_keystring, url_dict, start, end: forwarded to getEIAdata.
        api_chill_time: minutes to wait before retrying after an API outage.

    Returns:
        dict with ``bad_rows`` (list of skipped rows), ``start_at`` (offset
        at which the caller should resume) and ``row_lim`` (size of the last
        request made here).
    """
    bad_rows = []
    zoom = 2
    success = False
    adj_rowlim = row_limit

    # Phase 1: zoom in on the failing region.
    while not success and adj_rowlim > 1:
        print("Error getting data. Adjusting zoom")
        print(f"zoom:  {zoom}")
        adj_rowlim = math.ceil(row_limit/zoom)
        print(f"{offset}, {offset + adj_rowlim}")
        # Outages are retried inside _fetchChunk. Previously the retry went
        # through this loop's condition, so an outage during the single-row
        # request ended the loop and flagged a healthy row as bad.
        success, data = _fetchChunk(api_keystring, url_dict, start, end, offset, adj_rowlim, api_chill_time)
        # zoom is doubled even on success, so phase 2 starts with a chunk
        # half the size of the one that just worked.
        zoom = zoom * 2
        if success:
            saveCSV(data, csv_path, header=False)

    if adj_rowlim == 1 and not success:
        # NOTE(review): the failing single-row request was at zero-based
        # offset `offset`; offset+1 is what gets recorded, which reads as a
        # 1-based row number -- confirm that this is the intended convention.
        bad_rows.append(offset+1)
        adj_offset = offset + 1
    else:
        adj_offset = offset + adj_rowlim

    # Phase 2: zoom back out while requests keep succeeding.
    while success and zoom > 1:
        print(f"Zooming out:  {zoom}")
        adj_rowlim = math.ceil(row_limit/zoom)
        print(f"{adj_offset}, {adj_offset + adj_rowlim}")
        success, data = _fetchChunk(api_keystring, url_dict, start, end, adj_offset, adj_rowlim, api_chill_time)
        if not success:
            # leave adj_offset and adj_rowlim at the failing request so the
            # caller retries it and searches around it in turn
            break
        saveCSV(data, csv_path, header=False)
        adj_offset += adj_rowlim
        zoom = zoom // 2  # zoom is always a power of two; keep it an int

    return {'bad_rows':bad_rows, 'start_at':adj_offset, 'row_lim':adj_rowlim}
       

In [11]:
#flow
# Downloads every row matching the configured endpoint and period into eia.csv,
# page by page. When a page comes back without data, searchAround narrows the
# request to find the offending row, which is skipped and listed in bad_rows.csv.
import os

api_url_parts = urlParts_gencap_monthly
#api_url_parts = urlParts_genops_monthly
# The API key is read from the environment instead of being stored in the notebook.
# NOTE(review): a key was previously hard-coded here; treat it as exposed and rotate it.
api_key = os.environ.get('EIA_API_KEY')
if not api_key:
    raise RuntimeError("Set the EIA_API_KEY environment variable to your EIA API key")
api_keystring = f"api_key={api_key}&"
row_limit = 5000
offset = 0
start_datetime = '2008-01'
end_datetime = '2016-10'
#start_datetime = f"{start_datetime}T00" #API takes start and end hour in '2023-04-02T00' format
#end_datetime = f"{end_datetime}T00"
api_chill_time = 15  # minutes to wait before retrying after an API outage

json_path = 'eiadata.json'
csv_path = 'eia.csv'
# start from empty output files; the save helpers only ever append
with open(json_path, 'w') as overwrite:
    pass
with open(csv_path, 'w') as overwrite:
    pass

# first page: written with the CSV header and used to learn the total row count
print(f"{offset}, {offset+row_limit}")
response_json = getEIAdata(api_keystring, api_url_parts, start_datetime, end_datetime, offset, row_limit)
#create dictionary from json object
d = json.loads(response_json)
data = extractData(d)
saveJSON(response_json, json_path)
saveCSV(data, csv_path, header=True)
returned_rows = len(data)
# int() keeps the loop condition below valid should the API report the total
# as a numeric string -- TODO confirm against the current API response.
total_rows = int(extractTotalRows(d))
print(f"Total rows:  {total_rows}")
bad_rows = []
call_count = 1
offset = call_count * row_limit
zoom_size = row_limit
while offset < total_rows:
    print(f"{offset}, {offset + min(zoom_size, row_limit)}")
    try:
        response_json = getEIAdata(api_keystring, api_url_parts, start_datetime, end_datetime, offset, zoom_size)
    except UnboundLocalError:
        # request failed or the body was not JSON: wait, then retry the same page
        resume_time = datetime.now() + timedelta(minutes=api_chill_time)
        print(f"API response error. Lurking until {resume_time}")
        time.sleep(60*api_chill_time)
        continue
    d = json.loads(response_json)
    try:
        data = extractData(d)
    except UnboundLocalError:
        # the API answered without data: keep the response for examination and
        # narrow the request down to find the row that triggers the error
        saveJSON(response_json, json_path)
        srch_dict = searchAround(offset, zoom_size, csv_path, api_keystring, api_url_parts, start_datetime, end_datetime, api_chill_time)
        if srch_dict['bad_rows']:
            bad_rows.extend(srch_dict['bad_rows'])
            print(f"Bad rows:  {bad_rows}")
        offset = srch_dict['start_at']
        zoom_size = srch_dict['row_lim']
        continue
    if zoom_size < row_limit:
        offset += zoom_size
        # grow back towards the full page size, but never request more than row_limit
        zoom_size = min(zoom_size*2, row_limit)
    else:
        offset += row_limit
    #only the first chunk and chunks without data are saved to json for examination
    saveCSV(data, csv_path, header=False)
print(f"Bad rows:  {bad_rows}")
badrows_df = pd.DataFrame(bad_rows)
# passing the path lets pandas open the file itself, so lineterminator is honoured
badrows_df.to_csv('bad_rows.csv', lineterminator='\n')


0, 5000
Total rows:  1991319
5000, 10000
10000, 15000
15000, 20000
20000, 25000
25000, 30000
30000, 35000
35000, 40000
40000, 45000
45000, 50000
50000, 55000
55000, 60000
60000, 65000
65000, 70000
70000, 75000
75000, 80000
80000, 85000
85000, 90000
90000, 95000
95000, 100000
100000, 105000
105000, 110000
110000, 115000
115000, 120000
120000, 125000
125000, 130000
130000, 135000
135000, 140000
140000, 145000
145000, 150000
150000, 155000
155000, 160000
160000, 165000
165000, 170000
170000, 175000
175000, 180000
180000, 185000
185000, 190000
190000, 195000
195000, 200000
200000, 205000
205000, 210000
210000, 215000
215000, 220000
220000, 225000
225000, 230000
230000, 235000
235000, 240000
240000, 245000
245000, 250000
250000, 255000
255000, 260000
260000, 265000
265000, 270000
270000, 275000
275000, 280000
280000, 285000
285000, 290000
290000, 295000
295000, 300000
300000, 305000
No response
API response error. Lurking until 2023-05-06 09:15:41.922509
300000, 305000
305000, 310000
310000