## Notebook function:
*Connect to the API of the U.S. Energy Information Administration (EIA) and download hourly electric grid data.*
https://www.eia.gov

API URL:
https://api.eia.gov/v2/electricity/rto/region-data/data/?frequency=hourly&data[0]=value&start=2015-07-01T00&sort[0][column]=period&sort[0][direction]=desc&offset=0&length=5000

Method:  GET

Series description:
Hourly demand, day-ahead demand forecast, net generation, and interchange by balancing authority. Source: Form EIA-930 Product: Hourly Electric Grid Monitor

API Documentation:  https://www.eia.gov/opendata/documentation.php

*API URL for inventory of operable generators (monthly):*
https://api.eia.gov/v2/electricity/operating-generator-capacity/data/?frequency=monthly&data[0]=county&data[1]=latitude&data[2]=longitude&data[3]=nameplate-capacity-mw&data[4]=net-summer-capacity-mw&data[5]=net-winter-capacity-mw&data[6]=operating-year-month&data[7]=planned-derate-summer-cap-mw&data[8]=planned-derate-year-month&data[9]=planned-retirement-year-month&data[10]=planned-uprate-summer-cap-mw&data[11]=planned-uprate-year-month&start=2018-01&end=2022-12&sort[0][column]=period&sort[0][direction]=desc&offset=0&length=5000


*API URL for electric power operations for individual power plants (monthly):*
https://api.eia.gov/v2/electricity/facility-fuel/data/?frequency=monthly&data[0]=average-heat-content&data[1]=consumption-for-eg&data[2]=consumption-for-eg-btu&data[3]=generation&data[4]=gross-generation&data[5]=total-consumption&data[6]=total-consumption-btu&start=2018-01&end=2022-12&sort[0][column]=period&sort[0][direction]=desc&offset=0&length=5000

In [1]:
import json
import os
import time
from datetime import date, datetime, timedelta

import pandas as pd
import requests
from dateutil.relativedelta import relativedelta

In [2]:
# Pieces of the request URL for the monthly "operating-generator-capacity"
# endpoint. getEIAdata() concatenates them in this order, inserting one value
# after each piece:
#   keyprefix + <api key string> + startprefix + <start> + endprefix + <end>
#   + offsetprefix + <offset> + lengthprefix + <length>
urlParts_gencap_monthly = dict(
    keyprefix='https://api.eia.gov/v2/electricity/operating-generator-capacity/data/?',
    # One requested data column per line; the last piece opens the start value.
    startprefix=''.join([
        'frequency=monthly',
        '&data[0]=county',
        '&data[1]=latitude',
        '&data[2]=longitude',
        '&data[3]=nameplate-capacity-mw',
        '&data[4]=net-summer-capacity-mw',
        '&data[5]=net-winter-capacity-mw',
        '&start=',
    ]),
    endprefix='&end=',
    offsetprefix='&sort[0][column]=period&sort[0][direction]=desc&offset=',
    lengthprefix='&length=',
)

In [3]:
# Pieces of the request URL for the monthly "facility-fuel" endpoint.
# getEIAdata() concatenates them in this order, inserting one value after each
# piece:
#   keyprefix + <api key string> + startprefix + <start> + endprefix + <end>
#   + offsetprefix + <offset> + lengthprefix + <length>
urlParts_genops_monthly = dict(
    keyprefix='https://api.eia.gov/v2/electricity/facility-fuel/data/?',
    # One requested data column per line; the last piece opens the start value.
    startprefix=''.join([
        'frequency=monthly',
        '&data[0]=average-heat-content',
        '&data[1]=consumption-for-eg',
        '&data[2]=consumption-for-eg-btu',
        '&data[3]=generation',
        '&data[4]=gross-generation',
        '&data[5]=total-consumption',
        '&data[6]=total-consumption-btu',
        '&start=',
    ]),
    endprefix='&end=',
    offsetprefix='&sort[0][column]=period&sort[0][direction]=desc&offset=',
    lengthprefix='&length=',
)

In [4]:
class EIARequestError(UnboundLocalError):
    """Raised by getEIAdata when no usable JSON response was obtained.

    It derives from UnboundLocalError purely for backward compatibility: the
    download loop in this notebook detects a failed request by catching
    UnboundLocalError, so the explicit error must stay catchable as one.
    """


#retrieves data from EIA api, returns the JSON response body as a string
def getEIAdata(api_keystring, url_dict, start, end, offset, length, timeout=60):
    """Send one GET request to the EIA API and return the response body.

    The URL is the concatenation
    keyprefix + api_keystring + startprefix + start + endprefix + end
    + offsetprefix + offset + lengthprefix + length.

    Parameters:
        api_keystring: text inserted right after ``url_dict['keyprefix']``
            (the notebook passes ``"api_key=<key>&"``).
        url_dict: mapping with the keys 'keyprefix', 'startprefix',
            'endprefix', 'offsetprefix' and 'lengthprefix'.
        start, end: values placed after 'startprefix' and 'endprefix'.
        offset, length: values placed after 'offsetprefix' and 'lengthprefix'.
        timeout: seconds passed to ``requests.get``; without a timeout a
            stalled connection would block the notebook indefinitely.

    Returns:
        The decoded JSON body, re-serialized as a string with 4-space indent.

    Raises:
        EIARequestError: the request failed or the body was not valid JSON.
            The original exception is attached as ``__cause__``.
    """
    query = (f"{url_dict['startprefix']}{start}{url_dict['endprefix']}{end}"
             f"{url_dict['offsetprefix']}{offset}{url_dict['lengthprefix']}{length}")
    url = f"{url_dict['keyprefix']}{api_keystring}{query}"
    # Same URL without the key part, so this function's own messages do not
    # print the credential into the notebook output.
    url_for_log = f"{url_dict['keyprefix']}{query}"

    try:
        response = requests.get(url, timeout=timeout)
    except requests.RequestException as exc:
        print(f'no response from {url_for_log}')
        raise EIARequestError(f'request failed: {url_for_log}') from exc

    try:
        payload = response.json()
    except ValueError as exc:
        # A body that cannot be decoded as JSON surfaces as a ValueError subclass.
        print(response)
        raise EIARequestError(
            f'response body is not JSON (HTTP {response.status_code}): {url_for_log}'
        ) from exc

    return json.dumps(payload, indent=4)

In [5]:
class EIADataMissingError(UnboundLocalError):
    """Raised by extractData when the response holds no data section.

    It derives from UnboundLocalError purely for backward compatibility: the
    download loop in this notebook detects an extraction failure by catching
    UnboundLocalError, so the explicit error must stay catchable as one.
    """


#returns subset of dictionary containing data items
def extractData(response_dict):
    """Return ``response_dict['response']['data']``.

    Parameters:
        response_dict: the decoded API response.

    Returns:
        Whatever is stored under ['response']['data'], unchanged.

    Raises:
        EIADataMissingError: 'response' or 'data' is missing. The response is
            printed first so the unexpected content can be inspected, and the
            KeyError is attached as ``__cause__``.
    """
    try:
        return response_dict['response']['data']
    except KeyError as exc:
        print("Key Error. response_dict:")
        print(response_dict)
        raise EIADataMissingError(
            "API response has no ['response']['data'] entry"
        ) from exc

In [6]:
#extracts total number of items that matched API request
def extractTotalRows(response_dict):
    """Return ``response_dict['response']['total']`` as an int.

    The download loop compares this count numerically with a row offset, so
    the value is coerced to int; a count delivered as a numeric string would
    otherwise break that comparison.
    NOTE(review): whether the API ever sends the count as a string is not
    visible here; the coercion is defensive and leaves int values unchanged.

    Raises:
        KeyError: 'response' or 'total' is missing.
        ValueError / TypeError: the stored value cannot be converted to int.
    """
    return int(response_dict['response']['total'])

In [7]:
#appends data to json file
def saveJSON(json_obj, data_file_path):
    """Append the text ``json_obj`` to the file at ``data_file_path``.

    The file is opened in append mode, so it is created when missing and any
    earlier content is kept. The text is written exactly as given: no newline
    or separator is added between successive writes.
    """
    with open(data_file_path, mode='a') as json_file:
        json_file.writelines([json_obj])

In [8]:
#appends data to csv file
def saveCSV(data_dict, csv_path, header):
    """Append ``data_dict`` to the CSV file at ``csv_path``.

    Parameters:
        data_dict: data accepted by ``pandas.DataFrame.from_dict``; in this
            notebook it is the value returned by extractData.
        csv_path: file to append to (created when missing).
        header: passed to ``DataFrame.to_csv``; True writes the column names
            before the rows, False writes rows only.

    The index is not written and rows are terminated with '\\n'.
    """
    df = pd.DataFrame.from_dict(data_dict)
    # newline='' turns off newline translation on the text handle, so the
    # '\n' terminator requested below is written as-is on every platform.
    with open(csv_path, 'a', newline='') as fout:
        df.to_csv(fout, header=header, index=False, lineterminator='\n')

In [10]:
#flow
# Download every month in [start_datetime, end_datetime] from the selected
# endpoint, page by page, and append the rows to one CSV file.

# Select the endpoint by choosing its URL-parts dictionary.
#api_url_parts = urlParts_gencap_monthly
api_url_parts = urlParts_genops_monthly

# The API key comes from the environment so the credential is not stored in
# the notebook; a key that was ever saved in a notebook should be replaced.
api_key = os.environ.get('EIA_API_KEY')
if not api_key:
    raise RuntimeError("Set the EIA_API_KEY environment variable to your EIA API key.")
api_keystring = f"api_key={api_key}&"
row_limit = 5000          # rows requested per API call (page size)

# First and last month to download (inclusive), in '%Y-%m' format.
start_datetime = '2008-01'
end_datetime = '2015-01'
#start_datetime = f"{start_datetime}T00" #API takes start and end hour in '2023-04-02T00' format
#end_datetime = f"{end_datetime}T00"
api_chill_time = 15       # minutes to wait after a failed API call
max_attempts = 8          # tries per page before the run is aborted
calls_per_pause = 20      # pause after this many calls ...
pause_seconds = 69        # ... for this many seconds

json_path = 'eiadata.json'
csv_path = 'eia.csv'
# Start with empty output files: every later write appends.
for output_path in (json_path, csv_path):
    with open(output_path, 'w'):
        pass

# Build the list of 'YYYY-MM' strings from start to end, inclusive.
month_list = []
start = datetime.strptime(start_datetime, "%Y-%m")
end = datetime.strptime(end_datetime, "%Y-%m")
datestepper = start
while datestepper <= end:
    month_list.append(datestepper.strftime("%Y-%m"))
    datestepper += relativedelta(months=+1)
print(month_list)


def _fetch_page(month, offset):
    """Download and decode one page of ``month`` starting at row ``offset``.

    Returns ``(response_dict, rows)``. If the request fails or the response
    has no data, the same offset is requested again after ``api_chill_time``
    minutes, up to ``max_attempts`` tries; then RuntimeError is raised.
    A response without data is appended to ``json_path`` for examination.
    """
    for attempt in range(1, max_attempts + 1):
        response_json = None
        try:
            # getEIAdata and extractData signal failure by raising
            # UnboundLocalError, hence the unusual exception type here.
            response_json = getEIAdata(api_keystring, api_url_parts, month, month, offset, row_limit)
            response_dict = json.loads(response_json)
            return response_dict, extractData(response_dict)
        except UnboundLocalError:
            if response_json is None:
                problem = "API response error."
            else:
                problem = "Error getting data."
                saveJSON(response_json, json_path)
        if attempt < max_attempts:
            resume_time = datetime.now() + timedelta(minutes=api_chill_time)
            print(f"{problem} Lurking until {resume_time}")
            time.sleep(60*api_chill_time)
    raise RuntimeError(
        f"{problem} Gave up on {month} at offset {offset} after {max_attempts} attempts."
    )


# The header is written only with the first rows saved, so the CSV stays one
# table instead of repeating the header at the start of every month.
write_header = True
for month in month_list:
    print(f"Month:  {month}")
    offset = 0
    total_rows = None     # unknown until the first page of the month arrives
    # NOTE(review): the counter restarts every month, so the pause below only
    # triggers for months needing calls_per_pause or more calls - confirm
    # that this is the intended pacing.
    call_count = 0
    while total_rows is None or offset < total_rows:
        d, data = _fetch_page(month, offset)
        call_count += 1
        if total_rows is None:
            total_rows = int(extractTotalRows(d))
            print(f"Total rows:  {total_rows}")
        returned_rows = len(data)
        if returned_rows == 0:
            # Nothing more to read; stopping here also prevents an endless
            # loop if the API returns fewer rows than the total it reported.
            break
        saveCSV(data, csv_path, header=write_header)
        write_header = False
        # Advance only after the page is saved, so a retried request never
        # skips rows.
        offset += returned_rows
        print(f"downloaded rows:  {offset}")
        if call_count % calls_per_pause == 0:
            time.sleep(pause_seconds)
    


['2008-01', '2008-02', '2008-03', '2008-04', '2008-05', '2008-06', '2008-07', '2008-08', '2008-09', '2008-10', '2008-11', '2008-12', '2009-01', '2009-02', '2009-03', '2009-04', '2009-05', '2009-06', '2009-07', '2009-08', '2009-09', '2009-10', '2009-11', '2009-12', '2010-01', '2010-02', '2010-03', '2010-04', '2010-05', '2010-06', '2010-07', '2010-08', '2010-09', '2010-10', '2010-11', '2010-12', '2011-01', '2011-02', '2011-03', '2011-04', '2011-05', '2011-06', '2011-07', '2011-08', '2011-09', '2011-10', '2011-11', '2011-12', '2012-01', '2012-02', '2012-03', '2012-04', '2012-05', '2012-06', '2012-07', '2012-08', '2012-09', '2012-10', '2012-11', '2012-12', '2013-01', '2013-02', '2013-03', '2013-04', '2013-05', '2013-06', '2013-07', '2013-08', '2013-09', '2013-10', '2013-11', '2013-12', '2014-01', '2014-02', '2014-03', '2014-04', '2014-05', '2014-06', '2014-07', '2014-08', '2014-09', '2014-10', '2014-11', '2014-12', '2015-01']
Month:  2008-01
Total rows:  23644
downloaded rows:  5000
downlo