## Notebook function:
*Connect to the API of the U.S. Energy Information Administration (EIA) and download hourly electric grid data.*
https://www.eia.gov

API URL:
https://api.eia.gov/v2/electricity/rto/region-data/data/?frequency=hourly&data[0]=value&start=2015-07-01T00&sort[0][column]=period&sort[0][direction]=desc&offset=0&length=5000

Method:  GET

Series description:
Hourly demand, day-ahead demand forecast, net generation, and interchange by balancing authority. Source: Form EIA-930. Product: Hourly Electric Grid Monitor.

API Documentation:  https://www.eia.gov/opendata/documentation.php

*API URL for inventory of operable generators (monthly):*
https://api.eia.gov/v2/electricity/operating-generator-capacity/data/?frequency=monthly&data[0]=county&data[1]=latitude&data[2]=longitude&data[3]=nameplate-capacity-mw&data[4]=net-summer-capacity-mw&data[5]=net-winter-capacity-mw&data[6]=operating-year-month&data[7]=planned-derate-summer-cap-mw&data[8]=planned-derate-year-month&data[9]=planned-retirement-year-month&data[10]=planned-uprate-summer-cap-mw&data[11]=planned-uprate-year-month&start=2018-01&end=2022-12&sort[0][column]=period&sort[0][direction]=desc&offset=0&length=5000


*API URL for electric power operations for individual power plants (monthly):*
https://api.eia.gov/v2/electricity/facility-fuel/data/?frequency=monthly&data[0]=average-heat-content&data[1]=consumption-for-eg&data[2]=consumption-for-eg-btu&data[3]=generation&data[4]=gross-generation&data[5]=total-consumption&data[6]=total-consumption-btu&start=2018-01&end=2022-12&sort[0][column]=period&sort[0][direction]=desc&offset=0&length=5000

In [2]:
import requests
import json
import pandas as pd
import time
from datetime import datetime, timedelta

In [3]:
# URL fragments for the monthly operable-generator inventory endpoint.
# getEIAdata() joins them in this order, inserting one value after each fragment:
#   keyprefix <api key string> startprefix <start> endprefix <end>
#   offsetprefix <offset> lengthprefix <length>
urlParts_gencap_monthly = {
    'keyprefix': 'https://api.eia.gov/v2/electricity/operating-generator-capacity/data/?',
    # one requested query parameter per line; the start value is appended after '&start='
    'startprefix': (
        'frequency=monthly'
        '&data[0]=county'
        '&data[1]=latitude'
        '&data[2]=longitude'
        '&data[3]=nameplate-capacity-mw'
        '&data[4]=net-summer-capacity-mw'
        '&data[5]=net-winter-capacity-mw'
        '&data[6]=operating-year-month'
        '&data[7]=planned-derate-summer-cap-mw'
        '&data[8]=planned-derate-year-month'
        '&data[9]=planned-retirement-year-month'
        '&data[10]=planned-uprate-summer-cap-mw'
        '&data[11]=planned-uprate-year-month'
        '&start='
    ),
    'endprefix': '&end=',
    # results are sorted by period, newest first; the offset value is appended
    'offsetprefix': (
        '&sort[0][column]=period'
        '&sort[0][direction]=desc'
        '&offset='
    ),
    'lengthprefix': '&length=',
}

In [4]:
# URL fragments for the monthly per-plant operations (facility-fuel) endpoint.
# getEIAdata() joins them in this order, inserting one value after each fragment:
#   keyprefix <api key string> startprefix <start> endprefix <end>
#   offsetprefix <offset> lengthprefix <length>
urlParts_genops_monthly = {
    'keyprefix': 'https://api.eia.gov/v2/electricity/facility-fuel/data/?',
    # one requested query parameter per line; the start value is appended after '&start='
    'startprefix': (
        'frequency=monthly'
        '&data[0]=average-heat-content'
        '&data[1]=consumption-for-eg'
        '&data[2]=consumption-for-eg-btu'
        '&data[3]=generation'
        '&data[4]=gross-generation'
        '&data[5]=total-consumption'
        '&data[6]=total-consumption-btu'
        '&start='
    ),
    'endprefix': '&end=',
    # results are sorted by period, newest first; the offset value is appended
    'offsetprefix': (
        '&sort[0][column]=period'
        '&sort[0][direction]=desc'
        '&offset='
    ),
    'lengthprefix': '&length=',
}

In [5]:
#error raised by getEIAdata when no usable JSON response was obtained
class EIARequestError(UnboundLocalError):
    """The EIA API request failed or did not return JSON.

    Derives from UnboundLocalError on purpose: a failed call has always
    surfaced to callers as UnboundLocalError, and the download loop in this
    notebook catches that type to trigger its wait-and-retry path.
    """


#retrieves data from EIA api, returns json with response, request, and api metadata
def getEIAdata(api_keystring, url_dict, start, end, offset, length, timeout=300):
    """Send one GET request to the EIA API and return the reply as a JSON string.

    The URL is assembled by concatenation, in this order:
    keyprefix, api_keystring, startprefix, start, endprefix, end,
    offsetprefix, offset, lengthprefix, length.

    Args:
        api_keystring: query fragment carrying the API key, inserted right
            after url_dict['keyprefix'].
        url_dict: dict with the keys 'keyprefix', 'startprefix', 'endprefix',
            'offsetprefix' and 'lengthprefix'.
        start: value appended after url_dict['startprefix'].
        end: value appended after url_dict['endprefix'].
        offset: value appended after url_dict['offsetprefix'].
        length: value appended after url_dict['lengthprefix'].
        timeout: seconds passed to requests.get so that a stalled connection
            cannot block forever.

    Returns:
        The decoded JSON reply re-serialised as a string with indent=4.

    Raises:
        EIARequestError: (an UnboundLocalError subclass) if the request could
            not be completed or the reply body was not JSON.
    """
    url = (f"{url_dict['keyprefix']}{api_keystring}{url_dict['startprefix']}{start}{url_dict['endprefix']}"
           f"{end}{url_dict['offsetprefix']}{offset}{url_dict['lengthprefix']}{length}")
    #print(url)
    try:
        response = requests.get(url, timeout=timeout)
    except requests.exceptions.RequestException as err:
        # Only request failures are handled here; KeyboardInterrupt and other
        # unrelated errors propagate instead of being mistaken for an API outage.
        print(f'no response from {url}')
        raise EIARequestError('no response from EIA API') from err
    try:
        payload = response.json()
    except ValueError as err:
        # The JSON decode errors raised by Response.json() are ValueError
        # subclasses; the body was not JSON (e.g. an error page).
        print(response)
        raise EIARequestError(f'EIA API reply was not JSON: {response}') from err
    return json.dumps(payload, indent=4)

In [6]:
#error raised by extractData when the response holds no data items
class EIADataError(UnboundLocalError):
    """The response dictionary has no ['response']['data'] entry.

    Derives from UnboundLocalError on purpose: that is the type this failure
    has always surfaced as, and the download loop in this notebook catches it.
    """


#returns subset of dictionary containing data items
def extractData(response_dict):
    """Return response_dict['response']['data'].

    Args:
        response_dict: the API reply already parsed into a dictionary.

    Returns:
        Whatever is stored under ['response']['data'].

    Raises:
        EIADataError: (an UnboundLocalError subclass) if either key is
            missing. The offending dictionary is printed first, and the
            original KeyError is attached as the cause.
    """
    try:
        return response_dict['response']['data']
    except KeyError as err:
        print("Key Error. response_dict:")
        print(response_dict)
        raise EIADataError(
            f"response has no ['response']['data'] entry (missing key {err})"
        ) from err

In [7]:
#extracts total number of items that matched API request
def extractTotalRows(response_dict):
    """Return response_dict['response']['total'] as an int.

    Args:
        response_dict: the API reply already parsed into a dictionary.

    Returns:
        The total as an int, so it can be compared and subtracted with the
        integer row counts used for paging.

    Raises:
        KeyError: if 'response' or 'total' is missing.
        ValueError: if the stored value cannot be converted to int.
    """
    # NOTE(review): int() also accepts a numeric string; the API may serialise
    # the total that way in some versions - confirm against the API docs.
    return int(response_dict['response']['total'])

In [8]:
#appends data to json file
def saveJSON(json_obj, data_file_path):
    """Append the string json_obj, unchanged, to the file at data_file_path.

    The file is opened in append mode, so it is created when missing and
    existing content is kept. No separator is written between calls.
    """
    with open(data_file_path, mode='a') as json_file:
        json_file.write(json_obj)

In [9]:
#appends data to csv file
def saveCSV(data_dict, csv_path, header):
    """Append data_dict to the CSV file at csv_path.

    Args:
        data_dict: data accepted by pd.DataFrame.from_dict.
        csv_path: file to append to; created when missing.
        header: passed through to DataFrame.to_csv; controls whether the
            column names are written.
    """
    # build the frame before touching the file
    frame = pd.DataFrame.from_dict(data_dict)
    with open(csv_path, mode='a') as csv_file:
        # the index column is left out; rows end with '\n'
        frame.to_csv(csv_file, index=False, header=header, lineterminator='\n')

In [10]:
#flow
#Downloads every row matching the request in pages of row_limit rows.
#All pages are appended to csv_path; the first and the last page are also
#appended to json_path for examination.
import os  #needed only here, to read the API key from the environment

#api_url_parts = urlParts_gencap_monthly
api_url_parts = urlParts_genops_monthly
#the key is read from the environment so it is not stored in the notebook
api_key = os.environ.get('EIA_API_KEY')
if not api_key:
    raise RuntimeError("Set the EIA_API_KEY environment variable to your EIA API key before running this cell.")
api_keystring = f"api_key={api_key}&"
row_limit = 5000
offset = 0
start_datetime = '2008-01'
end_datetime = '2017-06'
#start_datetime = f"{start_datetime}T00" #API takes start and end hour in '2023-04-02T00' format
#end_datetime = f"{end_datetime}T00"
api_chill_time = 15  #minutes to wait before retrying a failed request
max_bad_payloads = 3  #tries per page when the reply is JSON but holds no data

json_path = 'eiadata.json'
csv_path = 'eia.csv'
#start with empty output files; saveJSON and saveCSV only ever append
for output_path in (json_path, csv_path):
    with open(output_path, 'w'):
        pass


#sleeps for api_chill_time minutes after announcing when the download resumes
def _waitForApi():
    resume_time = datetime.now() + timedelta(minutes=api_chill_time)
    print(f"API response error. Lurking until {resume_time}")
    time.sleep(60*api_chill_time)


#requests the page starting at chunk_offset, retrying until it yields data
def _fetchChunk(chunk_offset):
    """Return (response_json, response_dict, data) for one page.

    A failed request is retried after a wait, without limit. A reply that
    parses but holds no data is saved to json_path and retried; after
    max_bad_payloads such replies for the same page a RuntimeError is raised.
    """
    bad_payloads = 0
    while True:
        try:
            chunk_json = getEIAdata(api_keystring, api_url_parts, start_datetime, end_datetime,
                                    chunk_offset, row_limit)
        except UnboundLocalError:
            #getEIAdata reports a failed request this way
            _waitForApi()
            continue
        chunk_dict = json.loads(chunk_json)
        try:
            return chunk_json, chunk_dict, extractData(chunk_dict)
        except UnboundLocalError:
            #extractData reports a missing data entry this way; keep the
            #reply for examination and ask for the same page again so that
            #no rows are skipped or written twice
            print("Error getting data.")
            saveJSON(chunk_json, json_path)
            bad_payloads += 1
            if bad_payloads >= max_bad_payloads:
                raise RuntimeError(f"no data in {bad_payloads} replies for offset {chunk_offset}; "
                                   f"see {json_path}")
            _waitForApi()


response_json, d, data = _fetchChunk(offset)
saveJSON(response_json, json_path)
saveCSV(data, csv_path, header=True)
#int() so the comparison below works even if the total is not already an int
total_rows = int(extractTotalRows(d))
print(total_rows)
call_count = 1
#the last pass of this loop requests the trailing partial page, so no
#further request is needed once it ends
while call_count * row_limit < total_rows:
    offset = call_count * row_limit
    response_json, d, data = _fetchChunk(offset)
    saveCSV(data, csv_path, header=False)
    call_count += 1
    #only save first and last chunk to json for examination
    if call_count * row_limit >= total_rows:
        saveJSON(response_json, json_path)
    print(call_count*row_limit)


3157363
10000
15000
20000
25000
30000
35000
40000
45000
50000
55000
60000
65000
70000
75000
80000
85000
90000
95000
100000
<Response [503]>
API response error. Lurking until 2023-05-06 18:31:58.932509
105000
110000
<Response [503]>
API response error. Lurking until 2023-05-06 18:48:55.707947
<Response [503]>
API response error. Lurking until 2023-05-06 19:04:37.616532
<Response [503]>
API response error. Lurking until 2023-05-06 19:20:19.125178
<Response [503]>
API response error. Lurking until 2023-05-06 19:36:00.839409
<Response [503]>
API response error. Lurking until 2023-05-06 19:51:42.558394
115000
120000
125000
<Response [503]>
API response error. Lurking until 2023-05-06 20:09:29.280731
130000
<Response [503]>
API response error. Lurking until 2023-05-06 20:25:52.765418
<Response [503]>
API response error. Lurking until 2023-05-06 20:41:34.431896
<Response [503]>
API response error. Lurking until 2023-05-06 20:57:16.172410
<Response [503]>
API response error. Lurking until 2023

: 

: 