In [2]:
# Dependencies
import json
import os
import time
import warnings

import pandas as pd
import requests

# Load the EIA API key from the environment so the secret is never stored in the notebook.
# NOTE(review): a key was previously committed here in plain text; it should be revoked and reissued.
def load_eia_key(environ=None):
    """Return the EIA API key stored under the ``EIA_API_KEY`` variable.

    Args:
        environ: Mapping to read the key from. Defaults to ``os.environ``.

    Returns:
        The key with surrounding whitespace removed, or ``""`` when the
        variable is not set.
    """
    source = os.environ if environ is None else environ
    return source.get("EIA_API_KEY", "").strip()


eia_key = load_eia_key()
if not eia_key:
    # Warn instead of raising so the notebook can still be opened and read without a key
    warnings.warn("EIA_API_KEY is not set; set it before requesting data from the EIA API.")

In [3]:
#Parallel accumulators for the download: position k of every list describes the same record
#(state code, reporting year, energy source name and capacity value per fuel type)
state_json, year, energysource, capacity = [], [], [], []

In [4]:
#Two-letter codes (the 50 states plus DC) that the download loop iterates over, one row per initial letter
states = [
    "AK", "AL", "AR", "AZ",
    "CA", "CO", "CT",
    "DC", "DE",
    "FL",
    "GA",
    "HI",
    "IA", "ID", "IL", "IN",
    "KS", "KY",
    "LA",
    "MA", "MD", "ME", "MI", "MN", "MO", "MS", "MT",
    "NC", "ND", "NE", "NH", "NJ", "NM", "NV", "NY",
    "OH", "OK", "OR",
    "PA",
    "RI",
    "SC", "SD",
    "TN", "TX",
    "UT",
    "VA", "VT",
    "WA", "WI", "WV", "WY",
]

In [5]:
#The EIA only returns a maximum of 5000 rows per request, so this for loop makes one request per state

# Seconds to wait for the EIA server before giving up on a single request
REQUEST_TIMEOUT_SECONDS = 30

# Start from empty lists so that re-running this cell does not append a second copy of every row
for column in (state_json, year, energysource, capacity):
    column.clear()

# Print a message to let me know the search is beginning
print("Starting State Search")

# Iterate through the list of states
for state in states:
    #For each state, let the user know when it starts searching data for that state
    print(f"Requesting data for {state}")

    # Set URL (taken from EIA Documentation, only changing "state" each time).
    # The pieces below concatenate to exactly the same query string as before.
    complete_url = (
        "https://api.eia.gov/v2/electricity/state-electricity-profiles/capability/data/"
        "?frequency=annual&data[0]=capability"
        "&facets[energysourceid][]=COL&facets[energysourceid][]=GEO"
        "&facets[energysourceid][]=HYC&facets[energysourceid][]=NG"
        "&facets[energysourceid][]=NUC&facets[energysourceid][]=PET"
        "&facets[energysourceid][]=SOL&facets[energysourceid][]=WND"
        f"&facets[stateId][]={state}&facets[producertypeid][]=TOT"
        "&start=1990&end=2021"
        "&sort[0][column]=period&sort[0][direction]=desc"
        "&offset=0&length=5000"
        f"&api_key={eia_key}"
    )

    # Make an API request using that URL. A timeout stops a stalled connection from
    # hanging the notebook. Error messages from requests can include the request URL,
    # which carries the API key, so failures are re-raised without the original message.
    try:
        http_response = requests.get(complete_url, timeout=REQUEST_TIMEOUT_SECONDS)
    except requests.RequestException as error:
        raise RuntimeError(
            f"EIA request for {state} failed ({type(error).__name__})"
        ) from None

    # Stop on an HTTP error instead of trying to parse an error payload as data
    if not http_response.ok:
        raise RuntimeError(
            f"EIA request for {state} returned HTTP {http_response.status_code}"
        )

    # Convert the API response to JSON format
    stateresponse = http_response.json()

    #The data inside the json is a dictionary, and the records can be found in "response" > "data" as a list
    try:
        rows = stateresponse["response"]["data"]
    except (KeyError, TypeError):
        raise RuntimeError(
            f"Unexpected EIA payload for {state}: no response/data section"
        ) from None

    #So this for loop runs through each item in the "data" list, pulls out the necessary information
    #and appends it to the appropriate list
    for row in rows:
        # Read all four fields before appending anything: if any one is missing, every list
        # receives the placeholder, so the four lists always stay the same length.
        try:
            record = (
                row["stateId"],
                row["period"],
                row["energySourceDescription"],
                row["capability"],
            )
        except (KeyError, IndexError):
            # If any one of these doesn't work, the row will return "No Data found"
            record = ("No Data found",) * 4

        row_state, row_year, row_source, row_capacity = record
        state_json.append(row_state)
        year.append(row_year)
        energysource.append(row_source)
        capacity.append(row_capacity)

    # Let the user know the search for each state has been finished
    print(f"Data for {state} complete")

    #wait until next request to be a good person
    time.sleep(1)

#Finally, let the user know that the complete search has been finished
print("Power plant search is complete")


Starting State Search
Requesting data for AK
Data for AK complete
Requesting data for AL
Data for AL complete
Requesting data for AR
Data for AR complete
Requesting data for AZ
Data for AZ complete
Requesting data for CA
Data for CA complete
Requesting data for CO
Data for CO complete
Requesting data for CT
Data for CT complete
Requesting data for DC
Data for DC complete
Requesting data for DE
Data for DE complete
Requesting data for FL
Data for FL complete
Requesting data for GA
Data for GA complete
Requesting data for HI
Data for HI complete
Requesting data for IA
Data for IA complete
Requesting data for ID
Data for ID complete
Requesting data for IL
Data for IL complete
Requesting data for IN
Data for IN complete
Requesting data for KS
Data for KS complete
Requesting data for KY
Data for KY complete
Requesting data for LA
Data for LA complete
Requesting data for MA
Data for MA complete
Requesting data for MD
Data for MD complete
Requesting data for ME
Data for ME complete
Requesting

In [6]:
#Combine all lists into one tidy little dataframe, one column per list
# (the "Year" key used to be listed twice in this dict; one entry gives the same columns)
# NOTE(review): EIA appears to report capability in megawatts (MW), not MWh. The column
# label is kept unchanged because later analysis may depend on it; confirm the unit.
All_States_by_source_df = pd.DataFrame({
    "State": state_json,
    "Year": year,
    "Fuel Type": energysource,
    "Capacity (MWh)": capacity,
})


#Display the dataframe to check the result
All_States_by_source_df

Unnamed: 0,State,Year,Fuel Type,Capacity (MWh)
0,AK,2021,Coal,168.9
1,AK,2021,Wind,59.0
2,AK,2021,Solar,0.8
3,AK,2021,Petroleum,742.7
4,AK,2021,Natural Gas,1273.1
...,...,...,...,...
8344,WY,1991,Petroleum,5.0
8345,WY,1990,Coal,5553.0
8346,WY,1990,Natural Gas,48.0
8347,WY,1990,Hydroelectric,270.0


In [7]:
#Then publish the mighty dataframe to a csv file to be analyzed later
# Create the output folder first: to_csv fails when the target directory does not exist
os.makedirs("Output", exist_ok=True)
All_States_by_source_df.to_csv("Output/All_States_by_source.csv")