In [1]:
import pandas as pd
import requests
import numpy as np
from pathlib import Path

We obtain annual CO2 emissions totals for 1990-1996 in the electric power sector, by state, from the US Energy Information Administration's (EIA) API:

In [5]:
api_key_EIA = ''  # Replace with your actual API key (do not commit a real key)

# The EIA API returns at most 5000 rows per request, so the result set is
# fetched page by page, advancing `offset` until a short page is returned.
page_size = 5000
offset = 0

data_all = []
while True:
    # Rebuild the URL on every pass so the current offset is actually sent;
    # a URL built once outside the loop would request the first page forever.
    url = f"https://api.eia.gov/v2/co2-emissions/co2-emissions-aggregates/data/?frequency=annual&data[0]=value&facets[sectorId][]=EC&start=1990&end=1996&sort[0][column]=period&sort[0][direction]=desc&offset={offset}&length={page_size}&api_key={api_key_EIA}"
    response = requests.get(url, timeout=60)

    # Print status code for debugging
    print(f"Status Code: {response.status_code}")

    # If there's an error, print the full response and stop paging
    if response.status_code == 200:
        print("Request successful")
    else:
        print(f"Request failed with status code: {response.status_code}")
        print(response.text)
        break

    data = response.json()

    # Stop if the payload does not have the expected response -> data nesting
    if "response" not in data or "data" not in data["response"]:
        print("Unexpected response structure:", data)
        break

    page_rows = data["response"]["data"]
    data_all.extend(page_rows)

    # A page shorter than the page size means there is nothing left to fetch
    if len(page_rows) < page_size:
        break

    offset += page_size

# One row per record returned by the API
emissions_1990_1996 = pd.DataFrame(data_all)
print(len(emissions_1990_1996))

Status Code: 200
Request successful
1456


In [6]:
# Keep only the rows whose fuelId is 'TO' (presumably the all-fuels total; confirm
# against the EIA facet list), keep only the period / state / value columns, and
# remove the rows for 'US', 'AK' and 'HI'.
emissions_1990_1996 = (
    emissions_1990_1996
    .loc[lambda frame: frame['fuelId'] == 'TO', ['period', 'stateId', 'value']]
    .loc[lambda frame: ~frame['stateId'].isin(['US', 'AK', 'HI'])]
)

In [7]:
# Rename the EIA field names to the column names used by the later cells
emissions_1990_1996 = emissions_1990_1996.rename(
    columns={
        'period': 'year',
        'stateId': 'state',
        'value': 'total_co2',
    }
)

In [8]:
# Cast the values to float and scale from megatons to tons.
# NOTE: this overwrites total_co2 in place, so re-running the cell scales it again.
emissions_1990_1996['total_co2'] = (
    emissions_1990_1996['total_co2']
    .astype(float)
    .mul(1000000)
)

In [9]:
# Display the cleaned annual frame (columns: year, state, total_co2) as a sanity check
emissions_1990_1996

Unnamed: 0,year,state,total_co2
1,1996,WY,40630558.0
5,1996,WI,39691630.0
9,1996,WV,78739642.0
13,1996,WA,10737829.0
17,1996,VA,33564559.0
...,...,...,...
1439,1990,VA,23438072.0
1443,1990,WA,7532108.0
1447,1990,WV,71001562.0
1451,1990,WI,33198849.0


The EIA API gives us only annual totals for 1990-1996. To estimate monthly emissions for those years, we use the monthly emissions data for 1996-2024 to construct seasonal weights. For each state-month pair, the weight is the state's total emissions in that calendar month (summed over all years in the monthly data) divided by the state's total emissions over the same period, so the weights for a given state sum to 1. We then multiply each 1990-1996 annual total by these weights. The weights are held in a dictionary in which each key is a (state, month) pair and each value is the corresponding share of that state's emissions.

In [10]:
# Read the monthly emissions file, using its first column as the row index.
# NOTE(review): this is an absolute, machine-specific path; the notebook will only
# run elsewhere if the file is placed at the same location. The file name says
# 1996-2024, but the preview below also shows 1995 rows - confirm the coverage.
monthly_emissions_1996_2024 = pd.read_csv(
    filepath_or_buffer="/Users/nicholasgeiser/Documents/Erdos/monthly-emissions-1996-2024-final.csv",
    index_col=[0],
)

In [11]:
# Remove the Alaska, Hawaii and Puerto Rico rows (dropped by index label), then
# derive integer year and month columns from the 'date' column.
monthly_emissions_1996_2024 = (
    monthly_emissions_1996_2024
    .drop(
        index=monthly_emissions_1996_2024.loc[
            monthly_emissions_1996_2024['State'].isin(['AK', 'HI', 'PR'])
        ].index
    )
    .assign(
        year=lambda frame: pd.to_datetime(frame['date']).dt.year,
        month=lambda frame: pd.to_datetime(frame['date']).dt.month,
    )
)

In [12]:
# Display the monthly frame, now including the derived year and month columns
monthly_emissions_1996_2024

Unnamed: 0,State,date,total_co2,year,month
72,AL,1995-01,1343398.462,1995,1
73,AL,1995-02,1246882.866,1995,2
74,AL,1995-03,1305461.609,1995,3
75,AL,1995-04,1407618.773,1995,4
76,AL,1995-05,2082638.094,1995,5
...,...,...,...,...,...
17185,WY,2024-08,3392821.201,2024,8
17186,WY,2024-09,2876132.311,2024,9
17187,WY,2024-10,2761315.645,2024,10
17188,WY,2024-11,2789809.785,2024,11


In [13]:
# Build the monthly weights as a dictionary keyed by (State, month).
# Numerator: emissions summed per state and calendar month over all rows.
state_month_totals = monthly_emissions_1996_2024.groupby(['State', 'month'])['total_co2'].sum()
# Denominator: emissions summed per state over all rows.
state_totals = monthly_emissions_1996_2024.groupby('State')['total_co2'].sum()

# The division aligns the two series on the shared 'State' index level, so each
# state-month total is divided by its own state's total.
monthly_proportions = (state_month_totals / state_totals).to_dict()

In [14]:
# Expand every annual row into twelve rows, one per calendar month, by
# cross-joining with a one-column frame holding the month numbers 1-12
months = pd.DataFrame({'month': list(range(1, 13))})
emissions_1990_1996 = pd.merge(emissions_1990_1996, months, how='cross')

In [15]:
# Construct monthly emissions for each state: scale each row's annual total_co2 by
# the weight stored in the monthly_proportions dictionary for its (state, month) pair.
state_month_keys = list(zip(emissions_1990_1996['state'], emissions_1990_1996['month']))
monthly_weights = [monthly_proportions.get(key) for key in state_month_keys]

# A pair with no weight would otherwise surface as an opaque
# "float * NoneType" TypeError, so report the offending pairs explicitly.
missing_keys = sorted(
    {key for key, weight in zip(state_month_keys, monthly_weights) if weight is None}
)
if missing_keys:
    raise KeyError(f"No monthly weight for (state, month) pairs: {missing_keys}")

# Vectorized multiply; the weights are positional, in the same row order as the frame
emissions_1990_1996['monthly_emissions'] = (
    emissions_1990_1996['total_co2'] * np.asarray(monthly_weights, dtype=float)
)

In [17]:
# Write the expanded frame to the shared data folder as CSV, without the row index.
# The path is relative, so it resolves against the notebook's working directory.
filepath = Path('../../SharedData/dataset-generation-final/monthly_emissions_1990_1996.csv')
emissions_1990_1996.to_csv(path_or_buf=filepath, index=False)