In [32]:
import pandas as pd
import numpy as np

In [33]:
# Input file locations.
# NOTE(review): the 1996-2024 path is an absolute path inside one user's home
# directory, so this cell only runs on that machine. Point it at a shared /
# relative data directory once the file's location in the repo is settled.
EMISSIONS_1990_1996_CSV = "./monthly_emissions_1990_1996.csv"
EMISSIONS_1996_2024_CSV = "/Users/nicholasgeiser/Documents/Erdos/monthly-emissions-1996-2024-final.csv"

# The 1990-1996 file is read with a default integer index.
monthly_emissions_1990_1996 = pd.read_csv(EMISSIONS_1990_1996_CSV)
# The first column of the 1996-2024 file is used as the row index.
monthly_emissions_1996_2024 = pd.read_csv(EMISSIONS_1996_2024_CSV, index_col=[0])

In [34]:
# The 1990-1996 emissions are converted from metric tons to short tons by
# multiplying by the factor below.
# Caution: this overwrites the column with a scaled copy of itself, so running
# the cell twice applies the factor twice.
SHORT_TONS_PER_METRIC_TON = 1.10231
monthly_emissions_1990_1996['monthly_emissions'] = (
    monthly_emissions_1990_1996['monthly_emissions'].mul(SHORT_TONS_PER_METRIC_TON)
)

In [35]:
# Parse the 'date' column of both frames and store it as a monthly Period ('M').
for emissions_frame in (monthly_emissions_1996_2024, monthly_emissions_1990_1996):
    emissions_frame['date'] = pd.to_datetime(emissions_frame['date']).dt.to_period('M')

# Derive calendar 'month' and 'year' columns for the 1996-2024 frame from its
# Period-typed 'date' column. Only this frame is given the two columns here.
date_parts = monthly_emissions_1996_2024['date'].dt
monthly_emissions_1996_2024['month'] = date_parts.month
monthly_emissions_1996_2024['year'] = date_parts.year



In [36]:
# Drop rows containing data for Alaska, Hawaii, and Puerto Rico.
# Filter with a boolean mask rather than dropping by index label: a label-based
# drop would also remove any kept row that happens to share an index label with
# an excluded row. `.copy()` yields an independent frame, as `drop` did.
is_excluded_state = monthly_emissions_1996_2024['State'].isin(['AK','HI','PR'])
monthly_emissions_1996_2024 = monthly_emissions_1996_2024.loc[~is_excluded_state].copy()

In [37]:
# Rename the 1996-2024 columns to 'state' and 'monthly_emissions', the names
# used for the state and emissions columns from here on.
column_renames = {
    'State': 'state',
    'total_co2': 'monthly_emissions',
}
monthly_emissions_1996_2024 = monthly_emissions_1996_2024.rename(columns=column_renames)

In [38]:
# Display the 1990-1996 frame (pandas shows a truncated head/tail preview).
monthly_emissions_1990_1996

Unnamed: 0,year,state,month,monthly_emissions,date
0,1990,AL,1,4.698354e+06,1990-01
1,1990,AL,2,4.077255e+06,1990-02
2,1990,AL,3,4.167678e+06,1990-03
3,1990,AL,4,3.828220e+06,1990-04
4,1990,AL,5,4.620473e+06,1990-05
...,...,...,...,...,...
4111,1996,WY,8,4.178368e+06,1996-08
4112,1996,WY,9,3.829726e+06,1996-09
4113,1996,WY,10,3.817083e+06,1996-10
4114,1996,WY,11,3.815030e+06,1996-11


In [39]:
# Display the 1996-2024 frame (pandas shows a truncated head/tail preview).
monthly_emissions_1996_2024

Unnamed: 0,state,date,monthly_emissions,month,year
72,AL,1995-01,1343398.462,1,1995
73,AL,1995-02,1246882.866,2,1995
74,AL,1995-03,1305461.609,3,1995
75,AL,1995-04,1407618.773,4,1995
76,AL,1995-05,2082638.094,5,1995
...,...,...,...,...,...
17185,WY,2024-08,3392821.201,8,2024
17186,WY,2024-09,2876132.311,9,2024
17187,WY,2024-10,2761315.645,10,2024
17188,WY,2024-11,2789809.785,11,2024


In [None]:
# Outer-join the two datasets on (year, state, month, date) so that every
# state-month present in either source is kept. Each source's emissions end up
# in their own suffixed column ('monthly_emissions_1990_1996' /
# 'monthly_emissions_1996_2024'); a state-month that exists in only one source
# has NaN in the other source's column.
merge_keys = ['year','state','month','date']
monthly_emissions_1990_2024 = monthly_emissions_1990_1996.merge(
    monthly_emissions_1996_2024,
    how='outer',
    on=merge_keys,
    suffixes=('_1990_1996','_1996_2024'),
)

In [41]:
# The 1990-1996 values are estimates, and some states in the 1996-2024 dataset
# go back to 1995. Where both sources have a value, prefer the 1996-2024 one;
# `combine_first` fills only its missing entries from the 1990-1996 column.
preferred_emissions = monthly_emissions_1990_2024['monthly_emissions_1996_2024']
estimated_emissions = monthly_emissions_1990_2024['monthly_emissions_1990_1996']
monthly_emissions_1990_2024['monthly_emissions'] = preferred_emissions.combine_first(estimated_emissions)

In [42]:
# The two per-source emissions columns have been folded into
# 'monthly_emissions', so remove them from the merged frame.
per_source_columns = ['monthly_emissions_1990_1996', 'monthly_emissions_1996_2024']
monthly_emissions_1990_2024 = monthly_emissions_1990_2024.drop(per_source_columns, axis='columns')

In [None]:
# Export the merged 1990-2024 frame as a CSV file in the working directory.
# The file name now carries the '.csv' extension (it was previously written
# without one), matching the naming of the input files. The row index is not
# written.
# NOTE(review): update any downstream reader that opens the old extension-less
# file name.
monthly_emissions_1990_2024.to_csv('monthly_emissions_1990_2024.csv', index=False)

Unnamed: 0,year,state,month,date,monthly_emissions
0,1990,AL,1,1990-01,4.698354e+06
1,1990,AL,2,1990-02,4.077255e+06
2,1990,AL,3,1990-03,4.167678e+06
3,1990,AL,4,1990-04,3.828220e+06
4,1990,AL,5,1990-05,4.620473e+06
...,...,...,...,...,...
20575,2024,WY,8,2024-08,3.392821e+06
20576,2024,WY,9,2024-09,2.876132e+06
20577,2024,WY,10,2024-10,2.761316e+06
20578,2024,WY,11,2024-11,2.789810e+06
