In [None]:
# here, we put energy_totals_by_source.csv into proper format and output energy-by-source-final.csv
# data obtained from https://www.eia.gov/electricity/data.php#generation

import pandas as pd
import numpy as np
import warnings
warnings.filterwarnings('ignore')

# Load the raw EIA generation table.
raw_csv_path = '../SharedData/dataset-generation/energy_totals_by_source.csv'
df = pd.read_csv(raw_csv_path)

# Empty output frame: one column per field of the final dataset, in output order.
output_fields = [
    'state', 'year', 'month', 'date',
    'monthly_energy_renew', 'monthly_energy_fossil',
    'monthly_energy_coal', 'monthly_energy_total',
    'yearly_energy_renew', 'yearly_energy_fossil',
    'yearly_energy_coal', 'yearly_energy_total',
]
d = {field: [] for field in output_fields}
new_df = pd.DataFrame(data=d)

FileNotFoundError: [Errno 2] No such file or directory: './Data/energy_totals_by_source.csv'

In [None]:
# Two-letter postal codes of the states processed below (48 entries; the list
# contains neither "AK" nor "HI"). Order determines the row order of the output.
abbreviations = (
    "AL AZ AR CA CO CT DE FL GA "
    "ID IL IN IA KS KY LA ME MD "
    "MA MI MN MS MO MT NE NV NH NJ "
    "NM NY NC ND OH OK OR PA RI SC "
    "SD TN TX UT VT VA WA WV WI WY"
).split()

In [None]:
# Drop the unnamed columns and give the remaining columns short names.

df = df.drop(columns=['Unnamed: 5', 'Unnamed: 6', 'Unnamed: 7',
                      'Unnamed: 8', 'Unnamed: 9', 'Unnamed: 10'])
df = df.rename(columns={'YEAR':'year','STATE':'state','TYPE OF PRODUCER':'type','ENERGY SOURCE':'source', 'GENERATION (Megawatthours)':'mwhrs'})

# The generation column is handled as text containing thousands separators:
# strip the commas (literal match, not a regex) and convert to integers.
# The width is pinned to int64 because plain `int` is platform-dependent
# (32-bit on Windows with NumPy < 2), which cannot hold values above ~2.1e9.

df['mwhrs'] = df['mwhrs'].str.replace(',', '', regex=False).astype('int64')



In [None]:
# Values of the `source` column that are counted as renewable generation.
renewable_source = [
    'Wind',
    'Hydroelectric Conventional',
    'Solar Thermal and Photovoltaic',
    'Geothermal',
]

# Values of the `source` column that are counted as fossil generation.
fossil_source = [
    'Coal',
    'Natural Gas',
    'Petroleum',
]

In [None]:
# Expand the annual totals into one row per (state, year, month).
#
# For every state and year, the renewable, fossil, coal and overall generation
# are summed from the 'Total Electric Power Industry' rows of `df`. Only month
# 12 gets a monthly value (annual / 12); months 1-11 are left as NaN so that
# the interpolation step further down can fill them in.
#
# Rows are collected in a plain list and converted to a DataFrame once:
# enlarging a DataFrame row by row through `.loc` copies the frame on every
# assignment and becomes very slow for ~20k rows.

output_columns = [
    'state', 'year', 'month', 'date',
    'monthly_energy_renew', 'monthly_energy_fossil',
    'monthly_energy_coal', 'monthly_energy_total',
    'yearly_energy_renew', 'yearly_energy_fossil',
    'yearly_energy_coal', 'yearly_energy_total',
]

# Every aggregate below uses the same producer type, so filter it once.
industry_rows = df.loc[df.type == 'Total Electric Power Industry']

records = []
for state in abbreviations:
    state_rows = industry_rows.loc[industry_rows.state == state]
    for year in range(1990, 2024):
        year_rows = state_rows.loc[state_rows.year == year]

        # A state/year with no matching rows sums to 0.
        renew = int(year_rows.loc[year_rows.source.isin(renewable_source), 'mwhrs'].sum())
        fossil = int(year_rows.loc[year_rows.source.isin(fossil_source), 'mwhrs'].sum())
        coal = int(year_rows.loc[year_rows.source.isin(['Coal']), 'mwhrs'].sum())
        total = int(year_rows.loc[year_rows.source == 'Total', 'mwhrs'].sum())

        yearly_values = [renew, fossil, coal, total]
        december_values = [value / 12 for value in yearly_values]
        missing_values = [np.nan] * len(yearly_values)

        for month in range(1, 13):
            monthly_values = december_values if month == 12 else missing_values
            date = '{}-{:02d}'.format(year, month)
            records.append([state, year, month, date] + monthly_values + yearly_values)

# Default RangeIndex 0..N-1 matches the row numbering of a state/year/month walk.
new_df = pd.DataFrame(records, columns=output_columns)

In [None]:
# Fill the missing monthly values by linear interpolation, one state at a time
# so that values are never interpolated across two different states.
# NOTE(review): with interpolate()'s default arguments, NaNs that precede a
# state's first known value appear to stay unfilled - confirm this is intended.

cols = [
    'monthly_energy_renew',
    'monthly_energy_fossil',
    'monthly_energy_coal',
    'monthly_energy_total',
]
for state in abbreviations:
    in_state = new_df.state == state
    filled = new_df.loc[in_state, cols].interpolate(method='linear')
    new_df.loc[in_state, cols] = filled

In [None]:
# Add share-of-total columns (renewable, fossil and coal divided by total
# generation) for both the monthly and the yearly figures. The values are plain
# ratios, not multiplied by 100.

for period in ('monthly', 'yearly'):
    period_total = new_df[period + '_energy_total']
    for kind in ('renew', 'fossil', 'coal'):
        numerator = new_df[period + '_energy_' + kind]
        new_df[period + '_' + kind + '_pct'] = numerator / period_total

In [None]:
# Write the finished table to CSV, omitting the DataFrame index.
# NOTE(review): this path climbs two directories ('../../SharedData') whereas
# the input file is read from '../SharedData' - confirm both resolve correctly.

output_csv_path = '../../SharedData/dataset-generation-final/energy-by-source-final.csv'
new_df.to_csv(output_csv_path, index=False)