In [106]:
import pandas as pd
import numpy as np

In [107]:
# Root of the data tree, given relative to the working directory.
data_dir = '../data/'
# Sub-folder under the data root that holds the electricity price file.
elec_dir = ''.join((data_dir, 'electricity/'))

# Import Data

## Data

In [108]:
# Folder with the already-processed input tables
processed_dir = data_dir + 'processed/'

# Instruments table
data_instruments = pd.read_csv(processed_dir + 'instruments_data.csv')

# Fuel generation and consumption table
data_fuel_gen_con = pd.read_csv(processed_dir + 'fuel_data.csv')

# Electricity prices: the literal string 'NA' in the file is read as missing
elc_pr_file_loc = elec_dir + 'avg_elec_price.csv'
data_elc_pr = pd.read_csv(
    elc_pr_file_loc,
    na_values=['NA'],
)

## Keys

In [109]:
# Locations of the two lookup tables (MSN codes, state FIPS codes)
msn_codes_file_loc = data_dir + 'keys/MSN_codes.csv'
state_fips_file_loc = data_dir + 'keys/state_FIPS.csv'

# Load both lookup tables, MSN codes first, then state FIPS codes
msn_codes_key, state_fips_key = (
    pd.read_csv(key_path)
    for key_path in (msn_codes_file_loc, state_fips_file_loc)
)

# Descriptive state columns; these are dropped again once the FIPS key has
# been merged onto the electricity prices.
state_fips_indicators = ['State Abbreviation', 'State Name']

# Month Key: maps each English month name to its calendar number (1-12)
month_key = {
    month_name: month_number
    for month_number, month_name in enumerate(
        ['January', 'February', 'March', 'April', 'May', 'June',
         'July', 'August', 'September', 'October', 'November', 'December'],
        start=1,
    )
}

# Clean Data

In [110]:
# Reshape the wide electricity-price table (one column per month) into one row
# per (state, month) with one price column per sector.

# Melt month columns.
# Month columns are selected by name (every column that is a key of month_key)
# instead of by position, so an added or reordered column in the CSV cannot
# silently shift the selection.
month_cols = [col for col in data_elc_pr.columns if col in month_key]
data_elc_pr = data_elc_pr.melt(
    id_vars=['State Name', 'Sector', 'units'],
    value_vars=month_cols,
    var_name='month',
    value_name='elc_price',
)
# Month name -> month number
data_elc_pr['month'] = data_elc_pr['month'].map(month_key)

# Convert from cents per kwh to dollars per thousand kwh
data_elc_pr['elc_price'] = pd.to_numeric(data_elc_pr['elc_price']) * 1000 / 100

# Pivot sector column.
# Only 'elc_price' is aggregated: summing the whole frame would also aggregate
# the 'units' id column when numeric_only is False (the pandas >= 2.0 default),
# producing extra columns after unstack and breaking the rename below.
# sum() yields 0 for groups whose prices are all missing; those zeros are
# turned back into NaN at the end of this cell.
data_elc_pr['Sector'] = data_elc_pr['Sector'].str.strip()
data_elc_pr = (
    data_elc_pr
    .groupby(['State Name', 'Sector', 'month'])['elc_price']
    .sum()
    .unstack('Sector')
    .reset_index()
)

# Fix column names.
# unstack orders the sector columns by their sorted labels; the names below
# assume exactly six sectors that sort into all/com/ind/oth/res/trn order
# -- TODO confirm against the sector labels in the source file.
data_elc_pr.columns = ['State Name', 'month', 'elc_price_all', 'elc_price_com', 'elc_price_ind', 'elc_price_oth', 'elc_price_res', 'elc_price_trn']

# Replace 0's with NaNs (a zero price is treated as "no data")
data_elc_pr = data_elc_pr.replace(to_replace=0, value=np.nan)

In [111]:
# Preview the first five rows of the reshaped price table
data_elc_pr.head(n=5)

Unnamed: 0,State Name,month,elc_price_all,elc_price_com,elc_price_ind,elc_price_oth,elc_price_res,elc_price_trn
0,Alabama,1,89.4,107.9,53.6,,110.0,
1,Alabama,2,90.1,108.8,52.8,,114.5,
2,Alabama,3,91.0,108.4,55.5,,120.7,
3,Alabama,4,92.6,110.5,58.7,,123.7,
4,Alabama,5,93.2,110.2,60.0,,120.5,


# Merge Data

In [112]:
# Upper-case and trim the state names before joining (presumably the spelling
# used in state_fips_key -- verify against the key file).
state_names_upper = data_elc_pr['State Name'].str.upper().str.strip()
data_elc_pr['State Name'] = state_names_upper

# Attach the FIPS key. No join columns are given, so pandas joins on the
# columns the two frames share, using its default inner join; rows without a
# match in the key are therefore not kept.
elc_pr_with_fips = pd.merge(data_elc_pr, state_fips_key)

# The descriptive state columns are no longer needed after the join
data_elc_pr = elc_pr_with_fips.drop(columns=state_fips_indicators)

In [113]:
# All three tables are combined on the same (month, state) key with outer
# joins, so rows present in only one table are kept.
merge_keys = ['month', 'State FIPS']

instruments_with_prices = data_instruments.merge(
    data_elc_pr, how='outer', on=merge_keys
)
data_merged = instruments_with_prices.merge(
    data_fuel_gen_con, how='outer', on=merge_keys
)

# Remove the suffixed 'NCDC Code' columns left over from the merges
data_merged = data_merged.drop(columns=['NCDC Code_x', 'NCDC Code_y'])

# Export Data

In [114]:
# Write the merged table to the processed-data folder, without the row index
merged_out_path = data_dir + 'processed/' + 'merged_data.csv'
data_merged.to_csv(merged_out_path, index=False)