In [233]:
import numpy as np
import pandas as pd

In [234]:
# Root folder for every input and output file used below, given relative to
# the notebook's working directory. The trailing slash matters: later cells
# build paths by plain string concatenation (data_dir + '...').
data_dir  = '../data/'

## Import Data

In [235]:
# Locations of the four input tables, all relative to data_dir
solar_data_file_loc = data_dir + 'processed/solar_data.csv'
wind_data_file_loc = data_dir + 'processed/wind_data.csv'
pop_data_file_loc = data_dir + 'state_factors/state_population.csv'
area_data_file_loc = data_dir + 'state_factors/state_area.csv'

# Read each table into its own DataFrame (default read_csv settings)
solar_data = pd.read_csv(solar_data_file_loc)  # solar data
wind_data = pd.read_csv(wind_data_file_loc)    # wind data
pop_data = pd.read_csv(pop_data_file_loc)      # state population
area_data = pd.read_csv(area_data_file_loc)    # state area

In [236]:
# Columns of the FIPS key that identify a state by name/abbreviation.
# The cleaning cells drop these again after merging with the key.
state_fips_indicators = ['State Abbreviation', 'State Name']

# State FIPS codes lookup table
state_fips_file_loc = data_dir + 'keys/state_FIPS.csv'
state_fips_key = pd.read_csv(state_fips_file_loc)

## Clean Data

In [237]:
# Attach FIPS codes to the population table.
#  1. Rename 'State' to 'State Name'.
#  2. Remove the first character of every state name and upper-case the rest
#     (presumably the raw Census names carry a leading '.' and the key uses
#     upper-case names -- TODO confirm against the input files).
#  3. Merge with the FIPS key. No `on=` is given, so pandas joins on every
#     column the two frames share (presumably just 'State Name' -- verify).
#  4. Drop the name/abbreviation columns that came along with the key.
pop_data = (
    pop_data
    .rename(columns={'State': 'State Name'})
    .assign(**{
        'State Name': lambda frame: frame['State Name']
        .map(lambda name: name[1:])
        .str.upper()
    })
    .merge(state_fips_key)
    .drop(columns=state_fips_indicators)
)

In [238]:
# NOTE: this cell rebinds solar_data / wind_data, so it is meant to run once
# per fresh load. Running it again would add the prefixes a second time and
# then fail when 'Year' is no longer present to drop.

# Prepend 'Solar_' to the solar columns whose names contain 'Cons' or 'Net_G'
# (the consumption and net-generation columns); other columns are untouched.
solar_data = solar_data.rename(
    columns=lambda name: ('Solar_' + name) if ('Cons' in name or 'Net_G' in name) else name
)

# Prepend 'Wind_' to the matching wind columns in the same way
wind_data = wind_data.rename(
    columns=lambda name: ('Wind_' + name) if ('Cons' in name or 'Net_G' in name) else name
)

# Drop the column that is not needed downstream.
# NOTE(review): only solar_data is handled here; wind_data is left as loaded.
solar_data = solar_data.drop(columns=['Year'])

In [239]:
# NOTE: this cell modifies area_data in place, so it is meant to run once per
# fresh load; a second run would scale the areas again and then fail because
# 'State Name' has already been dropped.

# Convert areas from km2 to m2: every column with 'Area' in its name is cast
# to float and multiplied by 1000**2 (1 km2 = 1,000,000 m2).
for col in [name for name in area_data.columns if 'Area' in name]:
    area_data[col] = area_data[col].astype(float) * (1000 ** 2)

# Area data FIPS codes: upper-case the state names, merge with the FIPS key,
# then drop the name/abbreviation columns again.
# No `on=` is given, so the merge joins on every column the two frames share
# (presumably just 'State Name' -- verify).
area_data['State Name'] = area_data['State Name'].str.upper()
area_with_fips = area_data.merge(state_fips_key)
area_data = area_with_fips.drop(columns=state_fips_indicators)

## Merge Data

In [240]:
# Combine the four cleaned tables into one frame. Each merge uses the pandas
# defaults (inner join on every column name the two frames have in common);
# presumably that is 'State FIPS' at each step -- verify if schemas change.
renewable_data = (
    pop_data
    .merge(solar_data)   # + solar columns
    .merge(wind_data)    # + wind columns
    .merge(area_data)    # + state area columns
)

# Preview the first rows of the merged table
renewable_data.head()

Unnamed: 0,Population,State FIPS,Annual_Avg_Rad,January_Avg_Rad,February_Avg_Rad,March_Avg_Rad,April_Avg_Rad,May_Avg_Rad,June_Avg_Rad,July_Avg_Rad,...,Wind_Net_Generation_elc,Wind_Net_Generation_tot,Annual_Avg_Wind_Speed,Total_Area,Land_Area,Water_Area,Water_Inland_Area,Water_Coastal_Area,Water_Great_Lakes_Area,Water_Territorial_Area
0,4860545,1,0.001621,0.001413,0.001573,0.001741,0.001847,0.001821,0.001756,0.00165,...,0.0,0.0,3.585898,135767000000.0,131171000000.0,4597000000.0,2740000000.0,1340000000.0,0.0,516000000.0
1,6908642,4,0.002792,0.002383,0.002431,0.002832,0.003092,0.00338,0.003555,0.002774,...,542.0,542.0,3.323894,295234000000.0,294207000000.0,1026000000.0,1026000000.0,0.0,0.0,0.0
2,2988231,5,0.001632,0.001277,0.001358,0.001522,0.001789,0.001686,0.001916,0.002022,...,0.0,0.0,3.905829,137732000000.0,134771000000.0,2961000000.0,2961000000.0,0.0,0.0,0.0
3,39296476,6,0.002427,0.001584,0.001672,0.00223,0.002467,0.002909,0.003263,0.003259,...,13498.0,13509.0,3.12396,423967000000.0,403466000000.0,20501000000.0,7339000000.0,634000000.0,0.0,12528000000.0
4,5530105,8,0.002281,0.00173,0.001803,0.002128,0.002365,0.002606,0.002997,0.002796,...,9417.0,9421.0,3.30188,269601000000.0,268431000000.0,1170000000.0,1170000000.0,0.0,0.0,0.0


## Export Data

In [241]:
# Write the merged table next to the other processed files; index=False keeps
# the DataFrame's row index out of the CSV.
renewable_data_file_loc = data_dir + 'processed/renewable_data.csv'
renewable_data.to_csv(renewable_data_file_loc, index=False)

## References

 * Annual Estimates of the Resident Population for the United States, Regions, States, and Puerto Rico: April 1, 2010 to July 1, 2017 (NST-EST2017-01)  https://www.census.gov/data/tables/2017/demo/popest/state-total.html
 * State Area Measurements and Internal Point Coordinates. https://www.census.gov/geo/reference/state-area.html