In [93]:
# Import modules
import pandas as pd
import numpy as np
import plotly.graph_objects as go
from plotly.subplots import make_subplots

In [94]:
# Load the energy census / economic dataset from the local resources folder
census_csv = "resources/Energy Census and Economic Data US 2010-2014.csv"
df = pd.read_csv(filepath_or_buffer=census_csv)

# Preview the first three rows as a quick sanity check of the load
df.head(n=3)

Unnamed: 0,StateCodes,State,Region,Division,Coast,Great Lakes,TotalC2010,TotalC2011,TotalC2012,TotalC2013,...,RINTERNATIONALMIG2013,RINTERNATIONALMIG2014,RDOMESTICMIG2011,RDOMESTICMIG2012,RDOMESTICMIG2013,RDOMESTICMIG2014,RNETMIG2011,RNETMIG2012,RNETMIG2013,RNETMIG2014
0,AL,Alabama,3.0,6.0,1.0,0.0,1931522,1905207,1879716,1919365,...,1.165832,1.157861,-0.020443,-0.168414,0.396416,0.420102,1.011941,1.001333,1.562247,1.577963
1,AK,Alaska,4.0,9.0,1.0,0.0,653221,653637,649341,621107,...,3.203618,2.86976,-1.175137,-1.949571,-3.789313,-13.754494,0.948185,1.835376,-0.585695,-10.884734
2,AZ,Arizona,4.0,8.0,0.0,0.0,1383531,1424944,1395839,1414383,...,1.090035,1.091283,1.341472,-0.420875,-0.580562,-1.31305,2.317801,0.621971,0.509473,-0.221767


In [95]:
# Filter for Eastern coastal states
# NOTE(review): 'MN' is Minnesota, which is neither eastern nor coastal, while
# 'ME' (Maine) is absent from the list -- possibly a typo. The list is kept
# unchanged here because editing it alters every downstream total; confirm the
# intended set of states before changing it.
EASTERN_STATE_CODES = ['MN','NH','VT','MA','RI','CT','NY','NJ','PA','MD','OH','VA','WV','NC','SC','GA','FL']

# Boolean mask: True for rows whose 'StateCodes' value is in the list.
# Series.isin is the vectorized equivalent of a per-row `in` check.
df_filter = df['StateCodes'].isin(EASTERN_STATE_CODES)

# Keep only the selected rows, then drop the 'State' column so that
# 'StateCodes' is the only state identifier left in the frame.
df = df.loc[df_filter].drop(columns='State')
df.head(3)

Unnamed: 0,StateCodes,Region,Division,Coast,Great Lakes,TotalC2010,TotalC2011,TotalC2012,TotalC2013,TotalC2014,...,RINTERNATIONALMIG2013,RINTERNATIONALMIG2014,RDOMESTICMIG2011,RDOMESTICMIG2012,RDOMESTICMIG2013,RDOMESTICMIG2014,RNETMIG2011,RNETMIG2012,RNETMIG2013,RNETMIG2014
6,CT,1.0,1.0,1.0,0.0,764970,739130,725019,754901,750019,...,4.753602,4.73095,-3.384435,-5.611492,-4.731638,-7.286252,1.116894,-1.059166,0.021964,-2.555302
8,FL,3.0,5.0,1.0,0.0,4282673,4141711,4029903,4076406,4121680,...,5.783717,5.6873,5.540393,5.12532,4.918783,7.016123,11.359606,10.722573,10.702501,12.703423
9,GA,3.0,5.0,1.0,0.0,3100144,2982837,2767491,2782782,2850990,...,2.510526,2.470423,1.105816,1.852201,-0.576888,2.200467,3.375007,4.426268,1.933638,4.67089


In [96]:
# Give the 'StateCodes' column the shorter name 'State'
# (rename returns a new frame, which is bound back to `df`)
column_renames = {'StateCodes': 'State'}
df = df.rename(column_renames, axis='columns')
df.head(n=3)

Unnamed: 0,State,Region,Division,Coast,Great Lakes,TotalC2010,TotalC2011,TotalC2012,TotalC2013,TotalC2014,...,RINTERNATIONALMIG2013,RINTERNATIONALMIG2014,RDOMESTICMIG2011,RDOMESTICMIG2012,RDOMESTICMIG2013,RDOMESTICMIG2014,RNETMIG2011,RNETMIG2012,RNETMIG2013,RNETMIG2014
6,CT,1.0,1.0,1.0,0.0,764970,739130,725019,754901,750019,...,4.753602,4.73095,-3.384435,-5.611492,-4.731638,-7.286252,1.116894,-1.059166,0.021964,-2.555302
8,FL,3.0,5.0,1.0,0.0,4282673,4141711,4029903,4076406,4121680,...,5.783717,5.6873,5.540393,5.12532,4.918783,7.016123,11.359606,10.722573,10.702501,12.703423
9,GA,3.0,5.0,1.0,0.0,3100144,2982837,2767491,2782782,2850990,...,2.510526,2.470423,1.105816,1.852201,-0.576888,2.200467,3.375007,4.426268,1.933638,4.67089


In [108]:
# Total Production dataframe: the 'State' column plus one TotalP column per year
totalp_columns = ['State', 'TotalP2010', 'TotalP2011', 'TotalP2012', 'TotalP2013', 'TotalP2014']

# Select all rows and only the listed columns, in the listed order
totalp_df = df.loc[:, totalp_columns]
totalp_df.head(n=3)

Unnamed: 0,State,TotalP2010,TotalP2011,TotalP2012,TotalP2013,TotalP2014
6,CT,203188,195792,205073,207118,197271
8,FL,510201,500907,442188,542570,553738
9,GA,561796,549483,555238,581082,597955


In [109]:
# Write the total production table to disk as CSV
# (default to_csv settings, so the frame's index is written as the first column)
total_production_csv = r'exports/total_production.csv'
totalp_df.to_csv(path_or_buf=total_production_csv)

In [105]:
# Average total production per state
# Slice the yearly columns from totalp_df itself rather than from the wide
# census frame: totalp_df's column order is fixed by the explicit list it was
# built from, so this label slice always covers exactly TotalP2010..TotalP2014
# and does not depend on how the columns happen to be laid out in the raw CSV.
totalp_col = totalp_df.loc[:, 'TotalP2010':'TotalP2014']

# Row-wise mean across the yearly columns -> one average per state row
totalp_df['Average Total Production'] = totalp_col.mean(axis=1)
totalp_df.head(3)

Unnamed: 0,State,TotalP2010,TotalP2011,TotalP2012,TotalP2013,TotalP2014,Average Total Production
6,CT,203188,195792,205073,207118,197271,201688.4
8,FL,510201,500907,442188,542570,553738,509920.8
9,GA,561796,549483,555238,581082,597955,569110.8


In [101]:
# Sum each yearly production column down the rows (all states in totalp_df)
totalp_year_col = totalp_df.loc[:, 'TotalP2010':'TotalP2014'].sum(axis=0)
print(totalp_year_col)

# Mean of the yearly totals computed above
avg_totalp_year = totalp_year_col.mean()

# Order rows from highest to lowest 'Average Total Production';
# the sorted frame is bound back to the same name
totalp_df = totalp_df.sort_values(by='Average Total Production', ascending=False)

totalp_df.head()

TotalP2010    13637873
TotalP2011    14612630
TotalP2012    15169297
TotalP2013    16770134
TotalP2014    18741350
dtype: int64


Unnamed: 0,State,TotalP2010,TotalP2011,TotalP2012,TotalP2013,TotalP2014,Average Total Production
37,PA,3064745,3870671,4730336,5873742,7087392,4925377.2
47,WV,3699729,3820203,3720223,3809750,4154063,3840793.6
34,OH,1032440,1067943,1067944,1149882,1547368,1173115.4
45,VA,1096973,1085234,1045095,1023009,979780,1046018.2
31,NY,836703,879395,817908,872500,871720,855645.2


In [102]:
# Bar chart: one bar per row of totalp_df, x = 'State', y = 'Average Total Production'
avg_production_bars = go.Bar(
    x=totalp_df['State'],
    y=totalp_df['Average Total Production'],
)
fig = go.Figure(data=[avg_production_bars])

# Chart title, fixed canvas size, and legend hidden
fig.update_layout(
    title='<b>Average Total Energy Production by State</b>',
    width=800,
    height=500,
    showlegend=False,
)

# Label the y-axis, then render the figure
fig.update_yaxes(title='Billion BTU')
fig.show()