In [1]:
# Import modules
import pandas as pd
import numpy as np
import plotly.graph_objects as go
from plotly.subplots import make_subplots

In [2]:
# Load the 2010-2014 energy / census / economic dataset into a DataFrame
census_csv = "resources/Energy Census and Economic Data US 2010-2014.csv"
df = pd.read_csv(filepath_or_buffer=census_csv)

# Preview the first rows
df.head()

Unnamed: 0,StateCodes,State,Region,Division,Coast,Great Lakes,TotalC2010,TotalC2011,TotalC2012,TotalC2013,...,RINTERNATIONALMIG2013,RINTERNATIONALMIG2014,RDOMESTICMIG2011,RDOMESTICMIG2012,RDOMESTICMIG2013,RDOMESTICMIG2014,RNETMIG2011,RNETMIG2012,RNETMIG2013,RNETMIG2014
0,AL,Alabama,3.0,6.0,1.0,0.0,1931522,1905207,1879716,1919365,...,1.165832,1.157861,-0.020443,-0.168414,0.396416,0.420102,1.011941,1.001333,1.562247,1.577963
1,AK,Alaska,4.0,9.0,1.0,0.0,653221,653637,649341,621107,...,3.203618,2.86976,-1.175137,-1.949571,-3.789313,-13.754494,0.948185,1.835376,-0.585695,-10.884734
2,AZ,Arizona,4.0,8.0,0.0,0.0,1383531,1424944,1395839,1414383,...,1.090035,1.091283,1.341472,-0.420875,-0.580562,-1.31305,2.317801,0.621971,0.509473,-0.221767
3,AR,Arkansas,3.0,7.0,0.0,0.0,1120632,1122544,1067642,1096438,...,2.141877,2.129805,1.369514,5.131282,3.910476,6.280636,3.336628,7.155212,6.052353,8.410441
4,CA,California,4.0,9.0,1.0,0.0,7760629,7777115,7564063,7665241,...,4.207353,4.177389,-1.162079,-1.173951,-1.341226,-0.830982,2.761377,2.77277,2.866127,3.346406


In [3]:
# Filter for Eastern coastal states
# NOTE(review): the list also contains Gulf and inland states (e.g. TX, LA,
# MN, OH) -- confirm it matches the intended "Eastern coastal" selection.
selected_states = ['MN', 'NH', 'VT', 'MA', 'RI', 'CT', 'NY', 'NJ', 'PA', 'MD', 'OH',
                   'VA', 'WV', 'NC', 'SC', 'GA', 'FL', 'TX', 'LA', 'MI', 'AL']

# Boolean mask: True for rows whose state code is in the list.
# Series.isin is the vectorised equivalent of a per-row `in` check.
df_filter = df['StateCodes'].isin(selected_states)

# Apply the mask, then drop the full state name column; the two-letter code
# in 'StateCodes' is kept as the state identifier.
df = df[df_filter]
df = df.drop(columns='State')
df.head(3)

Unnamed: 0,StateCodes,Region,Division,Coast,Great Lakes,TotalC2010,TotalC2011,TotalC2012,TotalC2013,TotalC2014,...,RINTERNATIONALMIG2013,RINTERNATIONALMIG2014,RDOMESTICMIG2011,RDOMESTICMIG2012,RDOMESTICMIG2013,RDOMESTICMIG2014,RNETMIG2011,RNETMIG2012,RNETMIG2013,RNETMIG2014
0,AL,3.0,6.0,1.0,0.0,1931522,1905207,1879716,1919365,1958221,...,1.165832,1.157861,-0.020443,-0.168414,0.396416,0.420102,1.011941,1.001333,1.562247,1.577963
6,CT,1.0,1.0,1.0,0.0,764970,739130,725019,754901,750019,...,4.753602,4.73095,-3.384435,-5.611492,-4.731638,-7.286252,1.116894,-1.059166,0.021964,-2.555302
8,FL,3.0,5.0,1.0,0.0,4282673,4141711,4029903,4076406,4121680,...,5.783717,5.6873,5.540393,5.12532,4.918783,7.016123,11.359606,10.722573,10.702501,12.703423


In [4]:
# Relabel the 'StateCodes' column as 'State'
df = df.rename({'StateCodes': 'State'}, axis='columns')

# Preview the result
df.head(3)

Unnamed: 0,State,Region,Division,Coast,Great Lakes,TotalC2010,TotalC2011,TotalC2012,TotalC2013,TotalC2014,...,RINTERNATIONALMIG2013,RINTERNATIONALMIG2014,RDOMESTICMIG2011,RDOMESTICMIG2012,RDOMESTICMIG2013,RDOMESTICMIG2014,RNETMIG2011,RNETMIG2012,RNETMIG2013,RNETMIG2014
0,AL,3.0,6.0,1.0,0.0,1931522,1905207,1879716,1919365,1958221,...,1.165832,1.157861,-0.020443,-0.168414,0.396416,0.420102,1.011941,1.001333,1.562247,1.577963
6,CT,1.0,1.0,1.0,0.0,764970,739130,725019,754901,750019,...,4.753602,4.73095,-3.384435,-5.611492,-4.731638,-7.286252,1.116894,-1.059166,0.021964,-2.555302
8,FL,3.0,5.0,1.0,0.0,4282673,4141711,4029903,4076406,4121680,...,5.783717,5.6873,5.540393,5.12532,4.918783,7.016123,11.359606,10.722573,10.702501,12.703423


In [22]:
# Total Production dataframe
# Columns: 'State', then TotalP, CoalP and HydroP for each year 2010-2014
# (in that order, so label slices such as 'CoalP2010':'CoalP2014' select
# exactly the five yearly columns of one series).
production_cols = ['State'] + [prefix + str(year)
                               for prefix in ('TotalP', 'CoalP', 'HydroP')
                               for year in range(2010, 2015)]

# Take an explicit copy so that columns added to total_df afterwards are
# written to an independent frame rather than relying on whether .loc
# returned a view or a copy of df.
total_df = df.loc[:, production_cols].copy()
total_df.head(3)

Unnamed: 0,State,TotalP2010,TotalP2011,TotalP2012,TotalP2013,TotalP2014,CoalP2010,CoalP2011,CoalP2012,CoalP2013,CoalP2014,HydroP2010,HydroP2011,HydroP2012,HydroP2013,HydroP2014
0,AL,1419613,1400108,1433370,1463647,1353725,493094,468671,488084,469162,414366,8704,8884,7435,12899,9467
6,CT,203188,195792,205073,207118,197271,0,0,0,0,0,391,567,312,402,434
8,FL,510201,500907,442188,542570,553738,0,0,0,0,0,177,182,151,254,211


In [9]:
# Write the production subset to CSV
export_csv = 'exports/total_coal_hydro.csv'
total_df.to_csv(export_csv)

In [101]:
# Average total production per state across the 2010-2014 columns
totalp_col = total_df.loc[:, 'TotalP2010':'TotalP2014']
total_df['Average Total Production'] = totalp_col.mean(axis=1)

# Total production of all states combined, one value per year
# (reuses the slice above instead of selecting the same columns again)
totalp_year_col = totalp_col.sum()

# Mean of the yearly all-state totals
avg_totalp_year = totalp_year_col.mean()

# Sort by 'Average Total Production', highest first.
# Rebinding the name instead of sorting in place keeps the cell safe to
# re-run and avoids mutating a frame other names may still point to.
total_df = total_df.sort_values(by=['Average Total Production'], ascending=False)

In [93]:
# Average coal production per state across the 2010-2014 columns
totalc_col = total_df.loc[:, 'CoalP2010':'CoalP2014']
total_df['Average Total Coal Production'] = totalc_col.mean(axis=1)

# Coal production of all states combined, one value per year
totalc_year_col = totalc_col.sum()

# Mean of the yearly all-state coal totals
avg_totalc_year = totalc_year_col.mean()

# Sort by 'Average Total Coal Production', highest first.
# sort_values(..., inplace=True) returns None, so assigning its result would
# leave total_coal as None; sorting without inplace yields a real DataFrame.
total_coal = total_df.sort_values(by=['Average Total Coal Production'], ascending=False)

# total_df itself is kept in the same coal-sorted order for the cells that
# follow; the copy keeps total_coal independent of later changes to total_df.
total_df = total_coal.copy()

CoalP2010    7185661
CoalP2011    7266857
CoalP2012    6759191
CoalP2013    6440332
CoalP2014    6432430
dtype: int64


In [98]:
# Average hydro production per state across the 2010-2014 columns
totalh_col = total_df.loc[:, 'HydroP2010':'HydroP2014']
total_df['Average Total Hydro Production'] = totalh_col.mean(axis=1)

# Hydro production of all states combined, one value per year
totalh_year_col = totalh_col.sum()

# Mean of the yearly all-state hydro totals
avg_totalh_year = totalh_year_col.mean()

# Sort by 'Average Total Hydro Production', highest first.
# sort_values(..., inplace=True) returns None, so assigning its result would
# leave total_hydro as None; sorting without inplace yields a real DataFrame.
total_hydro = total_df.sort_values(by=['Average Total Hydro Production'], ascending=False)

# total_df itself is kept in the same hydro-sorted order for the cells that
# follow; the copy keeps total_hydro independent of later changes to total_df.
total_df = total_hydro.copy()

In [104]:
# Create plot: one full-width panel on the top row, two panels on the bottom row
fig = make_subplots(
    rows=2, cols=2,
    specs=[[{"colspan": 2}, None],
           [{}, {}]],
    subplot_titles=("Average Total Energy Production",
                    "Average Total Coal Production",
                    "Average Total Hydro Production"))

# One entry per panel: (column to plot, bar colour, grid row, grid column)
panels = [
    ('Average Total Production', 'blue', 1, 1),
    ('Average Total Coal Production', 'lightsalmon', 2, 1),
    ('Average Total Hydro Production', 'lightblue', 2, 2),
]

# Add traces
for column, color, row, col in panels:
    # Rank the states by the metric shown in this panel, so the bar order
    # does not depend on which sort cell was executed last.
    ranked = total_df.sort_values(by=column, ascending=False)
    fig.add_trace(go.Bar(x=ranked['State'], y=ranked[column], marker_color=color),
                  row=row, col=col)
    # Every panel gets the unit label on its y axis (including the hydro panel).
    fig.update_yaxes(title_text='Billion BTU', row=row, col=col)

# Update title
fig.update_layout(title='<b>Total Energy Production by State</b>', width=900, height=700, showlegend=False)
fig.show()

# Save a static copy of the figure next to the exported CSV
fig.write_image("exports/total_coal_hydro.png")

Exception: In order to reference traces by row and column, you must first use plotly.tools.make_subplots to create the figure with a subplot grid.

In [99]:
# Coal Production
# Rank the states by their coal average here, so the bar order does not
# depend on which sort cell was executed last.
coal_ranked = total_df.sort_values(by='Average Total Coal Production', ascending=False)

fig_coal = go.Figure()
fig_coal.add_trace(go.Bar(
    x=coal_ranked['State'],
    y=coal_ranked['Average Total Coal Production'],
    name='Coal Production',
    marker_color='lightsalmon'))

# Update yaxes
fig_coal.update_yaxes(title_text='Billion BTU')

# Update title; tilt the state labels so they do not overlap
fig_coal.update_layout(title='<b>Average Total Coal Production by State</b>', xaxis_tickangle=-45)
fig_coal.show()

In [100]:
# Hydro Production
# Rank the states by their hydro average here, so the bar order does not
# depend on which sort cell was executed last.
hydro_ranked = total_df.sort_values(by='Average Total Hydro Production', ascending=False)

fig_hydro = go.Figure()
fig_hydro.add_trace(go.Bar(
    x=hydro_ranked['State'],
    y=hydro_ranked['Average Total Hydro Production'],
    name='Hydro Production',
    marker_color='lightblue'))

# Update yaxes
fig_hydro.update_yaxes(title_text='Billion BTU')

# Update title; tilt the state labels so they do not overlap
fig_hydro.update_layout(title='<b>Average Total Hydro Production by State</b>', xaxis_tickangle=-45)
fig_hydro.show()