In [1]:
#Load dependencies
import os
import pandas as pd

In [2]:
#Locations of the raw input files, given relative to this notebook's folder.
#Four inputs are Excel workbooks; the cooling/heating degree day input is a CSV.
consumptionfile = '../Raw Data Files/EIA Annual Electricity Fuel Consumption by State.xls'
generationfile = '../Raw Data Files/EIA Annual Electricity Generation by State.xls'
coolheatfile = '../Raw Data Files/EIA Cooling-Heating Degree Days by Region.csv'
emissionfile = '../Raw Data Files/EIA Emission Annual Data by State.xls'
greenhousefile = '../Raw Data Files/ghgp_data_by_year.xlsx'

In [3]:
#Read data files into pandas dataframes
#The CSV header is taken from row index 4 (header=4).
coolheat=pd.read_csv(coolheatfile,header=4)

#Each workbook is opened in a with-block so its file handle is closed as soon as the
#needed sheet has been parsed (a bare pd.ExcelFile(...) keeps the file open).
#The temp* names remain bound afterwards, but refer to closed workbooks.
with pd.ExcelFile(consumptionfile) as tempconsumption:
    #skiprows=1 skips the first sheet row; the next row supplies the column headers
    consumption=tempconsumption.parse('Consumption_1990 - 2018 Final',skiprows=1,header=0)
with pd.ExcelFile(generationfile) as tempgeneration:
    generation=tempgeneration.parse('Net_Generation_1990-2018 Final',skiprows=1,header=0)
with pd.ExcelFile(emissionfile) as tempemission:
    emission=tempemission.parse('State Emissions',header=0)
with pd.ExcelFile(greenhousefile) as tempgreenhouse:
    #skiprows=3 skips the first three sheet rows before the header row
    greenhouse=tempgreenhouse.parse('Direct Emitters',skiprows=3,header=0)

In [4]:
#Rename columns of dataframes for consistency and simplicity
#Columns are assigned by position, so each list must match its dataframe's column count and order.

#Identifier columns shared by the three EIA state-level tables
statekeys=['year','state','producer type','energy source']
consumption.columns=statekeys+['consumption']
generation.columns=statekeys+['generation (mwh)']
emission.columns=statekeys+['CO2 (MT)','SO2 (MT)','NOx (MT)']

#Degree day table: year, the two U.S. totals, then a heating block followed by a
#cooling block, both listing the regions in the same order
regions=['Pacific','Mountain','West South Central','East South Central','South Atlantic',
         'West North Central','East North Central','Middle Atlantic','New England']
coolheat.columns=(['year','U.S. cooling degree days','U.S. heating degree days']
                  +[region+' heating degree days' for region in regions]
                  +[region+' cooling degree days' for region in regions])

#Greenhouse table: facility descriptors followed by one emissions column per year, 2018 down to 2011
facilitydescriptors=['facility id','frs id','facility name','city','state','zip','address','county',
                     'latitude','longitude','naics code','industry type (subparts)','industry type (sectors)']
greenhouse.columns=facilitydescriptors+[str(year)+' direct emissions' for year in range(2018,2010,-1)]

In [5]:
#Change values in energy source column for consistency
#Consumption labels carry a unit suffix; map them to the bare fuel names
consumptionlabels={'Natural Gas (Mcf)':'Natural Gas',
                   'Coal (Short Tons)':'Coal',
                   'Petroleum (Barrels)':'Petroleum',
                   'Other Gases (Billion BTU)':'Other Gases'}
#Emission data labels its aggregate row 'All Sources'; relabel it 'Total'
emissionlabels={'All Sources':'Total'}

consumption=consumption.replace({'energy source':consumptionlabels})
emission=emission.replace({'energy source':emissionlabels})

In [6]:
#Select data for the total power industry within states and exclude Guam, Puerto Rico, and Virgin Islands
totalindustry='Total Electric Power Industry'
territories=['GU','PR','VI']

#Keep only the industry-wide rows of each state-level table
stateconsumption=consumption[consumption['producer type']==totalindustry]
stategeneration=generation[generation['producer type']==totalindustry]
stateemission=emission[emission['producer type']==totalindustry]

#Keep only power plant facilities, then drop the territories
#(the territory filter is applied to the greenhouse data only)
temppowergreenhouse=greenhouse[greenhouse['industry type (sectors)']=='Power Plants']
powergreenhouse=temppowergreenhouse[~temppowergreenhouse['state'].isin(territories)]

In [7]:
#Create state greenhouse emissions dataframe
#Sum the power plants' yearly direct emissions by state. The columns are selected with a
#list: indexing a groupby with a bare tuple of names was deprecated in pandas 1.0 and
#raises in pandas 2.0 and later.
emissioncolumns=['2018 direct emissions','2017 direct emissions','2016 direct emissions',
                 '2015 direct emissions','2014 direct emissions','2013 direct emissions',
                 '2012 direct emissions','2011 direct emissions']
tempgreenhouse=powergreenhouse.groupby(['state'])[emissioncolumns].sum()
#Shorten the column labels to the bare year so they can be converted to numbers below
tempgreenhouse.columns=['2018','2017','2016','2015','2014','2013','2012','2011']
#Reshape from wide (one column per year) to long (one row per state and year)
seriesgreenhouse=tempgreenhouse.stack()
stategreenhouse=pd.DataFrame(seriesgreenhouse).reset_index()
stategreenhouse.columns=['state','year','greenhouse emissions']
stategreenhouse['year']=pd.to_numeric(stategreenhouse['year'])

In [8]:
#Merge state datasets together to create state data
#All three tables are joined on the same identifier columns
mergekeys=['year','state','producer type','energy source']
#Outer joins keep rows that are present in only some of the tables
tempstatedata=stateconsumption.merge(stategeneration,on=mergekeys,how='outer')
statedata=stateemission.merge(tempstatedata,on=mergekeys,how='outer')

In [20]:
#Create greenhouse files
emissioncolumns=['2018 direct emissions','2017 direct emissions','2016 direct emissions','2015 direct emissions',
                 '2014 direct emissions','2013 direct emissions','2012 direct emissions','2011 direct emissions']

#Facility table: drops the address, industry, and yearly emissions columns, leaving
#facility id, frs id, facility name, state, latitude, and longitude
facility=powergreenhouse.drop(['city','zip','address','county','naics code','industry type (subparts)',
                               'industry type (sectors)']+emissioncolumns,axis=1)

#Facility emissions in wide form: facility id plus one column per reporting year
tempfacilityemission=powergreenhouse.drop(['frs id','facility name','city','state','zip','address','county',
                    'latitude','longitude','naics code','industry type (subparts)','industry type (sectors)'],axis=1)
tempfacilityemission.columns=['facility id','2018','2017','2016','2015','2014','2013','2012','2011']

#Reshape to long form (one row per facility and year). The facility id is moved into the
#index first; stacking with it left as a regular column would stack the id itself as a
#value and key the rows only by their positional index.
seriesfacilityemission=tempfacilityemission.set_index('facility id').stack()
facilityemission=pd.DataFrame(seriesfacilityemission).reset_index()
facilityemission.columns=['facility id','year','greenhouse emissions']
facilityemission['year']=pd.to_numeric(facilityemission['year'])

In [24]:
#Preview only the first 10 rows rather than rendering the whole dataframe
tempfacilityemission.head(10)

Unnamed: 0,facility id,2018,2017,2016,2015,2014,2013,2012,2011
0,1990,AK,Total Electric Power Industry,Coal (Short Tons),404871,,,,
1,1990,AK,Total Electric Power Industry,Petroleum (Barrels),961837,,,,
2,1990,AK,Total Electric Power Industry,Natural Gas (Mcf),42764948,,,,
3,1990,AK,"Electric Generators, Electric Utilities",Coal (Short Tons),290182,,,,
4,1990,AK,"Electric Generators, Electric Utilities",Petroleum (Barrels),657706,,,,
5,1990,AK,"Electric Generators, Electric Utilities",Natural Gas (Mcf),34366142,,,,
6,1990,AK,"Combined Heat and Power, Commercial Power",Coal (Short Tons),114689,,,,
7,1990,AK,"Combined Heat and Power, Commercial Power",Petroleum (Barrels),112409,,,,
8,1990,AK,"Combined Heat and Power, Industrial Power",Petroleum (Barrels),191722,,,,
9,1990,AK,"Combined Heat and Power, Industrial Power",Natural Gas (Mcf),8398806,,,,


In [25]:
#Preview the first 10 rows of the emission dataframe
emission.head(10)

Unnamed: 0_level_0,2018,2017,2016,2015,2014,2013,2012,2011,Unnamed: 9_level_0,Unnamed: 10_level_0,Unnamed: 11_level_0,Unnamed: 12_level_0,Unnamed: 13_level_0,Unnamed: 14_level_0,Unnamed: 15_level_0,Unnamed: 16_level_0,Unnamed: 17_level_0,Unnamed: 18_level_0,Unnamed: 19_level_0,Unnamed: 20_level_0,Unnamed: 21_level_0
state,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1
0,1990.0,AK,2.986768e+06,3.084472e+06,3028204.0,3331016.0,3244966.0,3411938.0,3818413.0,3815045.0,,,,,,,,,,,
1,1990.0,AK,Total Electric Power Industry,Coal,510573.0,,,,,,,,,,,,,,,,
2,1990.0,AK,Total Electric Power Industry,Hydroelectric Conventional,974521.0,,,,,,,,,,,,,,,,
3,1990.0,AK,Total Electric Power Industry,Natural Gas,3466261.0,,,,,,,,,,,,,,,,
4,1990.0,AK,Total Electric Power Industry,Petroleum,497116.0,,,,,,,,,,,,,,,,
5,1990.0,AK,Total Electric Power Industry,Wind,0.0,,,,,,,,,,,,,,,,
6,1990.0,AK,Total Electric Power Industry,Wood and Wood Derived Fuels,151035.0,,,,,,,,,,,,,,,,
7,1990.0,AK,"Electric Generators, Electric Utilities",Total,4493024.0,,,,,,,,,,,,,,,,
8,1990.0,AK,"Electric Generators, Electric Utilities",Coal,311960.0,,,,,,,,,,,,,,,,
9,1990.0,AK,"Electric Generators, Electric Utilities",Hydroelectric Conventional,974521.0,,,,,,,,,,,,,,,,


In [8]:
#Preview only the first 10 rows rather than rendering the whole dataframe
tempgreenhouse.head(10)

Unnamed: 0_level_0,facility id,frs id,facility name,city,state,zip,address,county,latitude,longitude,...,industry type (subparts),industry type (sectors),2018 direct emissions,2017 direct emissions,2016 direct emissions,2015 direct emissions,2014 direct emissions,2013 direct emissions,2012 direct emissions,2011 direct emissions
0,1000001,1.100005e+11,PSE Ferndale Generating Station,FERNDALE,WA,98248,5105 LAKE TERRELL ROAD,WHATCOM COUNTY,48.828707,-122.685533,...,D,Power Plants,302529.480,350890.100,354145.488,405725.952,333193.564,395314.784,14719.108,35878.164
1,1000002,1.100412e+11,Ardagh Glass Inc. (Dunkirk),DUNKIRK,IN,47336,524 E. CENTER STREET,JAY COUNTY,40.371053,-85.198134,...,"C,N",Minerals,110511.712,115937.540,114530.004,114583.804,114959.652,103822.920,111294.548,109863.600
2,1000003,1.100015e+11,Ardagh Glass Inc. (Henderson),Henderson,NC,27537,620 Facet Road,VANCE COUNTY,36.291066,-78.391883,...,"C,N",Minerals,79393.210,80219.128,74813.296,80976.036,81003.988,80535.232,74324.196,77199.184
3,1000004,1.100008e+11,Ardagh Glass Inc. (Lincoln),LINCOLN,IL,62656,1200 NORTH LOGAN STREET,LOGAN COUNTY,40.157643,-89.353796,...,"C,N",Minerals,55547.748,57894.492,54780.848,59954.736,57837.096,59600.496,60085.996,52494.612
4,1000005,1.100005e+11,Ardagh Glass Inc. (Madera),MADERA,CA,93637,24441 AVENUE 12 & ROAD 24 1/2,MADERA COUNTY,36.923750,-120.104120,...,"C,N",Minerals,83863.020,82451.324,79708.576,81133.624,74754.144,78249.768,82149.608,75969.472
5,1000006,1.100008e+11,Ardagh Glass Inc. (Milford),MILFORD,MA,1757,1 NATIONAL STREET,WORCESTER,42.127778,-71.510556,...,"C,N",Minerals,9745.016,37320.220,33116.336,41248.504,42313.908,42648.156,45293.706,43434.408
6,1000007,1.100070e+11,AMERICAN GYPSUM - BERNALILLO (WALLBOARD) PLANT,BERNALILLO,NM,87004,1000 N HILL RD,SANDOVAL,35.329352,-106.525777,...,C,Minerals,10859.360,4145.684,0.000,0.000,0.000,0.000,0.000,0.000
7,1000010,1.100139e+11,SANTA ROSA CENTRAL LANDFILL,MILTON,FL,32583,6337 DELISA RD.,SANTA ROSA COUNTY,30.580400,-87.062100,...,HH,Waste,125328.250,107562.000,106339.750,101770.000,107609.500,109206.000,102640.000,98143.350
8,1000015,1.100005e+11,"TYSON FARMS, INC.",WILKESBORO,NC,28697,704 FACTORY STREET,WILKES COUNTY,36.143889,-81.163333,...,C,Other,26683.896,27070.946,25592.004,28135.430,28909.070,29057.040,25292.904,25076.254
9,1000016,1.100015e+11,TYSON FARMS INCORPORATED HARMONY,HARMONY,NC,28634,501 SHEFFIELD ROAD,IREDELL COUNTY,35.954580,-80.721080,...,C,Other,36535.312,35015.368,35333.116,32337.728,29817.888,29325.540,49787.358,50407.950
