In [1]:
## load libraries
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
from scipy import stats

In [2]:
## Load the CO2 dataset from owid-co2-data.csv (path is relative to the working directory)
pollution_data = pd.read_csv("owid-co2-data.csv")
## Preview the first rows to check which columns were read
pollution_data.head()

Unnamed: 0,iso_code,country,year,co2,co2_growth_prct,co2_growth_abs,consumption_co2,trade_co2,trade_co2_share,co2_per_capita,...,ghg_per_capita,methane,methane_per_capita,nitrous_oxide,nitrous_oxide_per_capita,primary_energy_consumption,energy_per_capita,energy_per_gdp,population,gdp
0,AFG,Afghanistan,1949,0.015,,,,,,0.0,...,,,,,,,,,7663783.0,
1,AFG,Afghanistan,1950,0.084,475.0,0.07,,,,0.0,...,,,,,,,,,7752000.0,19494800000.0
2,AFG,Afghanistan,1951,0.092,8.696,0.007,,,,0.0,...,,,,,,,,,7840000.0,20063850000.0
3,AFG,Afghanistan,1952,0.092,0.0,0.0,,,,0.0,...,,,,,,,,,7936000.0,20742350000.0
4,AFG,Afghanistan,1953,0.106,16.0,0.015,,,,0.0,...,,,,,,,,,8040000.0,22015460000.0


In [3]:
## Keep only the rows for four English-speaking Western countries.
## isin() tests membership in the list, which is the same as OR-ing one
## equality check per country.
pollution_data = pollution_data[
    pollution_data["country"].isin(
        ["United States", "Canada", "United Kingdom", "Australia"]
    )
]
pollution_data.head()

Unnamed: 0,iso_code,country,year,co2,co2_growth_prct,co2_growth_abs,consumption_co2,trade_co2,trade_co2_share,co2_per_capita,...,ghg_per_capita,methane,methane_per_capita,nitrous_oxide,nitrous_oxide_per_capita,primary_energy_consumption,energy_per_capita,energy_per_gdp,population,gdp
1593,AUS,Australia,1851,-0.062,,,,,,-0.0,...,,,,,,,,,786732.0,1955064000.0
1594,AUS,Australia,1852,-0.055,-11.765,0.007,,,,-0.0,...,,,,,,,,,831228.0,2559550000.0
1595,AUS,Australia,1853,-0.11,100.0,-0.055,,,,-0.0,...,,,,,,,,,877298.0,3137865000.0
1596,AUS,Australia,1854,-0.128,16.667,-0.018,,,,-0.0,...,,,,,,,,,926037.0,3066359000.0
1597,AUS,Australia,1855,-0.132,2.857,-0.004,,,,-0.0,...,,,,,,,,,977302.0,3224240000.0


In [4]:
## Narrow the table to the identifier, year, CO2, population and GDP columns;
## the columns left out are the ones that contain mostly NaN values.
pollution_data = pollution_data.loc[
    :, ["iso_code", "country", "year", "co2", "population", "gdp"]
]
pollution_data.head()

Unnamed: 0,iso_code,country,year,co2,population,gdp
1593,AUS,Australia,1851,-0.062,786732.0,1955064000.0
1594,AUS,Australia,1852,-0.055,831228.0,2559550000.0
1595,AUS,Australia,1853,-0.11,877298.0,3137865000.0
1596,AUS,Australia,1854,-0.128,926037.0,3066359000.0
1597,AUS,Australia,1855,-0.132,977302.0,3224240000.0


In [5]:
## Load the Greenhouse Gas Reporting Program facility data.
## NOTE: the file begins with explanatory text rows, so the columns are read as
## "Summary data collected ...", "Unnamed: 1", ... and the real column titles
## ("Facility Id", "FRS Id", ...) arrive as an ordinary data row (label 2).
ghg_data = pd.read_csv("ghgp_data_by_year.csv")
ghg_data.head()

Unnamed: 0,Summary data collected by the Greenhouse Gas Reporting Program,Unnamed: 1,Unnamed: 2,Unnamed: 3,Unnamed: 4,Unnamed: 5,Unnamed: 6,Unnamed: 7,Unnamed: 8,Unnamed: 9,...,Unnamed: 11,Unnamed: 12,Unnamed: 13,Unnamed: 14,Unnamed: 15,Unnamed: 16,Unnamed: 17,Unnamed: 18,Unnamed: 19,Unnamed: 20
0,This data was reported to EPA by facilities as...,,,,,,,,,,...,,,,,,,,,,
1,All emissions data is presented in units of me...,,,,,,,,,,...,,,,,,,,,,
2,Facility Id,FRS Id,Facility Name,City,State,Zip Code,Address,County,Latitude,Longitude,...,Latest Reported Industry Type (subparts),Latest Reported Industry Type (sectors),2018 Total reported direct emissions,2017 Total reported direct emissions,2016 Total reported direct emissions,2015 Total reported direct emissions,2014 Total reported direct emissions,2013 Total reported direct emissions,2012 Total reported direct emissions,2011 Total reported direct emissions
3,1000001,110000490166,PSE Ferndale Generating Station,FERNDALE,WA,98248,5105 LAKE TERRELL ROAD,WHATCOM COUNTY,48.83,-122.69,...,D,Power Plants,302529.48,350890.10,354145.49,405725.95,333193.56,395314.78,14719.11,35878.16
4,1000002,110041175000,Ardagh Glass Inc. (Dunkirk),DUNKIRK,IN,47336,524 E. CENTER STREET,JAY COUNTY,40.37,-85.20,...,"C,N",Minerals,110511.71,115937.54,114530.00,114583.80,114959.65,103822.92,111294.55,109863.60


In [6]:
## Remove the two explanatory rows at the top of the file (they are NaN in the data columns).
## NOTE(review): how="any" is the dropna() default, so this also discards every
## facility row that has a missing value in any column - confirm that is intended.
ghg_data = ghg_data.dropna(axis=0, how="any")
ghg_data.head()

Unnamed: 0,Summary data collected by the Greenhouse Gas Reporting Program,Unnamed: 1,Unnamed: 2,Unnamed: 3,Unnamed: 4,Unnamed: 5,Unnamed: 6,Unnamed: 7,Unnamed: 8,Unnamed: 9,...,Unnamed: 11,Unnamed: 12,Unnamed: 13,Unnamed: 14,Unnamed: 15,Unnamed: 16,Unnamed: 17,Unnamed: 18,Unnamed: 19,Unnamed: 20
2,Facility Id,FRS Id,Facility Name,City,State,Zip Code,Address,County,Latitude,Longitude,...,Latest Reported Industry Type (subparts),Latest Reported Industry Type (sectors),2018 Total reported direct emissions,2017 Total reported direct emissions,2016 Total reported direct emissions,2015 Total reported direct emissions,2014 Total reported direct emissions,2013 Total reported direct emissions,2012 Total reported direct emissions,2011 Total reported direct emissions
3,1000001,110000490166,PSE Ferndale Generating Station,FERNDALE,WA,98248,5105 LAKE TERRELL ROAD,WHATCOM COUNTY,48.83,-122.69,...,D,Power Plants,302529.48,350890.10,354145.49,405725.95,333193.56,395314.78,14719.11,35878.16
4,1000002,110041175000,Ardagh Glass Inc. (Dunkirk),DUNKIRK,IN,47336,524 E. CENTER STREET,JAY COUNTY,40.37,-85.20,...,"C,N",Minerals,110511.71,115937.54,114530.00,114583.80,114959.65,103822.92,111294.55,109863.60
5,1000003,110001482887,Ardagh Glass Inc. (Henderson),Henderson,NC,27537,620 Facet Road,VANCE COUNTY,36.29,-78.39,...,"C,N",Minerals,79393.21,80219.13,74813.30,80976.04,81003.99,80535.23,74324.20,77199.18
6,1000004,110000833518,Ardagh Glass Inc. (Lincoln),LINCOLN,IL,62656,1200 NORTH LOGAN STREET,LOGAN COUNTY,40.16,-89.35,...,"C,N",Minerals,55547.75,57894.49,54780.85,59954.74,57837.10,59600.50,60086.00,52494.61


In [7]:
## Drop the row labelled 2: it holds the real column titles ("Facility Id", "FRS Id", ...) as data.
## NOTE: the row is removed by label, so re-running this cell raises KeyError once it is gone.
ghg_data = ghg_data.drop(index=[2])
ghg_data.head()

Unnamed: 0,Summary data collected by the Greenhouse Gas Reporting Program,Unnamed: 1,Unnamed: 2,Unnamed: 3,Unnamed: 4,Unnamed: 5,Unnamed: 6,Unnamed: 7,Unnamed: 8,Unnamed: 9,...,Unnamed: 11,Unnamed: 12,Unnamed: 13,Unnamed: 14,Unnamed: 15,Unnamed: 16,Unnamed: 17,Unnamed: 18,Unnamed: 19,Unnamed: 20
3,1000001,110000490166,PSE Ferndale Generating Station,FERNDALE,WA,98248,5105 LAKE TERRELL ROAD,WHATCOM COUNTY,48.83,-122.69,...,D,Power Plants,302529.48,350890.1,354145.49,405725.95,333193.56,395314.78,14719.11,35878.16
4,1000002,110041175000,Ardagh Glass Inc. (Dunkirk),DUNKIRK,IN,47336,524 E. CENTER STREET,JAY COUNTY,40.37,-85.2,...,"C,N",Minerals,110511.71,115937.54,114530.0,114583.8,114959.65,103822.92,111294.55,109863.6
5,1000003,110001482887,Ardagh Glass Inc. (Henderson),Henderson,NC,27537,620 Facet Road,VANCE COUNTY,36.29,-78.39,...,"C,N",Minerals,79393.21,80219.13,74813.3,80976.04,81003.99,80535.23,74324.2,77199.18
6,1000004,110000833518,Ardagh Glass Inc. (Lincoln),LINCOLN,IL,62656,1200 NORTH LOGAN STREET,LOGAN COUNTY,40.16,-89.35,...,"C,N",Minerals,55547.75,57894.49,54780.85,59954.74,57837.1,59600.5,60086.0,52494.61
7,1000005,110000482022,Ardagh Glass Inc. (Madera),MADERA,CA,93637,24441 AVENUE 12 & ROAD 24 1/2,MADERA COUNTY,36.92,-120.1,...,"C,N",Minerals,83863.02,82451.32,79708.58,81133.62,74754.14,78249.77,82149.61,75969.47


In [8]:
## Reset index: renumber the rows from 0 and discard the old labels (drop=True),
## so the gaps left by the removed rows disappear.
ghg_data = ghg_data.reset_index(drop=True)
ghg_data.head()

Unnamed: 0,Summary data collected by the Greenhouse Gas Reporting Program,Unnamed: 1,Unnamed: 2,Unnamed: 3,Unnamed: 4,Unnamed: 5,Unnamed: 6,Unnamed: 7,Unnamed: 8,Unnamed: 9,...,Unnamed: 11,Unnamed: 12,Unnamed: 13,Unnamed: 14,Unnamed: 15,Unnamed: 16,Unnamed: 17,Unnamed: 18,Unnamed: 19,Unnamed: 20
0,1000001,110000490166,PSE Ferndale Generating Station,FERNDALE,WA,98248,5105 LAKE TERRELL ROAD,WHATCOM COUNTY,48.83,-122.69,...,D,Power Plants,302529.48,350890.1,354145.49,405725.95,333193.56,395314.78,14719.11,35878.16
1,1000002,110041175000,Ardagh Glass Inc. (Dunkirk),DUNKIRK,IN,47336,524 E. CENTER STREET,JAY COUNTY,40.37,-85.2,...,"C,N",Minerals,110511.71,115937.54,114530.0,114583.8,114959.65,103822.92,111294.55,109863.6
2,1000003,110001482887,Ardagh Glass Inc. (Henderson),Henderson,NC,27537,620 Facet Road,VANCE COUNTY,36.29,-78.39,...,"C,N",Minerals,79393.21,80219.13,74813.3,80976.04,81003.99,80535.23,74324.2,77199.18
3,1000004,110000833518,Ardagh Glass Inc. (Lincoln),LINCOLN,IL,62656,1200 NORTH LOGAN STREET,LOGAN COUNTY,40.16,-89.35,...,"C,N",Minerals,55547.75,57894.49,54780.85,59954.74,57837.1,59600.5,60086.0,52494.61
4,1000005,110000482022,Ardagh Glass Inc. (Madera),MADERA,CA,93637,24441 AVENUE 12 & ROAD 24 1/2,MADERA COUNTY,36.92,-120.1,...,"C,N",Minerals,83863.02,82451.32,79708.58,81133.62,74754.14,78249.77,82149.61,75969.47


In [9]:
## Give every column a short snake_case name; the eight emissions columns are named by year.
## rename() returns a new frame, which is assigned back instead of mutating in place.
## NOTE: "industy_subparts" is kept exactly as spelled so the resulting column name does not change.
ghg_data = ghg_data.rename(
    columns={
        "Summary data collected by the Greenhouse Gas Reporting Program": "facility_id",
        "Unnamed: 1": "frs_id",
        "Unnamed: 2": "facility_name",
        "Unnamed: 3": "city",
        "Unnamed: 4": "state",
        "Unnamed: 5": "zip_code",
        "Unnamed: 6": "address",
        "Unnamed: 7": "county",
        "Unnamed: 8": "latitude",
        "Unnamed: 9": "longitude",
        "Unnamed: 10": "naics_code",
        "Unnamed: 11": "industy_subparts",
        "Unnamed: 12": "industry_sector",
        "Unnamed: 13": "2018",
        "Unnamed: 14": "2017",
        "Unnamed: 15": "2016",
        "Unnamed: 16": "2015",
        "Unnamed: 17": "2014",
        "Unnamed: 18": "2013",
        "Unnamed: 19": "2012",
        "Unnamed: 20": "2011",
    }
)
ghg_data.head()

Unnamed: 0,facility_id,frs_id,facility_name,city,state,zip_code,address,county,latitude,longitude,...,industy_subparts,industry_sector,2018,2017,2016,2015,2014,2013,2012,2011
0,1000001,110000490166,PSE Ferndale Generating Station,FERNDALE,WA,98248,5105 LAKE TERRELL ROAD,WHATCOM COUNTY,48.83,-122.69,...,D,Power Plants,302529.48,350890.1,354145.49,405725.95,333193.56,395314.78,14719.11,35878.16
1,1000002,110041175000,Ardagh Glass Inc. (Dunkirk),DUNKIRK,IN,47336,524 E. CENTER STREET,JAY COUNTY,40.37,-85.2,...,"C,N",Minerals,110511.71,115937.54,114530.0,114583.8,114959.65,103822.92,111294.55,109863.6
2,1000003,110001482887,Ardagh Glass Inc. (Henderson),Henderson,NC,27537,620 Facet Road,VANCE COUNTY,36.29,-78.39,...,"C,N",Minerals,79393.21,80219.13,74813.3,80976.04,81003.99,80535.23,74324.2,77199.18
3,1000004,110000833518,Ardagh Glass Inc. (Lincoln),LINCOLN,IL,62656,1200 NORTH LOGAN STREET,LOGAN COUNTY,40.16,-89.35,...,"C,N",Minerals,55547.75,57894.49,54780.85,59954.74,57837.1,59600.5,60086.0,52494.61
4,1000005,110000482022,Ardagh Glass Inc. (Madera),MADERA,CA,93637,24441 AVENUE 12 & ROAD 24 1/2,MADERA COUNTY,36.92,-120.1,...,"C,N",Minerals,83863.02,82451.32,79708.58,81133.62,74754.14,78249.77,82149.61,75969.47


In [10]:
## Order the facilities by state code (this sorts the rows; it does not aggregate them)
ghg_state = ghg_data.sort_values("state")
ghg_state.head()

Unnamed: 0,facility_id,frs_id,facility_name,city,state,zip_code,address,county,latitude,longitude,...,industy_subparts,industry_sector,2018,2017,2016,2015,2014,2013,2012,2011
2916,1004719,110041255012,CHENA POWER PLANT,FAIRBANKS,AK,99701,1206 1ST AVE,FAIRBANKS NORTH STAR BOROUGH,64.85,-147.73,...,C,Power Plants,321500.94,314164.74,334019.09,331719.04,328081.7,321806.7,328261.76,350029.07
3097,1005004,110007919501,Tesoro Alaska Company LLC - KLNG,Kenai,AK,99611,48237 Kenai Spur Highway,KENAI PENINSULA BOROUGH,60.68,-151.39,...,"C,W-LNGIE",Petroleum and Natural Gas Systems,4484.28,16845.34,12243.0,186840.27,175911.99,53833.3,229404.07,294567.91
3270,1005349,110044228289,CENTRAL PENINSULA BALING FACILITY-SOLDOTNA LF,SOLDOTNA,AK,99669,98.5 STERLING HWY.,KENAI PENINSULA BOROUGH,60.44,-151.11,...,"C,HH",Waste,53028.59,52632.45,50910.3,55202.8,55063.94,52830.5,51725.0,51369.3
2245,1003492,110021331664,HANK NIKKELS PLANT ONE,ANCHORAGE,AK,99501,821 EAST FIRST AVENUE,ANCHORAGE MUNICIPALITY,61.22,-149.87,...,C,Power Plants,21076.32,34704.32,28806.64,30495.84,22905.82,19097.93,36153.42,58152.23
4770,1007874,110064626846,NIKISKI CO-GENERATION,NIKISKI,AK,99635,48169 KENAI SPUR HIGHWAY,KENAI PENINSULA BOROUGH,60.68,-151.38,...,C,Power Plants,192573.12,218051.03,189446.24,198337.0,171829.05,143244.66,163901.88,190496.73


In [11]:
## Collect the distinct codes found in the state column, in order of first appearance
## (len(state_unique_values) gives how many there are)
state_unique_values = pd.unique(ghg_data["state"])
state_unique_values

array(['WA', 'IN', 'NC', 'IL', 'CA', 'MA', 'NM', 'FL', 'MO', 'NE', 'PA',
       'LA', 'TX', 'OK', 'AZ', 'OH', 'IA', 'WI', 'ME', 'SD', 'WY', 'CO',
       'NV', 'CT', 'NY', 'GA', 'TN', 'DE', 'NJ', 'UT', 'MN', 'ID', 'MS',
       'MT', 'AL', 'SC', 'NH', 'MI', 'VA', 'AR', 'KS', 'RI', 'HI', 'ND',
       'WV', 'KY', 'MD', 'OR', 'AK', 'DC', 'VT', 'PR', 'VI', 'GU'],
      dtype=object)

In [12]:
## Keep just the state code and the eight yearly emissions columns.
## The year labels "2018" down to "2011" are generated instead of typed out.
ghg_state_mean = ghg_state[["state"] + [str(year) for year in range(2018, 2010, -1)]]
ghg_state_mean.head()

Unnamed: 0,state,2018,2017,2016,2015,2014,2013,2012,2011
2916,AK,321500.94,314164.74,334019.09,331719.04,328081.7,321806.7,328261.76,350029.07
3097,AK,4484.28,16845.34,12243.0,186840.27,175911.99,53833.3,229404.07,294567.91
3270,AK,53028.59,52632.45,50910.3,55202.8,55063.94,52830.5,51725.0,51369.3
2245,AK,21076.32,34704.32,28806.64,30495.84,22905.82,19097.93,36153.42,58152.23
4770,AK,192573.12,218051.03,189446.24,198337.0,171829.05,143244.66,163901.88,190496.73


In [17]:
## Summarize every column. The output reports count/unique/top/freq rather than
## min/max, which is what describe() produces for non-numeric (object) columns.
ghg_state_mean.describe()

Unnamed: 0,state,2018,2017,2016,2015,2014,2013,2012,2011
count,5183,5183.0,5183.0,5183.0,5183.0,5183.0,5183.0,5183.0,5183.0
unique,54,5168.0,5168.0,5172.0,5175.0,5176.0,5172.0,5173.0,5171.0
top,TX,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
freq,514,15.0,15.0,7.0,6.0,8.0,10.0,9.0,11.0


In [None]:
## Change the type of the year columns to floats in order to calculate the mean (they are originally strings)
## Work on an explicit copy: ghg_state_mean was sliced out of ghg_state, and
## assigning into a slice triggers SettingWithCopyWarning.
ghg_state_mean = ghg_state_mean.copy()
for col in ["2018", "2017", "2016", "2015", "2014", "2013", "2012", "2011"]:
    ## Strip thousands separators for the whole column at once, then parse as float.
    ## This avoids indexing by a hard-coded row count (the row labels are not 0..n
    ## after sorting) and leaves the non-numeric "state" column untouched.
    ghg_state_mean[col] = (
        ghg_state_mean[col]
        .astype(str)
        .str.replace(",", "", regex=False)
        .astype(float)
    )
ghg_state_mean.head()

In [None]:
## Find the mean emissions for each state for each year.
## The year columns are selected with a list (double brackets): selecting several
## groupby columns with a bare tuple was deprecated in pandas 1.0 and raises in pandas 2.0.
## NOTE: after this cell "state" is the index of ghg_state_mean, not a column.
ghg_state_mean = ghg_state_mean.groupby(by="state")[
    ["2018", "2017", "2016", "2015", "2014", "2013", "2012", "2011"]
].mean()
ghg_state_mean.head()

In [None]:
## Plot of emissions in 2018 by state
##plt.plot(ghg_state_mean["state"], ghg_state_mean["2018"])
##plt.show()

In [None]:
## Plot of emissions in 2011 by state
##plt.plot(ghg_state_mean["state"], ghg_state_mean["2011"])
##plt.show()

In [None]:
## Group the data by industry sector
##ghg_sector = ghg_data.groupby(by='industry_sector')
##ghg_sector_mean = ghg_sector[['2011', '2012', '2013', '2014', '2015', '2016', '2017', '2018']].mean()
##ghg_sector.head()

In [None]:
## Plot of emissions by sector in 2018
##plt.plot(ghg_sector_mean["industry_sector"], ghg_sector_mean["2018"])
##plt.show()

In [None]:
## Plot of emissions by sector in 2011
##plt.plot(ghg_sector_mean["industry_sector"], ghg_sector_mean["2011"])
##plt.show()

In [None]:
## How do we plot each state's emission trends over the years?

In [None]:
## Load the class survey responses from CSV and preview the first rows
class_data = pd.read_csv("global_climate_change_class_survey_responses.csv")
class_data.head()

In [None]:
## Use the first row of the survey data as the column names.
## .iloc has to be indexed with square brackets; calling it as iloc(0) returns an
## indexer object instead of row 0. tolist() hands plain labels to .columns.
## NOTE: row 0 is still present as a data row after this assignment.
class_data.columns = class_data.iloc[0].tolist()