In [1]:
## load libraries
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
from scipy import stats

In [2]:
## load data
pollution_data = pd.read_csv("owid-co2-data.csv")
pollution_data.head()

Unnamed: 0,iso_code,country,year,co2,co2_growth_prct,co2_growth_abs,consumption_co2,trade_co2,trade_co2_share,co2_per_capita,...,ghg_per_capita,methane,methane_per_capita,nitrous_oxide,nitrous_oxide_per_capita,primary_energy_consumption,energy_per_capita,energy_per_gdp,population,gdp
0,AFG,Afghanistan,1949,0.015,,,,,,0.0,...,,,,,,,,,7663783.0,
1,AFG,Afghanistan,1950,0.084,475.0,0.07,,,,0.0,...,,,,,,,,,7752000.0,19494800000.0
2,AFG,Afghanistan,1951,0.092,8.696,0.007,,,,0.0,...,,,,,,,,,7840000.0,20063850000.0
3,AFG,Afghanistan,1952,0.092,0.0,0.0,,,,0.0,...,,,,,,,,,7936000.0,20742350000.0
4,AFG,Afghanistan,1953,0.106,16.0,0.015,,,,0.0,...,,,,,,,,,8040000.0,22015460000.0


In [3]:
## only keep data from English-speaking Western countries
pollution_data = pollution_data[(pollution_data['country'] == 'United States') | (pollution_data['country'] == 'Canada') | (pollution_data['country'] == 'United Kingdom') | (pollution_data['country'] == 'Australia')]
pollution_data.head()

Unnamed: 0,iso_code,country,year,co2,co2_growth_prct,co2_growth_abs,consumption_co2,trade_co2,trade_co2_share,co2_per_capita,...,ghg_per_capita,methane,methane_per_capita,nitrous_oxide,nitrous_oxide_per_capita,primary_energy_consumption,energy_per_capita,energy_per_gdp,population,gdp
1593,AUS,Australia,1851,-0.062,,,,,,-0.0,...,,,,,,,,,786732.0,1955064000.0
1594,AUS,Australia,1852,-0.055,-11.765,0.007,,,,-0.0,...,,,,,,,,,831228.0,2559550000.0
1595,AUS,Australia,1853,-0.11,100.0,-0.055,,,,-0.0,...,,,,,,,,,877298.0,3137865000.0
1596,AUS,Australia,1854,-0.128,16.667,-0.018,,,,-0.0,...,,,,,,,,,926037.0,3066359000.0
1597,AUS,Australia,1855,-0.132,2.857,-0.004,,,,-0.0,...,,,,,,,,,977302.0,3224240000.0


In [4]:
## remove columns that contain mostly NaN values
pollution_data = pollution_data[["iso_code", "country", "year", "co2", "population", "gdp"]]
pollution_data.head()

Unnamed: 0,iso_code,country,year,co2,population,gdp
1593,AUS,Australia,1851,-0.062,786732.0,1955064000.0
1594,AUS,Australia,1852,-0.055,831228.0,2559550000.0
1595,AUS,Australia,1853,-0.11,877298.0,3137865000.0
1596,AUS,Australia,1854,-0.128,926037.0,3066359000.0
1597,AUS,Australia,1855,-0.132,977302.0,3224240000.0


In [5]:
## load data
ghg_data = pd.read_csv("ghgp_data_by_year.csv")
ghg_data.head()

Unnamed: 0,Summary data collected by the Greenhouse Gas Reporting Program,Unnamed: 1,Unnamed: 2,Unnamed: 3,Unnamed: 4,Unnamed: 5,Unnamed: 6,Unnamed: 7,Unnamed: 8,Unnamed: 9,...,Unnamed: 11,Unnamed: 12,Unnamed: 13,Unnamed: 14,Unnamed: 15,Unnamed: 16,Unnamed: 17,Unnamed: 18,Unnamed: 19,Unnamed: 20
0,This data was reported to EPA by facilities as...,,,,,,,,,,...,,,,,,,,,,
1,All emissions data is presented in units of me...,,,,,,,,,,...,,,,,,,,,,
2,Facility Id,FRS Id,Facility Name,City,State,Zip Code,Address,County,Latitude,Longitude,...,Latest Reported Industry Type (subparts),Latest Reported Industry Type (sectors),2018 Total reported direct emissions,2017 Total reported direct emissions,2016 Total reported direct emissions,2015 Total reported direct emissions,2014 Total reported direct emissions,2013 Total reported direct emissions,2012 Total reported direct emissions,2011 Total reported direct emissions
3,1000001,110000490166,PSE Ferndale Generating Station,FERNDALE,WA,98248,5105 LAKE TERRELL ROAD,WHATCOM COUNTY,48.83,-122.69,...,D,Power Plants,302529.48,350890.10,354145.49,405725.95,333193.56,395314.78,14719.11,35878.16
4,1000002,110041175000,Ardagh Glass Inc. (Dunkirk),DUNKIRK,IN,47336,524 E. CENTER STREET,JAY COUNTY,40.37,-85.20,...,"C,N",Minerals,110511.71,115937.54,114530.00,114583.80,114959.65,103822.92,111294.55,109863.60


In [6]:
## Get rid of first two rows because they are just an explanation
ghg_data = ghg_data.dropna()
ghg_data.head()

Unnamed: 0,Summary data collected by the Greenhouse Gas Reporting Program,Unnamed: 1,Unnamed: 2,Unnamed: 3,Unnamed: 4,Unnamed: 5,Unnamed: 6,Unnamed: 7,Unnamed: 8,Unnamed: 9,...,Unnamed: 11,Unnamed: 12,Unnamed: 13,Unnamed: 14,Unnamed: 15,Unnamed: 16,Unnamed: 17,Unnamed: 18,Unnamed: 19,Unnamed: 20
2,Facility Id,FRS Id,Facility Name,City,State,Zip Code,Address,County,Latitude,Longitude,...,Latest Reported Industry Type (subparts),Latest Reported Industry Type (sectors),2018 Total reported direct emissions,2017 Total reported direct emissions,2016 Total reported direct emissions,2015 Total reported direct emissions,2014 Total reported direct emissions,2013 Total reported direct emissions,2012 Total reported direct emissions,2011 Total reported direct emissions
3,1000001,110000490166,PSE Ferndale Generating Station,FERNDALE,WA,98248,5105 LAKE TERRELL ROAD,WHATCOM COUNTY,48.83,-122.69,...,D,Power Plants,302529.48,350890.10,354145.49,405725.95,333193.56,395314.78,14719.11,35878.16
4,1000002,110041175000,Ardagh Glass Inc. (Dunkirk),DUNKIRK,IN,47336,524 E. CENTER STREET,JAY COUNTY,40.37,-85.20,...,"C,N",Minerals,110511.71,115937.54,114530.00,114583.80,114959.65,103822.92,111294.55,109863.60
5,1000003,110001482887,Ardagh Glass Inc. (Henderson),Henderson,NC,27537,620 Facet Road,VANCE COUNTY,36.29,-78.39,...,"C,N",Minerals,79393.21,80219.13,74813.30,80976.04,81003.99,80535.23,74324.20,77199.18
6,1000004,110000833518,Ardagh Glass Inc. (Lincoln),LINCOLN,IL,62656,1200 NORTH LOGAN STREET,LOGAN COUNTY,40.16,-89.35,...,"C,N",Minerals,55547.75,57894.49,54780.85,59954.74,57837.10,59600.50,60086.00,52494.61


In [7]:
ghg_data.drop([2],axis=0).head()

Unnamed: 0,Summary data collected by the Greenhouse Gas Reporting Program,Unnamed: 1,Unnamed: 2,Unnamed: 3,Unnamed: 4,Unnamed: 5,Unnamed: 6,Unnamed: 7,Unnamed: 8,Unnamed: 9,...,Unnamed: 11,Unnamed: 12,Unnamed: 13,Unnamed: 14,Unnamed: 15,Unnamed: 16,Unnamed: 17,Unnamed: 18,Unnamed: 19,Unnamed: 20
3,1000001,110000490166,PSE Ferndale Generating Station,FERNDALE,WA,98248,5105 LAKE TERRELL ROAD,WHATCOM COUNTY,48.83,-122.69,...,D,Power Plants,302529.48,350890.1,354145.49,405725.95,333193.56,395314.78,14719.11,35878.16
4,1000002,110041175000,Ardagh Glass Inc. (Dunkirk),DUNKIRK,IN,47336,524 E. CENTER STREET,JAY COUNTY,40.37,-85.2,...,"C,N",Minerals,110511.71,115937.54,114530.0,114583.8,114959.65,103822.92,111294.55,109863.6
5,1000003,110001482887,Ardagh Glass Inc. (Henderson),Henderson,NC,27537,620 Facet Road,VANCE COUNTY,36.29,-78.39,...,"C,N",Minerals,79393.21,80219.13,74813.3,80976.04,81003.99,80535.23,74324.2,77199.18
6,1000004,110000833518,Ardagh Glass Inc. (Lincoln),LINCOLN,IL,62656,1200 NORTH LOGAN STREET,LOGAN COUNTY,40.16,-89.35,...,"C,N",Minerals,55547.75,57894.49,54780.85,59954.74,57837.1,59600.5,60086.0,52494.61
7,1000005,110000482022,Ardagh Glass Inc. (Madera),MADERA,CA,93637,24441 AVENUE 12 & ROAD 24 1/2,MADERA COUNTY,36.92,-120.1,...,"C,N",Minerals,83863.02,82451.32,79708.58,81133.62,74754.14,78249.77,82149.61,75969.47


In [8]:
## Reset index
ghg_data = ghg_data.reset_index(drop=True)
ghg_data.head()

Unnamed: 0,Summary data collected by the Greenhouse Gas Reporting Program,Unnamed: 1,Unnamed: 2,Unnamed: 3,Unnamed: 4,Unnamed: 5,Unnamed: 6,Unnamed: 7,Unnamed: 8,Unnamed: 9,...,Unnamed: 11,Unnamed: 12,Unnamed: 13,Unnamed: 14,Unnamed: 15,Unnamed: 16,Unnamed: 17,Unnamed: 18,Unnamed: 19,Unnamed: 20
0,Facility Id,FRS Id,Facility Name,City,State,Zip Code,Address,County,Latitude,Longitude,...,Latest Reported Industry Type (subparts),Latest Reported Industry Type (sectors),2018 Total reported direct emissions,2017 Total reported direct emissions,2016 Total reported direct emissions,2015 Total reported direct emissions,2014 Total reported direct emissions,2013 Total reported direct emissions,2012 Total reported direct emissions,2011 Total reported direct emissions
1,1000001,110000490166,PSE Ferndale Generating Station,FERNDALE,WA,98248,5105 LAKE TERRELL ROAD,WHATCOM COUNTY,48.83,-122.69,...,D,Power Plants,302529.48,350890.10,354145.49,405725.95,333193.56,395314.78,14719.11,35878.16
2,1000002,110041175000,Ardagh Glass Inc. (Dunkirk),DUNKIRK,IN,47336,524 E. CENTER STREET,JAY COUNTY,40.37,-85.20,...,"C,N",Minerals,110511.71,115937.54,114530.00,114583.80,114959.65,103822.92,111294.55,109863.60
3,1000003,110001482887,Ardagh Glass Inc. (Henderson),Henderson,NC,27537,620 Facet Road,VANCE COUNTY,36.29,-78.39,...,"C,N",Minerals,79393.21,80219.13,74813.30,80976.04,81003.99,80535.23,74324.20,77199.18
4,1000004,110000833518,Ardagh Glass Inc. (Lincoln),LINCOLN,IL,62656,1200 NORTH LOGAN STREET,LOGAN COUNTY,40.16,-89.35,...,"C,N",Minerals,55547.75,57894.49,54780.85,59954.74,57837.10,59600.50,60086.00,52494.61


In [9]:
## Rename the columns
ghg_data.rename(columns={'Summary data collected by the Greenhouse Gas Reporting Program':'facility_id',
                          'Unnamed: 1':'frs_id',
                          'Unnamed: 2':'facility_name',
                            'Unnamed: 3': 'city',
                            'Unnamed: 4':'state',
                            'Unnamed: 5': 'zip_code',
                            'Unnamed: 6': 'address',
                            'Unnamed: 7':'county',
                            'Unnamed: 8': 'latitude',
                            'Unnamed: 9': 'longitude',
                            'Unnamed: 10':'naics_code',
                            'Unnamed: 11':'industy_subparts',
                            'Unnamed: 12':'industry_sector',
                            'Unnamed: 13':'2018',
                            'Unnamed: 14':'2017',
                            'Unnamed: 15':'2016',
                            'Unnamed: 16':'2015',
                            'Unnamed: 17':'2014',
                            'Unnamed: 18':'2013',
                            'Unnamed: 19':'2012',
                            'Unnamed: 20':'2011'},
                 inplace=True)
ghg_data.head()

Unnamed: 0,facility_id,frs_id,facility_name,city,state,zip_code,address,county,latitude,longitude,...,industy_subparts,industry_sector,2018,2017,2016,2015,2014,2013,2012,2011
0,Facility Id,FRS Id,Facility Name,City,State,Zip Code,Address,County,Latitude,Longitude,...,Latest Reported Industry Type (subparts),Latest Reported Industry Type (sectors),2018 Total reported direct emissions,2017 Total reported direct emissions,2016 Total reported direct emissions,2015 Total reported direct emissions,2014 Total reported direct emissions,2013 Total reported direct emissions,2012 Total reported direct emissions,2011 Total reported direct emissions
1,1000001,110000490166,PSE Ferndale Generating Station,FERNDALE,WA,98248,5105 LAKE TERRELL ROAD,WHATCOM COUNTY,48.83,-122.69,...,D,Power Plants,302529.48,350890.10,354145.49,405725.95,333193.56,395314.78,14719.11,35878.16
2,1000002,110041175000,Ardagh Glass Inc. (Dunkirk),DUNKIRK,IN,47336,524 E. CENTER STREET,JAY COUNTY,40.37,-85.20,...,"C,N",Minerals,110511.71,115937.54,114530.00,114583.80,114959.65,103822.92,111294.55,109863.60
3,1000003,110001482887,Ardagh Glass Inc. (Henderson),Henderson,NC,27537,620 Facet Road,VANCE COUNTY,36.29,-78.39,...,"C,N",Minerals,79393.21,80219.13,74813.30,80976.04,81003.99,80535.23,74324.20,77199.18
4,1000004,110000833518,Ardagh Glass Inc. (Lincoln),LINCOLN,IL,62656,1200 NORTH LOGAN STREET,LOGAN COUNTY,40.16,-89.35,...,"C,N",Minerals,55547.75,57894.49,54780.85,59954.74,57837.10,59600.50,60086.00,52494.61


In [12]:
## Group the data by state
##ghg_state = ghg_data.groupby(by='state')
##ghg_state_mean = ghg_state['2011', '2012', '2013', '2014,' '2015', '2016', '2017', '2018'].mean()
##ghg_state.head()

Unnamed: 0,facility_id,frs_id,facility_name,city,state,zip_code,address,county,latitude,longitude,...,industy_subparts,industry_sector,2018,2017,2016,2015,2014,2013,2012,2011
0,Facility Id,FRS Id,Facility Name,City,State,Zip Code,Address,County,Latitude,Longitude,...,Latest Reported Industry Type (subparts),Latest Reported Industry Type (sectors),2018 Total reported direct emissions,2017 Total reported direct emissions,2016 Total reported direct emissions,2015 Total reported direct emissions,2014 Total reported direct emissions,2013 Total reported direct emissions,2012 Total reported direct emissions,2011 Total reported direct emissions
1,1000001,110000490166,PSE Ferndale Generating Station,FERNDALE,WA,98248,5105 LAKE TERRELL ROAD,WHATCOM COUNTY,48.83,-122.69,...,D,Power Plants,302529.48,350890.10,354145.49,405725.95,333193.56,395314.78,14719.11,35878.16
2,1000002,110041175000,Ardagh Glass Inc. (Dunkirk),DUNKIRK,IN,47336,524 E. CENTER STREET,JAY COUNTY,40.37,-85.20,...,"C,N",Minerals,110511.71,115937.54,114530.00,114583.80,114959.65,103822.92,111294.55,109863.60
3,1000003,110001482887,Ardagh Glass Inc. (Henderson),Henderson,NC,27537,620 Facet Road,VANCE COUNTY,36.29,-78.39,...,"C,N",Minerals,79393.21,80219.13,74813.30,80976.04,81003.99,80535.23,74324.20,77199.18
4,1000004,110000833518,Ardagh Glass Inc. (Lincoln),LINCOLN,IL,62656,1200 NORTH LOGAN STREET,LOGAN COUNTY,40.16,-89.35,...,"C,N",Minerals,55547.75,57894.49,54780.85,59954.74,57837.10,59600.50,60086.00,52494.61
5,1000005,110000482022,Ardagh Glass Inc. (Madera),MADERA,CA,93637,24441 AVENUE 12 & ROAD 24 1/2,MADERA COUNTY,36.92,-120.10,...,"C,N",Minerals,83863.02,82451.32,79708.58,81133.62,74754.14,78249.77,82149.61,75969.47
6,1000006,110000834900,Ardagh Glass Inc. (Milford),MILFORD,MA,01757,1 NATIONAL STREET,WORCESTER,42.13,-71.51,...,"C,N",Minerals,9745.02,37320.22,33116.34,41248.50,42313.91,42648.16,45293.71,43434.41
7,1000007,110007014774,AMERICAN GYPSUM - BERNALILLO (WALLBOARD) PLANT,BERNALILLO,NM,87004,1000 N HILL RD,SANDOVAL,35.33,-106.53,...,C,Minerals,10859.36,4145.68,0.00,0.00,0.00,0.00,0.00,0.00
8,1000010,110013945437,SANTA ROSA CENTRAL LANDFILL,MILTON,FL,32583,6337 DELISA RD.,SANTA ROSA COUNTY,30.58,-87.06,...,HH,Waste,125328.25,107562.00,106339.75,101770.00,107609.50,109206.00,102640.00,98143.35
9,1000015,110000492734,"TYSON FARMS, INC.",WILKESBORO,NC,28697,704 FACTORY STREET,WILKES COUNTY,36.14,-81.16,...,C,Other,26683.90,27070.95,25592.00,28135.43,28909.07,29057.04,25292.90,25076.25


In [None]:
## Plot of emissions in 2018 by state
##plt.plot(ghg_state_mean["state"], ghg_state_mean["2018"])
##plt.show()

In [None]:
## Plot of emissions in 2011 by state
##plt.plot(ghg_state_mean["state"], ghg_state_mean["2011"])
##plt.show()

In [None]:
## Group the data by industry sector
##ghg_sector = ghg_data.groupby(by='industry_sector')
##ghg_sector_mean = ghg_sector['2011', '2012', '2013', '2014,' '2015', '2016', '2017', '2018'].mean()
##ghg_sector.head()

In [None]:
## Plot of emissions by sector in 2018
##plt.plot(ghg_sector_mean["industry_sector"], ghg_sector_mean["2018"])
##plt.show()

In [None]:
## Plot of emissions by sector in 2011
##plt.plot(ghg_sector_mean["industry_sector"], ghg_sector_mean["2011"])
##plt.show()

In [None]:
## How do we plot each state's emission trends over the years?

In [13]:
## Load class survey data
class_data = pd.read_csv("global_climate_change_class_survey_responses.csv")
class_data.head()

Unnamed: 0.1,Unnamed: 0,Unnamed: 1,18,9,26,27,24,Unnamed: 7,Unnamed: 8,3,...,27.1,28,Unnamed: 71,0.1,2.2,4.2,7.2,58,32.1,Unnamed: 78
0,Timestamp,3. What academic year did you enroll in COMM 1...,2007-2008,2008-2009,2009-2010,2010-2011,2011-2012,4. What is/was your major?,5. When did you graduate from SJSU?,Spring 2008,...,Used a bicycle instead of a car as transportat...,The course didn’t have an influence on my tran...,19. As a result of my participation in this co...,All the time,Often2,Sometimes3,Occasionally4,Never,The course had no influence on my purchase of ...,
1,11/25/2015 13:43:47,2007-2008,1,0,0,0,0,Economics,Fall 2010,0,...,1,0,Never,0,0,0,0,1,0,
2,11/25/2015 13:48:31,2010-2011,0,0,0,1,0,Business,After Fall 2012,0,...,0,1,The course had no influence on my purchase of ...,0,0,0,0,0,1,
3,11/25/2015 13:54:36,2011-2012,0,0,0,0,1,Environmental Studies/ green building and ener...,After Fall 2012,0,...,1,0,The course had no influence on my purchase of ...,0,0,0,0,0,1,
4,11/25/2015 14:09:40,2010-2011,0,0,0,1,0,Environmental Science,Fall 2011,0,...,0,0,The course had no influence on my purchase of ...,0,0,0,0,0,1,


In [None]:
## 