In [1]:
# Dependencies and Setup
%matplotlib inline
import matplotlib.pyplot as plt
import pandas as pd
import numpy as np
from scipy.stats import sem

# Hide warning messages in notebook
import warnings
warnings.filterwarnings('ignore')

# File to Load (Remember to Change These)
international_ghe_load = "AIR_GHG_06112019215747904.csv"
canada_ghe_load = "GHG_IPCC_Can_Prov_Terr.csv"

# Read the international and Canadian GHE data files
internationalghe_data = pd.read_csv(international_ghe_load)
canadaghe_data = pd.read_csv(canada_ghe_load)


#  Cleaning the International dataframe

In [2]:
# Cleaning the international data file

internationalghe = internationalghe_data.loc[:,["Year","Country", "Pollutant", "VAR","Unit", "Value" ]]


internationalghe.head()

Unnamed: 0,Year,Country,Pollutant,VAR,Unit,Value
0,1990,Australia,Greenhouse gases,TOTAL,Tonnes of CO2 equivalent,420315.323
1,1991,Australia,Greenhouse gases,TOTAL,Tonnes of CO2 equivalent,421381.463
2,1992,Australia,Greenhouse gases,TOTAL,Tonnes of CO2 equivalent,425702.483
3,1993,Australia,Greenhouse gases,TOTAL,Tonnes of CO2 equivalent,426232.23
4,1994,Australia,Greenhouse gases,TOTAL,Tonnes of CO2 equivalent,426304.968


In [3]:
internationalghe.describe()

Unnamed: 0,Year,Value
count,51614.0,51614.0
mean,2003.500155,139405.5
std,7.992061,1612112.0
min,1990.0,-1499960.0
25%,1997.0,18.09375
50%,2004.0,101.254
75%,2010.0,7310.914
max,2017.0,100197500.0


In [4]:
years = internationalghe["Year"].unique()
years


array([1990, 1991, 1992, 1993, 1994, 1995, 1996, 1997, 1998, 1999, 2000,
       2001, 2002, 2003, 2004, 2005, 2006, 2007, 2008, 2009, 2010, 2011,
       2012, 2013, 2014, 2015, 2016, 2017], dtype=int64)

In [5]:
internationalghe["Pollutant"].unique()

array(['Greenhouse gases', 'Carbon dioxide', 'Methane', 'Nitrous oxide',
       'Hydrofluorocarbons', 'Perfluorocarbons', 'Sulphur hexafluoride',
       'Nitrogen trifluoride', 'Unspecified mix of HFCs and PFCs'],
      dtype=object)

In [6]:
# Maximum value for GHE emissions

max_ghe = internationalghe["Value"].max()
max_ghe

100197518.299

In [7]:
# Reducing dataset to contain only total greenhouse emissions per country and year

# Generating list of countries
country = internationalghe["Country"].unique()
country

array(['Australia', 'Austria', 'Belgium', 'Canada', 'Czech Republic',
       'Denmark', 'Finland', 'France', 'Germany', 'Greece', 'Hungary',
       'Iceland', 'Ireland', 'Italy', 'Japan', 'Korea', 'Luxembourg',
       'Mexico', 'Netherlands', 'New Zealand', 'Norway', 'Poland',
       'Portugal', 'Slovak Republic', 'Spain', 'Sweden', 'Switzerland',
       'Turkey', 'United Kingdom', 'United States', 'Chile', 'Estonia',
       'Israel', 'Russia', 'Slovenia', 'Latvia', 'Lithuania',
       'OECD - Total', 'OECD - Europe', 'Brazil',
       "China (People's Republic of)", 'Colombia', 'Costa Rica', 'India',
       'Indonesia', 'South Africa', 'Argentina',
       'European Union (28 countries)'], dtype=object)

In [8]:
# Filtering the dataframe for only total emissions and greenhouse gases per country and per year

country_ghe = internationalghe.loc[internationalghe["VAR"] == "TOTAL", :]
country_ghe = internationalghe.loc[internationalghe["Pollutant"] == "Greenhouse gases", :]

country_ghe.head()

Unnamed: 0,Year,Country,Pollutant,VAR,Unit,Value
0,1990,Australia,Greenhouse gases,TOTAL,Tonnes of CO2 equivalent,420315.323
1,1991,Australia,Greenhouse gases,TOTAL,Tonnes of CO2 equivalent,421381.463
2,1992,Australia,Greenhouse gases,TOTAL,Tonnes of CO2 equivalent,425702.483
3,1993,Australia,Greenhouse gases,TOTAL,Tonnes of CO2 equivalent,426232.23
4,1994,Australia,Greenhouse gases,TOTAL,Tonnes of CO2 equivalent,426304.968


In [9]:
country_ghe.describe()

Unnamed: 0,Year,Value
count,31148.0,31148.0
mean,2003.484943,152426.8
std,8.002099,940263.8
min,1990.0,-1499960.0
25%,1997.0,8.614
50%,2004.0,84.219
75%,2010.0,17118.62
max,2017.0,16931120.0


In [10]:
# Selecting the columns for the final data frame

country_ghe = country_ghe.loc[:,["Year","Country", "Pollutant", "VAR","Value"]]
country_ghe.head()

country_ghe.dtypes

Year           int64
Country       object
Pollutant     object
VAR           object
Value        float64
dtype: object

# Cleaning the Canadian dataframe

In [7]:
canadaghe_data.head()

Unnamed: 0,Year,Province/Territory,CategoryID,Category,Rollup,CO2,CH4,CH4eq,N2O,N2Oeq,HFCs,PFCs,SF6,NF3,CO2E,Unit
0,1990,Canada,110,Public Electricity and Heat Production,False,93723.47784,1.782975494,44.57438736,1.729072228,515.2635241,0,0,0,0,94283.31575,kt
1,1990,Canada,120,Petroleum Refining Industries,False,17300.2733,0.429143356,10.72858389,0.174660169,52.04873034,0,0,0,0,17363.05062,kt
2,1990,Canada,130,Oil and Gas Extraction,False,32971.13809,62.36636212,1559.159053,0.692008076,206.2184067,0,0,0,0,34736.51554,kt
3,1990,Canada,131,Mining,False,4576.522975,0.096745975,2.418649381,0.094969959,28.30104789,0,0,0,0,4607.242672,kt
4,1990,Canada,141,Iron and Steel,False,4904.693043,0.152365184,3.809129597,0.127264944,37.9249533,0,0,0,0,4946.427126,kt


In [8]:
# Selecting the columns

canadaghe = canadaghe_data.loc[:,["Year","Province/Territory", "CO2E", "Unit" ]]
canadaghe.head()

Unnamed: 0,Year,Province/Territory,CO2E,Unit
0,1990,Canada,94283.31575,kt
1,1990,Canada,17363.05062,kt
2,1990,Canada,34736.51554,kt
3,1990,Canada,4607.242672,kt
4,1990,Canada,4946.427126,kt


In [14]:
# Converting CO2E to numeric (float)

canadaghe["CO2Ef"] = pd.to_numeric(canadaghe["CO2E"], errors = 'coerce', downcast = 'float')

In [15]:
canadaghe.dtypes

Year                    int64
Province/Territory     object
CO2E                   object
Unit                   object
CO2Ef                 float32
dtype: object

In [16]:
# Converting CO2E (in kt) to tones and adding a new column

CO2E_tones = canadaghe["CO2Ef"] * 1000

canadaghe["CO2E (Tones)"] = CO2E_tones

In [17]:
canadaghe.head()

Unnamed: 0,Year,Province/Territory,CO2E,Unit,CO2Ef,CO2E (Tones)
0,1990,Canada,94283.31575,kt,94283.3125,94283312.0
1,1990,Canada,17363.05062,kt,17363.050781,17363050.0
2,1990,Canada,34736.51554,kt,34736.515625,34736516.0
3,1990,Canada,4607.242672,kt,4607.242676,4607242.5
4,1990,Canada,4946.427126,kt,4946.427246,4946427.0
