In [2]:
# Dependencies and Setup
import matplotlib.pyplot as plt
import pandas as pd
import scipy.stats as st
import numpy as np

# Study data files

emissions_source_path = "Resources/by-end-user/data.csv"

emissions_user_path = "Resources/emissions-by-source/data.csv"

# Read the data and the study results
emissions_source = pd.read_csv(emissions_source_path)
user_source = pd.read_csv(emissions_user_path)

# Combine the data into a single dataset

data_df = pd.merge(emissions_source, user_source, how = 'outer', on='Year')
data_df.info()
# Display the data table for preview

<class 'pandas.core.frame.DataFrame'>
Int64Index: 62514735 entries, 0 to 62514734
Data columns (total 27 columns):
 #   Column                               Dtype  
---  ------                               -----  
 0   National Communication Category_x    object 
 1   National Communication Fuel_x        object 
 2   Measure type_x                       object 
 3   Greenhouse Gas Group_x               object 
 4   IPCC Code_x                          object 
 5   Year                                 int64  
 6   National Communication Sub Sector_x  object 
 7   Greenhouse Gas_x                     object 
 8   Activity Name_x                      object 
 9   National Communication Fuel Group_x  object 
 10  Source_x                             object 
 11  National Communication Sector_x      object 
 12  Gas Emissions_x                      float64
 13  Unit of Measure_x                    object 
 14  National Communication Sector_y      object 
 15  National Communication Fuel_y 

In [3]:
# analyse data

emissions_source.info()
emissions_source.head()
emissions_source["Year"].unique()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 41508 entries, 0 to 41507
Data columns (total 14 columns):
 #   Column                             Non-Null Count  Dtype  
---  ------                             --------------  -----  
 0   National Communication Category    41508 non-null  object 
 1   National Communication Fuel        41508 non-null  object 
 2   Measure type                       41508 non-null  object 
 3   Greenhouse Gas Group               41508 non-null  object 
 4   IPCC Code                          41508 non-null  object 
 5   Year                               41508 non-null  int64  
 6   National Communication Sub Sector  41508 non-null  object 
 7   Greenhouse Gas                     41508 non-null  object 
 8   Activity Name                      41508 non-null  object 
 9   National Communication Fuel Group  41508 non-null  object 
 10  Source                             41508 non-null  object 
 11  National Communication Sector      41508 non-null  obj

array([2017, 2006, 2005, 2001, 1991, 1995, 1990, 2008, 2013, 1994, 1998,
       2016, 2018, 1999, 2012, 1996, 2010, 1993, 2003, 1992, 2007, 2002,
       2019, 1997, 2009, 2015, 2000, 2004, 2014, 2011], dtype=int64)

In [4]:
# analyse data
user_source.info()
user_source.head()
user_source["Year"].unique()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 44838 entries, 0 to 44837
Data columns (total 14 columns):
 #   Column                             Non-Null Count  Dtype  
---  ------                             --------------  -----  
 0   National Communication Sector      44838 non-null  object 
 1   National Communication Fuel        44838 non-null  object 
 2   IPCC Code                          44838 non-null  object 
 3   National Communication Fuel Group  44838 non-null  object 
 4   National Communication Category    44838 non-null  object 
 5   National Communication Sub Sector  44838 non-null  object 
 6   Source                             44838 non-null  object 
 7   Greenhouse Gas Group               44838 non-null  object 
 8   Greenhouse Gas                     44838 non-null  object 
 9   Measure type                       44838 non-null  object 
 10  Activity Name                      44838 non-null  object 
 11  Year                               44838 non-null  int

array([1993, 1995, 1999, 2001, 1996, 1997, 1998, 2002, 1990, 1992, 2003,
       1994, 2000, 1991, 2004, 2011, 2014, 2012, 2005, 2010, 2016, 2008,
       2009, 2007, 2006, 2013, 2015, 2017, 2018, 2019], dtype=int64)

In [5]:
data_df.head()

Unnamed: 0,National Communication Category_x,National Communication Fuel_x,Measure type_x,Greenhouse Gas Group_x,IPCC Code_x,Year,National Communication Sub Sector_x,Greenhouse Gas_x,Activity Name_x,National Communication Fuel Group_x,...,National Communication Fuel Group_y,National Communication Category_y,National Communication Sub Sector_y,Source_y,Greenhouse Gas Group_y,Greenhouse Gas_y,Measure type_y,Activity Name_y,Gas Emissions_y,Unit of Measure_y
0,Land converted to wetland,Other emissions,Gas Emissions,Carbon dioxide (CO2),4D2,2017,Wetlands,Carbon dioxide (CO2),Non fuel combustion,Other emissions,...,Other emissions,Cattle - wastes,Wastes,Other cattle - Heifers for breeding - Digestat...,Nitrous oxide (N2O),Nitrous oxide (N2O),Gas Emissions,Storage,1e-05,http://gss-data.org.uk/def/concept/measurement...
1,Land converted to wetland,Other emissions,Gas Emissions,Carbon dioxide (CO2),4D2,2017,Wetlands,Carbon dioxide (CO2),Non fuel combustion,Other emissions,...,Gaseous fuels,Residential combustion,Not Applicable,Domestic combustion,Carbon dioxide (CO2),Carbon dioxide (CO2),Gas Emissions,Natural gas,54.06971,http://gss-data.org.uk/def/concept/measurement...
2,Land converted to wetland,Other emissions,Gas Emissions,Carbon dioxide (CO2),4D2,2017,Wetlands,Carbon dioxide (CO2),Non fuel combustion,Other emissions,...,Coal,Industrial combustion and electricity (excl. i...,Not Applicable,Cement production - combustion,Methane (CH4),Methane (CH4),Gas Emissions,Coal,0.00385,http://gss-data.org.uk/def/concept/measurement...
3,Land converted to wetland,Other emissions,Gas Emissions,Carbon dioxide (CO2),4D2,2017,Wetlands,Carbon dioxide (CO2),Non fuel combustion,Other emissions,...,Gaseous fuels,Residential combustion,Not Applicable,Domestic combustion,Carbon dioxide (CO2),Carbon dioxide (CO2),Gas Emissions,Lpg,0.58445,http://gss-data.org.uk/def/concept/measurement...
4,Land converted to wetland,Other emissions,Gas Emissions,Carbon dioxide (CO2),4D2,2017,Wetlands,Carbon dioxide (CO2),Non fuel combustion,Other emissions,...,Other emissions,Sheep - wastes,Wastes,Excreta,Methane (CH4),Methane (CH4),Gas Emissions,Sheep lamb,0.03042,http://gss-data.org.uk/def/concept/measurement...


In [10]:
sector_use = data_df.groupby('National Communication Category_x')

sector_use.count()

Unnamed: 0_level_0,National Communication Fuel_x,Measure type_x,Greenhouse Gas Group_x,IPCC Code_x,Year,National Communication Sub Sector_x,Greenhouse Gas_x,Activity Name_x,National Communication Fuel Group_x,Source_x,...,National Communication Fuel Group_y,National Communication Category_y,National Communication Sub Sector_y,Source_y,Greenhouse Gas Group_y,Greenhouse Gas_y,Measure type_y,Activity Name_y,Gas Emissions_y,Unit of Measure_y
National Communication Category_x,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1
Accidental fires - business,44838,44838,44838,44838,44838,44838,44838,44838,44838,44838,...,44838,44838,44838,44838,44838,44838,44838,44838,44838,44838
Accidental fires - residential,89676,89676,89676,89676,89676,89676,89676,89676,89676,89676,...,89676,89676,89676,89676,89676,89676,89676,89676,89676,89676
Adipic acid production,28837,28837,28837,28837,28837,28837,28837,28837,28837,28837,...,28837,28837,28837,28837,28837,28837,28837,28837,28837,28837
Aerosols and metered dose inhalers,162038,162038,162038,162038,162038,162038,162038,162038,162038,162038,...,162038,162038,162038,162038,162038,162038,162038,162038,162038,162038
Aircraft support vehicles,134514,134514,134514,134514,134514,134514,134514,134514,134514,134514,...,134514,134514,134514,134514,134514,134514,134514,134514,134514,134514
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
Use of N2O - Industrial Process,44838,44838,44838,44838,44838,44838,44838,44838,44838,44838,...,44838,44838,44838,44838,44838,44838,44838,44838,44838,44838
Use of non aerosol consumer products,44838,44838,44838,44838,44838,44838,44838,44838,44838,44838,...,44838,44838,44838,44838,44838,44838,44838,44838,44838,44838
Waste Incineration,385785,385785,385785,385785,385785,385785,385785,385785,385785,385785,...,385785,385785,385785,385785,385785,385785,385785,385785,385785,385785
Waste-water handling,224190,224190,224190,224190,224190,224190,224190,224190,224190,224190,...,224190,224190,224190,224190,224190,224190,224190,224190,224190,224190


In [11]:
sector_sum = sector_use[['Gas Emissions_y']].sum()

sector_sum.head(10)

Unnamed: 0_level_0,Gas Emissions_y
National Communication Category_x,Unnamed: 1_level_1
Accidental fires - business,19829.82128
Accidental fires - residential,39659.64256
Adipic acid production,14584.08553
Aerosols and metered dose inhalers,68076.69926
Aircraft support vehicles,59489.46384
Aluminium production,59489.46384
Ammonia production,59489.46384
Anaerobic digestion,19020.70142
Biomass burning - Cropland,76451.43589
Biomass burning - Forest land,59489.46384
