In [1]:
import json
import requests
import matplotlib.pyplot as plt
import numpy as np
import pandas as pd


In [2]:
# Data Source:https://unstats.un.org/sdgs/UNSDGAPIV5/swagger/index.html
un_goals = pd.read_csv("Resources/UN_Goals.csv")

un_goals_clean = un_goals[["Target", 'GeoAreaName', 'TimePeriod', 'Value', '[Units]']]
un_goals_clean.head()

Unnamed: 0,Target,GeoAreaName,TimePeriod,Value,[Units]
0,13.2,Canada,1990,601.52369,MT_CO2E
1,13.2,Canada,1991,595.77673,MT_CO2E
2,13.2,Canada,1992,613.59852,MT_CO2E
3,13.2,Canada,1993,616.79839,MT_CO2E
4,13.2,Canada,1994,638.00047,MT_CO2E


In [3]:
total_un_goals_clean=un_goals_clean.groupby('TimePeriod').agg({'Value':["mean","median","min","max"]})
total_un_goals_clean


Unnamed: 0_level_0,Value,Value,Value,Value
Unnamed: 0_level_1,mean,median,min,max
TimePeriod,Unnamed: 1_level_2,Unnamed: 2_level_2,Unnamed: 3_level_2,Unnamed: 4_level_2
1990,601.52369,601.52369,601.52369,601.52369
1991,595.77673,595.77673,595.77673,595.77673
1992,613.59852,613.59852,613.59852,613.59852
1993,616.79839,616.79839,616.79839,616.79839
1994,638.00047,638.00047,638.00047,638.00047
1995,656.29872,656.29872,656.29872,656.29872
1996,678.71449,678.71449,678.71449,678.71449
1997,691.19537,691.19537,691.19537,691.19537
1998,696.90152,696.90152,696.90152,696.90152
1999,709.60092,709.60092,709.60092,709.60092


In [4]:
# Data Source: https://data.ec.gc.ca/data/substances/monitor/canada-s-official-greenhouse-gas-inventory/B-Economic-Sector/?lang=en

canada_result = pd.read_csv("Resources/Economic_Sector_Canada.csv")
canada_result.head()


Unnamed: 0,Sectors - Mt CO2 eq,1990,1991,1992,1993,1994,1995,1996,1997,1998,...,2011,2012,2013,2014,2015,2016,2017,2018,2019,2020
0,NATIONAL TOTAL,595,588,605,608,628,645,667,682,689,...,721,726,732,730,733,715,725,740,738,672
1,Oil and Gas,103,102,111,118,122,128,135,137,141,...,187,194,199,205,205,194,196,205,203,179
2,Electricity,95,96,103,93,95,98,98,109,122,...,87,83,80,76,80,74,73,63,62,56
3,Transport,120,114,115,117,121,122,126,131,137,...,168,171,174,171,172,173,179,184,185,159
4,Heavy Industry,97,97,95,94,100,100,103,103,98,...,80,80,79,79,78,76,76,77,77,72


In [5]:
#transposed
df1 = canada_result.set_index(['Sectors - Mt CO2 eq'])
df2 = df1.transpose()
df3 = df2[['Oil and Gas', 'Electricity', 'Transport','NATIONAL TOTAL']]
df4 = df3.reset_index()
transpose_can_result = df4.rename(columns={'index':'Years'})
transpose_can_result

Sectors - Mt CO2 eq,Years,Oil and Gas,Electricity,Transport,NATIONAL TOTAL
0,1990,103,95,120,595
1,1991,102,96,114,588
2,1992,111,103,115,605
3,1993,118,93,117,608
4,1994,122,95,121,628
5,1995,128,98,122,645
6,1996,135,98,126,667
7,1997,137,109,131,682
8,1998,141,122,137,689
9,1999,150,119,143,702


In [6]:
#transpose_can_result = transpose_can_result['Years'].astype(int)

In [7]:
transpose_can_result.dtypes

Sectors - Mt CO2 eq
Years             object
Oil and Gas        int64
Electricity        int64
Transport          int64
NATIONAL TOTAL     int64
dtype: object

In [21]:
total_canada_max_oil_gas = transpose_can_result['Oil and Gas'].max()
total_canada_year_oil_gas = transpose_can_result['Oil and Gas'].idxmax()
total_canada_min_oil_gas = transpose_can_result['Oil and Gas'].min()




total_canada_max_electricity = transpose_can_result['Electricity'].max()
total_canada_year_electricity = transpose_can_result['Electricity'].idxmax()
total_canada_min_electricity = transpose_can_result['Electricity'].min()

total_canada_max_transport = transpose_can_result['Transport'].max()
total_canada_year_transport = transpose_can_result['Transport'].idxmax()
total_canada_min_transport = transpose_can_result['Transport'].min()





In [22]:
# Filter the DataFrame based on the minimum oil and gas production value
min_oil_gas_df = transpose_can_result.loc[transpose_can_result['Oil and Gas'] == total_canada_min_oil_gas]
max_oil_gas_df = transpose_can_result.loc[transpose_can_result['Oil and Gas'] == total_canada_max_oil_gas]

min_Electricity_df = transpose_can_result.loc[transpose_can_result['Electricity'] == total_canada_min_electricity]
max_Electricity_df = transpose_can_result.loc[transpose_can_result['Electricity'] == total_canada_max_electricity]

min_Transport_df = transpose_can_result.loc[transpose_can_result['Transport'] == total_canada_min_transport]
max_Transport_df = transpose_can_result.loc[transpose_can_result['Transport'] == total_canada_max_transport]



# Reset the index to a column named 'Years'
min_oil_gas_df = min_oil_gas_df.reset_index().rename(columns={'index': 'Years'})
max_oil_gas_df = max_oil_gas_df.reset_index().rename(columns={'index': 'Years'})

min_Electricity_df = min_Electricity_df.reset_index().rename(columns={'index': 'Years'})
max_Electricity_df = max_Electricity_df.reset_index().rename(columns={'index': 'Years'})

min_Transport_df = min_Transport_df.reset_index().rename(columns={'index': 'Years'})
max_Transport_df = max_Transport_df.reset_index().rename(columns={'index': 'Years'})


# Extract the 'Years' column as a numpy array
min_oil_gas_years = min_oil_gas_df['Years'].values
max_oil_gas_years = max_oil_gas_df['Years'].values

min_Electricity_years = min_Electricity_df['Years'].values
max_Electricity_years = max_Electricity_df['Years'].values


min_Transport_years = min_Electricity_df['Years'].values
max_Transport_years = max_Electricity_df['Years'].values



# Print the values

print(f"Lowest Oil and Gas Emission {total_canada_min_oil_gas} at {min_oil_gas_years[0][1]}")
print(f"Highest Oil and Gas Emission {total_canada_max_oil_gas} at {max_oil_gas_years[0][1]}")

print(f"Lowest Electricity Emission {total_canada_min_electricity} at {min_Electricity_years[0][1]}")
print(f"Highest Electricity Emission {total_canada_max_electricity} at {max_Electricity_years[0][1]}")

print(f"Lowest Electricity Emission {total_canada_min_transport} at {min_Transport_years[0][1]}")
print(f"Highest Electricity Emission {total_canada_max_transport} at {max_Transport_years[0][1]}")




Lowest Oil and Gas Emission 102 at 1991
Highest Oil and Gas Emission 205 at 2014
Lowest Electricity Emission 56 at 2020
Highest Electricity Emission 129 at 2000
Lowest Electricity Emission 114 at 2020
Highest Electricity Emission 185 at 2000


In [23]:
# Data Source: https://data.ec.gc.ca/data/substances/monitor/canada-s-official-greenhouse-gas-inventory/B-Economic-Sector/?lang=en

provices_result = pd.read_csv("Resources/Economic_Sector_Prov.csv")
provices_result.head()


Unnamed: 0,Year,Region,Index,Source,CO2eq,Unit
0,1990,Alberta,0,Provincial Inventory Total,165.65,Mt
1,1990,Alberta,1,Oil and Gas,61.96,Mt
2,1990,Alberta,2,Electricity,39.76,Mt
3,1990,Alberta,3,Transport,15.78,Mt
4,1990,Alberta,4,Heavy Industry,12.59,Mt


In [24]:
provices_result

Unnamed: 0,Year,Region,Index,Source,CO2eq,Unit
0,1990,Alberta,0,Provincial Inventory Total,165.65,Mt
1,1990,Alberta,1,Oil and Gas,61.96,Mt
2,1990,Alberta,2,Electricity,39.76,Mt
3,1990,Alberta,3,Transport,15.78,Mt
4,1990,Alberta,4,Heavy Industry,12.59,Mt
...,...,...,...,...,...,...
3924,2020,Yukon,3924,Transport,0.41,Mt
3925,2020,Yukon,3925,Heavy Industry,0.03,Mt
3926,2020,Yukon,3926,Buildings,0.04,Mt
3927,2020,Yukon,3927,Waste,0.04,Mt


In [25]:
alberta = provices_result[provices_result["Region"] == "Alberta"]
ontario = provices_result[provices_result["Region"] == "Ontario"]
quebec = provices_result[provices_result["Region"] == "Quebec"]

alberta.loc[:, "CO2eq"] = pd.to_numeric(alberta["CO2eq"], errors="coerce")

alberta_scores_mean = alberta.groupby(["Source"])["CO2eq"].mean()
alberta_scores_median = alberta.groupby(["Source"]).median()
alberta_scores_mode = alberta.groupby(["Source"])["CO2eq"].apply(lambda x: x.mode()[0])
alberta_scores_std = alberta.groupby(["Source"])["CO2eq"].std()
alberta_scores_var = alberta.groupby(["Source"])["CO2eq"].var()
alberta_scores_sem = alberta.groupby(["Source"])["CO2eq"].sem()

alberta_scores = pd.DataFrame({"Mean": alberta_scores_mean,
    "Median": alberta_scores_median["CO2eq"],
    "Mode": alberta_scores_mode,
    "STD": alberta_scores_std,
    "Variance": alberta_scores_var,
    "SEM": alberta_scores_sem
})

alberta_scores.index.name = None

#alberta_scores_df.loc["Source"]== ('Oil and Gas')|alberta_scores_df.loc["Source"]==('Electricity')|alberta_scores_df.loc["Source"]==('Transport')




TypeError: could not convert string to float: 'Alberta'

In [None]:
alberta = provices_result[provices_result["Region"] == "Alberta"]
ontario = provices_result[provices_result["Region"] == "Ontario"]
quebec = provices_result[provices_result["Region"] == "Quebec"]


ontario.loc[:, "CO2eq"] = pd.to_numeric(ontario["CO2eq"], errors="coerce")

ontario_scores_mean = ontario.groupby(["Source"])["CO2eq"].mean()
ontario_scores_median = ontario.groupby(["Source"]).median()
ontario_scores_mode = ontario.groupby(["Source"])["CO2eq"].apply(lambda x: x.mode()[0] if not x.mode().empty else None)
ontario_scores_std = ontario.groupby(["Source"])["CO2eq"].std()
ontario_scores_var = ontario.groupby(["Source"])["CO2eq"].var()
ontario_scores_sem = ontario.groupby(["Source"])["CO2eq"].sem()

ontario_scores = pd.DataFrame({
    "Mean": ontario_scores_mean,
    "Median": ontario_scores_median["CO2eq"],
    "STD": ontario_scores_std,
    "Variance": ontario_scores_var,
    "SEM": ontario_scores_sem
})

ontario_scores_df = ontario_scores.loc[[]]
ontario_scores.index.name = None

ontario_scores


In [None]:
alberta = provices_result[provices_result["Region"] == "Alberta"]
ontario = provices_result[provices_result["Region"] == "Ontario"]
quebec = provices_result[provices_result["Region"] == "Quebec"]

quebec.loc[:, "CO2eq"] = pd.to_numeric(quebec["CO2eq"], errors="coerce")

quebec_scores_mean = quebec.groupby(["Source"])["CO2eq"].mean()
quebec_scores_median = quebec.groupby(["Source"]).median()
quebec_scores_mode = quebec.groupby(["Source"])["CO2eq"].apply(lambda x: x.mode()[0])
quebec_scores_std = quebec.groupby(["Source"])["CO2eq"].std()
quebec_scores_var = quebec.groupby(["Source"])["CO2eq"].var()
quebec_scores_sem = quebec.groupby(["Source"])["CO2eq"].sem()

quebec_scores = pd.DataFrame({
    "Mean": quebec_scores_mean,
    "Median": quebec_scores_median["CO2eq"],
    "Mode": quebec_scores_mode,
    "STD": quebec_scores_std,
    "Variance": quebec_scores_var,
    "SEM": quebec_scores_sem
})

quebec_scores.index.name = None

quebec_scores

In [None]:
   
    
    quartiles = treatment_data.quantile([.25,.5,.75]) 
    lowerq = quartiles[0.25]
    upperq = quartiles[0.75]
    iqr = upperq-lowerq
    lower_bound = lowerq - (1.5*iqr)
    upper_bound = upperq + (1.5*iqr)
    outliers = treatment_data.loc[(treatment_data< lower_bound) | (treatment_data> upper_bound)]
    print(f"{treatment}'s potential outliers: {outliers}")

# VISUALISATION

In [None]:
Canada_df =  transpose_can_result.groupby('Years').sum()[["Oil and Gas","Electricity","Transport"]]
Canada_df 

In [None]:
#Generate a Stacked bar graph of Canada industries for Oil and & gas, Electricity and transport showing GHG emissions per year 

ax = Canada_df.plot.bar(stacked=True, color =['blue', 'tomato','lightseagreen'], figsize=(20,12))
ax.set_title('GHG Emissions for Oil and Gas,Electricity and Transport Industries', fontsize=30)
ax.set_ylim(0,550)

ax.set_xticklabels(["1990","1991","1992","1993","1994","1995","1996","1997","1998","1999","2000","2001","2002","2003","2004","2005","2006","2007","2008","2009","2010","2011","2012","2013","2014","2015","2016","2017","2018","2019","2020"], rotation=0)

plt.xlabel("Years")
plt.ylabel("Sectors - Mt CO2 eq")


#show cgraph and set layout
plt.show()
plt.tight_layout()


# Toki

In [None]:
#transposed
df_1 = canada_result.set_index(['Sectors - Mt CO2 eq'])
df_2 = df_1.transpose()
#df3 = df2[['NATIONAL TOTAL','Oil and Gas', 'Electricity', 'Transport']]
df_4 = df_2.reset_index()
transpose_can_result_2 = df_4.rename(columns={'index':'Years'})
transpose_can_result_2.head()

In [None]:
#two line graphs and stackplot

y2_nat_ttl = transpose_can_result_2['NATIONAL TOTAL']
y2_oil_gas = transpose_can_result_2['Oil and Gas']
y2_elec = transpose_can_result_2['Electricity']
y2_transp = transpose_can_result_2['Transport']
x2_years = transpose_can_result_2['Years']

plt.plot(x2_years, y2_nat_ttl, label = 'National Total')
plt.plot(x2_years, y2_oil_gas, label = 'Oil and Gas Industry')
plt.plot(x2_years, y2_elec, label = 'Electricty Industry')
plt.plot(x2_years, y2_transp, label = 'Transportation Industry')
plt.legend()
plt.xticks(rotation = 270)
plt.title('Canada GHG Emissions')
plt.show()

print("---------------------------------------------------------------------------")

y_nat_ttl = transpose_can_result_2['NATIONAL TOTAL']
y_oil_gas = transpose_can_result_2['Oil and Gas']
y_elec = transpose_can_result_2['Electricity']
y_transp = transpose_can_result_2['Transport']
y_hvyind = transpose_can_result_2['Heavy Industry']
y_bldgs = transpose_can_result_2['Buildings']
y_agri = transpose_can_result_2['Agriculture']
y_wst = transpose_can_result_2['Waste']
y_coal = transpose_can_result_2['Coal Production']
y_lmcfr = transpose_can_result_2['Light Manufacturing, Construction and Forest Resources']
x_years = transpose_can_result_2['Years']

plt.plot(x_years, y_nat_ttl, label = 'National Total')
plt.plot(x_years, y_oil_gas, label = 'Oil and Gas Industry')
plt.plot(x_years, y_elec, label = 'Electricty Industry')
plt.plot(x_years, y_transp, label = 'Transportation Industry')
plt.plot(x_years, y_hvyind, label = 'Heavy Industry')
plt.plot(x_years, y_bldgs, label = 'Buildings')
plt.plot(x_years, y_agri, label = 'Agriculture')
plt.plot(x_years, y_wst, label = 'Waste')
plt.plot(x_years, y_coal, label = 'Coal Production')
plt.plot(x_years, y_lmcfr, label = 'Others')
plt.legend(bbox_to_anchor = (1.04, 0.5), loc = "center left")
plt.xticks(rotation = 270)
plt.title('Canada GHG Emissions')
plt.show()

print("---------------------------------------------------------------------------")

emissions_df = transpose_can_result_2[['Oil and Gas', 'Electricity', 'Transport', 
                                    'Heavy Industry', 'Buildings', 'Agriculture', 'Waste', 'Coal Production', 
                                    'Light Manufacturing, Construction and Forest Resources']]
emissions_dict = emissions_df.to_dict('list')

years = transpose_can_result_2['Years']


fig, ax = plt.subplots()
ax.stackplot(years, emissions_dict.values(), labels = emissions_dict.keys(), alpha = 0.8)
ax.legend(bbox_to_anchor = (1.04, 0.5), loc = "center left")
plt.xticks(rotation = 270)
plt.title('Canada Total GHG Emission per Industry')
plt.show()