![Callysto notebook banner (top)](https://github.com/callysto/callysto-sample-notebooks/blob/master/notebooks/images/Callysto_Notebook-Banner_Top_06.06.18.jpg?raw=true)

## Case Study: Greenhouse gas emissions, by sector (1990 - 2008)

Greenhouse gas emissions (carbon dioxide equivalents), by industries and households. Industry aggregation is at the L-level of the input-output accounts of Statistics Canada.

Geography: Canada

Table ID 38100111

Source

https://open.canada.ca/data/en/dataset/2d60830b-ee2e-4fb5-8c6c-f241f6bf76ba

In [None]:
# Run the three Statistics Canada helper scripts. The `-i` flag executes each
# file in this notebook's own namespace, so the names they define are
# available to the cells that follow.
# NOTE(review): presumably `download_tables` and `zip_table_to_dataframe`,
# called further down, are defined by one of these scripts — confirm.
%run -i ./stats_can/helpers.py
%run -i ./stats_can/scwds.py
%run -i ./stats_can/sc.py

In [None]:
from ipywidgets import widgets, VBox, HBox, Button
from ipywidgets import Button, Layout, widgets
from IPython.display import display, Javascript, Markdown, HTML
import datetime as dt
import pandas as pd
import json
import datetime
from tqdm import tnrange, tqdm_notebook
from time import sleep

# Raise the pandas display limits to 500 rows and 500 columns.
for option_name in ('display.max_rows', 'display.max_columns'):
    pd.set_option(option_name, 500)

# Style dictionary passed to the widgets created later in the notebook.
style = dict(description_width='initial')



In [None]:
# Download the table and load it into a DataFrame.
# DATA SET PRODUCT ID  for internal use only.
productId = '38100111'

download_tables(str(productId))
df_fullDATA = zip_table_to_dataframe(productId)

# Clean up the full dataset: keep the REF_DATE..UOM column range plus
# SCALAR_FACTOR and VALUE, then drop DGUID (internal use columns).
leading_cols = df_fullDATA.loc[:, 'REF_DATE':'UOM'].columns.tolist()
cols = leading_cols + ['SCALAR_FACTOR', 'VALUE']
df_less = df_fullDATA[cols]
df_less2 = df_less.drop(columns=["DGUID"])

# Show only the first five rows
df_less2.head()

In [None]:
# Fancy user interface to explore datasets
def rerun_cell(b):
    """Click handler that executes the cells right after the selected one.

    Emits a JavaScript snippet calling ``IPython.notebook.execute_cell_range``
    from (selected index + 1) to (selected index + 3).

    Parameters
    ----------
    b : object
        Argument supplied by the click callback; it is not used.
    """
    js_code = ('IPython.notebook.execute_cell_range(IPython.notebook.get_selected_index()+1,'
               '    IPython.notebook.get_selected_index()+3)')
    display(Javascript(js_code))

    
def run_4cell(b):
    """Click handler that executes a longer range of cells after the selected one.

    Emits a JavaScript snippet calling ``IPython.notebook.execute_cell_range``
    from (selected index + 1) to (selected index + 5).

    Parameters
    ----------
    b : object
        Argument supplied by the click callback; it is not used.
    """
    js_code = ('IPython.notebook.execute_cell_range(IPython.notebook.get_selected_index()+1,'
               '    IPython.notebook.get_selected_index()+5)')
    display(Javascript(js_code))

# Style dictionary shared by the widgets below
style = dict(description_width='initial')

# Dropdown offering every distinct sector; it starts on the sector of the
# first row of the dataset.
sector_options = df_less2["Sector"].unique()
sector_dropdown = widgets.Dropdown(
    value=df_less2["Sector"].tolist()[0],
    options=sector_options,
    description='Sector:',
    style=style,
    disabled=False,
)
all_the_widgets = [sector_dropdown]

# Button widget
CD_button = widgets.Button(
    button_style='success',
    description="Preview Dataset",
    layout=Layout(width='15%', height='30px'),
    style=style,
)

# Clicking the button runs the subsequent cells
CD_button.on_click(rerun_cell)

# User menu: the selector row sits above the button, inside a single tab
selector_row = HBox(children=all_the_widgets[:3])
tab3 = VBox(children=[selector_row, CD_button])
tab = widgets.Tab(children=[tab3])
tab.set_title(0, 'Load Data Subset')
display(tab)

In [None]:
# Keep only the rows whose sector matches the current dropdown selection
selected_sector = all_the_widgets[0].value
sub_df = df_less2[df_less2["Sector"] == selected_sector]

In [None]:
# Time to plot!
import matplotlib.pyplot as plt
from pandas.plotting import register_matplotlib_converters
from matplotlib.pyplot import figure
register_matplotlib_converters()
%matplotlib inline

# Actual plot of time series
figure(num=None, figsize=(8, 6), dpi=80, facecolor='w', edgecolor='k')
# Get start and end date, plot value found under "VALUE" command
plt.plot(sub_df["REF_DATE"],sub_df["VALUE"],'b--',label='Value')
plt.xlabel('Year', fontsize=15)
plt.ylabel('Greenhouse Gas Emissions (kilotonnes)',fontsize=15)
# Title changes depending on the subcategory explored
plt.title(str(all_the_widgets[0].value),fontsize=20)
plt.xticks(rotation=90)
plt.grid(True)


In [None]:
#load "cufflinks" library under short name "cf"
import cufflinks as cf

#command to display graphics correctly in Jupyter notebook
cf.go_offline()

def enable_plotly_in_cell(info=None):
    """Prepare the notebook front end for offline plotly output.

    Injects a ``<script>`` tag that loads require.js and then calls
    ``init_notebook_mode(connected=False)``.

    Parameters
    ----------
    info : object, optional
        Execution info that IPython passes to ``pre_run_cell`` callbacks.
        It is accepted so the function matches the documented callback
        prototype, and is not used. Calling the function with no argument
        still works.
    """
    # Imported locally from the public IPython.display module so the function
    # does not depend on names imported in other cells.
    from IPython.display import HTML, display
    from plotly.offline import init_notebook_mode
    display(HTML('''
        <script src="/static/components/requirejs/require.js"></script>
  '''))
    init_notebook_mode(connected=False)

# Run the setup before every cell execution
get_ipython().events.register('pre_run_cell', enable_plotly_in_cell)

In [None]:
# Pivot table of greenhouse gas emissions with one row per year and one
# column per sector; the "Total, all sectors" rows are left out.
is_individual_sector = df_less2["Sector"] != "Total, all sectors"
all_data = pd.pivot_table(
    df_less2[is_individual_sector],
    values='VALUE',
    index=["REF_DATE"],
    columns=["Sector"],
)

<h2 align='center'>Total Greenhouse Gas Emissions by Sector, by Year (1990 - 2008)</h2>

In [None]:
# Display the `all_data` pivot table (REF_DATE rows, Sector columns)
all_data

In [None]:
# Box plot of the emission values, one box per sector column
title = "Boxplot of Greenhouse Gas Emissions by Sector (1990 - 2008)"
print(title)

# Keep the y axis on the left-hand side
layout = {'yaxis': {'side': 'left'}}

# Build the figure first so the legend position can be set before rendering
my_fig = all_data.iplot(kind='box', layout=layout, asFigure=True)
my_fig.layout.legend = {'x': 1.0, 'y': 1.8}
my_fig.iplot(filename='line-example.html')


In [None]:
# Average emissions per sector across all years; the "Total, all sectors"
# rows are left out.
# The aggregation is named by string ("mean") so this cell does not depend on
# `np`, which this notebook never imports. It is also the default aggregation
# of pd.pivot_table. Note that "mean" skips missing values.
all_data2 = pd.pivot_table(df_less2[df_less2["Sector"]!="Total, all sectors"],
                           values='VALUE', index=["Sector"], aggfunc="mean")

<h2 align='center'>Average Greenhouse Gas Emissions by Sector (1990 - 2008)</h2>

In [None]:
# Display the `all_data2` pivot table (one row per Sector)
all_data2

In [None]:
# Order sectors from largest to smallest VALUE, then turn the "Sector" index
# level back into a regular column.
sorted_sector = (
    all_data2
    .sort_values(by='VALUE', ascending=False)
    .reset_index("Sector")
)

In [None]:
# Pie chart of the first 20 rows of the sorted table
top_sectors = sorted_sector.head(20)
top_sectors.iplot(
    kind="pie",
    values="VALUE",
    labels="Sector",
    title="Average Greenhouse Emissions by Sector",
)

In [None]:
# Line chart of every sector's yearly values; the legend is hidden
timeseries_options = dict(
    labels='Sector',
    legend=False,
    title="Time Series, Yearly Greenhouse Gas Emissions, by Sector (1990-2008)",
    xaxis_title="Year",
    yaxis_title="Greenhouse Gas Emissions (kilotonnes)",
)
all_data.iplot(**timeseries_options)

![Callysto notebook banner (bottom)](https://github.com/callysto/callysto-sample-notebooks/blob/master/notebooks/images/Callysto_Notebook-Banners_Bottom_06.06.18.jpg?raw=true)