In [1]:
from jupyter_dash import JupyterDash
import dash
from dash import dcc
from dash import html
import pandas as pd
import plotly.graph_objects as go

In [2]:
# Raise pandas' display limits so wide and long frames are not truncated when inspected.
pd.set_option("display.max_columns", 99)
pd.set_option("display.max_rows", 999)

# Requirements

Description: Emission dashboard to explore individual country contributions and identify the most polluting nations over time.

- Graph 1: Shows every country's total emissions against population or GDP per capita. The year is adjustable with a slider.  
- Graph 2: Shows detailed emission values for the selected country from Graph 1 over time. There should be an option to select total or per capita values.  
- Graph 3: Shows the top 10 most polluting nations over time.

### Information about the combined dataset from the World Bank

Contains data in the following categories:

- Country stats:
  - Population, total
  - GDP per capita (constant 2015 US$)  
    
    
- Emission data:
  - CO2 emissions (kt)
  - CO2 emissions (metric tons per capita)
  - Methane emissions (kt of CO2 equivalent)
  - Nitrous oxide emissions (thousand metric tons of CO2 equivalent)

### Read and clean data

In [30]:
# Read the World Bank export and drop the identifier columns that are not needed
data_path = 'Data Sources/'
df = pd.read_csv(data_path + 'World Development Indicators.csv')
df = df.drop(columns=['Country Code', 'Series Code'])

# Reshape to long format: one row per (country, indicator, year column)
df = df.melt(id_vars=['Country Name', 'Series Name'], var_name='Year', value_name='Value').dropna()

# Keep the first four characters of each year column label and coerce both
# columns to numbers; anything that cannot be parsed becomes NaN
df['Year'] = pd.to_numeric(df['Year'].str[0:4], errors='coerce')
df['Value'] = pd.to_numeric(df['Value'], errors='coerce')

# Drop the rows that only turned into NaN during coercion (the dropna above ran
# before the conversion and could not see them), then pin Year to an integer dtype
df = df.dropna(subset=['Year', 'Value']).astype({'Year': 'int64'})

# Simplify indicator notations
indicator_translation = {
    'Population, total' : 'Population',
    'GDP per capita (constant 2015 US$)' : 'GDP (per capita)',
    'CO2 emissions (kt)' : 'CO2',
    'CO2 emissions (metric tons per capita)' : 'CO2 (per capita)',
    'Methane emissions (kt of CO2 equivalent)' : 'CH4',
    'Nitrous oxide emissions (thousand metric tons of CO2 equivalent)' : 'NOX'
}
# Assign the result back instead of calling replace(..., inplace=True) on the
# selected column: the chained in-place form does not update df under pandas
# Copy-on-Write
df['Series Name'] = df['Series Name'].replace(indicator_translation)

### Calculate per capita values where they are missing

In [31]:
# Pair every CH4/NOX observation with the population of the same country and year.
# Columns coming from the population side get the '_y' suffix.
emissions_with_population = df[df['Series Name'].isin(['CH4', 'NOX'])].merge(
    df[df['Series Name'] == 'Population'],
    on=['Country Name', 'Year'], how='inner', suffixes=('', '_y'),
)

# Emission per person; the factor 1000 adjusts the result to metric tons
# (the source indicators are reported in kt / thousand metric tons)
per_capita_value = emissions_with_population['Value'].div(emissions_with_population['Value_y']) * 1000

# Add per capita notations
# NOTE: this rebinds the module-level name `indicator_translation`, as the original cell did
indicator_translation = {
    'NOX' : 'NOX (per capita)',
    'CH4' : 'CH4 (per capita)'
}

# Build the per-capita frame with the same columns as `df` so it can be concatenated.
# Everything is assigned on a fresh frame: the earlier rename(inplace=True) on a column
# subset and replace(inplace=True) on a selected column are chained in-place updates
# that do not reliably modify the frame (and are no-ops under pandas Copy-on-Write).
df_emissions_per_capita = emissions_with_population[['Country Name', 'Series Name', 'Year']].assign(
    **{
        'Series Name': emissions_with_population['Series Name'].replace(indicator_translation),
        'Value': per_capita_value,
    }
)

# Add new per capita values next to the original sub-set of emissions
emissions = ['CO2', 'CO2 (per capita)', 'NOX', 'CH4']
df_emissions = pd.concat([df[df['Series Name'].isin(emissions)], df_emissions_per_capita])

### Rank countries based on total emissions

In [32]:
# Wide layout: one row per (country, year) and one column per emission series,
# so the series can be added up column-wise in the next step
df_emissions_ranked = df_emissions.pivot(
    index=['Country Name', 'Year'],
    columns=['Series Name'],
    values='Value',
).reset_index()

In [33]:
# Calculate totals: sum of the three gases, once in absolute terms and once per capita.
# A missing component makes the total NaN for that country/year.
df_emissions_ranked['Total Emission'] = (
    df_emissions_ranked['CH4'] + df_emissions_ranked['CO2'] + df_emissions_ranked['NOX']
)
df_emissions_ranked['Total Emission (per capita)'] = (
    df_emissions_ranked['CH4 (per capita)']
    + df_emissions_ranked['CO2 (per capita)']
    + df_emissions_ranked['NOX (per capita)']
)

# Rank countries within every year, rank 1 being the largest emitter.
# method='min' keeps ranks whole numbers when countries tie (both get the better
# rank); the default 'average' would give e.g. 9.5, which a whole-number
# "top 10" filter would silently drop.
emissions_by_year = df_emissions_ranked.groupby(['Year'])
df_emissions_ranked['Emission Rank'] = (
    emissions_by_year['Total Emission'].rank(ascending=False, method='min')
)
df_emissions_ranked['Emission Rank (per capita)'] = (
    emissions_by_year['Total Emission (per capita)'].rank(ascending=False, method='min')
)

# Pivot back to the long format: one row per (country, year, series)
df_emissions_ranked = df_emissions_ranked.melt(
    id_vars=['Country Name', 'Year'], var_name='Series Name', value_name='Value'
)

In [34]:
# Frame behind every chart: the two country statistics from `df` stacked on top of
# the emission series, totals and ranks, with rows containing missing values removed
country_stats = df[df['Series Name'].isin(['Population','GDP (per capita)'])]
df_final = pd.concat([country_stats, df_emissions_ranked]).dropna()

### Set up dashboard layout

In [36]:
external_stylesheets = ['https://codepen.io/chriddyp/pen/bWLwgP.css']

app = JupyterDash(__name__, external_stylesheets=external_stylesheets)

server = app.server

# Years that have a 'Total Emission' value, in chronological order, without the
# final one. Sorting first guarantees that the year left out really is the latest
# one; unique() alone returns years in row order, so "[:-1]" could drop any year.
# NOTE(review): the latest year is presumably excluded because its data is
# incomplete - confirm.
emission_range = (
    df_final.loc[df_final['Series Name'] == 'Total Emission', 'Year']
    .drop_duplicates()
    .sort_values()
    .to_numpy()[:-1]
)

# Plain Python ints for the slider props, so the layout does not depend on how
# numpy scalars are serialised
slider_years = [int(year) for year in emission_range]

app.layout = html.Div([
                html.H1('Emission Dashboard'),

                # Selector for the x axis of the main scatter plot
                html.Div([
                    html.P('Total emission by: '),
                    dcc.Dropdown(
                            id='crossfilter-meta-type',
                            options=[{'label': i, 'value': i} for i in ['Population', 'GDP (per capita)']],
                            value='Population',
                        )], style = {'display': 'inline-block', 'width': '25%'}),

                # Graph 1: all countries; clicking a point selects the country for Graph 2.
                # The initial clickData preselects a country before any click happens.
                html.Div([
                    dcc.Graph(
                        id='crossfilter-population-gdp',
                        clickData={'points': [{'customdata': 'Hungary'}]}
                    )], style={'width': '95%', 'display': 'inline-block', 'padding': '0 20'}),

                # Year slider restricted to the years present in the data (step=None
                # makes the handle snap to the marks)
                html.Div(dcc.Slider(
                    id='crossfilter-year-slider',
                    min=min(slider_years),
                    max=max(slider_years),
                    value=max(slider_years),
                    marks={str(year): str(year) for year in slider_years},
                    step=None
                ), style={'width': '95%', 'padding': '0px 20px 20px 20px'}),

                html.Div([
                    # Graph 2: per-country detail with the total / per capita switch
                    html.Div([
                        dcc.RadioItems(
                            id='crossfilter-emission-main-type',
                            options=[{'label': i, 'value': i} for i in ['Total', 'Per capita']],
                            value='Total',
                            labelStyle={'display': 'inline-block'}
                        ),
                        dcc.Graph(id='display-emission-main')
                    ], style={'width': '45%', 'float': 'left', 'display': 'inline-block'}),

                    # Graph 3: ranking of the top polluters
                    html.Div([
                        html.Div([
                            dcc.Graph(id='display-emission-rank'),
                        ])
                    ], style={'display': 'inline-block', 'float' : 'right', 'width': '45%'})
                ]),

            ]
            )

In [37]:
@app.callback(
    dash.dependencies.Output('crossfilter-population-gdp', 'figure'),
    [dash.dependencies.Input('crossfilter-meta-type', 'value'),
     dash.dependencies.Input('crossfilter-year-slider', 'value')]
)
def update_population_gdp(meta_type, year_value):
    """Build the scatter plot of total emission against the selected country statistic.

    Args:
        meta_type: Series name used for the x axis (value of the
            'crossfilter-meta-type' dropdown).
        year_value: Year selected on the slider; only rows of that year are plotted.

    Returns:
        A plotly Figure with one marker per country. Each point carries the
        country name as hover text and as ``customdata``.
    """
    dff = df_final[df_final['Year'] == year_value]
    dff_x = dff.loc[dff['Series Name'] == meta_type, ['Country Name', 'Value']]
    dff_y = dff.loc[dff['Series Name'] == 'Total Emission', ['Country Name', 'Value']]

    # Join the two series on the country instead of pairing them by position:
    # if a country has only one of the two values for this year, positional
    # pairing shifts every following point and labels it with the wrong country.
    paired = (
        dff_x.merge(dff_y, on='Country Name', how='inner', suffixes=('_x', '_y'))
        .sort_values('Country Name')
    )

    fig = go.Figure()

    fig.add_trace(go.Scatter(
        y=paired['Value_y'],
        x=paired['Value_x'],
        text=paired['Country Name'],
        customdata=paired['Country Name'],
        mode='markers',
        marker={
            'size': 20,
            'opacity': 0.7,
            'color': 'purple',
            'line': {'width': 2, 'color': 'blue'}
        }
        ))

    fig.update_layout(
        title='Total Emission by Country',
        xaxis={
            'title': meta_type
        },
        yaxis={
            'title': 'Total Emission'
        },
        margin={'l': 40, 'b': 30, 't': 30, 'r': 0},
        height=450,
        hovermode='closest'
    )

    return fig

In [38]:
@app.callback(
    dash.dependencies.Output('display-emission-main', 'figure'),
    [dash.dependencies.Input('crossfilter-population-gdp', 'clickData'),
    dash.dependencies.Input('crossfilter-emission-main-type', 'value')]
)
def update_emission_main(hoverData, emission_type):
    """Build the per-country line chart of CO2, CH4 and NOX over the years.

    Args:
        hoverData: The ``clickData`` of the main scatter plot (despite the
            parameter name, the callback is wired to clicks, not hovers). The
            country is read from ``points[0]['customdata']``.
        emission_type: 'Total' or 'Per capita', from the radio buttons.

    Returns:
        A plotly Figure with one line per emission series for the clicked country.

    Raises:
        dash.exceptions.PreventUpdate: If no country can be read from the click
            data or the emission type is not recognised; the current figure is
            then left unchanged.
    """
    # Guard against missing or empty click data instead of failing with a TypeError/KeyError
    points = (hoverData or {}).get('points') or [{}]
    country_name = points[0].get('customdata')

    emissions_by_type = {
        'Total': ['CO2', 'CH4', 'NOX'],
        'Per capita': ['CO2 (per capita)', 'CH4 (per capita)', 'NOX (per capita)'],
    }
    emissions = emissions_by_type.get(emission_type)

    # Previously an unexpected emission_type left `emissions` unbound
    if country_name is None or emissions is None:
        raise dash.exceptions.PreventUpdate

    df_emission = (
        df_final[(df_final['Country Name'] == country_name) &
                 (df_final['Series Name'].isin(emissions))].sort_values(['Series Name','Year'])
        )

    fig = go.Figure()

    for emission in emissions:
        # Filter once per series and reuse the result for both axes
        series = df_emission[df_emission['Series Name'] == emission]

        fig.add_trace(go.Scatter(
            x=series['Year'],
            y=series['Value'],
            mode="lines+markers",
            name=emission
        ))

    fig.update_layout(
            title='Emission Details',
            height=225,
            margin = {'l': 30, 'b': 30, 'r': 10, 't': 30},
            # The country name is shown as a label in the top-left corner of the plot area
            annotations = [{
                'x': 0, 'y': 0.85, 'xanchor': 'left', 'yanchor': 'bottom',
                'xref': 'paper', 'yref': 'paper', 'showarrow': False,
                'align': 'left', 'bgcolor': 'rgba(255, 255, 255, 0.5)',
                'text': country_name
            }],
            yaxis={'type': 'linear' },
            xaxis={'showgrid': False}
    )

    return fig

In [39]:
@app.callback(
    dash.dependencies.Output('display-emission-rank', 'figure'),
    [dash.dependencies.Input('crossfilter-emission-main-type', 'value')]
)
def update_emission_rank(emission_type):
    """Build the chart of the countries ranked in the top 10 emitters over the years.

    Args:
        emission_type: 'Total' or 'Per capita', from the radio buttons. 'Per capita'
            plots the 'Emission Rank (per capita)' series; any other value plots
            'Emission Rank'.

    Returns:
        A plotly Figure with one line per country, containing only the years in
        which that country's rank is between 1 and 10. The y axis is reversed so
        that rank 1 is at the top.
    """
    # The radio selection used to be ignored, so the chart always showed the
    # total-emission ranking even with 'Per capita' selected.
    if emission_type == 'Per capita':
        rank_series = 'Emission Rank (per capita)'
        subtitle = 'Top 10 by emission per capita'
    else:
        rank_series = 'Emission Rank'
        subtitle = 'Top 10 by total emission'

    # between() keeps every rank from 1 to 10, including fractional ranks that
    # tied countries may carry
    df_ranked = (
        df_final[(df_final['Series Name'] == rank_series) &
                 (df_final['Value'].between(1, 10))].sort_values(['Country Name','Year', 'Value'])
        )

    fig = go.Figure()

    for country in df_ranked['Country Name'].unique():
        # Filter once per country and reuse the result for both axes
        country_ranks = df_ranked[df_ranked['Country Name'] == country]

        fig.add_trace(go.Scatter(
            x=country_ranks['Year'],
            y=country_ranks['Value'],
            name=country,
            showlegend=True,
            mode='lines+markers'
        ))

    fig.update_layout(
        title='Top Polluters Ranked',
        height= 225,
        margin= {'l': 30, 'b': 30, 'r': 10, 't': 30},
        # Label in the top-left corner naming the ranking shown
        # (this used to display the literal placeholder text 'title')
        annotations= [{
            'x': 0, 'y': 0.85, 'xanchor': 'left', 'yanchor': 'bottom',
            'xref': 'paper', 'yref': 'paper', 'showarrow': False,
            'align': 'left', 'bgcolor': 'rgba(255, 255, 255, 0.5)',
            'text': subtitle
        }],
        yaxis={'type': 'linear', 'autorange' : 'reversed' },
        xaxis={'showgrid': False}
    )

    return fig

In [40]:
# Start the Dash server for the app defined above (all callbacks must already be
# registered); whatever run_server() returns is kept in `final_page`.
final_page = app.run_server()

Dash app running on http://127.0.0.1:8050/
