# Global energy statistics visualisation

This Jupyter notebook visualises the global electricity statistics dataset published on Kaggle: https://www.kaggle.com/datasets/akhiljethwa/global-electricity-statistics/

## Module Loading and Data Import

In [None]:
# For Data Analysis
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import seaborn as sns

# For Data Visualization
import plotly.express as px
import plotly.io as pio

# Additional Imports
from termcolor import colored
import country_converter as coco
import datetime
import calendar
import json

In [None]:
# Set Plotly Template
pio.templates.default = "plotly_dark"

In [None]:
data = pd.read_csv("./data/Global Electricity Statistics.csv")
#map_json = json.load(open("./data/countries.json", "r"))

## Preparing dataframes

For our visualization we will need to preprocess our data a little.

**Get list of countries from geojson and prepare dataframe with Country and Year columns**

To get countries:
- iterate over "features" in the map GeoJSON and extract the "geounit" name from each feature's "properties"
- use set() to remove possible duplicates
- you can use asterisk (*) to unpack iterables into a list
- sort countries by name 

In [None]:
# countries = []
# 
# for feature in map_json["features"]:
#       countries.append(feature["properties"]["geounit"])
# 
# countries=[*set(countries)]
# countries.sort()

Now let's create a dataframe that has the columns Country and Year and **one row for every country-year combination in the years 1980-2021** (those present in the dataset).
We can get this by:
- creating dataframe with countries (just turn the list into df)
- creating dataframe with years (create a list of years and turn it into a df)
- creating a Cartesian product of the two dataframes

In [None]:
# country_df = pd.DataFrame(countries, columns =['Country'])
# year_df = pd.DataFrame(list(range(1980, 2022)), columns =['Year'])
# 
# countries_df = country_df.merge(year_df, how='cross')

## Filling the Dataframes (Splitting, Aggregation, Counting, ...)

In [None]:
data

In [None]:
data.dtypes

In [None]:
# Wide -> long: keep Country/Region/Features as identifiers and turn every
# remaining column into a (Year, Value) pair, one row each.
identifier_columns = ['Country', 'Region', "Features"]
melted_df = data.melt(id_vars=identifier_columns, var_name='Year', value_name='Value')

melted_df

In [None]:
# Step 4: Rename the columns
melted_df.columns = ['Country', 'Region', 'Features', 'Year', 'Value']

# Now 'melted_df' should have the desired transposed format
melted_df

### Preprocess data types:



In [None]:
data = melted_df

In [None]:
# Treat the "--" and "ie" placeholders as missing values (NaN)

placeholders = ["--", "ie"]
data = data.replace(placeholders, np.nan)

In [None]:
# Normalise the column dtypes:
#   - Country, Region, Features: trim surrounding whitespace, store as category
#   - Year: store as category
#   - Value: store as float
#
# The vectorised .str accessor is used instead of apply(lambda x: x.strip())
# so that a missing (NaN) label is passed through instead of raising
# AttributeError ('float' object has no attribute 'strip').

for text_column in ("Country", "Region", "Features"):
    data[text_column] = data[text_column].str.strip().astype("category")
data["Year"] = data["Year"].astype("category")
data["Value"] = data["Value"].astype("float")

In [None]:
# Long -> wide: index by Country/Region/Year/Features, then move the
# "Features" level into the columns so each feature gets its own column
row_keys = ['Country', 'Region', 'Year', 'Features']
new_df = data.set_index(row_keys).unstack('Features')
new_df = new_df.reset_index()

# Collapse the multi-level column labels to plain strings: use the second
# level when it is non-empty, otherwise fall back to the first level
flat_labels = []
for label in new_df.columns:
    flat_labels.append(label[1] if label[1] else label[0])
new_df.columns = flat_labels

# Show the reshaped dataframe
new_df

In [None]:
country_names_unique = new_df["Country"].unique()

In [None]:
new_df.dtypes

In [None]:
data.dtypes

In [None]:
# Rename countries
#
# "Country" is a categorical column, so the category label itself is renamed.
# Series.replace on a categorical column is deprecated since pandas 2.2;
# cat.rename_categories is the supported replacement. With a dict argument,
# categories missing from the mapping are passed through unchanged and
# mapping keys that are not existing categories are ignored.
# NOTE(review): new_df and country_names_unique are built in earlier cells,
# so they still carry the original label - confirm whether that is intended.

data["Country"] = data["Country"].cat.rename_categories({"Former U.S.S.R.": "USSR"})

In [None]:
data.dtypes

### Splitting:

In [None]:
data["Features"].unique()

In [None]:
data.sort_values(by=['Country'], inplace=True)

In [None]:
# One independent copy of the long-format rows per feature label
feature_column = data["Features"]

data_net_generation = data.loc[feature_column == "net generation"].copy()
data_net_comsumption = data.loc[feature_column == "net consumption"].copy()
data_imports = data.loc[feature_column == "imports"].copy()
data_exports = data.loc[feature_column == "exports"].copy()
data_net_imports = data.loc[feature_column == "net imports"].copy()
data_installed_capacity = data.loc[feature_column == "installed capacity"].copy()
data_distribution_losses = data.loc[feature_column == "distribution losses"].copy()

In [None]:
# Each per-feature frame holds a single feature, so its "Features" column is
# redundant: drop it and renumber the rows from 0. The frames are independent
# copies, so handling them one after another gives the same result as
# dropping from all of them first and re-indexing afterwards.
for feature_frame in (
    data_net_generation,
    data_net_comsumption,
    data_imports,
    data_exports,
    data_net_imports,
    data_installed_capacity,
    data_distribution_losses,
):
    feature_frame.drop(columns=["Features"], inplace=True)
    feature_frame.reset_index(drop=True, inplace=True)

In [None]:
data_distribution_losses

In [None]:
data_net_generation[data_net_generation["Country"] == "Belarus"]

There are some problems with renaming, specifically:

- Former Serbia and Montenegro not found in regex
- USSR not found in regex
- Former Yugoslavia not found in regex
- Germany, East not found in regex
- Germany, West not found in regex
- Hawaiian Trade Zone not found in regex
- Netherlands Antilles not found in regex
- U.S. Pacific Islands not found in regex
- U.S. Territories not found in regex
- Wake Island not found in regex

In [None]:
# Translate the country labels of the net-generation frame into short names
# and ISO3 codes (both results follow the row order of that frame)
net_generation_countries = data_net_generation['Country']
countries_short = coco.convert(names=net_generation_countries, to='name_short')
country_iso3 = coco.convert(names=net_generation_countries, to="ISO3")

# TODO : Add data for 

### Basic plots:

In [None]:
def get_map_prototype(year: int, feature_type: str):
    """Build a world choropleth of one feature for one year.

    Args:
        year: Year to display. It is compared, as text, with the labels in
            the "Year" column of the module-level ``data`` frame.
        feature_type: Label in the "Features" column to plot,
            e.g. "net imports".

    Returns:
        A plotly choropleth figure (800 x 500) coloured by "Value".
    """
    feature_rows: pd.DataFrame = data[data["Features"] == feature_type].copy()
    feature_rows.drop(columns=["Features"], inplace=True)
    feature_rows.reset_index(drop=True, inplace=True)

    # Positional assignment of the precomputed ISO3 codes, done before any
    # year filtering so row positions are unchanged.
    # NOTE(review): this assumes every feature has the same country/row order
    # as the frame country_iso3 was computed from - confirm.
    feature_rows['Country_Short'] = country_iso3

    # The colour range spans all years of the feature, so colours stay
    # comparable when the year changes. Series.min()/max() skip NaN, whereas
    # the builtin min()/max() can return NaN when the column contains one.
    value_range = (feature_rows["Value"].min(), feature_rows["Value"].max())

    # Keep only the requested year; previously every year was drawn at once
    # even though the title names a single year.
    year_labels = feature_rows["Year"].astype(str).str.strip()
    data_feature = feature_rows[year_labels == str(int(year))]

    fig = px.choropleth(data_frame=data_feature,
                        locations = "Country_Short",
                        color = "Value",
                        range_color=value_range,
                        color_continuous_scale = [[0, '#0d0887'],
                        [0.01, '#46039f'],
                        [0.03, '#7201a8'],
                        [0.5, '#9c179e'],
                        [0.7, '#bd3786'],
                        [0.9, '#d8576b'],
                        [1, '#ed7953']
                        ],
                        #  focus='south america',
                        title = f'Global Electricity {feature_type} in {year}',
                        hover_name = "Country",
                        hover_data = ["Value"],)
    fig.update_layout(height=500, width=800)
    return fig

get_map_prototype(2021, "net imports")

In [None]:
# Grouped bar chart of every feature over time for a single country
afghanistan_rows = new_df["Country"] == "Afghanistan"
timeline = new_df[afghanistan_rows].copy()

feature_columns = ['net generation', 'net consumption', 'imports', 'exports',
                   'net imports', 'installed capacity', 'distribution losses']
bar_colors = ["white", "red", "blue", "green", "orange", "pink", "brown"]

bars = px.bar(
    timeline,
    x="Year",
    y=feature_columns,
    color_discrete_sequence=bar_colors,
    title="Afghanistan's Net Generation",
    barmode='group',
)

# One x-axis tick per step, counted from 1980
bars.update_layout(xaxis={'tickmode': 'linear', 'tick0': 1980, 'dtick': 1})

bars.show()

In [None]:
def get_barplot_for_country_year_prototype(country: str):
    """Return a grouped bar chart of all features per year for ``country``.

    Rows are taken from the module-level wide frame ``new_df`` where the
    "Country" column equals ``country``.
    """
    feature_columns = ['net generation', 'net consumption', 'imports', 'exports',
                       'net imports', 'installed capacity', 'distribution losses']
    bar_colors = ["white", "red", "blue", "green", "orange", "pink", "brown"]

    country_rows = new_df["Country"] == country
    timeline = new_df[country_rows].copy()

    bars = px.bar(
        timeline,
        x="Year",
        y=feature_columns,
        color_discrete_sequence=bar_colors,
        title=f"{country}'s Net Generation",
        barmode='group',
    )
    # One x-axis tick per step, counted from 1980
    bars.update_layout(xaxis={'tickmode': 'linear', 'tick0': 1980, 'dtick': 1})
    return bars

get_barplot_for_country_year_prototype("Afghanistan")

# Main Dash app

In [None]:
from dash import Dash, html, dcc, Input, Output, dash_table, State, callback_context, no_update

app = Dash(__name__, suppress_callback_exceptions=True)

#*******APP LAYOUT**************

# Page layout: title, country selector, the swappable main graph area
# ("main_component") and the feature / year / mode controls.
app.layout = html.Div(

    style={'backgroundColor':'#323130',
        'height': '100%',
        'color': 'white',
        'margin': 0,
        'padding': '15px' 
    }, 

    children=[
        html.H1(
            children='Global Electricity Statistics',
            style={
                'textAlign': 'center',
                'color': 'white'
            }
        ),

        html.Div(children='''Dash: A web application framework for Python.''',
                style={
                    'textAlign': 'center',
                    'color': 'white'
                }
        ),

        dcc.Dropdown(
            id='country_dropdown',
            options=[{'label': i, 'value': i} for i in country_names_unique],
            value='Belgium',
            style={'color': 'black'}
        ),

        html.Div([
            
            dcc.Graph(
                id='main_map',
                #figure=fig,
                style={'width': '100%', 'display': 'inline-block'}
            ),
        
        ], id="main_component"),
        
        html.Div(id="controls", children= [    
            html.Div([
                html.Div([
                    dcc.Markdown('**Features**'),
                    # One option per distinct feature. Iterating the raw
                    # column would emit one (duplicate) option per data row.
                    dcc.Dropdown(options=[{'label': feat, 'value': feat}
                                          for feat in sorted(data["Features"].dropna().unique())],
                                value='net generation',
                                id='feature_type',
                                style={'color': 'black'}),
                # Dash style dictionaries use camelCased CSS property names
                ], style={'width': '20%', 'display': 'inline-block', 'marginRight': '2%'}),

                html.Div([
                    dcc.Markdown('**Years**'),
                    dcc.Slider(1980, 2021, step=1, value=2021, id='slider',
                            marks={i: '{}'.format(i) for i in range(1980, 2021, 10)},
                            tooltip={'placement': 'bottom', 'always_visible': True}),
                ], style={'width': '55%', 'display': 'inline-block', 'marginRight': '2%'}),

                html.Div([
                    html.Button('Compare countries', id='comparison_button', style={'marginRight': '5%', 'height': '100%'}),
                    html.Button('Main map mode', id='main_map_button', style={'height': '100%'}),
                ], style={'width': '20%', 'display': 'inline-block'}),
            ], style={'height': '100vh'}),
        ]),
    ],
)

#**************FUNCTIONS*****************************

def get_map(year: int, feature_type: str):
    """Build the world choropleth of one feature for one year.

    Args:
        year: Year to display; ``None`` falls back to 2021. It is compared,
            as text, with the labels in the "Year" column of the
            module-level ``data`` frame.
        feature_type: Label in the "Features" column to plot; ``None`` falls
            back to "net generation".

    Returns:
        A plotly choropleth figure (800 x 500) coloured by "Value".
    """
    if year is None:
        year = 2021

    if feature_type is None:
        feature_type = "net generation"

    feature_rows: pd.DataFrame = data[data["Features"] == feature_type].copy()
    feature_rows.drop(columns=["Features"], inplace=True)
    feature_rows.reset_index(drop=True, inplace=True)

    # Positional assignment of the precomputed ISO3 codes, done before any
    # year filtering so row positions are unchanged.
    # NOTE(review): this assumes every feature has the same country/row order
    # as the frame country_iso3 was computed from - confirm.
    feature_rows['Country_Short'] = country_iso3

    # The colour range spans all years of the feature, so colours stay
    # comparable while the year slider moves. Series.min()/max() skip NaN,
    # whereas the builtin min()/max() can return NaN when the column
    # contains one.
    value_range = (feature_rows["Value"].min(), feature_rows["Value"].max())

    # Keep only the requested year; previously every year was drawn at once
    # even though the title names a single year.
    year_labels = feature_rows["Year"].astype(str).str.strip()
    data_feature = feature_rows[year_labels == str(int(year))]

    fig = px.choropleth(data_frame=data_feature,
                        locations = "Country_Short",
                        color = "Value",
                        range_color=value_range,
                        color_continuous_scale = [[0, '#0d0887'],
                        [0.01, '#46039f'],
                        [0.03, '#7201a8'],
                        [0.5, '#9c179e'],
                        [0.7, '#bd3786'],
                        [0.9, '#d8576b'],
                        [1, '#ed7953']
                        ],
                        #  focus='south america',
                        title = f'Global Electricity {feature_type} in {year}',
                        hover_name = "Country",
                        hover_data = ["Value"],)
    fig.update_layout(height=500, width=800)
    return fig

def get_barplot_for_country_year(country: str):
    """Return a grouped bar chart of all features per year for ``country``.

    ``None`` falls back to "Belgium". Rows are taken from the module-level
    wide frame ``new_df`` where the "Country" column equals ``country``.
    """
    country = "Belgium" if country is None else country

    feature_columns = ['net generation', 'net consumption', 'imports', 'exports',
                       'net imports', 'installed capacity', 'distribution losses']
    bar_colors = ["white", "red", "blue", "green", "orange", "pink", "brown"]

    country_rows = new_df["Country"] == country
    timeline = new_df[country_rows].copy()

    bars = px.bar(
        timeline,
        x="Year",
        y=feature_columns,
        color_discrete_sequence=bar_colors,
        title=f"{country}'s Net Generation",
        barmode='group',
    )
    # One x-axis tick per step, counted from 1980
    bars.update_layout(xaxis={'tickmode': 'linear', 'tick0': 1980, 'dtick': 1})
    return bars

#*************CALLBACKS*****************************************

#radio/slider->map
@app.callback(
    Output('main_map', 'figure'),
    Input('slider', 'value'),
    Input('feature_type', 'value'),
)
def update_map(year, feature_type):
    """Redraw the main map when the year slider or the feature dropdown changes."""
    return get_map(year, feature_type)


@app.callback(
    Output('country_features_over_time', 'figure'),
    Input('country_dropdown', 'value'),
)
def update_country_features_over_time(country):
    """Redraw the per-country bar chart when the country dropdown changes."""
    return get_barplot_for_country_year(country)

"""
@app.callback(Output('main_component', 'children'),
              State('slider', 'value'),
              Input('main_map', 'clickData'),
              State('feature_type', 'value'))
def country_specific_mode(clickData, selected_year, selected_feature):
    print(clickData)
    country = 'Belgium'
    if clickData is not None:
        country = clickData['points'][0]['location']
    fig = get_barplot_for_country_year(country)
    
    map = get_map(2021, "net generation")
    return [dcc.Graph(figure=fig, id='country_features_over_time'), dcc.Graph(figure=map, id='main_map')]
"""

@app.callback(
    Output('main_component', 'children'),
    Input('comparison_button', 'n_clicks'),
    Input('main_map_button', 'n_clicks'),
    Input('country_dropdown', 'value'),
    State('slider', 'value'),
    State('feature_type', 'value'),
)
def update_main_component(compare_clicks, main_map_clicks, selected_country, selected_year, selected_feature):
    """Swap the content of the main component depending on what fired the callback.

    - 'main_map_button' shows the choropleth for the current slider year and
      feature.
    - 'comparison_button' and any other trigger (the country dropdown) show
      the bar chart for the selected country.
    - Nothing is changed when the callback context reports no trigger.
    """
    print("update_main_component")
    print(compare_clicks)
    print(main_map_clicks)

    triggers = callback_context.triggered
    if not triggers:
        return no_update

    # prop_id has the form "<component id>.<property>"
    source_id = triggers[0]['prop_id'].split('.')[0]

    if source_id == 'main_map_button':
        # Main map mode
        print("Main map mode")
        return dcc.Graph(figure=get_map(selected_year, selected_feature), id='main_map')

    # Comparison mode and country-specific mode render the same bar chart;
    # only the logged label differs.
    mode_label = "Comparison mode" if source_id == 'comparison_button' else "Country-specific mode"
    print(mode_label)
    return dcc.Graph(figure=get_barplot_for_country_year(selected_country), id='country_features_over_time')



#********RUNNING THE APP*************************************************
if __name__ == '__main__':
    # app.run replaces the deprecated app.run_server (removed in Dash 3).
    # The port is passed as an integer rather than a string.
    # jupyter_mode options noted by the author: inline / tab / external
    app.run(debug=True, port=8999, jupyter_mode="external")

TODO:

- Add country detail screen on country click 
- Add more graphs/components for the whole visualisation
- Fixup the styling