# Global energy statistics visualisation

This Jupyter notebook visualises the Global Electricity Statistics dataset from Kaggle: https://www.kaggle.com/datasets/akhiljethwa/global-electricity-statistics/

## Module Loading and Data Import

In [None]:
# For Data Analysis
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import seaborn as sns

# For Data Visualization
import plotly.express as px
import plotly.io as pio

# Additional Imports
from termcolor import colored
import country_converter as coco
import datetime
import calendar
import json

In [None]:
# Set Plotly Template: make "plotly_dark" the default template for figures
pio.templates.default = "plotly_dark"

In [None]:
# Load the electricity statistics table.
data = pd.read_csv("./data/Global Electricity Statistics.csv")

# Load the country GeoJSON. The context manager closes the file handle as soon
# as parsing is done, and the explicit encoding keeps non-ASCII country names
# intact regardless of the platform's default encoding.
with open("./data/countries.json", "r", encoding="utf-8") as map_file:
    map_json = json.load(map_file)

## Preparing dataframes

For our visualization we will need to preprocess our data a little.

**Get list of countries from geojson and prepare dataframe with Country and Year columns**

To get countries:
- iterate over "features" in the map GeoJSON and extract the "geounit" name from each feature's "properties"
- use set() to remove possible duplicates
- you can use asterisk (*) to unpack iterables into a list
- sort countries by name 

In [None]:
# Collect the "geounit" name of every GeoJSON feature. The set comprehension
# removes duplicates and sorted() turns the result into an ordered list.
countries = sorted(
    {feature["properties"]["geounit"] for feature in map_json["features"]}
)

Now let's create a dataframe that has the columns Country and Year and **one row for every country-year combination in the years 1980-2021** (those present in the dataset).
We can get this by:
- creating a dataframe with countries (just turn the list into a df)
- creating a dataframe with years (create a list of years and turn it into a df)
- creating a Cartesian product of the two dataframes

In [None]:
# One-column frames: every country name, and every year from 1980 to 2021.
country_df = pd.DataFrame({'Country': countries})
year_df = pd.DataFrame({'Year': list(range(1980, 2022))})

# Cross join: one row for each (Country, Year) combination.
countries_df = pd.merge(country_df, year_df, how='cross')

## Filling the Dataframes (Splitting, Aggregation, Counting, ...)

In [None]:
# Display the table as loaded from the CSV.
data

In [None]:
# Show the dtype of every column before any conversion.
data.dtypes

### Preprocess data types:



In [None]:
# Treat the placeholder strings "--" and "ie" as missing values:
# both are replaced with NaN in a single pass.
missing_markers = ["--", "ie"]
data = data.replace(missing_markers, np.nan)

In [None]:
# Convert Country, Region and Features into categorical data.
# The vectorised ``.str.strip()`` trims surrounding whitespace exactly like a
# per-row ``x.strip()`` would, but it passes missing values (NaN) through
# instead of raising AttributeError on them.
for label_column in ("Country", "Region", "Features"):
    data[label_column] = data[label_column].str.strip().astype("category")

In [None]:
# Convert all other columns (everything after the first three) into float.
# Writing through ``data.iloc[:, 3:] = ...`` sets the values inside the
# existing columns, and pandas 2.x then keeps their original object dtype.
# Assigning by column label replaces the columns, so the float dtype sticks.
value_columns = data.columns[3:]
data[value_columns] = data[value_columns].astype("float")

In [None]:
# Show the column dtypes again to check the result of the conversions.
data.dtypes

### Splitting:

In [None]:
# List the distinct values stored in the "Features" column.
data["Features"].unique()

In [None]:
# Order the rows by country name, modifying ``data`` in place.
data.sort_values(by=['Country'], inplace=True)

In [None]:
# Build one independent copy of ``data`` per "Features" value.
# The label column is looked up once and reused for every filter.
feature_labels = data["Features"]

data_net_generation = data[feature_labels == "net generation"].copy()
data_net_comsumption = data[feature_labels == "net consumption"].copy()
data_imports = data[feature_labels == "imports"].copy()
data_exports = data[feature_labels == "exports"].copy()
data_net_imports = data[feature_labels == "net imports"].copy()
data_installed_capacity = data[feature_labels == "installed capacity"].copy()
data_distribution_losses = data[feature_labels == "distribution losses"].copy()

In [None]:
# Each per-feature frame holds a single "Features" value, so the column is
# dropped; the rows are then renumbered from 0 and the old index is discarded.
for split_frame in (
    data_net_generation,
    data_net_comsumption,
    data_imports,
    data_exports,
    data_net_imports,
    data_installed_capacity,
    data_distribution_losses,
):
    split_frame.drop(columns=["Features"], inplace=True)
    split_frame.reset_index(drop=True, inplace=True)

In [None]:
# Show the net-generation rows whose country name contains "Belarus".
data_net_generation[data_net_generation["Country"].str.contains("Belarus")]

### Basic plots:

In [None]:
## Get the generation for countries for a given year:

year = 2021

# Keep only the country names and the selected year's column. Column labels
# are strings, hence str(year). The value column gets a descriptive name.
data_net_generation_year = data_net_generation.filter(["Country", str(year)], axis=1).copy()
data_net_generation_year.rename(columns={str(year): "Net_Generation"}, inplace=True)

# px.choropleth matches locations by ISO-3 code by default, so translate the
# country names into ISO3 codes.
country = coco.convert(names=data_net_generation_year['Country'], to="ISO3")
data_net_generation_year['Country_Short'] = country

# Series.max() skips NaN. The builtin max() returns NaN whenever the first
# value is missing, which would leave the colour range undefined.
max_generation = data_net_generation_year["Net_Generation"].max()

# Colour stops are packed near 0 so that small values remain distinguishable.
color_stops = [
    [0, '#0d0887'],
    [0.01, '#46039f'],
    [0.03, '#7201a8'],
    [0.5, '#9c179e'],
    [0.7, '#bd3786'],
    [0.9, '#d8576b'],
    [1, '#ed7953'],
]

fig = px.choropleth(
    data_frame=data_net_generation_year,
    locations="Country_Short",
    color="Net_Generation",
    range_color=(0, max_generation),
    color_continuous_scale=color_stops,
    title=f'Global Electricity Generation in {year}',
    hover_name="Country",
    hover_data=["Net_Generation"],
)
fig.update_layout(height=500, width=800)
fig.show()

In [None]:
# List the distinct "Features" values again for reference.
data["Features"].unique()

In [None]:
def get_map(year: int, feature_type: str):
    """Build a world choropleth of one electricity statistic for one year.

    Args:
        year: Year to plot; ``data`` must contain a column labelled
            ``str(year)``.
        feature_type: Value of the "Features" column that selects the
            statistic, e.g. "net generation" or "net imports".

    Returns:
        A Plotly figure (800 px wide, 500 px high) coloured by the statistic.

    Raises:
        KeyError: If ``data`` has no column for ``year``.
    """
    year_column = str(year)

    # Rows of the requested statistic, restricted to the two needed columns.
    # rename() and reset_index() return new frames, so ``data`` is untouched.
    selected = data.loc[data["Features"] == feature_type, ["Country", year_column]]
    data_feature_year = selected.rename(columns={year_column: feature_type}).reset_index(drop=True)

    # px.choropleth matches locations by ISO-3 code by default.
    data_feature_year['Country_Short'] = coco.convert(names=data_feature_year['Country'], to="ISO3")

    # Series.max() skips NaN. The builtin max() returns NaN whenever the first
    # value is missing, which would leave the colour range undefined.
    # NOTE(review): the range starts at 0, so negative values (possible for
    # "net imports") all map to the lowest colour. Confirm this is intended.
    upper_bound = data_feature_year[feature_type].max()

    # Colour stops are packed near 0 so that small values remain distinguishable.
    color_stops = [
        [0, '#0d0887'],
        [0.01, '#46039f'],
        [0.03, '#7201a8'],
        [0.5, '#9c179e'],
        [0.7, '#bd3786'],
        [0.9, '#d8576b'],
        [1, '#ed7953'],
    ]

    fig = px.choropleth(
        data_frame=data_feature_year,
        locations="Country_Short",
        color=feature_type,
        range_color=(0, upper_bound),
        color_continuous_scale=color_stops,
        title=f'Global Electricity {feature_type} in {year}',
        hover_name="Country",
        hover_data=[feature_type],
    )
    fig.update_layout(height=500, width=800)
    return fig

# Preview: build the "net imports" map for 2021.
get_map(2021, "net imports")

In [None]:
from dash import Dash, html, dcc, Input, Output, dash_table

app = Dash(__name__)

#*******APP LAYOUT**************

# Styles: dark page background, and white centred text for the header lines.
page_style = {
    'backgroundColor': '#323130',
    'height': '100%',
    'color': 'white',
    'margin': 0,
    'padding': '15px',
}
centered_white = {'textAlign': 'center', 'color': 'white'}

# Page header.
page_title = html.H1(
    children='Global Electricity Statistics',
    style=dict(centered_white),
)
page_subtitle = html.Div(
    children='''Dash: A web application framework for Python.''',
    style=dict(centered_white),
)

# Country selector listing every name in ``countries``.
country_picker = dcc.Dropdown(
    id='country_dropdown',
    options=[{'label': name, 'value': name} for name in countries],
    value='United States of America',
)

# Statistic selector: one radio item per distinct "Features" value.
content_picker = html.Div(
    children=[
        'Content type:',
        dcc.RadioItems(
            data["Features"].unique(),  # options
            'net generation',  # initially selected value
            id='content',
            inline=True,
        ),
    ],
    style={'width': '100%', 'display': 'inline-flex'},
)

# Map area, initially showing the module-level figure ``fig``.
map_graph = dcc.Graph(id='graph', figure=fig)

# Year selector: one step per year, with a labelled mark every ten years.
year_slider = dcc.Slider(
    1980,
    2021,
    step=1,
    value=2021,
    id='slider',
    marks={decade: str(decade) for decade in range(1980, 2021, 10)},
    tooltip={'placement': 'bottom', 'always_visible': True},
)

app.layout = html.Div(
    style=page_style,
    children=[
        page_title,
        page_subtitle,
        country_picker,
        content_picker,
        map_graph,
        dcc.Markdown('**Years**'),
        html.Div([year_slider]),
    ],
)

#**************FUNCTIONS*****************************

def get_map(year: int, feature_type: str):
    """Build a world choropleth of one electricity statistic for one year.

    Args:
        year: Year to plot; ``data`` must contain a column labelled
            ``str(year)``.
        feature_type: Value of the "Features" column that selects the
            statistic, e.g. "net generation" or "net imports".

    Returns:
        A Plotly figure (800 px wide, 500 px high) coloured by the statistic.

    Raises:
        KeyError: If ``data`` has no column for ``year``.
    """
    year_column = str(year)

    # Rows of the requested statistic, restricted to the two needed columns.
    # rename() and reset_index() return new frames, so ``data`` is untouched.
    selected = data.loc[data["Features"] == feature_type, ["Country", year_column]]
    data_feature_year = selected.rename(columns={year_column: feature_type}).reset_index(drop=True)

    # px.choropleth matches locations by ISO-3 code by default.
    data_feature_year['Country_Short'] = coco.convert(names=data_feature_year['Country'], to="ISO3")

    # Series.max() skips NaN. The builtin max() returns NaN whenever the first
    # value is missing, which would leave the colour range undefined.
    # NOTE(review): the range starts at 0, so negative values (possible for
    # "net imports") all map to the lowest colour. Confirm this is intended.
    upper_bound = data_feature_year[feature_type].max()

    # Colour stops are packed near 0 so that small values remain distinguishable.
    color_stops = [
        [0, '#0d0887'],
        [0.01, '#46039f'],
        [0.03, '#7201a8'],
        [0.5, '#9c179e'],
        [0.7, '#bd3786'],
        [0.9, '#d8576b'],
        [1, '#ed7953'],
    ]

    fig = px.choropleth(
        data_frame=data_feature_year,
        locations="Country_Short",
        color=feature_type,
        range_color=(0, upper_bound),
        color_continuous_scale=color_stops,
        title=f'Global Electricity {feature_type} in {year}',
        hover_name="Country",
        hover_data=[feature_type],
    )
    fig.update_layout(height=500, width=800)
    return fig

#*************CALLBACKS*****************************************

#radio/slider->map
@app.callback(
    Output('graph', 'figure'),
    Input('slider', 'value'),
    Input('content', 'value')
)
def update_map(year, feature_type):
    """Return a fresh map for the current slider year and content selection."""
    return get_map(year, feature_type)

#timeline->slider (disabled)
# The callback below sits inside a string literal, so it is never executed.
# It reads clickData from a component with id 'timeline', and no component
# with that id appears in the app layout.
"""@app.callback(
    Output('slider', 'value'),
    Input('timeline', 'clickData')
)
def update_year(clickData):
    year = 2021
    if clickData is not None:
        year = clickData['points'][0]['x']
    return year
"""

#********RUNNING THE APP*************************************************
if __name__ == '__main__':
    # Dash 3 removed ``run_server`` in favour of ``run``. Fall back to the old
    # name only on releases that do not provide ``run`` yet.
    start_server = getattr(app, "run", None) or app.run_server
    # The original note lists inline/tab/external as jupyter_mode options,
    # e.g. jupyter_mode="external".
    start_server(debug=True, port=8999)