In [1]:
import plotly.express as px
import plotly.graph_objects as go
import numpy as np
import pandas as pd
import seaborn as sns
import matplotlib.pyplot as plt
from dash import Dash, html, dash_table, dcc, Output, Input
import dash_bootstrap_components as dbc


# Importing and cleaning data

In [2]:
# Plant-level records; other_fuel3 is read as text explicitly.
power_df = pd.read_csv(
    "global_power_plant_database.csv",
    encoding='utf-8',
    dtype={'other_fuel3': str},
)
# Country -> continent lookup that is joined onto the plant records later.
countries_df = pd.read_csv(
    "list-of-countries-by-continent-2024.csv",
    encoding='utf-8',
    dtype={'country': str, 'continent': str},
)
power_df.shape

(34936, 36)


Update country names in power_df:
| Old Name                           | New Name                |
|------------------------------------|-------------------------|
| Swaziland                          | Eswatini                |
| Brunei Darussalam                  | Brunei                  |
| Congo                              | Republic of the Congo   |
| Cote D'Ivoire                      | Ivory Coast             |
| Democratic Republic of the Congo   | DR Congo                |
| Macedonia                          | North Macedonia         |
| Syrian Arab Republic               | Syria                   |
| Antarctica                         | New Zealand             |

Add records to country-continent CSV file for:
- United States of America (previously 'United States')
- Kosovo (missing)

In [3]:
# Rename countries in the plant data so they match the spellings used in the
# country-continent table (see the table in the notes above).
replacement_dict = {
    'Swaziland': 'Eswatini',
    'Brunei Darussalam': 'Brunei',
    'Congo': 'Republic of the Congo',
    'Cote DIvoire': 'Ivory Coast',
    # The notes above spell this name with an apostrophe; accept either form
    # so the rows are mapped whichever spelling the CSV contains.
    "Cote D'Ivoire": 'Ivory Coast',
    'Democratic Republic of the Congo': 'DR Congo',
    'Macedonia': 'North Macedonia',
    'Syrian Arab Republic': 'Syria',
    'Antarctica': 'New Zealand',
}

# replace() with a dict matches whole cell values only, so 'Congo' does not
# touch 'Democratic Republic of the Congo' and re-running the cell is harmless.
power_df['country_long'] = power_df['country_long'].replace(replacement_dict)

In [4]:
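# Uncomment on the first run only: the rows are appended to the CSV, so running this twice adds duplicate rows.
# The CSV is read in an earlier cell, so re-run that cell afterwards to load the appended rows.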
# additional_countries = pd.DataFrame({"country": ["United States of America", "Kosovo"],
#                                     "continent": ["North America", "Europe"],
#                                     "unMember": ["true", "false"]})

# with open("list-of-countries-by-continent-2024.csv", 'a') as f:
#     additional_countries.to_csv(f, header=False, index=False)

In [5]:
# Left join keeps every plant; the plant table's own 'country' column is
# renamed to 'country_short' so the lookup's 'country' column keeps its name.
power_df = pd.merge(
    power_df,
    countries_df,
    how='left',
    left_on='country_long',
    right_on='country',
    suffixes=('_short', ''),
)

In [6]:
power_df.shape  # Row count is unchanged from the first import, so the merge introduced no duplicate rows; the 3 extra columns come from the country-continent dataframe

(34936, 39)

In [7]:
# Share of missing values per column (the mean of a boolean mask is the fraction of True).
normalised_missing_values = power_df.isna().mean()
normalised_missing_values

country_short                     0.000000
country_long                      0.000000
name                              0.000000
gppd_idnr                         0.000000
capacity_mw                       0.000000
latitude                          0.000000
longitude                         0.000000
primary_fuel                      0.000000
other_fuel1                       0.944355
other_fuel2                       0.992100
other_fuel3                       0.997367
commissioning_year                0.500601
owner                             0.402679
source                            0.000429
url                               0.000515
geolocation_source                0.011993
wepp_id                           0.535322
year_of_capacity_data             0.573878
generation_gwh_2013               0.816321
generation_gwh_2014               0.793165
generation_gwh_2015               0.765199
generation_gwh_2016               0.738264
generation_gwh_2017               0.728074
generation_

In [8]:
power_df[power_df['unMember'].isna()] # Show records with no match in the country-continent table (unMember comes from that table, so it is NaN when the left merge found nothing)

Unnamed: 0,country_short,country_long,name,gppd_idnr,capacity_mw,latitude,longitude,primary_fuel,other_fuel1,other_fuel2,...,estimated_generation_gwh_2016,estimated_generation_gwh_2017,estimated_generation_note_2013,estimated_generation_note_2014,estimated_generation_note_2015,estimated_generation_note_2016,estimated_generation_note_2017,country,continent,unMember


# Creating new features for analysis

Create new columns for the visualisations: one that classifies each plant as renewable or non-renewable, and an 'Other' primary-fuel category at both continent and world level. The aim is that any primary fuel type that represents less than 1% of the total capacity for a continent is grouped into an 'Other' category.

In [9]:
# Total installed capacity per primary fuel, largest first.
capacity_by_fuel = power_df.groupby('primary_fuel')['capacity_mw'].sum()
capacity_by_fuel = capacity_by_fuel.sort_values(ascending=False)

# Express each fuel as a percentage of the global capacity.
fuel_type_capacity_percent = capacity_by_fuel.div(power_df['capacity_mw'].sum()).mul(100)

# Keep only the fuels that contribute at least 1% of global capacity.
main_fuel_types = fuel_type_capacity_percent.loc[fuel_type_capacity_percent.ge(1)]
main_fuel_types.astype('int')

primary_fuel
Coal       34
Gas        26
Hydro      18
Nuclear     7
Wind        4
Oil         4
Solar       3
Name: capacity_mw, dtype: int64

In [10]:
main_fuel_types.sum()  # Main primary fuels together account for about 98.7% of global capacity

np.float64(98.70215486671493)

In [11]:
print(power_df['primary_fuel'].unique())

# Fuel lists behind each energy class
renewable_energies = ['Hydro', 'Solar', 'Wind', 'Biomass', 'Wave and Tidal', 'Geothermal']
non_renewable_energies = ['Nuclear', 'Coal', 'Gas', 'Oil', 'Petcoke']
other = ['Other', 'Waste', 'Cogeneration', 'Storage']

# Class label -> fuels in that class; insertion order keeps the masks and labels aligned
fuel_groups = {
    'Renewable': renewable_energies,
    'Non-Renewable': non_renewable_energies,
    'Other': other,
}

# One boolean mask per class, plus the matching labels
conditions = [power_df['primary_fuel'].isin(members) for members in fuel_groups.values()]
choices = list(fuel_groups)

# Label every plant; fuels found in none of the lists are marked 'Unknown'
power_df['renew_energy'] = np.select(conditions, choices, default='Unknown')

['Hydro' 'Solar' 'Gas' 'Other' 'Oil' 'Wind' 'Nuclear' 'Coal' 'Waste'
 'Biomass' 'Wave and Tidal' 'Petcoke' 'Geothermal' 'Storage'
 'Cogeneration']


In [12]:
# Hover label shown for each point on the world map: "<plant name>, <country>".
# country_long is used instead of the merged 'country' column because it has no
# missing values, whereas 'country' is NaN for any plant whose country found no
# match in the continent table (which would turn the whole label into NaN).
# For matched rows both columns hold the same name, as they are the join keys.
power_df['text'] = power_df['name'] + ', ' + power_df['country_long']

# Creating WebApp with Dash and Plotly

### Defining Color Maps

In [13]:
# Colour lookup shared by the plots: one hex colour per primary fuel,
# plus one per overall energy class.
fuel_color_map = {
    # --- Renewable fuels ---
    "Hydro": "#2ECC71",           # light green
    "Solar": "#F1C40F",           # yellow
    "Wind": "#3498DB",            # light blue
    "Biomass": "#27AE60",         # green
    "Wave and Tidal": "#D35400",  # dark orange
    "Geothermal": "#9B59B6",      # purple

    # --- Non-renewable fuels ---
    "Nuclear": "#34495E",         # dark blue-grey
    "Coal": "#E74C3C",            # red
    "Gas": "#7F8C8D",             # dark grey
    "Oil": "#1ABC9C",             # teal
    "Petcoke": "#95A5A6",         # grey

    # --- Other fuels ---
    "Other": "#BDC3C7",           # light grey
    "Waste": "#E67E22",           # orange
    "Cogeneration": "#16A085",    # dark teal
    "Storage": "#2980B9",         # blue

    # --- Overall energy classes (same colours as Hydro and Coal) ---
    "Renewable": "#2ECC71",       # light green
    "Non-Renewable": "#E74C3C",   # red
}


In [14]:

# General-purpose palette of 15 hex colours, in the order they are handed out.
custom_colors = [
    "#FF5733", "#33FF57", "#3357FF",  # red-orange, green, blue
    "#F39C12", "#9B59B6", "#E74C3C",  # orange, purple, red
    "#1ABC9C", "#8E44AD", "#3498DB",  # teal, dark purple, light blue
    "#2ECC71", "#E67E22", "#ECF0F1",  # light green, dark orange, light grey
    "#95A5A6", "#34495E", "#16A085",  # grey, dark blue-grey, dark teal
]

### Defining unique plotting functions

In [15]:
# Title card
def drawTitle():
    """Return the dashboard title wrapped in a Bootstrap card.

    The H1 heading sits in a Div that centres it and colours it white.
    """
    heading = html.H1("Dashboard for EDA of Global Power Plants Database")
    centred_heading = html.Div(
        [heading],
        style={'textAlign': 'center', 'color': 'white'},
    )
    card = dbc.Card(dbc.CardBody([centred_heading]))
    return html.Div([card])

In [16]:
# For making the dropdown option
def drawDropdown():
    """Return the continent selector card.

    The dropdown (id 'cont_dropdown') offers every continent present in
    power_df plus a 'World' entry whose value is 'All'. It starts on 'Europe'.
    """
    # Plants whose country had no match in the continent table carry NaN in
    # 'continent'; drop it so no blank, unselectable option is offered.
    continents = power_df['continent'].dropna().unique()
    options = [{'label': continent, 'value': continent} for continent in continents]
    options.append({'label': 'World', 'value': 'All'})

    return html.Div([
        dbc.Card(
            dbc.CardBody([
                html.Div([
                    html.H3("Select Continent:"),
                ], style={'textAlign': 'center',  'color': 'white'}),
                dcc.Dropdown(id='cont_dropdown',
                             options=options,
                             value='Europe',
                             # The callbacks filter on this value; a cleared
                             # dropdown would send None and match no plants.
                             clearable=False),
            ])
        ),
    ])

In [17]:
# Table card
def drawTable():
    """Return the card holding the power-plant table.

    The DataTable (id 'power-plants-table') starts empty; its rows are
    supplied by the table callback. It shows 13 rows per page in a dark theme.
    """
    table_columns = [
        {'name': 'Country', 'id': 'country_long'},
        {'name': 'Name', 'id': 'name'},
        {'name': 'Capacity (MW)', 'id': 'capacity_mw'},
        {'name': 'Primary Fuel', 'id': 'primary_fuel'},
    ]
    # Dark header: white text with a thin grey border
    header_style = {
        'backgroundColor': '#1f1f1f',
        'color': 'white',
        'border': '1px solid #444',
    }
    # Dark, left-aligned cells with the same border as the header
    cell_style = {
        'backgroundColor': '#333333',
        'color': 'white',
        'textAlign': 'left',
        'border': '1px solid #444',
    }
    # Odd rows get a slightly different shade so rows are easier to follow
    odd_row_style = {
        'if': {'row_index': 'odd'},
        'backgroundColor': '#2d2d2d',
    }

    heading = html.H5("Power Plants Table Sorted by Capacity",
                      style={'textAlign': 'center', 'color': 'white'})
    table = dash_table.DataTable(
        id='power-plants-table',
        columns=table_columns,
        data=[],
        page_size=13,
        style_header=header_style,
        style_cell=cell_style,
        style_data_conditional=[odd_row_style],
        fill_width=True,
    )
    return html.Div([
        dbc.Card(dbc.CardBody([heading, html.Br(), table])),
    ])

In [18]:
# Pie chart card
def drawPieChart():
    """Return the card holding the capacity pie chart and its grouping selector.

    The radio items (id 'pie_radioitems') choose whether capacity is grouped by
    'primary_fuel' or 'renew_energy'; the graph (id 'pie_chart_cap') is filled
    in by its callback.
    """
    heading = html.H5('Proportion of Total Power Plant Capacity',
                      style={'textAlign': 'center', 'color': 'white'})
    grouping_selector = dcc.RadioItems(
        id='pie_radioitems',
        options=[
            {'label': ' Primary Fuel', 'value': 'primary_fuel'},
            {'label': ' Renewables', 'value': 'renew_energy'},
        ],
        value='primary_fuel',
        inline=True,
    )
    pie_graph = dcc.Graph(id='pie_chart_cap')
    return html.Div([
        dbc.Card(dbc.CardBody([heading, grouping_selector, pie_graph])),
    ])

In [19]:
# For making the geo map
# The figure is static (it does not react to the dropdown), so it is built once here.
map_columns = ['country_long', 'name', 'capacity_mw', 'primary_fuel', 'longitude', 'latitude', 'text']

# Only the 500 largest plants by capacity are plotted.
map_df = power_df.sort_values(by='capacity_mw', ascending=False).head(500)[map_columns]

map_fig = px.scatter_geo(
    data_frame=map_df,
    lat='latitude',
    lon='longitude',
    color='primary_fuel',
    hover_name='text',
    # Use the shared fuel colours so a fuel has the same colour here as in the
    # pie chart; custom_colors stays as the fallback for unmapped fuels.
    color_discrete_map=fuel_color_map,
    color_discrete_sequence=custom_colors,
    size='capacity_mw',
    opacity=0.7,
    projection='natural earth',
)

# Thin black outline so overlapping markers remain distinguishable
map_fig.update_traces(
    marker=dict(
        line=dict(
            color='black',
            width=0.1
        )
    )
)

# Dark mode: transparent backgrounds so the card colour shows through,
# with grey land and slightly lighter grey water
map_fig.update_layout(
    template='plotly_dark',
    plot_bgcolor='rgba(0, 0, 0, 0)',
    paper_bgcolor='rgba(0, 0, 0, 0)',
    font=dict(color='white'),
    geo=dict(
        showland=True, landcolor='#333333',
        showocean=True, oceancolor='#444444',
        lakecolor='#444444',
    ),
    title_font=dict(color='white'),
)

# Draw it in the card

def drawMap():
    """Return the card that shows the pre-built world map figure (map_fig)."""
    heading = html.H5("Distribution of 500 Largest Power Plants by Capacity",
                      style={'textAlign': 'center',  'color': 'white'})
    world_map = dcc.Graph(figure=map_fig)
    return html.Div([
        dbc.Card(dbc.CardBody([heading, world_map])),
    ])

In [20]:
# Make histogram

def drawHist():
    """Return the card holding the capacity histogram (graph id 'capacity_hist').

    The figure itself is supplied by the histogram callback.
    """
    histogram = dcc.Graph(id='capacity_hist')
    card = dbc.Card(dbc.CardBody([histogram]))
    return html.Div([card])


### Defining the web application

In [21]:
# Create the Dash application, styled with the SLATE theme from dash-bootstrap-components.
# It must exist before the callback cells, which register themselves via @app.callback.
app = Dash(external_stylesheets=[dbc.themes.SLATE])

### Defining app callback functions

In [30]:

# Callback function for histogram
@app.callback(Output('capacity_hist', 'figure'),
              Input('cont_dropdown', 'value'))
def sync_input_hist(cont_selection):
    """Build the plant-capacity histogram for the selected continent.

    Parameters
    ----------
    cont_selection : str
        A continent name from the dropdown, or 'All' for every plant.

    Returns
    -------
    plotly figure
        Histogram of log10(capacity_mw), coloured by primary fuel, with a
        logarithmic count axis and x ticks labelled in MW.
    """
    if cont_selection != 'All':
        continent_filtered = power_df.loc[power_df['continent'] == cont_selection]
    else:
        continent_filtered = power_df

    # assign() returns a new frame. Writing the column in place would add
    # 'log_capacity_mw' to the shared power_df whenever 'All' is selected.
    continent_filtered = continent_filtered.assign(
        log_capacity_mw=np.log10(continent_filtered['capacity_mw'])
    )

    fig = px.histogram(continent_filtered,
                       x='log_capacity_mw',
                       hover_name='text',
                       color='primary_fuel',
                       # Same fuel colours as the pie chart
                       color_discrete_map=fuel_color_map,
                       log_y=True,
                       nbins=100)

    # The x axis holds log10 values, so place ticks at the logs of round MW
    # figures and label them with the MW figures themselves.
    tick_values = np.log10(np.array([1, 10, 100, 500, 1000, 5000, 10000]))
    tick_texts = ['1', '10', '100', '500', '1K', '5K', '10K']

    fig.update_layout(
        template='plotly_dark',
        plot_bgcolor='rgba(0, 0, 0, 0)',
        paper_bgcolor='rgba(0, 0, 0, 0)',   # Transparent so the card colour shows through
        font=dict(color='white'),  # Font color for axes and labels
        title_font=dict(color='white'),  # Title font color
        xaxis=dict(
            title='Capacity (MW)', showgrid=True,
            gridcolor='rgba(255, 255, 255, 0.1)', color='white',
            tickmode='array', tickvals=tick_values, ticktext=tick_texts,
            # From 10^0 (1 MW) to one decade above the largest plant in the selection
            range=[0, continent_filtered['log_capacity_mw'].max() + 1]
        ),
        yaxis=dict(
            title='Count', showgrid=True, gridcolor='rgba(255, 255, 255, 0.1)', color='white'
        ),
        bargap=0.1,
        barmode='overlay',  # Draw the per-fuel bars over one another rather than stacked
    )
    return fig

In [23]:
# Pie chart callback
@app.callback(Output('pie_chart_cap', 'figure'),
              Input('cont_dropdown', 'value'),
              Input('pie_radioitems', 'value'))
def sync_input_pie_chart_cap(cont_selection, fuel_or_renew):
    """Build the capacity-share pie chart for the selected continent.

    cont_selection is a continent name or 'All'; fuel_or_renew is the column
    to group by ('primary_fuel' or 'renew_energy', as offered by the radio items).
    Returns a pie of summed capacity_mw per group, coloured from fuel_color_map.
    """
    if cont_selection == 'All':
        plants = power_df
    else:
        in_continent = power_df['continent'] == cont_selection
        plants = power_df.loc[in_continent].copy()

    # Total capacity per category, with the category back as a regular column
    capacity_totals = plants.groupby(fuel_or_renew)['capacity_mw'].sum().reset_index()

    fig = px.pie(
        capacity_totals,
        values='capacity_mw',
        names=fuel_or_renew,
        color=fuel_or_renew,
        color_discrete_map=fuel_color_map,
    )

    # Dark theme with transparent backgrounds and white text
    dark_layout = dict(
        template='plotly_dark',
        plot_bgcolor='rgba(0, 0, 0, 0)',
        paper_bgcolor='rgba(0, 0, 0, 0)',
        font=dict(color='white'),
        title_font=dict(color='white'),
    )
    fig.update_layout(**dark_layout)

    return fig

In [24]:
# Table callback

@app.callback(
    Output('power-plants-table', 'data'),
    [Input('cont_dropdown', 'value')]
)
def update_table(cont_selection):
    """Return the table rows for the selected continent, largest capacity first.

    cont_selection is a continent name or 'All'. The result is a list of
    record dicts holding only the four columns the table displays.
    """
    table_fields = ['country_long', 'name', 'capacity_mw', 'primary_fuel']

    if cont_selection == 'All':
        plants = power_df
    else:
        plants = power_df[power_df['continent'] == cont_selection]

    ranked = plants[table_fields].sort_values('capacity_mw', ascending=False)
    return ranked.to_dict('records')

### App layout and loader

In [31]:

 # Actual app layout
# Each row of the page is built separately, top to bottom.
title_row = dbc.Row(
    [dbc.Col(drawTitle(), width=12)],
    align='center',
)
selector_row = dbc.Row(
    [dbc.Col(drawDropdown(), width=4)],
    justify='center',
    align='center',
)
# Histogram next to the world map
charts_row = dbc.Row(
    [
        dbc.Col([drawHist()], width=6),
        dbc.Col([drawMap()], width=6),
    ],
    align='center',
)
# Pie chart next to the table
breakdown_row = dbc.Row(
    [
        dbc.Col([drawPieChart()], width=6),
        dbc.Col([drawTable()], width=6),
    ],
    align='center',
)

# All rows sit in one dark card; the outer Div fills the viewport and scrolls.
app.layout = html.Div(
    [
        dbc.Card(
            dbc.CardBody([title_row, selector_row, charts_row, breakdown_row]),
            color = 'dark',
        )
    ],
    style={'height': '100vh', 'overflow': 'auto'},
)

# Run the dashboard
app.run(jupyter_mode="external")
#app.run_server(debug=True, use_reloader=False)  # Turn off reloader if inside Jupyter


Dash app running on http://127.0.0.1:8050/
