In [27]:
# data fields to download

# Capacity	Fuel	Bioenergy	GW
# Capacity	Fuel	Coal	GW
# Capacity	Fuel	Gas	GW
# Capacity	Fuel	Hydro	GW
# Capacity	Fuel	Nuclear	GW
# Capacity	Fuel	Other Fossil	GW
# Capacity	Fuel	Other Renewables	GW
# Capacity	Fuel	Solar	GW
# Capacity	Fuel	Wind	GW
# Electricity generation	Fuel	Bioenergy	%
# Electricity generation	Fuel	Coal	%
# Electricity generation	Fuel	Gas	%
# Electricity generation	Fuel	Hydro	%
# Electricity generation	Fuel	Nuclear	%
# Electricity generation	Fuel	Other Fossil	%
# Electricity generation	Fuel	Other Renewables	%
# Electricity generation	Fuel	Solar	%
# Electricity generation	Fuel	Wind	%
# Electricity generation	Fuel	Bioenergy	TWh
# Electricity generation	Fuel	Coal	TWh
# Electricity generation	Fuel	Gas	TWh
# Electricity generation	Fuel	Hydro	TWh
# Electricity generation	Fuel	Nuclear	TWh
# Electricity generation	Fuel	Other Fossil	TWh
# Electricity generation	Fuel	Other Renewables	TWh
# Electricity generation	Fuel	Solar	TWh
# Electricity generation	Fuel	Wind	TWh
# Electricity generation	Total	Total Generation	TWh
# Electricity imports	Electricity imports	Net Imports	TWh
# Electricity demand	Demand	Demand	TWh
# Electricity demand	Demand per capita	Demand per capita	MWh


In [254]:
import pandas as pd
import plotly_express as px

# Countries or regions (e.g. 'Europe' and/or 'World') whose data is selected,
# summed together and charted.
geo_list = ['China', 'India', 'Indonesia', 'South Africa']

# One entry per data field to chart. Each inner list is laid out as
# [Category, Subcategory, Unit, chart title]; the first three values are matched
# against the 'Category', 'Subcategory' and 'Unit' columns of the dataset, and the
# fourth is used as the chart title.
data_fields = [
    ['Capacity', 'Fuel', 'GW', 'Capacity by fuel type (GW)'],
    ['Electricity generation', 'Fuel', 'TWh', 'Power generation by fuel type (TWh)'],
    ['Electricity imports', 'Electricity imports', 'TWh', 'Net power imports (TWh)'],
    ['Electricity demand', 'Demand', 'TWh', 'Power demand (TWh)'],
]

def bar_chart_data_func(df, data_fields, areas=None):
    """Filter the dataset to one data field and sum it across the selected areas.

    Parameters
    ----------
    df : pandas.DataFrame
        Long-format data with at least the columns 'Category', 'Subcategory',
        'Unit', 'Area', 'Variable', 'Year' and 'Value'.
    data_fields : list
        A single data-field description; items 0, 1 and 2 are matched against
        the 'Category', 'Subcategory' and 'Unit' columns respectively.
    areas : list, optional
        Values of the 'Area' column to include. When omitted, the notebook-level
        'geo_list' variable is used, which keeps existing two-argument calls
        working unchanged.

    Returns
    -------
    pandas.DataFrame
        Columns 'Variable', 'Year' and 'Value', where 'Value' is the sum over
        all selected areas for each ('Variable', 'Year') pair.
    """
    if areas is None:
        areas = geo_list  # fall back to the notebook-wide selection

    row_filter = (
        (df['Category'] == data_fields[0])
        & (df['Subcategory'] == data_fields[1])
        & (df['Unit'] == data_fields[2])
        & (df['Area'].isin(areas))
    )
    # Sum across areas so each variable has one value per year.
    return df[row_filter].groupby(['Variable', 'Year'], as_index=False)[['Value']].sum()

def agg_bar_chart_data_func(df, data_fields):
    """Collapse the per-variable values into a single total per year.

    Parameters
    ----------
    df : pandas.DataFrame
        Frame with at least the columns 'Year' and 'Value' (as returned by
        'bar_chart_data_func()').
    data_fields : list
        A single data-field description; item 0 (e.g. 'Capacity') is used as
        the label of the aggregated series.

    Returns
    -------
    pandas.DataFrame
        Columns 'Year', 'Value' and 'Variable': one row per year, 'Value' being
        the sum over all variables and 'Variable' set to data_fields[0].
    """
    # Sum only the 'Value' column. Summing every column would also "sum" the
    # string column 'Variable', which newer pandas versions do by concatenating
    # the strings rather than dropping the column.
    totals = df.groupby('Year', as_index=False)[['Value']].sum()
    totals['Variable'] = data_fields[0]  # single label so the chart functions can colour by it
    return totals

def pct_chg_data_func(df):
    """Convert yearly values into year-over-year percentage changes.

    Parameters
    ----------
    df : pandas.DataFrame
        Long-format frame with the columns 'Year', 'Variable' and 'Value'.

    Returns
    -------
    pandas.DataFrame
        Long-format frame with the columns 'Year', 'Variable' and 'Value',
        where 'Value' now holds the % change versus the previous row (year)
        of the same variable. The first year is left out because it has no
        predecessor to compare against.
    """
    # One column per variable, one row per year.
    wide = df.pivot(index='Year', columns='Variable', values='Value')

    # Row-over-row change, expressed in percent.
    yoy_pct = wide.pct_change() * 100

    # Skip the first row (all NaN) and turn the 'Year' index back into a column.
    yoy_pct = yoy_pct[1:].reset_index()

    # Everything after the leading 'Year' column is a variable column.
    variable_columns = list(yoy_pct.columns)[1:]
    return yoy_pct.melt(
        id_vars=['Year'],
        value_vars=variable_columns,
        var_name='Variable',
        value_name='Value',
    )

def bar_chart_func(df, data_fields):
    """Show a bar chart of the values in 'df', coloured by variable.

    Parameters
    ----------
    df : pandas.DataFrame
        Frame with the columns 'Year' (x axis), 'Value' (y axis) and
        'Variable' (bar colour).
    data_fields : list
        A single data-field description; item 3 is used as the first part of
        the chart title.

    The title also lists the areas from the notebook-level 'geo_list'
    variable. The figure is displayed immediately and nothing is returned.
    """
    chart_title = data_fields[3] + ': ' + ' '.join(geo_list)

    # The bar mode is left at the Plotly Express default (not 'group').
    fig = px.bar(
        df,
        x=df['Year'],
        y=df['Value'],
        color=df['Variable'],
        title=chart_title,
        template='plotly_dark',
        width=900,
        height=500,
    )
    fig.show()

def pct_chg_line_chart_func(df, data_fields):
    """Show a line chart of % YoY change values with a dashed average line.

    Parameters
    ----------
    df : pandas.DataFrame
        Frame with the columns 'Year' (x axis), 'Value' (y axis, % change) and
        'Variable' (line colour), e.g. as returned by 'pct_chg_data_func()'.
    data_fields : list
        A single data-field description; item 3 is used as the first part of
        the chart title.

    The title also lists the areas from the notebook-level 'geo_list'
    variable. The figure is displayed immediately and nothing is returned.
    """
    fig = px.line(df,
        x=df['Year'],
        y=df['Value'],
        color=df['Variable'],
        height=500,
        width=900,
        title=data_fields[3] + ' (% Chg YoY): ' + ' '.join(geo_list),
        template='plotly_dark'
        )  # one line per variable
    # Horizontal dashed line at the mean of all plotted values. 'Series.mean()'
    # is the same statistic that 'describe()[1]' picked out by position, without
    # relying on positional integer lookup in a label-indexed Series.
    fig.add_hline(y=df['Value'].mean(), line_dash="dash")
    fig.show()

# Load the CSV file downloaded from
# 'https://ember-climate.org/app/uploads/2022/07/yearly_full_release_long_format-1.csv'
# into the 'df' DataFrame.
df = pd.read_csv('yearly_full_release_long_format-1.csv')

# Discard the columns that are not used anywhere in this notebook.
df = df.drop(
    columns=['Ember region', 'EU', 'OECD', 'G20', 'G7', 'YoY absolute change', 'YoY % change']
)

# Run the functions: for every entry in 'data_fields', chart the actual values and
# their % YoY change for the countries/regions selected in 'geo_list'.
# The chart functions display their figure and return nothing, so their results
# are not assigned to variables.
for field in data_fields:
    # Fields without a breakdown by fuel type (e.g. 'Electricity demand' and
    # 'Electricity imports') hold a single series, so an aggregated chart would
    # merely duplicate the first one.
    has_fuel_breakdown = field[1] == 'Fuel'

    # Actual values, summed over the selected countries/regions.
    bar_chart_data = bar_chart_data_func(df, field)
    bar_chart_func(bar_chart_data, field)

    if has_fuel_breakdown:
        # Total over all fuel types (e.g. Wind + Solar + Nuclear capacity).
        agg_bar_chart_data = agg_bar_chart_data_func(bar_chart_data, field)
        bar_chart_func(agg_bar_chart_data, field)

    # % YoY change of the actual values.
    pct_chg_data = pct_chg_data_func(bar_chart_data)
    pct_chg_line_chart_func(pct_chg_data, field)

    if has_fuel_breakdown:
        # % YoY change of the total over all fuel types.
        agg_pct_chg_data = pct_chg_data_func(agg_bar_chart_data)
        pct_chg_line_chart_func(agg_pct_chg_data, field)
    


In [277]:
# Latest yearly total per country for the first entry in 'data_fields'
# (e.g. capacity in GW), together with ISO3 country codes for a geographical map chart.

# Keep only rows that have a country code. 'dropna()' returns a new DataFrame, so
# 'df' itself keeps its rows without a code; an in-place drop through an alias of
# 'df' would remove them from 'df' as well.
df_tmp = df.dropna(subset=['Country code'])

# Lookup table of area name -> ISO3 code, taken from rows where both values occur
# together. One row per area guarantees the merge below cannot duplicate rows.
df_ISO_codes = (
    df_tmp.drop_duplicates(subset='Area')[['Area', 'Country code']]
    .reset_index(drop=True)
)
country_names = list(df_ISO_codes['Area'])  # unique area names, in order of first appearance
country_codes = list(df_ISO_codes['Country code'])  # ISO3 code of each area in 'country_names'

# Filter on the 'Category', 'Subcategory' and 'Unit' of the first data field.
# All areas that have a country code are kept, not just those in 'geo_list'.
df_tmp = df_tmp[
    (df_tmp['Category'] == data_fields[0][0])
    & (df_tmp['Subcategory'] == data_fields[0][1])
    & (df_tmp['Unit'] == data_fields[0][2])
]

# Total yearly value per area (summed over all variables, e.g. fuel types).
df_tmp = df_tmp.groupby(['Area', 'Year'], as_index=False)[['Value']].sum()

# Attach the 'Country code' column; drop rows with NaN values so they cannot
# distort the colours of the map chart.
df_tmp = pd.merge(df_tmp, df_ISO_codes, on='Area', how='left').dropna()

# Keep the most recent year available for each area, then order by value.
df_tmp = df_tmp.sort_values('Year').groupby('Area').tail(1)
df_tmp = df_tmp.sort_values('Value')
df_tmp

Unnamed: 0,Area,Year,Value,Country code
1494,French Guiana,2021,0.00,GUF
1648,Gibraltar,2021,0.00,GIB
3115,Niue,2021,0.00,NIU
3445,Reunion,2021,0.00,REU
3533,"Saint Helena, Ascension and Tristan da Cunha",2021,0.00,SHN
...,...,...,...,...
3489,Russian Federation (the),2021,232.61,RUS
2153,Japan,2021,316.70,JPN
1977,India,2021,425.57,IND
4464,United States of America,2021,1229.54,USA
