In [58]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns
import geopandas as gpd
import altair as alt

In [59]:
# Load the preprocessed energy indicators table (path is relative to the notebook)
data = pd.read_csv(filepath_or_buffer="../data/preprocessed/processed_data.csv")
# Preview the first five rows to confirm the expected columns were read
data.head(n=5)

Unnamed: 0,Entity,Access to electricity (% of population),Access to clean fuels for cooking,Renewable-electricity-generating-capacity-per-capita,Financial flows to developing countries (US $),Renewable energy share in the total final energy consumption (%),Electricity from fossil fuels (TWh),Electricity from nuclear (TWh),Electricity from renewables (TWh),Low-carbon electricity (% electricity),Primary energy consumption per capita (kWh/person),Energy intensity level of primary energy (MJ/$2017 PPP GDP),Value_co2_emissions_kt_by_country,Renewables (% equivalent primary energy),gdp_growth,gdp_per_capita,Land Area(Km2),Latitude,Longitude
0,Afghanistan,97.483227,30.86,9.628,31477500.0,18.9,0.166,0.0,0.938,84.902724,824.370898,2.3475,5557.500124,,1.53141,505.057656,652230.0,33.93911,67.709953
1,Albania,99.956,79.66,,,,0.0,0.0,6.226,100.0,12795.8734,2.61,4889.999986,,1.858918,4916.938059,28748.0,41.153332,20.168331
2,Algeria,99.597542,99.64,15.144,70000.0,0.1425,71.294,0.0,0.664,0.914286,15524.6174,5.0725,162509.998325,0.25463,0.3,3898.939141,2381741.0,28.033886,1.659626
3,Angola,44.52996,49.0,93.842,31380000.0,54.3325,3.798,0.0,9.344,70.504888,3518.68492,2.7525,25794.999597,,-2.151105,3095.464027,1246700.0,-11.202692,17.873887
4,Antigua and Barbuda,100.0,100.0,83.512,19550000.0,0.7525,0.32,0.0,0.01,3.012478,32032.2842,3.65,507.499993,,0.038858,15726.261038,443.0,17.060816,-61.796428


# Energy consumption pie chart

In [60]:
# Energy consumption pie chart: renewable share vs. everything else, per Entity
renewable_share_col = 'Renewable energy share in the total final energy consumption (%)'

# Build the frame in one chain so every column is created on the fresh
# DataFrame returned by .rename() instead of on a slice of `data`; assigning
# into that slice is what triggers pandas' SettingWithCopyWarning.
consump_data = (
    data[['Entity', renewable_share_col]]
    .rename(columns={renewable_share_col: 'Renewable_energy_share'})
    .assign(
        # A missing renewable share is treated as 0
        Renewable_energy_share=lambda frame: frame['Renewable_energy_share'].fillna(0),
        # Complement to 100; rows whose share was missing therefore get 100
        Other=lambda frame: 100 - frame['Renewable_energy_share'],
    )
)
consump_data.head()

A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  consump_data['Other'] = 100-consump_data['Renewable energy share in the total final energy consumption (%)']


Unnamed: 0,Entity,Renewable_energy_share,Other
0,Afghanistan,18.9,81.1
1,Albania,0.0,100.0
2,Algeria,0.1425,99.8575
3,Angola,54.3325,45.6675
4,Antigua and Barbuda,0.7525,99.2475


In [83]:
# Go from wide (one column per share) to long (one row per Entity/category),
# give the categories their display names, and attach a label such as "19%".
long_format_data = (
    consump_data
    .melt(id_vars=['Entity'], var_name='category', value_name='value')
    .assign(
        # Display names used for the chart legend
        category=lambda frame: frame['category'].replace({
            'Renewable_energy_share': 'Renewables',
            'Other': 'Other'
        }),
        # Whole-number label: round, cast to int, then append the percent sign
        Percentage=lambda frame: frame['value'].round().astype(int).astype(str) + '%',
    )
)
# Preview the reshaped table
long_format_data.head()

Unnamed: 0,Entity,category,value,Percentage
0,Afghanistan,Renewables,18.9,19%
1,Albania,Renewables,0.0,0%
2,Algeria,Renewables,0.1425,0%
3,Angola,Renewables,54.3325,54%
4,Antigua and Barbuda,Renewables,0.7525,1%


In [84]:
# Persist the long-format consumption shares (row index is not written)
long_format_data.to_csv(path_or_buf='../data/preprocessed/consump_pie_data.csv', index=False)

In [63]:
# Keep only the rows for a single example entity to prototype the chart
is_selected_entity = long_format_data['Entity'].eq("Afghanistan")
filtered_data = long_format_data.loc[is_selected_entity]
filtered_data

Unnamed: 0,Entity,category,value,Percentage
0,Afghanistan,Renewables,18.9,19%
175,Afghanistan,Other,81.1,81%


In [80]:
# Rows for the single entity selected above drive the donut chart
pie_data = filtered_data

# Fixed colour per category: green for renewables, light grey for the rest
domain = ['Renewables', "Other"]
range_ = ['#4CBB17', '#DDDDDD']
category_color = alt.Color(
    'category:N',
    legend=alt.Legend(title='Energy Source'),
    scale=alt.Scale(domain=domain, range=range_),
)

# Encodings shared by both layers: stacked angle per value, colour per category
energy_consumption_pie_chart = alt.Chart(pie_data).encode(
    theta=alt.Theta('value:Q', stack=True),
    color=category_color,
    tooltip=['category', 'value'],
)

# Layer 1: the ring itself (inner radius > 0 makes it a donut)
pie = energy_consumption_pie_chart.mark_arc(outerRadius=120, innerRadius=50)

# Layer 2: percentage labels placed just outside the ring
text = energy_consumption_pie_chart.mark_text(radius=140, size=20).encode(
    text='Percentage:N'
)

# Stack the two layers, size the view, and enable interactivity
layered = alt.layer(pie, text)
final_chart = layered.properties(width='container', height=150).interactive()

# Last expression of the cell: renders the chart
final_chart

In [90]:
# Electricity generation pie chart: per-Entity generation split by source.
# Maps each share column to the TWh column it is derived from; the insertion
# order fixes the column order of the resulting frame.
elec_source_cols = {
    '% Renewable': 'Electricity from renewables (TWh)',
    '% Nuclear': 'Electricity from nuclear (TWh)',
    '% Fossil Fuels': 'Electricity from fossil fuels (TWh)',
}

# Take an explicit copy so the column assignments below never write into a
# slice of `data`. (A previous rename here targeted a column that is not part
# of this selection, so it renamed nothing and only served as an implicit copy.)
elec_data = data[['Entity', *elec_source_cols.values()]].copy()

# Total generation per entity; a missing value in any source makes the total NaN
elec_data['Total Electricity (TWh)'] = (
    elec_data['Electricity from renewables (TWh)']
    + elec_data['Electricity from nuclear (TWh)']
    + elec_data['Electricity from fossil fuels (TWh)']
)

# Share of each source in the total. NOTE: despite the "%" in the column
# names these are fractions (source / total), not values scaled to 0-100.
for share_col, twh_col in elec_source_cols.items():
    elec_data[share_col] = elec_data[twh_col] / elec_data['Total Electricity (TWh)']
elec_data.head()

Unnamed: 0,Entity,Electricity from renewables (TWh),Electricity from nuclear (TWh),Electricity from fossil fuels (TWh),Total Electricity (TWh),% Renewable,% Nuclear,% Fossil Fuels
0,Afghanistan,0.938,0.0,0.166,1.104,0.849638,0.0,0.150362
1,Albania,6.226,0.0,0.0,6.226,1.0,0.0,0.0
2,Algeria,0.664,0.0,71.294,71.958,0.009228,0.0,0.990772
3,Angola,9.344,0.0,3.798,13.142,0.711003,0.0,0.288997
4,Antigua and Barbuda,0.01,0.0,0.32,0.33,0.030303,0.0,0.969697


In [91]:
# Long-format table of absolute generation: one row per Entity and source
long_format_data_twh = elec_data.melt(
    id_vars=['Entity'],
    value_vars=[
        'Electricity from renewables (TWh)',
        'Electricity from nuclear (TWh)',
        'Electricity from fossil fuels (TWh)',
    ],
    var_name='Energy Source',
    value_name='Value',
)

# Same reshape for the share columns
long_format_data_percentage = elec_data.melt(
    id_vars=['Entity'],
    value_vars=['% Renewable', '% Nuclear', '% Fossil Fuels'],
    var_name='Energy Source',
    value_name='Percentage',
)

# Both column families collapse onto one set of source labels, which is what
# lets the two long tables be joined on ('Entity', 'Energy Source') below
source_mapping = {
    'Electricity from renewables (TWh)': 'Renewables',
    'Electricity from nuclear (TWh)': 'Nuclear',
    'Electricity from fossil fuels (TWh)': 'Fossil Fuels',
    '% Renewable': 'Renewables',
    '% Nuclear': 'Nuclear',
    '% Fossil Fuels': 'Fossil Fuels'
}

# Relabel the source column of each long table
for melted in (long_format_data_twh, long_format_data_percentage):
    melted['Energy Source'] = melted['Energy Source'].replace(source_mapping)

# Join absolute values with their shares
long_format_data_combined = long_format_data_twh.merge(
    long_format_data_percentage,
    on=['Entity', 'Energy Source'],
)

# Missing shares become 0. 'Percentage' is left as the raw source/total
# fraction; no rounding or '%' label is produced in this cell.
long_format_data_combined['Percentage'] = long_format_data_combined['Percentage'].fillna(0)

# Preview the combined table
long_format_data_combined.head()

Unnamed: 0,Entity,Energy Source,Value,Percentage
0,Afghanistan,Renewables,0.938,0.849638
1,Albania,Renewables,6.226,1.0
2,Algeria,Renewables,0.664,0.009228
3,Angola,Renewables,9.344,0.711003
4,Antigua and Barbuda,Renewables,0.01,0.030303


In [92]:
# Persist the long-format electricity mix (row index is not written)
long_format_data_combined.to_csv(path_or_buf='../data/preprocessed/elec_pie_data.csv', index=False)