In [50]:
import pandas as pd
import matplotlib.pyplot as plt
import json

In [51]:
# load data
df_data = pd.read_csv('../data/EnergyMixEU.csv', sep='\t')

df_data.head()

Unnamed: 0.1,Unnamed: 0,renewables,fossil fuels,nuclear
0,Luxembourg,92.80%,7.20%,0.00%
1,Denmark,79.31%,20.69%,0.00%
2,Latvia,75.51%,24.49%,0.00%
3,Lithuania,72.55%,27.45%,0.00%
4,Austria,69.44%,30.56%,0.00%


In [52]:
# rename country column
df_data.rename(columns={df_data.columns[0]: 'Country'}, inplace=True)

In [53]:
# fix data types
df_data["renewables"] = df_data["renewables"].str.rstrip("%").astype("float")
df_data["nuclear"] = df_data["nuclear"].str.rstrip("%").astype("float")
df_data["fossil fuels"] = df_data["fossil fuels"].str.rstrip("%").astype("float")

df_data.head()


Unnamed: 0,Country,renewables,fossil fuels,nuclear
0,Luxembourg,92.8,7.2,0.0
1,Denmark,79.31,20.69,0.0
2,Latvia,75.51,24.49,0.0
3,Lithuania,72.55,27.45,0.0
4,Austria,69.44,30.56,0.0


In [54]:
# switzerland is not included in the data, so we add it manually

df_ch = pd.read_csv('../data/ogd32_elektrizitaetbilanz_jahreswerte.csv', sep=',', encoding='utf-16')
df_ch.head()
df_ch = df_ch[df_ch['Jahr'] == 2022]

df_production = df_ch[[
    'Jahr', 
    'Erzeugung_laufwerk_GWh',
    'Erzeugung_speicherwerk_GWh',
    'Erzeugung_andere_erneuerbare_abfaelle_GWh',
    'Erzeugung_holz_GWh',
    'Erzeugung_biogas_GWh',
    'Erzeugung_photovoltaik_GWh',
    'Erzeugung_wind_GWh',
    'Erzeugung_andere_total_GWh',
    'Erzeugung_andere_fossil_GWh',
    'Erzeugung_kernkraftwerk_GWh',
]]

df_production.rename(columns={
    'Jahr': 'Year',
    'Erzeugung_laufwerk_GWh': 'Hydropower (run-of-river)',
    'Erzeugung_speicherwerk_GWh': 'Hydropower (storage)',
    'Erzeugung_andere_erneuerbare_abfaelle_GWh': 'Renewable waste',
    'Erzeugung_holz_GWh': 'Wood',
    'Erzeugung_biogas_GWh': 'Biogas',
    'Erzeugung_photovoltaik_GWh': 'Photovoltaic',
    'Erzeugung_wind_GWh': 'Wind',
    'Erzeugung_andere_total_GWh': 'Other (total)',
    'Erzeugung_andere_fossil_GWh': 'Fossil',
    'Erzeugung_kernkraftwerk_GWh': 'Nuclear power',
}, inplace=True)

df_production = df_production.drop('Other (total)', axis=1)

# Grouping columns
df_production_grouped = df_production.copy()

df_production_grouped['Nuclear'] = df_production_grouped['Nuclear power']
df_production_grouped['Fossil'] = df_production_grouped['Fossil']
df_production_grouped['Renewable'] = (
    df_production_grouped['Renewable waste'] +
    df_production_grouped['Wood'] +
    df_production_grouped['Biogas'] +
    df_production_grouped['Photovoltaic'] +
    df_production_grouped['Wind']+
    df_production_grouped['Hydropower (run-of-river)'] +
    df_production_grouped['Hydropower (storage)']
)

# Selecting only the grouped columns and the Year column
df_production_grouped = df_production_grouped[['Year', 'Nuclear', 'Fossil', 'Renewable']]

df_production_grouped.head()

A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  df_production.rename(columns={


Unnamed: 0,Year,Nuclear,Fossil,Renewable
62,2022,23113.0,878.0,39514.0


In [55]:
total_energy_ch = df_production_grouped['Renewable'].sum() + df_production_grouped['Nuclear'].sum() + df_production_grouped['Fossil'].sum()

def get_percentage(value):
    print(value, total_energy_ch)
    return (100 / total_energy_ch) * value

renewables_percentage = get_percentage(df_production_grouped['Renewable'].sum())
nuclear_percentage = get_percentage(df_production_grouped['Nuclear'].sum())
fossil_fuels_percentage = get_percentage(df_production_grouped['Fossil'].sum())


new_row = {
    "Country": "Switzerland",
    "renewables": renewables_percentage,
    "nuclear": nuclear_percentage,
    "fossil fuels": fossil_fuels_percentage
}

display(new_row)


39514.0 63505.0
23113.0 63505.0
878.0 63505.0


{'Country': 'Switzerland',
 'renewables': np.float64(62.22187229352019),
 'nuclear': np.float64(36.39555940477128),
 'fossil fuels': np.float64(1.3825683017085268)}

In [56]:
# append row to the dataframe
df_data.loc[len(df_data)] = new_row

df_data.tail()

Unnamed: 0,Country,renewables,fossil fuels,nuclear
23,Bulgaria,19.97,45.95,34.08
24,Cyprus,17.42,82.58,0.0
25,Czechia,13.93,48.86,37.21
26,Malta,12.76,87.24,0.0
27,Switzerland,62.221872,1.382568,36.395559


In [57]:
# rename columns
df_data.columns.values[0] = "Country"
df_data.rename(columns={'fossil fuels': 'Fossil'}, inplace=True)
df_data.rename(columns={'renewables': 'Renewable'}, inplace=True)
df_data.rename(columns={'nuclear': 'Nuclear'}, inplace=True)

In [58]:
# sort by renewable
df_data = df_data.sort_values(by='Renewable', ascending=False)

display(df_data)

Unnamed: 0,Country,Renewable,Fossil,Nuclear
0,Luxembourg,92.8,7.2,0.0
1,Denmark,79.31,20.69,0.0
2,Latvia,75.51,24.49,0.0
3,Lithuania,72.55,27.45,0.0
4,Austria,69.44,30.56,0.0
5,Sweden,69.32,1.23,29.45
6,Croatia,64.64,35.36,0.0
7,Portugal,62.67,37.33,0.0
27,Switzerland,62.221872,1.382568,36.395559
8,Finland,53.98,11.0,35.01


In [59]:
# write to json
highcharts_series = []
for column in df_data.columns:
    series_data = {
        'name': column,
        'data': df_data[column].fillna(0).tolist()  # Replace NaN with 0 for Highcharts
    }
    highcharts_series.append(series_data) 
    
with open('../src/data/energiemix-eu.json', 'w') as json_file:
    json.dump(highcharts_series, json_file, indent=2)