In this chapter we intend to provide a basic overview of energy consumption evolution in the EU.

We will see:

1. The evolution of total EU consumption per capita, broken down by energy source
2. The evolution of total EU consumption per capita, broken down by consuming sector
3. The evolution of consumption per capita by country
4. For the last year, the relationship between consumer sector and energy source


In [1]:
from pysdmx.io import get_datasets
import utils
import json
import plotly.express as px


# Load the named code sub-lists that the query constraints below are built
# from. The encoding is given explicitly so the file is decoded the same way
# on every platform instead of depending on the locale's default encoding.
with open('sub_codelists.json', 'r', encoding='utf-8') as f:
    sub_codelists = json.load(f)

def calculate_energy_consumption_per_capita(df):
    """Add a per-capita consumption column, in kWh per person, to ``df``.

    Reads the ``energy_consumption_gwh`` and ``population`` columns and writes
    the result to a new ``energy_consumption_PC_kwh`` column.

    The frame is modified in place; the same object is returned so the call
    can be used in an assignment.
    """
    kwh_per_gwh = 1000000
    gwh_per_person = df['energy_consumption_gwh'].div(df['population'])
    df['energy_consumption_PC_kwh'] = gwh_per_person.mul(kwh_per_gwh)
    return df

We are going to get the data from the Eurostat DB.
We need to get two datasets: The Energy Balances (simplified is enough for this analysis) and the population by country.

For each dataset, we have to provide the list of values we need.
We have created a method that automatically generates the SDMX query from a dictionary of values. We will load the required values from the file we created after the metadata research.

In [2]:
# Selection for the energy balance query: one entry per dimension, either a
# single code or a list of codes taken from sub_codelists.json.
nrg_bal_constraints = dict(
    freq='A',
    nrg_bal=sub_codelists['nrg_bal_consumption_basic_plus_other'],
    siec=sub_codelists['nrg_bal_siec_breakdown'],
    unit='GWH',
    geo=sub_codelists['geo_eu_countries'],
)

# Selection for the population query. It uses the same country list as the
# energy balance so both datasets can later be joined on 'geo'.
demo_pjan_constraints = dict(
    freq='A',
    unit='NR',
    age='TOTAL',
    sex='T',
    geo=sub_codelists['geo_eu_countries'],
)


In [None]:
# Download the 'nrg_bal_s' dataset restricted to the constraints above.
nrg_bal = utils.get_dataset_with_selection('nrg_bal_s', nrg_bal_constraints)

# Attach labels for both coded dimensions, 'siec' first and then 'nrg_bal'.
# Presumably this is what provides the 'siec_label' / 'nrg_bal_label' columns
# used by the later cells; confirm against utils.add_labels.
for dimension in ('siec', 'nrg_bal'):
    nrg_bal = utils.add_labels(nrg_bal, dimension)

display(nrg_bal.data)

In [None]:
# Download the 'demo_pjan' population dataset restricted to the constraints above.
demo_pjan = utils.get_dataset_with_selection('demo_pjan', demo_pjan_constraints)

# Shift every population observation back by one year so that its TIME_PERIOD
# lines up with the energy balance data it will be merged with.
# NOTE(review): presumably needed because the population figure refers to the
# start of the year; confirm against the dataset metadata.
demo_pjan.data['TIME_PERIOD'] = demo_pjan.data['TIME_PERIOD'] - 1

display(demo_pjan.data)



In [None]:
# Total population per year: sum OBS_VALUE over every row that shares a
# TIME_PERIOD (i.e. over all selected countries).
demo_pjan_aggregated = demo_pjan.data.groupby(['TIME_PERIOD'])['OBS_VALUE'].sum().reset_index()

# The aggregate is already a plain DataFrame (it has no `.data` attribute,
# unlike the dataset objects returned by utils), so it is displayed directly.
display(demo_pjan_aggregated)

Preparing a dataset with the consumption per capita in kWh


In [None]:
# Join every energy balance observation with the population of the same
# country and year. Both inputs carry an OBS_VALUE column, so the merge
# suffixes them (_x = energy, _y = population) and they are renamed to
# descriptive names right away.
full_dataset = (
    nrg_bal.data
    .merge(demo_pjan.data, on=['TIME_PERIOD', 'geo'], how='inner')
    .rename(columns={
        'OBS_VALUE_x': 'energy_consumption_gwh',
        'OBS_VALUE_y': 'population',
    })
)

display(full_dataset)

In [None]:


# Unaggregated energy balance table, kept under a descriptive name.
data_full_breakdown = nrg_bal.data

# The three aggregates below hold absolute OBS_VALUE totals. They have no
# 'population' column, so calculate_energy_consumption_per_capita cannot be
# applied to them until they are merged with the population data.

# Total per year and energy source.
data_by_energy_source = nrg_bal.data.groupby(
    ['TIME_PERIOD', 'siec_label'], as_index=False
)['OBS_VALUE'].sum()

# Total per year and consuming sector.
dataset_by_sector = nrg_bal.data.groupby(
    ['TIME_PERIOD', 'nrg_bal_label'], as_index=False
)['OBS_VALUE'].sum()

# Total per year and country.
dataset_by_country = nrg_bal.data.groupby(
    ['TIME_PERIOD', 'geo'], as_index=False
)['OBS_VALUE'].sum()

display(dataset_by_country)

In [None]:
# Stacked area chart of total consumption over time, one band per energy source.
fig = px.area(
    data_frame=data_by_energy_source,
    x='TIME_PERIOD',
    y='OBS_VALUE',
    color='siec_label',
)
fig.show()

In [None]:
# Stacked area chart of total consumption over time, one band per consuming sector.
fig = px.area(
    data_frame=dataset_by_sector,
    x='TIME_PERIOD',
    y='OBS_VALUE',
    color='nrg_bal_label',
)
fig.show()

In [None]:
# Attach the yearly total population to the per-sector totals, rename the two
# suffixed OBS_VALUE columns (_x = energy, _y = population) and derive the
# per-capita consumption.
by_sector_pc = calculate_energy_consumption_per_capita(
    dataset_by_sector
    .merge(demo_pjan_aggregated, on=['TIME_PERIOD'], how='inner')
    .rename(columns={
        'OBS_VALUE_x': 'energy_consumption_gwh',
        'OBS_VALUE_y': 'population',
    })
)

# Stacked area chart of per-capita consumption, one band per consuming sector.
fig = px.area(
    data_frame=by_sector_pc,
    x='TIME_PERIOD',
    y='energy_consumption_PC_kwh',
    color='nrg_bal_label',
)
fig.show()

In [None]:
# Attach each country's own population to its yearly consumption total, rename
# the two suffixed OBS_VALUE columns (_x = energy, _y = population) and derive
# the per-capita consumption.
consumption_pc_by_coubtry = calculate_energy_consumption_per_capita(
    dataset_by_country
    .merge(demo_pjan.data, on=['TIME_PERIOD', 'geo'], how='inner')
    .rename(columns={
        'OBS_VALUE_x': 'energy_consumption_gwh',
        'OBS_VALUE_y': 'population',
    })
)

# One line per country showing per-capita consumption over time.
fig = px.line(
    data_frame=consumption_pc_by_coubtry,
    x='TIME_PERIOD',
    y='energy_consumption_PC_kwh',
    color='geo',
)
fig.show()

In [None]:
import plotly.graph_objects as go

# Restrict the merged dataset to the 2022 observations.
df = full_dataset[full_dataset['TIME_PERIOD'] == 2022]

# Energy sources feed the links; consuming sectors receive them.
sources = df['siec_label'].unique()
targets = df['nrg_bal_label'].unique()

# Parallel lists in the shape go.Sankey expects: `labels` holds one entry per
# node, while `source`/`target`/`value` hold one entry per link and refer to
# nodes by their position in `labels`.
labels = []
source = []
target = []
value = []

# Maps a label to its node position.
index_dict = {}

for s in sources:
    # A new node always takes the next free position in `labels`.
    index_dict[s] = len(labels)
    labels.append(s)

    for t in targets:
        # Sector nodes are created the first time they are encountered.
        if t not in index_dict:
            index_dict[t] = len(labels)
            labels.append(t)

        # One link per (source, sector) pair, weighted by the summed
        # consumption of the matching rows (0 when there are none).
        pair_rows = (df['siec_label'] == s) & (df['nrg_bal_label'] == t)
        source.append(index_dict[s])
        target.append(index_dict[t])
        value.append(df.loc[pair_rows, 'energy_consumption_gwh'].sum())

fig = go.Figure(data=[
    go.Sankey(
        node={
            'pad': 15,
            'thickness': 20,
            'line': {'color': "black", 'width': 0.5},
            'label': labels,
            'color': "blue",
        },
        link={
            'source': source,
            'target': target,
            'value': value,
        },
    )
])

fig.update_layout(title_text="Source of energy by sectors", font_size=10)
fig.show()