# Eurostat energy balance 2018

In [16]:
def read_archive(file, var, var_key, col_start, col_end):
    """Merge the ``get_values`` results of selected members of a zip archive.

    Every archive member whose name starts with one of the hard-coded
    two-letter prefixes is read fully into memory and handed to
    ``get_values`` together with that prefix (used as the country code).

    NOTE(review): a function of the same name is defined again further down
    in this notebook; whichever cell was executed last is the one in effect.

    Returns: dict (union of the per-member dicts)
    """
    wanted_prefixes = ('CZ', 'AT', 'DK', 'NL', 'PL', 'SK')
    merged = {}

    with zipfile.ZipFile(file, 'r') as archive:
        # str.startswith accepts a tuple, so one call tests all prefixes
        members = [m for m in archive.namelist() if m.startswith(wanted_prefixes)]
        for member in members:
            payload = archive.read(member)
            merged.update(
                get_values(payload, member[:2], var, var_key, col_start, col_end)
            )
    return merged

In [1]:
import io
import os
import zipfile

import numpy as np
import pandas as pd
import pyxlsb
import requests
from scipy.stats import linregress

In [2]:
def get_values(file, country, variable, fuel, col_start, col_end):
    """Read one row of a yearly-sheet workbook for every year 2002-2018.

    The workbook is opened once; for each year the sheet named after the
    year is parsed (first four rows skipped, second column used as index),
    the row labelled ``variable`` is selected and the columns from
    ``col_start`` to ``col_end`` (label slice, both ends included) are summed.

    Parameters
    ----------
    file : bytes, str, path-like or binary file object
        The .xlsb workbook. Raw bytes are wrapped in ``io.BytesIO`` because
        passing bytes straight to the pandas Excel readers is deprecated.
    country : hashable
        First element of every key in the returned dict.
    variable : hashable
        Index label of the row to read.
    fuel : hashable
        Third element of every key in the returned dict.
    col_start, col_end : hashable
        First and last column label of the range that is summed.

    Returns
    -------
    dict
        ``{(country, year, fuel): summed value}`` with one entry per year.
    """
    if isinstance(file, (bytes, bytearray)):
        file = io.BytesIO(file)

    trajectory = {}
    # Open the workbook a single time instead of once per year: the archive
    # only has to be opened and indexed once for all 17 sheets.
    with pd.ExcelFile(file, engine='pyxlsb') as workbook:
        for year in range(2002, 2019):
            df = workbook.parse(
                sheet_name=str(year),
                skiprows=[0, 1, 2, 3],
                index_col=1,
            )
            row = df.loc[variable, col_start:col_end]
            try:
                trajectory[(country, year, fuel)] = row.sum()
            except TypeError:
                # Mixed text/number cells cannot be added up directly;
                # non-numeric cells are coerced to NaN and skipped by sum().
                trajectory[(country, year, fuel)] = pd.to_numeric(row, errors='coerce').sum()
    return trajectory


In [3]:
# Conversion factor: later in this notebook the ktoe values are multiplied by
# it and the result is written to the *_tj.csv file.
tj_ktoe = 41.868

In [4]:
# Source archive of the energy balance workbooks
url = 'https://ec.europa.eu/eurostat/documents/38154/4956218/Energy-Balances-April-2020-edition.zip/69da6e9f-bf8f-cd8e-f4ad-50b52f8ce616'

# The timeout keeps the cell from hanging forever on a stalled connection.
r = requests.get(url, timeout=60)
# Fail loudly on 4xx/5xx instead of saving an error page as the "zip" file.
r.raise_for_status()
with open('eurostat_balances_2020.zip', 'wb') as f:
    f.write(r.content)

In [3]:
# NOTE: this is slow in practice (or something is done wrong): every call
# reads each matching archive member again and passes it to get_values.
def read_archive(file, var, fuel_key, col_start, col_end):
    """Collect ``get_values`` results for the selected countries of a zip file.

    Archive members whose name starts with one of the hard-coded two-letter
    prefixes are read into memory and passed to ``get_values``; the prefix
    doubles as the country code. The per-member dicts are merged.

    Returns: dict
    """
    country_codes = ('CZ', 'AT', 'DK', 'NL', 'PL', 'SK')
    collected = {}

    with zipfile.ZipFile(file, 'r') as archive:
        for member in archive.namelist():
            if not member.startswith(country_codes):
                continue
            with archive.open(member) as handle:
                raw = handle.read()
                collected.update(
                    get_values(raw, member[:2], var, fuel_key, col_start, col_end)
                )
    return collected

In [4]:
# Rows of interest: each entry is a row label looked up in every yearly sheet
variables = [
    'Primary production',
    'Imports',
    'Exports',
    'Gross inland consumption',
]

# (fuel key, first column label, last column label); the values in the
# column range are summed, so identical labels select a single column.
fuel_tuples = [
    ('total', 'Total', 'Total'),
    ('renewables', 'Renewables and biofuels', 'Renewables and biofuels'),
    ('bioenergy', 'Bioenergy', 'Bioenergy'),
    ('solid_biomass', 'Primary solid biofuels', 'Primary solid biofuels'),
    ('biofuels', 'Pure biogasoline', 'Other liquid biofuels'),
    ('biogas', 'Biogases', 'Biogases'),
    ('ren_mun_waste', 'Renewable municipal waste', 'Renewable municipal waste'),
]

# One inner dict per variable, keyed by (country, year, fuel)
all_data = {}
for variable in variables:
    column_name = variable.lower().replace(' ', '_')
    for fuel_key, start, end in fuel_tuples:
        data = read_archive('eurostat_balances_2020.zip', variable, fuel_key, start, end)
        all_data.setdefault(column_name, {}).update(data)

# Outer keys become columns, the tuple keys become a three-level row index
df = pd.DataFrame(all_data).rename_axis(index=['country', 'year', 'fuel'])
df

Unnamed: 0_level_0,Unnamed: 1_level_0,Unnamed: 2_level_0,primary_production,imports,exports,gross_inland_consumption
country,year,fuel,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1
AT,2002,total,9831.767,24645.177,3538.177,31002.109
AT,2003,total,9483.763,27068.006,3881.256,32638.250
AT,2004,total,9842.878,28084.148,4434.439,33249.096
AT,2005,total,9888.261,29620.112,4932.991,34376.159
AT,2006,total,10326.256,30536.606,5451.498,34687.504
...,...,...,...,...,...,...
SK,2014,ren_mun_waste,11.584,0.000,0.000,11.584
SK,2015,ren_mun_waste,14.928,0.000,0.000,14.928
SK,2016,ren_mun_waste,19.466,0.000,0.000,19.466
SK,2017,ren_mun_waste,28.518,0.000,0.000,28.518


In [5]:
# Export with a decimal comma; the field separator stays the default comma.
df.to_csv('bioenergy_2002_2018.csv', decimal=',')

In [6]:
# Independent copy, so the columns added below do not alter df
df2 = df.copy()

In [8]:
# Dependency: net imports (imports minus exports) divided by gross inland consumption
net_imports = df2['imports'] - df2['exports']
df2['dependency'] = net_imports / df2['gross_inland_consumption']

In [9]:
# Display the frame including the new dependency column
df2

Unnamed: 0_level_0,Unnamed: 1_level_0,Unnamed: 2_level_0,primary_production,imports,exports,gross_inland_consumption,dependency
country,year,fuel,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1
AT,2002,total,9831.767,24645.177,3538.177,31002.109,0.680825
AT,2003,total,9483.763,27068.006,3881.256,32638.250,0.710416
AT,2004,total,9842.878,28084.148,4434.439,33249.096,0.711289
AT,2005,total,9888.261,29620.112,4932.991,34376.159,0.718147
AT,2006,total,10326.256,30536.606,5451.498,34687.504,0.723174
...,...,...,...,...,...,...,...
SK,2014,ren_mun_waste,11.584,0.000,0.000,11.584,0.000000
SK,2015,ren_mun_waste,14.928,0.000,0.000,14.928,0.000000
SK,2016,ren_mun_waste,19.466,0.000,0.000,19.466,0.000000
SK,2017,ren_mun_waste,28.518,0.000,0.000,28.518,0.000000


In [11]:
# Sort the rows by the (country, year, fuel) index; ascending is the default
df2 = df2.sort_index()

In [12]:
# Display the frame after sorting by index
df2

Unnamed: 0_level_0,Unnamed: 1_level_0,Unnamed: 2_level_0,primary_production,imports,exports,gross_inland_consumption,dependency
country,year,fuel,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1
AT,2002,bioenergy,2920.414,196.600,203.428,2913.585,-0.002344
AT,2002,biofuels,20.347,0.000,0.000,20.347,0.000000
AT,2002,biogas,21.950,0.000,0.000,21.950,0.000000
AT,2002,ren_mun_waste,47.053,0.000,0.000,47.053,0.000000
AT,2002,renewables,6490.482,164.493,172.781,6482.194,-0.001279
...,...,...,...,...,...,...,...
SK,2018,biogas,148.753,0.000,0.000,148.753,0.000000
SK,2018,ren_mun_waste,15.023,0.000,0.000,15.023,0.000000
SK,2018,renewables,1613.427,113.184,138.882,1581.295,-0.016251
SK,2018,solid_biomass,907.591,3.224,11.799,889.223,-0.009643


In [14]:
# Export the sorted frame (with dependency column) using a decimal comma
df2.to_csv('eurostat_2002_2018_ktoe.csv', decimal=',')

In [23]:
# Start df3 as an independent copy of df2
df3 = df2.copy()

In [24]:
# Conversion factor applied to the ktoe values to obtain TJ
tj_ktoe = 41.868

# Derive the converted frame from df2 rather than from df3 itself, so that
# re-running this cell does not apply the conversion factor a second time.
df3 = df2.loc[:, 'primary_production':'gross_inland_consumption'] * tj_ktoe

# Keep the share based on the original data in ktoe
df3['dependency'] = df2['dependency']
df3

Unnamed: 0_level_0,Unnamed: 1_level_0,Unnamed: 2_level_0,primary_production,imports,exports,gross_inland_consumption,dependency
country,year,fuel,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1
AT,2002,bioenergy,122271.893352,8231.248800,8517.123504,121985.976780,-0.002344
AT,2002,biofuels,851.888196,0.000000,0.000000,851.888196,0.000000
AT,2002,biogas,919.002600,0.000000,0.000000,919.002600,0.000000
AT,2002,ren_mun_waste,1970.015004,0.000000,0.000000,1970.015004,0.000000
AT,2002,renewables,271743.500376,6886.992924,7233.994908,271396.498392,-0.001279
...,...,...,...,...,...,...,...
SK,2018,biogas,6227.990604,0.000000,0.000000,6227.990604,0.000000
SK,2018,ren_mun_waste,628.982964,0.000000,0.000000,628.982964,0.000000
SK,2018,renewables,67550.961636,4738.787712,5814.711576,66205.659060,-0.016251
SK,2018,solid_biomass,37999.019988,134.982432,494.000532,37229.988564,-0.009643


In [25]:
# Export the converted frame using a decimal comma
df3.to_csv('eurostat_2002_2018_tj.csv', decimal=',')

In [13]:
# All rows of country 'CZ' for the years 2002 through 2018 (both included)
idx = pd.IndexSlice
df2.loc[idx['CZ', 2002:2018], :]

Unnamed: 0_level_0,Unnamed: 1_level_0,Unnamed: 2_level_0,primary_production,imports,exports,gross_inland_consumption,dependency
country,year,fuel,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1
CZ,2002,bioenergy,1768.482,5.427,39.640,1734.145,-0.019729
CZ,2002,biofuels,92.261,0.035,27.661,64.512,-0.428230
CZ,2002,biogas,35.946,0.000,0.000,35.946,0.000000
CZ,2002,ren_mun_waste,56.009,0.000,0.000,56.009,0.000000
CZ,2002,renewables,1988.066,0.035,27.661,1960.317,-0.014093
CZ,...,...,...,...,...,...,...
CZ,2018,biogas,603.782,0.000,0.000,603.782,0.000000
CZ,2018,ren_mun_waste,87.628,0.000,0.000,87.628,0.000000
CZ,2018,renewables,4570.730,431.137,428.974,4569.797,0.000473
CZ,2018,solid_biomass,3070.351,206.854,296.378,2980.827,-0.030033


In [34]:
# The 2018 'bioenergy' row of every country; slice(None) is what ':' expands to
df3.loc[(slice(None), 2018, 'bioenergy'), :]

Unnamed: 0_level_0,Unnamed: 1_level_0,Unnamed: 2_level_0,primary_production,imports,exports,gross_inland_consumption,dependency
country,year,fuel,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1
AT,2018,bioenergy,224047.767852,46039.811256,36676.912284,233978.983056,0.040016
CZ,2018,bioenergy,166713.268104,20343.326256,23004.498204,163922.43096,-0.016234
DK,2018,bioenergy,106376.246604,87731.421372,12966.059052,181200.182256,0.412612
NL,2018,bioenergy,171180.625572,32024.288916,66122.96976,129750.062436,-0.262803
PL,2018,bioenergy,311466.644604,39538.087668,29267.992872,321006.226536,0.031993
SK,2018,bioenergy,51824.377872,7459.998372,7719.873048,51295.082616,-0.005066


In [37]:
# Share of bioenergy in gross inland consumption (CZ, 2018)
bioenergy_cz = df3.loc[('CZ', 2018, 'bioenergy'), 'gross_inland_consumption']
total_cz = df3.loc[('CZ', 2018, 'total'), 'gross_inland_consumption']
bioenergy_cz / total_cz

0.08989817783762694