# Eurostat energy balances 2002–2018 (April 2020 edition)

In [16]:
def read_archive(file, var, var_key, col_start, col_end):
    """Gather ``get_values`` results for the selected countries in a zip archive.

    Every archive member whose name starts with one of the two-letter country
    prefixes below is read fully into memory and handed to ``get_values``;
    the dictionaries it returns are merged into one.

    NOTE(review): a function with the same name is defined again further down
    in this notebook; whichever cell is executed last is the one in effect.

    Args:
        file: path or file-like object accepted by ``zipfile.ZipFile``.
        var: row label forwarded to ``get_values``.
        var_key: key label forwarded to ``get_values``.
        col_start: first column label forwarded to ``get_values``.
        col_end: last column label forwarded to ``get_values``.

    Returns:
        dict: the merged output of all ``get_values`` calls.
    """
    countries = ('CZ', 'AT', 'DK', 'NL', 'PL', 'SK')
    collected = {}

    with zipfile.ZipFile(file, 'r') as archive:
        for member in archive.namelist():
            # str.startswith accepts a tuple: True if any prefix matches.
            if not member.startswith(countries):
                continue
            prefix = member[:2]
            payload = archive.read(member)
            collected.update(
                get_values(payload, prefix, var, var_key, col_start, col_end)
            )
    return collected

In [1]:
import io
import os
import zipfile

import numpy as np
import pandas as pd
import pyxlsb
import requests
from scipy.stats import linregress

In [2]:
def get_values(file, country, variable, fuel, col_start, col_end, years=range(2002, 2019)):
    """Read one balance row from the yearly sheets of an ``.xlsb`` workbook.

    For every year in ``years`` the sheet named after that year is parsed
    (first four rows skipped, second column used as the index) and the cells
    in row ``variable`` between columns ``col_start`` and ``col_end``
    (label slice, both ends included) are summed.

    Args:
        file: workbook as raw ``bytes``, a path, or a binary file-like object.
        country: country code stored in the result keys.
        variable: index label of the row to read.
        fuel: fuel label stored in the result keys.
        col_start: label of the first column to sum.
        col_end: label of the last column to sum.
        years: iterable of years to read; defaults to 2002-2018.

    Returns:
        dict: ``{(country, year, fuel): summed value}`` with one entry per year.
    """
    # Passing raw bytes to the pandas Excel readers is deprecated; a BytesIO
    # wrapper is the supported equivalent.
    source = io.BytesIO(file) if isinstance(file, (bytes, bytearray)) else file

    trajectory = {}
    # Open the workbook once and parse each sheet from it, instead of
    # re-opening the whole file for every single year.
    with pd.ExcelFile(source, engine='pyxlsb') as workbook:
        for year in years:
            df = workbook.parse(
                sheet_name=str(year),
                skiprows=[0, 1, 2, 3],
                index_col=1,
            )
            selection = df.loc[variable, col_start:col_end]
            try:
                total = selection.sum()
            except TypeError:
                # Cells that cannot be added as-is (e.g. text mixed with
                # numbers) are coerced to NaN, which sum() then ignores.
                total = pd.to_numeric(selection, errors='coerce').sum()
            trajectory[(country, year, fuel)] = total
    return trajectory


In [3]:
# Conversion factor, presumably terajoules per ktoe (1 ktoe = 41.868 TJ) — TODO confirm.
# It is not referenced by any cell visible in this part of the notebook.
tj_ktoe = 41.868

In [4]:
# Download the Eurostat energy balances archive (April 2020 edition) and store
# it next to the notebook as 'eurostat_balances_2020.zip'.
url = 'https://ec.europa.eu/eurostat/documents/38154/4956218/Energy-Balances-April-2020-edition.zip/69da6e9f-bf8f-cd8e-f4ad-50b52f8ce616'

# A timeout keeps the cell from hanging forever on a stalled connection.
r = requests.get(url, timeout=60)
# Stop on HTTP errors instead of silently saving an error page as the .zip file.
r.raise_for_status()
with open('eurostat_balances_2020.zip', 'wb') as f:
    f.write(r.content)

In [3]:
def read_archive(file, var, fuel_key, col_start, col_end):
    """Run ``get_values`` on each selected country's workbook in a zip archive.

    Archive members are selected by name: only those starting with one of the
    two-letter country prefixes listed below are processed. The first two
    characters of the member name are passed on as the country code.

    Args:
        file: path or file-like object accepted by ``zipfile.ZipFile``.
        var: row label forwarded to ``get_values``.
        fuel_key: fuel label forwarded to ``get_values``.
        col_start: first column label forwarded to ``get_values``.
        col_end: last column label forwarded to ``get_values``.

    Returns:
        dict: union of the dictionaries returned by ``get_values``.
    """
    wanted_prefixes = ('CZ', 'AT', 'DK', 'NL', 'PL', 'SK')
    results = {}

    with zipfile.ZipFile(file, 'r') as archive:
        for member_name in archive.namelist():
            # Skip members that belong to countries outside the selection.
            if not member_name.startswith(wanted_prefixes):
                continue
            with archive.open(member_name) as member:
                code = member_name[:2]
                raw_bytes = member.read()
                results.update(
                    get_values(raw_bytes, code, var, fuel_key, col_start, col_end)
                )
    return results

In [4]:
# All variables of interest: each entry is a row label looked up in the
# balance sheets and becomes one column of the final table.
variables = [
    'Primary production',
    'Imports',
    'Exports',
    'Gross inland consumption',
]

# (fuel key used in the index, first column label, last column label);
# values in the columns from first to last are summed by get_values.
fuel_tuples = [
    ('total', 'Total', 'Total'),
    ('renewables', 'Renewables and biofuels', 'Renewables and biofuels'),
    ('bioenergy', 'Bioenergy', 'Bioenergy'),
    ('solid_biomass', 'Primary solid biofuels', 'Primary solid biofuels'),
    ('biofuels', 'Pure biogasoline', 'Other liquid biofuels'),
    ('biogas', 'Biogases', 'Biogases'),
    ('ren_mun_waste', 'Renewable municipal waste', 'Renewable municipal waste'),
]

# column name -> {(country, year, fuel): value}
all_data = {}

for variable in variables:
    # e.g. 'Gross inland consumption' -> 'gross_inland_consumption'
    column = variable.lower().replace(' ', '_')
    for fuel_key, start, end in fuel_tuples:
        data = read_archive('eurostat_balances_2020.zip', variable, fuel_key, start, end)
        if column not in all_data:
            all_data[column] = {}
        all_data[column].update(data)

# Tuple keys become a three-level MultiIndex on the rows.
df = pd.DataFrame(all_data)
df.index.names = ['country', 'year', 'fuel']
df

Unnamed: 0_level_0,Unnamed: 1_level_0,Unnamed: 2_level_0,primary_production,imports,exports,gross_inland_consumption
country,year,fuel,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1
AT,2002,total,9831.767,24645.177,3538.177,31002.109
AT,2003,total,9483.763,27068.006,3881.256,32638.250
AT,2004,total,9842.878,28084.148,4434.439,33249.096
AT,2005,total,9888.261,29620.112,4932.991,34376.159
AT,2006,total,10326.256,30536.606,5451.498,34687.504
...,...,...,...,...,...,...
SK,2014,ren_mun_waste,11.584,0.000,0.000,11.584
SK,2015,ren_mun_waste,14.928,0.000,0.000,14.928
SK,2016,ren_mun_waste,19.466,0.000,0.000,19.466
SK,2017,ren_mun_waste,28.518,0.000,0.000,28.518


In [5]:
# Save the combined table; numbers are written with a comma as the decimal mark.
df.to_csv('bioenergy_2002_2018.csv', decimal=',')

In [24]:
# All rows for CZ with a year label from 2002 to 2018 (both included), every column.
df.loc[('CZ', slice(2002, 2018)), :]

Unnamed: 0_level_0,Unnamed: 1_level_0,Unnamed: 2_level_0,gross_inland_consumption
country,year,fuel,Unnamed: 3_level_1
CZ,2002,bioenergy,1734.145
CZ,2002,biofuels,64.512
CZ,2002,solid_biomass,1584.265
CZ,2002,total,42937.716
CZ,2003,bioenergy,1702.880
CZ,...,...,...
CZ,2017,total,43464.680
CZ,2018,bioenergy,3915.220
CZ,2018,biofuels,308.708
CZ,2018,solid_biomass,2980.827
