## Cleaning Eurostat data
### Let's start with electricity import and export per country

In [72]:
import pandas as pd

df= pd.read_csv('Electricity import export transformation per country.tsv', sep='\t')
df = df.set_index(df.columns[0])
df = df.transpose()
df.index.name = 'date'

#display(df)

#We will keep just the columns with import, export and electricity available to market
df.rename(columns=lambda col: col.replace('AIM', 'Electricity available to market (GWh)'), inplace=True)
df.rename(columns=lambda col: col.replace('IMP,', 'Electricity imported (GWh)'), inplace=True)
df.rename(columns=lambda col: col.replace('EXP,', 'Electricity exported (GWh)'), inplace=True)
df = df.loc[:,~df.columns.str.endswith('20')]
df = df.loc[:,df.columns.str.contains('GWh')]

# Multiindexing for date and country

## Mapping dictionary for European countries and adjacent countries
country_mapping = {
    'AL': 'Albania',
    'AT': 'Austria',
    'BA': 'Bosnia and Herzegovina',
    'BE': 'Belgium',
    'BG': 'Bulgaria',
    'CH': 'Switzerland',
    'CY': 'Cyprus',
    'CZ': 'Czech Republic',
    'DE': 'Germany',
    'DK': 'Denmark',
    'EE': 'Estonia',
    'EL': 'Greece',
    'ES': 'Spain',
    'FI': 'Finland',
    'FR': 'France',
    'HR': 'Croatia',
    'HU': 'Hungary',
    'IE': 'Ireland',
    'IS': 'Iceland',
    'IT': 'Italy',
    'LI': 'Liechtenstein',
    'LT': 'Lithuania',
    'LU': 'Luxembourg',
    'LV': 'Latvia',
    'ME': 'Montenegro',
    'MK': 'North Macedonia',
    'MT': 'Malta',
    'NL': 'Netherlands',
    'NO': 'Norway',
    'PL': 'Poland',
    'PT': 'Portugal',
    'RO': 'Romania',
    'RU': 'Russia',
    'RS': 'Serbia',
    'SE': 'Sweden',
    'SI': 'Slovenia',
    'SK': 'Slovakia',
    'TR': 'Turkey',
    'UK': 'United Kingdom',
    'XK': 'Kosovo',
    'UA': 'Ukraine',
    'GE': 'Georgia',
    'MD': 'Moldova'
}

ref = [col.split(',')[-1][-2:] for col in df.columns]
for i in range(0,len(ref)):
    df.rename(columns=lambda col: col.replace(ref[i], country_mapping.get(ref[i])), inplace=True)
display(df)

#Giving better names
df.rename(columns=lambda col: col.replace('M,', ''), inplace=True)
df.rename(columns=lambda col: col.replace('E7000,GWH,', ''), inplace=True)

display(df)


"freq,nrg_bal,siec,unit,geo\TIME_PERIOD","M,Electricity available to market (GWh),E7000,GWH,Albania","M,Electricity available to market (GWh),E7000,GWH,Austria","M,Electricity available to market (GWh),E7000,GWH,Bosnia and Herzegovina","M,Electricity available to market (GWh),E7000,GWH,Belgium","M,Electricity available to market (GWh),E7000,GWH,Bulgaria","M,Electricity available to market (GWh),E7000,GWH,Cyprus","M,Electricity available to market (GWh),E7000,GWH,Czech Republic","M,Electricity available to market (GWh),E7000,GWH,Germany","M,Electricity available to market (GWh),E7000,GWH,Denmark","M,Electricity available to market (GWh),E7000,GWH,Estonia",...,"M,Electricity imported (GWh)E7000,GWH,Portugal","M,Electricity imported (GWh)E7000,GWH,Romania","M,Electricity imported (GWh)E7000,GWH,Serbia","M,Electricity imported (GWh)E7000,GWH,Sweden","M,Electricity imported (GWh)E7000,GWH,Slovenia","M,Electricity imported (GWh)E7000,GWH,Slovakia","M,Electricity imported (GWh)E7000,GWH,Turkey","M,Electricity imported (GWh)E7000,GWH,Ukraine","M,Electricity imported (GWh)E7000,GWH,United Kingdom","M,Electricity imported (GWh)E7000,GWH,Kosovo"
date,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1
2008-01,:,6187.000,:,8400.000,3808.000,436.000,6352.000,44608.000,3430.000,864.000,...,894.000,90.000,:,840.000,587.000,1153.000,:,:,1021.000,:
2008-02,:,5725.000,:,7932.000,3281.000,393.000,5878.000,43957.000,3149.000,775.000,...,858.000,65.000,:,894.000,553.000,995.000,:,:,902.000,:
2008-03,:,5878.000,:,8047.000,2923.000,321.000,6019.000,42685.000,3190.000,816.000,...,1079.000,60.000,:,933.000,674.000,724.000,:,:,782.000,:
2008-04,:,5438.000,:,7547.000,2593.000,304.000,5492.000,44500.000,2932.000,704.000,...,959.000,45.000,:,379.000,575.000,736.000,:,:,1012.000,:
2008-05,:,5218.000,:,7222.000,2449.000,338.000,5111.000,41890.000,2830.000,637.000,...,854.000,58.000,:,351.000,456.000,431.000,:,:,1437.000,:
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
2023-10,485.513,4963.020,858.918,5824.031,2265.471,403.583,4673.827,35980.840,2906.386,717.900,...,1043.157,976.000,459.279,402.000,961.574,1191.000,279.445,:,:,243.327
2023-11,608.443,5420.151,928.415,6230.851,2659.730,367.448,5097.823,37787.840,3109.731,772.500,...,619.015,1084.000,385.535,594.000,862.748,905.000,479.960,:,:,:
2023-12,725.075,5689.853,1045.975,6392.196,3104.017,379.353,5266.225,36524.621,3355.682,835.400,...,1124.188,1015.000,494.669,1227.000,900.495,1394.000,579.964,:,:,:
2024-01,736.456,5979.092,1103.611,7028.775,3407.924,411.326,5855.079,39635.236,3484.259,918.300,...,886.930,886.000,354.739,945.000,:,1120.000,133.485,:,:,:


"freq,nrg_bal,siec,unit,geo\TIME_PERIOD","Electricity available to market (GWh),Albania","Electricity available to market (GWh),Austria","Electricity available to market (GWh),Bosnia and Herzegovina","Electricity available to market (GWh),Belgium","Electricity available to market (GWh),Bulgaria","Electricity available to market (GWh),Cyprus","Electricity available to market (GWh),Czech Republic","Electricity available to market (GWh),Germany","Electricity available to market (GWh),Denmark","Electricity available to market (GWh),Estonia",...,Electricity imported (GWh)Portugal,Electricity imported (GWh)Romania,Electricity imported (GWh)Serbia,Electricity imported (GWh)Sweden,Electricity imported (GWh)Slovenia,Electricity imported (GWh)Slovakia,Electricity imported (GWh)Turkey,Electricity imported (GWh)Ukraine,Electricity imported (GWh)United Kingdom,Electricity imported (GWh)Kosovo
date,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1
2008-01,:,6187.000,:,8400.000,3808.000,436.000,6352.000,44608.000,3430.000,864.000,...,894.000,90.000,:,840.000,587.000,1153.000,:,:,1021.000,:
2008-02,:,5725.000,:,7932.000,3281.000,393.000,5878.000,43957.000,3149.000,775.000,...,858.000,65.000,:,894.000,553.000,995.000,:,:,902.000,:
2008-03,:,5878.000,:,8047.000,2923.000,321.000,6019.000,42685.000,3190.000,816.000,...,1079.000,60.000,:,933.000,674.000,724.000,:,:,782.000,:
2008-04,:,5438.000,:,7547.000,2593.000,304.000,5492.000,44500.000,2932.000,704.000,...,959.000,45.000,:,379.000,575.000,736.000,:,:,1012.000,:
2008-05,:,5218.000,:,7222.000,2449.000,338.000,5111.000,41890.000,2830.000,637.000,...,854.000,58.000,:,351.000,456.000,431.000,:,:,1437.000,:
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
2023-10,485.513,4963.020,858.918,5824.031,2265.471,403.583,4673.827,35980.840,2906.386,717.900,...,1043.157,976.000,459.279,402.000,961.574,1191.000,279.445,:,:,243.327
2023-11,608.443,5420.151,928.415,6230.851,2659.730,367.448,5097.823,37787.840,3109.731,772.500,...,619.015,1084.000,385.535,594.000,862.748,905.000,479.960,:,:,:
2023-12,725.075,5689.853,1045.975,6392.196,3104.017,379.353,5266.225,36524.621,3355.682,835.400,...,1124.188,1015.000,494.669,1227.000,900.495,1394.000,579.964,:,:,:
2024-01,736.456,5979.092,1103.611,7028.775,3407.924,411.326,5855.079,39635.236,3484.259,918.300,...,886.930,886.000,354.739,945.000,:,1120.000,133.485,:,:,:
