In [1]:
import altair as alt
from vega_datasets import data
import pandas as pd
import numpy as np

In [2]:
def convert_time_period(row):
    year, half = row.split('-')
    if half == 'S1':
        return pd.Timestamp(f'{year}-01-01')  # First half of the year
    else:
        return pd.Timestamp(f'{year}-07-01')  # Second half of the year

In [3]:
# FINAL 1
# show how electricity prices went up over time
electricity_prices = pd.read_csv("nrg_pc_204_page_linear.csv")
electricity_prices = electricity_prices.drop(columns = ["DATAFLOW", "LAST UPDATE",\
                                                        "freq", "product", "nrg_cons",\
                                                        "unit", "tax", "currency", \
                                                        "OBS_FLAG"])

# apply the conversion to the TIME_PERIOD column
electricity_prices['TIME_PERIOD_dt'] = electricity_prices['TIME_PERIOD'].apply(convert_time_period)

# filter for the EU
electricity_prices_eu = electricity_prices[electricity_prices['geo'] == 'EU27_2020']

# make line chart with modifications
# https://altair-viz.github.io/user_guide/customization.html
chart = alt.Chart(electricity_prices_eu,
        title = alt.Title(
            ['Price of Electricity', 'in the European Union (2016-2014)'],
            subtitle = ['Prices given in Purchasing Power Standard', 'which is a standardised form of currency'],
            subtitleFontSize=10)
    ).mark_line().encode(
    x = alt.X('TIME_PERIOD_dt:T', title = 'Time'),
    y = alt.Y('OBS_VALUE:Q', title = 'Purchasing Power Standar'),
    color = alt.Color("geo:N", title = None,
                      scale = alt.Scale(domain = ['EU27_2020'],
                                        range=['#134226'])).legend(None)
)

# create vertical line
# https://github.com/vega/altair/issues/2379
line1 = alt.Chart(pd.DataFrame({'TIME_PERIOD_dt': ['2022-01-01']})).mark_rule(color='#f3892c').encode(
    x = 'TIME_PERIOD_dt:T'
)

# add text for the vertical line
text11 = alt.Chart(pd.DataFrame({
    'TIME_PERIOD_dt': ['2022-01-01'],
    'label': ['Russian sanctions']
})).mark_text(
    align='left', 
    dx=5, 
    dy=-55, 
).encode(
    x='TIME_PERIOD_dt:T',
    text='label:N'
)
text12 = alt.Chart(pd.DataFrame({
    'TIME_PERIOD_dt': ['2022-01-01'],
    'label': ['implemented']
})).mark_text(
    align='left', 
    dx=5, 
    dy=-45, 
).encode(
    x='TIME_PERIOD_dt:T',
    text='label:N'
)

# add dot to graph
# https://altair-viz.github.io/user_guide/marks/point.html
dot_data1 = pd.DataFrame()
dot_data1['time'] = ['2022-01-01']
dot_data1['value'] = 0.255

dot1 = alt.Chart(dot_data1).mark_point(filled=True, color = '#f3892c', size = 100).encode(
    x="time:T",
    y="value:Q"
)

# create vertical line
line12 = alt.Chart(pd.DataFrame({'TIME_PERIOD_dt': ['2020-06-01']})).mark_rule(color='#f3892c').encode(
    x = 'TIME_PERIOD_dt:T'
)

# add text for the vertical line
text13 = alt.Chart(pd.DataFrame({
    'TIME_PERIOD_dt': ['2020-06-01'],
    'label': ['COVID-19']
})).mark_text(
    align='right', 
    dx=-5, 
    dy=-43, 
).encode(
    x='TIME_PERIOD_dt:T',
    text='label:N'
)

dot_data12 = pd.DataFrame()
dot_data12['time'] = ['2020-06-01']
dot_data12['value'] = 0.214

dot12 = alt.Chart(dot_data12).mark_point(filled=True, color = '#f3892c', size = 100).encode(
    x="time:T",
    y="value:Q"
)

chart1 = chart + line1 + text11 + text12 + dot1 + line12 + text13 + dot12

# create standardized title format
# https://www.geeksforgeeks.org/increase-font-size-of-chart-title-in-altair/
chart1 = chart1.configure_title(
    fontSize=17,
    anchor = "start",
    offset = 5
)

chart1

In [4]:
# FINAL 2
# gas consumption in the EU 2016-2024
gas_consumption = pd.read_csv('gas_consumption.csv')

# make line chart with modifications
# https://altair-viz.github.io/user_guide/customization.html
chart = alt.Chart(gas_consumption,
        title = alt.Title(
            ["Gas Consumption", "in the European Union (2016-2024)"])
    ).mark_line().encode(
    x = alt.X('TIME_PERIOD:T', title = 'Time'),
    y = alt.Y('OBS_VALUE:Q', title = 'Terajoule'),
    color = alt.Color("geo:N", title = None,
                      scale = alt.Scale(domain = ['EU27_2020'],
                                        range=['#134226'])).legend(None)
).properties(
    width = 400,
    height = 200
)

# make trend line
trend_line = alt.Chart(gas_consumption).transform_loess(
    'TIME_PERIOD', 'OBS_VALUE', groupby=['geo'], bandwidth=0.5
).mark_line(color='#feb99a').encode(
    x='TIME_PERIOD:T',
    y='OBS_VALUE:Q'
)

# create vertical line
line2 = alt.Chart(pd.DataFrame({'TIME_PERIOD_dt': ['2022-06-06']})).mark_rule(color='#f3892c').encode(
    x = 'TIME_PERIOD_dt:T'
)

# add text for the vertical line
text21 = alt.Chart(pd.DataFrame({
    'TIME_PERIOD_dt': ['2022-06-06'],
    'label': ['Russian sanctions']
})).mark_text(
    align='left', 
    dx=5, 
    dy=-60, 
).encode(
    x='TIME_PERIOD_dt:T',
    text='label:N'
)
text22 = alt.Chart(pd.DataFrame({
    'TIME_PERIOD_dt': ['2022-06-06'],
    'label': ['implemented']
})).mark_text(
    align='left', 
    dx=5, 
    dy=-50, 
).encode(
    x='TIME_PERIOD_dt:T',
    text='label:N'
)

# create COVID vertical line
# create vertical line
line22 = alt.Chart(pd.DataFrame({'TIME_PERIOD_dt': ['2020-03-01']})).mark_rule(color='#f3892c').encode(
    x = 'TIME_PERIOD_dt:T'
)

# add text for the vertical line
text23 = alt.Chart(pd.DataFrame({
    'TIME_PERIOD_dt': ['2020-03-01'],
    'label': ['COVID-19']
})).mark_text(
    align='left', 
    dx=5, 
    dy=-83, 
).encode(
    x='TIME_PERIOD_dt:T',
    text='label:N'
)


# create first dot for legend
dot21_data = pd.DataFrame()
dot21_data['time'] = ['2024-08']
dot21_data['value'] = int(gas_consumption.loc[(gas_consumption['geo'] == 'EU27_2020') \
                                              & (gas_consumption['TIME_PERIOD'] == '2024-08')]['OBS_VALUE'])

dot21 = alt.Chart(dot21_data).mark_circle(filled=True, color = '#134226', size = 100).encode(
    x="time:T",
    y="value:Q"
)

dot21_label = dot21.mark_text(align="left", dx=8, dy=1, fontSize = 8).encode(
    text=alt.value("Total Consumption")
)

# create second dot
dot22_data = pd.DataFrame()
dot22_data['time'] = ['2024-08']
dot22_data['value'] = 900000

dot22 = alt.Chart(dot22_data).mark_circle(filled=True, color = '#feb99a', size = 100).encode(
    x="time:T",
    y="value:Q"
)

dot22_label = dot22.mark_text(align="left", dx=8, dy=1, fontSize = 8).encode(
    text=alt.value("Adjusted for Seasonality")
)

# combine charts
# https://github.com/vega/altair/issues/2146
chart2 = alt.layer(chart, trend_line, line2, text21, text22, dot21, dot21_label, dot22, \
                   dot22_label, text23, line22).resolve_scale(
    color='independent'
)

# format the title to be uniform
chart2 = chart2.configure_title(
    fontSize=17,
    anchor = "start",
    offset = 15
)

chart2

  dot21_data['value'] = int(gas_consumption.loc[(gas_consumption['geo'] == 'EU27_2020') \


In [5]:
# FINAL 3
# Share of russian imports of total consumption
import_russia = pd.read_csv("russia_gas_2021.csv")
consumption = pd.read_csv("consumption_2021.csv")

# create new table with ratio
import_ratio = pd.DataFrame()
import_ratio["country"] = import_russia["geo"]
import_ratio["ratio"] = (import_russia["OBS_VALUE"] / consumption["OBS_VALUE"]) * 100
import_ratio = import_ratio[import_ratio["country"] != "EU27_2020"]

# create country id column for visualization
# https://github.com/vega/altair/issues/2044
# https://en.wikipedia.org/wiki/ISO_3166-1_numeric
import_ratio["id"] = ["040", "056", "100", "196", "203", "276", "208", "233", "300", "724", "246", "250", "191", \
                      "348", "372", "380", "440", "442", "428", "470", "528", "616", "620", "642", "752", "705", \
                        "703"]

# create buckets for better interpretation
# https://stackoverflow.com/questions/45273731/binning-a-column-with-pandas

bins = [0, 20, 40, 60, 80, 100]
labels = ['0-20%', '21-40%', '41-60%', '61-80%', '81-100%']
import_ratio['bucket'] = pd.cut(import_ratio['ratio'], bins=bins, labels=labels, include_lowest=True)

# create map visualization
# https://altair-viz.github.io/altair-tutorial/notebooks/09-Geographic-plots.html
data_map = alt.topo_feature(data.world_110m.url, 'countries')

foreground = alt.Chart(data_map).mark_geoshape().encode(
    tooltip = [
        alt.Tooltip('properties.name:N', title='Country'),
        alt.Tooltip('ratio:Q', title = 'Ratio (%)', format = '.1f'),
        alt.Tooltip('bucket:N', title='Bucket')  
    ],
    # change n/a countries' color to grey
    # https://stackoverflow.com/questions/55229651/dealing-with-missing-values-nulls-in-altair-choropleth-map
    color = alt.Color('bucket:N', title = 'Percent', 
                      legend = alt.Legend(offset=10, titleColor='white'),
                      sort = ['81-100%', '61-80%', '41-60%', '21-40%', '0-20%'],
                      scale = alt.Scale(domain = ['81-100%', '61-80%', '41-60%', '21-40%', '0-20%'],
                                        range = ['#57560e', '#8c6c12', '#be7b14', '#f3892c','#feb99a']))
).transform_lookup(
    lookup='id',
    from_=alt.LookupData(import_ratio, 'id', ['ids', 'ratio', 'bucket'])
).properties(
    width=400,
    height=300,
    title = ["Share of Russian Gas", "in Total EU Gas Consumption (2021)"]
# get proper map of Europe
# https://stackoverflow.com/questions/61135952/vega-lite-altair-how-to-center-or-crop-a-map-of-europe
).project(
    type='mercator',
    scale= 350,                       
    center= [20,50],                     
    clipExtent= [[0, 0], [400, 300]],
# remove null 
).transform_filter(
    'isValid(datum.bucket)'
)

# make layered graph to show null values
# https://stackoverflow.com/questions/55229651/dealing-with-missing-values-nulls-in-altair-choropleth-map
background = alt.Chart(data_map).mark_geoshape(
        fill='lightgrey',
        stroke='white'
    ).project(
        type= 'mercator',
        scale= 350,                          # Magnify
        center= [20,50],                     # [lon, lat]
        clipExtent= [[0, 0], [400, 300]],    # [[left, top], [right, bottom]]
    ).properties(
        width=400, height=300
    )

# format the title to be uniform
chart_3 = background + foreground
chart_3 = chart_3.configure_title(
    fontSize=17,
    anchor = "start",
    offset = 15
)

chart_3

In [7]:
# FINAL 4
# creating the dataset

# create electricity generation pie charts
electricity_generation = pd.read_csv('electricity_generation.csv')

# filter for EU countries
electricity_generation = electricity_generation[electricity_generation['EU'] == 1]

# drop unneccesary columns
electricity_generation = electricity_generation.drop(columns = ['Area type', \
                                                                'Continent', \
                                                                'Ember region', \
                                                                'EU', 'OECD', \
                                                                'G20', 'G7', \
                                                                'ASEAN', 'Category', \
                                                                'YoY absolute change', \
                                                                f'YoY % change'])

# create 2021 and 2024 datasets
electricity_generation_2021 = electricity_generation[electricity_generation['Date'].str.contains('2021')]
electricity_generation_2024 = electricity_generation[electricity_generation['Date'].str.contains('2024')]

# filter for relevant data
electricity_generation_2021 = electricity_generation_2021.loc[(electricity_generation_2021['Subcategory'] == \
                                                               'Fuel') & (electricity_generation_2021['Unit'] == 'TWh')]
electricity_generation_2024 = electricity_generation_2024.loc[(electricity_generation_2024['Subcategory'] == \
                                                               'Fuel') & (electricity_generation_2024['Unit'] == 'TWh')]

# sum up the energy generated for the entire EU
# https://stackoverflow.com/questions/40553002/group-by-two-columns-to-get-sum-of-another-column
electricity_europe_2024 = electricity_generation_2024.groupby(['Variable'], as_index=False)['Value'].sum()
electricity_europe_2021 = electricity_generation_2021.groupby(['Variable'], as_index=False)['Value'].sum()

# create other energy row
other_2021 = electricity_europe_2021[electricity_europe_2021['Variable'].isin(['Other Renewables', \
                                                                               'Other Fossil'])]['Value'].sum()
other_2024 = electricity_europe_2024[electricity_europe_2024['Variable'].isin(['Other Renewables', \
                                                                               'Other Fossil'])]['Value'].sum()

electricity_europe_2021 = electricity_europe_2021.loc[(electricity_europe_2021['Variable'] != \
                                                       'Other Renewables') & (electricity_europe_2021['Variable'] != \
                                                        'Other Fossil')]
electricity_europe_2024 = electricity_europe_2024.loc[(electricity_europe_2024['Variable'] != \
                                                       'Other Renewables') & (electricity_europe_2024['Variable'] != \
                                                        'Other Fossil')]

# append the new "Other" row 
# https://www.geeksforgeeks.org/how-to-add-one-row-in-an-existing-pandas-dataframe/
electricity_europe_2021 = electricity_europe_2021._append({'Variable': 'Other', 'Value': other_2021}, ignore_index=True)
electricity_europe_2024 = electricity_europe_2024._append({'Variable': 'Other', 'Value': other_2024}, ignore_index=True)


In [8]:
# modify dataframe and add column to sort by
electricity_europe_2021["Order"] = [4, 1, 2, 5, 3, 6, 7, 8]
# https://github.com/vega/altair/issues/3069
# define a color scale for the "Variable" categories
color_scale_2021 = alt.Scale(
    domain=["Coal", "Gas", "Nuclear", "Bioenergy",  "Hydro", "Solar", "Wind", "Other"],
    range=['#8c6c12', '#f3892c', '#feb99a', '#134226', '#8fc6fa', '#1D435E', '#BBDFC5', '#808080']
)

# base chart
# https://github.com/vega/altair/issues/3069
base_2021 = alt.Chart(electricity_europe_2021).encode(
    theta=alt.Theta("Value:Q", type = "quantitative", stack=True),
    color=alt.Color("Variable:N",
                    legend=alt.Legend(
                        title = None,
                        orient = "right",
                    ),
                    scale=color_scale_2021
    ),
    tooltip=alt.Tooltip("Value:Q", title="Value"),
    order = "Order"
).properties(
    title=alt.Title(text="Share of Energy Sources in the EU (2021)", offset=10),
    height=200
)

# pie chart
# https://github.com/vega/altair/issues/3069
pie_2021 = base_2021.mark_arc(innerRadius=50)

# Text labels showing percentage
# https://github.com/vega/altair/issues/3069 
text_2021 = base_2021.mark_text(
    radius=120, size=12
).transform_joinaggregate(
    total_value='sum(Value)'
).transform_calculate(
    percentage="datum.Value / datum.total_value"
).encode(
    text=alt.Text('percentage:Q', format='.2%')
)

# Combine pie chart and text labels
chart_4 = pie_2021 + text_2021

# format the title to be uniform
chart_4 = chart_4.configure_title(
    fontSize=17,
    anchor = "start",
    offset = 15
)

chart_4

In [9]:
# FINAL 5
# modify dataframe and add column to sort by
electricity_europe_2024["Order"] = [4, 1, 2, 5, 3, 6, 7, 8]

# https://github.com/vega/altair/issues/3069
# Define a color scale for the "Variable" categories
color_scale_2024 = alt.Scale(
    domain=["Coal", "Gas", "Nuclear", "Bioenergy",  "Hydro", "Solar", "Wind", "Other"],
    range=['#8c6c12', '#f3892c', '#feb99a', '#134226', '#8fc6fa', '#1D435E', '#BBDFC5', '#808080']
)

# Base chart
base_2024 = alt.Chart(electricity_europe_2024).encode(
    theta=alt.Theta("Value:Q", type = "quantitative", stack=True),
    color=alt.Color("Variable:N",
                    legend=alt.Legend(
                        title = None,
                        orient = "right",
                    ),
                    scale=color_scale_2024 
    ),
    tooltip=alt.Tooltip("Value:Q", title="Value"),
    order = "Order"
).properties(
    title=alt.Title(text="Share of Energy Sources in the EU (2024)", offset=10),
    height=200
)

# Pie chart
pie_2024 = base_2024.mark_arc(innerRadius=50)

# Text labels showing percentage
# https://github.com/vega/altair/issues/3069 
text_2024 = base_2024.mark_text(
    radius=120, size=12
).transform_joinaggregate(
    total_value='sum(Value)'
).transform_calculate(
    percentage="datum.Value / datum.total_value"
).encode(
    text=alt.Text('percentage:Q', format='.2%')
)

# Combine pie chart and text labels
chart_5 = pie_2024 + text_2024

# format the title to be uniform
chart_5 = chart_5.configure_title(
    fontSize=17,
    anchor = "start",
    offset = 15
)

chart_5

In [10]:
# FINAL 6
# bubble chart for change in share of renewable energy

# get share of renewable by country for 2021
renewable_2021 = electricity_generation_2021[electricity_generation_2021['Variable'].isin(['Bioenergy', 'Solar', 'Wind', 'Other Renewables'])]
renewable_country_2021 = renewable_2021.groupby(['Country code'], as_index=False)['Value'].sum()
renewable_country_2021_total = electricity_generation_2021.groupby(['Country code'], as_index=False)['Value'].sum()
renewable_country_2021.loc[renewable_country_2021['Country code'] == 'Malta', 'Value'] = 0.0001
renewable_country_2021['Total'] = renewable_country_2021_total['Value']
renewable_country_2021['Percent'] = (renewable_country_2021['Value'] / renewable_country_2021['Total']) * 100

# same for 2024
renewable_2024 = electricity_generation_2024[electricity_generation_2024['Variable'].isin(['Bioenergy', 'Solar', 'Wind', 'Other Renewables'])]
renewable_country_2024 = renewable_2024.groupby(['Country code'], as_index=False)['Value'].sum()
renewable_country_2024_total = electricity_generation_2024.groupby(['Country code'], as_index=False)['Value'].sum()
renewable_country_2024['Total'] = renewable_country_2024_total['Value']
renewable_country_2024['Percent'] = (renewable_country_2024['Value'] / renewable_country_2024['Total']) * 100

# change by country
change = pd.DataFrame()
change['Country'] = renewable_country_2024['Country code']
change['Change'] = (renewable_country_2024['Percent'] - renewable_country_2021['Percent']) / renewable_country_2021['Percent']
# take out Malta because it goes to infinity
change = change[change['Country'] != 'MLT']

change['Order'] = [2,10,18,22,26,20,6,4,13,3,17,19,11,5,14,16,1,12,8,23,15,25,24,9,21,7]
change['2021'] = renewable_country_2021['Percent']
change['2024'] = renewable_country_2024['Percent']
change

Unnamed: 0,Country,Change,Order,2021,2024
0,AUT,0.622359,2,15.888011,25.776065
1,BEL,0.693865,10,19.364909,32.801532
2,BGR,1.730218,18,6.51801,17.795591
3,CYP,0.53109,22,15.051546,23.045267
4,CZE,0.524462,26,9.798898,14.938045
5,DEU,0.469573,20,36.800506,54.081039
6,DNK,0.187083,6,68.556543,81.382289
7,ESP,0.30154,4,36.143799,47.042616
8,EST,0.883717,13,26.712329,50.318471
9,FIN,0.175777,3,28.86243,33.935782


In [11]:
# map full names of countries to the country codes
country_code_to_name = {
    "AUT": "Austria",
    "BEL": "Belgium",
    "BGR": "Bulgaria",
    "CYP": "Cyprus",
    "CZE": "Czech Republic",
    "DEU": "Germany",
    "DNK": "Denmark",
    "ESP": "Spain",
    "EST": "Estonia",
    "FIN": "Finland",
    "FRA": "France",
    "GRC": "Greece",
    "HRV": "Croatia",
    "HUN": "Hungary",
    "IRL": "Ireland",
    "ITA": "Italy",
    "LTU": "Lithuania",
    "LUX": "Luxembourg",
    "LVA": "Latvia",
    "NLD": "Netherlands",
    "POL": "Poland",
    "PRT": "Portugal",
    "ROU": "Romania",
    "SVK": "Slovakia",
    "SVN": "Slovenia",
    "SWE": "Sweden"
}
change["Country_Name"] = change["Country"].map(country_code_to_name)

# reformat the dataset for the dot range plot
change_reformat = change.melt(id_vars=["Country_Name"], value_vars=["2021", "2024"], 
                    var_name="Year", value_name="Value")

# Define the Altair chart
base = alt.Chart(change_reformat).encode(
    y=alt.Y('Country_Name:N', 
    sort=alt.EncodingSortField(field="Order", order='descending'), title=None)
)

# Line connecting the points from 2021 to 2024
line = base.mark_line(color='#134226').encode(
    x=alt.X('Value:Q', scale=alt.Scale(domain=[0, 85], nice = False), title='Value'),
    detail='Country_Name:N'
)

# Points for 2021 and 2024 values
points = base.mark_point(size=100, filled=True).encode(
    x='Value:Q',
    color=alt.Color('Year:O', scale=alt.Scale(domain=["2021", "2024"], range=['#99c2a4', '#134226']))
)

# Combine line and points
chart_6 = (line + points).properties(
    title = ["Share of Renewable Energy in Total", "Energy Production (2021 to 2024)"],
    width=300,
    height=600
)

# format the title to be uniform
chart_6 = chart_6.configure_title(
    fontSize=17,
    anchor = "start",
    offset = 15
)
chart_6

In [12]:
# FINAL 7
# amount of natural gas import by partner country 
import_2021 = pd.read_csv('import_country_2021.csv')
import_2024 = pd.read_csv('import_country_2024.csv')

# add azerbaijan
azer_2021 = pd.read_csv("azerbaijan_2021.csv")
azer_2024 = pd.read_csv("azerbaijan_2024.csv")
azer_2021 = azer_2021.loc[azer_2021["partner"] == "AZ"]
import_2021 = import_2021._append(azer_2021)
azer_2024["partner"] = "AZ"
import_2024 = import_2024._append(azer_2024)

# group the data by import country
import_2024 = import_2024.groupby('partner', as_index=False)['OBS_VALUE'].sum()

# group the two datasets together
import_2024['TIME_PERIOD'] = ['2024', '2024', '2024', '2024', '2024', '2024', '2024', '2024']
imports = import_2021
imports = imports._append(import_2024)

# calculate other imports from Total
imports = imports._append({'partner': 'Other', 'OBS_VALUE': 32749.24, 'TIME_PERIOD': 2021}, ignore_index=True)
imports = imports._append({'partner': 'Other', 'OBS_VALUE': 44052.02, 'TIME_PERIOD': '2024'}, ignore_index=True)
imports = imports.loc[imports['partner'] != 'TOTAL']

country_names = {'RU' : 'Russia', 'NO':'Norway', 'DZ':'Algeria', 'UK': 'United Kingdom', 'US':'United States', \
                 'QA':'Qatar', 'Other':'Other', 'AZ':'Azerbaijan'}
imports = imports.replace({'partner':country_names})

# https://altair-viz.github.io/gallery/grouped_bar_chart.html
# https://stackoverflow.com/questions/78219650/custom-ordering-in-a-grouped-bar-chart-not-applied-in-altair
chart_7 = alt.Chart(imports).mark_bar().encode(
    x=alt.X(
        'TIME_PERIOD:O',
            axis=alt.Axis(labelAngle=-0, title=None,)
        ),
    y= alt.Y(
        'OBS_VALUE:Q',
        title = 'Million cubic meters'),
    color = alt.Color('TIME_PERIOD:N', title = None,
                      scale = alt.Scale(domain = [2021, '2024'],
                                        range = ['#8c6c12', '#f3892c'])).legend(None),
    column=alt.Column(
        'partner:N',
        header=alt.Header(title=None, labelOrient='bottom'),
        sort = ['Russia', 'Norway', 'Algeria', 'United States', 'Qatar', 'Azerbaijan', 'United Kingdom', 'Other']
    ),
    xOffset='year:O',
).properties(
    title = ['Imports of Natural Gas to the European Union','by Partner Country']
)

# format the title to be uniform
chart_7 = chart_7.configure_title(
    fontSize=17,
    anchor = "start",
    offset = 15
)
chart_7

In [13]:
# gas flow by various channels
route = pd.read_csv('gas_lines.csv', sep = ';')
route

# https://altair-viz.github.io/gallery/simple_stacked_area_chart.html
# format the data so it can work as a stacked chart
# get only nordstream data
nordstream = pd.DataFrame()
nordstream['Time'] = route['Year']
nordstream['Volume'] = route['Nordstream']
nordstream['Pipeline'] = 'Nord Stream'

# get only ukraine data
ukraine = pd.DataFrame()
ukraine['Time'] = route['Year']
ukraine['Volume'] = route['Ukraine Gas Transit']
ukraine['Pipeline'] = 'Ukraine Gas Transit'

# get only yamal data
yamal = pd.DataFrame()
yamal['Time'] = route['Year']
yamal['Volume'] = route['Yamal']
yamal['Pipeline'] = 'Yamal'

# get only turk data
turk = pd.DataFrame()
turk['Time'] = route['Year']
turk['Volume'] = route['Turkstream']
turk['Pipeline'] = 'TurkStream'

# combine the datasets into the pipelines data
pipelines = nordstream._append(ukraine)
pipelines = pipelines._append(yamal)
pipelines = pipelines._append(turk)

# fill any nas
pipelines['Volume'] = pipelines['Volume'].fillna(method='ffill')
pipelines['Volume'] = pipelines['Volume'].interpolate(method='linear')

# convert the time variable
pipelines['Time'] = pd.to_datetime(pipelines['Time'], dayfirst=True)

  pipelines['Volume'] = pipelines['Volume'].fillna(method='ffill')


In [14]:
# FINAL 8
stacked = alt.Chart(pipelines).mark_area().encode(
    # set a tick every year
    # https://stackoverflow.com/questions/70875690/change-date-axis-ticks-in-altair-to-show-years
    x=alt.X("Time:T", axis=alt.Axis(
        tickCount = "year")),
    y=alt.Y("Volume:Q", title = 'Million cubic meters per week'),
    color=alt.Color("Pipeline:N",
                    scale = alt.Scale(range = ['#8c6c12', '#be7b14', '#f3892c', '#feb99a'],
                                      domain = ['Nord Stream', 'TurkStream', 'Ukraine Gas Transit',
                                                'Yamal']))
).properties(
    title = 'Russian gas exports to the EU by Pipeline (2021-2024)',
    height = 200,
    width = 500
)

# create vertical line
line_81 = alt.Chart(pd.DataFrame({'Time': ['2022-09-26']})).mark_rule(color='#f3892c').encode(
    x = 'Time:T'
)

# create vertical line
line_82 = alt.Chart(pd.DataFrame({'Time': ['2022-06-06']})).mark_rule(color='#f3892c').encode(
    x = 'Time:T'
)

# add text for the vertical line
text_81 = alt.Chart(pd.DataFrame({
    'Time': ['2022-09-26'],
    'label': ['Nord Stream pipeline']
})).mark_text(
    align='left', 
    dx=5, 
    dy=-5, 
).encode(
    x='Time:T',
    text='label:N'
)
text_82 = alt.Chart(pd.DataFrame({
    'Time': ['2022-09-26'],
    'label': ['blown up']
})).mark_text(
    align='left', 
    dx=5, 
    dy=5, 
).encode(
    x='Time:T',
    text='label:N'
)

# add text for the vertical line
text_83 = alt.Chart(pd.DataFrame({
    'Time': ['2022-06-06'],
    'label': ['Russian sanctions']
})).mark_text(
    align='right', 
    dx=-5, 
    dy=-75, 
).encode(
    x='Time:T',
    text='label:N'
)
text_84 = alt.Chart(pd.DataFrame({
    'Time': ['2022-06-06'],
    'label': ['implemented']
})).mark_text(
    align='right', 
    dx=-5, 
    dy=-65, 
).encode(
    x='Time:T',
    text='label:N'
)

# combine chart
chart_8 = line_81 + line_82 + stacked + text_81 + text_82 + text_83 + text_84

# format the title to be uniform
chart_8 = chart_8.configure_title(
    fontSize=17,
    anchor = "start",
    offset = 15
)
chart_8