# Import required libraries

In [242]:
import pandas as pd
import altair as alt

# Turn off Altair's default cap on the number of rows (5000) so that every
# row of the datasets loaded below can be plotted.
alt.data_transformers.disable_max_rows()

DataTransformerRegistry.enable('default')

# Transform data (merge rows by year+month)

In [243]:
# Only rows whose country name appears in this list are kept when the
# collapsed CSV files are written.
# NOTE(review): Greece and the United Kingdom are not in this list -- confirm
# that the omission is intentional.
european_countries = [
    'Austria',
    'Belgium',
    'Bulgaria',
    'Croatia',
    'Cyprus',
    'Czechia',
    'Denmark',
    'Estonia',
    'Finland',
    'France',
    'Germany',
    'Hungary',
    'Iceland',
    'Ireland',
    'Italy',
    'Latvia',
    'Liechtenstein',
    'Lithuania',
    'Luxembourg',
    'Malta',
    'Netherlands',
    'Norway',
    'Poland',
    'Portugal',
    'Romania',
    'Slovakia',
    'Slovenia',
    'Spain',
    'Sweden',
    'Switzerland',
    'Ukraine',
]

In [244]:
# Collapse the per-date rows of country_vaccinations.csv into one row per
# (country, year-month) by summing three per-hundred columns, then write the
# rows for the countries in `european_countries` to a new CSV file.
#
# NOTE(review): the input path is absolute and machine-specific; consider
# moving it to a configurable data directory.
# NOTE(review): if the per-hundred source columns are cumulative totals,
# summing them over a month overstates the figures -- confirm this is the
# intended aggregation.
country_vaccinations = dict()
with open("/Users/alexey/Downloads/archive/country_vaccinations.csv") as f:
    # skip header (the default keeps an empty file from raising StopIteration)
    next(f, None)
    for line in f:
        line = line.rstrip()
        if not line:
            # tolerate blank lines instead of failing on a missing column
            continue
        # plain comma split: only columns 0, 2, 8, 9 and 10 are read below
        fields = line.split(",")
        # only keep year and month (drop the last dash-separated component)
        year_month = "-".join(fields[2].split("-")[:-1])
        # key is "<country>+<year-month>"
        key = f"{fields[0]}+{year_month}"
        # set up the accumulator for this key on first sight
        stats = country_vaccinations.setdefault(key, {'tvph': 0, 'pvph': 0, 'pfvph': 0})
        for stat_name, column in (('tvph', 8), ('pvph', 9), ('pfvph', 10)):
            try:
                stats[stat_name] += float(fields[column])
            except ValueError:
                # empty or non-numeric cell: contributes nothing to the sum
                continue

# generate new file with merged rows; `with` closes (and flushes) the file
# even if writing a row fails
with open("country_vaccinations_collapsed.csv", 'w') as output:
    output.write("Country,Date,Total Vaccinations Per Hundred,People Vaccinated Per Hundred,People Fully Vaccinated Per Hundred\n")
    for k, v in country_vaccinations.items():
        # split on the last "+" only, so the date part is always isolated
        country, date = k.rsplit("+", 1)
        if country not in european_countries:
            continue
        output.write(f"{country},{date},{v['tvph']},{v['pvph']},{v['pfvph']}\n")

In [245]:
# Collapse the per-date rows of country_vaccinations_by_manufacturer.csv into
# one total per (country, vaccine, year-month), then write the rows for the
# countries in `european_countries` to a new CSV file.
#
# NOTE(review): the input path is absolute and machine-specific; consider
# moving it to a configurable data directory.
# NOTE(review): if column 3 is a cumulative total, summing it over a month
# overstates the figure -- confirm this is the intended aggregation.
country_vaccinations_by_manufacturer = dict()
with open("/Users/alexey/Downloads/archive/country_vaccinations_by_manufacturer.csv") as f:
    # skip header (the default keeps an empty file from raising StopIteration)
    next(f, None)
    for line in f:
        line = line.rstrip()
        if not line:
            # tolerate blank lines instead of failing on a missing column
            continue
        fields = line.split(",")
        # only keep year and month (drop the last dash-separated component)
        year_month = "-".join(fields[1].split("-")[:-1])
        # key is "<country>+<vaccine>+<year-month>"
        key = f"{fields[0]}+{fields[2]}+{year_month}"
        # int() is intentionally strict: a non-integer count raises ValueError
        country_vaccinations_by_manufacturer[key] = (
            country_vaccinations_by_manufacturer.get(key, 0) + int(fields[3])
        )

# `with` closes (and flushes) the output file even if writing a row fails
with open("country_vaccinations_by_manufacturer_collapsed.csv", 'w') as output:
    output.write("Country,Date,Vaccine,Total Vaccinations\n")
    for k, v in country_vaccinations_by_manufacturer.items():
        country, vaccine, date = k.split("+")
        if country not in european_countries:
            continue
        output.write(f"{country},{date},{vaccine},{v}\n")

# Import data (transformed - with rows merged by year+month)

In [246]:
# Load the per-country monthly CSV (same file name as the one written by the
# transform cell above) into a DataFrame.
data = pd.read_csv("country_vaccinations_collapsed.csv")
# Last expression of the cell: display the first rows as a quick sanity check.
data.head()

Unnamed: 0,Country,Date,Total Vaccinations Per Hundred,People Vaccinated Per Hundred,People Fully Vaccinated Per Hundred
0,Austria,2021-01,6.96,6.65,0.31
1,Austria,2021-02,23.68,15.08,8.58
2,Austria,2021-03,55.71,40.75,14.96
3,Austria,2021-04,142.68,102.35,40.35
4,Austria,2021-05,197.94,143.26,55.61


In [247]:
# Load the per-country, per-vaccine monthly CSV (same file name as the one
# written by the manufacturer transform cell above) into a DataFrame.
data2 = pd.read_csv("country_vaccinations_by_manufacturer_collapsed.csv")
# Last expression of the cell: display the first rows as a quick sanity check.
data2.head()

Unnamed: 0,Country,Date,Vaccine,Total Vaccinations
0,Austria,2021-01,Johnson&Johnson,4
1,Austria,2021-01,Moderna,4788
2,Austria,2021-01,Novavax,0
3,Austria,2021-01,Oxford/AstraZeneca,0
4,Austria,2021-01,Pfizer/BioNTech,624094


In [249]:
# A single-value selection on "Country", bound to a dropdown whose options are
# the distinct countries in `data`. Every chart below reacts to it.
# NOTE(review): alt.selection(type="single") and .add_selection() are
# deprecated as of Altair 5 (selection_point / add_params) -- confirm the
# installed Altair version before migrating.
country_options = data["Country"].unique()
dropdown = alt.binding_select(options=country_options, name="Select a country:")
selection = alt.selection(type="single", fields=["Country"], bind=dropdown)

# ---- charts built on the per-country monthly data ----
base = alt.Chart(data)

# All countries are drawn; bars outside the current selection are faded.
tv = (
    base.mark_bar()
    .encode(
        x="Date",
        y="Total Vaccinations Per Hundred",
        color=alt.Color("Country", legend=None),
        tooltip=["Country"],
        opacity=alt.condition(selection, alt.value(1), alt.value(0.2)),
    )
    .add_selection(selection)
    .interactive()
)

# The next two charts are filtered by the selection.
pv = (
    base.mark_bar()
    .encode(x="Date", y="People Vaccinated Per Hundred")
    .transform_filter(selection)
)

pfv = (
    base.mark_bar()
    .encode(x="Date", y="People Fully Vaccinated Per Hundred")
    .transform_filter(selection)
)

# ---- charts built on the per-vaccine monthly data ----
base2 = alt.Chart(data2)

# Same fade-unselected treatment as `tv`, on absolute vaccination counts.
total_vaccination = (
    base2.mark_bar()
    .encode(
        x="Date",
        y="Total Vaccinations",
        color=alt.Color("Country", legend=None),
        tooltip=["Country"],
        opacity=alt.condition(selection, alt.value(1), alt.value(0.2)),
    )
    .add_selection(selection)
    .interactive()
)

# Filtered by the selection and coloured by vaccine.
vaccine_type = (
    base2.mark_bar()
    .encode(
        x="Date",
        y="Total Vaccinations",
        color="Vaccine",
        tooltip=["Vaccine"],
    )
    .transform_filter(selection)
    .interactive()
)

# Layout: three per-hundred charts on top, two absolute-count charts below.
# The final expression is what the cell displays.
top_row = tv | pv | pfv
bottom_row = total_vaccination | vaccine_type
top_row & bottom_row