In [1]:
import gc
import os
from pathlib import Path
import random
import sys

from tqdm.notebook import tqdm
import numpy as np
import pandas as pd
import scipy as sp


import matplotlib.pyplot as plt
import seaborn as sns

from IPython.core.display import display, HTML

# --- plotly ---
from plotly import tools, subplots
import plotly.offline as py
py.init_notebook_mode(connected=True)
import plotly.graph_objs as go
import plotly.express as px
import plotly.figure_factory as ff
import plotly.io as pio
pio.templates.default = "plotly_dark"

# --- models ---
from sklearn import preprocessing
from sklearn.model_selection import KFold
import lightgbm as lgb
import xgboost as xgb
import catboost as cb

# --- setup ---
pd.set_option('max_columns', 50)

In [2]:
vaccination = pd.read_csv('../input/covid-world-vaccination-progress/country_vaccinations.csv')
vaccination

Unnamed: 0,country,iso_code,date,total_vaccinations,people_vaccinated,people_fully_vaccinated,daily_vaccinations_raw,daily_vaccinations,total_vaccinations_per_hundred,people_vaccinated_per_hundred,people_fully_vaccinated_per_hundred,daily_vaccinations_per_million,vaccines,source_name,source_website
0,Afghanistan,AFG,2021-02-22,0.0,0.0,,,,0.00,0.00,,,Oxford/AstraZeneca,Government of Afghanistan,https://reliefweb.int/report/afghanistan/afgha...
1,Afghanistan,AFG,2021-02-23,,,,,1367.0,,,,35.0,Oxford/AstraZeneca,Government of Afghanistan,https://reliefweb.int/report/afghanistan/afgha...
2,Afghanistan,AFG,2021-02-24,,,,,1367.0,,,,35.0,Oxford/AstraZeneca,Government of Afghanistan,https://reliefweb.int/report/afghanistan/afgha...
3,Afghanistan,AFG,2021-02-25,,,,,1367.0,,,,35.0,Oxford/AstraZeneca,Government of Afghanistan,https://reliefweb.int/report/afghanistan/afgha...
4,Afghanistan,AFG,2021-02-26,,,,,1367.0,,,,35.0,Oxford/AstraZeneca,Government of Afghanistan,https://reliefweb.int/report/afghanistan/afgha...
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
14225,Zimbabwe,ZWE,2021-04-23,365985.0,319732.0,46253.0,14186.0,8755.0,2.46,2.15,0.31,589.0,Sinopharm/Beijing,Ministry of Health,https://twitter.com/MoHCCZim/status/1387135728...
14226,Zimbabwe,ZWE,2021-04-24,372099.0,324488.0,47611.0,6114.0,8600.0,2.50,2.18,0.32,579.0,Sinopharm/Beijing,Ministry of Health,https://twitter.com/MoHCCZim/status/1387135728...
14227,Zimbabwe,ZWE,2021-04-25,390018.0,336600.0,53418.0,17919.0,10796.0,2.62,2.26,0.36,726.0,Sinopharm/Beijing,Ministry of Health,https://twitter.com/MoHCCZim/status/1387135728...
14228,Zimbabwe,ZWE,2021-04-26,411610.0,353834.0,57776.0,21592.0,13517.0,2.77,2.38,0.39,909.0,Sinopharm/Beijing,Ministry of Health,https://twitter.com/MoHCCZim/status/1387135728...


In [3]:
europe_country_list =list([
    'Austria','Belgium','Bulgaria','Czechia','Denmark','France','Germany','Greece',
    'Italy','Luxembourg','Lithuania','Norway','Poland','Portugal','Romania','Slovakia','Slovenia',
    'Spain', 'United Kingdom', 'Russia', 'Serbia', 'Ukraine'])

vaccination = vaccination[vaccination['country'].isin(europe_country_list)]
vaccination.shape

(2585, 15)

In [4]:
vaccination = vaccination.drop(['iso_code', 'source_name', 'source_website'], axis = 1).reset_index()
vaccination

Unnamed: 0,index,country,date,total_vaccinations,people_vaccinated,people_fully_vaccinated,daily_vaccinations_raw,daily_vaccinations,total_vaccinations_per_hundred,people_vaccinated_per_hundred,people_fully_vaccinated_per_hundred,daily_vaccinations_per_million,vaccines
0,692,Austria,2020-12-27,732.0,732.0,,,,0.01,0.01,,,"Johnson&Johnson, Moderna, Oxford/AstraZeneca, ..."
1,693,Austria,2020-12-28,2031.0,2031.0,,1299.0,1299.0,0.02,0.02,,144.0,"Johnson&Johnson, Moderna, Oxford/AstraZeneca, ..."
2,694,Austria,2020-12-29,3651.0,3651.0,,1620.0,1460.0,0.04,0.04,,162.0,"Johnson&Johnson, Moderna, Oxford/AstraZeneca, ..."
3,695,Austria,2020-12-30,5003.0,5003.0,,1352.0,1424.0,0.06,0.06,,158.0,"Johnson&Johnson, Moderna, Oxford/AstraZeneca, ..."
4,696,Austria,2020-12-31,5011.0,5011.0,,8.0,1070.0,0.06,0.06,,119.0,"Johnson&Johnson, Moderna, Oxford/AstraZeneca, ..."
...,...,...,...,...,...,...,...,...,...,...,...,...,...
2580,13708,United Kingdom,2021-04-22,45012308.0,33388637.0,11623671.0,562056.0,500934.0,66.31,49.18,17.12,7379.0,"Moderna, Oxford/AstraZeneca, Pfizer/BioNTech"
2581,13709,United Kingdom,2021-04-23,45613109.0,33524423.0,12088686.0,600801.0,496017.0,67.19,49.38,17.81,7307.0,"Moderna, Oxford/AstraZeneca, Pfizer/BioNTech"
2582,13710,United Kingdom,2021-04-24,46309512.0,33673190.0,12636322.0,696403.0,504206.0,68.22,49.60,18.61,7427.0,"Moderna, Oxford/AstraZeneca, Pfizer/BioNTech"
2583,13711,United Kingdom,2021-04-25,46650008.0,33752885.0,12897123.0,340496.0,509360.0,68.72,49.72,19.00,7503.0,"Moderna, Oxford/AstraZeneca, Pfizer/BioNTech"


In [5]:
vcc_all = vaccination.drop(['vaccines'], axis = 1)
vcc_all = vcc_all.groupby(['date']).sum()
vcc_all.reset_index(inplace=True)
vcc_all

Unnamed: 0,date,index,total_vaccinations,people_vaccinated,people_fully_vaccinated,daily_vaccinations_raw,daily_vaccinations,total_vaccinations_per_hundred,people_vaccinated_per_hundred,people_fully_vaccinated_per_hundred,daily_vaccinations_per_million
0,2020-12-15,10845,28500.0,28500.0,0.0,0.0,0.0,0.02,0.02,0.00,0.0
1,2020-12-16,10846,0.0,0.0,0.0,0.0,3357.0,0.00,0.00,0.00,23.0
2,2020-12-17,10847,0.0,0.0,0.0,0.0,3357.0,0.00,0.00,0.00,23.0
3,2020-12-18,10848,0.0,0.0,0.0,0.0,3357.0,0.00,0.00,0.00,23.0
4,2020-12-19,10849,0.0,0.0,0.0,0.0,3357.0,0.00,0.00,0.00,23.0
...,...,...,...,...,...,...,...,...,...,...,...
129,2021-04-23,176097,178678253.0,128386932.0,50295190.0,3501740.0,2957607.0,591.19,420.69,170.52,100892.0
130,2021-04-24,176119,156527782.0,111451445.0,45082148.0,2707878.0,3028411.0,540.55,382.23,158.32,101679.0
131,2021-04-25,176141,177487799.0,127524561.0,50005726.0,1267949.0,2985132.0,557.81,402.62,155.31,100550.0
132,2021-04-26,176163,175563635.0,125186333.0,50444263.0,2261828.0,3022095.0,612.82,434.86,178.10,99973.0


In [6]:

vaccination.fillna(method='ffill', inplace=True)
countries = vaccination.groupby('country')['total_vaccinations'].max().sort_values(ascending=False)[:10].index.tolist()

line_plots = []
for c in countries:
    vacc_data = vaccination[vaccination.country == c]
    line_plots.append(
        go.Scatter(
            name = c,
            x = vacc_data.date,
            mode='lines',
            y=vacc_data['total_vaccinations'],
        )
    )
    
fig = go.Figure(line_plots)
fig.update_layout(
    title ="Rate of vaccinations for the top-10 vaccinated countries",
    yaxis_title="Count",
    hovermode='x',
    legend_orientation = 'h',

)


fig.show()

In [7]:

fig = px.line(vaccination, x="date", y="people_vaccinated_per_hundred", color='country', 
              title="date vs people_vaccinated_per_hundred")
fig.show()

In [8]:
# Grouping data by vaccines
vaccines = vaccination.groupby(['country', 'vaccines']).count().reset_index()[['country', 'vaccines']]\
                  .groupby('vaccines').count()['country'].reset_index().sort_values('country', ascending=False)

# Select only the required columns
vaccines.columns = ['Vaccines', 'Number of countries used']

fig = px.bar(vaccines, x='Vaccines', y= 'Number of countries used')
fig.show()