In [1]:
# Example 1: learning about data collect using a real Kaggle dataset.
# Dataset: https://www.kaggle.com/gpreda/covid-world-vaccination-progress (Data Update: 2021/09/21)
# Author: Humberto Bianchini

In [2]:
# 1) Importing all necessary libraries.
import pandas as pd
import os

In [3]:
# 2) Reading and showing the first rows of the dataset.
dataframe = pd.read_csv('datasets/country_vaccinations.csv')
dataframe.head()

Unnamed: 0,country,iso_code,date,total_vaccinations,people_vaccinated,people_fully_vaccinated,daily_vaccinations_raw,daily_vaccinations,total_vaccinations_per_hundred,people_vaccinated_per_hundred,people_fully_vaccinated_per_hundred,daily_vaccinations_per_million,vaccines,source_name,source_website
0,Afghanistan,AFG,2021-02-22,0.0,0.0,,,,0.0,0.0,,,"Johnson&Johnson, Oxford/AstraZeneca, Pfizer/Bi...",World Health Organization,https://app.powerbi.com/view?r=eyJrIjoiYTkyM2V...
1,Afghanistan,AFG,2021-02-23,,,,,1367.0,,,,34.0,"Johnson&Johnson, Oxford/AstraZeneca, Pfizer/Bi...",World Health Organization,https://app.powerbi.com/view?r=eyJrIjoiYTkyM2V...
2,Afghanistan,AFG,2021-02-24,,,,,1367.0,,,,34.0,"Johnson&Johnson, Oxford/AstraZeneca, Pfizer/Bi...",World Health Organization,https://app.powerbi.com/view?r=eyJrIjoiYTkyM2V...
3,Afghanistan,AFG,2021-02-25,,,,,1367.0,,,,34.0,"Johnson&Johnson, Oxford/AstraZeneca, Pfizer/Bi...",World Health Organization,https://app.powerbi.com/view?r=eyJrIjoiYTkyM2V...
4,Afghanistan,AFG,2021-02-26,,,,,1367.0,,,,34.0,"Johnson&Johnson, Oxford/AstraZeneca, Pfizer/Bi...",World Health Organization,https://app.powerbi.com/view?r=eyJrIjoiYTkyM2V...


In [4]:
# 3) Data analysis

In [5]:
# Checking data referring to Brazil (descending order per day)
dataframe.query('country == "Brazil"').sort_values('date', ascending=False)[:2]

Unnamed: 0,country,iso_code,date,total_vaccinations,people_vaccinated,people_fully_vaccinated,daily_vaccinations_raw,daily_vaccinations,total_vaccinations_per_hundred,people_vaccinated_per_hundred,people_fully_vaccinated_per_hundred,daily_vaccinations_per_million,vaccines,source_name,source_website
6089,Brazil,BRA,2021-09-20,222478696.0,146421254.0,80596815.0,156042.0,1410161.0,103.97,68.42,37.66,6590.0,"Johnson&Johnson, Oxford/AstraZeneca, Pfizer/Bi...",Ministry of Health,https://qsprod.saude.gov.br/extensions/DEMAS_C...
6088,Brazil,BRA,2021-09-19,222322654.0,146354111.0,80507122.0,1725658.0,1681917.0,103.89,68.39,37.62,7860.0,"Johnson&Johnson, Oxford/AstraZeneca, Pfizer/Bi...",Ministry of Health,https://qsprod.saude.gov.br/extensions/DEMAS_C...


In [6]:
# Checking the number of different countries in the dataset
dataframe['country'].nunique()

222

In [7]:
# Last update date for each country
dataframe.groupby('country')['date'].max().sort_values(ascending=False)

country
Zimbabwe            2021-09-20
Lithuania           2021-09-20
Isle of Man         2021-09-20
Israel              2021-09-20
Italy               2021-09-20
                       ...    
Kuwait              2021-08-14
Niue                2021-08-02
Saint Helena        2021-05-05
Falkland Islands    2021-04-14
Turkmenistan        2021-04-04
Name: date, Length: 222, dtype: object

In [8]:
# Total vaccinations for each country
immunized_per_country = dataframe.groupby('country')['total_vaccinations'].max().sort_values(ascending=False)
immunized = immunized_per_country.to_frame().reset_index()
immunized

Unnamed: 0,country,total_vaccinations
0,China,2.180986e+09
1,India,8.122325e+08
2,United States,3.862379e+08
3,Brazil,2.224787e+08
4,Japan,1.531187e+08
...,...,...
217,Falkland Islands,4.407000e+03
218,Montserrat,2.856000e+03
219,Niue,2.352000e+03
220,Tokelau,9.680000e+02


In [9]:
# Top 10 countries by total vaccinations
immunized.sort_values('total_vaccinations', ascending=False).head(10)

Unnamed: 0,country,total_vaccinations
0,China,2180986000.0
1,India,812232500.0
2,United States,386237900.0
3,Brazil,222478700.0
4,Japan,153118700.0
5,Indonesia,124882400.0
6,Germany,105834300.0
7,Turkey,105611300.0
8,Mexico,95271400.0
9,United Kingdom,93059140.0


In [10]:
# Checking the most used vaccines by country
vaccines_per_country = dataframe[['country', 'date', 'vaccines']]
vaccines_per_country.groupby('vaccines')['country'].nunique().sort_values(ascending=False)

vaccines
Oxford/AstraZeneca                                                            35
Johnson&Johnson, Moderna, Oxford/AstraZeneca, Pfizer/BioNTech                 23
Oxford/AstraZeneca, Sinopharm/Beijing                                         16
Moderna, Oxford/AstraZeneca, Pfizer/BioNTech                                  15
Oxford/AstraZeneca, Pfizer/BioNTech                                           13
                                                                              ..
Moderna, Oxford/AstraZeneca                                                    1
Medigen, Moderna, Oxford/AstraZeneca                                           1
Johnson&Johnson, Oxford/AstraZeneca, Sinopharm/Beijing, Sinovac, Sputnik V     1
Johnson&Johnson, Oxford/AstraZeneca, Pfizer/BioNTech, Sinovac                  1
Abdala, Soberana02                                                             1
Name: country, Length: 68, dtype: int64

In [11]:
# Checking which country has the highest number of people vaccinated per day
dataframe.groupby('country')['daily_vaccinations'].max().sort_values(ascending=False)

country
China            22424286.0
India             9340631.0
United States     3384387.0
Brazil            2045834.0
Japan             1997542.0
                    ...    
Niue                   87.0
Montserrat             53.0
Tokelau                23.0
Pitcairn                1.0
Turkmenistan            NaN
Name: daily_vaccinations, Length: 222, dtype: float64

In [12]:
# Checking the average vaccination by country
average_c = dataframe.groupby('country')['daily_vaccinations'].mean().sort_values(ascending=False)
average_c = average_c.to_frame().reset_index
average_c

<bound method DataFrame.reset_index of                    daily_vaccinations
country                              
China                    7.772743e+06
India                    3.156185e+06
United States            1.400649e+06
Brazil                   8.878472e+05
Japan                    7.002678e+05
...                               ...
Wallis and Futuna        5.373714e+01
Tokelau                  1.866071e+01
Montserrat               1.338914e+01
Pitcairn                 5.119048e-01
Turkmenistan                      NaN

[222 rows x 1 columns]>