## From Excel to Python

In [1]:
import numpy as np
import pandas as pd

In [2]:
file = "data/country_vaccinations.csv"
df = pd.read_csv(file, decimal=',')

In [3]:
df.columns

Index(['country', 'iso_code', 'date', 'total_vaccinations',
       'people_vaccinated', 'people_fully_vaccinated',
       'daily_vaccinations_raw', 'daily_vaccinations',
       'total_vaccinations_per_hundred', 'people_vaccinated_per_hundred',
       'people_fully_vaccinated_per_hundred', 'daily_vaccinations_per_million',
       'vaccines', 'source_name', 'source_website'],
      dtype='object')

In [4]:
df['total_vaccinations'].isnull().sum() / df['total_vaccinations'].isnull().count()

0.4382888640357885

In [5]:
df['total_vaccinations'].isnull().mean()

0.4382888640357885

In [6]:
df['total_vaccinations'].fillna(0.0, inplace=True)

In [7]:
df.dtypes

country                                object
iso_code                               object
date                                   object
total_vaccinations                     object
people_vaccinated                      object
people_fully_vaccinated                object
daily_vaccinations_raw                 object
daily_vaccinations                     object
total_vaccinations_per_hundred         object
people_vaccinated_per_hundred          object
people_fully_vaccinated_per_hundred    object
daily_vaccinations_per_million         object
vaccines                               object
source_name                            object
source_website                         object
dtype: object

In [13]:
df['total_vaccinations'] = df['total_vaccinations'].astype('float')

In [14]:
df.dtypes

country                                 object
iso_code                                object
date                                    object
total_vaccinations                     float64
people_vaccinated                       object
people_fully_vaccinated                 object
daily_vaccinations_raw                  object
daily_vaccinations                      object
total_vaccinations_per_hundred          object
people_vaccinated_per_hundred           object
people_fully_vaccinated_per_hundred     object
daily_vaccinations_per_million          object
vaccines                                object
source_name                             object
source_website                          object
dtype: object

In [16]:
df = df.astype('float', errors='ignore')
df

Unnamed: 0,country,iso_code,date,total_vaccinations,people_vaccinated,people_fully_vaccinated,daily_vaccinations_raw,daily_vaccinations,total_vaccinations_per_hundred,people_vaccinated_per_hundred,people_fully_vaccinated_per_hundred,daily_vaccinations_per_million,vaccines,source_name,source_website
0,Afghanistan,AFG,2021-02-22,0.0,0.0,,,,0.0,0.0,,,"BBIBP-CorV, Oxford/AstraZeneca, Pfizer/BioNTech",World Health Organization,https://covid19.who.int/
1,Afghanistan,AFG,2021-02-23,0.0,,,,1367.0,,,,35.0,"BBIBP-CorV, Oxford/AstraZeneca, Pfizer/BioNTech",World Health Organization,https://covid19.who.int/
2,Afghanistan,AFG,2021-02-24,0.0,,,,1367.0,,,,35.0,"BBIBP-CorV, Oxford/AstraZeneca, Pfizer/BioNTech",World Health Organization,https://covid19.who.int/
3,Afghanistan,AFG,2021-02-25,0.0,,,,1367.0,,,,35.0,"BBIBP-CorV, Oxford/AstraZeneca, Pfizer/BioNTech",World Health Organization,https://covid19.who.int/
4,Afghanistan,AFG,2021-02-26,0.0,,,,1367.0,,,,35.0,"BBIBP-CorV, Oxford/AstraZeneca, Pfizer/BioNTech",World Health Organization,https://covid19.who.int/
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
25031,Zimbabwe,ZWE,2021-06-10,1089506.0,690452.0,399054.0,5523.0,5857.0,7.33,4.65,2.68,394.0,Sinopharm/Beijing,Ministry of Health,https://twitter.com/MoHCCZim/status/1404519668...
25032,Zimbabwe,ZWE,2021-06-11,1096854.0,691251.0,405603.0,7348.0,5802.0,7.38,4.65,2.73,390.0,Sinopharm/Beijing,Ministry of Health,https://twitter.com/MoHCCZim/status/1404519668...
25033,Zimbabwe,ZWE,2021-06-12,1103398.0,692180.0,411218.0,6544.0,5921.0,7.42,4.66,2.77,398.0,Sinopharm/Beijing,Ministry of Health,https://twitter.com/MoHCCZim/status/1404519668...
25034,Zimbabwe,ZWE,2021-06-13,1107058.0,693147.0,413911.0,3660.0,5564.0,7.45,4.66,2.78,374.0,Sinopharm/Beijing,Ministry of Health,https://twitter.com/MoHCCZim/status/1404519668...


In [17]:
df.fillna(0.00, inplace=True)
df

Unnamed: 0,country,iso_code,date,total_vaccinations,people_vaccinated,people_fully_vaccinated,daily_vaccinations_raw,daily_vaccinations,total_vaccinations_per_hundred,people_vaccinated_per_hundred,people_fully_vaccinated_per_hundred,daily_vaccinations_per_million,vaccines,source_name,source_website
0,Afghanistan,AFG,2021-02-22,0.0,0.0,0,0,0,0.0,0.0,0,0,"BBIBP-CorV, Oxford/AstraZeneca, Pfizer/BioNTech",World Health Organization,https://covid19.who.int/
1,Afghanistan,AFG,2021-02-23,0.0,0,0,0,1367.0,0,0,0,35.0,"BBIBP-CorV, Oxford/AstraZeneca, Pfizer/BioNTech",World Health Organization,https://covid19.who.int/
2,Afghanistan,AFG,2021-02-24,0.0,0,0,0,1367.0,0,0,0,35.0,"BBIBP-CorV, Oxford/AstraZeneca, Pfizer/BioNTech",World Health Organization,https://covid19.who.int/
3,Afghanistan,AFG,2021-02-25,0.0,0,0,0,1367.0,0,0,0,35.0,"BBIBP-CorV, Oxford/AstraZeneca, Pfizer/BioNTech",World Health Organization,https://covid19.who.int/
4,Afghanistan,AFG,2021-02-26,0.0,0,0,0,1367.0,0,0,0,35.0,"BBIBP-CorV, Oxford/AstraZeneca, Pfizer/BioNTech",World Health Organization,https://covid19.who.int/
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
25031,Zimbabwe,ZWE,2021-06-10,1089506.0,690452.0,399054.0,5523.0,5857.0,7.33,4.65,2.68,394.0,Sinopharm/Beijing,Ministry of Health,https://twitter.com/MoHCCZim/status/1404519668...
25032,Zimbabwe,ZWE,2021-06-11,1096854.0,691251.0,405603.0,7348.0,5802.0,7.38,4.65,2.73,390.0,Sinopharm/Beijing,Ministry of Health,https://twitter.com/MoHCCZim/status/1404519668...
25033,Zimbabwe,ZWE,2021-06-12,1103398.0,692180.0,411218.0,6544.0,5921.0,7.42,4.66,2.77,398.0,Sinopharm/Beijing,Ministry of Health,https://twitter.com/MoHCCZim/status/1404519668...
25034,Zimbabwe,ZWE,2021-06-13,1107058.0,693147.0,413911.0,3660.0,5564.0,7.45,4.66,2.78,374.0,Sinopharm/Beijing,Ministry of Health,https://twitter.com/MoHCCZim/status/1404519668...


In [21]:
df = df.apply(pd.to_numeric, errors='ignore', downcast='float')
df.dtypes

country                                 object
iso_code                                object
date                                    object
total_vaccinations                     float32
people_vaccinated                      float32
people_fully_vaccinated                float32
daily_vaccinations_raw                 float32
daily_vaccinations                     float32
total_vaccinations_per_hundred         float32
people_vaccinated_per_hundred          float32
people_fully_vaccinated_per_hundred    float32
daily_vaccinations_per_million         float32
vaccines                                object
source_name                             object
source_website                          object
dtype: object