# Global Covid Vaccination Rates

# 1. Import Libraries and Data

#### Import Libraries

In [7]:
import pandas as pd
import numpy as np
import os
import matplotlib.pyplot as plt
import seaborn as sns
import scipy

#### Import Data

In [8]:
## create a path
## The project root can be overridden with the COUNTRY_VACCINATIONS_DIR
## environment variable so the notebook is not tied to one machine; when the
## variable is not set, the original local folder is used unchanged.

path = os.environ.get("COUNTRY_VACCINATIONS_DIR", r"C:\Users\sahin\Documents\Country_Vaccinations")

In [9]:
## import data
## Build the location of the raw CSV under <path>/02-Data/Original Data and load it.
## NOTE(review): the file name reads "county-..." while the export cell writes
## "country-..." - confirm the name on disk.

raw_csv_path = os.path.join(path, "02-Data", "Original Data", "county-vaccinations.csv")
df = pd.read_csv(raw_csv_path, index_col=False)

In [10]:
## preview the first five rows of the raw data

df.head(n=5)

Unnamed: 0,country,iso_code,date,total_vaccinations,people_vaccinated,people_fully_vaccinated,daily_vaccinations_raw,daily_vaccinations,total_vaccinations_per_hundred,people_vaccinated_per_hundred,people_fully_vaccinated_per_hundred,daily_vaccinations_per_million,vaccines,source_name,source_website
0,Afghanistan,AFG,2/22/2021,0.0,0.0,,,,0.0,0.0,,,"Johnson&Johnson, Oxford/AstraZeneca, Pfizer/Bi...",World Health Organization,https://covid19.who.int/
1,Afghanistan,AFG,2/23/2021,,,,,1367.0,,,,34.0,"Johnson&Johnson, Oxford/AstraZeneca, Pfizer/Bi...",World Health Organization,https://covid19.who.int/
2,Afghanistan,AFG,2/24/2021,,,,,1367.0,,,,34.0,"Johnson&Johnson, Oxford/AstraZeneca, Pfizer/Bi...",World Health Organization,https://covid19.who.int/
3,Afghanistan,AFG,2/25/2021,,,,,1367.0,,,,34.0,"Johnson&Johnson, Oxford/AstraZeneca, Pfizer/Bi...",World Health Organization,https://covid19.who.int/
4,Afghanistan,AFG,2/26/2021,,,,,1367.0,,,,34.0,"Johnson&Johnson, Oxford/AstraZeneca, Pfizer/Bi...",World Health Organization,https://covid19.who.int/


In [11]:
## preview the last five rows of the raw data

df.tail(n=5)

Unnamed: 0,country,iso_code,date,total_vaccinations,people_vaccinated,people_fully_vaccinated,daily_vaccinations_raw,daily_vaccinations,total_vaccinations_per_hundred,people_vaccinated_per_hundred,people_fully_vaccinated_per_hundred,daily_vaccinations_per_million,vaccines,source_name,source_website
81971,Zimbabwe,ZWE,3/3/2022,7921113.0,4372925.0,3406482.0,10373.0,8903.0,52.48,28.97,22.57,590.0,"Oxford/AstraZeneca, Sinopharm/Beijing, Sinovac...",Ministry of Health,https://www.arcgis.com/home/webmap/viewer.html...
81972,Zimbabwe,ZWE,3/4/2022,7930621.0,4374896.0,3408609.0,9508.0,8603.0,52.55,28.99,22.59,570.0,"Oxford/AstraZeneca, Sinopharm/Beijing, Sinovac...",Ministry of Health,https://www.arcgis.com/home/webmap/viewer.html...
81973,Zimbabwe,ZWE,3/5/2022,7936145.0,4377373.0,3410340.0,5524.0,8458.0,52.58,29.0,22.6,560.0,"Oxford/AstraZeneca, Sinopharm/Beijing, Sinovac...",Ministry of Health,https://www.arcgis.com/home/webmap/viewer.html...
81974,Zimbabwe,ZWE,3/6/2022,7938362.0,4378029.0,3410960.0,2217.0,8017.0,52.6,29.01,22.6,531.0,"Oxford/AstraZeneca, Sinopharm/Beijing, Sinovac...",Ministry of Health,https://www.arcgis.com/home/webmap/viewer.html...
81975,Zimbabwe,ZWE,3/7/2022,7943325.0,4379875.0,3412556.0,4963.0,7482.0,52.63,29.02,22.61,496.0,"Oxford/AstraZeneca, Sinopharm/Beijing, Sinovac...",Ministry of Health,https://www.arcgis.com/home/webmap/viewer.html...


In [12]:
## print shape as (number of rows, number of columns) of the raw data

df.shape

(81976, 15)

# 2. Consistency Checks and Cleaning

### Dropping Columns

In [13]:
## remove the iso_code column, which is not needed for the analysis
## (drop returns a new frame that is bound back to df)

df = df.drop("iso_code", axis="columns")

In [14]:
## remove the source_name column, which is not needed for the analysis
## (drop returns a new frame that is bound back to df)

df = df.drop("source_name", axis="columns")

In [15]:
## remove the source_website column, which is not needed for the analysis
## (drop returns a new frame that is bound back to df)

df = df.drop("source_website", axis="columns")

In [16]:
## preview the first five rows after dropping the unneeded columns

df.head(n=5)

Unnamed: 0,country,date,total_vaccinations,people_vaccinated,people_fully_vaccinated,daily_vaccinations_raw,daily_vaccinations,total_vaccinations_per_hundred,people_vaccinated_per_hundred,people_fully_vaccinated_per_hundred,daily_vaccinations_per_million,vaccines
0,Afghanistan,2/22/2021,0.0,0.0,,,,0.0,0.0,,,"Johnson&Johnson, Oxford/AstraZeneca, Pfizer/Bi..."
1,Afghanistan,2/23/2021,,,,,1367.0,,,,34.0,"Johnson&Johnson, Oxford/AstraZeneca, Pfizer/Bi..."
2,Afghanistan,2/24/2021,,,,,1367.0,,,,34.0,"Johnson&Johnson, Oxford/AstraZeneca, Pfizer/Bi..."
3,Afghanistan,2/25/2021,,,,,1367.0,,,,34.0,"Johnson&Johnson, Oxford/AstraZeneca, Pfizer/Bi..."
4,Afghanistan,2/26/2021,,,,,1367.0,,,,34.0,"Johnson&Johnson, Oxford/AstraZeneca, Pfizer/Bi..."


In [17]:
## print shape as (number of rows, number of columns) after dropping columns

df.shape

(81976, 12)

### Checking for data types

In [18]:
## check for mixed data types
## For every column, look up the Python type of each value and print the
## column name when more than one distinct type is present.
## Series.map is used because DataFrame.applymap is deprecated in recent
## pandas releases; counting distinct types also works on an empty frame,
## where indexing the first row would raise.

for col in df.columns.tolist():
    value_types = df[col].map(type)
    if value_types.nunique() > 1:
        print(col)

#### No mixed data types

In [19]:
## check data types: show the dtype pandas assigned to each column

df.dtypes

country                                 object
date                                    object
total_vaccinations                     float64
people_vaccinated                      float64
people_fully_vaccinated                float64
daily_vaccinations_raw                 float64
daily_vaccinations                     float64
total_vaccinations_per_hundred         float64
people_vaccinated_per_hundred          float64
people_fully_vaccinated_per_hundred    float64
daily_vaccinations_per_million         float64
vaccines                                object
dtype: object

### Missing Values

In [20]:
## count the missing values in each column

df.isna().sum()

country                                    0
date                                       0
total_vaccinations                     40103
people_vaccinated                      42338
people_fully_vaccinated                44857
daily_vaccinations_raw                 47943
daily_vaccinations                       279
total_vaccinations_per_hundred         40103
people_vaccinated_per_hundred          42338
people_fully_vaccinated_per_hundred    44857
daily_vaccinations_per_million           279
vaccines                                   0
dtype: int64

In [21]:
## Imputing missing values with "0"
## fillna returns a new frame, which is bound back to df.
## NOTE(review): running totals such as total_vaccinations also become 0 on
## days without a reported figure - confirm this suits the downstream analysis.

df = df.fillna(0)

In [22]:
## re-count missing values per column after the imputation
df.isna().sum()

country                                0
date                                   0
total_vaccinations                     0
people_vaccinated                      0
people_fully_vaccinated                0
daily_vaccinations_raw                 0
daily_vaccinations                     0
total_vaccinations_per_hundred         0
people_vaccinated_per_hundred          0
people_fully_vaccinated_per_hundred    0
daily_vaccinations_per_million         0
vaccines                               0
dtype: int64

### Duplicates

In [23]:
## show every row that is an exact repeat of an earlier row
df.loc[df.duplicated()]

Unnamed: 0,country,date,total_vaccinations,people_vaccinated,people_fully_vaccinated,daily_vaccinations_raw,daily_vaccinations,total_vaccinations_per_hundred,people_vaccinated_per_hundred,people_fully_vaccinated_per_hundred,daily_vaccinations_per_million,vaccines


#### No duplicates

### Consistency

In [24]:
## lift the row limit so the value counts below are displayed in full
pd.set_option("display.max_rows", None)

In [25]:
#### country names
## count how many rows each country contributes, to eyeball spelling and format

df['country'].value_counts()

Norway                              460
Latvia                              459
United States                       450
Canada                              449
Russia                              448
Denmark                             448
China                               448
Israel                              444
Switzerland                         441
Liechtenstein                       441
Qatar                               441
Chile                               438
Mexico                              438
Lithuania                           436
Slovenia                            436
Czechia                             436
Germany                             436
Italy                               436
Estonia                             436
Hungary                             435
Poland                              435
Greece                              435
France                              435
Portugal                            435
Romania                             435


#### All country names look correct and the format looks consistent

In [26]:
#### date
## count how many rows exist per date value; the column has dtype object
## (see the df.dtypes output), so dates are compared as text here

df['date'].value_counts()

8/28/2021     220
8/17/2021     220
8/26/2021     220
8/25/2021     220
8/24/2021     220
8/23/2021     220
8/22/2021     220
8/21/2021     220
8/20/2021     220
8/18/2021     220
8/16/2021     220
8/29/2021     220
8/15/2021     220
8/14/2021     220
8/13/2021     220
8/12/2021     220
8/11/2021     220
8/10/2021     220
8/9/2021      220
8/8/2021      220
8/27/2021     220
8/19/2021     220
7/30/2021     219
7/24/2021     219
7/29/2021     219
8/30/2021     219
8/31/2021     219
9/1/2021      219
7/16/2021     219
7/17/2021     219
7/18/2021     219
7/19/2021     219
7/20/2021     219
7/21/2021     219
7/22/2021     219
7/23/2021     219
7/25/2021     219
7/26/2021     219
7/27/2021     219
7/28/2021     219
8/7/2021      219
8/6/2021      219
8/5/2021      219
8/4/2021      219
8/3/2021      219
8/2/2021      219
8/1/2021      219
7/31/2021     219
7/13/2021     218
6/21/2021     218
7/15/2021     218
7/14/2021     218
6/22/2021     218
7/12/2021     218
7/11/2021     218
6/23/2021 

### Basic Descriptive Statistics

In [27]:
## compute summary statistics and copy them to the system clipboard
summary_stats = df.describe()
summary_stats.to_clipboard()

In [28]:
## final (rows, columns) of the cleaned frame before export
df.shape

(81976, 12)

### Export Data

In [29]:
## write the cleaned frame to <path>/02-Data/Prepared
cleaned_csv_path = os.path.join(path, "02-Data", "Prepared", "country-vaccinations-cleaned.csv")
df.to_csv(cleaned_csv_path)