#### Resources:

- [5 Wrong Ways to Do Covid-19 Data Smoothing](https://towardsdatascience.com/five-wrong-ways-to-do-covid-19-data-smoothing-1538db6ff182)


In [1]:
# import the libraries

import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns

# read the csv file and put it into a dataframe

df = pd.read_csv('owid-covid-data.csv')
df.head()

Unnamed: 0,iso_code,continent,location,date,total_cases,new_cases,new_cases_smoothed,total_deaths,new_deaths,new_deaths_smoothed,...,male_smokers,handwashing_facilities,hospital_beds_per_thousand,life_expectancy,human_development_index,population,excess_mortality_cumulative_absolute,excess_mortality_cumulative,excess_mortality,excess_mortality_cumulative_per_million
0,AFG,Asia,Afghanistan,2020-01-03,,0.0,,,0.0,,...,,37.746,0.5,64.83,0.511,41128772.0,,,,
1,AFG,Asia,Afghanistan,2020-01-04,,0.0,,,0.0,,...,,37.746,0.5,64.83,0.511,41128772.0,,,,
2,AFG,Asia,Afghanistan,2020-01-05,,0.0,,,0.0,,...,,37.746,0.5,64.83,0.511,41128772.0,,,,
3,AFG,Asia,Afghanistan,2020-01-06,,0.0,,,0.0,,...,,37.746,0.5,64.83,0.511,41128772.0,,,,
4,AFG,Asia,Afghanistan,2020-01-07,,0.0,,,0.0,,...,,37.746,0.5,64.83,0.511,41128772.0,,,,


In [2]:
# shape of the dataframe

df.shape

(355536, 67)

In [4]:
# data types of the columns

df.dtypes

iso_code                                    object
continent                                   object
location                                    object
date                                        object
total_cases                                float64
                                            ...   
population                                 float64
excess_mortality_cumulative_absolute       float64
excess_mortality_cumulative                float64
excess_mortality                           float64
excess_mortality_cumulative_per_million    float64
Length: 67, dtype: object

In [8]:
# columns in the dataframe

df.columns

Index(['iso_code', 'continent', 'location', 'date', 'total_cases', 'new_cases',
       'new_cases_smoothed', 'total_deaths', 'new_deaths',
       'new_deaths_smoothed', 'total_cases_per_million',
       'new_cases_per_million', 'new_cases_smoothed_per_million',
       'total_deaths_per_million', 'new_deaths_per_million',
       'new_deaths_smoothed_per_million', 'reproduction_rate', 'icu_patients',
       'icu_patients_per_million', 'hosp_patients',
       'hosp_patients_per_million', 'weekly_icu_admissions',
       'weekly_icu_admissions_per_million', 'weekly_hosp_admissions',
       'weekly_hosp_admissions_per_million', 'total_tests', 'new_tests',
       'total_tests_per_thousand', 'new_tests_per_thousand',
       'new_tests_smoothed', 'new_tests_smoothed_per_thousand',
       'positive_rate', 'tests_per_case', 'tests_units', 'total_vaccinations',
       'people_vaccinated', 'people_fully_vaccinated', 'total_boosters',
       'new_vaccinations', 'new_vaccinations_smoothed',
       't

In [7]:
# data description

df.describe()

Unnamed: 0,total_cases,new_cases,new_cases_smoothed,total_deaths,new_deaths,new_deaths_smoothed,total_cases_per_million,new_cases_per_million,new_cases_smoothed_per_million,total_deaths_per_million,...,male_smokers,handwashing_facilities,hospital_beds_per_thousand,life_expectancy,human_development_index,population,excess_mortality_cumulative_absolute,excess_mortality_cumulative,excess_mortality,excess_mortality_cumulative_per_million
count,317456.0,345855.0,344596.0,295737.0,345907.0,344677.0,317456.0,345855.0,344596.0,295737.0,...,204077.0,135084.0,243473.0,327130.0,267294.0,355536.0,12211.0,12211.0,12211.0,12211.0
mean,6796974.0,9457.2,9491.605,86668.38,84.23524,84.532007,103457.466834,143.042496,143.560684,879.848094,...,32.9098,50.790062,3.097263,73.712023,0.722445,128493200.0,53121.68,9.760347,11.337701,1675.39835
std,41238930.0,109419.4,93742.32,442644.4,608.98649,554.835927,152731.556394,1154.807442,595.505275,1105.379761,...,13.573743,31.956205,2.548317,7.394807,0.148967,660850700.0,149448.0,12.293895,25.166908,1941.471481
min,1.0,0.0,0.0,1.0,0.0,0.0,0.0,0.0,0.0,0.0,...,7.7,1.188,0.1,53.28,0.394,47.0,-37726.1,-44.23,-95.92,-2752.9248
25%,8285.0,0.0,0.286,128.0,0.0,0.0,2646.23475,0.0,0.03,61.388,...,22.6,20.859,1.3,69.59,0.602,449002.0,121.65,1.43,-1.59,73.764453
50%,71436.0,1.0,23.571,1341.0,0.0,0.143,29017.293,0.088,6.143,384.979,...,33.1,49.839,2.5,75.05,0.74,5882259.0,5968.999,8.1,5.72,1115.9562
75%,766734.0,251.0,482.143,11988.0,3.0,4.857,136995.77025,33.849,79.974,1374.303,...,41.3,82.502,4.2,79.46,0.829,28301700.0,37707.31,15.38,16.3,2746.7037
max,771820200.0,8401960.0,6402036.0,6978162.0,27939.0,14821.857,737554.506,228872.025,37241.781,6511.885,...,78.1,100.0,13.8,86.75,0.957,7975105000.0,1289776.0,76.62,377.63,10292.916


In [9]:
# check for missing values

df.isnull().sum()

iso_code                                        0
continent                                   16928
location                                        0
date                                            0
total_cases                                 38080
                                            ...  
population                                      0
excess_mortality_cumulative_absolute       343325
excess_mortality_cumulative                343325
excess_mortality                           343325
excess_mortality_cumulative_per_million    343325
Length: 67, dtype: int64

In [11]:
# pairplot

sns.pairplot(df)

KeyboardInterrupt: 

In [None]:
# pairplot

sns.pairplot(df, hue='continent', palette='viridis', diag_kind='bar', height=2.5, aspect=1.5, diag_kws={'alpha':0.6}, plot_kws={'alpha':0.6}, corner=True, markers='o')