# World covid-19 analysis for the year 2023
#### World Health Organization (WHO) Website [Link](https://covid19.who.int/data)
#### Source of data [Link](https://covid19.who.int/WHO-COVID-19-global-data.csv)

In [1]:
from IPython.display import Image
from IPython.core.display import HTML 
Image(url= "https://img.freepik.com/premium-vector/covid19-coronavirus-banner-design-world-health-organization-who-new-official-name-coronavirus_35947-437.jpg?w=1380")

In [2]:
# Importing libraries

import pandas as pd
import numpy as np
import seaborn as sns
import matplotlib.pyplot as plt
import datetime as dt
pd.options.mode.chained_assignment = None

Metadata

In [135]:
# Importing Data

url="https://covid19.who.int/WHO-COVID-19-global-data.csv"
urldf=pd.read_csv(url,delimiter=",")

In [136]:
df=urldf.copy()

### 1. Exploring Data

In [137]:
df.head()

Unnamed: 0,Date_reported,Country_code,Country,WHO_region,New_cases,Cumulative_cases,New_deaths,Cumulative_deaths
0,2020-01-03,AF,Afghanistan,EMRO,0,0,0,0
1,2020-01-04,AF,Afghanistan,EMRO,0,0,0,0
2,2020-01-05,AF,Afghanistan,EMRO,0,0,0,0
3,2020-01-06,AF,Afghanistan,EMRO,0,0,0,0
4,2020-01-07,AF,Afghanistan,EMRO,0,0,0,0


In [138]:
df.shape
f"{df.shape[0]} Rows by {df.shape[1]} Columns"

'331800 Rows by 8 Columns'

In [139]:
df.describe()

Unnamed: 0,New_cases,Cumulative_cases,New_deaths,Cumulative_deaths
count,331800.0,331800.0,331800.0,331800.0
mean,2325.737,1518453.0,21.027797,18345.86
std,38702.92,6883106.0,147.262467,76553.02
min,-65079.0,0.0,-3520.0,0.0
25%,0.0,2664.75,0.0,21.0
50%,0.0,37503.0,0.0,409.0
75%,146.0,439900.2,1.0,5874.25
max,6966046.0,103436800.0,11447.0,1138309.0


In [140]:
df.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 331800 entries, 0 to 331799
Data columns (total 8 columns):
 #   Column             Non-Null Count   Dtype 
---  ------             --------------   ----- 
 0   Date_reported      331800 non-null  object
 1   Country_code       330400 non-null  object
 2   Country            331800 non-null  object
 3   WHO_region         331800 non-null  object
 4   New_cases          331800 non-null  int64 
 5   Cumulative_cases   331800 non-null  int64 
 6   New_deaths         331800 non-null  int64 
 7   Cumulative_deaths  331800 non-null  int64 
dtypes: int64(4), object(4)
memory usage: 20.3+ MB


### 2. Cleaning up the Data!

In [141]:
df

Unnamed: 0,Date_reported,Country_code,Country,WHO_region,New_cases,Cumulative_cases,New_deaths,Cumulative_deaths
0,2020-01-03,AF,Afghanistan,EMRO,0,0,0,0
1,2020-01-04,AF,Afghanistan,EMRO,0,0,0,0
2,2020-01-05,AF,Afghanistan,EMRO,0,0,0,0
3,2020-01-06,AF,Afghanistan,EMRO,0,0,0,0
4,2020-01-07,AF,Afghanistan,EMRO,0,0,0,0
...,...,...,...,...,...,...,...,...
331795,2023-10-29,ZW,Zimbabwe,AFRO,0,265821,0,5720
331796,2023-10-30,ZW,Zimbabwe,AFRO,0,265821,0,5720
331797,2023-10-31,ZW,Zimbabwe,AFRO,0,265821,0,5720
331798,2023-11-01,ZW,Zimbabwe,AFRO,0,265821,0,5720


In [142]:
df.rename(columns=str.lower,inplace=True)
df.sample()

Unnamed: 0,date_reported,country_code,country,who_region,new_cases,cumulative_cases,new_deaths,cumulative_deaths
2343,2022-08-03,AL,Albania,EURO,716,312097,2,3545


In [143]:
df=df.rename(columns=({"date_reported":"date","who_region":"region"}))
df

Unnamed: 0,date,country_code,country,region,new_cases,cumulative_cases,new_deaths,cumulative_deaths
0,2020-01-03,AF,Afghanistan,EMRO,0,0,0,0
1,2020-01-04,AF,Afghanistan,EMRO,0,0,0,0
2,2020-01-05,AF,Afghanistan,EMRO,0,0,0,0
3,2020-01-06,AF,Afghanistan,EMRO,0,0,0,0
4,2020-01-07,AF,Afghanistan,EMRO,0,0,0,0
...,...,...,...,...,...,...,...,...
331795,2023-10-29,ZW,Zimbabwe,AFRO,0,265821,0,5720
331796,2023-10-30,ZW,Zimbabwe,AFRO,0,265821,0,5720
331797,2023-10-31,ZW,Zimbabwe,AFRO,0,265821,0,5720
331798,2023-11-01,ZW,Zimbabwe,AFRO,0,265821,0,5720


In [144]:
filtercolumns=["date","country","region","new_cases","cumulative_cases","new_deaths","cumulative_deaths"]
df=df.loc[:,filtercolumns]
df

Unnamed: 0,date,country,region,new_cases,cumulative_cases,new_deaths,cumulative_deaths
0,2020-01-03,Afghanistan,EMRO,0,0,0,0
1,2020-01-04,Afghanistan,EMRO,0,0,0,0
2,2020-01-05,Afghanistan,EMRO,0,0,0,0
3,2020-01-06,Afghanistan,EMRO,0,0,0,0
4,2020-01-07,Afghanistan,EMRO,0,0,0,0
...,...,...,...,...,...,...,...
331795,2023-10-29,Zimbabwe,AFRO,0,265821,0,5720
331796,2023-10-30,Zimbabwe,AFRO,0,265821,0,5720
331797,2023-10-31,Zimbabwe,AFRO,0,265821,0,5720
331798,2023-11-01,Zimbabwe,AFRO,0,265821,0,5720


In [145]:
df["region"]=df.region.replace({"AFRO":"Africa","AMRO":"America","EMRO":"Eastern Mediterranean","EURO":"Europe","SEARO":"Asia","WPRO":"Western Pacific"})
df

Unnamed: 0,date,country,region,new_cases,cumulative_cases,new_deaths,cumulative_deaths
0,2020-01-03,Afghanistan,Eastern Mediterranean,0,0,0,0
1,2020-01-04,Afghanistan,Eastern Mediterranean,0,0,0,0
2,2020-01-05,Afghanistan,Eastern Mediterranean,0,0,0,0
3,2020-01-06,Afghanistan,Eastern Mediterranean,0,0,0,0
4,2020-01-07,Afghanistan,Eastern Mediterranean,0,0,0,0
...,...,...,...,...,...,...,...
331795,2023-10-29,Zimbabwe,Africa,0,265821,0,5720
331796,2023-10-30,Zimbabwe,Africa,0,265821,0,5720
331797,2023-10-31,Zimbabwe,Africa,0,265821,0,5720
331798,2023-11-01,Zimbabwe,Africa,0,265821,0,5720


In [148]:
filter=df.region!="Other"
df=df.loc[filter]
df

Unnamed: 0,date,country,region,new_cases,cumulative_cases,new_deaths,cumulative_deaths
0,2020-01-03,Afghanistan,Eastern Mediterranean,0,0,0,0
1,2020-01-04,Afghanistan,Eastern Mediterranean,0,0,0,0
2,2020-01-05,Afghanistan,Eastern Mediterranean,0,0,0,0
3,2020-01-06,Afghanistan,Eastern Mediterranean,0,0,0,0
4,2020-01-07,Afghanistan,Eastern Mediterranean,0,0,0,0
...,...,...,...,...,...,...,...
331795,2023-10-29,Zimbabwe,Africa,0,265821,0,5720
331796,2023-10-30,Zimbabwe,Africa,0,265821,0,5720
331797,2023-10-31,Zimbabwe,Africa,0,265821,0,5720
331798,2023-11-01,Zimbabwe,Africa,0,265821,0,5720


In [149]:
filter=df.country!="occupied Palestinian territory, including east Jerusalem"
df=df.loc[filter]
df

Unnamed: 0,date,country,region,new_cases,cumulative_cases,new_deaths,cumulative_deaths
0,2020-01-03,Afghanistan,Eastern Mediterranean,0,0,0,0
1,2020-01-04,Afghanistan,Eastern Mediterranean,0,0,0,0
2,2020-01-05,Afghanistan,Eastern Mediterranean,0,0,0,0
3,2020-01-06,Afghanistan,Eastern Mediterranean,0,0,0,0
4,2020-01-07,Afghanistan,Eastern Mediterranean,0,0,0,0
...,...,...,...,...,...,...,...
331795,2023-10-29,Zimbabwe,Africa,0,265821,0,5720
331796,2023-10-30,Zimbabwe,Africa,0,265821,0,5720
331797,2023-10-31,Zimbabwe,Africa,0,265821,0,5720
331798,2023-11-01,Zimbabwe,Africa,0,265821,0,5720


In [150]:
df["date"]=pd.to_datetime(df["date"]).dt.date
df

Unnamed: 0,date,country,region,new_cases,cumulative_cases,new_deaths,cumulative_deaths
0,2020-01-03,Afghanistan,Eastern Mediterranean,0,0,0,0
1,2020-01-04,Afghanistan,Eastern Mediterranean,0,0,0,0
2,2020-01-05,Afghanistan,Eastern Mediterranean,0,0,0,0
3,2020-01-06,Afghanistan,Eastern Mediterranean,0,0,0,0
4,2020-01-07,Afghanistan,Eastern Mediterranean,0,0,0,0
...,...,...,...,...,...,...,...
331795,2023-10-29,Zimbabwe,Africa,0,265821,0,5720
331796,2023-10-30,Zimbabwe,Africa,0,265821,0,5720
331797,2023-10-31,Zimbabwe,Africa,0,265821,0,5720
331798,2023-11-01,Zimbabwe,Africa,0,265821,0,5720


In [165]:
filter=df.cumulative_cases!=0
df=df.loc[filter]
df

Unnamed: 0,date,country,region,new_cases,cumulative_cases,new_deaths,cumulative_deaths
54,2020-02-26,Afghanistan,Eastern Mediterranean,1,1,0,0
55,2020-02-27,Afghanistan,Eastern Mediterranean,0,1,0,0
56,2020-02-28,Afghanistan,Eastern Mediterranean,0,1,0,0
57,2020-02-29,Afghanistan,Eastern Mediterranean,0,1,0,0
58,2020-03-01,Afghanistan,Eastern Mediterranean,0,1,0,0
...,...,...,...,...,...,...,...
331795,2023-10-29,Zimbabwe,Africa,0,265821,0,5720
331796,2023-10-30,Zimbabwe,Africa,0,265821,0,5720
331797,2023-10-31,Zimbabwe,Africa,0,265821,0,5720
331798,2023-11-01,Zimbabwe,Africa,0,265821,0,5720


In [166]:
df["country"]=df.country.str.replace("Russian Federation","Russia")
df

Unnamed: 0,date,country,region,new_cases,cumulative_cases,new_deaths,cumulative_deaths
54,2020-02-26,Afghanistan,Eastern Mediterranean,1,1,0,0
55,2020-02-27,Afghanistan,Eastern Mediterranean,0,1,0,0
56,2020-02-28,Afghanistan,Eastern Mediterranean,0,1,0,0
57,2020-02-29,Afghanistan,Eastern Mediterranean,0,1,0,0
58,2020-03-01,Afghanistan,Eastern Mediterranean,0,1,0,0
...,...,...,...,...,...,...,...
331795,2023-10-29,Zimbabwe,Africa,0,265821,0,5720
331796,2023-10-30,Zimbabwe,Africa,0,265821,0,5720
331797,2023-10-31,Zimbabwe,Africa,0,265821,0,5720
331798,2023-11-01,Zimbabwe,Africa,0,265821,0,5720


In [167]:
df.to_csv("covid2023.csv",index=False)

### 4. To Google Looker

### [Link](https://lookerstudio.google.com/reporting/261f150a-939b-4144-8f5f-335f581cf28d)