# COVID 2020-2021 Analysis 

____

**Changelog**:
- 31/12/2021: File created

____

## Table of Contents
- [Importing libraries](#Importing-libraries)
- [Load data](#Load-data)
- [Data Cleaning and Preparation](#Data-Cleaning-and-Preparation)
- [Data Exploration & Visualization](#Data-Exploration-&-Visualization)

____

## Importing libraries

In [None]:
import pandas as pd
import matplotlib.pyplot as plt

____

## Load data

source: https://ourworldindata.org/coronavirus-source-data, https://covid.ourworldindata.org/data/owid-covid-data.csv

In [None]:
# Load the local snapshot of the OWID dataset (downloaded on 31/12/2021).
df = pd.read_csv(filepath_or_buffer="../data/owid-covid-data.csv")

### Preview the data and check the dimensions

In [None]:
# Show the first five rows of the loaded frame.
df.head(n=5)

In [None]:
# (number of rows, number of columns) of the loaded frame.
df.shape

## Data Cleaning and Preparation

#### Check for null values

In [None]:
# True if at least one cell anywhere in the frame is missing.
df.isna().to_numpy().any()

#### Replace blanks with 'NULL'

In [None]:
# Replace every missing cell with the literal string 'NULL'.
df = df.fillna(value='NULL')

In [None]:
# Re-check after filling: expected to be False once no missing cells remain.
df.isna().to_numpy().any()

### Save prepared Data for MySQL

In [None]:
# Write the filled frame (row index included) as UTF-8 CSV.
df.to_csv(path_or_buf='../data/clean_owid_covid.csv', encoding='utf-8')

____

### Reload & update data from source

In [None]:
# Re-read the dataset straight from the OWID URL, replacing the frame loaded above.
df = pd.read_csv(filepath_or_buffer="https://covid.ourworldindata.org/data/owid-covid-data.csv")

## Data Cleaning and Preparation

### Check column names

In [None]:
# Column labels of the freshly downloaded frame.
df.columns

In [None]:
# (rows, columns) before any columns are dropped.
df.shape

In [None]:
# Columns that the analysis below does not use. The later cells only read
# location, date, total_cases, new_cases, total_deaths, total_vaccinations,
# people_fully_vaccinated and population.
columns_to_drop = [
    'iso_code', 'new_cases_smoothed', 'new_deaths',
    'new_deaths_smoothed', 'total_cases_per_million',
    'new_cases_per_million', 'new_cases_smoothed_per_million',
    'total_deaths_per_million', 'new_deaths_per_million',
    'new_deaths_smoothed_per_million', 'reproduction_rate', 'icu_patients',
    'icu_patients_per_million', 'hosp_patients',
    'hosp_patients_per_million', 'weekly_icu_admissions',
    'weekly_icu_admissions_per_million', 'weekly_hosp_admissions',
    'weekly_hosp_admissions_per_million', 'new_tests', 'total_tests',
    'total_tests_per_thousand', 'new_tests_per_thousand',
    'new_tests_smoothed', 'new_tests_smoothed_per_thousand',
    'positive_rate', 'tests_per_case', 'tests_units',
    'people_vaccinated', 'total_boosters',
    'new_vaccinations', 'new_vaccinations_smoothed',
    'total_vaccinations_per_hundred', 'people_vaccinated_per_hundred',
    'people_fully_vaccinated_per_hundred', 'total_boosters_per_hundred',
    'new_vaccinations_smoothed_per_million',
    'new_people_vaccinated_smoothed',
    'new_people_vaccinated_smoothed_per_hundred', 'stringency_index',
    'population_density', 'median_age', 'aged_65_older',
    'aged_70_older', 'gdp_per_capita', 'extreme_poverty',
    'cardiovasc_death_rate', 'diabetes_prevalence', 'female_smokers',
    'male_smokers', 'handwashing_facilities', 'hospital_beds_per_thousand',
    'life_expectancy', 'human_development_index',
    'excess_mortality_cumulative_absolute', 'excess_mortality_cumulative',
    'excess_mortality', 'excess_mortality_cumulative_per_million',
]

# The frame is downloaded from a live URL, so the upstream column set can change.
# errors='ignore' skips names that are no longer present instead of raising
# KeyError and aborting the notebook.
df = df.drop(columns=columns_to_drop, errors='ignore')

In [None]:
# First five rows of the slimmed-down frame.
df.head(n=5)

In [None]:
# (rows, columns) after the drop above.
df.shape

### Check for nulls

In [None]:
# Per-column flag: True where the column contains at least one missing value.
df.isna().any(axis=0)

### Replace blanks with zeros

In [None]:
# Replace every missing cell, in every column, with 0.
df = df.fillna(value=0)

In [None]:
# Re-check per column after filling with zeros.
df.isna().any(axis=0)

### Check data types

In [None]:
# dtype of each remaining column.
df.dtypes

### Change data types

In [None]:
# Convert the 'date' column to pandas datetimes so it can be grouped and plotted.
parsed_dates = pd.to_datetime(df['date'])
df['date'] = parsed_dates

## Data Exploration & Visualization

### Total cases

In [None]:
# Largest value found in the 'total_cases' column across all rows.
cases_column = df['total_cases']
total_cases = cases_column.max()
total_cases

### Total deaths

In [None]:
# Largest value found in the 'total_deaths' column across all rows.
deaths_column = df['total_deaths']
total_deaths = deaths_column.max()
total_deaths

In [None]:
# Deaths as a percentage of cases.
data_per = (total_deaths / total_cases) * 100
data_per

In [None]:
# Donut chart: cases that did not end in death vs. deaths.
fig = plt.figure(figsize=(10, 8))
slice_sizes = [total_cases - total_deaths, total_deaths]
plt.pie(slice_sizes, labels=['alive', 'deaths'])
plt.title('total_deaths from total_cases')
# A white disc over the centre turns the pie into a donut.
donut_hole = plt.Circle((0, 0), 0.7, color='white')
fig.gca().add_artist(donut_hole)
plt.show()

In [None]:
# One-row summary table with the two values shown in the chart.
result = pd.DataFrame({
    'alive': [total_cases - total_deaths],
    'deaths': [total_deaths],
})
result

In [None]:
# Save the summary without the row index. `index` is a boolean parameter;
# False states the intent explicitly instead of relying on None being falsy.
result.to_csv('../results/total_deaths_cases_owid_covid.csv', encoding='utf-8', index=False)

### Cases per day

In [None]:
# Highest 'new_cases' value reported by any location on each date.
# The column is selected *before* aggregating: after the fillna(0) step the text
# columns hold a mix of 0 and strings, and taking max() over them is wasted work
# that can fail with a TypeError on recent pandas versions.
cases_per_day = df.groupby('date')[['new_cases']].max()
# 'date' is the index after grouping, so order by index.
cases_per_day = cases_per_day.sort_index(ascending=True)
# Rolling mean over 7 rows; the first 6 rows have no full window and are NaN.
cases_per_day['7dayAvg'] = cases_per_day['new_cases'].rolling(7).mean()
cases_per_day

In [None]:
# Line chart of the daily values and their 7-row rolling mean.
plt.figure(figsize=(10,8))
# Each series is drawn with an explicit x axis. Passing both series to a single
# plot(x, y1, y2) call makes matplotlib treat y2 as a separate y-only dataset,
# which lines up with the dates only through an implicit index fallback.
plt.plot(cases_per_day.index, cases_per_day['new_cases'], label='New Cases')
plt.plot(cases_per_day.index, cases_per_day['7dayAvg'], label='7dayAvg')
plt.title('New cases per day')
plt.xlabel('Month')
plt.ylabel('New Cases')
plt.legend()
plt.show()

In [None]:
# Write the per-date table (date index included) as UTF-8 CSV.
cases_per_day.to_csv(path_or_buf='../results/new_cases_per_day_owid_covid.csv', encoding='utf-8')

### Total Vaccinations

In [None]:
# Largest value found in the 'total_vaccinations' column across all rows.
vaccinations_column = df['total_vaccinations']
total_vacc = vaccinations_column.max()
total_vacc

### Fully Vaccinated

In [None]:
# Largest value found in the 'people_fully_vaccinated' column across all rows.
fully_vaccinated_column = df['people_fully_vaccinated']
fully_vacc = fully_vaccinated_column.max()
fully_vacc

In [None]:
# Largest population value in the frame, used as the denominator.
population_column = df['population']
max_pop = population_column.max()
# Fully vaccinated people as a percentage of that population.
full_per = fully_vacc / max_pop * 100
full_per

In [None]:
# Donut chart: fully vaccinated vs. the rest of the population.
fig = plt.figure(figsize=(10, 8))
slice_sizes = [fully_vacc, max_pop - fully_vacc]
slice_labels = ['people_fully_vaccinated', 'not_or_not_fully_vaccinated']
plt.pie(slice_sizes, labels=slice_labels)
plt.title('% of people_fully_vaccinated')
# A white disc over the centre turns the pie into a donut.
donut_hole = plt.Circle((0, 0), 0.7, color='white')
fig.gca().add_artist(donut_hole)
plt.show()

In [None]:
# One-row summary table with the two values shown in the chart.
result = pd.DataFrame({
    'not_or_not_fully_vaccinated': [max_pop - fully_vacc],
    'fully_vacc': [fully_vacc],
})
result

In [None]:
# Save the summary without the row index. `index` is a boolean parameter;
# False states the intent explicitly instead of relying on None being falsy.
result.to_csv('../results/total_fully_vacc_owid_covid.csv', encoding='utf-8', index=False)

### Top 5 Countries with most deaths

In [None]:
# Highest 'total_deaths' value per location, largest first.
# Only the needed column is aggregated; taking max() of every column just to
# discard all but one is unnecessary work.
top_deaths = df.groupby('location')[['total_deaths']].max()
top_deaths = top_deaths.sort_values(by=['total_deaths'], ascending=False)
top_deaths.head(15)

In [None]:
# Keep only real countries, then take the five with the most deaths.
# A fixed positional slice (skipping the first 9 rows) breaks as soon as the number
# of aggregate rows ahead of the first country changes, and empties the frame if the
# cell is run twice. Filtering by name is stable and safe to re-run.
# NOTE(review): assumes aggregate rows (World, continents, income groups, ...) have
# no continent in the source data -- blank upstream, 0 after fillna(0). Confirm
# against the OWID codebook.
is_country = df['continent'].notna() & (df['continent'] != 0)
country_names = df.loc[is_country, 'location'].unique()
top_deaths = top_deaths[top_deaths.index.isin(country_names)]
top_deaths = top_deaths.head(5)
top_deaths

In [None]:
# Bar chart with one bar per location in top_deaths.
plt.figure(figsize=(10, 8))
bar_labels = top_deaths.index
bar_heights = top_deaths['total_deaths']
plt.bar(bar_labels, bar_heights)
plt.title('Top 5 countries')
plt.show()

In [None]:
# Write the ranking (location index included) as UTF-8 CSV.
top_deaths.to_csv(path_or_buf='../results/top_countries_owid_covid.csv', encoding='utf-8')

## Total deaths per day by country

In [None]:
# Wide table: one row per location, one column per date, cells hold 'total_deaths'.
deaths_per_day = df.pivot(index='location', columns='date', values='total_deaths')
deaths_per_day.tail(n=20)

In [None]:
# Write the wide table (location index included) as UTF-8 CSV.
deaths_per_day.to_csv(path_or_buf='../results/deaths_perday_countries_owid_covid.csv', encoding='utf-8')