## Importing Packages

In [6]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import altair as alt
import zipfile

## Data Exploration

In [13]:
# Read the JSON member straight out of the local zip archive without
# extracting it to disk; both handles are closed when the block exits.
# NOTE(review): covid_data is reassigned from the CSV URL in the next cell,
# so this load appears to be superseded — confirm whether it is still needed.
with zipfile.ZipFile("owid-covid-data.zip") as archive, \
        archive.open("owid-covid-data.json") as json_member:
    covid_data = pd.read_json(json_member)

In [15]:
# Download the CSV over HTTP (requires network access) and preview
# the first five rows.
covid_csv_url = "https://covid.ourworldindata.org/data/owid-covid-data.csv"
covid_data = pd.read_csv(covid_csv_url)
covid_data.head()

Unnamed: 0,iso_code,continent,location,date,total_cases,new_cases,new_cases_smoothed,total_deaths,new_deaths,new_deaths_smoothed,...,female_smokers,male_smokers,handwashing_facilities,hospital_beds_per_thousand,life_expectancy,human_development_index,excess_mortality_cumulative_absolute,excess_mortality_cumulative,excess_mortality,excess_mortality_cumulative_per_million
0,AFG,Asia,Afghanistan,2020-02-24,5.0,5.0,,,,,...,,,37.746,0.5,64.83,0.511,,,,
1,AFG,Asia,Afghanistan,2020-02-25,5.0,0.0,,,,,...,,,37.746,0.5,64.83,0.511,,,,
2,AFG,Asia,Afghanistan,2020-02-26,5.0,0.0,,,,,...,,,37.746,0.5,64.83,0.511,,,,
3,AFG,Asia,Afghanistan,2020-02-27,5.0,0.0,,,,,...,,,37.746,0.5,64.83,0.511,,,,
4,AFG,Asia,Afghanistan,2020-02-28,5.0,0.0,,,,,...,,,37.746,0.5,64.83,0.511,,,,


In [16]:
# Print a per-column summary (non-null counts and dtypes) to spot missing data.
covid_data.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 127314 entries, 0 to 127313
Data columns (total 65 columns):
 #   Column                                   Non-Null Count   Dtype  
---  ------                                   --------------   -----  
 0   iso_code                                 127314 non-null  object 
 1   continent                                121556 non-null  object 
 2   location                                 127314 non-null  object 
 3   date                                     127314 non-null  object 
 4   total_cases                              120366 non-null  float64
 5   new_cases                                120362 non-null  float64
 6   new_cases_smoothed                       119348 non-null  float64
 7   total_deaths                             109388 non-null  float64
 8   new_deaths                               109541 non-null  float64
 9   new_deaths_smoothed                      119348 non-null  float64
 10  total_cases_per_million         

In [17]:
# Largest value found anywhere in the total_cases column.
covid_data["total_cases"].max()

246407250.0

In [18]:
# Largest value found anywhere in the positive_rate column.
covid_data["positive_rate"].max()

0.97

In [19]:
# Show the full row that holds the maximum total_cases value.
# idxmax() returns the index label of the first maximum; .loc fetches that row.
peak_label = covid_data["total_cases"].idxmax()
covid_data.loc[peak_label]

iso_code                                      OWID_WRL
continent                                          NaN
location                                         World
date                                        2021-10-30
total_cases                                246407250.0
                                              ...     
human_development_index                          0.737
excess_mortality_cumulative_absolute               NaN
excess_mortality_cumulative                        NaN
excess_mortality                                   NaN
excess_mortality_cumulative_per_million            NaN
Name: 125562, Length: 65, dtype: object

In [20]:
# Column-wise mean of total_cases, new_cases and new_deaths
# (missing values are skipped by default).
columns_of_interest = ['total_cases', 'new_cases', 'new_deaths']
covid_data[columns_of_interest].mean()

total_cases    1.536968e+06
new_cases      6.463036e+03
new_deaths     1.436271e+02
dtype: float64

## Data Visualization

In [22]:
# Aggregate every numeric column per continent; rows whose continent is NaN
# are dropped by groupby's default dropna behaviour.
# numeric_only=True is passed explicitly: without it, pandas >= 2.0 tries to
# "sum" the object columns (iso_code, location, date) as well, instead of
# silently dropping them as older versions did.
# NOTE(review): columns such as total_cases look like running totals repeated
# on every date, so their sums over dates are presumably not meaningful —
# only daily columns (e.g. new_cases, new_deaths) should be read from this frame.
data = (
    covid_data
    .groupby("continent")
    .sum(numeric_only=True)
    .reset_index()
)
data

Unnamed: 0,continent,total_cases,new_cases,new_cases_smoothed,total_deaths,new_deaths,new_deaths_smoothed,total_cases_per_million,new_cases_per_million,new_cases_smoothed_per_million,...,female_smokers,male_smokers,handwashing_facilities,hospital_beds_per_thousand,life_expectancy,human_development_index,excess_mortality_cumulative_absolute,excess_mortality_cumulative,excess_mortality,excess_mortality_cumulative_per_million
0,Africa,1966430000.0,8496089.0,8482790.0,50205783.0,218099.0,217558.395,162135000.0,840318.055,838235.281,...,55178.6,587102.2,663271.457,35870.54,2065248.0,17678.583,8575547.2,950.5,2568.62,110829.7
1,Asia,16702230000.0,78453109.0,78146590.0,252232564.0,1174690.0,1169346.753,466910200.0,2171831.619,2156416.476,...,112438.2,930290.2,1104017.288,78487.57,2198945.0,21143.162,9576888.6,4204.22,8569.98,253417.9
2,Europe,15579330000.0,65138548.0,64422090.0,370647073.0,1304566.0,1294693.651,1132716000.0,4674464.641,4616689.164,...,578521.7,878049.8,171047.325,136973.811,2283448.0,22848.604,44100410.0,14981.04,32276.13,2111611.0
3,North America,14034750000.0,55222989.0,54968460.0,328086690.0,1121054.0,1114974.588,217501300.0,1215543.898,1204592.989,...,55960.5,163499.9,609539.863,32062.6,1247978.0,10515.356,55941161.2,4677.36,8566.26,343908.3
4,Oceania,29429080.0,258286.0,252284.5,542889.0,2805.0,2738.057,6882027.0,69471.996,68937.684,...,59603.3,122812.8,53091.691,8943.67,443156.4,3395.732,-254917.6,-457.67,-330.86,-31967.24
5,South America,9939450000.0,38211595.0,38150200.0,315432743.0,1157308.0,1155910.255,201559200.0,859708.579,857492.229,...,61147.7,121186.2,238341.648,15405.7,559170.1,5632.033,22799657.5,14410.19,20118.95,673321.8


In [23]:
# Vertical bar chart: summed new_cases for each continent.
alt.Chart(data).mark_bar().encode(
    alt.X("continent"),
    alt.Y("new_cases"),
)

In [24]:
# Task: swap the x and y axes to get a horizontal bar chart.
alt.Chart(data).mark_bar().encode(
    alt.X("new_cases"),
    alt.Y("continent"),
)

In [25]:
# Same bar chart as before, rendered at 200x200 pixels, with a hover tooltip
# showing new_cases_per_million and new_deaths for each bar.
alt.Chart(data).mark_bar().encode(
    alt.X("continent"),
    alt.Y("new_cases"),
    tooltip=["new_cases_per_million", "new_deaths"],
).properties(width=200, height=200)