# Exploratory Data Analysis of Latest COVID-19 OWID Data

In [1]:
import pandas as pd
import urllib.request as rq
import json

In [2]:
# OWID "latest" JSON file (see the URL path); this is the file the notebook loads below.
COVID_LATEST_DATA_URL = "https://covid.ourworldindata.org/data/latest/owid-covid-latest.json"
# OWID full JSON file; presumably the complete per-country history (TODO confirm).
# It is not downloaded by any of the cells visible here.
COVID_WHOLE_DATA_URL = "https://covid.ourworldindata.org/data/owid-covid-data.json"


def get_covid_df(url:str, timeout:float = 30.0) -> pd.DataFrame:
    """Download a JSON document and load it into a DataFrame.

    Parameters
    ----------
    url : str
        Address of the JSON resource to fetch (anything ``urllib.request.urlopen``
        accepts, e.g. ``COVID_LATEST_DATA_URL``).
    timeout : float, optional
        Seconds a blocking network operation may take before it is aborted.
        Without a timeout a stalled connection would hang the kernel forever.

    Returns
    -------
    pd.DataFrame
        ``pd.DataFrame(payload)`` where ``payload`` is the decoded JSON body.
        For a JSON object of objects, the outer keys become the columns and
        the inner keys become the index.

    Raises
    ------
    urllib.error.URLError
        If the resource cannot be reached.
    json.JSONDecodeError
        If the response body is not valid JSON.
    """
    # Bind the response to its own name so ``url`` keeps referring to the address.
    with rq.urlopen(url, timeout=timeout) as response:
        payload = json.loads(response.read().decode())
    # The body is fully read at this point, so build the frame after the
    # connection has been closed.
    return pd.DataFrame(payload)

def filter_data(covid_df:pd.DataFrame, country:str) -> pd.Series:
    """Return the column of ``covid_df`` labelled ``country``.

    Parameters
    ----------
    covid_df : pd.DataFrame
        Frame whose column labels identify countries (the notebook passes the
        frame returned by ``get_covid_df`` and asks for ``"USA"``).
    country : str
        Column label to select.

    Returns
    -------
    pd.Series
        The selected column, indexed by the row labels of ``covid_df``.
        Selecting one string label yields a Series, not a DataFrame.

    Raises
    ------
    KeyError
        If ``country`` is not a column of ``covid_df``.
    """
    try:
        return covid_df[country]
    except KeyError as err:
        # Same exception type as the plain lookup, but says what was searched for.
        raise KeyError(f"country {country!r} is not a column of covid_df") from err

def extract_full_country_data(data:"pd.Series | pd.DataFrame") -> pd.DataFrame:
    """Build a DataFrame from the ``"data"`` entry nested in ``data``.

    Parameters
    ----------
    data : pd.Series or pd.DataFrame
        Object holding an entry labelled ``"data"``. The notebook's (disabled)
        call passes the Series returned by ``filter_data``.
        NOTE(review): the entry is presumably a list of per-date records from
        the whole-data download; confirm against the payload.

    Returns
    -------
    pd.DataFrame
        ``pd.DataFrame.from_dict`` applied to the ``"data"`` entry.

    Raises
    ------
    KeyError
        If ``data`` has no entry labelled ``"data"``.
    """
    # Look the entry up by key. Attribute access (``data.data``) only falls back
    # to labels when the object has no real attribute of that name, so it is
    # fragile across pandas versions.
    return pd.DataFrame.from_dict(data["data"])

In [3]:
# Download the "latest" OWID file; the resulting frame has one column per
# country/aggregate code.
covid_df = get_covid_df(COVID_LATEST_DATA_URL)
# Pull out the single column for the "USA" code.
filtered_df = filter_data(covid_df, "USA")
# NOTE(review): the call below is disabled. extract_full_country_data reads a
# "data" entry, which presumably exists only in the COVID_WHOLE_DATA_URL payload; confirm.
# full_df = extract_full_country_data(filtered_df)

In [4]:
import matplotlib.pyplot as plt
import seaborn as sns
import numpy as np
# Apply seaborn's default theme to the matplotlib figures drawn from here on.
sns.set()
# IPython magic: render matplotlib figures inline in the notebook output.
%matplotlib inline

In [5]:
# Transpose so every country/aggregate code becomes a row, then let pandas pick
# the best nullable dtype for each column in one chained step.
countries_df = covid_df.transpose().convert_dtypes()
# Print the per-column non-null counts and dtypes.
countries_df.info()

<class 'pandas.core.frame.DataFrame'>
Index: 235 entries, AFG to ZWE
Data columns (total 66 columns):
 #   Column                                      Non-Null Count  Dtype  
---  ------                                      --------------  -----  
 0   continent                                   222 non-null    string 
 1   location                                    235 non-null    string 
 2   last_updated_date                           235 non-null    string 
 3   total_cases                                 229 non-null    Int64  
 4   new_cases                                   229 non-null    Int64  
 5   new_cases_smoothed                          229 non-null    Float64
 6   total_deaths                                223 non-null    Int64  
 7   new_deaths                                  223 non-null    Int64  
 8   new_deaths_smoothed                         223 non-null    Float64
 9   total_cases_per_million                     228 non-null    Float64
 10  new_cases_per_mil

In [25]:
# Pairwise correlation matrix over the numeric columns only.
# NOTE(review): countries_df still holds OWID_* aggregate rows (e.g. OWID_WRL,
# OWID_AFR), which may inflate these coefficients; confirm whether to drop them.
corr = (
    countries_df
    .select_dtypes(include=np.number)
    .corr()
)
# Ten columns most strongly correlated with new_cases, strongest first.
(
    corr['new_cases']
    .sort_values(ascending=False)
    .head(10)
)
# To draw the whole matrix instead: sns.heatmap(corr)

new_cases                  1.000000
new_deaths                 0.963412
new_cases_smoothed         0.943911
new_deaths_smoothed        0.925382
total_cases                0.907981
total_deaths               0.865402
total_boosters             0.819575
weekly_icu_admissions      0.807323
total_vaccinations         0.791586
people_fully_vaccinated    0.782795
Name: new_cases, dtype: float64

In [7]:
# Show the total_deaths column for every row (countries and OWID_* aggregates alike).
countries_df.loc[:, 'total_deaths']

AFG            7709
OWID_AFR     254437
ALB            3497
DZA            6875
AND             153
             ...   
WLF               7
OWID_WRL    6308976
YEM            2149
ZMB            3989
ZWE            5518
Name: total_deaths, Length: 235, dtype: Int64