<a href="https://colab.research.google.com/github/aidasalova/COVID_analysis/blob/main/Coronavirus_Plot_Analysis_Weekly_Dataset.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [None]:
#@title STEP 1 - Import Libraries (Click on Run button)

#Import libraries
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import ipywidgets as widgets
import seaborn as sns

print("Step completed successfully")

In [None]:
#@title STEP 2 - Import Weekly CSV Dataset from ECDC (Click on Run button)

# Turn off pandas' chained-assignment warning for the whole session (pandas' default is 'warn')
pd.set_option("mode.chained_assignment", None)

def read_file(url, reps, max_reps=3):
  """Load the CSV at ``url`` into a DataFrame sorted by ``dateRep``, retrying on failure.

  Args:
    url: Path or URL handed to ``pd.read_csv``. The first column is parsed as
      day-first dates and the frame must contain a ``dateRep`` column.
    reps: Number of the current attempt; callers start with 1.
    max_reps: A failed attempt is retried while its number is <= ``max_reps``,
      so the default allows up to four attempts when starting from 1.

  Returns:
    The loaded DataFrame ordered by ascending ``dateRep`` (rows sharing a date
    keep their file order), or ``None`` when every attempt failed.
  """
  attempt = reps
  while True:
    try:
      df = pd.read_csv(url, parse_dates=[0], dayfirst=True)
      # Leaves an already-parsed datetime column untouched; unparsed strings must be ISO formatted
      df['dateRep'] = pd.to_datetime(df['dateRep'], format='%Y-%m-%d')
      # sort_values returns a new frame, so the result has to be kept
      df = df.sort_values(by='dateRep', kind='stable')
    except Exception:
      # Exception (not a bare except) so Ctrl-C / kernel interrupt still stops the retries
      if attempt <= max_reps:
        attempt += 1
        continue
      print("Cannot load the file, please try later")
      return None
    print("Step completed successfully")
    return df

# Location of the ECDC case-distribution CSV; loading starts at attempt number 1
ecdc_weekly_csv_url = "https://opendata.ecdc.europa.eu/covid19/casedistribution/csv/data.csv"
df = read_file(ecdc_weekly_csv_url, 1)

In [None]:
#@title STEP 3 - Enable country selection (Click on Run button)

# Distinct country names, in the order they first appear in the dataset
country_list = df["countriesAndTerritories"].unique().tolist()

# Two identically configured selectors, both starting on the first country in the list
country_input_1, country_input_2 = (
    widgets.Dropdown(
        options=country_list,
        value=country_list[0],
        description='Country:',
        disabled=False,
    )
    for _ in range(2)
)

print("Step completed successfully")

In [None]:
#@title STEP 4.1 - Select country 1 from the dropdown

# Bare expression: the notebook renders the first dropdown as this cell's output
country_input_1

In [None]:
#@title STEP 4.2 - Select country 2 from the dropdown

# Bare expression: the notebook renders the second dropdown as this cell's output
country_input_2

In [None]:
#@title STEP 5 - Define function to plot weekly COVID-19 cases for two selected countries (click on Run button)

def my_graph_comparison():
    """Plot weekly COVID-19 cases over time for the two countries chosen in the dropdowns.

    Reads the global ``df`` and the current values of ``country_input_1`` and
    ``country_input_2``, then draws one line per country on a single 20x10 figure.
    """
    first, second = country_input_1.value, country_input_2.value

    # Collect (dates, weekly cases, legend label) for each country before drawing
    series = []
    for name in (first, second):
        rows = df[df["countriesAndTerritories"] == name]
        series.append((rows["dateRep"], rows["cases_weekly"], name))

    _fig, ax = plt.subplots(figsize=(20, 10))
    for dates, counts, name in series:
        ax.plot(dates, counts, label=name)
    ax.legend()
    ax.set_title(f"Weekly Covid-19 cases in {first} and {second}")
    plt.show()

print("Step completed successfully")

In [None]:
#@title STEP 6 - Define function to plot weekly COVID-19 deaths for two selected countries (click on Run button)

def my_graph_weekly_deaths_comparison():
    """Plot weekly COVID-19 deaths over time for the two countries chosen in the dropdowns.

    Reads the global ``df`` and the current values of ``country_input_1`` and
    ``country_input_2``, then draws one line per country on a single 20x10 figure.
    """
    first, second = country_input_1.value, country_input_2.value

    # Collect (dates, weekly deaths, legend label) for each country before drawing
    series = []
    for name in (first, second):
        rows = df[df["countriesAndTerritories"] == name]
        series.append((rows["dateRep"], rows["deaths_weekly"], name))

    _fig, ax = plt.subplots(figsize=(20, 10))
    for dates, counts, name in series:
        ax.plot(dates, counts, label=name)
    ax.legend()
    ax.set_title(f"Weekly Covid-19 deaths in {first} and {second}")
    plt.show()

print("Step completed successfully")

In [None]:
#@title STEP 7 - Define function to plot weekly COVID-19 cases and deaths for two selected countries (click on Run button)

def cases_deaths_comparison():
    """Plot weekly cases and weekly deaths for both selected countries on one log-scale chart.

    Reads the global ``df`` and the current dropdown values. Four lines are drawn in
    this order: cases for country 1, cases for country 2, deaths for country 1,
    deaths for country 2.
    """
    first, second = country_input_1.value, country_input_2.value
    rows_first = df[df["countriesAndTerritories"] == first]
    rows_second = df[df["countriesAndTerritories"] == second]

    # (rows, value column, legend label) in drawing order
    layout = (
        (rows_first, "cases_weekly", f"Weekly cases in {first}"),
        (rows_second, "cases_weekly", f"Weekly cases in {second}"),
        (rows_first, "deaths_weekly", f"Weekly deaths in {first}"),
        (rows_second, "deaths_weekly", f"Weekly deaths in {second}"),
    )
    curves = [(rows["dateRep"], rows[column], legend) for rows, column, legend in layout]

    _fig, ax = plt.subplots(figsize=(20, 10))
    for dates, counts, legend in curves:
        ax.plot(dates, counts, label=legend)
    ax.set_yscale("log")
    ax.legend()
    ax.set_title(f"Weekly Covid-19 cases and deaths in {first} and {second}")
    plt.show()

print("Step completed successfully")

In [None]:
#@title STEP 8 - Define function to calculate countries' percentile by the weekly amount of COVID-19 cases and deaths (click on Run button)

def percentil_absolute_comp_2(data=None, country_1=None, country_2=None):
    """Print the percentile rank of two countries for the latest weekly cases and deaths.

    Only rows whose ``dateRep`` equals the most recent date in ``data`` are ranked.
    For each country the percentile (0-100, rounded to 2 decimals) and the absolute
    weekly figure are printed: first cases for both countries, then deaths.

    Args:
        data: DataFrame with ``dateRep``, ``countriesAndTerritories``,
            ``cases_weekly`` and ``deaths_weekly`` columns. Defaults to the
            notebook-level ``df``.
        country_1: First country name. Defaults to ``country_input_1.value``.
        country_2: Second country name. Defaults to ``country_input_2.value``.

    Returns:
        None. A country with no row on the latest date gets a one-line notice
        instead of its percentile lines.
    """
    # Fall back to the notebook globals so existing zero-argument calls keep working
    if data is None:
        data = df
    if country_1 is None:
        country_1 = country_input_1.value
    if country_2 is None:
        country_2 = country_input_2.value

    latest = data[data["dateRep"] == data["dateRep"].max()]
    # assign() builds a new frame, so the filtered slice of ``data`` is never written to
    ranked = latest.assign(
        pct_rank_cases=latest["cases_weekly"].rank(pct=True),
        pct_rank_deaths=latest["deaths_weekly"].rank(pct=True),
    )

    # First matching row per selected country; None when the country has no latest-date row
    selected = (country_1, country_2)
    rows = {}
    for country in selected:
        match = ranked[ranked["countriesAndTerritories"] == country]
        rows[country] = None if match.empty else match.iloc[[0]]

    for country, row in rows.items():
        if row is None:
            print(f"{country} has no data for the latest reporting date")

    # Columns are read by name so the output does not depend on the CSV column order
    for metric, rank_column in (("cases", "pct_rank_cases"), ("deaths", "pct_rank_deaths")):
        for country in selected:
            row = rows[country]
            if row is None:
                continue
            percentile = round(row[rank_column].iloc[0] * 100, 2)
            absolute = row[f"{metric}_weekly"].iloc[0]
            print(f"{country} is at percentile {percentile} in the world by the latest weekly amount of Covid-19 {metric} with {absolute} weekly {metric}")

print("Step completed successfully")

In [None]:
#@title STEP 9 - Define function to boxplot the latest global cases and deaths by continent

def my_boxplot():
    """Show two log-scale boxplots by continent for the latest date: weekly cases, then weekly deaths.

    Reads the global ``df`` and keeps only the rows whose ``dateRep`` is the most
    recent one. Each boxplot is shown on its own before the next is drawn.
    """
    latest_rows = df[df["dateRep"] == df["dateRep"].max()]

    # (value column, title, y-axis label) for each boxplot, in display order
    panels = (
        ("cases_weekly", "Weekly cases globally", 'Weekly Cases'),
        ("deaths_weekly", "Weekly deaths globally", 'Weekly Deaths'),
    )
    for column, title, y_label in panels:
        ax = sns.boxplot(x="continentExp", y=column, data=latest_rows)
        ax.set_yscale("log")
        ax.set_title(title)
        ax.set(xlabel='Continent', ylabel=y_label)
        plt.show()

print("Step completed successfully")

In [None]:
#@title STEP 10 - Visualize all results from the above functions (click on Run and view the output below)

# Run every report in order: cases plot, deaths plot, combined log-scale plot,
# percentile summary, then the continent boxplots
for show_report in (
    my_graph_comparison,
    my_graph_weekly_deaths_comparison,
    cases_deaths_comparison,
    percentil_absolute_comp_2,
    my_boxplot,
):
    show_report()