# Covid-19 Project: Data analysis and visualization

Previous steps see: Covid-19Project_DataPreparation1, Covid-19Project_DataPreparation2, Covid-19Project_DataPreparation2A_PopulationWebScrap

Raw data Source: COVID-19 Data Repository by the Center for Systems Science and Engineering (CSSE) at Johns Hopkins University <br>
https://github.com/CSSEGISandData/COVID-19

In [49]:
# Import packages

import pandas as pd
import numpy as np

import plotly.express as px
import plotly.graph_objects as go

import folium
from folium import plugins

import ipywidgets as widgets
from ipywidgets import interact

from plotly.offline import iplot, init_notebook_mode
init_notebook_mode(connected=True)

In [2]:
# Import datasets (prepared)

# -> Data preparation in other files: Covid-19 Project: Data Preparation Part 1 / Part 2 / Population Web Scrap

# Cumulative 
confirmed_global_cleaned = pd.read_csv("C:/Users/Agnieszka/Downloads/Datasets/Covid_temp/confirmed_global_cleaned.csv")
deaths_global_cleaned = pd.read_csv("C:/Users/Agnieszka/Downloads/Datasets/Covid_temp/deaths_global_cleaned.csv")
recovered_global_cleaned = pd.read_csv("C:/Users/Agnieszka/Downloads/Datasets/Covid_temp/recovered_global_cleaned.csv")

# Cases per day
confirmed_global_cpr = pd.read_csv("C:/Users/Agnieszka/Downloads/Datasets/Covid_temp/confirmed_global_cpr.csv")
deaths_global_cpr = pd.read_csv("C:/Users/Agnieszka/Downloads/Datasets/Covid_temp/deaths_global_cpr.csv")
recovered_global_cpr = pd.read_csv("C:/Users/Agnieszka/Downloads/Datasets/Covid_temp/recovered_global_cpr.csv")

# Incidence
confirmed_global_incidence = pd.read_csv("C:/Users/Agnieszka/Downloads/Datasets/Covid_temp/confirmed_global_incidence.csv")

In [3]:
# Some small adjustements for "_cleaned" datasets

def preparation1 (df):

    df.set_index("Country/Region", inplace=True)
    df.index.rename("Country", inplace=True)
    
    return df

In [4]:
confirmed_global = preparation1(confirmed_global_cleaned)
confirmed_global.tail(5)

Unnamed: 0_level_0,1/22/20,1/23/20,1/24/20,1/25/20,1/26/20,1/27/20,1/28/20,1/29/20,1/30/20,1/31/20,...,1/29/21,1/30/21,1/31/21,2/1/21,2/2/21,2/3/21,2/4/21,2/5/21,2/6/21,2/7/21
Country,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1
Wallis and Futuna,0,0,0,0,0,0,0,0,0,0,...,5,5,5,5,5,5,5,5,5,5
West Bank and Gaza,0,0,0,0,0,0,0,0,0,0,...,158168,158559,158962,159443,159956,160426,161087,161559,162029,162601
Yemen,0,0,0,0,0,0,0,0,0,0,...,2120,2120,2121,2122,2122,2122,2122,2124,2127,2127
Zambia,0,0,0,0,0,0,0,0,0,0,...,51624,53352,54217,55042,56233,57489,59003,60427,61427,62633
Zimbabwe,0,0,0,0,0,0,0,0,0,0,...,32952,33273,33388,33548,33814,33964,34171,34331,34487,34552


In [5]:
deaths_global = preparation1(deaths_global_cleaned)
deaths_global.head()

Unnamed: 0_level_0,1/22/20,1/23/20,1/24/20,1/25/20,1/26/20,1/27/20,1/28/20,1/29/20,1/30/20,1/31/20,...,1/29/21,1/30/21,1/31/21,2/1/21,2/2/21,2/3/21,2/4/21,2/5/21,2/6/21,2/7/21
Country,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1
Afghanistan,0,0,0,0,0,0,0,0,0,0,...,2399,2400,2400,2404,2405,2406,2407,2407,2408,2410
Albania,0,0,0,0,0,0,0,0,0,0,...,1358,1369,1380,1393,1398,1404,1418,1433,1446,1460
Algeria,0,0,0,0,0,0,0,0,0,0,...,2884,2888,2891,2894,2898,2900,2904,2909,2909,2914
Andorra,0,0,0,0,0,0,0,0,0,0,...,100,101,101,101,102,103,103,104,105,106
Angola,0,0,0,0,0,0,0,0,0,0,...,464,464,466,466,468,470,473,474,475,475


In [6]:
recovered_global = preparation1(recovered_global_cleaned)
recovered_global.head()

Unnamed: 0_level_0,1/22/20,1/23/20,1/24/20,1/25/20,1/26/20,1/27/20,1/28/20,1/29/20,1/30/20,1/31/20,...,1/29/21,1/30/21,1/31/21,2/1/21,2/2/21,2/3/21,2/4/21,2/5/21,2/6/21,2/7/21
Country,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1
Afghanistan,0,0,0,0,0,0,0,0,0,0,...,47606,47609,47679,47723,47798,47982,47995,48018,48027,48041
Albania,0,0,0,0,0,0,0,0,0,0,...,46420,46927,47424,47922,48377,48958,49543,50219,50875,51444
Algeria,0,0,0,0,0,0,0,0,0,0,...,72956,73152,73344,73530,73732,73940,74136,74330,74330,74692
Andorra,0,0,0,0,0,0,0,0,0,0,...,8986,9045,9093,9206,9252,9313,9395,9444,9492,9533
Angola,0,0,0,0,0,0,0,0,0,0,...,17918,18015,18035,18180,18232,18335,18381,18431,18475,18540


In [7]:
# Some small adjustements for "_cpr" datasets

def preparation2 (df_cpr):
    
    df_cpr.set_index("Unnamed: 0", inplace=True)
    df_cpr.index.name = None
    df_cpr_trans = df_cpr.transpose()
    df_cpr_trans.index.rename("Country", inplace=True)
    df_cpr_trans.sort_index(inplace=True)
    #df.drop(columns=["total"], inplace=True)
    return df_cpr_trans

In [8]:
confirmed_global_cpr2 = preparation2(confirmed_global_cpr)
confirmed_global_cpr2.head()

Unnamed: 0_level_0,1/22/20,1/23/20,1/24/20,1/25/20,1/26/20,1/27/20,1/28/20,1/29/20,1/30/20,1/31/20,...,1/29/21,1/30/21,1/31/21,2/1/21,2/2/21,2/3/21,2/4/21,2/5/21,2/6/21,2/7/21
Country,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1
Afghanistan,0,0,0,0,0,0,0,0,0,0,...,48,69,15,36,62,53,57,34,65,5
Albania,0,0,0,0,0,0,0,0,0,0,...,896,901,876,865,942,1007,1052,1089,1130,1124
Algeria,0,0,0,0,0,0,0,0,0,0,...,277,235,217,239,263,275,265,248,0,459
Andorra,0,0,0,0,0,0,0,0,0,0,...,58,48,52,35,45,53,67,35,34,45
Angola,0,0,0,0,0,0,0,0,0,0,...,51,59,14,33,71,37,59,34,32,24


In [9]:
deaths_global_cpr2 = preparation2(deaths_global_cpr)
deaths_global_cpr2.head()

Unnamed: 0_level_0,1/22/20,1/23/20,1/24/20,1/25/20,1/26/20,1/27/20,1/28/20,1/29/20,1/30/20,1/31/20,...,1/29/21,1/30/21,1/31/21,2/1/21,2/2/21,2/3/21,2/4/21,2/5/21,2/6/21,2/7/21
Country,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1
Afghanistan,0,0,0,0,0,0,0,0,0,0,...,2,1,0,4,1,1,1,0,1,2
Albania,0,0,0,0,0,0,0,0,0,0,...,8,11,11,13,5,6,14,15,13,14
Algeria,0,0,0,0,0,0,0,0,0,0,...,3,4,3,3,4,2,4,5,0,5
Andorra,0,0,0,0,0,0,0,0,0,0,...,0,1,0,0,1,1,0,1,1,1
Angola,0,0,0,0,0,0,0,0,0,0,...,0,0,2,0,2,2,3,1,1,0


In [10]:
recovered_global_cpr2 = preparation2(recovered_global_cpr)
recovered_global_cpr2.head()

Unnamed: 0_level_0,1/22/20,1/23/20,1/24/20,1/25/20,1/26/20,1/27/20,1/28/20,1/29/20,1/30/20,1/31/20,...,1/29/21,1/30/21,1/31/21,2/1/21,2/2/21,2/3/21,2/4/21,2/5/21,2/6/21,2/7/21
Country,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1
Afghanistan,0,0,0,0,0,0,0,0,0,0,...,23,3,70,44,75,184,13,23,9,14
Albania,0,0,0,0,0,0,0,0,0,0,...,514,507,497,498,455,581,585,676,656,569
Algeria,0,0,0,0,0,0,0,0,0,0,...,211,196,192,186,202,208,196,194,0,362
Andorra,0,0,0,0,0,0,0,0,0,0,...,44,59,48,113,46,61,82,49,48,41
Angola,0,0,0,0,0,0,0,0,0,0,...,226,97,20,145,52,103,46,50,44,65


In [11]:
# Some adjustments for the "_incidence" dataset

confirmed_global_incidence.rename(columns={"Unnamed: 0" :"Country"}, inplace=True)
confirmed_global_incidence.set_index("Country", inplace=True)
confirmed_global_incidence.head()

Unnamed: 0_level_0,1/22/20,1/23/20,1/24/20,1/25/20,1/26/20,1/27/20,1/28/20,1/29/20,1/30/20,1/31/20,...,2/1/21,2/2/21,2/3/21,2/4/21,2/5/21,2/6/21,2/7/21,div100,sum7days,incidence
Country,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1
Afghanistan,0,0,0,0,0,0,0,0,0,0,...,36,62,53,57,34,65,5,389.28,312,0.8
Albania,0,0,0,0,0,0,0,0,0,0,...,865,942,1007,1052,1089,1130,1124,28.78,7209,250.49
Algeria,0,0,0,0,0,0,0,0,0,0,...,239,263,275,265,248,0,459,438.51,1749,3.99
Andorra,0,0,0,0,0,0,0,0,0,0,...,35,45,53,67,35,34,45,0.77,314,407.79
Angola,0,0,0,0,0,0,0,0,0,0,...,33,71,37,59,34,32,24,328.66,290,0.88


In [12]:
# All datasets:

# 1. Cumulative sum of cases:

# confirmed_global
# deaths_global
# recovered_global

# 2. Cases per day

# confirmed_global_cpr2
# deaths_global_cpr2
# recovered_global_cpr2

# 3. Incidence

# confirmed_global_incidence

<b> Choropleth worldwide map for total number of cases </b>

1) With Folium

In [12]:
# Json file with country borders/polygons is necessary to create a choropleth map with Folium 

# Read json file

# Data source: https://www.kaggle.com/ktochylin/world-countries?select=world-countries.json
countries_json = r"C:/Users/Agnieszka/Downloads/Datasets/World_countries/world-countries.json"

# Other possibility to read json file:

# import json
# file = "C:/Users/Agnieszka/Downloads/Datasets/World_countries/world-countries.json"
# countries_json = json.load(open(file, 'r'))
# countries_json["features"][1]

# Other available json files:
# https://github.com/python-visualization/folium/blob/master/examples/data/world-countries.json
# https://datahub.io/core/geo-countries ("C:/Users/Agnieszka/Downloads/Datasets/World_countries/countries.json")

In [13]:
# Prepare dataframe with confirmed cases ("confirmed_global")

# Calculate / create column with total number of cases (total = last date from the dataset)
confirmed_global["total"] = confirmed_global.iloc[:,-1]

# Country as a column not index
confirmed_global.reset_index(inplace=True)
confirmed_global.tail()

Unnamed: 0,Country,1/22/20,1/23/20,1/24/20,1/25/20,1/26/20,1/27/20,1/28/20,1/29/20,1/30/20,...,1/30/21,1/31/21,2/1/21,2/2/21,2/3/21,2/4/21,2/5/21,2/6/21,2/7/21,total
214,Wallis and Futuna,0,0,0,0,0,0,0,0,0,...,5,5,5,5,5,5,5,5,5,5
215,West Bank and Gaza,0,0,0,0,0,0,0,0,0,...,158559,158962,159443,159956,160426,161087,161559,162029,162601,162601
216,Yemen,0,0,0,0,0,0,0,0,0,...,2120,2121,2122,2122,2122,2122,2124,2127,2127,2127
217,Zambia,0,0,0,0,0,0,0,0,0,...,53352,54217,55042,56233,57489,59003,60427,61427,62633,62633
218,Zimbabwe,0,0,0,0,0,0,0,0,0,...,33273,33388,33548,33814,33964,34171,34331,34487,34552,34552


Create map

In [16]:
# Folium:
# https://python-visualization.github.io/folium/modules.html
# https://python-visualization.github.io/folium/quickstart.html
# https://leafletjs.com/reference-1.6.0.html#map

In [14]:
map_total_confirmed = folium.Map(location=[0, 0], zoom_start=2)

folium.Choropleth(
    geo_data=countries_json,
    name="choropleth",
    data=confirmed_global,
    columns=["Country", "total"],
    key_on="feature.properties.name", # from json file -> country name
    fill_color="YlOrRd",
    fill_opacity=0.7,
    line_opacity=0.2,
    legend_name="Total number of cases",
).add_to(map_total_confirmed)

map_total_confirmed

Some countries are missing (black fill) -> again problem with unmatching country names

In [15]:
# Modify country names in the dataframe so that they match the names in the json file

# Check country names in json file
# after: https://towardsdatascience.com/choropleth-maps-with-folium-1a5b8bcdd392

# open the json file - json.load() methods returns a python dictionary
import json
with open(countries_json) as communities_file:
    countries_json_test = json.load(communities_file)
    
# loop through the dictionary to obtain the name of the communities in the json file
names_json = []
for index in range(len(countries_json_test['features'])):
    names_json.append(countries_json_test['features'][index]['properties']['name'])
    
names_json

['Afghanistan',
 'Angola',
 'Albania',
 'United Arab Emirates',
 'Argentina',
 'Armenia',
 'Antarctica',
 'French Southern and Antarctic Lands',
 'Australia',
 'Austria',
 'Azerbaijan',
 'Burundi',
 'Belgium',
 'Benin',
 'Burkina Faso',
 'Bangladesh',
 'Bulgaria',
 'The Bahamas',
 'Bosnia and Herzegovina',
 'Belarus',
 'Belize',
 'Bolivia',
 'Brazil',
 'Brunei',
 'Bhutan',
 'Botswana',
 'Central African Republic',
 'Canada',
 'Switzerland',
 'Chile',
 'China',
 'Ivory Coast',
 'Cameroon',
 'Democratic Republic of the Congo',
 'Republic of the Congo',
 'Colombia',
 'Costa Rica',
 'Cuba',
 'Northern Cyprus',
 'Cyprus',
 'Czech Republic',
 'Germany',
 'Djibouti',
 'Denmark',
 'Dominican Republic',
 'Algeria',
 'Ecuador',
 'Egypt',
 'Eritrea',
 'Spain',
 'Estonia',
 'Ethiopia',
 'Finland',
 'Fiji',
 'Falkland Islands',
 'France',
 'Gabon',
 'United Kingdom',
 'Georgia',
 'Ghana',
 'Guinea',
 'Gambia',
 'Guinea Bissau',
 'Equatorial Guinea',
 'Greece',
 'Greenland',
 'Guatemala',
 'Guyana',

In [16]:
# Load country names from Covid dataset
confirmed_global.set_index("Country", inplace=True)
confirmed_global.index.values

array(['Afghanistan', 'Albania', 'Algeria', 'Andorra', 'Angola',
       'Anguilla', 'Antigua and Barbuda', 'Argentina', 'Armenia', 'Aruba',
       'Australia', 'Austria', 'Azerbaijan', 'Bahamas', 'Bahrain',
       'Bangladesh', 'Barbados', 'Belarus', 'Belgium', 'Belize', 'Benin',
       'Bermuda', 'Bhutan', 'Bolivia', 'Bonaire, Sint Eustatius and Saba',
       'Bosnia and Herzegovina', 'Botswana', 'Brazil',
       'British Virgin Islands', 'Brunei', 'Bulgaria', 'Burkina Faso',
       'Burma', 'Burundi', 'Cabo Verde', 'Cambodia', 'Cameroon', 'Canada',
       'Cayman Islands', 'Central African Republic', 'Chad',
       'Channel Islands', 'Chile', 'China', 'Colombia', 'Comoros',
       'Congo (Brazzaville)', 'Congo (Kinshasa)', 'Costa Rica',
       "Cote d'Ivoire", 'Croatia', 'Cuba', 'Curacao', 'Cyprus', 'Czechia',
       'Denmark', 'Diamond Princess', 'Djibouti', 'Dominica',
       'Dominican Republic', 'Ecuador', 'Egypt', 'El Salvador',
       'Equatorial Guinea', 'Eritrea', 'Estonia', 

There is no data in the Covid-19 dataset available for: North Korea, Northern Cyprus, Puerto Rico (cases from PR are included in the data from the USA), Somaliland, Turkmenistan & Western Sahar <br>
Other countries will be renamed in the Covid-19 dataset

In [17]:
# Prepare dataset for creating a choropleth map

# Function for replacing country names

# df = confirmed_global

def prepare_for_choro (df):

    # Replace country names
    df_choro = df.rename(index={"Bahamas":"The Bahamas",
                                    "Burma": "Myanmar",
                                    "Congo (Brazzaville)": "Republic of the Congo",
                                    "Congo (Kinshasa)": "Democratic Republic of the Congo",
                                    "Cote d'Ivoire": "Ivory Coast",
                                    "Czechia": "Czech Republic",
                                    "Eswatini" : "Swaziland", 
                                    "Falkland Islands (Malvinas)": "Falkland Islands",
                                    "Guinea-Bissau": "Guinea Bissau",
                                    "Korea, South": "South Korea",
                                    "North Macedonia": "Macedonia",
                                    "Serbia": "Republic of Serbia",
                                    "Taiwan*": "Taiwan",
                                    "Tanzania": "United Republic of Tanzania",
                                    "Timor-Leste": "East Timor",
                                    "United States": "United States of America",
                                    "West Bank and Gaza": "West Bank",
                                     })
    # Create column with total number of cases
    df_choro["total"] = df_choro.iloc[:,-1]
    
    # Reset index 
    df_choro.reset_index(inplace=True)
    
    return df_choro

In [18]:
confirmed_global_choro = prepare_for_choro(confirmed_global)
confirmed_global_choro.tail()


Unnamed: 0,Country,1/22/20,1/23/20,1/24/20,1/25/20,1/26/20,1/27/20,1/28/20,1/29/20,1/30/20,...,1/30/21,1/31/21,2/1/21,2/2/21,2/3/21,2/4/21,2/5/21,2/6/21,2/7/21,total
214,Wallis and Futuna,0,0,0,0,0,0,0,0,0,...,5,5,5,5,5,5,5,5,5,5
215,West Bank,0,0,0,0,0,0,0,0,0,...,158559,158962,159443,159956,160426,161087,161559,162029,162601,162601
216,Yemen,0,0,0,0,0,0,0,0,0,...,2120,2121,2122,2122,2122,2122,2124,2127,2127,2127
217,Zambia,0,0,0,0,0,0,0,0,0,...,53352,54217,55042,56233,57489,59003,60427,61427,62633,62633
218,Zimbabwe,0,0,0,0,0,0,0,0,0,...,33273,33388,33548,33814,33964,34171,34331,34487,34552,34552


In [19]:
map_total_confirmed2 = folium.Map(location=[0, 0], zoom_start=2)

choropleth = folium.Choropleth(
                geo_data=countries_json,
                name="choropleth",
                data=confirmed_global_choro,
                columns=["Country", "total"],
                key_on="feature.properties.name",
                fill_color="YlOrRd",
                fill_opacity=0.7,
                line_opacity=0.2,
                legend_name="Cumulative number of Covid-19 cases",
            ).add_to(map_total_confirmed2)

# add labels indicating the country name
style_function = "font-size: 10px; font-weight: bold"
choropleth.geojson.add_child(
   folium.features.GeoJsonTooltip(['name'], style=style_function, labels=False))

map_total_confirmed2

This map does not provide satisfactory results -> Difference between country cannot be seen due to significantly higher number of cases in the <b>US </b>, <b> Brazil </b> and <b>India </b>; These three countries could be excluded from the dataset to better see the differences between other countries

2) With Plotly

In [30]:
# Choropleth map can be also created using plotly.graph_objects()
# json file is not necessary, but ISO-3 country codes must be in the data frame

# https://plotly.com/python/choropleth-maps/
# https://plotly.github.io/plotly.py-docs/generated/plotly.graph_objects.Choropleth.html

#  We can extract the ISO-3 country codes from the json file ("id") and add to the dataframe

In [20]:
# Parsing json files: https://www.geeksforgeeks.org/pandas-parsing-json-dataset/

import json
from pandas.io.json import json_normalize 

with open('C:/Users/Agnieszka/Downloads/Datasets/World_countries/world-countries.json') as f:
     data = json.load(f)

test = pd.json_normalize(data['features'])
test.head()

Unnamed: 0,type,id,properties.name,geometry.type,geometry.coordinates
0,Feature,AFG,Afghanistan,Polygon,"[[[61.210817, 35.650072], [62.230651, 35.27066..."
1,Feature,AGO,Angola,MultiPolygon,"[[[[16.326528, -5.87747], [16.57318, -6.622645..."
2,Feature,ALB,Albania,Polygon,"[[[20.590247, 41.855404], [20.463175, 41.51508..."
3,Feature,ARE,United Arab Emirates,Polygon,"[[[51.579519, 24.245497], [51.757441, 24.29407..."
4,Feature,ARG,Argentina,MultiPolygon,"[[[[-65.5, -55.2], [-66.45, -55.25], [-66.9599..."


In [21]:
# Extract id (ISO code) and properties.name (country name) from json file

ISO = test[["id","properties.name"]]
#ISO.head()
ISO.rename(columns={"properties.name": "Country"}, inplace=True)

ISO.set_index("Country", inplace=True)
ISO_3 = ISO.sort_index()

# Check if each country has an id
ISO_3["id"].value_counts()

A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  return super().rename(


-99    4
LSO    1
GEO    1
NZL    1
SDS    1
      ..
JOR    1
NGA    1
HRV    1
SDN    1
ARM    1
Name: id, Length: 174, dtype: int64

In [22]:
# for 4 countries ISO codes are missing (value -99)

mask = ISO_3["id"] == "-99"
ISO_3[mask]

Unnamed: 0_level_0,id
Country,Unnamed: 1_level_1
Kosovo,-99
Northern Cyprus,-99
Somaliland,-99
Western Sahara,-99


In [23]:
# Kosovo ISO-3 country code: XKX
# Northern Cyprus: -
# Somaliland: - 
# Western Sahara: ESH

ISO_3["id"]["Kosovo"] =  "XKX"
ISO_3["id"]["Western Sahara"] =  "ESH"

mask = ISO_3["id"] == "-99"
ISO_3[mask]

Unnamed: 0_level_0,id
Country,Unnamed: 1_level_1
Northern Cyprus,-99
Somaliland,-99


In [24]:
# Merge "confirmed_global_choro" dataset with ISO_3 codes

confirmed_global_choro_ISO = confirmed_global_choro.set_index("Country")

confirmed_choro_ISO = pd.concat([ISO_3, confirmed_global_choro_ISO["total"]], axis= 1, join="inner")
confirmed_choro_ISO.reset_index(inplace=True)

# Country code of South Sudan has to be changed to SSD (in json file - SDS)

confirmed_choro_ISO["id"][137] =  "SSD"
confirmed_choro_ISO.tail()

A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  confirmed_choro_ISO["id"][137] =  "SSD"


Unnamed: 0,Country,id,total
164,Vietnam,VNM,2001
165,West Bank,PSE,162601
166,Yemen,YEM,2127
167,Zambia,ZMB,62633
168,Zimbabwe,ZWE,34552


In [50]:
fig = go.Figure(data=go.Choropleth(
    locations = confirmed_choro_ISO["id"],
    z = confirmed_choro_ISO["total"],
    text = confirmed_choro_ISO["Country"], 
    colorscale = "YlOrRd",
    #Greys,YlGnBu,Greens,YlOrRd,Bluered,RdBu,Reds,Blues,Picnic,
    #Rainbow,Portland,Jet,H ot,Blackbody,Earth,Electric,Viridis,Cividis
    autocolorscale=False,
    reversescale=False,
    marker_line_color="darkgray",
    marker_line_width=0.5,
    colorbar_title = "Total cases",
    # Hover text: Country + total number of cases
    hovertemplate = 
    "<b>%{text}</b><br><br>" +
    "Total cases: %{z:,.0f}<br><extra></extra>",
))
# <extra></extra> to get rid of "Trace 0" in the second box

fig.update_layout(
    title_text="Total cases worldwide" + " (" + (confirmed_global_choro.columns[-2]) + ")",
    geo=dict(
        showframe=False,
        showcoastlines=False,
        projection_type="equirectangular"
    ))


# fig.show()

This choropleth map is a little bit better, but the differences between countries are still to high <br>
However, it can be seen that the <b> highest </b> number of cases (except USA, Brazil and India) are registered in Argentina, Colombia, Peru, Mexico, Russia, Southern Europe, France, Germany, Poland, UK, Turkey, Russia and South Africa (> 1,5M) <br>
In African and Asian countries as well as in Canada, Northern Europe, Paraguay, Bolivia Uruguay and Australia, the number of confirmed cases is relatively <b> low </b>. The question is, how many people has been tested in those countries and if those numbers reflect the real situation, especially in the developing countries...

<b> Worldwide map with incidence rates </b>

In [26]:
# Load and prepare dataset with cooridnates

coord_incid = pd.read_csv("C:/Users/Agnieszka/Downloads/Datasets/Covid_temp/coordinates_for_incidence.csv")
coord_incid.rename(columns={"Unnamed: 0": "Country"}, inplace=True)
coord_incid.set_index("Country", inplace=True)
coord_incid.head()

Unnamed: 0_level_0,Lat,Long
Country,Unnamed: 1_level_1,Unnamed: 2_level_1
Afghanistan,33.93911,67.709953
Albania,41.1533,20.1683
Algeria,28.0339,1.6596
Andorra,42.5063,1.5218
Angola,-11.2027,17.8739


In [27]:
confirmed_global_incidence.rename(columns={"Unnamed: 0" :"Country"}, inplace=True)
confirmed_global_incidence.reset_index(inplace=True)
confirmed_global_incidence.tail()

Unnamed: 0,Country,1/22/20,1/23/20,1/24/20,1/25/20,1/26/20,1/27/20,1/28/20,1/29/20,1/30/20,...,2/1/21,2/2/21,2/3/21,2/4/21,2/5/21,2/6/21,2/7/21,div100,sum7days,incidence
211,Wallis and Futuna,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0.11,0,0.0
212,West Bank and Gaza,0,0,0,0,0,0,0,0,0,...,481,513,470,661,472,470,572,51.01,3639,71.34
213,Yemen,0,0,0,0,0,0,0,0,0,...,1,0,0,0,2,3,0,298.26,6,0.02
214,Zambia,0,0,0,0,0,0,0,0,0,...,825,1191,1256,1514,1424,1000,1206,183.84,8416,45.78
215,Zimbabwe,0,0,0,0,0,0,0,0,0,...,160,266,150,207,160,156,65,148.63,1164,7.83


In [28]:
map_incidence = folium.Map(location=[0, 0], zoom_start=4, tiles="Stamenterrain")
   
# Color of the circle marker depends on the 7-days incidence rate
    
for lat, lon, value, name in zip(coord_incid["Lat"], coord_incid["Long"], confirmed_global_incidence["incidence"], confirmed_global_incidence["Country"]):

    #if value == 0.00:
         #confirmed_global_incidence["incidence"].replace(value, 0.00001, inplace=True)
    
    if value == 0.00001:
         confirmed_global_incidence["incidence"].replace(value, 0.00, inplace=True)
    
    # High risk countries (7-days incidence rate > 200) -> special regulations by coming back / arriving to Germany (test already before coming back + 10 days quarantine)
    if value >= 200:
        folium.CircleMarker([lat, lon], radius=value*0.02, tooltip = ("<strong> Country </strong>: " + str(name).capitalize() + '<br>''<strong> 7-days incidence </strong>' + " (" + str(confirmed_global_incidence.columns.values[-4]) + "): " + str(value) + '<br>'), color="red",fill_color="red", fill_opacity=0.3).add_to(map_incidence)
    
    # Risk countries (7-days incidence rate > 50 < 200) -> 10 days quarantine
    elif value >= 50:
        folium.CircleMarker([lat, lon], radius=value*0.02, tooltip = ("<strong> Country </strong>: " + str(name).capitalize() + '<br>''<strong> 7-days incidence </strong>' + " (" + str(confirmed_global_incidence.columns.values[-4]) + "): " + str(value) + '<br>'), color="yellow",fill_color="yellow", fill_opacity=0.3).add_to(map_incidence)

    # Safe countries No restrictions - traveling to these
    else:
        folium.CircleMarker([lat, lon], radius=value*0.02, tooltip = ("<strong> Country </strong>: " + str(name).capitalize() + '<br>''<strong> 7-days incidence </strong>' + " (" + str(confirmed_global_incidence.columns.values[-4]) + "): " + str(value) + '<br>'), color="green",fill_color="green", fill_opacity=0.3).add_to(map_incidence)
   
map_incidence


At the moment (09.02.2021), travel restrictions seem to depend on incidence rate only partially. The current RKI list of risk regions includes almost all countries but as can be seen in the map, the <b> incidence rates of many countries are lower than 50 </b>. However, the decision of German government seems to be right, since we do not really know how e.g. African and Asian countries are dealing with the pandemics and if the reported cases reflect the real situation. So unfortunately, it is not that easy to predict when and where we will be able to travel again... 

<b> Plots to monitor and compare the spread of Covid-19 in different countries </b> <br>
<br>
Is there still hope that the pandemic ends up one day? Do the vaccinations help? What about the virus mutations?

Dataset preparation

In [31]:
# Dataset preparation

# df_confirmed = "confirmed_global" dataset

def prep_plots_conf (df_confirmed):
    df_plots = df_confirmed.transpose()
    df_plots.drop("total", axis=0, inplace=True)   # only for confirmed_global
    df_plots.reset_index(inplace=True)
    df_plots.drop(index=0, axis=1, inplace=True) # only for confirmed_global
        
    df_plots["date"] = pd.to_datetime(df_plots["index"], format="%m/%d/%y")
    df_plots["index"] = df_plots["date"]
    df_plots.drop(columns="date", inplace = True)
    df_plots.rename(columns={"index": "date"}, inplace=True)
    
    return df_plots

In [32]:
confirmed_cases_plot = prep_plots_conf(confirmed_global)
confirmed_cases_plot.head()

Country,date,Afghanistan,Albania,Algeria,Andorra,Angola,Anguilla,Antigua and Barbuda,Argentina,Armenia,...,Uruguay,Uzbekistan,Vanuatu,Venezuela,Vietnam,Wallis and Futuna,West Bank and Gaza,Yemen,Zambia,Zimbabwe
1,2020-01-23,0,0,0,0,0,0,0,0,0,...,0,0,0,0,2,0,0,0,0,0
2,2020-01-24,0,0,0,0,0,0,0,0,0,...,0,0,0,0,2,0,0,0,0,0
3,2020-01-25,0,0,0,0,0,0,0,0,0,...,0,0,0,0,2,0,0,0,0,0
4,2020-01-26,0,0,0,0,0,0,0,0,0,...,0,0,0,0,2,0,0,0,0,0
5,2020-01-27,0,0,0,0,0,0,0,0,0,...,0,0,0,0,2,0,0,0,0,0


In [33]:
# df = "_global" datasets 

def prep_plots (df):
    df_plots = df.transpose()
    df_plots.reset_index(inplace=True)
        
    df_plots["date"] = pd.to_datetime(df_plots["index"], format="%m/%d/%y")
    df_plots["index"] = df_plots["date"]
    df_plots.drop(columns="date", inplace = True)
    df_plots.rename(columns={"index": "date"}, inplace=True)
    
    return df_plots

In [34]:
deaths_plot = prep_plots(deaths_global)
deaths_plot.head()

Country,date,Afghanistan,Albania,Algeria,Andorra,Angola,Anguilla,Antigua and Barbuda,Argentina,Armenia,...,Uruguay,Uzbekistan,Vanuatu,Venezuela,Vietnam,Wallis and Futuna,West Bank and Gaza,Yemen,Zambia,Zimbabwe
0,2020-01-22,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
1,2020-01-23,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
2,2020-01-24,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
3,2020-01-25,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
4,2020-01-26,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0


In [35]:
recovered_plot = prep_plots(recovered_global)
recovered_plot.head()

Country,date,Afghanistan,Albania,Algeria,Andorra,Angola,Anguilla,Antigua and Barbuda,Argentina,Armenia,...,Uruguay,Uzbekistan,Vanuatu,Venezuela,Vietnam,Wallis and Futuna,West Bank and Gaza,Yemen,Zambia,Zimbabwe
0,2020-01-22,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
1,2020-01-23,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
2,2020-01-24,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
3,2020-01-25,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
4,2020-01-26,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0


<b> 1) Scatter plots  </b> (comparison of total cases of two countries)

a) static with plotly.graph_objects (go)

In [51]:
# Static plots with plotly.graph_objects(go)

# df = any "_plot" dataset

def scatter_plot (df, country1, country2):
    
    fig = go.Figure()
    fig.add_trace(go.Scatter(x=df["date"],
                             y=df[country1], 
                             mode="lines+markers", 
                             line=dict(color='red'), 
                             name= country1))
    
    fig.add_trace(go.Scatter(x=df["date"], 
                             y=df[country2], 
                             mode="lines+markers", 
                             line=dict(color='blue'), 
                             name= country2 ))
    
                  # add.trace - makes update to already constructed figures
    fig.update_layout(title_text =("Total Covid-19 cases in " + country1 + " and " + country2 + " (" + str(pd.to_datetime(df.iloc[-1,0]).date()) + ")"), 
                    plot_bgcolor = "rgb(230,230,230)",
                    yaxis_title = "Total cases",
                    xaxis_title = "Date")
    fig.show()
                           

In [52]:
scatter_plot(confirmed_cases_plot, "Germany", "India")

b) Interactive with ipywidgets

Choosing country from a dropdown list is much easier because we do not need not know or check the names of the countries. We can can just choose countries we are interested in from a list.

In [75]:
import ipywidgets as widgets
from ipywidgets import interact

In [53]:
# Interactive scatter plot only for "confirmed_cases" dataset

# 1. Define a function

def scatter_plot_confirmed (country1, country2):
    
    fig = go.Figure()
    fig.add_trace(go.Scatter(x=confirmed_cases_plot["date"],
                             y=confirmed_cases_plot[country1], 
                             mode="lines+markers", 
                             line=dict(color='rgb(141,197,164)'), 
                             name= country1))
    
    fig.add_trace(go.Scatter(x=confirmed_cases_plot["date"], 
                             y=confirmed_cases_plot[country2], 
                             mode="lines+markers", 
                             line=dict(color='rgb(99,86,131)'), 
                             name= country2 ))
    
                  # add.trace - makes update to already constructed figures
    fig.update_layout(title_text =("Comparison of total confirmed Covid-19 cases in " + country1 + " and " 
                                   + country2 + " (" + str(pd.to_datetime(confirmed_cases_plot.iloc[-1,0]).date()) 
                                   + ")"), 
                    plot_bgcolor = "rgb(245,245,245)",
                    yaxis_title = "Total cases",
                    xaxis_title = "Date",
                     hovermode="x unified")
    iplot(fig)
    #fig.show()

In [54]:
# Select countries with dropdown button 

country1_drop = widgets.Dropdown(
    options=list(confirmed_global.index.values),
    description='Country 1: ',
    value='Afghanistan'
)

country2_drop = widgets.Dropdown(
    options=list(confirmed_global.index.values),
    description='Country 2: ',
    value='Afghanistan'
)


interact(scatter_plot_confirmed, country1 = country1_drop, country2=country2_drop)

interactive(children=(Dropdown(description='Country 1: ', options=('Afghanistan', 'Albania', 'Algeria', 'Andor…

<function __main__.scatter_plot_confirmed(country1, country2)>

If we look at the plots for total confirmed cases, we can see how the pandemic is developing in each country - is the curve flattening or still rising exponentially? <br>
When we e.g. compare Germany to South Africa we can see that in Germany the cases has been rising exponentially since October 2020, while in South Africa the curve was slowly flattening between August and December but then in December it started to rise again. In December one of new virus mutations was discovered in South Africa. That was the reason of that sudden increase of cases. However, it looks like the curve starts to flatten again and in Germany, it cannot be observed yet. Which is pretty interesting, when we assume that Germany is not dealing with the mutated virus (yet?).

<b> 2. Bar plots </b>

a) Active cases 

In [47]:
def active_cases (country):
    fig = go.Figure(data=[
        go.Bar(name='Confirmed', 
               x=confirmed_cases_plot["date"], 
               y=confirmed_cases_plot[country], 
               opacity=1, 
               marker_color="rgb(219,160,149)"),
        go.Bar(name='Recovered', 
               x=recovered_plot["date"], 
               y=recovered_plot[country], 
               opacity=1, 
               marker_color="rgb(186,72,97)"),
        go.Bar(name='Deaths', 
               x=deaths_plot["date"], 
               y=deaths_plot[country], 
               opacity=1, 
               marker_color="rgb(111,2,24)")
        ])
        # Change the bar mode
    fig.update_layout(title_text=("Active Covid-19 cases in " + country 
                                  + " until " + str(pd.to_datetime(confirmed_cases_plot.iloc[-1,0]).date())),
                    yaxis_title = "Total cases",
                    xaxis_title = "Date",
                    barmode='overlay',
                    plot_bgcolor = "rgb(245,245,245)")
    fig.update_layout(hovermode="x")
    
    iplot(fig)
    #fig.show()

In [48]:
country_drop = widgets.Dropdown(
    options=list(confirmed_global.index.values),
    description='Choose country: ',
    value='Afghanistan'
)

interact(active_cases, country = country_drop)

interactive(children=(Dropdown(description='Choose country: ', options=('Afghanistan', 'Albania', 'Algeria', '…

<function __main__.active_cases(country)>

b) Cases per day (confirmed & deaths)

In [44]:
# Prepare "cases per day (cpr)" datasets

confirmed_cpr_prep = prep_plots(confirmed_global_cpr2)
confirmed_cpr_prep.tail()

Country,date,Afghanistan,Albania,Algeria,Andorra,Angola,Anguilla,Antigua and Barbuda,Argentina,Armenia,...,Uruguay,Uzbekistan,Vanuatu,Venezuela,Vietnam,Wallis and Futuna,West Bank and Gaza,Yemen,Zambia,Zimbabwe
378,2021-02-03,53,1007,275,53,37,0,0,9196,190,...,548,58,0,563,66,0,470,0,1256,150
379,2021-02-04,57,1052,265,67,59,0,19,8891,147,...,589,57,0,460,9,0,661,0,1514,207
380,2021-02-05,34,1089,248,35,34,0,9,8374,158,...,499,73,0,456,19,0,472,2,1424,160
381,2021-02-06,65,1130,0,34,32,0,11,6680,211,...,509,56,0,412,9,0,470,3,1000,156
382,2021-02-07,5,1124,459,45,24,0,11,3658,151,...,499,53,0,473,16,0,572,0,1206,65


In [45]:
deaths_cpr_prep = prep_plots(deaths_global_cpr2)
deaths_cpr_prep.head()

Country,date,Afghanistan,Albania,Algeria,Andorra,Angola,Anguilla,Antigua and Barbuda,Argentina,Armenia,...,Uruguay,Uzbekistan,Vanuatu,Venezuela,Vietnam,Wallis and Futuna,West Bank and Gaza,Yemen,Zambia,Zimbabwe
0,2020-01-22,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
1,2020-01-23,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
2,2020-01-24,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
3,2020-01-25,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
4,2020-01-26,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0


In [46]:
def cases_per_day_plot (country):
    
    fig = px.bar(confirmed_cpr_prep,
                x=confirmed_cpr_prep["date"],
                y=confirmed_cpr_prep[country],
                barmode = "group", 
                height = 400,
                #hover_data=
                )
    
    fig.update_layout(title_text = ("New cases per day in " + country +
                      " until " + str(pd.to_datetime(confirmed_cases_plot.iloc[-1,0]).date())),
                       plot_bgcolor = "rgb(245,245,245)",
                      yaxis_title = "Cases per day",
                       xaxis_title = "Date")
    fig.show()
    

    fig = px.bar(deaths_cpr_prep,
                x=deaths_cpr_prep["date"],
                y=deaths_cpr_prep[country],
                barmode = "group", 
                height = 400)
    
    fig.update_layout(title_text = ("Deaths per day in " + country +
                      " until " + str(pd.to_datetime(confirmed_cases_plot.iloc[-1,0]).date())),
                        plot_bgcolor = "rgb(245,245,245)",
                        yaxis_title = "Deaths per day",
                        xaxis_title = "Date")
    fig.show()

In [47]:
cases_per_day_plot("Germany")

<b> Germany </b>: In the plot of total cases it looks like the number of new cases is still rising exponentially. However, when we look at the plots of new cases per day, we can observe a decrease in number of new infections.

In [48]:
country_drop = widgets.Dropdown(
    options=list(confirmed_global.index.values),
    description='Country: ',
    value='Afghanistan'
)

interact(cases_per_day_plot, country=country_drop)

interactive(children=(Dropdown(description='Country: ', options=('Afghanistan', 'Albania', 'Algeria', 'Andorra…

<function __main__.cases_per_day_plot(country)>

c) New cases per day (comparison of four countries)

In [49]:
from plotly.subplots import make_subplots

def comparison_cpr (country1, country2, country3, country4):
    fig = make_subplots(rows=2, cols=2,
                    subplot_titles=(country1, country2, country3, country4))

    fig.add_trace(go.Bar(x=confirmed_cpr_prep["date"], y=confirmed_cpr_prep[country1],
                    marker=dict(color=confirmed_cpr_prep[country1], coloraxis="coloraxis")),1,1)

    fig.add_trace(go.Bar(x=confirmed_cpr_prep["date"], y=confirmed_cpr_prep[country2],
                    marker=dict(color=confirmed_cpr_prep[country2], coloraxis="coloraxis")),1,2)

    fig.add_trace(go.Bar(x=confirmed_cpr_prep["date"], y=confirmed_cpr_prep[country3],
                    marker=dict(color=confirmed_cpr_prep[country3], coloraxis="coloraxis")),2,1)

    fig.add_trace(go.Bar(x=confirmed_cpr_prep["date"], y=confirmed_cpr_prep[country4],
                    marker=dict(color=confirmed_cpr_prep[country4], coloraxis="coloraxis")),2,2)

    fig.update_layout(coloraxis=dict(colorscale="Portland"), showlegend=False, 
                      title_text=("Cases per day" + " (" 
                                  + str(pd.to_datetime(confirmed_cpr_prep.iloc[-1,0]).date()) + ")"))

    fig.update_layout(plot_bgcolor="rgb(230,230,230)")

    fig.show()

    #Greys,YlGnBu,Greens,YlOrRd,Bluered,RdBu,Reds,Blues,Picnic,
    #Rainbow,Portland,Jet,Hot,Blackbody,Earth,Electric,Viridis,Cividis

In [50]:
country_drop1 = widgets.Dropdown(
    options=list(confirmed_global.index.values),
    description='Country 1: ',
    value='Germany'
)


country_drop2 = widgets.Dropdown(
    options=list(confirmed_global.index.values),
    description='Country 2: ',
    value='United Kingdom'
)

country_drop3 = widgets.Dropdown(
    options=list(confirmed_global.index.values),
    description='Country 3: ',
    value='Spain'
)

country_drop4 = widgets.Dropdown(
    options=list(confirmed_global.index.values),
    description='Country 4: ',
    value='South Africa'
)

interact(comparison_cpr, country1=country_drop1, country2=country_drop2, 
         country3=country_drop3, country4=country_drop4)

interactive(children=(Dropdown(description='Country 1: ', index=78, options=('Afghanistan', 'Albania', 'Algeri…

<function __main__.comparison_cpr(country1, country2, country3, country4)>

When we look at the diagrams of new cases per day we can better see how the virus has been spreading out in different countries and how e.g. different restrictions, mutations etc. influenced the spread of the virus. <br><br>
Here, I selected four countries: Germany, United Kingdom, Poland and South Africa. <b>UK </b>and <b>South Africa </b> have been dealing with the virus mutations since December 2020. In both diagrams can be seen that the new cases were significantly increasing from December until January. After both countries had strenghtened the restrictions, number of new infections started to sink again. <br> <br>
In <b> Poland </b>, there was a peak in November, then the new cases were slowly decreasing. They remained more or less constant in December and now, after lockdown that begun after Christmas 2020, the cases has been decreasing again. <br> <br> 
In <b> Germany </b>, number of newly infected people was very high between October 2020 and January 2021. The first light lockdown in November did not help much. Only the last stronger restrictions positively influenced the trend of new infections. However, it looked like in Berlin elderly people has been more affected by the virus during the second wave. If we assume that this trend was visible also in other federal states than we do not know, whether the decrease of cases is caused by restrictions or vaccinations that have began in December 2020 or both. It requires more detailed analysis of all new cases and its demographics in each federal state. 

d) Total confirmed cases (comparison of four countries)

In [51]:
def comparison_confirmed_total (country1, country2, country3, country4):
    fig = make_subplots(rows=2, cols=2,
                    subplot_titles=(country1, country2, country3, country4))

    fig.add_trace(go.Bar(x=confirmed_cases_plot["date"], y=confirmed_cases_plot[country1],
                    marker=dict(color=confirmed_cases_plot[country1], coloraxis="coloraxis")),1,1)

    fig.add_trace(go.Bar(x=confirmed_cases_plot["date"], y=confirmed_cases_plot[country2],
                    marker=dict(color=confirmed_cases_plot[country2], coloraxis="coloraxis")),1,2)

    fig.add_trace(go.Bar(x=confirmed_cases_plot["date"], y=confirmed_cases_plot[country3],
                    marker=dict(color=confirmed_cases_plot[country3], coloraxis="coloraxis")),2,1)

    fig.add_trace(go.Bar(x=confirmed_cases_plot["date"], y=confirmed_cases_plot[country4],
                    marker=dict(color=confirmed_cases_plot[country4], coloraxis="coloraxis")),2,2)

    fig.update_layout(coloraxis=dict(colorscale="Portland"), showlegend=False, 
                      title_text= ("Total cases" + " (" 
                                   + str(pd.to_datetime(confirmed_cases_plot.iloc[-1,0]).date()) + ")"))

    fig.update_layout(plot_bgcolor="rgb(245,245,245)")

    fig.show()

In [52]:
country_drop10 = widgets.Dropdown(
    options=list(confirmed_global.index.values),
    description='Country 1: ',
    value='Germany'
)


country_drop20 = widgets.Dropdown(
    options=list(confirmed_global.index.values),
    description='Country 2: ',
    value='Poland'
)

country_drop30 = widgets.Dropdown(
    options=list(confirmed_global.index.values),
    description='Country 3: ',
    value='South Africa'
)

country_drop40 = widgets.Dropdown(
    options=list(confirmed_global.index.values),
    description='Country 4: ',
    value='Afghanistan'
)

interact(comparison_confirmed_total, country1=country_drop10, country2=country_drop20, 
         country3=country_drop30, country4=country_drop40)

interactive(children=(Dropdown(description='Country 1: ', index=78, options=('Afghanistan', 'Albania', 'Algeri…

<function __main__.comparison_confirmed_total(country1, country2, country3, country4)>

<b> 3. Summary for each country </b> 

In [53]:
# Summary for each country

def summary (country):

# Confirmed, deaths and recovered in one plot (cumulative)

    fig = go.Figure()
    fig.add_trace(go.Scatter(x=confirmed_cases_plot["date"],
                             y=confirmed_cases_plot[country], 
                             mode="lines+markers", 
                             line=dict(color="rgb(141,197,164)"), 
                             name= "Confirmed"))
    
    fig.add_trace(go.Scatter(x=recovered_plot["date"], 
                             y=recovered_plot[country], 
                             mode="lines+markers", 
                             line=dict(color="rgb(186,72,97)"), 
                             name= "Recovered" ))
    
    fig.add_trace(go.Scatter(x=deaths_plot["date"], 
                             y=deaths_plot[country], 
                             mode="lines+markers", 
                             line=dict(color="rgb(99,86,131)"), 
                             name= "Deaths" ))
    
                  # add.trace - makes update to already constructed figures
    fig.update_layout(title_text =("Total confirmed Covid-19 cases, deaths and recovered in " + country 
                                   + " (" + str(pd.to_datetime(confirmed_cases_plot.iloc[-1,0]).date()) + ")"), 
                    plot_bgcolor = "rgb(245,245,245)",
                    yaxis_title = "Total cases",
                    xaxis_title = "Date",
                     hovermode="x unified")
    fig.show()


# New cases and deaths per day

    fig = px.bar(confirmed_cpr_prep,
                x=confirmed_cpr_prep["date"],
                y=confirmed_cpr_prep[country],
                barmode = "group", 
                height = 400
                )
    
    fig.update_layout(title_text = "New cases per day in " + country 
                      + " (" + str(pd.to_datetime(confirmed_cpr_prep.iloc[-1,0]).date()) + ")",
                       plot_bgcolor = "rgb(245,245,245)",
                      yaxis_title = "Cases per day",
                       xaxis_title = "Date")
    fig.show()
    

    fig = px.bar(deaths_cpr_prep,
                x=deaths_cpr_prep["date"],
                y=deaths_cpr_prep[country],
                barmode = "group", 
                height = 400)
    
    fig.update_layout(title_text = "Deaths per day in " + country 
                      + " (" + str(pd.to_datetime(deaths_cpr_prep.iloc[-1,0]).date()) + ")",
                       plot_bgcolor = "rgb(245,245,245)",
                      yaxis_title = "Deaths per day",
                       xaxis_title = "Date")
    fig.show()


    # fig.add_trace(go.Bar(x=confirmed_cpr_prep["date"], y=confirmed_cpr_prep[country1],
                    #marker=dict(color=confirmed_cpr_prep[country1], coloraxis="coloraxis")),1,1)
    
# Incidence rate   
    map_incidence2 = folium.Map(location=[coord_incid.loc[country, "Lat"], coord_incid.loc[country, "Long"]], zoom_start=4, tiles="Stamenterrain")
    
    for lat, lon, value, name in zip(coord_incid["Lat"], coord_incid["Long"], confirmed_global_incidence["incidence"], confirmed_global_incidence["Country"]):
    
    # High risk countries (7-days incidence rate > 200) -> special regulations by coming back / arriving to Germany (test already before coming back + 10 days quarantine)
        if value >= 200:
            folium.CircleMarker([lat, lon], radius=value*0.02, tooltip = ("<strong> Country </strong>: " + str(name).capitalize() + '<br>''<strong> 7-days incidence </strong>' + " (" + str(confirmed_global_incidence.columns.values[-4]) + "): " + str(value) + '<br>'), color="red",fill_color="red", fill_opacity=0.3).add_to(map_incidence2)
    
    # Risk countries (7-days incidence rate > 50 < 200) -> 10 days quarantine
        elif value >= 50:
            folium.CircleMarker([lat, lon], radius=value*0.02, tooltip = ("<strong> Country </strong>: " + str(name).capitalize() + '<br>''<strong> 7-days incidence </strong>' + " (" + str(confirmed_global_incidence.columns.values[-4]) + "): " + str(value) + '<br>'), color="yellow",fill_color="yellow", fill_opacity=0.3).add_to(map_incidence2)

    # Safe countries No restrictions - traveling to these
        else:
            folium.CircleMarker([lat, lon], radius=value*0.02, tooltip = ("<strong> Country </strong>: " + str(name).capitalize() + '<br>''<strong> 7-days incidence </strong>' + " (" + str(confirmed_global_incidence.columns.values[-4]) + "): " + str(value) + '<br>'), color="green",fill_color="green", fill_opacity=0.3).add_to(map_incidence2)
   
    display(map_incidence2)

In [54]:
country_drop100 = widgets.Dropdown(
    options=list(confirmed_global.index.values),
    description='Country 1: ',
    value='Germany'
)
interact(summary, country = country_drop100)

interactive(children=(Dropdown(description='Country 1: ', index=78, options=('Afghanistan', 'Albania', 'Algeri…

<function __main__.summary(country)>

<b> (1A. Choropleth with plotly express) </b>

In [55]:
# Choropleth map with plotly

import json
file = "C:/Users/Agnieszka/Downloads/Datasets/World_countries/world-countries.json"
world_poly = json.load(open(file, 'r'))

# or world_poly = r"C:/Users/Agnieszka/Downloads/Datasets/World_countries/world-countries.json"

In [56]:
fig = px.choropleth(confirmed_global_choro,
                    geojson=world_poly,
                    locations = "Country",
                    featureidkey = "properties.name",
                    color = "total",
                    #color_continuous_scale=px.colors.diverging.BrBG,
                    color_continuous_scale = "Viridis",
                    #color_continuous_scale=["green", "yellow", "red"],
                    scope="world",
                    labels={"unemp": "Total cases"})

fig.update_layout(coloraxis_colorbar=dict(
    title="Total cases",
    #tickvals=[6,7,8,9],
    #ticktext=["1M", "10M", "100M", "1B"],
))
fig.update_layout(margin={"r":0,"t":0,"l":0,"b":0})
fig.show()