In [None]:
# Installs
!pip install pycountry_convert 
!pip install folium
!pip install calmap

In [2]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
from matplotlib import ticker 
import pycountry_convert as pc
import folium
import branca
from datetime import datetime, timedelta,date
from scipy.interpolate import make_interp_spline, BSpline
import plotly.express as px
import json, requests
import calmap
import seaborn as sns

import seaborn as sns
import warnings
warnings.filterwarnings('ignore')

In [3]:
# Retrieving the datasets from the CSSEGISandData/COVID-19 GitHub repository.
# Global time series: one file for confirmed cases, one for deaths.
CONFIRMED_URL = 'https://raw.githubusercontent.com/CSSEGISandData/COVID-19/master/csse_covid_19_data/csse_covid_19_time_series/time_series_covid19_confirmed_global.csv'
DEATHS_URL = 'https://raw.githubusercontent.com/CSSEGISandData/COVID-19/master/csse_covid_19_data/csse_covid_19_time_series/time_series_covid19_deaths_global.csv'

# Feeds from the web-data branch (flagged as deprecated by the notebook author):
# a per-country table and a per-date table.
CASES_COUNTRY_URL = "https://raw.githubusercontent.com/CSSEGISandData/COVID-19/web-data/data/cases_country.csv"
CASES_TIME_URL = "https://raw.githubusercontent.com/CSSEGISandData/COVID-19/web-data/data/cases_time.csv"

df_confirmed = pd.read_csv(CONFIRMED_URL)
df_deaths = pd.read_csv(DEATHS_URL)

df = pd.read_csv(CASES_COUNTRY_URL)
# Last_Update is parsed into datetimes at load time.
df_table = pd.read_csv(CASES_TIME_URL, parse_dates=['Last_Update'])

In [None]:
# Display the table loaded from cases_country.csv.
df

In [None]:
# Display the global deaths time series loaded above.
df_deaths

In [6]:
##Giving the columns of the original data set short, easy-to-use names.
##NOTE: the assignment is positional, so it relies on the downloaded table having
##exactly these 14 columns in this order (TODO: confirm against the current feed).
readable_column_names = [
    'Country', 'LastUpdate', 'Lat', 'Long',
    'Confirmed', 'Deaths', 'Recovered', 'Active',
    'IncidentRate', 'PeopleTested', 'PeopleHospit', 'MortalityRate',
    'UID', 'abr',
]
df.columns = readable_column_names

In [None]:
# Inspect the column labels currently set on df.
df.columns

Basic Stats

In [None]:
##Summarize the main statistical features of the numeric columns in the data set:
##count, mean, standard deviation, min/max and the quartiles (the 50% row is the median).
##Note that describe() does not report the mode for numeric data.
df.describe()

In [None]:
##Quartiles (25%, 50% and 75%) of the Deaths column
df["Deaths"].quantile(q=[0.25, 0.5, 0.75])

In [None]:
##Minimum, maximum and mean of the Deaths column across the data set
death_summary_stats = ['min', 'max', 'mean']
df["Deaths"].agg(death_summary_stats)

In [None]:
##Correlation coefficient between deaths and recovered patients
##(Series.corr with its default method)
df["Deaths"].corr(df["Recovered"])

Plotting

In [7]:
##Building a country-indexed working table from df:
##  1. drop the latitude, longitude and last-update columns,
##  2. move the country name out of the columns and into the index.
df_country_cases = (
    df.drop(columns=['Lat', 'Long', 'LastUpdate'])
      .set_index("Country")
)

In [None]:
##Creating a bar chart of the top 5 countries based on confirmed cases
##Setting the title of the table along with x and y axis labels to clearly show the given data
# Missing values are dropped first: sort_values() places NaN last, so without
# this the "top 5" slice would pick up countries that have no data.
top_confirmed = df_country_cases["Confirmed"].dropna().sort_values().tail(5)

# A single figure/axes pair; `f` stays bound to the figure as before.
f, ax = plt.subplots(figsize=(10, 5))
ax.set_axisbelow(True)  # draw the grid behind the bars
ax.barh(top_confirmed.index, top_confirmed.values, color='darkcyan')
ax.set_xlabel("Confirmed Cases", fontsize=16)
ax.set_title("Top 5 Countries w/ Confirmed Cases", fontsize=20)
ax.grid(alpha=0.3)

In [None]:
##Creating a bar chart of the top 5 countries with the most deaths
# Missing values are dropped first: sort_values() places NaN last, so without
# this the "top 5" slice would pick up countries that have no data.
top_deaths = df_country_cases["Deaths"].dropna().sort_values().tail(5)

# A single figure/axes pair; `d` stays bound to the figure as before.
d, ax = plt.subplots(figsize=(10, 5))
ax.set_axisbelow(True)  # draw the grid behind the bars
ax.barh(top_deaths.index, top_deaths.values, color='darkblue')
ax.set_xlabel("Deaths", fontsize=16)
ax.set_title("Top 5 Countries w/ most Deaths", fontsize=20)
ax.grid(alpha=0.3)

In [None]:
##Creating a bar chart of the top 5 countries with the most number of active cases
# Missing values are dropped first: sort_values() places NaN last, so without
# this the "top 5" slice would pick up countries whose Active count is missing.
top_active = df_country_cases["Active"].dropna().sort_values().tail(5)

# A single figure/axes pair; `a` stays bound to the figure as before.
a, ax = plt.subplots(figsize=(10, 5))
ax.set_axisbelow(True)  # draw the grid behind the bars
ax.barh(top_active.index, top_active.values, color='darkred')
ax.set_xlabel("Active", fontsize=16)
ax.set_title("Top 5 Countries w/ Most Active Cases", fontsize=20)
ax.grid(alpha=0.3)

In [None]:
##Country-level overview of every remaining feature (lat, long and last update were dropped earlier),
##ordered by confirmed cases. Each highlighted column gets its own colour map so that
##larger values stand out within that column.
deaths_per_100_confirmed = 100 * df_country_cases["Deaths"] / df_country_cases["Confirmed"]
df_country_cases["Mortality Rate (per 100)"] = np.round(deaths_per_100_confirmed, 2)

country_table = df_country_cases.sort_values('Confirmed', ascending=False).style
for colormap, column in [
    ('Purples', "Confirmed"),
    ('Greens', "Deaths"),
    ('Reds', "Recovered"),
    ('Blues', "Active"),
    ('YlOrBr', "Mortality Rate (per 100)"),
]:
    country_table = country_table.background_gradient(cmap=colormap, subset=[column])
country_table

In [None]:
##Kendall rank correlation between the numeric country-level features.
# The derived "Mortality Rate (per 100)" column is excluded by name rather than
# by position, so the result does not depend on whether the cell that adds it
# has already run. Non-numeric columns (e.g. 'abr') are filtered out explicitly,
# because DataFrame.corr raises on string columns in pandas >= 2.0.
numeric_features = (
    df_country_cases
    .drop(columns=["Mortality Rate (per 100)"], errors="ignore")
    .select_dtypes(include="number")
)
numeric_features.corr(method='kendall').style.background_gradient(cmap='Blues')

In [25]:
##Replacing NaN values in the location feature (Province/State) with empty strings
# Only the text column is filled. A blanket replace(np.nan, '') would also write
# '' into numeric columns that contain gaps (such as Lat/Long) and silently turn
# them into object dtype. Keys that are not present as columns are ignored by fillna.
df_confirmed = df_confirmed.fillna({'Province/State': ''})
df_deaths = df_deaths.fillna({'Province/State': ''})

In [None]:
##Creating a world map view where users are able to hover over locations and retrieve Covid-19 data based on region
##Entries without usable coordinates cannot be placed on the map and are skipped
world_map = folium.Map(location=[10,0], tiles="cartodbpositron", zoom_start=2,max_zoom=6,min_zoom=2)

# Coerce the coordinates to floats; blanks or other non-numeric values become NaN.
latitudes = pd.to_numeric(df_confirmed['Lat'], errors='coerce')
longitudes = pd.to_numeric(df_confirmed['Long'], errors='coerce')

# NOTE: df_deaths is paired with df_confirmed purely by row position, and the
# last column of each frame is used as the current count (presumably the most
# recent date; confirm against the data).
map_rows = zip(
    df_confirmed['Country/Region'],
    df_confirmed['Province/State'],
    latitudes,
    longitudes,
    df_confirmed.iloc[:, -1],
    df_deaths.iloc[:, -1],
)

for country, province, lat, lon, confirmed, deaths in map_rows:
    if pd.isna(lat) or pd.isna(lon):
        continue  # no position, nothing to draw

    # A missing province is shown as blank rather than as a number.
    province_label = '' if pd.isna(province) else str(province)
    # Exact rate, guarded against division by zero (no additive fudge factor).
    mortality_rate = np.round(deaths / confirmed * 100, 2) if confirmed > 0 else 0.0

    tooltip_html = (
        "<h5 style='text-align:center;font-weight: bold'>" + country + "</h5>"
        + "<div style='text-align:center;'>" + province_label + "</div>"
        + "<hr style='margin:10px;'>"
        + "<ul style='color: #444;list-style-type:circle;align-item:left;padding-left:20px;padding-right:20px'>"
        + "<li>Confirmed: " + str(confirmed) + "</li>"
        + "<li>Deaths:   " + str(deaths) + "</li>"
        + "<li>Mortality Rate:   " + str(mortality_rate) + "</li>"
        + "</ul>"
    )

    folium.Circle(
        location=[lat, lon],
        tooltip=tooltip_html,
        # Log-scaled radius; the +1.00001 offset keeps log() defined for zero cases.
        radius=(int(np.log(confirmed + 1.00001)) + 0.2) * 50000,
        color='#ff6600',  # was '# ff6600', which is not a valid colour string
        fill_color='#ff8533',
        fill=True,
    ).add_to(world_map)

world_map


Filtering

In [None]:
##Filtering the data to rows whose number of deaths is at least a given value
df.loc[df["Deaths"] >= 20000]

In [None]:
##Filtering the data to rows whose number of confirmed cases is at least a given value
df.loc[df["Confirmed"] >= 5000000]

In [None]:
##Combining both filters: keep only the rows that satisfy the two conditions at once
has_many_deaths = df["Deaths"] >= 20000
has_many_confirmed = df["Confirmed"] >= 5000000
df.loc[has_many_deaths & has_many_confirmed]

Grouping using Groupby


In [96]:
# Use the same lower-case location column names in every table.
location_column_names = {"Province/State": "state", "Country/Region": "country"}
df_confirmed = df_confirmed.rename(columns=location_column_names)
df_deaths = df_deaths.rename(columns=location_column_names)
# df_recovered = df_recovered.rename(columns={"Province/State":"state","Country/Region": "country"})

# rename() returns a new frame, so df itself keeps its 'Country' column.
df_covid19 = df.rename(columns={"Country": "country"})
# Recompute active cases from the other three counts.
df_covid19["Active"] = (
    df_covid19["Confirmed"] - df_covid19["Recovered"] - df_covid19["Deaths"]
)

In [94]:
# Inspect the column labels of df (the original frame, not df_covid19).
df.columns

Index(['Country', 'LastUpdate', 'Lat', 'Long', 'Confirmed', 'Deaths',
       'Recovered', 'Active', 'IncidentRate', 'PeopleTested', 'PeopleHospit',
       'MortalityRate', 'UID', 'abr'],
      dtype='object')

In [97]:
# Changing the country names as required by the pycountry_convert library.
# A single mapping is applied to every table so the tables cannot drift apart.
# The previous copy-pasted statements compared df_table against "Gambia" instead
# of "Gambia, The", so that entry was never renamed there.
COUNTRY_NAME_FIXES = {
    "US": "USA",
    "Korea, South": "South Korea",
    "Taiwan*": "Taiwan",
    "Congo (Kinshasa)": "Democratic Republic of the Congo",
    "Cote d'Ivoire": "Côte d'Ivoire",
    "Reunion": "Réunion",
    "Congo (Brazzaville)": "Republic of the Congo",
    "Bahamas, The": "Bahamas",
    "Gambia, The": "Gambia",
}

# Series.replace with a dict matches whole values exactly (no regex), so
# "Taiwan*" is treated as a literal string.
df_confirmed["country"] = df_confirmed["country"].replace(COUNTRY_NAME_FIXES)
df_deaths["country"] = df_deaths["country"].replace(COUNTRY_NAME_FIXES)
df_covid19["country"] = df_covid19["country"].replace(COUNTRY_NAME_FIXES)
df_table["Country_Region"] = df_table["Country_Region"].replace(COUNTRY_NAME_FIXES)
# df_recovered["country"] = df_recovered["country"].replace(COUNTRY_NAME_FIXES)

# Country label of every row, as plain arrays:
# `countries` from the confirmed time series, `countries1` from the country table.
countries = df_confirmed["country"].to_numpy()
countries1 = df_covid19["country"].to_numpy()

# Continent code -> display name; 'na' is the fallback code for countries
# whose continent could not be determined.
continents = dict([
    ('NA', 'North America'),
    ('SA', 'South America'),
    ('AS', 'Asia'),
    ('OC', 'Australia'),
    ('AF', 'Africa'),
    ('EU', 'Europe'),
    ('na', 'Others'),
])

# Defining function for getting continent code for country.
def country_to_continent_code(country):
    """Return the continent code for a country name, or 'na' if it cannot be resolved.

    The name is first converted to an alpha-2 country code and that code is
    then converted to a continent code, both via pycountry_convert (`pc`).

    Parameters
    ----------
    country : the country name to look up.

    Returns
    -------
    The continent code returned by pycountry_convert, or the string 'na' when
    either lookup step fails.
    """
    try:
        alpha2_code = pc.country_name_to_country_alpha2(country)
        return pc.country_alpha2_to_continent_code(alpha2_code)
    except Exception:
        # Best-effort lookup: any lookup error maps to the fallback code.
        # Unlike a bare `except:`, this lets KeyboardInterrupt and SystemExit
        # propagate, so a long-running cell can still be interrupted.
        return 'na'

# Collecting continent information: add a "continent" column to every table.
def _continent_labels(country_names):
    """Translate each country name into the display name of its continent."""
    return [continents[country_to_continent_code(name)] for name in country_names]

# NOTE: df_deaths is labelled with the country list taken from df_confirmed,
# i.e. both frames are assumed to share the same row order.
df_confirmed.insert(2, "continent", _continent_labels(countries))
df_deaths.insert(2, "continent", _continent_labels(countries))
df_covid19.insert(1, "continent", _continent_labels(countries1))
df_table.insert(1, "continent", _continent_labels(df_table["Country_Region"].values))
# df_recovered.insert(2, "continent", _continent_labels(countries))

In [101]:
# Aggregate the country table to one row per continent.
# numeric_only=True keeps the text column 'abr' out of the sum; on pandas >= 2.0
# a plain sum() would try to add up the strings instead of dropping the column.
# NOTE(review): sums of rate-like columns (IncidentRate, MortalityRate) and of
# UID are not meaningful totals; only the count columns should be read as such.
df_continents_cases = (
    df_covid19
    .drop(columns=['Lat', 'Long', 'country', 'LastUpdate'])
    .groupby(["continent"])
    .sum(numeric_only=True)
)

In [102]:
# Continent-level mortality rate (deaths per 100 confirmed cases), followed by a
# styled view in which each highlighted column has its own colour map.
continent_deaths_per_100 = 100 * df_continents_cases["Deaths"] / df_continents_cases["Confirmed"]
df_continents_cases["Mortality Rate (per 100)"] = np.round(continent_deaths_per_100, 2)

continent_table = df_continents_cases.style
for colormap, column in [
    ('Blues', "Confirmed"),
    ('Reds', "Deaths"),
    ('Greens', "Recovered"),
    ('Purples', "Active"),
    ('YlOrBr', "Mortality Rate (per 100)"),
]:
    continent_table = continent_table.background_gradient(cmap=colormap, subset=[column])
continent_table

Unnamed: 0_level_0,Confirmed,Deaths,Recovered,Active,IncidentRate,PeopleTested,PeopleHospit,MortalityRate,UID,Mortality Rate (per 100)
continent,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1
Africa,2983144.0,71382.0,2431184.0,480578.0,16805.248759,0.0,0.0,108.005273,24764,2.39
Asia,21075798.0,342165.0,19576149.0,1157484.0,63833.366089,0.0,0.0,89.023802,18454,1.62
Australia,31636.0,945.0,28734.0,1957.0,183.253305,0.0,0.0,9.224025,3534,2.99
Europe,25592350.0,581005.0,12238107.0,11301869.0,178008.478225,0.0,0.0,82.470634,18538,2.27
North America,24959448.0,535774.0,2449555.0,534124.0,30850.949559,0.0,0.0,46.196163,8292,2.15
Others,329618.0,5754.0,287539.0,36325.0,9384.716089,0.0,0.0,29.855385,20611,1.75
South America,13715889.0,372351.0,12331993.0,1011545.0,24683.334133,0.0,0.0,34.554191,4708,2.71
