<a href="https://colab.research.google.com/github/ai2ys/Covid-19-EDA/blob/master/CSSE_Covid_19_time_series.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

# Custom plots for COVID-19 data

In [0]:
import pandas as pd
import re

import altair as alt
import pandas as pd
import numpy as np

import requests
from bs4 import BeautifulSoup

from enum import Enum

COVID-19 data:<br>
https://github.com/CSSEGISandData/COVID-19

In [0]:
def get_population():
  """Scrape the UN population table from Wikipedia.

  The table is located by searching the page for the header text
  'Country or area' and taking the enclosing <table> element.

  Returns:
    pandas.DataFrame: the first table parsed from that element.

  Raises:
    requests.HTTPError: if the page request returns an error status.
    ValueError: if no table containing 'Country or area' is found.
  """
  # Local import keeps this cell self-contained; pandas expects a file-like
  # object (not a literal HTML string) in recent versions.
  from io import StringIO

  url = 'https://en.wikipedia.org/wiki/List_of_countries_by_population_(United_Nations)'
  # A timeout prevents the notebook from hanging forever on a stalled request.
  page = requests.get(url, timeout=30)
  page.raise_for_status()
  html = BeautifulSoup(page.content, 'html.parser')
  element = html.find(string=re.compile('Country or area'))
  html_population_table = None if element is None else element.find_parent('table')
  if html_population_table is None:
    raise ValueError("No table with a 'Country or area' header found at " + url)
  df_population = pd.read_html(StringIO(str(html_population_table)))[0]
  return df_population

class Covid19_status(Enum):
  """Case status categories; the member value is a plain integer index."""
  Confirmed = 0
  Deaths = 1
  Recovered = 2
  Active = 3
  Merged = 4

  def __init__(self, idx):
    # Placeholder for the data set tied to this status; starts out empty.
    self.csse = None

class Csse_covid19(Enum):
  """CSSE COVID-19 time series, one member per case status.

  Each member value is a (Covid19_status, url) pair. Confirmed, Deaths and
  Recovered are downloaded from their CSV url when the class is created.
  Active is derived as Confirmed - Recovered - Deaths, and Merged stacks the
  four other frames with an extra 'Status' column.

  Attributes set on every member:
    status:    the matching Covid19_status member (its `csse` points back here)
    url:       CSV location, or None for the derived members
    df_raw:    the data frame for this status
    re_date:   regex matching the date-named columns (e.g. '1/22/20')
    date_cols: array of the date column names found in df_raw
  """
  Confirmed = (Covid19_status.Confirmed,
               'https://github.com/CSSEGISandData/COVID-19/raw/master/csse_covid_19_data/csse_covid_19_time_series/time_series_19-covid-Confirmed.csv')
  Deaths = (Covid19_status.Deaths,
            'https://github.com/CSSEGISandData/COVID-19/raw/master/csse_covid_19_data/csse_covid_19_time_series/time_series_19-covid-Deaths.csv')
  Recovered = (Covid19_status.Recovered,
               'https://github.com/CSSEGISandData/COVID-19/raw/master/csse_covid_19_data/csse_covid_19_time_series/time_series_19-covid-Recovered.csv')
  Active = (Covid19_status.Active, None)
  Merged = (Covid19_status.Merged, None)

  def __init__(self, status, url):
    self.status = status
    self.url = url
    # Back-reference: lets code holding a Covid19_status reach its data set.
    self.status.csse = self
    self.df_raw = None
    # Raw string: same pattern as before, without the invalid '\/' escape.
    self.re_date = r'^[0-9]{1,2}\/[0-9]{1,2}\/[0-9]{1,2}$'
    if self.status is Covid19_status.Active:
      # Members are initialised in definition order, so the three downloaded
      # frames already exist. They are reached through the status
      # back-reference rather than through another member of this enum.
      self.df_raw = Covid19_status.Confirmed.csse.df_raw.copy()
      self.date_cols = self.df_raw.filter(regex=self.re_date).columns.values
      # Row-wise subtraction aligned on the frame index.
      # NOTE(review): assumes the three CSVs list the same rows in the same
      # order - confirm against the data source.
      self.df_raw[self.date_cols] = self.df_raw[self.date_cols].sub(
          Covid19_status.Recovered.csse.df_raw[self.date_cols]).sub(
              Covid19_status.Deaths.csse.df_raw[self.date_cols])
    elif self.status is Covid19_status.Merged:
      cols = Covid19_status.Confirmed.csse.df_raw.columns.values.tolist()
      cols.insert(0, 'Status')
      frames = []
      for member in Covid19_status:
        if member is Covid19_status.Merged:
          continue
        frame = member.csse.df_raw.copy()
        frame['Status'] = member.name
        frames.append(frame)
      # One concat instead of DataFrame.append in a loop.
      merged = pd.concat(frames, sort=False, ignore_index=True)
      # 'Status' first, then the Confirmed columns, then anything extra.
      extra = [c for c in merged.columns if c not in cols]
      self.df_raw = merged[cols + extra]
      self.date_cols = self.df_raw.filter(regex=self.re_date).columns.values
    else:
      self.df_raw = pd.read_csv(self.url)
      self.date_cols = self.df_raw.filter(regex=self.re_date).columns.values

  def _date_columns(self):
    """Return the names of the date columns currently present in df_raw."""
    return self.df_raw.filter(regex=self.re_date).columns.tolist()

  def _latest_totals(self):
    """Return per-country sums for the last date column, largest first."""
    latest = self._date_columns()[-1]
    totals = self.df_raw.groupby(by='Country/Region')[latest].sum()
    return totals.sort_values(ascending=False)

  def alt_plot(self, countries=None, width=800, height=600,
               sort_legend=False, log_scale=None):
    """Display an interactive Altair line chart of counts per country.

    Args:
      countries: iterable of 'Country/Region' values to plot; None plots all.
      width, height: chart size in pixels.
      sort_legend: if True, order the legend by the latest count (descending)
        instead of the order given in `countries`.
      log_scale: base for a logarithmic y axis; None keeps a linear axis.
        With a log axis only counts greater than 0 are drawn.

    Returns:
      None. The chart is shown via `display()`.
    """
    source = self.df_raw
    if countries is not None:
      source = source.loc[source['Country/Region'].isin(countries)]
    else:
      # One entry per country (the frame has one row per province).
      countries = source['Country/Region'].drop_duplicates().tolist()
    # Sum only the date columns. Summing the whole frame would also add up
    # Lat/Long and any other non-date column and reshape them as "dates".
    date_cols = source.filter(regex=self.re_date).columns.tolist()
    source = (source.groupby('Country/Region')[date_cols].sum()
              .reset_index()
              .melt(id_vars='Country/Region', var_name='Date', value_name='Count'))
    alt.data_transformers.disable_max_rows()
    # Create a selection that chooses the nearest point & selects based on x-value
    nearest = alt.selection(type='single', nearest=True, on='mouseover',
                            fields=['Date'], empty='none')

    scale = alt.Scale(type='linear')
    threshold = -1
    sort = countries
    if log_scale is not None:
      scale = alt.Scale(type="log", base=log_scale)
      # Zero cannot be drawn on a log axis.
      threshold = 0
    if sort_legend:
      sort = self.get_countries_sorted(countries)

    color = alt.Color('Country/Region', sort=sort)

    line = alt.Chart(source).transform_filter(
        alt.datum.Count > threshold).mark_line().encode(
        alt.X('Date:T'),
        alt.Y('Count:Q', scale=scale),
        color).properties(title=self.name)
    # Transparent selectors across the chart. This is what tells us
    # the x-value of the cursor
    selectors = alt.Chart(source).mark_point().encode(
        x='Date:T',
        opacity=alt.value(0),
    ).add_selection(nearest)
    # Draw points on the line, and highlight based on selection
    points = line.mark_point().encode(
        opacity=alt.condition(nearest, alt.value(1), alt.value(0))
    )
    # Draw text labels near the points, and highlight based on selection
    text = line.mark_text(align='left', dx=5, dy=-5).encode(
        text=alt.condition(nearest, 'Count:Q', alt.value(' '))
    )
    # Draw a rule at the location of the selection
    rules = alt.Chart(source).mark_rule(color='gray').encode(
        x='Date:T',
    ).transform_filter(nearest)
    # Put the five layers into a chart and bind the data
    layer = alt.layer(line,
                      selectors, points, rules, text,
    ).properties(width=width, height=height)
    layer.display()

  def get_top_countries(self, top=10):
    """Return the `top` country names with the highest count on the last date."""
    return self._latest_totals().iloc[:top].index.tolist()

  def get_countries_sorted(self, countries):
    """Return those of `countries` present in the data, by latest count (descending)."""
    ranked = self._latest_totals()
    return ranked[ranked.index.isin(countries)].index.tolist()

  def get_by_country(self, countries=None, plot=False, figsize=(10,10)):
    """Return the per-country time series with dates as rows.

    Args:
      countries: labels passed to `.loc` to select countries; None keeps all.
      plot: if True, also draw the frame with `DataFrame.plot`.
      figsize: figure size forwarded to `DataFrame.plot`.

    Returns:
      (df, fig): the transposed frame and the object returned by
      `DataFrame.plot`, or None for `fig` when `plot` is False.
    """
    df = self.df_raw.groupby(by='Country/Region')[self._date_columns()].sum()
    if countries is not None:
      df = df.loc[countries]
    df = df.transpose()
    # `fig` is always bound, so plot=False no longer raises UnboundLocalError.
    fig = df.plot(figsize=figsize) if plot else None
    return df, fig


In [4]:
def match_population_with_csse_covid19_countries(csse_covid19, df_population=None):
  """Attach a 'Population_2019-7-1' column to `csse_covid19.df_raw`, in place.

  Population-table names are translated to the CSSE spelling before matching:
  a trailing footnote marker such as '[a]' is stripped, and names that are
  spelled differently are looked up in a small alias table. Rows whose country
  has no population entry get NaN.

  Args:
    csse_covid19: object with a `df_raw` DataFrame that has a
      'Country/Region' column (e.g. a Csse_covid19 member).
    df_population: optional frame with the columns 'Country or area' and
      'Population(1 July 2019)'. When omitted it is fetched once with
      `get_population()`.

  Side effects:
    `csse_covid19.df_raw` is sorted by 'Country/Region' in place and gains
    (or, on repeated calls, has overwritten) the column 'Population_2019-7-1'
    as its first column.
  """
  # Population-table name -> CSSE name, for names that differ by more than a
  # footnote marker. 'DR Congo' (not 'Congo') is the country CSSE lists as
  # 'Congo (Kinshasa)'.
  pop_to_csse = {
      'Curaçao': 'Curacao',
      'Czech Republic': 'Czechia',
      'DR Congo': 'Congo (Kinshasa)',
      'Guernsey and Jersey': 'Guernsey',
      'Ivory Coast': "Cote d'Ivoire",
      'Palestine': 'occupied Palestinian territory',
      'Réunion': 'Reunion',
      'South Korea': 'Korea, South',
      'Taiwan': 'Taiwan*',
      'United States': 'US',
      'Vatican City': 'Holy See',
  }
  if df_population is None:
    df_population = get_population()

  # 'China[a]' -> 'China', 'Australia[g]' -> 'Australia', ...
  pop_names = df_population['Country or area'].str.replace(
      r'\s*\[[^\]]*\]\s*$', '', regex=True)
  csse_names = pop_names.map(lambda name: pop_to_csse.get(name, name))
  population_by_country = dict(
      zip(csse_names, df_population['Population(1 July 2019)']))

  df = csse_covid19.df_raw
  df.sort_values(by='Country/Region', inplace=True)
  population = df['Country/Region'].map(population_by_country)
  if pd.api.types.is_numeric_dtype(population):
    # Keep a float column whether or not every row found a match.
    population = population.astype(float)
  if 'Population_2019-7-1' in df.columns:
    # Repeated call: refresh the existing column instead of failing on insert.
    df['Population_2019-7-1'] = population
  else:
    df.insert(0, 'Population_2019-7-1', population)


# Show the first rows of the merged frame before and after the population
# column is attached.
merged_csse = Csse_covid19.Merged
display(merged_csse.df_raw.head(10))
match_population_with_csse_covid19_countries(merged_csse)
display(merged_csse.df_raw.head(10))


Unnamed: 0,Status,Province/State,Country/Region,Lat,Long,1/22/20,1/23/20,1/24/20,1/25/20,1/26/20,1/27/20,1/28/20,1/29/20,1/30/20,1/31/20,2/1/20,2/2/20,2/3/20,2/4/20,2/5/20,2/6/20,2/7/20,2/8/20,2/9/20,2/10/20,2/11/20,2/12/20,2/13/20,2/14/20,2/15/20,2/16/20,2/17/20,2/18/20,2/19/20,2/20/20,2/21/20,2/22/20,2/23/20,2/24/20,2/25/20,2/26/20,2/27/20,2/28/20,2/29/20,3/1/20,3/2/20,3/3/20,3/4/20,3/5/20,3/6/20,3/7/20,3/8/20,3/9/20,3/10/20,3/11/20,3/12/20,3/13/20,3/14/20,3/15/20,3/16/20,3/17/20,3/18/20
1412,Active,,Afghanistan,33.0,65.0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,1,1,1,1,1,1,1,1,1,1,1,1,4,4,5,7,7,7,11,16,20,21,21
488,Deaths,,Afghanistan,33.0,65.0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0
26,Confirmed,,Afghanistan,33.0,65.0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,1,1,1,1,1,1,1,1,1,1,1,1,4,4,5,7,7,7,11,16,21,22,22
950,Recovered,,Afghanistan,33.0,65.0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,1,1
557,Deaths,,Albania,41.1533,20.1683,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,1,1,1,1,1,1,2
1481,Active,,Albania,41.1533,20.1683,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,2,10,11,22,32,37,41,50,54,57
95,Confirmed,,Albania,41.1533,20.1683,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,2,10,12,23,33,38,42,51,55,59
1019,Recovered,,Albania,41.1533,20.1683,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0
491,Deaths,,Algeria,28.0339,1.6596,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,2,3,4,4,4,7
29,Confirmed,,Algeria,28.0339,1.6596,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,1,1,1,1,1,3,5,12,12,17,17,19,20,20,20,24,26,37,48,54,60,74


Unnamed: 0,Population_2019-7-1,Status,Province/State,Country/Region,Lat,Long,1/22/20,1/23/20,1/24/20,1/25/20,1/26/20,1/27/20,1/28/20,1/29/20,1/30/20,1/31/20,2/1/20,2/2/20,2/3/20,2/4/20,2/5/20,2/6/20,2/7/20,2/8/20,2/9/20,2/10/20,2/11/20,2/12/20,2/13/20,2/14/20,2/15/20,2/16/20,2/17/20,2/18/20,2/19/20,2/20/20,2/21/20,2/22/20,2/23/20,2/24/20,2/25/20,2/26/20,2/27/20,2/28/20,2/29/20,3/1/20,3/2/20,3/3/20,3/4/20,3/5/20,3/6/20,3/7/20,3/8/20,3/9/20,3/10/20,3/11/20,3/12/20,3/13/20,3/14/20,3/15/20,3/16/20,3/17/20,3/18/20
1412,38041754.0,Active,,Afghanistan,33.0,65.0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,1,1,1,1,1,1,1,1,1,1,1,1,4,4,5,7,7,7,11,16,20,21,21
488,38041754.0,Deaths,,Afghanistan,33.0,65.0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0
26,38041754.0,Confirmed,,Afghanistan,33.0,65.0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,1,1,1,1,1,1,1,1,1,1,1,1,4,4,5,7,7,7,11,16,21,22,22
950,38041754.0,Recovered,,Afghanistan,33.0,65.0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,1,1
557,2880917.0,Deaths,,Albania,41.1533,20.1683,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,1,1,1,1,1,1,2
1481,2880917.0,Active,,Albania,41.1533,20.1683,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,2,10,11,22,32,37,41,50,54,57
95,2880917.0,Confirmed,,Albania,41.1533,20.1683,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,2,10,12,23,33,38,42,51,55,59
1019,2880917.0,Recovered,,Albania,41.1533,20.1683,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0
953,43053054.0,Recovered,,Algeria,28.0339,1.6596,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,8,8,12,12,12,12,12
1415,43053054.0,Active,,Algeria,28.0339,1.6596,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,1,1,1,1,1,3,5,12,12,17,17,19,20,20,20,15,16,22,32,38,44,55


In [0]:
def print_population_and_csse_covid19_countries():
  """Print the sorted country names of the CSSE data and of the population table.

  The two lists are the raw material for aligning the differing spellings by
  hand.
  """
  # Unique names are read straight from the column. Grouping and summing every
  # column just to obtain the group keys is unnecessary work.
  print(sorted(Csse_covid19.Merged.df_raw['Country/Region'].dropna().unique().tolist()))
  print(sorted(get_population()['Country or area'].values.tolist()))
print_population_and_csse_covid19_countries()

In [0]:
#2020-03-15
# Hand-aligned country names, one (CSSE name, population-table name) pair per
# row. None marks a name that has no counterpart in the other source.
# Keeping the pair on one row makes the alignment checkable at a glance.
# 'Bahrain' is paired with 'Bahrain' (not 'Bahamas'), and 'Congo (Kinshasa)'
# with 'DR Congo' (not 'Congo').
_country_name_pairs = [
    ('Afghanistan', 'Afghanistan'),
    ('Albania', 'Albania'),
    ('Algeria', 'Algeria'),
    (None, 'American Samoa'),
    ('Andorra', 'Andorra'),
    (None, 'Angola'),
    (None, 'Anguilla'),
    ('Antigua and Barbuda', 'Antigua and Barbuda'),
    ('Argentina', 'Argentina'),
    ('Armenia', 'Armenia'),
    ('Aruba', 'Aruba'),
    ('Australia', 'Australia[g]'),
    ('Austria', 'Austria'),
    ('Azerbaijan', 'Azerbaijan[i]'),
    (None, 'Bahamas'),
    ('Bahrain', 'Bahrain'),
    ('Bangladesh', 'Bangladesh'),
    (None, 'Barbados'),
    ('Belarus', 'Belarus'),
    ('Belgium', 'Belgium'),
    (None, 'Belize'),
    (None, 'Benin'),
    (None, 'Bermuda'),
    ('Bhutan', 'Bhutan'),
    ('Bolivia', 'Bolivia'),
    ('Bosnia and Herzegovina', 'Bosnia and Herzegovina'),
    (None, 'Botswana'),
    ('Brazil', 'Brazil'),
    (None, 'British Virgin Islands'),
    ('Brunei', 'Brunei'),
    ('Bulgaria', 'Bulgaria'),
    ('Burkina Faso', 'Burkina Faso'),
    (None, 'Burundi'),
    ('Cambodia', 'Cambodia'),
    ('Cameroon', 'Cameroon'),
    ('Canada', 'Canada'),
    (None, 'Cape Verde'),
    (None, 'Caribbean Netherlands[s]'),
    ('Cayman Islands', 'Cayman Islands'),
    (None, 'Central African Republic'),
    (None, 'Chad'),
    ('Chile', 'Chile'),
    ('China', 'China[a]'),
    ('Colombia', 'Colombia'),
    (None, 'Comoros'),
    (None, 'Congo'),
    (None, 'Cook Islands'),
    ('Costa Rica', 'Costa Rica'),
    ("Cote d'Ivoire", None),
    ('Croatia', 'Croatia'),
    ('Cruise Ship', None),
    ('Cuba', 'Cuba'),
    ('Curacao', 'Curaçao'),
    ('Cyprus', 'Cyprus[q]'),
    ('Czechia', 'Czech Republic'),
    ('Congo (Kinshasa)', 'DR Congo'),
    ('Denmark', 'Denmark'),
    (None, 'Djibouti'),
    (None, 'Dominica'),
    ('Dominican Republic', 'Dominican Republic'),
    (None, 'East Timor'),
    ('Ecuador', 'Ecuador'),
    ('Egypt', 'Egypt'),
    (None, 'El Salvador'),
    (None, 'Equatorial Guinea'),
    (None, 'Eritrea'),
    ('Estonia', 'Estonia'),
    ('Eswatini', 'Eswatini'),
    ('Ethiopia', 'Ethiopia'),
    (None, 'F.S. Micronesia'),
    (None, 'Falkland Islands'),
    (None, 'Faroe Islands'),
    (None, 'Fiji'),
    ('Finland', 'Finland[k]'),
    ('France', 'France[b]'),
    ('French Guiana', 'French Guiana'),
    (None, 'French Polynesia'),
    ('Gabon', 'Gabon'),
    (None, 'Gambia'),
    ('Georgia', 'Georgia[o]'),
    ('Germany', 'Germany'),
    ('Ghana', 'Ghana'),
    (None, 'Gibraltar'),
    ('Greece', 'Greece'),
    (None, 'Greenland'),
    (None, 'Grenada'),
    ('Guadeloupe', 'Guadeloupe[r]'),
    (None, 'Guam'),
    ('Guatemala', 'Guatemala'),
    ('Guernsey', 'Guernsey and Jersey'),
    ('Guinea', 'Guinea'),
    (None, 'Guinea-Bissau'),
    ('Guyana', 'Guyana'),
    (None, 'Haiti'),
    ('Holy See', None),
    ('Honduras', 'Honduras'),
    (None, 'Hong Kong'),
    ('Hungary', 'Hungary'),
    ('Iceland', 'Iceland'),
    ('India', 'India'),
    ('Indonesia', 'Indonesia'),
    ('Iran', 'Iran'),
    ('Iraq', 'Iraq'),
    ('Ireland', 'Ireland'),
    (None, 'Isle of Man'),
    ('Israel', 'Israel'),
    ('Italy', 'Italy'),
    (None, 'Ivory Coast'),
    ('Jamaica', 'Jamaica'),
    ('Japan', 'Japan'),
    ('Jersey', None),
    ('Jordan', 'Jordan'),
    ('Kazakhstan', 'Kazakhstan'),
    ('Kenya', 'Kenya'),
    (None, 'Kiribati'),
    ('Kuwait', 'Kuwait'),
    (None, 'Kyrgyzstan'),
    (None, 'Laos'),
    ('Latvia', 'Latvia'),
    ('Lebanon', 'Lebanon'),
    (None, 'Lesotho'),
    (None, 'Liberia'),
    (None, 'Libya'),
    ('Liechtenstein', 'Liechtenstein'),
    ('Lithuania', 'Lithuania'),
    ('Luxembourg', 'Luxembourg'),
    (None, 'Macau'),
    (None, 'Madagascar'),
    (None, 'Malawi'),
    ('Malaysia', 'Malaysia[f]'),
    ('Maldives', 'Maldives'),
    (None, 'Mali'),
    ('Malta', 'Malta'),
    (None, 'Marshall Islands'),
    ('Martinique', 'Martinique'),
    ('Mauritania', 'Mauritania'),
    (None, 'Mauritius[p]'),
    (None, 'Mayotte'),
    ('Mexico', 'Mexico'),
    ('Moldova', 'Moldova[n]'),
    ('Monaco', 'Monaco'),
    ('Mongolia', 'Mongolia'),
    (None, 'Montenegro'),
    (None, 'Montserrat'),
    ('Morocco', 'Morocco'),
    (None, 'Mozambique'),
    (None, 'Myanmar'),
    ('Namibia', 'Namibia'),
    (None, 'Nauru'),
    ('Nepal', 'Nepal'),
    ('Netherlands', 'Netherlands'),
    (None, 'New Caledonia'),
    ('New Zealand', 'New Zealand'),
    (None, 'Nicaragua'),
    (None, 'Niger'),
    ('Nigeria', 'Nigeria'),
    (None, 'Niue'),
    (None, 'North Korea'),
    ('North Macedonia', 'North Macedonia'),
    (None, 'Northern Mariana Islands'),
    ('Norway', 'Norway[l]'),
    ('Oman', 'Oman'),
    ('Pakistan', 'Pakistan'),
    (None, 'Palau'),
    ('occupied Palestinian territory', 'Palestine[m]'),
    ('Panama', 'Panama'),
    (None, 'Papua New Guinea'),
    ('Paraguay', 'Paraguay'),
    ('Peru', 'Peru'),
    ('Philippines', 'Philippines'),
    ('Poland', 'Poland'),
    ('Portugal', 'Portugal'),
    (None, 'Puerto Rico'),
    ('Qatar', 'Qatar'),
    ('Reunion', None),
    ('Romania', 'Romania'),
    ('Russia', 'Russia'),
    ('Rwanda', 'Rwanda'),
    (None, 'Réunion'),
    (None, 'Saint Helena, Ascension and Tristan da Cunha'),
    (None, 'Saint Kitts and Nevis'),
    ('Saint Lucia', 'Saint Lucia'),
    (None, 'Saint Pierre and Miquelon'),
    ('Saint Vincent and the Grenadines', 'Saint Vincent and the Grenadines'),
    (None, 'Samoa'),
    ('San Marino', 'San Marino'),
    ('Saudi Arabia', 'Saudi Arabia'),
    ('Senegal', 'Senegal'),
    ('Serbia', 'Serbia[j]'),
    ('Seychelles', 'Seychelles'),
    (None, 'Sierra Leone'),
    ('Singapore', 'Singapore'),
    (None, 'Sint Maarten'),
    ('Slovakia', 'Slovakia'),
    ('Slovenia', 'Slovenia'),
    (None, 'Solomon Islands'),
    (None, 'Somalia'),
    ('South Africa', 'South Africa'),
    ('Korea, South', 'South Korea'),
    (None, 'South Sudan'),
    ('Spain', 'Spain[d]'),
    ('Sri Lanka', 'Sri Lanka'),
    ('Sudan', 'Sudan'),
    ('Suriname', 'Suriname'),
    ('Sweden', 'Sweden'),
    ('Switzerland', 'Switzerland'),
    (None, 'Syria'),
    (None, 'São Tomé and Príncipe'),
    ('Taiwan*', 'Taiwan[h]'),
    (None, 'Tajikistan'),
    (None, 'Tanzania[c]'),
    ('Thailand', 'Thailand'),
    ('Togo', 'Togo'),
    (None, 'Tokelau'),
    (None, 'Tonga'),
    ('Trinidad and Tobago', 'Trinidad and Tobago'),
    ('Tunisia', 'Tunisia'),
    ('Turkey', 'Turkey'),
    (None, 'Turkmenistan'),
    (None, 'Turks and Caicos Islands'),
    (None, 'Tuvalu'),
    (None, 'U.S. Virgin Islands'),
    (None, 'Uganda'),
    ('Ukraine', 'Ukraine[e]'),
    ('United Arab Emirates', 'United Arab Emirates'),
    ('United Kingdom', 'United Kingdom'),
    ('US', 'United States'),
    ('Uruguay', 'Uruguay'),
    (None, 'Uzbekistan'),
    (None, 'Vanuatu'),
    (None, 'Vatican City[t]'),
    ('Venezuela', 'Venezuela'),
    ('Vietnam', 'Vietnam'),
    (None, 'Wallis and Futuna'),
    (None, 'Western Sahara'),
    (None, 'World'),
    (None, 'Yemen'),
    (None, 'Zambia'),
    (None, 'Zimbabwe'),
]
# Parallel lists, as consumed by the cells below.
countries_csse = [csse_name for csse_name, _ in _country_name_pairs]
countries_pop = [pop_name for _, pop_name in _country_name_pairs]

In [0]:
# Confirmed cases on a base-10 log axis: first three selected countries,
# then every country in the data set (None = no filter).
for selection in (['France','Germany', 'Austria'], None):
  Csse_covid19.Confirmed.alt_plot(selection, sort_legend=True, log_scale=10)

In [0]:
import ipywidgets as widgets
from IPython.display import display
from ipywidgets import Checkbox, HBox, VBox
from IPython.display import clear_output

# Selection state: country name -> whether its checkbox is currently ticked.
c_dict = {}

def changed(b):
  """Record a checkbox's new value in `c_dict`, keyed by its description."""
  if b.name == 'value':
    c_dict[b.owner.description] = b.owner.value


# Number of countries offered as checkboxes, ranked by latest active count.
top_number = 10

countries = Csse_covid19.Active.get_top_countries(top_number)
boxes = []
for c in countries:
  c_dict[c] = True
  box = Checkbox(value=True, description=c)
  # Only value changes matter; do not fire the callback for other traits.
  box.observe(changed, names='value')
  boxes.append(box)

# Two columns of checkboxes, built once (no throwaway empty VBox instances).
half = len(boxes)//2
vbox1 = VBox(boxes[:half])
vbox2 = VBox(boxes[half:])
hbox = HBox([vbox1, vbox2])
hbox

In [0]:
# Collect the descriptions of all ticked checkboxes, column by column.
countries = [checkbox.description
             for column in hbox.children
             for checkbox in column.children
             if checkbox.value]
print(countries)
sort_legend=False
log_scale=10
# One chart per status, all for the same selection and axis settings.
for csse in (Csse_covid19.Confirmed, Csse_covid19.Active,
             Csse_covid19.Deaths, Csse_covid19.Recovered):
  csse.alt_plot(countries, sort_legend=sort_legend, log_scale=log_scale)

List of countries by population (United Nations): <br>
https://en.wikipedia.org/wiki/List_of_countries_by_population_(United_Nations)

In [0]:
# Wikipedia page that lists countries by population (United Nations figures).
url_wiki_population = 'https://en.wikipedia.org/wiki/List_of_countries_by_population_(United_Nations)'

Instructions for scraping table cells:<br>
* https://pythonprogramminglanguage.com/web-scraping-with-pandas-and-beautifulsoup/
* https://stackoverflow.com/questions/52336057/web-scraping-html-table-with-certain-text-in-python

In [0]:
# Reuse the scraper defined near the top of the notebook instead of repeating
# its body here; it reads the same Wikipedia page and returns the same table.
df_pop = get_population()
df_pop.head(10)

In [0]:
# Sorted country names from both sources, for aligning the spellings by hand.
# Unique names are read straight from the column; summing every column just
# to obtain the group keys is unnecessary work.
print(sorted(Csse_covid19.Active.df_raw['Country/Region'].dropna().unique().tolist()))
print(sorted(df_pop['Country or area'].values.tolist()))

In [0]:
#2020-03-15
# countries_csse / countries_pop are defined once, in the earlier mapping cell;
# the identical second copy that used to live here has been dropped so the two
# cannot drift apart. The parallel lists must stay the same length to be
# combined column-wise below.
assert len(countries_csse) == len(countries_pop)

In [0]:
# Pair the two hand-aligned name lists column-wise and keep only the rows
# where both sources have a name.
df_countries = pd.DataFrame({
    'csse_covid19': countries_csse,
    'population': countries_pop,
}).dropna()
df_countries.info()

In [0]:
# Work on a copy so that changes to `df` do not touch Csse_covid19.Merged.df_raw.
df = Csse_covid19.Merged.df_raw.copy()
df.head()

In [0]:
# Attach the 2019 population to a copy of the merged frame, translating the
# population-table country names to the CSSE spelling via df_countries.
df = Csse_covid19.Merged.df_raw.copy()
df_countries = df_countries.dropna()
values_pop = df_countries['population'].values
values_csse = df_countries['csse_covid19'].values
print(df_countries['csse_covid19'].index.tolist())
subset_idx = df_pop['Country or area'].isin(values_pop)
df_pop_subset = df_pop.loc[subset_idx, ['Country or area', 'Population(1 July 2019)']]

# Population rows re-labelled with the CSSE country name.
df_pop_csse = df_pop_subset.copy()
df_pop_csse['Country/Region'] = df_pop_csse['Country or area'].map(
    dict(zip(values_pop, values_csse)))
# One population value per CSSE country (a unique index is required for map).
population_by_country = (df_pop_csse.drop_duplicates('Country/Region')
                         .set_index('Country/Region')['Population(1 July 2019)'])
# Per-row population aligned with df's index; NaN where no match exists.
series_pop = df['Country/Region'].map(population_by_country)
df.insert(0, 'Population(1 July 2019)', series_pop)
df.head()

In [0]:
# Inspect the population series that fills the 'Population(1 July 2019)' column of df.
series_pop

In [0]:
# Boolean mask selecting every row that belongs to Germany.
idx = df['Country/Region'].eq('Germany')
df.loc[idx]