In [1]:
import numpy as np 
import pandas as pd 
import geopandas as gpd 

import matplotlib.pyplot as plt 
import seaborn as sns 
import plotly.express as px 

In [2]:
%%HTML
<style type="text/css">
table.dataframe td, table.dataframe th {
    border: 1px  black solid !important;
  color: black !important;
}
</style>

In [3]:
# Read data from live data sources 
url = 'https://www.worldometers.info/coronavirus/'

In [4]:
# Red html 
data = pd.read_html(url)

In [5]:
# Loop over data cases 
for data_cases in data:
    print(data_cases)

    Country,Other  TotalCases  NewCases  TotalDeaths  NewDeaths  \
0           China       80778      24.0       3158.0       22.0   
1           Italy       10149       NaN        631.0        NaN   
2            Iran        8042       NaN        291.0        NaN   
3        S. Korea        7755     242.0         61.0        1.0   
4          France        1784       NaN         33.0        NaN   
..            ...         ...       ...          ...        ...   
115      Mongolia           1       NaN          NaN        NaN   
116     St. Barth           1       NaN          NaN        NaN   
117          Togo           1       NaN          NaN        NaN   
118        Turkey           1       NaN          NaN        NaN   
119        Total:      119217     294.0       4299.0       25.0   

     TotalRecovered  ActiveCases  Serious,Critical  Tot Cases/1M pop  
0           61486.0        16134            4492.0              56.1  
1            1004.0         8514             877.0   

In [6]:
# Check data type 
type(data_cases)

pandas.core.frame.DataFrame

In [7]:
# Take a look at dataset
data_cases.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 120 entries, 0 to 119
Data columns (total 9 columns):
Country,Other       120 non-null object
TotalCases          120 non-null int64
NewCases            7 non-null float64
TotalDeaths         27 non-null float64
NewDeaths           5 non-null float64
TotalRecovered      52 non-null float64
ActiveCases         120 non-null int64
Serious,Critical    38 non-null float64
Tot Cases/1M pop    92 non-null float64
dtypes: float64(6), int64(2), object(1)
memory usage: 8.6+ KB


In [8]:
# Numerical Summary
data_cases.describe()

Unnamed: 0,TotalCases,NewCases,TotalDeaths,NewDeaths,TotalRecovered,ActiveCases,"Serious,Critical",Tot Cases/1M pop
count,120.0,7.0,27.0,5.0,52.0,120.0,38.0,92.0
mean,1986.95,84.0,318.444444,10.0,2560.038462,805.95,302.473684,12.493478
std,13126.200637,126.821397,1003.965726,12.369317,12429.250284,4748.62775,1168.522577,28.081501
min,1.0,2.0,1.0,1.0,1.0,0.0,1.0,0.1
25%,3.0,5.0,1.0,1.0,1.0,3.0,1.0,0.475
50%,14.0,16.0,3.0,1.0,6.5,13.0,2.0,1.95
75%,69.5,133.0,32.0,22.0,25.25,60.5,9.75,8.35
max,119217.0,294.0,4299.0,25.0,66561.0,48357.0,5747.0,167.9


In [9]:
# Check missing data 
data_cases.isnull().sum()

Country,Other         0
TotalCases            0
NewCases            113
TotalDeaths          93
NewDeaths           115
TotalRecovered       68
ActiveCases           0
Serious,Critical     82
Tot Cases/1M pop     28
dtype: int64

In [10]:
# Renaming
data_cases['Country,Other'].replace({'Country, Other': 'Country'}, inplace=True)

In [11]:
# Filling missing values 
data_cases.loc[:, 'TotalCases'].fillna(value=0, inplace=True)
data_cases.loc[:, 'NewCases'].fillna(value=0, inplace=True)
data_cases.loc[:, 'TotalDeaths'].fillna(value=0, inplace=True) 
data_cases.loc[:, 'NewDeaths'].fillna(value=0, inplace=True)
data_cases.loc[:, 'TotalRecovered'].fillna(value=0, inplace=True)
data_cases.loc[:, 'ActiveCases'].fillna(value=0, inplace=True)
data_cases.loc[:, 'Serious,Critical'].fillna(value=0, inplace=True)

In [12]:
# Type conversion
data_cases.astype({'TotalCases': 'int64', 'NewCases': 'int64', 'TotalDeaths': 'int64', 'NewDeaths': 'int64',
                   'TotalRecovered': 'int64', 'ActiveCases': 'int64', 'Serious,Critical': 'int64'
                  })

Unnamed: 0,"Country,Other",TotalCases,NewCases,TotalDeaths,NewDeaths,TotalRecovered,ActiveCases,"Serious,Critical",Tot Cases/1M pop
0,China,80778,24,3158,22,61486,16134,4492,56.1
1,Italy,10149,0,631,0,1004,8514,877,167.9
2,Iran,8042,0,291,0,2731,5020,0,95.7
3,S. Korea,7755,242,61,1,288,7406,54,151.3
4,France,1784,0,33,0,12,1739,86,27.3
...,...,...,...,...,...,...,...,...,...
115,Mongolia,1,0,0,0,0,1,0,0.3
116,St. Barth,1,0,0,0,0,1,0,
117,Togo,1,0,0,0,0,1,0,0.1
118,Turkey,1,0,0,0,0,1,0,


In [13]:
data_cases.groupby('TotalCases').sum()

Unnamed: 0_level_0,NewCases,TotalDeaths,NewDeaths,TotalRecovered,ActiveCases,"Serious,Critical",Tot Cases/1M pop
TotalCases,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1
1,0.0,0.0,0.0,3.0,14,0.0,1.1
2,0.0,0.0,0.0,0.0,16,0.0,2.1
3,0.0,1.0,0.0,1.0,19,1.0,2.2
4,0.0,0.0,0.0,1.0,3,0.0,0.2
5,3.0,0.0,0.0,0.0,30,1.0,2.8
...,...,...,...,...,...,...,...
7755,242.0,61.0,1.0,288.0,7406,54.0,151.3
8042,0.0,291.0,0.0,2731.0,5020,0.0,95.7
10149,0.0,631.0,0.0,1004.0,8514,877.0,167.9
80778,24.0,3158.0,22.0,61486.0,16134,4492.0,56.1


In [14]:
data

[    Country,Other  TotalCases  NewCases  TotalDeaths  NewDeaths  \
 0           China       80757      22.0       3136.0       17.0   
 1           Italy        9172       0.0        463.0        0.0   
 2        S. Korea        7513      35.0         54.0        1.0   
 3            Iran        7161       0.0        237.0        0.0   
 4          France        1412       0.0         30.0        0.0   
 ..            ...         ...       ...          ...        ...   
 111        Panama           1       1.0          0.0        0.0   
 112      Paraguay           1       0.0          0.0        0.0   
 113     St. Barth           1       0.0          0.0        0.0   
 114          Togo           1       0.0          0.0        0.0   
 115        Total:      114458      96.0       4027.0       19.0   
 
      TotalRecovered  ActiveCases  Serious,Critical  Tot Cases/1M pop  
 0           60095.0        17526            4794.0              56.1  
 1             724.0         7985     

In [22]:
data_cases['TotalRecovered'].sum()

128544.0

In [15]:
data_cases['TotalCases'].sum()

238434