In [85]:
%matplotlib inline
from IPython.display import display, HTML
import numpy as np
import pandas as pd
import json
import random
from urllib.request import urlopen

# Visualization
import matplotlib.pyplot as plt
import seaborn as sns
import plotly.express as px
import plotly.graph_objs as go
import plotly.figure_factory as ff
import calmap
import folium

In [86]:
# Load the cleaned COVID-19 time series; the 'Date' column is parsed to datetimes on read.
full_table = pd.read_csv(
    'covid_19_clean_complete.csv',
    parse_dates=['Date'],
)
# Preview the first rows to check the column layout.
full_table.head()

Unnamed: 0,Province/State,Country/Region,Lat,Long,Date,Confirmed,Deaths,Recovered
0,,Thailand,15.0,101.0,2020-01-22,2.0,0.0,0.0
1,,Japan,36.0,138.0,2020-01-22,2.0,0.0,0.0
2,,Singapore,1.2833,103.8333,2020-01-22,0.0,0.0,0.0
3,,Nepal,28.1667,84.25,2020-01-22,0.0,0.0,0.0
4,,Malaysia,2.5,112.5,2020-01-22,0.0,0.0,0.0


##### Creating new feature "Active"
Active = Confirmed - Deaths - Recovered

In [87]:
# Active cases: confirmed cases that have neither died nor recovered.
full_table['Active'] = (
    full_table['Confirmed']
    .sub(full_table['Deaths'])
    .sub(full_table['Recovered'])
)

In [88]:
# Print column dtypes and non-null counts to spot columns with missing values.
full_table.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 19220 entries, 0 to 19219
Data columns (total 9 columns):
Province/State    8432 non-null object
Country/Region    19220 non-null object
Lat               19220 non-null float64
Long              19220 non-null float64
Date              19220 non-null datetime64[ns]
Confirmed         19219 non-null float64
Deaths            19219 non-null float64
Recovered         19219 non-null float64
Active            19219 non-null float64
dtypes: datetime64[ns](1), float64(6), object(2)
memory usage: 1.3+ MB


In [89]:
# Number of missing values in each column.
full_table.isnull().sum()

Province/State    10788
Country/Region        0
Lat                   0
Long                  0
Date                  0
Confirmed             1
Deaths                1
Recovered             1
Active                1
dtype: int64

In [90]:
# List the distinct country/region names present in the data.
full_table['Country/Region'].unique()

array(['Thailand', 'Japan', 'Singapore', 'Nepal', 'Malaysia', 'Canada',
       'Australia', 'Cambodia', 'Sri Lanka', 'Germany', 'Finland',
       'United Arab Emirates', 'Philippines', 'India', 'Italy', 'Sweden',
       'Spain', 'Belgium', 'Egypt', 'Lebanon', 'Iraq', 'Oman',
       'Afghanistan', 'Bahrain', 'Kuwait', 'Algeria', 'Croatia',
       'Switzerland', 'Austria', 'Israel', 'Pakistan', 'Brazil',
       'Georgia', 'Greece', 'North Macedonia', 'Norway', 'Romania',
       'Estonia', 'San Marino', 'Belarus', 'Iceland', 'Lithuania',
       'Mexico', 'New Zealand', 'Nigeria', 'Ireland', 'Luxembourg',
       'Monaco', 'Qatar', 'Ecuador', 'Azerbaijan', 'Armenia',
       'Dominican Republic', 'Indonesia', 'Portugal', 'Andorra', 'Latvia',
       'Morocco', 'Saudi Arabia', 'Senegal', 'Argentina', 'Chile',
       'Jordan', 'Ukraine', 'Hungary', 'Liechtenstein', 'Poland',
       'Tunisia', 'Bosnia and Herzegovina', 'Slovenia', 'South Africa',
       'Bhutan', 'Cameroon', 'Colombia', 'Costa R

In [91]:
# Store 'Mainland China' under the shorter name 'China' that the cells below filter on.
full_table['Country/Region'] = full_table['Country/Region'].replace({'Mainland China': 'China'})

In [92]:
# Show every row recorded for China.
full_table.loc[full_table['Country/Region'].eq('China')]

Unnamed: 0,Province/State,Country/Region,Lat,Long,Date,Confirmed,Deaths,Recovered,Active
154,Hubei,China,30.9756,112.2707,2020-01-22,444.0,17.0,28.0,399.0
158,Guangdong,China,23.3417,113.4244,2020-01-22,26.0,0.0,0.0,26.0
159,Henan,China,33.8820,113.6140,2020-01-22,5.0,0.0,0.0,5.0
160,Zhejiang,China,29.1832,120.0934,2020-01-22,10.0,0.0,0.0,10.0
161,Hunan,China,27.6104,111.7088,2020-01-22,4.0,0.0,0.0,4.0
...,...,...,...,...,...,...,...,...,...
19098,Inner Mongolia,China,44.0935,113.9448,2020-03-23,75.0,1.0,74.0,0.0
19099,Ningxia,China,37.2692,106.1655,2020-03-23,75.0,0.0,75.0,0.0
19103,Qinghai,China,35.7452,95.9956,2020-03-23,18.0,0.0,18.0,0.0
19104,Macau,China,22.1667,113.5500,2020-03-23,24.0,0.0,10.0,14.0


In [93]:
# Preview the first rows of the table again.
full_table.head()

Unnamed: 0,Province/State,Country/Region,Lat,Long,Date,Confirmed,Deaths,Recovered,Active
0,,Thailand,15.0,101.0,2020-01-22,2.0,0.0,0.0,2.0
1,,Japan,36.0,138.0,2020-01-22,2.0,0.0,0.0,2.0
2,,Singapore,1.2833,103.8333,2020-01-22,0.0,0.0,0.0,0.0
3,,Nepal,28.1667,84.25,2020-01-22,0.0,0.0,0.0,0.0
4,,Malaysia,2.5,112.5,2020-01-22,0.0,0.0,0.0,0.0


In [94]:
# Replace missing province names with an empty string.
full_table['Province/State'] = full_table['Province/State'].fillna('')

In [95]:
# India, China and the rest of the world as three separate frames.
India = full_table[full_table['Country/Region'] == 'India']
China = full_table[full_table['Country/Region'] == 'China']
# `!= ('China', 'India')` compares the column against a tuple rather than testing
# membership; `isin` checks each row against both names, `~` keeps the others.
Rest = full_table[~full_table['Country/Region'].isin(['China', 'India'])]

## Latest Data

In [96]:
# Worldwide totals per day. Several columns must be selected with a list after
# groupby; the bare-tuple form is no longer accepted by current pandas.
temp = full_table.groupby('Date')[['Confirmed', 'Deaths', 'Recovered', 'Active']].sum().reset_index()
# Keep only the most recent date.
temp = temp[temp['Date'] == temp['Date'].max()].reset_index(drop=True)
temp.style.background_gradient(cmap='Pastel1')

Unnamed: 0,Date,Confirmed,Deaths,Recovered,Active
0,2020-03-23 00:00:00,336004,14643,98334,223027


In [97]:
# Color palette shared by the plots below (one hex color per case type)
cnf = '#393e46' # confirmed - dark grey
dth = '#ff2e63' # deaths - red
rec = '#21bf73' # recovered - green
act = '#fe9801' # active cases - orange

In [98]:
# Reshape the latest worldwide totals to long form: one row per case type.
tm = temp.melt(id_vars="Date", value_vars=['Active', 'Deaths', 'Recovered'])
fig = px.treemap(tm, path=["variable"], values="value", height=400, width=600)
# Color each sector by its label so the palette cannot drift out of step with
# the sector order (the positional list [rec, act, dth] did not line up with
# Active / Deaths / Recovered).
case_colors = {'Active': act, 'Deaths': dth, 'Recovered': rec}
fig.update_traces(marker_colors=[case_colors[label] for label in fig.data[0].labels])
fig.show()

## Country Wise Data

In [99]:
# Rows for the most recent date only (reset_index keeps the old row labels in an 'index' column).
full_latest = full_table[full_table['Date'] == full_table['Date'].max()].reset_index()
# Country totals on that date. A list is required to select several columns
# after groupby; the bare-tuple form is no longer accepted by current pandas.
full_latest_grouped = (
    full_latest
    .groupby('Country/Region')[['Confirmed', 'Deaths', 'Recovered', 'Active']]
    .sum()
    .reset_index()
)

In [100]:
# Rank countries by confirmed cases, highest first, and shade the resulting table.
temp1 = (
    full_latest_grouped
    .sort_values(by='Confirmed', ascending=False)
    .reset_index(drop=True)
)
temp1.style.background_gradient(cmap='Reds')

Unnamed: 0,Country/Region,Confirmed,Deaths,Recovered,Active
0,China,81439,3274,72814,5351
1,Italy,59138,5476,7024,46638
2,US,33276,417,178,32681
3,Spain,28768,1772,2575,24421
4,Germany,24873,94,266,24513
5,Iran,21638,1685,7931,12022
6,France,16044,674,2200,13170
7,South Korea,8897,104,2909,5884
8,Switzerland,7245,98,131,7016
9,United Kingdom,5741,282,67,5392


### Maps

In [101]:
# World map shaded by each country's confirmed total on the latest date.
fig = px.choropleth(
    full_latest_grouped,
    locations='Country/Region',
    locationmode='country names',
    color='Confirmed',
    hover_name='Country/Region',
    color_continuous_scale=px.colors.sequential.deep,
)
fig.update_layout(title='Confirmed Cases In The World')
fig.show()

In [None]:
# Country totals per day. Summing adds up all provinces of a country, matching
# the other country-level tables (max would keep only the largest province).
# Sum also skips missing values, so no NaN reaches the marker size below.
formated_gdf = (
    full_table
    .groupby(['Date', 'Country/Region'])[['Confirmed', 'Deaths', 'Recovered']]
    .sum()
    .reset_index()
)
# 'Date' is already datetime64 here; it is turned into text only after the
# groupby has put the rows in chronological order, which the animation follows.
formated_gdf['Date'] = formated_gdf['Date'].dt.strftime('%m/%d/%Y')
# A 0.3 power compresses the range so large outbreaks do not dwarf every other marker.
formated_gdf['size'] = formated_gdf['Confirmed'].pow(0.3)

fig = px.scatter_geo(formated_gdf, locations="Country/Region", locationmode='country names',
                     color="Confirmed", size='size', hover_name="Country/Region",
                     range_color=[0, formated_gdf['Confirmed'].max() + 2],
                     projection="natural earth", animation_frame="Date",
                     title='Spread over time')
fig.update(layout_coloraxis_showscale=False)
fig.show()

### Countries With Death Reports

In [54]:
# Countries that have reported at least one death, worst hit first.
temp_flg = temp1.loc[temp1['Deaths'] > 0, ['Country/Region', 'Deaths']]
(
    temp_flg
    .sort_values('Deaths', ascending=False)
    .reset_index(drop=True)
    .style.background_gradient(cmap='Reds')
)

Unnamed: 0,Country/Region,Deaths
0,Italy,5476
1,China,3274
2,Spain,1772
3,Iran,1685
4,France,674
5,US,417
6,United Kingdom,282
7,Netherlands,180
8,South Korea,104
9,Switzerland,98


### Countries with no recovered cases

In [55]:
# Countries with confirmed cases but no recoveries recorded so far.
temp = temp1.loc[temp1['Recovered'].eq(0), ['Country/Region', 'Confirmed', 'Deaths', 'Recovered']]
temp.reset_index(drop=True).style.background_gradient(cmap='Reds')

Unnamed: 0,Country/Region,Confirmed,Deaths,Recovered
0,Turkey,1236,30,0
1,Slovenia,414,2,0
2,South Africa,274,0,0
3,Panama,245,3,0
4,Dominican Republic,202,3,0
5,Uruguay,135,0,0
6,New Zealand,66,0,0
7,Kazakhstan,60,0,0
8,Guadeloupe,56,0,0
9,Reunion,47,0,0


### Top 10 Countries

In [56]:
# Bar chart of the ten countries with the most confirmed cases
# (temp1 is already sorted by 'Confirmed', highest first).
fig = go.Figure(
    data=[
        go.Bar(
            x=temp1['Country/Region'].head(10),
            y=temp1['Confirmed'].head(10),
            text=temp1['Confirmed'].head(10),
            textposition='auto',
            marker_color='yellow',
        )
    ]
)
fig.update_layout(title='Top 10 infected Cases', xaxis_title="Countries", yaxis_title="Confirmed Cases")
fig.show()

In [57]:
# Rank countries by deaths and chart the ten highest.
temp2 = (
    full_latest_grouped
    .sort_values(by='Deaths', ascending=False)
    .reset_index(drop=True)
)
fig = go.Figure(
    data=[
        go.Bar(
            x=temp2['Country/Region'].head(10),
            y=temp2['Deaths'].head(10),
            text=temp2['Deaths'].head(10),
            textposition='auto',
            marker_color='red',
        )
    ]
)
fig.update_layout(title='Top 10 Death Cases', xaxis_title="Countries", yaxis_title="Deaths")
fig.show()

### Recovered Cases

In [58]:
# Recovered totals per country on the latest date, largest first.
Recovered_per_country = (
    full_latest
    .groupby(["Country/Region"])["Recovered"]
    .sum()
    .reset_index()
    .sort_values("Recovered", ascending=False)
    .reset_index(drop=True)
)
# Each country's share of all recoveries.
fig = px.pie(
    Recovered_per_country,
    values='Recovered',
    names='Country/Region',
    title='Recovered cases',
)
fig.update_traces(textposition='inside', textinfo='percent+label')
fig.show()

In [59]:
# Bar chart of the ten countries with the most recoveries
# (Recovered_per_country is already sorted, highest first).
fig = go.Figure(
    data=[
        go.Bar(
            x=Recovered_per_country['Country/Region'].head(10),
            y=Recovered_per_country['Recovered'].head(10),
            text=Recovered_per_country['Recovered'].head(10),
            textposition='auto',
            marker_color='green',
        )
    ]
)
fig.update_layout(title='Top 10 Recovered Countries', xaxis_title="Countries", yaxis_title="Recovered Cases")
fig.show()

### China province wise data

In [60]:
# Latest-date rows for China, totalled per province.
china_latest = full_latest[full_latest['Country/Region'] == 'China']
# A list is required to select several columns after groupby; the bare-tuple
# form is no longer accepted by current pandas.
china_latest_grouped = (
    china_latest
    .groupby('Province/State')[['Confirmed', 'Deaths', 'Recovered', 'Active']]
    .sum()
    .reset_index()
)

In [61]:
# Chinese provinces ranked by confirmed cases, highest first.
temp2 = (
    china_latest_grouped[['Province/State', 'Confirmed', 'Deaths', 'Recovered']]
    .sort_values(by='Confirmed', ascending=False)
    .reset_index(drop=True)
)
temp2.style.background_gradient(cmap='Pastel1_r')

Unnamed: 0,Province/State,Confirmed,Deaths,Recovered
0,Hubei,67800,3153,59879
1,Guangdong,1413,8,1332
2,Henan,1274,22,1250
3,Zhejiang,1238,1,1221
4,Hunan,1018,4,1014
5,Anhui,990,6,984
6,Jiangxi,936,1,934
7,Shandong,767,7,751
8,Jiangsu,633,0,631
9,Chongqing,577,6,570


### Provinces with no cases recovered

In [62]:
# Provinces where no recovery has been recorded, largest outbreaks first.
temp = (
    china_latest_grouped
    .loc[china_latest_grouped['Recovered'].eq(0),
         ['Province/State', 'Confirmed', 'Deaths', 'Recovered']]
    .sort_values('Confirmed', ascending=False)
    .reset_index(drop=True)
)
temp.style.background_gradient(cmap='Pastel1_r')

Unnamed: 0,Province/State,Confirmed,Deaths,Recovered


### Provinces with all the cases recovered

In [63]:
# Provinces where every confirmed case has recovered.
temp = (
    china_latest_grouped
    .loc[china_latest_grouped['Confirmed'].eq(china_latest_grouped['Recovered']),
         ['Province/State', 'Confirmed', 'Deaths', 'Recovered']]
    .sort_values('Confirmed', ascending=False)
    .reset_index(drop=True)
)
temp.style.background_gradient(cmap='Greens')

Unnamed: 0,Province/State,Confirmed,Deaths,Recovered
0,Shanxi,133,0,133
1,Ningxia,75,0,75
2,Qinghai,18,0,18
3,Tibet,1,0,1


### Provinces with no active cases remaining

In [64]:
# Provinces where deaths plus recoveries account for every confirmed case.
temp = (
    china_latest_grouped
    .loc[china_latest_grouped['Confirmed'].eq(
             china_latest_grouped['Deaths'] + china_latest_grouped['Recovered']),
         ['Province/State', 'Confirmed', 'Deaths', 'Recovered']]
    .sort_values('Confirmed', ascending=False)
    .reset_index(drop=True)
)
temp.style.background_gradient(cmap='Greens')

Unnamed: 0,Province/State,Confirmed,Deaths,Recovered
0,Hunan,1018,4,1014
1,Anhui,990,6,984
2,Guizhou,146,2,144
3,Shanxi,133,0,133
4,Jilin,93,1,92
5,Xinjiang,76,3,73
6,Inner Mongolia,75,1,74
7,Ningxia,75,0,75
8,Qinghai,18,0,18
9,Tibet,1,0,1


In [65]:
# Country totals per day. Summing adds up every province of a country; taking
# the max kept only the largest province, so the 'China' chart showed a single
# province instead of the national total.
gdf = (
    full_table
    .groupby(['Date', 'Country/Region'])[['Confirmed', 'Deaths', 'Recovered']]
    .sum()
    .reset_index()
)

# China: one bar panel per case type over time.
temp = gdf[gdf['Country/Region'] == 'China'].reset_index(drop=True)
temp = temp.melt(id_vars='Date', value_vars=['Confirmed', 'Deaths', 'Recovered'],
                 var_name='Case', value_name='Count')
fig = px.bar(temp, x="Date", y="Count", color='Case', facet_col="Case",
             title='China', color_discrete_sequence=[cnf, dth, rec])
fig.show()

# Every other country combined; only the numeric case columns are summed so
# the country-name column is left out of the aggregation.
temp = (
    gdf[gdf['Country/Region'] != 'China']
    .groupby('Date')[['Confirmed', 'Deaths', 'Recovered']]
    .sum()
    .reset_index()
)
temp = temp.melt(id_vars='Date', value_vars=['Confirmed', 'Deaths', 'Recovered'],
                 var_name='Case', value_name='Count')
fig = px.bar(temp, x="Date", y="Count", color='Case', facet_col="Case",
             title='Rest', color_discrete_sequence=[cnf, dth, rec])
fig.show()

### Hubei - China - World

In [66]:
def location(row):
    """Classify a row as 'Hubei', 'Other Chinese Provinces' or 'Rest of the World'.

    `row` is anything indexable by 'Country/Region' and 'Province/State'
    (e.g. a DataFrame row). The province is only looked at for rows whose
    country is 'China'.
    """
    if row['Country/Region'] != 'China':
        return 'Rest of the World'
    return 'Hubei' if row['Province/State'] == 'Hubei' else 'Other Chinese Provinces'

# Daily totals for Hubei, the other Chinese provinces and the rest of the world.
temp = full_table.copy()
temp['Region'] = temp.apply(location, axis=1)
temp['Date'] = temp['Date'].dt.strftime('%Y-%m-%d')
# A list is required to select several columns after groupby.
temp = temp.groupby(['Region', 'Date'])[['Confirmed', 'Deaths', 'Recovered']].sum().reset_index()
temp = temp.melt(id_vars=['Region', 'Date'], value_vars=['Confirmed', 'Deaths', 'Recovered'],
                 var_name='Case', value_name='Count')
# Animation frames follow row order: sort by date first (ISO dates sort
# chronologically as text) so playback runs in calendar order, then by count.
temp = temp.sort_values(['Date', 'Count'])

# Colors are mapped by case name so they do not depend on row order, and the
# x range comes from the data: the rest-of-world totals exceed a fixed 70000 cap.
fig = px.bar(temp, y='Region', x='Count', color='Case', barmode='group', orientation='h',
             text='Count', title='Hubei - China - World', animation_frame='Date',
             color_discrete_map={'Deaths': dth, 'Recovered': rec, 'Confirmed': cnf},
             range_x=[0, temp['Count'].max() * 1.1])
fig.update_traces(textposition='outside')
fig.show()

# Latest-date totals for Hubei, the other Chinese provinces and the rest of the world.
temp = full_latest.copy()
temp['Region'] = temp.apply(location, axis=1)
# A list is required to select several columns after groupby.
temp = temp.groupby('Region')[['Confirmed', 'Deaths', 'Recovered']].sum().reset_index()
temp = temp.melt(id_vars='Region', value_vars=['Confirmed', 'Deaths', 'Recovered'],
                 var_name='Case', value_name='Count').sort_values('Count')

# Colors are mapped by case name: a positional list is assigned in order of
# first appearance, which after sorting by count depends on the data.
fig = px.bar(temp, y='Region', x='Count', color='Case', barmode='group', orientation='h',
             text='Count', title='Hubei - China - World',
             color_discrete_map={'Deaths': dth, 'Recovered': rec, 'Confirmed': cnf})
fig.update_traces(textposition='outside')
fig.update_layout(uniformtext_minsize=8, uniformtext_mode='hide')
fig.show()

KeyboardInterrupt: 

## Country Wise

In [None]:
# Register pandas' datetime converters with matplotlib.
from pandas.plotting import register_matplotlib_converters
register_matplotlib_converters()

# Hide warnings. NOTE: this silences warnings for every cell run after this one.
import warnings
warnings.filterwarnings('ignore')

# Confirmed cases per country per day, ordered by date then country.
temp = full_table.groupby(['Date', 'Country/Region'])['Confirmed'].sum()
temp = temp.reset_index().sort_values(by=['Date', 'Country/Region'])

# Matplotlib 3.6 renamed its bundled seaborn styles to 'seaborn-v0_8*' and
# later dropped the old names; use whichever name this installation provides.
seaborn_style = 'seaborn-v0_8' if 'seaborn-v0_8' in plt.style.available else 'seaborn'
plt.style.use(seaborn_style)

# One small line chart per country, each with its own y scale.
g = sns.FacetGrid(temp, col="Country/Region", hue="Country/Region",
                  sharey=False, col_wrap=5)
g = g.map(plt.plot, "Date", "Confirmed")
g.set_xticklabels(rotation=90)
plt.show()

In [None]:
# Confirmed cases per country per day.
temp = full_table.groupby(['Date', 'Country/Region'])['Confirmed'].sum().reset_index()
# Legend order: countries with the highest counts first, as the value sort used to give.
country_order = (
    temp.groupby('Country/Region')['Confirmed'].max()
    .sort_values(ascending=False)
    .index.tolist()
)
# px.line joins points in row order, so rows must be chronological; sorting
# rows by value lets a line double back wherever a count is not monotonic.
temp = temp.sort_values('Date')

fig = px.line(temp, x="Date", y="Confirmed", color='Country/Region', title='Cases Spread', height=600,
              category_orders={'Country/Region': country_order})
fig.update_layout(xaxis_rangeslider_visible=True)
fig.show()

In [None]:
# Deaths per country per day.
temp = full_table.groupby(['Date', 'Country/Region'])['Deaths'].sum().reset_index()
# Legend order: countries with the most deaths first, as the value sort used to give.
country_order = (
    temp.groupby('Country/Region')['Deaths'].max()
    .sort_values(ascending=False)
    .index.tolist()
)
# px.line joins points in row order, so rows must be chronological; sorting
# rows by value lets a line double back wherever a count is not monotonic.
temp = temp.sort_values('Date')

fig = px.line(temp, x="Date", y="Deaths", color='Country/Region', title='Deaths', height=600,
              category_orders={'Country/Region': country_order})
fig.update_layout(xaxis_rangeslider_visible=True)
fig.show()

In [None]:
# The fifteen countries with the most deaths: deaths against confirmed cases on log-log axes.
fig = px.scatter(
    full_latest_grouped.sort_values('Deaths', ascending=False).head(15),
    x='Confirmed',
    y='Deaths',
    color='Country/Region',
    size='Confirmed',
    height=600,
    text='Country/Region',
    log_x=True,
    log_y=True,
    title='Deaths vs Confirmed',
)
fig.update_traces(textposition='top center')
fig.update_layout(xaxis_rangeslider_visible=True)
fig.show()

In [None]:
# Worldwide daily totals of the two series being compared. Selecting the
# columns first keeps text and coordinate columns out of the sum.
temp = full_table.groupby('Date')[['Active', 'Recovered']].sum().reset_index()

temp = temp.melt(id_vars='Date', value_vars=['Active', 'Recovered'],
                 var_name='Case', value_name='Value')

# Use the palette's own color for each series; Active was previously drawn in
# the color reserved for deaths.
fig = px.line(temp, x="Date", y="Value", color='Case',
              title='Active & Recovered cases',
              color_discrete_map={'Active': act, 'Recovered': rec})
fig.update_layout(xaxis_rangeslider_visible=True)
fig.show()