# Covid-19 dashboard
---
__Goal,__ create a dynamic dashboard from tableau example

In [150]:
import pandas as pd
import numpy as np
import plotly.graph_objects as go
import plotly.express as px

In [151]:
pwd

'/Users/axel/Agensit/covid19'

# 1. Data Cleanning and Preparation
---

In [152]:
df = pd.read_csv('collect_data/df_covid19.csv')
df.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 55594 entries, 0 to 55593
Data columns (total 6 columns):
 #   Column     Non-Null Count  Dtype  
---  ------     --------------  -----  
 0   Lat        55594 non-null  float64
 1   Long       55594 non-null  float64
 2   Date       55594 non-null  object 
 3   Confirmed  55594 non-null  int64  
 4   Death      55594 non-null  int64  
 5   State      55594 non-null  object 
dtypes: float64(2), int64(2), object(2)
memory usage: 2.5+ MB


# 2. Plotly Express
---
###  2.1 Gloabal map
In that purpose we will use mapbox API (scatter mapbox), for that we need to retreive the mapbox token from `mapbox_token.txt`.

⚠️ I also notice something weird, the plotly mapbox API use string Data for the animation instead of datetime

In [153]:
# get acess to mapbox
with open('mapbox_token.txt') as f:
    lines=[x.rstrip() for x in f]
    
mapbox_access_token = lines[0]
px.set_mapbox_access_token(mapbox_access_token)

In [154]:
last_date = df['Date'].iloc[-1]
df1 = df[df['Date'] == last_date]
df1 = df[df['Date'] == '2020-02-18']
df1['color']= df1['Confirmed'].map(lambda x: np.log2(x+1e-6)) # add 1e-6 to avoid log(0)



A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy



In [155]:
# let's create the tooltip
tooltip = {key:False for key,_ in df1.items()}
tooltip['Death']=True
tooltip['Confirmed']=True
tooltip

{'Lat': False,
 'Long': False,
 'Date': False,
 'Confirmed': True,
 'Death': True,
 'State': False,
 'color': False}

We add a `color` feature which is just the log value of confirmed cases! The goal here is to decrease the huge difference of color between the US and the other country

In [156]:
map_plot = px.scatter_mapbox(df1, lat='Lat', lon='Long', 
                        hover_name='State', hover_data=tooltip,
                        zoom=0.6, mapbox_style='basic',
                        size='Confirmed', size_max=20,
                        width=800, height=600,
                        color='color', color_continuous_scale=['Gold', 'DarkOrange', 'Crimson'])


# map_plot.update(layout_coloraxis_showscale=False)
map_plot.update_layout(hoverlabel=dict(bgcolor="white",font_size=16))
map_plot.show()

In [157]:
animation = px.scatter_mapbox(df, lat='Lat', lon='Long', 
                        animation_frame='Date', animation_group='Confirmed',
                        hover_name='State',
                        zoom=0.6, mapbox_style='dark',
                        size='Confirmed', size_max=35,
                        width=800, height=600,
                        color_continuous_scale=['Gold', 'DarkOrange', 'Crimson'])


animation.update(layout_coloraxis_showscale=False)
animation.update_layout(hoverlabel=dict(bgcolor="white",font_size=12))
animation.show()

### 2.2  Cases over time
---

In [158]:
global_increase = df.groupby('Date').sum()
global_increase.reset_index(inplace=True)

In [159]:
tooltip = {column:False for column in global_increase.columns}
tooltip['Confirmed'] = True

In [160]:
total_case = px.line(global_increase, y='Confirmed',x='Date', title='Nombre de cas en fonction du temps',
        hover_data=tooltip)

total_case.update_yaxes(title=None)
total_case.update_xaxes(title=None, showgrid=False)
total_case.update_layout(hovermode="x unified")

### 2.3 New Cases Over time

In [161]:
diff = df.copy()
diff['Nouveau cas'] = diff['Confirmed'] - diff['Confirmed'].shift(1)
diff['Nouveau mort'] = diff['Death'] - diff['Death'].shift(1)
diff.dropna(inplace=True)

In [162]:
global_diff = diff.groupby('Date').sum().reset_index()
global_diff = global_diff[global_diff['Nouveau cas']>0]

# #tooltip
tooltip = {column:False for column in global_diff}
tooltip['Nouveau cas'] = True

new_cases_plot = px.bar(global_diff, x='Date',y='Nouveau cas', title='Nombre de nouveau cas en fonction du temps',
                         hover_data=tooltip)

new_cases_plot.update_yaxes(title=None)
new_cases_plot.update_xaxes(title=None)
new_cases_plot.update_layout(hovermode="x unified")

### 2.4 Top 10 Country
---

In [163]:
df2 = df.groupby(['State', 'Date']).sum().reset_index()

date = '2020-05-23'
df2 = df2[df2['Date'] == date]
top10 = df2.nlargest(10,'Confirmed')
top10.sort_values('Confirmed', inplace=True)
tooltip = {column:False for column in top10.columns}

top10_plot = px.bar(top10, y='State', x='Confirmed', text='Confirmed', orientation='h',
                    title='Les 10 pays avec le plus de cas',hover_name='State', hover_data=tooltip)

top10_plot.update_layout(hoverlabel=dict(bgcolor="white",font_size=12))
top10_plot.update_traces(texttemplate='%{text:.2s}', textposition='outside')
top10_plot.update_yaxes(title=None, showgrid=False)
top10_plot.update_xaxes(title=None, showgrid=False, showticklabels=False)

# 3. Plotly go
---
An other way with a low level api to create the same plot! We will have more control but it's longer to code
### 3.1 New cases

In [197]:
## DROPDOWN
country_dropdown = ['Etats Unis', 'Inde']
# country_dropdown = None

## CREATE THE TABLE
new_type = 'Nouveau cas'
# delete negative value
# global_diff = diff.groupby(['Date', 'State']).sum().reset_index(level='Date')
daily_cases = diff.copy()
daily_cases.loc[daily_cases['Nouveau cas'] < 0,['Nouveau cas']] = 0
daily_cases.loc[daily_cases['Nouveau mort'] < 0,'Nouveau mort'] = 0

# FILTERRING BY COUNTRY
if country_dropdown:   
    new_cases_plot = go.Figure()
    for c in country_dropdown:
        df = daily_cases.loc[daily_cases['State'] == c]
        evolution = df['Nouveau cas'].map(lambda x: x/df['Nouveau cas'].sum())
        print(evolution.sum())
        new_cases_plot.add_traces(
            go.Scatter(
                x=df['Date'],
                y=evolution,
                fill='tozeroy',
                line_shape='spline',
                name=c)
        )
        
# # GLOBAL CASES
# else:
#     df = daily_cases.groupby('Date').sum()
#     new_cases_plot = go.Figure(go.Bar(
#         x=global_diff.index,
#         y=global_diff[new_type],
# #         marker_color=marker_color,
# #         hovertemplate='%{y:.3s} '+ f'nouveau {type_value}<extra></extra>'
#                               ))
    
new_cases_plot.update_yaxes(showline=False, nticks=5)
new_cases_plot.update_xaxes(showline=True, nticks=5, showgrid=True)
new_cases_plot.update_layout(hovermode="x unified", showlegend=False, barmode='stack')

1.0
1.0


In [172]:
import plotly.graph_objects as go

import numpy as np

x0 = np.random.randn(500)
# Add 1 to shift the mean of the Gaussian distribution
x1 = np.random.randn(500) + 1

fig = go.Figure()
fig.add_trace(go.Histogram(x=x0))
fig.add_trace(go.Histogram(x=x1))

# Overlay both histograms
fig.update_layout(barmode='overlay')
# Reduce opacity to see both histograms
fig.update_traces(opacity=0.75)
fig.show()

In [88]:
if country_dropdown:
    global_diff = diff.groupby(['Date','State']).sum().reset_index()
    global_diff = global_diff[global_diff['Nouveau cas']>0]
    new_cases_plot = go.Figure([go.Bar(x=global_diff[global_diff['State'] == country]['Date'], 
                    y=global_diff[global_diff['State'] == country]['Nouveau cas'], name=country)
                     for country in country_dropdown])
else:
    global_diff = diff.groupby('Date').sum().reset_index()
    global_diff = global_diff[global_diff['Nouveau cas']>0]
    new_cases_plot = go.Figure(go.Bar(x=global_diff['Date'], y=global_diff['Nouveau cas']))

new_cases_plot.update_layout(barmode='stack')
new_cases_plot.update_yaxes(title=None)
new_cases_plot.update_xaxes(title=None)
new_cases_plot.update_layout(hovermode="x unified")
new_cases_plot.show()

In [90]:
global_diff

Unnamed: 0,Date,State,Lat,Long,Confirmed,Death,Nouveau cas,Nouveau mort
273,2020-01-23,Anhui,31.825700,117.226400,9,0,8.0,0.0
285,2020-01-23,Beijing,40.182400,116.414200,22,0,8.0,0.0
310,2020-01-23,Chongqing,30.057200,107.874000,9,0,3.0,0.0
344,2020-01-23,Fujian,26.078900,117.987400,5,0,4.0,0.0
347,2020-01-23,Gansu,35.751800,104.286100,2,0,2.0,0.0
...,...,...,...,...,...,...,...,...
55374,2020-08-17,West Bank and Gaza,31.952200,35.233200,16844,113,310.0,3.0
55375,2020-08-17,Western Australia,-31.950500,115.860500,647,9,1.0,0.0
55378,2020-08-17,Yemen,15.552727,48.516388,1882,535,13.0,5.0
55381,2020-08-17,Zambie,-13.133897,27.849332,9839,264,496.0,4.0


### 3.2 Case Over time

In [77]:
if country_dropdown:
    global_increase = df.groupby(['Date','State']).sum().reset_index()
    total_case = go.Figure([go.Scatter(x=global_increase[global_increase['State'] == country]['Date'],
                        y=global_increase[global_increase['State'] == country]['Confirmed'],name=country)
                for country in country_dropdown])
else:
    global_increase = df.groupby('Date').sum().reset_index()
    total_case = go.Figure(go.Scatter(x=global_increase['Date'], y=global_increase['Confirmed'], fill='tozeroy'))

total_case.update_yaxes(title=None)
total_case.update_xaxes(title=None)
total_case.update_layout(hovermode="x unified")
total_case.show()

### 3.3 Top 10

In [78]:
date = '2020-05-25'
country_dropdown = ['France', 'Italie', 'Brésil']
# country_dropdown = None

In [79]:
top10 = df.groupby(['State', 'Date']).sum().reset_index()
top10 = top10[top10['Date'] == date]
# top10.sort_values('Confirmed', inplace=True)

if country_dropdown:
    top10_plot = go.Figure([go.Bar(x=top10[top10['State'] == country]['Confirmed'], 
                    y=top10[top10['State'] == country]['State'], name=country, orientation='h',
                    text=top10[top10['State'] == country]['Confirmed'],textposition='outside')
                     for country in country_dropdown])
else:
    top10 = top10.nlargest(10,'Confirmed')
    top10.sort_values('Confirmed', inplace=True)
    top10_plot = go.Figure(go.Bar(x=top10['Confirmed'], y=top10['State'], orientation='h',
                        hoverinfo='text+y', text=top10['Confirmed'],textposition='outside'))

top10_plot.update_layout(hoverlabel=dict(bgcolor="white",font_size=12))
top10_plot.update_traces(texttemplate='%{text:.2s}', textposition='outside')
top10_plot.update_yaxes(title=None )
top10_plot.update_xaxes(title=None, showgrid=False, showticklabels=False)

### 3.4 Map

In [80]:
# get acess to mapbox
with open('mapbox_token.txt') as f:
    lines=[x.rstrip() for x in f]
    
mapbox_access_token = lines[0]

In [81]:
def discretize(serie, buckets):
    return pd.cut(serie.tolist(),buckets).codes
# create a discrete confirmed column
df['disc_confirmerd'] = discretize(df['Confirmed'].map(lambda x: x ** 0.4), 30)

In [82]:
date = '2020-05-25'
df1 = df[df['Date'] == date]
country_dropdown = ['France', 'Italie', 'Etats Unis']
# country_dropdown = None

In [83]:
if country_dropdown:
    df1 = df1[df1['State'].isin(country_dropdown)].reset_index(drop=True)
    map_plot = go.Figure([go.Scattermapbox(
        lat = [df1.loc[i,'Lat']],
        lon = [df1.loc[i,'Long']],
        marker = go.scattermapbox.Marker(size= [df1.loc[i,'disc_confirmerd']]),
        hovertemplate ='<b>%{customdata}</b><br>' +
                    '%{text} cas<extra></extra>',
        customdata = [df1.loc[i,'State']],
        text = [df1.loc[i,'Confirmed']])
        for i in range(len(df1))])
    map_plot.update_layout(mapbox={'zoom':0.4,
                        'center':go.layout.mapbox.Center(lat=df1['Lat'].mean(),lon=df1['Long'].mean())})

else:
    map_plot = go.Figure(go.Scattermapbox(
        lat = df1['Lat'], 
        lon = df1['Long'],
        marker = dict(size= df1['disc_confirmerd']),
        hovertemplate ='<b>%{customdata}</b><br>' +
                    '%{text} cas<extra></extra>',
        customdata = df1['State'],
        text = df1['Confirmed'],
                        ))
    map_plot.update_layout(mapbox={'zoom': 0.4})


map_plot.update_layout(hoverlabel=dict(bgcolor="white",font_size=12),  width=800, height=600,
                      mapbox = {'accesstoken': mapbox_access_token}, showlegend=False)

### Test zoom

In [84]:
import itertools 

def find_pairs(lst):
    return [(a,b) for a,b in itertools.permutations(lst, 2)]

def dist(df, c1,c2):
    dff = df.set_index('State')
    diff_lat = dff.loc[c1,'Lat'] - dff.loc[c2,'Lat']
    diff_long =  dff.loc[c1,'Long'] - dff.loc[c2,'Long']
    return np.sqrt(diff_lat ** 2 + diff_long ** 2)   

def get_max_dist(country_list):
    country_pairs = find_pairs(country_list)
    country_dist = []
    for pair in country_pairs:
        country_dist.append(dist(df1, pair[0], pair[1]))
    if len(country_pairs) > 0: 
        return max(country_dist)
    else:
        return 1

# custom buckets
bins = [0, 10.3, 33.8, 79.8, 98.3, np.inf]
groups_names = [5, 4, 3, 2, 1]
def binning(country_list):
    dist = get_max_dist(country_list)
    return pd.cut([dist], bins, labels=groups_names).tolist()

In [85]:
df1 = df[df['Date'] == date]

#zoom adjustment
country_dropdown = ['France', 'Turquie', 'Egypte']
country_dropdown = ['Egypte']
df1 = df1[df1['State'].isin(country_dropdown)].reset_index(drop=True)
mean_lat = df1['Lat'].mean()
mean_lon = df1['Long'].mean()
zoom = binning(df1['State'])[0]

In [86]:
map_plot = go.Figure([go.Scattermapbox(
    lat = [df1.loc[i,'Lat']],
    lon = [df1.loc[i,'Long']],
    marker = go.scattermapbox.Marker(size=[df1.loc[i,'disc_confirmerd']*3]),
    hovertemplate ='<b>%{customdata}</b><br>' +'%{text} cas<extra></extra>',
    customdata = [df1.loc[i,'State']],
    text = [df1.loc[i,'Confirmed']])
    for i in range(len(df1))])


map_plot.update_layout(mapbox={'zoom':zoom,
                    'center':go.layout.mapbox.Center(lat= mean_lat,lon= mean_lon)})


map_plot.update_layout(hoverlabel=dict(bgcolor="white",font_size=12),  width=800, height=600,
                      mapbox = {'accesstoken': mapbox_access_token}, showlegend=False)