## Data wrangling

## Interactive figures with plotly.py

In [None]:
from data_input import tidy_most_recent, get_all_data
import pandas as pd
import plotly.express as px
# Load the datasets through the project's get_all_data helper; it returns two
# objects, bound here to `df` and `df_prediction`.
df, df_prediction = get_all_data()

In [None]:
# Peek at the last rows of df (shown as the cell output).
df.tail()

In [None]:
# Keep only the last row of the 'confirmed' part of df
# (presumably the most recent date — confirm that the index is sorted by date).
df_recent = df['confirmed'].iloc[-1]
df_recent

In [None]:
# Turn the index of the selected row into ordinary columns, then give the
# three resulting columns explicit names that the plotting calls can refer to.
df_recent = df_recent.reset_index()
df_recent.columns = ['iso', 'country_region', 'value']
df_recent

In [None]:
import plotly.express as px
# First pass at a world map: each location is identified by its 'iso' entry
# and coloured by the raw 'value' column of df_recent.
fig1 = px.choropleth(
    data_frame=df_recent,
    locations="iso",
    color="value",
)
fig1.show()

In [None]:
from data_input import normalize_by_population
import numpy as np
# Scale the latest counts by population and express them per million.
# normalize_by_population is a project helper; presumably it divides each
# value by the matching country's population — confirm in data_input.
normalized_values = normalize_by_population(df_recent)
df_recent['normalized'] = (1.e6 * normalized_values).values
# Colour the map on a log10 scale. Non-positive entries are masked to NaN
# before taking the logarithm: log10(0) would otherwise produce -inf together
# with a NumPy RuntimeWarning.
df_recent['log_normalized'] = np.log10(
    df_recent['normalized'].where(df_recent['normalized'] > 0)
)
# hover_data: hide the 'iso' and 'log_normalized' entries, show the country
# and the raw count, and show the per-million value with two decimals.
fig_map = px.choropleth(df_recent, locations='iso',
                        color='log_normalized',
                        hover_data={'iso':False,
                                    'country_region':True,
                                    'value':True,
                                    'normalized':':.2f',
                                    'log_normalized':False},
                        labels={'country_region':'country',
                               'value':'confirmed cases'}
                       )
fig_map.show()

In [None]:
from data_input import normalize_by_population_wide
# Time series of confirmed cases per million for a few countries.
countries = ['Canada', 'France', 'Brazil']
df_confirmed = normalize_by_population_wide(df['confirmed'])
# Each column label is indexed with [1]; presumably the labels are
# (iso, country) pairs, so this keeps the country name only — confirm.
df_confirmed.columns = [label[1] for label in df_confirmed.columns]
fig_timeplot = px.scatter(
    df_confirmed[countries] * 1.e6,
    labels={'value': 'confirmed cases per million', 'variable': 'country'},
)
fig_timeplot.update_traces(mode='lines+markers')
fig_timeplot.update_layout(height=400, xaxis_rangeslider_visible=True)
fig_timeplot.show()

### Long mode and wide mode

In [None]:
# Display the wide-form table (one column per country after the renaming above).
df_confirmed

In [None]:
import plotly.graph_objects as go
# Build a figure by hand with graph_objects: one scatter trace per selected
# column, plotted against the table's index.
countries = df_confirmed.columns[::10]  # every tenth column
fig = go.Figure()
for country in countries:
    fig.add_trace(
        go.Scatter(x=df_confirmed.index, y=df_confirmed[country], name=country)
    )
fig.show()

In [None]:
# Reshape from wide form to long form: 'date' is kept as the identifier and
# every other column is unpivoted into 'variable' / 'value' pairs.
df_long = pd.melt(df_confirmed.reset_index(), id_vars='date')
print(df_long)
# Plot only the first 10000 rows of the long table, one line per 'variable'.
fig_scatter = px.line(df_long.iloc[:10000], x='date', y='value', color='variable')
fig_scatter.show()

In [None]:
# Facet the time series by calendar month.
# Derive a month label (monthly period rendered as a string) for every row
# from the index. NOTE: this adds a non-numeric 'month' column to
# df_confirmed, so code that treats every column as a country will also see it.
df_confirmed['month'] = list(pd.Series(df_confirmed.index).dt.to_period('M').astype(str))
countries = ['Canada', 'France', 'Brazil']
print(df_confirmed[countries + ['month']])
print(df_confirmed['month'].dtype)
# Skip the first 80 rows, then draw one facet column per month label.
fig_timeplot2 = px.scatter(df_confirmed[countries + ['month']][80:], facet_col='month',
                           labels={'value':'confirmed cases per million',
                                  'variable':'country'}
                          )
fig_timeplot2.update_traces(mode='lines+markers')
fig_timeplot2.show()
print(df_confirmed.index)
print(df_confirmed['month'])

In [None]:
# The month labels on their own: the index converted to monthly periods,
# rendered as strings.
pd.Series(df_confirmed.index).dt.to_period('M').astype(str)

## Full application with Dash and Jupyter Dash

See https://medium.com/plotly/introducing-jupyterdash-811f1f57c02e

In [None]:
def make_timeplot(df, countries):
    """Build the confirmed-cases-per-million time plot for some countries.

    Args:
        df: table whose 'confirmed' entry is passed to
            normalize_by_population_wide.
        countries: list of column labels (country names) to plot.

    Returns:
        A plotly express scatter figure drawn with lines and markers, with an
        x-axis range slider, a height of 400 and no top/bottom margin.
    """
    per_capita = normalize_by_population_wide(df['confirmed'])
    # Keep only the second element of every column label so that columns can
    # be selected with the entries of `countries`.
    per_capita.columns = [label[1] for label in per_capita.columns]
    figure = px.scatter(
        per_capita[countries] * 1.e6,
        labels={'value': 'confirmed cases per million', 'variable': 'country'},
    )
    figure.update_traces(mode='lines+markers')
    figure.update_layout(
        height=400,
        margin=dict(t=0, b=0),
        xaxis_rangeslider_visible=True,
    )
    return figure

In [None]:
# Remove the top/bottom margins of the map and set its height to 300
# (presumably to make it more compact for the Dash layouts — confirm).
fig_map.update_layout(margin=dict(t=0, b=0), height=300)

In [None]:
from jupyter_dash import JupyterDash
import dash_core_components as dcc
import dash_html_components as html
from dash.dependencies import Input, Output

# Static dashboard: a title followed by the map and the time plot,
# with no callbacks attached.
app = JupyterDash(__name__)
app.layout = html.Div(
    children=[
        html.H1(children="Covid-19: confirmed cases"),
        dcc.Graph(id='graph-map', figure=fig_map),
        dcc.Graph(id='graph-timeplot', figure=fig_timeplot),
    ]
)

# Run the app with mode='inline' on port 8052.
app.run_server(mode='inline', port=8052)

In [None]:
from jupyter_dash import JupyterDash
import dash_core_components as dcc
import dash_html_components as html
from dash.dependencies import Input, Output
import dash

# Interactive dashboard: clicking on the map replaces the time plot with the
# series of the clicked country.
app = JupyterDash(__name__)
app.layout = html.Div(
    children=[
        html.H1(children="Covid-19: confirmed cases"),
        dcc.Graph(id='graph-map', figure=fig_map),
        dcc.Graph(id='graph-timeplot', figure=fig_timeplot),
    ]
)


@app.callback(
    Output('graph-timeplot', 'figure'),
    [Input("graph-map", "clickData")],
)
def update_timeplot(click_data):
    """Return a new time plot for the map point described by click_data."""
    if click_data is not None:
        clicked_point = click_data['points'][0]
        # The second customdata entry is used as the country name.
        country = clicked_point['customdata'][1]
        return make_timeplot(df, [country])
    # No click data: leave the current figure untouched.
    return dash.no_update


app.run_server(mode='inline', port=8053)

In [None]:
from jupyter_dash import JupyterDash
import dash_core_components as dcc
import dash_html_components as html
from dash.dependencies import Input, Output, State
import dash

# Interactive dashboard: every click on the map adds the clicked country to
# the time plot. The list of plotted countries is kept in a dcc.Store so it
# persists between callback invocations.
app = JupyterDash(__name__)
app.layout = html.Div([
    html.H1("Covid-19: confirmed cases"),
    dcc.Graph(id='graph-map', figure=fig_map),
    dcc.Graph(id='graph-timeplot', figure=fig_timeplot),
    dcc.Store(id='country-store', data=['Canada', 'France', 'Brazil'])
])


@app.callback(
    [Output('graph-timeplot', 'figure'),
     Output('country-store', 'data')],
    [Input("graph-map", "clickData")],
    [State('country-store', 'data')]
)
def update_timeplot(click_data, stored_countries):
    """Add the clicked country to the time plot.

    Args:
        click_data: the map's clickData value, or None when there is none.
        stored_countries: the list currently held in 'country-store'.

    Returns:
        A (figure, countries) pair for the two outputs, or a pair of
        dash.no_update when there is nothing to change.
    """
    if click_data is None:
        return dash.no_update, dash.no_update
    # The second customdata entry is used as the country name.
    new_country = click_data['points'][0]['customdata'][1]
    stored_countries = stored_countries or []
    # Clicking an already plotted country must not store it twice: a
    # duplicated name would select the same column twice in make_timeplot.
    if new_country in stored_countries:
        return dash.no_update, dash.no_update
    # Build a new list rather than mutating the State value in place.
    updated_countries = stored_countries + [new_country]
    return make_timeplot(df, updated_countries), updated_countries


app.run_server(mode='inline', port=8055)