In [None]:
import pandas as pd
import numpy as np

from datetime import datetime

%matplotlib inline


import matplotlib as mpl
import matplotlib.pyplot as plt

import seaborn as sns

import plotly.graph_objects as go

In [None]:
# Notebook-wide display defaults: 16x9 matplotlib figures, up to 500 rows
# when pandas renders a frame, and the seaborn "darkgrid" style.
mpl.rc('figure', figsize=(16, 9))
pd.options.display.max_rows = 500

sns.set(style="darkgrid")

![CRISP_DM](../reports/figures/CRISP_DM.png)

# Exploratory Data Analysis

* The focus is often on a visual representation of the data.


In [None]:
# Read the small flat table (semicolon separated) and preview the five rows
# that come first when sorting by 'date' in descending order.
df_plot = pd.read_csv(
    '../data/processed/COVID_small_flat_table.csv',
    sep=';',
)
df_plot.sort_values(by='date', ascending=False).head(5)

# Matplotlib Example

In [None]:
# Plot every column of df_plot against 'date', skipping the first 15 rows.
# The axes are created explicitly and handed to pandas: DataFrame.plot() opens
# its own figure when no `ax` is given, so a bare plt.figure() beforehand only
# leaves an empty extra figure in the cell output.
fig_overview, ax = plt.subplots()
df_plot.iloc[15:, :].set_index('date').plot(ax=ax)
# Switch to the log scale first, then pin the visible range.
ax.set_yscale('log')
ax.set_ylim(10, 40000)

# Plot.ly

In [None]:
import plotly.graph_objects as go
import requests
import json

In [None]:
import plotly
# Show the installed plotly version as the cell output.
plotly.__version__

In [None]:
# Initial hand-picked selection of countries.
country_list = ['Italy', 'US', 'Spain', 'Germany', 'Korea, South']

In [None]:
# Read the flat table of confirmed cases and preview its first rows by 'date'.
df_confirmed = pd.read_csv(
    '../data/processed/COVID_small_flat_table_confirmed.csv',
    sep=';',
)
df_confirmed.sort_values(by='date', ascending=True).head(5)

In [None]:
# Read the flat table of recovered cases and preview its first rows by 'date'.
df_recovered = pd.read_csv(
    '../data/processed/COVID_small_flat_table_recovered.csv',
    sep=';',
)
df_recovered.sort_values(by='date', ascending=True).head(5)

In [None]:
# Read the flat table of deaths and preview its first rows by 'date'.
df_deaths = pd.read_csv(
    '../data/processed/COVID_small_flat_table_deaths.csv',
    sep=';',
)
df_deaths.sort_values(by='date', ascending=True).head(5)

In [None]:
# Every column of df_recovered after the first one becomes a dropdown option;
# label and value are both the column name.
country_list = df_recovered.columns[1:]
country_dropdown = [
    {'label': name, 'value': name}
    for name in country_list
]

In [None]:
# Empty plotly figures used as initial placeholders for the dashboard graphs.
fig, fig1, fig2, fig3, fig_recover_death = (go.Figure() for _ in range(5))


In [None]:
# Display some statistics for every country.
# Reads the raw NPGEO state-level file (semicolon separated) into df_.
df_ = pd.read_csv(
    '../data/raw/NPGEO/GER_state_data.csv',
    sep=';',
)

In [None]:
# Colour palette referenced by the dashboard layout; only 'text' is read by
# the layout cell, and both entries currently hold the same near-black value.
colors = dict(
    background='#111111',
    text='#111111',
)

In [None]:
# Full processed data set (semicolon separated) used by the doubling-rate graph.
df_input_large = pd.read_csv(
    '../data/processed/COVID_final_set.csv',
    sep=';',
)

In [None]:
import dash
dash.__version__
import dash_core_components as dcc
import dash_html_components as html

fig_dr = go.Figure()
app = dash.Dash(external_stylesheets=['https://codepen.io/chriddyp/pen/bWLwgP.css'])


def _section_heading(text, margin_top=None):
    """Return a centred, underlined H2 in the dashboard text colour.

    margin_top is an optional CSS length for the space above the heading;
    the 'margin-top' entry is left out of the style when it is None.
    """
    style = {
        'textAlign': 'center',
        'color': colors['text'],
        'text-decoration': 'underline',
        'margin-bottom': '30px',
    }
    if margin_top is not None:
        style['margin-top'] = margin_top
    return html.H2(text, style=style)


def _control_label(text, font_size, margin_top=None):
    """Return a Div wrapping a Label, used as the caption above a control."""
    style = {'margin-bottom': '10px'}
    if margin_top is not None:
        style['margin-top'] = margin_top
    return html.Div(html.Label(text, style={'fontSize': font_size}), style=style)


def _graph_column(title, graph_id, figure):
    """Return a half-width column holding a section heading and a Graph."""
    return html.Div(
        [
            _section_heading(title, margin_top='60px'),
            dcc.Graph(figure=figure, id=graph_id),
        ],
        className="six columns",
    )


app.layout = html.Div(
    [
        # Page banner
        html.Div([
            html.H1(
                children='Coronavirus Stats',
                style={
                    'textAlign': 'center',
                    'color': 'white',
                    'fontSize': 44,
                    "border": "2px black solid",
                    'margin-bottom': '30px',
                    'background': '#fb6107',
                },
            ),
        ]),

        # Multi-select country dropdown feeding the two graphs of the first row
        html.Div([
            _control_label('Select Country:', 28),
            dcc.Dropdown(
                id='country_drop_down',
                options=country_dropdown,
                value=['US', 'Germany'],  # pre-selected countries
                multi=True,
            ),
        ]),

        # First row: confirmed-cases timeline (left) and doubling-time view (right)
        html.Div([
            html.Div([
                _section_heading('Confirmed Cases'),
                _control_label('Select Scale:', 24, margin_top='10px'),
                html.Div([
                    dcc.RadioItems(
                        id='yaxis-type',
                        options=[{'label': i, 'value': i} for i in ['Linear', 'Log']],
                        value='Log',
                        labelStyle={'fontSize': 20},
                    ),
                ], style={'margin-left': '40px'}),
                dcc.Graph(figure=fig, id='main_window_slope'),
            ], className="six columns"),

            html.Div([
                _section_heading('Confirmed Cases Doubling Time'),
                html.Div([
                    _control_label('Select timeline:', 24, margin_top='10px'),
                    dcc.Dropdown(
                        id='doubling_time',
                        # The option values are used by the callback as column
                        # names of df_input_large.
                        options=[
                            {'label': 'Timeline Confirmed ', 'value': 'confirmed'},
                            {'label': 'Timeline Confirmed Filtered', 'value': 'confirmed_filtered'},
                            {'label': 'Timeline Doubling Rate', 'value': 'confirmed_DR'},
                            {'label': 'Timeline Doubling Rate Filtered', 'value': 'confirmed_filtered_DR'},
                        ],
                        value='confirmed',
                        multi=False,
                    ),
                ], style={'margin-left': '20px'}),
                dcc.Graph(figure=fig_dr, id='doubling rate'),
            ], className="six columns"),
        ], className="row"),

        html.H4('Source : Johns Hopkins University (JHU)',
                style={'font-family': "Comic Sans MS", 'fontSize': 18}),

        # Per-country statistics section
        _section_heading('Various Stats for different countries', margin_top='30px'),

        # Single-select country dropdown feeding all graphs below
        html.Div([
            _control_label('Select a country:', 18),
            dcc.Dropdown(
                id='single_input',
                options=country_dropdown,
                # NOTE(review): assumes 'India' is one of the df_recovered columns - confirm.
                value='India',
            ),
        ]),

        html.Div([
            _graph_column('Recovery and death rate', 'recover_death', fig_recover_death),
            _graph_column('Recovered Cases', 'recovery', fig),
        ], className="row"),

        html.Div([
            # Heading is centred like its siblings (it was the only left-aligned one).
            _graph_column('Newly Confirmed Cases', 'newly_confirmed', fig),
            _graph_column('Newly Death Cases', 'newly_deaths', fig),
        ], className="row"),

        html.Div([
            _graph_column('Newly Recovered Cases', 'newly_recovered', fig),
        ], className="row"),
    ],
    style={'margin-left': '5%', 'margin-right': '5%'},
)


In [None]:
from dash.dependencies import Input, Output

@app.callback(
    Output('main_window_slope', 'figure'),
    [Input('country_drop_down', 'value'),
     Input('yaxis-type', 'value')])
def update_figure(country_list, yaxis_type):
    """Build the confirmed-cases timeline for the selected countries.

    Parameters
    ----------
    country_list : list of str or None
        Value of the 'country_drop_down' dropdown; every entry is used as a
        column name of df_confirmed.
    yaxis_type : str
        Value of the 'yaxis-type' radio buttons; 'Linear' selects a linear
        y axis, anything else a logarithmic one.

    Returns
    -------
    dict
        Figure dictionary with 'data' (one trace per country) and 'layout'.
    """
    title_font = dict(family='Comic Sans MS', size=22, color='#e36414')
    tick_font = dict(size=14, color="black")

    # A cleared multi-select may deliver None; draw an empty chart instead of failing.
    # Line width and marker size are nested dicts: the `line_width` / `marker_size`
    # shorthand is only expanded by plotly.py graph objects, not in plain dict traces.
    traces = [
        dict(x=df_confirmed.date,
             y=df_confirmed[each],
             mode='markers+lines',
             opacity=0.9,
             line=dict(width=2),
             marker=dict(size=4),
             name=each)
        for each in (country_list or [])
    ]

    return {
        'data': traces,
        'layout': dict(
            xaxis={'tickangle': -45,
                   'nticks': 20,
                   'tickfont': tick_font,
                   'title': "Time",
                   'titlefont': title_font},
            # Plotly axis types are lower case. The former string-valued 'range'
            # is dropped because a range must be a list, so the axis autoscales.
            yaxis={'type': 'linear' if yaxis_type == 'Linear' else 'log',
                   'title': "Confirmed infected people",
                   'tickfont': tick_font,
                   'titlefont': title_font},
        )
    }
    

In [None]:
@app.callback(
    Output('recover_death', 'figure'),
    [Input('single_input', 'value')])
def update_figure(country_name):
    """Build the recovery-rate and death-rate timeline for one country.

    Both rates are the respective count divided by the confirmed count, in
    percent, with the first 30 rows skipped.

    Parameters
    ----------
    country_name : str or None
        Value of the 'single_input' dropdown; used as a column name of
        df_recovered, df_deaths and df_confirmed.

    Returns
    -------
    dict
        Figure dictionary with 'data' and 'layout'; empty when nothing is selected.
    """
    # A cleared dropdown delivers None, which would break the column lookups
    # and the title concatenation below.
    if not country_name:
        return {'data': [], 'layout': {}}

    title_font = dict(family='Comic Sans MS', size=22, color='#e36414')
    tick_font = dict(size=14, color="black")

    confirmed = df_confirmed[country_name]
    # NOTE(review): the division assumes the three tables share the same row order/index - confirm.
    rate_sources = (('Recovery Rate', df_recovered), ('Death Rate', df_deaths))

    # Line width and marker size are nested dicts: the `line_width` / `marker_size`
    # shorthand is only expanded by plotly.py graph objects, not in plain dict traces.
    traces = [
        dict(x=frame.date[30:],
             y=(frame[country_name] / confirmed)[30:] * 100,
             mode='markers+lines',
             opacity=0.9,
             line=dict(width=2),
             marker=dict(size=4),
             hovertemplate="%{y:.2f}%",
             name=label)
        for label, frame in rate_sources
    ]

    return {
        'data': traces,
        'layout': dict(
            title="Stats for country " + country_name,
            xaxis={'tickangle': -45,
                   'nticks': 20,
                   'tickfont': tick_font,
                   'title': "Time",
                   'titlefont': title_font},
            # The former string-valued 'range' is dropped because a range must
            # be a list, so the axis autoscales.
            yaxis={'type': 'linear',
                   'title': 'Recovery rate vs. Death rate.',
                   'tickfont': tick_font,
                   'titlefont': title_font},
        )
    }

In [None]:
@app.callback(
    Output('recovery', 'figure'),
    [Input('single_input', 'value')])
def update_figure(list_of_countries):
    """Build the recovered-cases timeline for one country.

    Parameters
    ----------
    list_of_countries : str or None
        Value of the 'single_input' dropdown (a single country despite the
        name); used as a column name of df_recovered.

    Returns
    -------
    dict
        Figure dictionary with 'data' and 'layout'; empty when nothing is selected.
    """
    # A cleared dropdown delivers None, which would break the column lookup
    # and the title concatenation below.
    if not list_of_countries:
        return {'data': [], 'layout': {}}

    title_font = dict(family='Comic Sans MS', size=22, color='#e36414')
    tick_font = dict(size=14, color="black")

    # Line width and marker size are nested dicts: the `line_width` / `marker_size`
    # shorthand is only expanded by plotly.py graph objects, not in plain dict traces.
    traces = [
        dict(x=df_recovered.date,
             y=df_recovered[list_of_countries],
             mode='markers+lines',
             opacity=0.9,
             line=dict(width=2),
             marker=dict(size=4),
             name='Recovery')
    ]

    return {
        'data': traces,
        'layout': dict(
            title="Stats for country " + list_of_countries,
            xaxis={'title': "Time",
                   'tickfont': tick_font,
                   'tickangle': -45,
                   'titlefont': title_font},
            # The plotted series is the table value itself (no diff), so the
            # axis is labelled as recovered cases rather than newly recovered.
            yaxis={'title': "Recovered cases",
                   'tickfont': tick_font,
                   'titlefont': title_font},
        )
    }

In [None]:
@app.callback(
    Output('newly_confirmed', 'figure'),
    [Input('single_input', 'value')])
def update_figure(list_of_countries):
    """Build the bar chart of row-to-row changes in confirmed cases for one country.

    Parameters
    ----------
    list_of_countries : str or None
        Value of the 'single_input' dropdown (a single country despite the
        name); used as a column name of df_confirmed.

    Returns
    -------
    dict
        Figure dictionary with 'data' and 'layout'; empty when nothing is selected.
    """
    # A cleared dropdown delivers None, which would break the column lookup
    # and the title concatenation below.
    if not list_of_countries:
        return {'data': [], 'layout': {}}

    title_font = dict(family='Comic Sans MS', size=22, color='#e36414')
    tick_font = dict(size=14, color="black")

    # diff() turns the stored counts into per-row increments; the first 30 rows are skipped.
    traces = [
        dict(x=df_confirmed.date[30:],
             y=df_confirmed[list_of_countries].diff()[30:],
             type="bar",
             orientation='v')
    ]

    return {
        'data': traces,
        'layout': dict(
            title="Stats for country " + list_of_countries,
            xaxis={'title': "Time",
                   'tickfont': tick_font,
                   'tickangle': -45,
                   'titlefont': title_font},
            yaxis={'title': "Newly confirmed cases",
                   'tickfont': tick_font,
                   'titlefont': title_font},
        )
    }

In [None]:
@app.callback(
    Output('newly_deaths', 'figure'),
    [Input('single_input', 'value')])
def update_figure(list_of_countries):
    """Build the bar chart of row-to-row changes in deaths for one country.

    Parameters
    ----------
    list_of_countries : str or None
        Value of the 'single_input' dropdown (a single country despite the
        name); used as a column name of df_deaths.

    Returns
    -------
    dict
        Figure dictionary with 'data' and 'layout'; empty when nothing is selected.
    """
    # A cleared dropdown delivers None, which would break the column lookup
    # and the title concatenation below.
    if not list_of_countries:
        return {'data': [], 'layout': {}}

    title_font = dict(family='Comic Sans MS', size=22, color='#e36414')
    tick_font = dict(size=14, color="black")

    # diff() turns the stored counts into per-row increments; the first 30 rows are skipped.
    traces = [
        dict(x=df_deaths.date[30:],
             y=df_deaths[list_of_countries].diff()[30:],
             type="bar",
             orientation='v')
    ]

    return {
        'data': traces,
        'layout': dict(
            title="Stats for country " + list_of_countries,
            xaxis={'title': "Time",
                   'tickfont': tick_font,
                   'tickangle': -45,
                   'titlefont': title_font},
            yaxis={'title': "Newly death cases",
                   'tickfont': tick_font,
                   'titlefont': title_font},
        )
    }


In [None]:
@app.callback(
    Output('newly_recovered', 'figure'),
    [Input('single_input', 'value')])
def update_figure(list_of_countries):
    """Build the bar chart of row-to-row changes in recovered cases for one country.

    Parameters
    ----------
    list_of_countries : str or None
        Value of the 'single_input' dropdown (a single country despite the
        name); used as a column name of df_recovered.

    Returns
    -------
    dict
        Figure dictionary with 'data' and 'layout'; empty when nothing is selected.
    """
    # A cleared dropdown delivers None, which would break the column lookup
    # and the title concatenation below.
    if not list_of_countries:
        return {'data': [], 'layout': {}}

    title_font = dict(family='Comic Sans MS', size=22, color='#e36414')
    tick_font = dict(size=14, color="black")

    # diff() turns the stored counts into per-row increments; the first 30 rows are skipped.
    traces = [
        dict(x=df_recovered.date[30:],
             y=df_recovered[list_of_countries].diff()[30:],
             type="bar",
             orientation='v')
    ]

    return {
        'data': traces,
        'layout': dict(
            title="Stats for country " + list_of_countries,
            xaxis={'title': "Time",
                   'tickfont': tick_font,
                   'tickangle': -45,
                   'titlefont': title_font},
            yaxis={'title': "Newly recovered cases",
                   'tickfont': tick_font,
                   'titlefont': title_font},
        )
    }


In [None]:
@app.callback(
    Output('doubling rate', 'figure'),
    [Input('country_drop_down', 'value'),
    Input('doubling_time', 'value')])
def update_figure(country_list,show_doubling):
    """Build the timeline of the selected df_input_large column per country.

    Parameters
    ----------
    country_list : list of str or None
        Value of the 'country_drop_down' dropdown; every entry is matched
        against the 'country' column of df_input_large.
    show_doubling : str or None
        Value of the 'doubling_time' dropdown; used as the column name to plot
        ('confirmed', 'confirmed_filtered', 'confirmed_DR' or
        'confirmed_filtered_DR').

    Returns
    -------
    dict
        Figure dictionary with 'data' (one trace per country) and 'layout';
        empty when no column is selected.
    """
    # A cleared dropdown delivers None; there is no column to plot then.
    if not show_doubling:
        return {'data': [], 'layout': {}}

    title_font = dict(family='Comic Sans MS', size=22, color='#e36414')
    tick_font = dict(size=14, color="black")

    # The dropdown emits column names, so the doubling-rate options are the
    # ones ending in '_DR'. The former checks for 'doubling_rate' never matched
    # any option, leaving the doubling-rate title and the mean branch unreachable.
    is_doubling_rate = show_doubling.endswith('_DR')
    my_yaxis = {'type': "log",
                'title': ('Approximated doubling rate over 3 days'
                          if is_doubling_rate
                          else 'Confirmed infected people '),
                'tickfont': tick_font,
                'titlefont': title_font}

    # Rows of one country and date are combined by mean for the filtered
    # doubling rate and by sum otherwise.
    # NOTE(review): the unfiltered 'confirmed_DR' is still summed, as in the
    # original branch structure - confirm whether it should be averaged too.
    how = 'mean' if show_doubling == 'confirmed_filtered_DR' else 'sum'

    traces = []
    for each in (country_list or []):
        per_country = df_input_large[df_input_large['country'] == each]
        # Aggregate only the plotted column; the string 'state' column is not
        # part of the numeric aggregation any more.
        per_date = (per_country
                    .groupby(['country', 'date'])[show_doubling]
                    .agg(how)
                    .reset_index())

        traces.append(dict(x=per_date.date,
                           y=per_date[show_doubling],
                           mode='markers+lines',
                           opacity=0.9,
                           name=each))

    return {
        'data': traces,
        'layout': dict(
            xaxis={'title': 'Timeline',
                   'tickangle': -45,
                   'nticks': 20,
                   'tickfont': tick_font,
                   'titlefont': title_font},
            yaxis=my_yaxis
        )
    }


In [None]:
# Start the Dash server with debug mode on and the reloader disabled.
# Take care to press the stop button before calling this cell again, since the address stays blocked while the server is up.
# If you have problems finding (killing) the process, use the following shell commands (Linux):
# sudo lsof -n -i :8050 | grep LISTEN 
# kill -9 pid 

app.run_server(debug=True, use_reloader=False)