In [1]:
import subprocess
import os
import requests
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import matplotlib.dates as mdates
import matplotlib.pylab as plt
import dash
dash.__version__
import dash_core_components as dcc
import dash_html_components as html
from dash.dependencies import Input, Output,State

import plotly.graph_objects as go


from datetime import datetime
from bs4 import BeautifulSoup
import json
import seaborn as sns

from scipy import optimize
from scipy import integrate

sns.set(style='whitegrid')
# sns.axes_style("")


pd.set_option('display.max_rows', 500)

The dash_core_components package is deprecated. Please replace
`import dash_core_components as dcc` with `from dash import dcc`
  import dash_core_components as dcc
The dash_html_components package is deprecated. Please replace
`import dash_html_components as html` with `from dash import html`
  import dash_html_components as html


My personal github link is below

https://github.com/abdlhersln/Enterprise-Data-Science-Folder.git

In [2]:
country_list=['Germany','Turkey','US'] 

In [3]:
pd_raw = pd.read_csv('https://raw.githubusercontent.com/CSSEGISandData/COVID-19/master/csse_covid_19_data/csse_covid_19_time_series/time_series_covid19_confirmed_global.csv')
time_idx=pd_raw.columns[4:]
time_idx = [datetime.strptime(each ,"%m/%d/%y") for each in time_idx]
time_idx = [each.strftime('%Y-%m-%d') for each in time_idx]

US = pd_raw[pd_raw['Country/Region']=='US'].iloc[:,4:].sum(axis=0)
Turkey = pd_raw[pd_raw['Country/Region']=='Turkey'].iloc[:,4:].sum(axis=0)
Germany = pd_raw[pd_raw['Country/Region']=='Germany'].iloc[:,4:].sum(axis=0)

population_germany = 83240000
population_turkey = 84340000
population_us = 329500000

population = [83240000,84340000,329500000]
         

df_plot = pd.DataFrame({'date':time_idx,'Germany':Germany,'Turkey':Turkey ,'US':US})

In [4]:
def quick_plot(x_in, df_input,y_scale='log',slider=False):
    """ Quick basic plot for quick static evaluation of a time series
    
        you can push selective columns of your data frame by .iloc[:,[0,6,7,8]]
        
        Parameters:
        ----------
        x_in : array 
            array of date time object, or array of numbers
        df_input : pandas dataframe 
            the plotting matrix where each column is plotted
            the name of the column will be used for the legend
        scale: str
            y-axis scale as 'log' or 'linear'
        slider: bool
            True or False for x-axis slider
    
        
        Returns:
        ----------
        
    """
    fig = go.Figure()

    for each in df_input.columns:
        fig.add_trace(go.Scatter(
                        x=x_in,
                        y=df_input[each],
                        name=each,
                        opacity=0.8))
    
    fig.update_layout(autosize=True,
        width=800,
        height=700,
        font=dict(
            family="PT Sans, monospace",
            size=18,
            color="#7f7f7f"
            )
        )
    fig.update_yaxes(type=y_scale),
    fig.update_xaxes(tickangle=-45,
                 nticks=20,
                 tickfont=dict(size=14,color="#7f7f7f")
                )
    if slider==True:
        fig.update_layout(xaxis_rangeslider_visible=True)
    fig.show()
      
    

In [5]:
def savgol_filter(df_input,column='confirmed',window=5):
    ''' Savgol Filter which can be used in groupby apply function (data structure kept)

        parameters:
        ----------
        df_input : pandas.series
        column : str
        window : int
            used data points to calculate the filter result

        Returns:
        ----------
        df_result: pd.DataFrame
            the index of the df_input has to be preserved in result
    '''

    degree=1
    df_result=df_input

    filter_in=df_input[column].fillna(0) # attention with the neutral element here

    result=signal.savgol_filter(np.array(filter_in),
                           window, # window size used for filtering
                           1)
    df_result[str(column+'_filtered')]=result
    return df_result

def rolling_reg(df_input,col='confirmed'):
    ''' Rolling Regression to approximate the doubling time'

        Parameters:
        ----------
        df_input: pd.DataFrame
        col: str
            defines the used column
        Returns:
        ----------
        result: pd.DataFrame
    '''
    days_back=3
    result=df_input[col].rolling(
                window=days_back,
                min_periods=days_back).apply(get_doubling_time_via_regression,raw=False)



    return result

In [6]:
# quick_plot(df_plot.date,
#            df_plot.iloc[:,1:],
#            y_scale='log',
#            slider=True)

Doubling Rate - Piecewise Linear Regression

In [7]:
from sklearn import linear_model
reg = linear_model.LinearRegression(fit_intercept=True)
from scipy import signal


In [8]:
start_pos=50
df_plot = df_plot.iloc[start_pos:]

In [9]:
## filter data
for each in country_list:
    df_plot[each+'_filter']=signal.savgol_filter(df_plot[each],
                           3, # window size used for filtering
                           1) # order of fitted polynomial

In [10]:
filter_cols=['Germany_filter','Turkey_filter','US_filter']

In [11]:
def get_doubling_time_via_regression(in_array):
    ''' Use a linear regression to approximate the doubling rate'''
    
    y = np.array(in_array)
    X = np.arange(-1,3).reshape(-1, 1)
    
    assert len(in_array)==4
    reg.fit(X,y)
    intercept=reg.intercept_
    slope=reg.coef_
    
    return intercept/slope

In [12]:
def doubling_time(in_array):
    ''' Use a classical doubling time formular, 
     see https://en.wikipedia.org/wiki/Doubling_time '''
    y = np.array(in_array)
    return len(y)*np.log(2)/np.log(y[-1]/y[0])

In [13]:
# calculate slope of regression of last x days
# use always a limited number of days to approximate the triangle, attention exponential base assumption
days_back = 4 # this gives a smoothing effect
for pos,country in enumerate(country_list):
    df_plot[country+'_DR']=df_plot[country].rolling(
                                window=days_back,
                                min_periods=days_back).apply(get_doubling_time_via_regression, raw=False)

In [14]:
# run on all filtered data
days_back = 4 # this gives a smoothing effect
for pos,country in enumerate(filter_cols):
    df_plot[country+'_DR']=df_plot[country].rolling(
                                window=days_back,
                                min_periods=days_back).apply(get_doubling_time_via_regression, raw=False)

In [15]:
df_plot = df_plot.fillna(0)

In [16]:
confirmed = df_plot[['Germany','Turkey','US']]
confirmed_filtered = df_plot[['Germany_filter','Turkey_filter','US_filter']]
confirmed_filtered.rename(columns = {'Germany_filter':'Germany', 'Turkey_filter':'Turkey','US_filter':'US'}, inplace = True)
confirmed_DR = df_plot[['Germany_DR','Turkey_DR','US_DR']]
confirmed_DR.rename(columns = {'Germany_DR':'Germany', 'Turkey_DR':'Turkey','US_DR':'US'}, inplace = True)
confirmed_filtered_DR = df_plot[['Germany_filter_DR','Turkey_filter_DR','US_filter_DR']]
confirmed_filtered_DR.rename(columns = {'Germany_filter_DR':'Germany', 'Turkey_filter_DR':'Turkey','US_filter_DR':'US'}, inplace = True)

A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  return super().rename(


In [17]:
fig = go.Figure()

app = dash.Dash()
app.layout = html.Div([

    dcc.Markdown('''
     ## Enterprise Data Science Delivery 3
    '''),

    dcc.Markdown('''
     Country Selection
    '''),

    dcc.Dropdown(
        id='country_drop_down',
        options=[
        {'label': 'Germany', 'value': 'Germany'},
        {'label': 'Turkey', 'value': 'Turkey'},
        {'label': 'US', 'value': 'US'},
    ],
        value=['Germany','Turkey'], # which are pre-selected
        multi=True
    ),

    dcc.Markdown('''
         Select Timeline of confirmed COVID-19 cases or the approximated doubling time
        '''),


    dcc.Dropdown(
    id='doubling_time',
    options=[
        {'label': 'Timeline Confirmed ', 'value':1},
        {'label': 'Timeline Confirmed Filtered', 'value': 2},
        {'label': 'Timeline Doubling Rate', 'value': 3},
        {'label': 'Timeline Doubling Rate Filtered', 'value': 4},
    ],
    value=1,
    multi=False
    ),

    dcc.Graph(figure=fig, id='main_window_slope')
])


@app.callback(
    Output('main_window_slope', 'figure'),
    [Input('country_drop_down', 'value'),
    Input('doubling_time', 'value')])

def update_figure(country_list,show_doubling):


    if show_doubling >2: 
        my_yaxis={'type':"log",
               'title':'Approximated doubling rate over 4 days'
              }
    else:
        my_yaxis={'type':"log",
                  'title':'Confirmed Cases'
              }


    traces = []
    
    for each in country_list:

        if show_doubling ==2:
            
            df_plot_new=confirmed_filtered
            
        elif show_doubling ==3:
            
            df_plot_new=confirmed_DR
            
        elif show_doubling ==4:
            
            df_plot_new=confirmed_filtered_DR
            
        else:
            
            df_plot_new=confirmed

            
        traces.append(dict(x=df_plot.date,
                           y=df_plot_new[each],
                           mode='markers+lines',
                           opacity=0.9,
                           name=each
                        )
                )

    return {
            'data': traces,
            'layout': dict (
                width=1200,
                height=700,

                xaxis={'title':'Timeline',
                        'tickangle':-45,
                        'nticks':20,
                        'tickfont':dict(size=14,color="#7f7f7f"),
                      },

                yaxis=my_yaxis
                             
        )
    }

In [18]:
if __name__ == '__main__':

    app.run_server(debug=True, use_reloader=False)

Dash is running on http://127.0.0.1:8050/

 * Serving Flask app "__main__" (lazy loading)
 * Environment: production
[2m   Use a production WSGI server instead.[0m
 * Debug mode: on
