#### Import needed libraries

In [40]:
#dealing with datasets
import pandas as pd
#operations on datasets
import numpy as np
#creaing viualizations
import plotly.express as px
import plotly.graph_objects as go
from plotly.subplots import make_subplots

In [41]:
recoveries = pd.read_csv('https://raw.githubusercontent.com/CSSEGISandData/COVID-19/master/csse_covid_19_data/csse_covid_19_time_series/time_series_covid19_recovered_global.csv')
confirmed = pd.read_csv('https://raw.githubusercontent.com/CSSEGISandData/COVID-19/master/csse_covid_19_data/csse_covid_19_time_series/time_series_covid19_confirmed_global.csv')
deaths = pd.read_csv('https://raw.githubusercontent.com/CSSEGISandData/COVID-19/master/csse_covid_19_data/csse_covid_19_time_series/time_series_covid19_deaths_global.csv')
us_medical_data = pd.read_csv('https://raw.githubusercontent.com/CSSEGISandData/COVID-19/master/csse_covid_19_data/csse_covid_19_daily_reports_us/08-22-2020.csv')
latest_data = pd.read_csv('https://raw.githubusercontent.com/CSSEGISandData/COVID-19/master/csse_covid_19_data/csse_covid_19_daily_reports/08-22-2020.csv')

In [42]:
confirmed.head()

Unnamed: 0,Province/State,Country/Region,Lat,Long,1/22/20,1/23/20,1/24/20,1/25/20,1/26/20,1/27/20,...,12/27/20,12/28/20,12/29/20,12/30/20,12/31/20,1/1/21,1/2/21,1/3/21,1/4/21,1/5/21
0,,Afghanistan,33.93911,67.709953,0,0,0,0,0,0,...,51039,51280,51350,51405,51526,51526,51526,51526,53011,53105
1,,Albania,41.1533,20.1683,0,0,0,0,0,0,...,56254,56572,57146,57727,58316,58316,58991,59438,59623,60283
2,,Algeria,28.0339,1.6596,0,0,0,0,0,0,...,98249,98631,98988,99311,99610,99897,100159,100408,100645,100873
3,,Andorra,42.5063,1.5218,0,0,0,0,0,0,...,7821,7875,7919,7983,8049,8117,8166,8192,8249,8308
4,,Angola,-11.2027,17.8739,0,0,0,0,0,0,...,17240,17296,17371,17433,17553,17568,17608,17642,17684,17756


### Preprocessing

###### splitting the dataframes into groups by using groupby() function

In [43]:
confirmed_df=confirmed.groupby(by="Country/Region", as_index=False).sum()
death_df=deaths.groupby(by="Country/Region", as_index=False).sum()
recoveries_df=recoveries.groupby(by="Country/Region", as_index=False).sum()

In [44]:
active_group=pd.DataFrame(columns=[confirmed.columns])
active_group=deaths.copy()


###### dataframe enlargement

In [None]:
for i in range(confirmed_df.shape[0]):
    for j in range(3, confirmed_df.shape[1]):
        active_group.iloc[i,j] = confirmed_df.iloc[i,j]-(recoveries_df.iloc[i,j]+death_df.iloc[i,j])

In [None]:
confirmed.describe()

#### Basic Statistics for Time Series Analysis:
###### Time series analysis comprises methods for analyzing time series data in order to extract meaningful statistics and other characteristics of the data.

In [None]:
base_stats = pd.DataFrame(columns=['Dates','Confirmed','Deaths','Recovered','Active'])
base_stats['Dates'] = confirmed.columns[4:]


In [None]:
base_stats['Confirmed'] = base_stats['Dates'].apply(lambda x: confirmed[x].sum())
base_stats['Recovered'] = base_stats['Dates'].apply(lambda x: recoveries[x].sum())
base_stats['Deaths'] = base_stats['Dates'].apply(lambda x: deaths[x].sum())
base_stats.reset_index(drop=False, inplace=True)
base_stats['Active'] = base_stats['index'].apply(lambda x: (base_stats['Confirmed'][x]-(base_stats['Deaths'][x]+base_stats['Recovered'][x])))

In [None]:
base_stats.head()

#### Recent cases 

In [None]:
recent_stats = go.Figure()
recent_stats.add_trace(go.Treemap(labels = ["Confirmed","Active","Recovered","Deaths"],
                                     parents = ["","Confirmed","Confirmed","Confirmed"],
                                     values = [base_stats['Confirmed'].sum(), base_stats['Active'].sum(), base_stats['Recovered'].sum(), base_stats['Deaths'].sum()],
                                      branchvalues="total", marker_colors = ["#073b4c","#118ab2","#06d6a0","#ef476f"],
                                      textinfo = "label+text+value",
                                      outsidetextfont = {"size": 30, "color": "darkblue"},
                                      marker = {"line": {"width": 2}},
                                        pathbar = {"visible": False}
                                     ))

recent_stats.show()

In [None]:
base_stats_fig = go.Figure()
for column in base_stats.columns.to_list()[2:6]:
    color_dict = {
      "Confirmed": "#ef476f",
      "Deaths": "#118ab2",
      "Recovered": "#073b4c",
      "Active": "#06d6a0"
        }
    base_stats_fig.add_trace(
        go.Scatter(
            x = base_stats['Dates'],
            y = base_stats[column],
            name = column,
            line = dict(color=color_dict[column]),
            hovertemplate ='<br><b>Date</b>: %{x}'+'<br><i>Count</i>:'+'%{y}',
        )
    )

In [None]:
for column in base_stats.columns.to_list()[2:6]:
    color_dict = {
      "Confirmed": "#0C6583",
      "Deaths": "#24F9C1",
      "Active": "#F47C98",
      "Recovered": "#149ECC"
        }
    base_stats_fig.add_trace(
        go.Scatter(
            x = base_stats['Dates'],
            y = base_stats['index'].apply(lambda x: (base_stats[column][x-7:x].sum())/7 if x>7 else (base_stats[column][0:x].sum())/7),
            name = column+" 7-day Moving Avg.",
            line = dict(dash="dash", color=color_dict[column]), showlegend=False,
            hovertemplate = '<br><b>Date</b>: %{x}'+'<br><i>7-day moving avg.</i>: %{y}'
        )
    )

#### Cases across the world

In [None]:
base_stats_fig.update_layout(
    updatemenus=[
        dict(
        buttons=list(
            [dict(label = 'All Cases',
                  method = 'update',
                  args = [{'visible': [True, True, True, True, True, True, True, True]},
                          {'title': 'All Cases',
                           'showlegend':True}]),
             dict(label = 'Confirmed',
                  method = 'update',
                  args = [{'visible': [True, False, False, False, True, False, False, False]},
                          {'title': 'Confirmed',
                           'showlegend':True}]),
             dict(label = 'Active',
                  method = 'update',
                  args = [{'visible': [False, False, False, True, False, False, False, True]},
                          {'title': 'Active',
                           'showlegend':True}]),
             dict(label = 'Recovered',
                  method = 'update',
                  args = [{'visible': [False, False, True, False, False, False, True, False]},
                          {'title': 'Recovered',
                           'showlegend':True}]),
             dict(label = 'Deaths',
                  method = 'update',
                  args = [{'visible': [False, True, False, False, False, True, False, False]},
                          {'title': 'Deaths',
                           'showlegend':True}]),
            ]),
             type = "dropdown",
             direction="down",
            showactive=True,
             x=0,
             xanchor="left",
             y=1.25,
             yanchor="top"
        ),
        dict(
        buttons=list(
            [dict(label = 'Linear Scale',
                  method = 'relayout',
                  args = [{'yaxis': {'type': 'linear'}},
                          {'title': 'All Cases',
                           'showlegend':True}]),
             dict(label = 'Log Scale',
                  method = 'relayout',
                  args = [{'yaxis': {'type': 'log'}},
                          {'title': 'Confirmed',
                           'showlegend':True}]),
            ]),
             type = "dropdown",
             direction="down",
            showactive=True,
             x=0,
             xanchor="left",
             y=1.36,
             yanchor="top"
        )
    ])

base_stats_fig.update_xaxes(showticklabels=False)
base_stats_fig.update_layout( 
    title_text="Covid-19 Basic Statistics", title_x=0.5, title_font_size=20,
                            legend=dict(orientation='h',yanchor='top',y=1.15,xanchor='right',x=1), paper_bgcolor="mintcream",
                            xaxis_title="Date", yaxis_title="Number of Cases")
base_stats_fig.show()

### Daily Increasing Cases

In [None]:
daily_case_fig = make_subplots(rows=2, cols=2, vertical_spacing=0.05, horizontal_spacing=0.04,
                           subplot_titles=('Confirmed','Active','Recovered','Deaths'),
                            x_title='Dates', y_title='# of Cases',)

daily_case_fig.add_trace(go.Bar(x=base_stats['Dates'], y=base_stats['index'].apply(lambda x: base_stats['Confirmed'][x]-base_stats['Confirmed'][x-1:x].sum()),
                              name='Confirmed',hovertemplate = '<br><b>Date</b>: %{x}'+'<br><i>Confirmed Count</i>: %{y}',
                                marker=dict(color='#118ab2')),row=1, col=1)
daily_case_fig.add_trace(go.Scatter(x=base_stats['Dates'], y=base_stats['index'].apply(lambda x: (base_stats['Confirmed'][x-7:x].sum()-base_stats['Confirmed'][x-8:x-1].sum())/7 if x>0 else 0),
                             name='7-day moving average', hovertemplate = '<br><b>Date</b>: %{x}'+'<br><i>7-day average</i>: %{y}', showlegend=False,
                                    line=dict(dash="dash", color='#149ECC')),row=1, col=1)

daily_case_fig.add_trace(go.Bar(x=base_stats['Dates'], y=base_stats['index'].apply(lambda x: base_stats['Active'][x]-base_stats['Active'][x-1:x].sum()), 
                             name='Active',hovertemplate = '<br><b>Date</b>: %{x}'+'<br><i>Active Count</i>: %{y}',
                               marker=dict(color='#ef476f')),row=1, col=2)
daily_case_fig.add_trace(go.Scatter(x=base_stats['Dates'], y=base_stats['index'].apply(lambda x: (base_stats['Active'][x-7:x].sum()-base_stats['Active'][x-8:x-1].sum())/7 if x>0 else 0),
                             name='7-day moving average', hovertemplate = '<br><b>Date</b>: %{x}'+'<br><i>7-day average</i>: %{y}', showlegend=False,
                                    line=dict(dash="dash", color='#F47C98')),row=1, col=2)
daily_case_fig.add_trace(go.Bar(x=base_stats['Dates'], y=base_stats['index'].apply(lambda x: base_stats['Recovered'][x]-base_stats['Recovered'][x-1:x].sum()), 
                              name='Recovered',hovertemplate = '<br><b>Date</b>: %{x}'+'<br><i>Recovered Count</i>: %{y}',
                               marker=dict(color='#06d6a0')),row=2, col=1)
daily_case_fig.add_trace(go.Scatter(x=base_stats['Dates'], y=base_stats['index'].apply(lambda x: (base_stats['Recovered'][x-7:x].sum()-base_stats['Recovered'][x-8:x-1].sum())/7 if x>0 else 0),
                             name='7-day moving average', hovertemplate = '<br><b>Date</b>: %{x}'+'<br><i>7-day average</i>: %{y}', showlegend=False,
                                    line=dict(dash="dash", color='#24F9C1')),row=2, col=1)

daily_case_fig.add_trace(go.Bar(x=base_stats['Dates'], y=base_stats['index'].apply(lambda x: base_stats['Deaths'][x]-base_stats['Deaths'][x-1:x].sum()), 
                              name='Deaths',hovertemplate = '<br><b>Date</b>: %{x}'+'<br><i>Death Count</i>: %{y}',
                               marker=dict(color='#073b4c')),row=2, col=2)
daily_case_fig.add_trace(go.Scatter(x=base_stats['Dates'], y=base_stats['index'].apply(lambda x: (base_stats['Deaths'][x-7:x].sum()-base_stats['Deaths'][x-8:x-1].sum())/7 if x>0 else 0),
                             name='7-day moving average', hovertemplate = '<br><b>Date</b>: %{x}'+'<br><i>7-day average</i>: %{y}', line=dict(dash="dash", color='#0C6583')),row=2, col=2)
daily_case_fig.update_xaxes(showticklabels=False)
daily_case_fig.update_layout(
    title_text="Daily change in cases of Covid19", title_x=0.5, title_font_size=20,
                            legend=dict(orientation='h',yanchor='top',y=1.1,xanchor='right',x=1), paper_bgcolor="mintcream")


daily_case_fig.show()   

### Weekly and monthly statistics

In [None]:
base_stats['Dates'] = pd.to_datetime(base_stats["Dates"])
base_stats.set_index(base_stats["Dates"],inplace=True)

In [None]:
week_month_fig = make_subplots(rows=1, cols=3, vertical_spacing=0.05, horizontal_spacing=0.04, # shared_yaxes=True,
                           subplot_titles=('Daily Statistics','Weekly Statistics','Monthly Statistics'),y_title='# of Cases',)

for column in ['Confirmed','Active','Recovered','Deaths']:
    df_dict={
      "Confirmed": [confirmed_df,"#118ab2",True],
      "Active": [active_group,"#ef476f",False],
      "Recovered": [recoveries_df,"#06d6a0",False],
      "Deaths": [death_df,"#073b4c",False]        
    }
    week_month_fig.add_trace(go.Bar(x=list(range(len(base_stats[column].resample('D').sum()))),
                            y=base_stats[column].resample('D').sum(), visible = df_dict[column][2],
                            name='Daily '+column,hovertemplate = '<br><b>day</b>: %{x}'+'<br><i>Confirmed Count</i>: %{y}',
                            marker=dict(color=df_dict[column][1]), showlegend=False) ,row=1, col=1)
    week_month_fig.add_trace(go.Bar(x=list(range(len(base_stats[column].resample('W').sum()))),
                            y=base_stats[column].resample('W').sum(), visible = df_dict[column][2],
                            name='Weekly '+column,hovertemplate = '<br><b>Week</b>: %{x}'+'<br><i>Confirmed Count</i>: %{y}',
                            marker=dict(color=df_dict[column][1]), showlegend=False) ,row=1, col=2)
    week_month_fig.add_trace(go.Bar(x=list(range(len(base_stats[column].resample('M').sum()))),
                            y=base_stats[column].resample('M').sum(), visible = df_dict[column][2],name='Monthly '+column,hovertemplate = '<br><b>Month</b>: %{x}'+'<br><i>Confirmed Count</i>: %{y}',
                            marker=dict(color=df_dict[column][1]), showlegend=False) ,row=1, col=3)


week_month_fig.update_layout(
    updatemenus=[
        dict(
        buttons=list([
             dict(label = 'Confirmed',
                  method = 'update',
                  args = [{'visible': [True, True, True, False, False, False, False, False, False, False, False, False]},
                          {'title': 'Confirmed',
                           'showlegend':True}]),
             dict(label = 'Active',
                  method = 'update',
                  args = [{'visible': [False, False, False, True, True, True, False, False, False, False, False, False]},
                          {'title': 'Active',
                           'showlegend':True}]),
             dict(label = 'Recovered',
                  method = 'update',
                  args = [{'visible': [False, False, False, False, False, False, True, True, True, False, False, False]},
                          {'title': 'Recovered',
                           'showlegend':True}]),dict(label = 'Deaths',
                  method = 'update',
                  args = [{'visible': [False, False, False, False, False, False, False, False, False, True, True, True]},
                          {'title': 'Deaths',
                           'showlegend':True}]),
            ]),
             type = "buttons",
            direction="right",
#             pad={"r": 10, "t": 40},
             showactive=True,
             x=-0.05,
             xanchor="left",
             y=1.2,
             yanchor="top"
        )
    ])
week_month_fig.update_layout(
    title_text="Weekly/Monthly Statistics", title_x=0.5, title_font_size=20,
                             paper_bgcolor="mintcream")
week_month_fig.update_xaxes(title_text="Days", row=1, col=1)
week_month_fig.update_xaxes(title_text="Weeks", row=1, col=2)
week_month_fig.update_xaxes(title_text="Months", row=1, col=3)
week_month_fig.show()

In [33]:
prediction_df = pd.DataFrame(columns=['Index', 'Confirmed Pred', 'Deaths Pred', 'Recovered Pred', 'Active Pred', 'Daily Inc. Pred'])
prediction_df['Index'] = list(flatten(days_ex))

NameError: name 'flatten' is not defined

In [27]:
for col in base_stats_inc_df.columns[2:]:

    count = np.array(base_stats_inc_df[[col]]).reshape(-1, 1)

    X_train_confirmed, X_test_confirmed, y_train_confirmed, y_test_confirmed = train_test_split(
                                                        days[50:], count[50:], 
                                                        test_size=0.05, shuffle=False)

    MAE, RSE, R2 = [], [], []
    for j in range(1,10):
        #creating the model
        poly = PolynomialFeatures(degree=j)
        train_x_poly = poly.fit_transform(X_train_confirmed)
        
        regr_poly = linear_model.LinearRegression()
        regr_poly.fit(train_x_poly, y_train_confirmed)
        
        y_pred_poly = regr_poly.predict(poly.fit_transform(X_test_confirmed))
        MAE.append(np.mean(np.absolute(y_pred_poly - y_test_confirmed)))
        RSE.append(np.mean((y_pred_poly - list(flatten(y_test_confirmed))) ** 2))
        R2.append(r2_score(y_pred_poly, list(flatten(y_test_confirmed))))

NameError: name 'flatten' is not defined