In [None]:
import pandas as pd
import plotly.graph_objs as go
import plotly.offline as py
from plotly.offline import init_notebook_mode, plot, iplot, download_plotlyjs

from fbprophet import Prophet
from fbprophet.plot import plot_plotly, add_changepoints_to_plot

In [None]:
# Read the confirmed-cases time series from a local checkout of the
# COVID-19 data repository (path is relative to this notebook).
csvfile = "../COVID-19-master/csse_covid_19_data/csse_covid_19_time_series/time_series_19-covid-Confirmed.csv"

df = pd.read_csv(filepath_or_buffer=csvfile)

In [None]:
# Preview the last five rows of the loaded table.
df.tail(5)

In [None]:
# Distinct country/region names present in the data.
set(df['Country/Region'].tolist())


Some people have said that Taiwan's cases are recorded in the JHU database under the name of a region of Mainland China. Is that **true**?

In [None]:
# Select the rows whose country/region is exactly 'Taiwan*'.
df.loc[df['Country/Region'].eq('Taiwan*')]

Self-Practice
---
Try working through the following steps yourself with another country's data.

In [None]:
# Keep only the rows reported for Germany.
df_germany = df.loc[df['Country/Region'].eq('Germany')]

In [None]:
# Preview the first five rows of the Germany subset.
df_germany.head(5)

After transposing the DataFrame, the row's original index label (e.g. `52`) becomes the column name, and the columns from the fifth onward become the index of `df_germany`.

In [None]:
# Swap rows and columns, then drop the first four rows by position so that
# only the remaining entries (indexed by their former column names) are kept.
df_germany = df_germany.transpose().iloc[4:]
df_germany.head(5)

Some preparation is needed before making predictions with fbprophet:
1. install fbprophet (which requires pystan):
    ```
    shell > pip install fbprophet
    or
    shell > conda install fbprophet
    ```
- Why use fbprophet? It is one of the simplest ways to do time-series prediction: it requires only two columns, `ds` (the timestamp) and `y` (the number of occurrences).
- The date data used must be in Python datetime format (YYYY-MM-DD).

In [None]:
# Reshape into the two-column layout Prophet needs: `ds` (date) and `y` (count).
# After the transpose, the single value column is labelled with the row label
# the country had in the source table. That label depends on where the row sits
# in the CSV, so read it from the frame instead of hard-coding a number; a
# mismatched hard-coded label would make the rename silently skip `y`.
value_column = df_germany.columns[0]
df_germany = df_germany.reset_index().rename(
    columns={'index': 'ds', value_column: 'y'}
)
df_germany.tail()

In [None]:
# Parse the `ds` labels into pandas datetimes before handing the frame to Prophet.
df_germany = df_germany.assign(ds=pd.to_datetime(df_germany['ds']))
df_germany.head(5)

In [None]:
# Line chart of the confirmed counts over time, with a centred title.
confirmed_trace = go.Scatter(
    x=df_germany['ds'],
    y=df_germany['y'],
    name='Confirmed in Germany',
)
fig = go.Figure(
    data=[confirmed_trace],
    layout={'title': {'text': 'Confirmed Cases In DeutschLand', 'x': 0.5}},
)

# Write the chart to a standalone HTML file via plotly's offline mode.
plot(fig, filename='NCOVID-19.html')

In [None]:
# Baseline model: additive seasonality, with only the daily component enabled.
# NOTE(review): Prophet's documentation describes daily seasonality as a
# sub-daily effect; with one observation per day this term may add nothing.
# Confirm whether it is intended.
m = Prophet(
    seasonality_mode='additive',
    daily_seasonality=True,
    weekly_seasonality=False,
    yearly_seasonality=False,
)

# Fit on the `ds` / `y` frame prepared above.
m.fit(df_germany)

In [None]:
# Build a frame of dates reaching 7 periods past the training data,
# run the model over it, and show the last 7 dates of that frame.
future = m.make_future_dataframe(periods=7)
forecast = m.predict(future)
future.iloc[-7:]

In [None]:
# `yhat` is the predicted value ("y hat"); for background on the notation see
# https://www.statisticshowto.datasciencecentral.com/y-hat-definition/

forecast.loc[:, ['ds', 'yhat', 'yhat_lower', 'yhat_upper']].tail(7)

In [None]:
# Interactive view of the fit and forecast. Expect a poor match here:
# a linear trend is not an appropriate model for the exponential growth
# of COVID-19 confirmed cases.

fig = plot_plotly(m, forecast)
plot(fig)

Not good enough? Let us fine-tune the parameters and rebuild the model.

In [None]:
# IPython help syntax: displays Prophet's signature and docstring so the
# tunable parameters can be reviewed. Only valid in IPython/Jupyter.
Prophet?

In [None]:

# Rebuild the model with changepoints: the trend is split into several linear
# segments so that it can follow the exponential increase more closely.
m = Prophet(
    seasonality_mode='additive',
    daily_seasonality=True,
    weekly_seasonality=False,
    yearly_seasonality=False,
    changepoint_range=0.98,  # potential changepoints go in the first 98% of the series
    changepoint_prior_scale=0.2,  # larger values make the trend more flexible
)

# Fit the tuned model on the same `ds` / `y` frame.
m.fit(df_germany)

In [None]:
# Forecast with the tuned model and show the last 7 rows of its output table.
future = m.make_future_dataframe(periods=7)
forecast = m.predict(future)
forecast.iloc[-7:]

In [None]:
# Interactive plot of the tuned model's expected confirmed cases.
fig = plot_plotly(m, forecast)
plot(fig)

In [None]:
# Draw the forecast with Prophet's built-in plot, then overlay the model's
# changepoints on that figure's current axes.
fig = m.plot(forecast)
a = add_changepoints_to_plot(ax=fig.gca(), m=m, fcst=forecast)