# COVID-19 Novel Coronavirus: EDA & Forecast Number of Cases

In [1]:
import pandas as pd
import numpy as np
import seaborn as sns
from matplotlib import pyplot as plt
import plotly.graph_objects as go
from fbprophet import Prophet
import pycountry
import plotly.express as px
from collections import namedtuple
from fbprophet.plot import plot_plotly, plot_components_plotly
from sklearn.metrics import mean_squared_error

%load_ext nb_black

<IPython.core.display.Javascript object>

# Data Import, Preprocessing and EDA

In [2]:
df = pd.read_csv(
    "data/covid_19_data.csv",
    parse_dates=["Last Update"],
)
df.rename(
    columns={"ObservationDate": "Date", "Country/Region": "Country"}, inplace=True
)

df_confirmed = pd.read_csv("data/time_series_covid_19_confirmed.csv")
df_recovered = pd.read_csv("data/time_series_covid_19_recovered.csv")
df_deaths = pd.read_csv("data/time_series_covid_19_deaths.csv")

df_confirmed.rename(columns={"Country/Region": "Country"}, inplace=True)
df_recovered.rename(columns={"Country/Region": "Country"}, inplace=True)
df_deaths.rename(columns={"Country/Region": "Country"}, inplace=True)

<IPython.core.display.Javascript object>

In [3]:
df_confirmed.head()

Unnamed: 0,Province/State,Country,Lat,Long,1/22/20,1/23/20,1/24/20,1/25/20,1/26/20,1/27/20,...,9/14/20,9/15/20,9/16/20,9/17/20,9/18/20,9/19/20,9/20/20,9/21/20,9/22/20,9/23/20
0,,Afghanistan,33.93911,67.709953,0,0,0,0,0,0,...,38772,38815,38855,38872,38883,38919,39044,39074,39096,39145
1,,Albania,41.1533,20.1683,0,0,0,0,0,0,...,11520,11672,11816,11948,12073,12226,12385,12535,12666,12787
2,,Algeria,28.0339,1.6596,0,0,0,0,0,0,...,48496,48734,48966,49194,49413,49623,49826,50023,50214,50400
3,,Andorra,42.5063,1.5218,0,0,0,0,0,0,...,1438,1438,1483,1483,1564,1564,1564,1681,1681,1753
4,,Angola,-11.2027,17.8739,0,0,0,0,0,0,...,3439,3569,3675,3789,3848,3901,3991,4117,4236,4363


<IPython.core.display.Javascript object>

## Earliest Cases

In [4]:
df.head()

Unnamed: 0,SNo,Date,Province/State,Country,Last Update,Confirmed,Deaths,Recovered
0,1,01/22/2020,Anhui,Mainland China,2020-01-22 17:00:00,1.0,0.0,0.0
1,2,01/22/2020,Beijing,Mainland China,2020-01-22 17:00:00,14.0,0.0,0.0
2,3,01/22/2020,Chongqing,Mainland China,2020-01-22 17:00:00,6.0,0.0,0.0
3,4,01/22/2020,Fujian,Mainland China,2020-01-22 17:00:00,1.0,0.0,0.0
4,5,01/22/2020,Gansu,Mainland China,2020-01-22 17:00:00,0.0,0.0,0.0


<IPython.core.display.Javascript object>

## Latest Cases

In [5]:
df.tail()

Unnamed: 0,SNo,Date,Province/State,Country,Last Update,Confirmed,Deaths,Recovered
116800,116801,09/23/2020,Zaporizhia Oblast,Ukraine,2020-09-24 04:23:38,3149.0,49.0,1158.0
116801,116802,09/23/2020,Zeeland,Netherlands,2020-09-24 04:23:38,1270.0,72.0,0.0
116802,116803,09/23/2020,Zhejiang,Mainland China,2020-09-24 04:23:38,1282.0,1.0,1272.0
116803,116804,09/23/2020,Zhytomyr Oblast,Ukraine,2020-09-24 04:23:38,5191.0,92.0,2853.0
116804,116805,09/23/2020,Zuid-Holland,Netherlands,2020-09-24 04:23:38,29513.0,1372.0,0.0


<IPython.core.display.Javascript object>

In [6]:
df2 = (
    df.groupby(["Date", "Country", "Province/State"])[
        ["SNo", "Date", "Province/State", "Country", "Confirmed", "Deaths", "Recovered"]
    ]
    .sum()
    .reset_index()
)

<IPython.core.display.Javascript object>

## By Country View (World)

In [7]:
df2

Unnamed: 0,Date,Country,Province/State,SNo,Confirmed,Deaths,Recovered
0,01/22/2020,Hong Kong,Hong Kong,13,0.0,0.0,0.0
1,01/22/2020,Macau,Macau,21,1.0,0.0,0.0
2,01/22/2020,Mainland China,Anhui,1,1.0,0.0,0.0
3,01/22/2020,Mainland China,Beijing,2,14.0,0.0,0.0
4,01/22/2020,Mainland China,Chongqing,3,6.0,0.0,0.0
...,...,...,...,...,...,...,...
81443,09/23/2020,Ukraine,Vinnytsia Oblast,116773,5455.0,103.0,3580.0
81444,09/23/2020,Ukraine,Volyn Oblast,116779,7132.0,150.0,4931.0
81445,09/23/2020,Ukraine,Zakarpattia Oblast,116800,9343.0,294.0,4295.0
81446,09/23/2020,Ukraine,Zaporizhia Oblast,116801,3149.0,49.0,1158.0


<IPython.core.display.Javascript object>

## By Country View (India)

In [8]:
df.query('Country=="India"').groupby("Last Update")[
    ["Confirmed", "Deaths", "Recovered"]
].sum().reset_index()

Unnamed: 0,Last Update,Confirmed,Deaths,Recovered
0,2020-01-30 16:00:00,1.0,0.0,0.0
1,2020-01-31 08:15:00,1.0,0.0,0.0
2,2020-01-31 23:59:00,1.0,0.0,0.0
3,2020-02-02 06:03:08,2.0,0.0,0.0
4,2020-02-03 21:43:02,30.0,0.0,0.0
...,...,...,...,...
204,2020-09-20 04:22:56,5308014.0,85619.0,4208431.0
205,2020-09-21 04:23:18,5400619.0,86752.0,4303043.0
206,2020-09-22 04:23:11,5487580.0,87882.0,4396399.0
207,2020-09-23 04:23:40,5562663.0,88935.0,4497867.0


<IPython.core.display.Javascript object>

## By Country (Sorted)

In [9]:
df.groupby("Country")[["Confirmed", "Deaths", "Recovered"]].sum().reset_index()

Unnamed: 0,Country,Confirmed,Deaths,Recovered
0,Azerbaijan,1.0,0.0,0.0
1,"('St. Martin',)",2.0,0.0,0.0
2,Afghanistan,4134817.0,128968.0,2463817.0
3,Albania,704767.0,20902.0,398544.0
4,Algeria,3583765.0,162118.0,2438290.0
...,...,...,...,...
218,Western Sahara,1481.0,121.0,1112.0
219,Yemen,177772.0,49501.0,86008.0
220,Zambia,750629.0,18002.0,635259.0
221,Zimbabwe,394970.0,10243.0,240370.0


<IPython.core.display.Javascript object>

# Visualizations

In [10]:
df.groupby("Date").sum()

Unnamed: 0_level_0,SNo,Confirmed,Deaths,Recovered
Date,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1
01/22/2020,741,555.0,17.0,28.0
01/23/2020,2829,653.0,18.0,30.0
01/24/2020,4305,941.0,26.0,36.0
01/25/2020,6490,1438.0,42.0,39.0
01/26/2020,9071,2118.0,56.0,52.0
...,...,...,...,...
09/19/2020,84189917,30688150.0,955866.0,20922189.0
09/20/2020,84740481,30935011.0,959565.0,21159459.0
09/21/2020,85406364,31245797.0,963693.0,21394593.0
09/22/2020,85958413,31517087.0,969578.0,21624434.0


<IPython.core.display.Javascript object>

## Summary Plot of Worldwide Cases - Confirmed, Deaths & Recovered

In [11]:
confirmed = df.groupby("Date").sum()["Confirmed"].reset_index()
deaths = df.groupby("Date").sum()["Deaths"].reset_index()
recovered = df.groupby("Date").sum()["Recovered"].reset_index()

<IPython.core.display.Javascript object>

In [12]:
fig = go.Figure()
fig.add_trace(
    go.Bar(
        x=confirmed["Date"],
        y=confirmed["Confirmed"],
        name="Confirmed",
        marker_color="blue",
    )
)
fig.add_trace(
    go.Bar(x=deaths["Date"], y=deaths["Deaths"], name="Deaths", marker_color="Red")
)
fig.add_trace(
    go.Bar(
        x=recovered["Date"],
        y=recovered["Recovered"],
        name="Recovered",
        marker_color="Green",
    )
)

fig.update_layout(
    title="Worldwide Corona Virus Cases - Confirmed, Deaths, Recovered (Bar Chart)",
    xaxis_tickfont_size=14,
    yaxis=dict(
        title="Number of Cases",
        titlefont_size=16,
        tickfont_size=14,
    ),
    legend=dict(
        x=0,
        y=1.0,
        bgcolor="rgba(255, 255, 255, 0)",
        bordercolor="rgba(255, 255, 255, 0)",
    ),
    barmode="group",
    bargap=0.15,  # gap between bars of adjacent location coordinates.
    bargroupgap=0.1,  # gap between bars of the same location coordinate.
)
fig.show()

<IPython.core.display.Javascript object>

In [13]:
fig.write_html(
    "images/Worldwide Corona Virus Cases - Confirmed, Deaths, Recovered (Bar Chart).html"
)

<IPython.core.display.Javascript object>

In [14]:
fig = go.Figure()
fig.add_trace(
    go.Scatter(
        x=confirmed["Date"],
        y=confirmed["Confirmed"],
        mode="lines+markers",
        name="Confirmed",
        line=dict(color="blue", width=2),
    )
)
fig.add_trace(
    go.Scatter(
        x=deaths["Date"],
        y=deaths["Deaths"],
        mode="lines+markers",
        name="Deaths",
        line=dict(color="Red", width=2),
    )
)
fig.add_trace(
    go.Scatter(
        x=recovered["Date"],
        y=recovered["Recovered"],
        mode="lines+markers",
        name="Recovered",
        line=dict(color="Green", width=2),
    )
)
fig.update_layout(
    title="Worldwide Corona Virus Cases - Confirmed, Deaths, Recovered (Line Chart)",
    xaxis_tickfont_size=14,
    yaxis=dict(
        title="Number of Cases",
        titlefont_size=16,
        tickfont_size=14,
    ),
    legend=dict(
        x=0,
        y=1.0,
        bgcolor="rgba(255, 255, 255, 0)",
        bordercolor="rgba(255, 255, 255, 0)",
    ),
)
fig.show()

<IPython.core.display.Javascript object>

In [15]:
fig.write_html(
    "images/Worldwide Corona Virus Cases - Confirmed, Deaths, Recovered (Line Chart).html"
)

<IPython.core.display.Javascript object>

In [16]:
df_confirmed = df_confirmed[["Province/State", "Lat", "Long", "Country"]]
df_temp = df.copy()
df_temp["Country"].replace({"Mainland China": "China"}, inplace=True)
df_latlong = pd.merge(df_temp, df_confirmed, on=["Country", "Province/State"])

<IPython.core.display.Javascript object>

In [17]:
fig = px.density_mapbox(
    df_latlong,
    lat="Lat",
    lon="Long",
    hover_name="Province/State",
    hover_data=["Confirmed", "Deaths", "Recovered"],
    animation_frame="Date",
    color_continuous_scale="Portland",
    radius=7,
    zoom=0,
    height=700,
)
fig.update_layout(
    title="Worldwide Corona Virus Cases Time Lapse - Confirmed, Deaths, Recovered",
    font=dict(family="Courier New, monospace", size=18, color="#7f7f7f"),
)
fig.update_layout(mapbox_style="open-street-map", mapbox_center_lon=0)
fig.update_layout(margin={"r": 0, "t": 0, "l": 0, "b": 0})


fig.show()

<IPython.core.display.Javascript object>

In [18]:
fig.write_html(
    "images/Worldwide Corona Virus Cases Time Lapse - Confirmed, Deaths, Recovered.html"
)

<IPython.core.display.Javascript object>

## Analysis by Country

We use Plotly (https://plot.ly/python/bubble-maps/) for the visuals

### Latest Date in Data

In [19]:
confirmed = df2.groupby(["Date", "Country"]).sum()[["Confirmed"]].reset_index()
deaths = df2.groupby(["Date", "Country"]).sum()[["Deaths"]].reset_index()
recovered = df2.groupby(["Date", "Country"]).sum()[["Recovered"]].reset_index()

<IPython.core.display.Javascript object>

In [20]:
latest_date = confirmed["Date"].max()
latest_date

'09/23/2020'

<IPython.core.display.Javascript object>

In [21]:
confirmed = confirmed[(confirmed["Date"] == latest_date)][["Country", "Confirmed"]]
deaths = deaths[(deaths["Date"] == latest_date)][["Country", "Deaths"]]
recovered = recovered[(recovered["Date"] == latest_date)][["Country", "Recovered"]]

<IPython.core.display.Javascript object>

### Countries/Regions Affected

In [22]:
all_countries = confirmed["Country"].unique()
print("Number of countries/regions with cases: " + str(len(all_countries)))
print("Countries/Regions with cases: ")
for i in all_countries:
    print("    " + str(i))

Number of countries/regions with cases: 24
Countries/Regions with cases: 
    Australia
    Brazil
    Canada
    Chile
    Colombia
    Denmark
    France
    Germany
    Hong Kong
    India
    Italy
    Japan
    Macau
    Mainland China
    Mexico
    Netherlands
    Pakistan
    Peru
    Russia
    Spain
    Sweden
    UK
    US
    Ukraine


<IPython.core.display.Javascript object>

We need to do some processing to the country names for this bubble plot as some of the countries are not found in `pycountry.countries` although they are, just that its due to them being listed acronyms and with additional words, such as Mainland China instead of China.

In [23]:
print(list(country.name for country in pycountry.countries))

['Aruba', 'Afghanistan', 'Angola', 'Anguilla', 'Åland Islands', 'Albania', 'Andorra', 'United Arab Emirates', 'Argentina', 'Armenia', 'American Samoa', 'Antarctica', 'French Southern Territories', 'Antigua and Barbuda', 'Australia', 'Austria', 'Azerbaijan', 'Burundi', 'Belgium', 'Benin', 'Bonaire, Sint Eustatius and Saba', 'Burkina Faso', 'Bangladesh', 'Bulgaria', 'Bahrain', 'Bahamas', 'Bosnia and Herzegovina', 'Saint Barthélemy', 'Belarus', 'Belize', 'Bermuda', 'Bolivia, Plurinational State of', 'Brazil', 'Barbados', 'Brunei Darussalam', 'Bhutan', 'Bouvet Island', 'Botswana', 'Central African Republic', 'Canada', 'Cocos (Keeling) Islands', 'Switzerland', 'Chile', 'China', "Côte d'Ivoire", 'Cameroon', 'Congo, The Democratic Republic of the', 'Congo', 'Cook Islands', 'Colombia', 'Comoros', 'Cabo Verde', 'Costa Rica', 'Cuba', 'Curaçao', 'Christmas Island', 'Cayman Islands', 'Cyprus', 'Czechia', 'Germany', 'Djibouti', 'Dominica', 'Denmark', 'Dominican Republic', 'Algeria', 'Ecuador', 'Egy

<IPython.core.display.Javascript object>

For example, the United Kingdom is "UK"

In [24]:
print("UK" in list(country.name for country in pycountry.countries))
print("United Kingdom" in list(country.name for country in pycountry.countries))

False
True


<IPython.core.display.Javascript object>

In [25]:
confirmed2 = confirmed.copy()
deaths2 = deaths.copy()
recovered2 = recovered.copy()
bubble_plot_dfs = [confirmed2, deaths2, recovered2]
for df_ in bubble_plot_dfs:
    df_["Country"].replace({"Mainland China": "China"}, inplace=True)
    df_["Country"].replace({"UK": "United Kingdom"}, inplace=True)
    df_["Country"].replace({"US": "United States"}, inplace=True)

<IPython.core.display.Javascript object>

In [26]:
countries = {}
for country in pycountry.countries:
    countries[country.name] = country.alpha_3

confirmed2["iso_alpha"] = confirmed2["Country"].map(countries.get)
deaths2["iso_alpha"] = deaths2["Country"].map(countries.get)
recovered2["iso_alpha"] = recovered2["Country"].map(countries.get)

<IPython.core.display.Javascript object>

In [27]:
plot_data_confirmed = confirmed2[["iso_alpha", "Confirmed", "Country"]]
plot_data_deaths = deaths2[["iso_alpha", "Deaths"]]
plot_data_recovered = recovered2[["iso_alpha", "Recovered"]]

<IPython.core.display.Javascript object>

In [28]:
fig = px.scatter_geo(
    plot_data_confirmed,
    locations="iso_alpha",
    color="Country",
    hover_name="iso_alpha",
    size="Confirmed",
    projection="natural earth",
    title="Worldwide Confirmed Cases",
)
fig.show()

<IPython.core.display.Javascript object>

In [29]:
fig.write_html("images/Worldwide Confirmed Cases.html")

<IPython.core.display.Javascript object>

In [30]:
fig = px.scatter_geo(
    plot_data_deaths,
    locations="iso_alpha",
    color="Deaths",
    hover_name="iso_alpha",
    size="Deaths",
    projection="natural earth",
    title="Worldwide Death Cases",
)
fig.show()

<IPython.core.display.Javascript object>

In [31]:
fig.write_html("images/Worldwide Death Cases.html")

<IPython.core.display.Javascript object>

In [32]:
fig = px.scatter_geo(
    plot_data_recovered,
    locations="iso_alpha",
    color="Recovered",
    hover_name="iso_alpha",
    size="Recovered",
    projection="natural earth",
    title="Worldwide Recovered Cases",
)
fig.show()

<IPython.core.display.Javascript object>

In [33]:
fig.write_html("images/Worldwide Recovered Cases.html")

<IPython.core.display.Javascript object>

## Transforming Data for Forecasting

In [34]:
confirmed = df.groupby("Date").sum()["Confirmed"].reset_index()
deaths = df.groupby("Date").sum()["Deaths"].reset_index()
recovered = df.groupby("Date").sum()["Recovered"].reset_index()

<IPython.core.display.Javascript object>

In [35]:
confirmed.columns = ["ds", "y"]
confirmed["ds"] = pd.to_datetime(confirmed["ds"])

<IPython.core.display.Javascript object>

In [36]:
confirmed.head()

Unnamed: 0,ds,y
0,2020-01-22,555.0
1,2020-01-23,653.0
2,2020-01-24,941.0
3,2020-01-25,1438.0
4,2020-01-26,2118.0


<IPython.core.display.Javascript object>

# Forecasting Total Number of Cases Worldwide

## Prophet

We use Prophet, a procedure for forecasting time series data based on an additive model where non-linear trends are fit with yearly, weekly, and daily seasonality, plus holiday effects. It works best with time series that have strong seasonal effects and several seasons of historical data. Prophet is robust to missing data and shifts in the trend, and typically handles outliers well. It is also an open source software released by Facebook’s Core Data Science team. It is available for download on CRAN and PyPI.

## Why Prophet?

Prophet is easy to customize and use, and to produce accurate forecasts which can be explained intuitively with supporting evidence such as forecast seasonality components. It allows the analyst to explain in an intuitive and convinving manner to higher management as to why the forecasts are as such, and the plausible underlying factors that contribute to its result. Furthermore, it is also open-source! :)

## References 
- https://facebook.github.io/prophet/
- https://facebook.github.io/prophet/docs/
- https://github.com/facebook/prophet

## Forecasting Confirmed Cases Worldwide with Prophet (Baseline)

We perform a week's ahead forecast with Prophet, with 95% prediction intervals. Here, no tweaking of seasonality-related parameters and additional regressors are performed.

In [37]:
m = Prophet(interval_width=0.95)
m.fit(confirmed)
future = m.make_future_dataframe(periods=30)
future_confirmed = future.copy()  # for non-baseline predictions later on
future.tail()

INFO:fbprophet:Disabling yearly seasonality. Run prophet with yearly_seasonality=True to override this.
INFO:fbprophet:Disabling daily seasonality. Run prophet with daily_seasonality=True to override this.


Unnamed: 0,ds
271,2020-10-19
272,2020-10-20
273,2020-10-21
274,2020-10-22
275,2020-10-23


<IPython.core.display.Javascript object>

In [38]:
forecast = m.predict(future)
forecast[["ds", "yhat", "yhat_lower", "yhat_upper"]].tail()

Unnamed: 0,ds,yhat,yhat_lower,yhat_upper
271,2020-10-19,38411980.0,37540980.0,39210390.0
272,2020-10-20,38673310.0,37751250.0,39491570.0
273,2020-10-21,38942300.0,37990590.0,39813850.0
274,2020-10-22,39208190.0,38203360.0,40102610.0
275,2020-10-23,39484980.0,38426330.0,40435490.0


<IPython.core.display.Javascript object>

### RMSE

In [39]:
true_pred_df = pd.merge(forecast[['ds', 'yhat']], confirmed, on='ds')[['y','yhat']]
mean_squared_error(true_pred_df['y'], true_pred_df['yhat'], squared=False)  

37348.6700345524

<IPython.core.display.Javascript object>

In [40]:
confirmed_forecast_plot = plot_plotly(m, forecast)
confirmed_forecast_plot

<IPython.core.display.Javascript object>

In [41]:
confirmed_forecast_plot.write_html("images/confirmed_forecast_plot.html")

<IPython.core.display.Javascript object>

In [42]:
forecast_components = plot_components_plotly(m, forecast)
forecast_components

<IPython.core.display.Javascript object>

In [43]:
forecast_components.write_html("images/confirmed_forecast_components.html")

<IPython.core.display.Javascript object>

## Forecasting Deaths Worldwide with Prophet (Baseline)

We perform a week's ahead forecast with Prophet, with 95% prediction intervals. Here, no tweaking of seasonality-related parameters and additional regressors are performed.

In [44]:
deaths.columns = ["ds", "y"]
deaths["ds"] = pd.to_datetime(deaths["ds"])

<IPython.core.display.Javascript object>

In [45]:
m = Prophet(interval_width=0.95)
m.fit(deaths)
future = m.make_future_dataframe(periods=30)
future_deaths = future.copy()  # for non-baseline predictions later on
future.tail()

INFO:fbprophet:Disabling yearly seasonality. Run prophet with yearly_seasonality=True to override this.
INFO:fbprophet:Disabling daily seasonality. Run prophet with daily_seasonality=True to override this.


Unnamed: 0,ds
271,2020-10-19
272,2020-10-20
273,2020-10-21
274,2020-10-22
275,2020-10-23


<IPython.core.display.Javascript object>

In [46]:
forecast = m.predict(future)
forecast[["ds", "yhat", "yhat_lower", "yhat_upper"]].tail()

Unnamed: 0,ds,yhat,yhat_lower,yhat_upper
271,2020-10-19,1118945.0,1076692.0,1158938.0
272,2020-10-20,1124950.0,1081383.0,1166579.0
273,2020-10-21,1131029.0,1085623.0,1175069.0
274,2020-10-22,1137109.0,1089885.0,1184159.0
275,2020-10-23,1142987.0,1093088.0,1191271.0


<IPython.core.display.Javascript object>

### RMSE

In [47]:
true_pred_df = pd.merge(forecast[['ds', 'yhat']], deaths, on='ds')[['y','yhat']]
mean_squared_error(true_pred_df['y'], true_pred_df['yhat'], squared=False)  

1183.8854934398355

<IPython.core.display.Javascript object>

In [48]:
deaths_forecast_plot = plot_plotly(m, forecast)
deaths_forecast_plot

<IPython.core.display.Javascript object>

In [49]:
deaths_forecast_plot.write_html("images/deaths_forecast_plot.html")

<IPython.core.display.Javascript object>

In [50]:
forecast_components = plot_components_plotly(m, forecast)
forecast_components

<IPython.core.display.Javascript object>

In [51]:
forecast_components.write_html("images/deaths_forecast_components.html")

<IPython.core.display.Javascript object>

## Forecasting Recovered Cases Worldwide with Prophet (Baseline)

We perform a week's ahead forecast with Prophet, with 95% prediction intervals. Here, no tweaking of seasonality-related parameters and additional regressors are performed.

In [52]:
recovered.columns = ["ds", "y"]
recovered["ds"] = pd.to_datetime(recovered["ds"])

<IPython.core.display.Javascript object>

In [53]:
m = Prophet(interval_width=0.95)
m.fit(recovered)
future = m.make_future_dataframe(periods=30)
future_recovered = future.copy()  # for non-baseline predictions later on
future.tail()

INFO:fbprophet:Disabling yearly seasonality. Run prophet with yearly_seasonality=True to override this.
INFO:fbprophet:Disabling daily seasonality. Run prophet with daily_seasonality=True to override this.


Unnamed: 0,ds
271,2020-10-19
272,2020-10-20
273,2020-10-21
274,2020-10-22
275,2020-10-23


<IPython.core.display.Javascript object>

In [54]:
forecast = m.predict(future)
forecast[["ds", "yhat", "yhat_lower", "yhat_upper"]].tail()

Unnamed: 0,ds,yhat,yhat_lower,yhat_upper
271,2020-10-19,27308850.0,26639880.0,27935280.0
272,2020-10-20,27527210.0,26811590.0,28191400.0
273,2020-10-21,27749360.0,26990470.0,28421860.0
274,2020-10-22,27957990.0,27182450.0,28668450.0
275,2020-10-23,28177430.0,27372810.0,28920860.0


<IPython.core.display.Javascript object>

### RMSE

In [55]:
true_pred_df = pd.merge(forecast[['ds', 'yhat']], recovered, on='ds')[['y','yhat']]
mean_squared_error(true_pred_df['y'], true_pred_df['yhat'], squared=False)  

22576.744237305145

<IPython.core.display.Javascript object>

In [56]:
recovered_forecast_plot = plot_plotly(m, forecast)
recovered_forecast_plot

<IPython.core.display.Javascript object>

In [57]:
recovered_forecast_plot.write_html("images/recovered_forecast_plot.html")

<IPython.core.display.Javascript object>

In [58]:
forecast_components = plot_components_plotly(m, forecast)
forecast_components

<IPython.core.display.Javascript object>

In [59]:
forecast_components.write_html("images/recovered_forecast_components.html")

<IPython.core.display.Javascript object>

Seems pretty decent for a baseline Prophet model in the case of the number of recovered! :)

From the forecast component plots, it is clear that there exists an upward trend in the number of cases worldwide. In the weekly trends plot, interestingly, it is the **highest at the weekends**!

In [60]:
confirmed_df = df2[["SNo", "Date", "Province/State", "Country", "Confirmed"]]
confirmed_df.head()

Unnamed: 0,SNo,Date,Province/State,Country,Confirmed
0,13,01/22/2020,Hong Kong,Hong Kong,0.0
1,21,01/22/2020,Macau,Macau,1.0
2,1,01/22/2020,Anhui,Mainland China,1.0
3,2,01/22/2020,Beijing,Mainland China,14.0
4,3,01/22/2020,Chongqing,Mainland China,6.0


<IPython.core.display.Javascript object>

In [61]:
confirmed_df.to_csv("data/confirmed_df.csv", index=False)

<IPython.core.display.Javascript object>

In [62]:
deaths_df = df2[["SNo", "Date", "Province/State", "Country", "Deaths"]]
deaths_df.head()

Unnamed: 0,SNo,Date,Province/State,Country,Deaths
0,13,01/22/2020,Hong Kong,Hong Kong,0.0
1,21,01/22/2020,Macau,Macau,0.0
2,1,01/22/2020,Anhui,Mainland China,0.0
3,2,01/22/2020,Beijing,Mainland China,0.0
4,3,01/22/2020,Chongqing,Mainland China,0.0


<IPython.core.display.Javascript object>

In [63]:
deaths_df.to_csv("data/deaths_df.csv", index=False)

<IPython.core.display.Javascript object>

In [64]:
recovered_df = df2[["SNo", "Date", "Province/State", "Country", "Recovered"]]
recovered_df.head()

Unnamed: 0,SNo,Date,Province/State,Country,Recovered
0,13,01/22/2020,Hong Kong,Hong Kong,0.0
1,21,01/22/2020,Macau,Macau,0.0
2,1,01/22/2020,Anhui,Mainland China,0.0
3,2,01/22/2020,Beijing,Mainland China,0.0
4,3,01/22/2020,Chongqing,Mainland China,0.0


<IPython.core.display.Javascript object>

In [65]:
recovered_df.to_csv("data/recovered_df.csv", index=False)

<IPython.core.display.Javascript object>