In [None]:
%matplotlib notebook
from datetime import datetime, timedelta
from matplotlib import pyplot as plt
from lxml import html
import requests
import pandas as pd
import numpy as np

In [None]:
# Wikipedia article with raw data. The "%E2%80%93" in the title is the
# percent-encoded en dash between "2019" and "20":
URL = 'https://en.wikipedia.org/wiki/Timeline_of_the_2019%E2%80%9320_Wuhan_coronavirus_outbreak'

In [None]:
# Method to read the raw data from the Wikipedia article:
def _first_cell_text(row, column):
    """Return the stripped first text node of the row's ``column``-th <td>.

    ``column`` is 1-based (XPath indexing). Returns None when the cell is
    absent, has no text node, or its first text node is blank.
    """
    texts = row.xpath('.//td[{}]/text()'.format(column))
    if not texts:
        return None
    return texts[0].strip() or None


def _parse_confirmed(text):
    """Convert a count such as '1,234' to int; None if missing or not numeric."""
    if text is None:
        return None
    try:
        return int(text.replace(',', ''))
    except ValueError:
        # A non-numeric cell must only invalidate its own row, not abort the
        # whole parse.
        return None


def _parse_date(text):
    """Convert a 'YYYY-MM-DD' string to datetime; None if missing or malformed."""
    if text is None:
        return None
    try:
        return datetime.strptime(text, '%Y-%m-%d')
    except ValueError:
        return None


def read_wikipedia(url, timeout=30):
    """Download a Wikipedia article and extract (date, confirmed) rows.

    The table is located with a fixed absolute XPath, so the function is tied
    to the page layout that path describes. For every ``<tr>`` of that table
    the first cell is parsed as a ``YYYY-MM-DD`` date and the third cell as an
    integer count (thousands separators ``,`` are removed).

    Parameters
    ----------
    url : str
        Address of the article to download.
    timeout : float or tuple, optional
        Passed to ``requests.get`` so that a stalled connection cannot hang
        the notebook indefinitely.

    Returns
    -------
    pandas.DataFrame
        Columns ``'date'`` and ``'confirmed'``; rows in which either value is
        missing or could not be parsed are dropped.

    Raises
    ------
    requests.HTTPError
        If the server answers with an error status.
    IndexError
        If the expected table is not found in the page.
    """
    page = requests.get(url, timeout=timeout)
    # Fail on the HTTP error itself instead of trying to parse an error page.
    page.raise_for_status()
    tree = html.fromstring(page.content)
    tables = tree.xpath('/html/body/div[3]/div[3]/div[4]/div/table[2]/tbody')
    if not tables:
        raise IndexError(
            'Could not locate the data table in {!r}; the page layout may '
            'have changed.'.format(url)
        )
    records = [
        (_parse_date(_first_cell_text(row, 1)),
         _parse_confirmed(_first_cell_text(row, 3)))
        for row in tables[0].xpath('.//tr')
    ]
    data_frame = pd.DataFrame(records, columns=['date', 'confirmed'])
    # Header rows and incomplete rows end up with a missing value; drop them.
    return data_frame.dropna()

In [None]:
# Method to fit an exponential function:
def fit_exponential(x, y, xend=20, num_points=100):
    """Fit ``y = exp(b) * exp(a * x)`` by a linear fit to ``log(y)``.

    Parameters
    ----------
    x, y : array-like
        Observations. Every value of ``y`` must be strictly positive because
        the fit is done on its logarithm.
    xend : float, optional
        Upper end of the interval ``[0, xend]`` on which the fitted curve is
        evaluated.
    num_points : int, optional
        Number of evenly spaced points at which the curve is evaluated.

    Returns
    -------
    x_hat : numpy.ndarray
        ``num_points`` evenly spaced values from 0 to ``xend``.
    y_hat : numpy.ndarray
        Fitted curve evaluated at ``x_hat`` with ``predict_exponential``.
    param : numpy.ndarray
        ``[a, b]`` as returned by ``np.polyfit`` (slope first, then intercept
        of the fit in log space).

    Raises
    ------
    ValueError
        If ``y`` contains zero, negative or NaN values.
    """
    x_arr = np.asarray(x, dtype=float)
    y_arr = np.asarray(y, dtype=float)
    # log() of a non-positive value gives -inf/NaN, which would silently turn
    # the fitted parameters into NaN; reject such input explicitly instead.
    if not np.all(y_arr > 0):
        raise ValueError(
            'fit_exponential requires strictly positive y values'
        )
    param = np.polyfit(x_arr, np.log(y_arr), 1)
    x_hat = np.linspace(0, xend, num_points)
    y_hat = predict_exponential(x_hat, param)
    return x_hat, y_hat, param

In [None]:
# Methods for prediction:
def predict_exponential(x, param):
    """Evaluate ``exp(param[1]) * exp(param[0] * x)``.

    ``param[0]`` is the growth rate and ``param[1]`` the logarithm of the
    value at ``x = 0``. ``x`` may be a scalar or a NumPy array.
    """
    rate, log_amplitude = param[0], param[1]
    amplitude = np.exp(log_amplitude)
    growth = np.exp(rate * x)
    return amplitude * growth


def predict_for_dates(dates, date_zero, param):
    """Evaluate the fitted exponential on a list of calendar dates.

    Each entry of ``dates`` is a mapping of keyword arguments for
    ``datetime`` (e.g. ``{'year': 2020, 'month': 2, 'day': 4}``). The result
    is an array with one ``[days since date_zero, predicted value]`` row per
    entry.
    """
    def _row(date_kwargs):
        offset = (datetime(**date_kwargs) - date_zero).days
        return [offset, predict_exponential(offset, param)]

    return np.array([_row(date_kwargs) for date_kwargs in dates])

In [None]:
# A method for plotting:
def plot_raw_and_fit(days, cases, x_hat, y_hat, date_zero, predicted=None,
                     logscale=False, xlims=None, ylims=None):
    """Plot the observations, the fitted curve and, optionally, predictions.

    Parameters
    ----------
    days, cases : array-like
        x and y values of the observations, drawn as a scatter plot.
    x_hat, y_hat : numpy.ndarray
        Points of the fitted curve. ``x_hat.max()`` also bounds the x ticks.
    date_zero : datetime-like
        Date corresponding to x = 0; tick labels are built by adding
        ``timedelta(days=tick)`` to it.
    predicted : numpy.ndarray, optional
        Array indexed as ``predicted[:, 0]`` (x) and ``predicted[:, 1]`` (y).
        Each row is drawn as a marker with a dotted horizontal guide line and
        a text label showing the value.
    logscale : bool, optional
        Use a logarithmic y axis.
    xlims, ylims : optional
        Axis limits passed to ``set_xlim`` / ``set_ylim`` when given.

    Returns
    -------
    fig, ax1
        The created figure and axes.
    """
    fig, ax1 = plt.subplots()
    ax1.set_title('Novel coronavirus in Mainland China.')
    if logscale:
        ax1.set_yscale('log')
    ax1.scatter(days, cases, s=150, alpha=0.8)
    ax1.plot(x_hat, y_hat, color='black', label='Fitted')
    max_x = int(x_hat.max())
    min_x = min(ax1.get_xlim())
    # x range as it was before the guide lines were drawn. It is kept in its
    # own variable so that the caller's ``xlims`` argument is not overwritten.
    frozen_xlims = None
    if predicted is not None:
        ax1.scatter(predicted[:, 0], predicted[:, 1], s=200,
                    alpha=0.8, label='Predicted', marker='X')
        frozen_xlims = ax1.get_xlim()
        for values in predicted:
            # The guide line starts left of the visible range so that it
            # appears to run in from the y axis.
            ax1.plot(
                [min_x - 1, values[0]],
                [values[1], values[1]],
                ls=':', alpha=0.8, color='black'
            )
            ax1.text(min_x + 1, values[1]*1.4, '{:4.2g}'.format(values[1]),
                     fontsize='x-large')
        # Undo the widening of the x range caused by the guide lines.
        ax1.set_xlim(frozen_xlims)
        max_x = max(max_x, int(predicted[:, 0].max()))
    ticks = list(range(0, max_x + 1, 2))
    times_str = [
        (date_zero + timedelta(days=tick)).strftime('%d.%m.%Y')
        for tick in ticks
    ]
    ax1.set_xticks(ticks)
    ax1.set_xticklabels(times_str, rotation=30,
                        rotation_mode='anchor', ha='right')
    ax1.set(xlabel='Date', ylabel='Confirmed cases')
    # Limits given by the caller take precedence; otherwise re-apply the
    # frozen range in case setting the ticks changed it.
    final_xlims = xlims if xlims else frozen_xlims
    if final_xlims:
        ax1.set_xlim(final_xlims)
    if ylims:
        ax1.set_ylim(ylims)
    ax1.legend()
    fig.tight_layout()
    return fig, ax1

In [None]:
# Get the data: download the article at URL and parse its table into a
# DataFrame with 'date' and 'confirmed' columns (needs network access):
data = read_wikipedia(URL)

In [None]:
# Print raw data table (the bare expression on the last line of the cell is
# what the notebook displays):
data

In [None]:
# Express every observation as whole days elapsed since the first table row,
# and pull the case counts out as a plain array:
date_zero = data['date'].iloc[0]
days = data['date'].sub(date_zero).dt.days.to_numpy()
cases = data['confirmed'].to_numpy()

In [None]:
# Fit the exponential to all observations from index 5 onwards, and evaluate
# the fitted curve from day 0 up to one day past February 4th:
date_end = datetime(2020, 2, 4)
days_end = 1 + (date_end - data['date'].iloc[0]).days
x_hat, y_hat, param = fit_exponential(
    x=days[5:],
    y=cases[5:],
    xend=days_end,
)

In [None]:
# Draw the observations together with the fitted exponential on a linear
# y axis that reaches 10 % above the largest observed count:
fig1, ax1 = plot_raw_and_fit(
    days, cases, x_hat, y_hat, date_zero,
    ylims=(-100, 1.1 * max(cases)),
)

In [None]:
# Predict cases on February 4th and February 11th, 2020. Each entry holds
# the keyword arguments from which predict_for_dates builds a datetime:
dates_predict = [dict(year=2020, month=2, day=day) for day in (4, 11)]
predicted = predict_for_dates(dates_predict, date_zero, param)

In [None]:
# Show the predicted values on a logarithmic y axis. The fitted curve is
# re-evaluated so that it reaches the furthest predicted day:
x_hat2 = np.linspace(
    x_hat.min(), max((x_hat.max(), predicted[:, 0].max())), 100
)
y_hat2 = predict_exponential(x_hat2, param)
# Bind the axes to a real name: assigning to `_` in IPython stops the kernel
# from updating `_`, `__` and `___` with the outputs of later cells.
fig2, ax2 = plot_raw_and_fit(days, cases, x_hat2, y_hat2, date_zero,
                             predicted=predicted, logscale=True)