In [21]:
from io import BytesIO
import matplotlib.dates as dates
import requests
import pandas as pd
import re
import scipy.optimize
import numpy as np
from matplotlib import pyplot as plt
from openpyxl import load_workbook
import ipywidgets as widgets

%matplotlib ipympl

In [22]:
# Download the source spreadsheet and keep the raw bytes in an in-memory buffer.
DATA_URL = "https://www.arcgis.com/sharing/rest/content/items/e5fd11150d274bebaaf8fe2a7a2bda11/data"
# A timeout stops the notebook hanging forever on a stalled connection.
response = requests.get(DATA_URL, timeout=30)
# Fail here on an HTTP error instead of passing an error page on as if it were
# the spreadsheet.
response.raise_for_status()
# NOTE: `io` shadows the standard-library module of the same name; the name is
# kept because the workbook-loading cell reads it.
io = BytesIO(response.content)

In [23]:
# Parse the downloaded bytes as an Excel workbook, opened in read-only mode.
workbook = load_workbook(filename=io, read_only=True)

In [24]:
# The first row of the first worksheet holds the column names; every following
# row is one record.
header, *rows = workbook.worksheets[0].values
# Build the frame straight from the row records. Unlike a hand-rolled
# zip(*rows) transpose, this keeps the column names even when the sheet has no
# data rows, and does not silently merge columns that share a header.
df = pd.DataFrame(rows, columns=header)
df.tail()

Unnamed: 0,DateVal,CMODateCount,CumCases,DailyDeaths,CumDeaths
55,2020-03-26,2129,11658,115.0,578.0
56,2020-03-27,2885,14543,181.0,759.0
57,2020-03-28,2546,17089,260.0,1019.0
58,2020-03-29,2433,19522,209.0,1228.0
59,2020-03-30,2619,22141,180.0,1408.0


In [25]:
# Switch to friendlier column labels, then rebuild the cumulative total from
# the daily counts. This overwrites the values that came from the sheet's own
# CumCases column.
df = (
    df.rename(columns={'DateVal': 'date', 'CMODateCount': 'new cases', 'CumCases': 'cases'})
      .assign(cases=lambda frame: frame['new cases'].cumsum())
)
df.tail()

Unnamed: 0,date,new cases,cases,DailyDeaths,CumDeaths
55,2020-03-26,2129,11663,115.0,578.0
56,2020-03-27,2885,14548,181.0,759.0
57,2020-03-28,2546,17094,260.0,1019.0
58,2020-03-29,2433,19527,209.0,1228.0
59,2020-03-30,2619,22146,180.0,1408.0


In [26]:
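# Disabled cell: manually appends one extra row for today's date
# (new cases = 530-456, cumulative cases = 530) to the end of df.
# NOTE: `DataFrame.append` was removed in pandas 2.0; rewrite this with
# `pd.concat` before re-enabling it.
# df = df.append({'date': pd.to_datetime(pd.to_datetime('today').date()), 'new cases': 530-456, 'cases': 530}, ignore_index=True)
# df.tail()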

In [27]:
# Scatter plot of cumulative cases against date.
df.plot(x='date', y='cases', kind='scatter', title='UK Cases');

Canvas(toolbar=Toolbar(toolitems=[('Home', 'Reset original view', 'home', 'home'), ('Back', 'Back to previous …

In [28]:
def fit_func(x, a, m, c):
    """Exponential growth model ``a * exp(m*x + c)``.

    Parameters
    ----------
    x : scalar or array-like
        Independent variable.
    a : float
        Multiplicative prefactor.
    m : float
        Growth rate per unit of ``x``.
    c : float
        Offset added inside the exponent.

    Returns
    -------
    Same shape as ``x``: the model evaluated at each point.

    Notes
    -----
    ``a`` and ``c`` are mathematically redundant (only ``a * exp(c)`` affects
    the result), so a fit cannot determine them individually.
    """
    exponent = m * x + c
    return a * np.exp(exponent)

In [29]:
# Fit the exponential model to every row except the last one, using the day of
# the year as the independent variable.
prediction_dates = df['date'].iloc[:-1]
prediction_days = prediction_dates.dt.dayofyear
popt, pcov = scipy.optimize.curve_fit(
    fit_func,
    xdata=prediction_days,
    ydata=df['cases'].iloc[:prediction_days.size],
    p0=(0.3, 0.13, -2),  # starting guess for (a, m, c)
)
popt

  result = getattr(ufunc, method)(*inputs, **kwargs)


array([ 0.06148223,  0.18410472, -3.68389907])

In [30]:
def day_of_year_to_date(doy, year=2020):
    """Convert a 1-based day-of-year number into a calendar date.

    Parameters
    ----------
    doy : int or array-like of int
        Day of the year, where 1 is 1 January.
    year : int, optional
        Calendar year the day numbers belong to. Defaults to 2020, the year
        that was previously hard-coded.

    Returns
    -------
    pandas.Timestamp for a scalar ``doy``; a datetime index for array input.
    """
    first_of_january = pd.Timestamp(year=year, month=1, day=1)
    # Day 1 is the origin itself, hence the offset of ``doy - 1`` days.
    return pd.to_datetime(doy - 1, unit='D', origin=first_of_january)

In [31]:
# Evaluate the fitted curve one day past the last day that was used in the fit.
doy_next = prediction_days.iloc[-1] + 1
n_cases_after_prediction = fit_func(doy_next, *popt)
f"{n_cases_after_prediction:.0f} cases expected on {day_of_year_to_date(doy_next)}"

'24258 cases expected on 2020-03-30 00:00:00'

In [32]:
# Overlay the fitted curve on the current axes: green dashed line, 'x' markers.
plt.plot(prediction_dates, fit_func(prediction_days, *popt), color='g', marker='x', linestyle='--');

In [33]:
def predict(change):
    """Slider callback: redraw the projection line for the chosen horizon.

    Parameters
    ----------
    change : dict-like
        Change notification from the widget; ``change['new']`` is the number
        of days to project beyond the last fitted day.

    Reads the module-level ``prediction_days``, ``popt`` and ``line_predict``
    and updates ``line_predict`` in place.
    """
    n_days = change['new']
    last_fitted_day = prediction_days.to_numpy()[-1]
    # Days last+1 ... last+n_days; an empty array when n_days == 0.
    t_projection = last_fitted_day + np.arange(n_days) + 1
    dt_projection = day_of_year_to_date(t_projection)
    line_predict.set_data(dt_projection, fit_func(t_projection, *popt))

    # Rescale the axes that own the line. plt.gca() would pick whichever
    # figure was created most recently, which is the wrong one as soon as a
    # later cell opens a new figure.
    ax = line_predict.axes
    ax.relim()
    ax.autoscale()
    ax.figure.canvas.draw_idle()

In [34]:
# Create the red dashed line that the slider callback later overwrites with
# projected values; it initially shows the observed cumulative cases.
[line_predict] = plt.plot(df['date'], df['cases'], color='r', marker='x', linestyle='--');

In [35]:
# Slider choosing how many days to project; each value change calls predict().
n_days = widgets.IntSlider(description="Days", min=0, max=40)
n_days.observe(predict, names='value')
n_days

IntSlider(value=0, description='Days', max=40)

## Plot new cases per day vs. cumulative cases (log–log)

In [108]:
# Boolean mask selecting the rows where cumulative cases exceed 100.
m = df['cases'].gt(100)

def linear(x, m, c):
    """Straight line ``m*x + c`` with slope ``m`` and intercept ``c``.

    Works on scalars and array-likes; the result has the same shape as ``x``.
    """
    scaled = m * x
    return scaled + c

# Fit a straight line in log-log space:
#     log(new cases) = slope * log(cases) + intercept
# over the rows selected by the mask `m`. Rows with zero new cases are also
# excluded because log(0) = -inf would break the fit.
fit_rows = m & (df['new cases'] > 0)
popt_cases, pcov_cases = scipy.optimize.curve_fit(
    linear,
    np.log(df.loc[fit_rows, 'cases']),
    np.log(df.loc[fit_rows, 'new cases']),
)

# Draw on the axes returned by df.plot rather than relying on pyplot's notion
# of the current figure.
ax = df.plot("cases", "new cases", logx=True, logy=True, kind='scatter')
# Evaluate the fitted line only where log(cases) is defined; zero-case rows
# cannot appear on a log axis anyway.
positive_cases = df.loc[df['cases'] > 0, 'cases']
ax.loglog(positive_cases, np.exp(linear(np.log(positive_cases), *popt_cases)))
popt_cases

  fig = self.plt.figure(figsize=self.figsize)


Canvas(toolbar=Toolbar(toolitems=[('Home', 'Reset original view', 'home', 'home'), ('Back', 'Back to previous …

array([ 0.90742271, -0.9893926 ])