### Notes

The Hopkins and Oxford datasets contain the same figures but are offset by one day (because one is updated at the end of the day and the other at the beginning of the day).

---

In [None]:
import pandas as pd
import matplotlib.pyplot as plt

In [None]:
# HOPKINS — Johns Hopkins CSSE confirmed-cases time series (CSV hosted on GitHub).
hopkins_url = "https://raw.githubusercontent.com/CSSEGISandData/COVID-19/master/csse_covid_19_data/csse_covid_19_time_series/time_series_19-covid-Confirmed.csv"
hopkins_raw = pd.read_csv(hopkins_url)
# Sum every column from position 4 onward across all rows, giving one total per
# column. NOTE(review): presumably the first four columns are non-date metadata
# and the rest are one column per day — confirm against the CSV header.
hopkins_confirmed = hopkins_raw.iloc[:, 4:].sum()

In [None]:
# OXFORD – https://cowid.netlify.com/data/full_data.csv
# Load the full table, then keep only the "total_cases" column of the rows whose
# "location" is "World".
oxford_full = pd.read_csv("https://cowid.netlify.com/data/full_data.csv")
is_world = oxford_full["location"] == "World"
oxford_confirmed = oxford_full.loc[is_world, "total_cases"]

In [None]:
# Overlay both worldwide series on one figure; each is plotted against its own
# 0-based position, not against calendar dates.
plt.title("Worldwide Spread", fontsize = 12)
for series, source in ((hopkins_confirmed, 'Hopkins'), (oxford_confirmed, 'Oxford')):
    plt.plot(range(len(series)), list(series), label = source)

plt.xlabel("Date", fontsize = 12)
plt.ylabel("# of People", fontsize = 12)
plt.legend()

plt.show()

---

In [1]:
import pandas as pd
import numpy as np
from datetime import datetime,timedelta
from sklearn.metrics import mean_squared_error
from scipy.optimize import curve_fit
from scipy.optimize import fsolve
import matplotlib.pyplot as plt
%matplotlib inline

In [2]:
# Load the full Oxford table, then extract the "total_cases" column for the rows
# whose "location" is "United States".
oxford_confirmed = pd.read_csv("https://cowid.netlify.com/data/full_data.csv")
is_united_states = oxford_confirmed["location"] == "United States"
oxford = oxford_confirmed.loc[is_united_states, "total_cases"]

In [3]:
# Display the United States "total_cases" series (the full series is printed).
oxford

2391       1
2392       1
2393       2
2394       2
2395       5
2396       5
2397       5
2398       5
2399       6
2400       7
2401       8
2402      11
2403      11
2404      11
2405      12
2406      12
2407      12
2408      12
2409      12
2410      13
2411      13
2412      14
2413      15
2414      15
2415      15
2416      15
2417      15
2418      15
2419      15
2420      15
2421      35
2422      35
2423      35
2424      53
2425      53
2426      59
2427      59
2428      62
2429      62
2430      62
2431      64
2432     108
2433     129
2434     148
2435     213
2436     213
2437     213
2438     472
2439     696
2440     987
2441    1264
2442    1678
2443    1678
Name: total_cases, dtype: int64

In [None]:
# Plot the United States series against its 0-based position in the series.
us_totals = list(oxford)
plt.title("United States Spread", fontsize = 12)
plt.plot(range(len(us_totals)), us_totals, label = 'Oxford')

plt.xlabel("Date", fontsize = 12)
plt.ylabel("# of People", fontsize = 12)
plt.legend()

plt.show()

In [None]:
def logistic_model(x,a,b,c):
    """Evaluate the logistic curve ``c / (1 + exp(-(x - b) / a))``.

    Args:
        x: Point(s) at which to evaluate the curve (scalar or NumPy array).
        a: Scale of the transition; divides the distance from ``b``.
        b: Midpoint; at ``x == b`` the curve equals ``c / 2``.
        c: Upper asymptote approached as ``(x - b) / a`` grows large.

    Returns:
        The curve value(s), with the same shape as ``x``.
    """
    exponent = -(x - b) / a
    denominator = 1 + np.exp(exponent)
    return c / denominator

# Fit the logistic curve to the United States series and plot the fitted curve
# out to the point where it reaches int(c).

# Number of leading records skipped before fitting.
FIT_START_ROW = 29

y = list(oxford.iloc[FIT_START_ROW:])
x = list(range(len(y)))
assert len(y) >= 3, "need at least 3 observations to fit the 3 logistic parameters"

# curve_fit starts from all ones when p0 is omitted, which is far from case
# counts in the hundreds/thousands. Use a data-driven heuristic start instead:
# a scale of a few days, midpoint at the last observed day, and a plateau of
# twice the latest count.
initial_guess = [3.0, float(max(x)), 2.0 * max(y)]
fit = curve_fit(logistic_model, x, y, p0=initial_guess, maxfev=10000)

a = fit[0][0]
b = fit[0][1]
c = fit[0][2]

# Day index at which the fitted curve reaches int(c), searched starting from
# the midpoint b. fsolve returns a 1-element array; index it explicitly because
# implicit array-to-scalar conversion is deprecated in recent NumPy.
sol = int(fsolve(lambda day: logistic_model(day, a, b, c) - int(c), b)[0])

# One-standard-deviation parameter errors from the covariance diagonal.
errors = list(np.sqrt(np.diag(fit[1])))

# Extrapolated days start one past the last observed day so that x + pred_x
# contains no repeated point.
pred_x = list(range(max(x) + 1, sol))
plt.rcParams['figure.figsize'] = [7, 7]
plt.rc('font', size=14)
# Real data
plt.scatter(x, y, label="Real data", color="red")
# Predicted logistic curve, evaluated in one vectorised call
curve_x = x + pred_x
plt.plot(curve_x, logistic_model(np.array(curve_x), a, b, c), label="Logistic model")

plt.legend()
# x = 0 is the first fitted record (row FIT_START_ROW of the series), not
# 1 January 2020, so label the axis accordingly.
plt.xlabel("Days since start of fitted window")
plt.ylabel("Total number of infected people")
plt.ylim((min(y)*0.9, c*1.1))
plt.show()