In [2]:
# Packages
import numpy as np
import sklearn 
import pandas as pd
import matplotlib.pyplot as plt
import scipy
from scipy.integrate import odeint
from scipy.optimize import minimize
from utility_code.utility import utils

In [3]:
def deriv(y, t, N, beta, gamma, delta):
    """Right-hand side of the SIRD compartment model, in the form odeint expects.

    Parameters
    ----------
    y : sequence of four values
        Current state, unpacked as (S, I, R, D).
    t : float
        Current time; unused because the system is autonomous, but required
        by the odeint callback signature.
    N : total population used to normalise the infection term.
    beta : coefficient of the S*I/N infection term.
    gamma : coefficient of the I -> R flow.
    delta : coefficient of the I -> D flow.

    Returns
    -------
    tuple
        (dS/dt, dI/dt, dR/dt, dD/dt).
    """
    susceptible, infected, _recovered, _dead = y

    # Flow out of S and into I.
    new_infections = beta * susceptible * infected / N
    # Combined flow out of I (towards R and D).
    removals = (gamma + delta) * infected

    return (
        -new_infections,
        new_infections - removals,
        gamma * infected,
        delta * infected,
    )

In [9]:
# Load the competition CSVs.
train = pd.read_csv("ucla2020-cs145-covid19-prediction/train.csv")
test = pd.read_csv("ucla2020-cs145-covid19-prediction/test.csv")
graph = pd.read_csv("ucla2020-cs145-covid19-prediction/graph.csv")

# Supplementary per-location table; the first two rows are skipped and
# comma thousands separators are parsed as part of the numbers.
supp = pd.read_csv("data-test/raw_data_test.csv", skiprows=2, thousands=',')

# Keep only locations that appear in the training data. The explicit .copy()
# makes `supp` an independent frame, so adding the Population column below does
# not write into a slice of the original (avoids SettingWithCopyWarning).
supp = supp[supp['Location'].isin(train['Province_State'])].copy()

# Back out the population from the case count and the cases-per-million rate:
# population = cases / (cases per 1e6) * 1e6.
supp['Population'] = supp['Number of COVID-19 Cases'].divide(supp['COVID-19 Cases per 1,000,000 Population']) * 1e6

# Distinct states, in order of first appearance in train.
states = train['Province_State'].unique()
num_states = len(states)

# Stratify by state: state name -> rows of train for that state.
statesdata = {}
for s in states:
    statesdata[s] = train.loc[train['Province_State'] == s]


In [82]:
# Run the SIRD model for a single state over the last `window` days of data.
state = 'Alabama'
a = statesdata[state]
window = 5  # number of days simulated; the run starts `window` days before the last row

# Initial active (I0), recovered (R0) and dead (D0) counts, taken from the row
# `window` days before the end. Columns are addressed by name so that a change
# in CSV column order cannot silently swap the compartments.
start_row = len(a) - window - 1
I0 = a['Active'].iloc[start_row]
R0 = a['Recovered'].iloc[start_row]
D0 = a['Deaths'].iloc[start_row]
if pd.isna(R0):
    # Recovered is missing on the start day: use the most recent reported
    # value, or 0 if the state never reports recoveries (otherwise NaN would
    # propagate through S0 and the whole ODE solution).
    known_recovered = a['Recovered'].dropna()
    R0 = known_recovered.iloc[-1] if not known_recovered.empty else 0.0

# Total population, N, as a plain float. The .loc lookup returns a one-element
# Series; passing that Series on would turn beta, S0 and y0 into Series too.
population = supp.loc[supp['Location'] == state, 'Population']
if population.empty:
    raise KeyError(f"no population entry for state {state!r} in supp")
N = float(population.iloc[0])

# Model coefficients passed to deriv: beta (infection term), gamma (I -> R)
# and delta (I -> D), all per day.
beta = a['Active'].mean(skipna=True) / N
gamma = 0.005
delta = a['Mortality_Rate'].mean() / 10000

print(R0)
# Everyone else, S0, is susceptible to infection initially.
S0 = N - I0 - R0 - D0

# A grid of time points (in days): 0, 1, ..., window.
t = np.linspace(0, window, window + 1)

# Initial conditions vector
y0 = S0, I0, R0, D0
# Integrate the SIRD equations over the time grid, t; one row per time point.
ret = odeint(deriv, y0, t, args=(N, beta, gamma, delta))
S, I, R, D = ret.T

48028.0


In [83]:
# Show the infected-compartment values (second component of the odeint
# solution) at each point of the time grid t.
I

array([69181.        , 69177.27207907, 69173.51707606, 69169.73499861,
       69165.92585568, 69162.08965253])

In [84]:
# Show the frame currently bound to `a`: the rows of train for the selected state.
a

Unnamed: 0,ID,Province_State,Date,Confirmed,Deaths,Recovered,Active,Incident_Rate,People_Tested,People_Hospitalized,Mortality_Rate,Testing_Rate,Hospitalization_Rate
0,0,Alabama,04-12-2020,3563,93,,3470.0,75.988020,21583.0,437.0,2.610160,460.300152,12.264945
50,50,Alabama,04-13-2020,3734,99,,3635.0,79.634933,29182.0,457.0,2.651312,622.363852,12.238886
100,100,Alabama,04-14-2020,3953,114,,3839.0,84.305541,33117.0,493.0,2.883886,706.285508,12.471541
150,150,Alabama,04-15-2020,4075,118,,3957.0,86.907433,34077.0,525.0,2.895706,726.759406,12.883436
200,200,Alabama,04-16-2020,4345,133,,4212.0,92.665716,36391.0,553.0,3.060990,776.110032,12.727273
...,...,...,...,...,...,...,...,...,...,...,...,...,...
6850,6850,Alabama,08-27-2020,121023,2076,48028.0,70919.0,2468.252779,956809.0,14005.0,1.715376,19514.030166,11.572180
6900,6900,Alabama,08-28-2020,122185,2107,48028.0,72050.0,2491.951660,964863.0,,1.724434,19678.290744,
6950,6950,Alabama,08-29-2020,123889,2152,48028.0,73709.0,2526.704581,975818.0,,1.737039,19901.716945,
7000,7000,Alabama,08-30-2020,125235,2162,48028.0,75045.0,2554.156125,981062.0,,1.726354,20008.667835,


In [59]:
# Run the SIRD model for every state over the last `window` days of data and
# keep each state's trajectory (previously only the last state's survived).
window = 5  # number of days simulated; each run starts `window` days before the last row

# A grid of time points (in days): 0, 1, ..., window. Identical for every
# state, so it is built once outside the loop.
t = np.linspace(0, window, window + 1)

# Recovery coefficient used when a state has no Recovered data to estimate it
# from; same constant as the single-state cell. NOTE(review): confirm this is
# an acceptable default for such states.
default_gamma = 0.005

# state name -> DataFrame with columns S, I, R, D, one row per time point in t.
sird_by_state = {}

for state in states:
    a = statesdata[state]

    # Initial active (I0), recovered (R0) and dead (D0) counts, taken from the
    # row `window` days before the end. Columns are addressed by name so that a
    # change in CSV column order cannot silently swap the compartments.
    start_row = len(a) - window - 1
    I0 = a['Active'].iloc[start_row]
    R0 = a['Recovered'].iloc[start_row]
    D0 = a['Deaths'].iloc[start_row]
    if pd.isna(R0):
        # Recovered is missing on the start day: use the most recent reported
        # value, or 0 if the state never reports recoveries (otherwise NaN
        # would propagate through S0 and the whole ODE solution).
        known_recovered = a['Recovered'].dropna()
        R0 = known_recovered.iloc[-1] if not known_recovered.empty else 0.0

    # Total population, N, as a plain float. The .loc lookup returns a
    # one-element Series; passing it on would turn beta, S0 and y0 into Series.
    population = supp.loc[supp['Location'] == state, 'Population']
    if population.empty:
        raise KeyError(f"no population entry for state {state!r} in supp")
    N = float(population.iloc[0])

    # Model coefficients passed to deriv: beta (infection term), gamma (I -> R)
    # and delta (I -> D), all per day.
    beta = a['Active'].mean(skipna=True) / N
    gamma = a['Recovered'].divide(a['Confirmed']).mean() / 100
    if pd.isna(gamma):
        # No Recovered data at all for this state, so the ratio is undefined.
        gamma = default_gamma
    delta = a['Mortality_Rate'].mean() / 10000

    print(I0)
    # Everyone else, S0, is susceptible to infection initially.
    S0 = N - I0 - R0 - D0

    # Initial conditions vector
    y0 = S0, I0, R0, D0
    # Integrate the SIRD equations over the time grid, t; one row per time point.
    ret = odeint(deriv, y0, t, args=(N, beta, gamma, delta))
    S, I, R, D = ret.T

    sird_by_state[state] = pd.DataFrame(ret, columns=['S', 'I', 'R', 'D'])
    

Unnamed: 0,ID,Province_State,Date,Confirmed,Deaths,Recovered,Active,Incident_Rate,People_Tested,People_Hospitalized,Mortality_Rate,Testing_Rate,Hospitalization_Rate
4,4,California,04-12-2020,22795,640,,22155.0,58.137726,190328.0,5234.0,2.812020,485.423869,22.961176
54,54,California,04-13-2020,23931,714,,23217.0,61.035048,190882.0,3015.0,2.987756,486.836823,12.598721
104,104,California,04-14-2020,25356,767,,24589.0,64.669453,202208.0,5163.0,3.028869,515.723328,20.362044
154,154,California,04-15-2020,26686,860,,25826.0,68.061564,216486.0,5163.0,3.226411,552.138790,19.347223
204,204,California,04-16-2020,27677,956,,26721.0,70.589069,246400.0,5031.0,3.457745,628.433237,18.177548
...,...,...,...,...,...,...,...,...,...,...,...,...,...
6854,6854,California,08-27-2020,691821,12677,,679144.0,1750.903765,10918415.0,,1.832410,27633.006121,
6904,6904,California,08-28-2020,697385,12805,,684580.0,1764.985483,11010637.0,,1.836145,27866.407314,
6954,6954,California,08-29-2020,702038,12894,,689144.0,1776.761586,11109630.0,,1.836653,28116.944977,
7004,7004,California,08-30-2020,705951,12937,,693014.0,1786.664851,11231829.0,,1.832563,28426.213833,
