In [1]:
import pandas as pd
import numpy as np

In [2]:
from statsmodels.tsa.arima_model import ARMA

  import pandas.util.testing as tm


In [3]:
from sklearn.metrics import mean_squared_error as mse

In [4]:
def cumulative(dataframe):
    """Return ``dataframe`` extended with running totals of the daily counts.

    For each of ``num_infections``, ``num_hosp``, ``num_uci`` and ``num_dead``
    a ``cumu_``-prefixed column holding that column's cumulative sum is
    appended on the right. The frame passed in is not modified; the result is
    a new frame built with ``pd.concat``.
    """
    daily_columns = ['num_infections', 'num_hosp', 'num_uci', 'num_dead']

    running_totals = dataframe[daily_columns].cumsum()
    running_totals.columns = ['cumu_' + name for name in daily_columns]

    return pd.concat([dataframe, running_totals], axis=1)

In [5]:
def mov_7_ave(dataframe):
    """Add 7-row rolling means of the daily count columns to ``dataframe``.

    For each of ``num_infections``, ``num_hosp``, ``num_uci`` and ``num_dead``
    a column named ``ave_7_<column>`` is written holding the mean over a
    window of 7 consecutive rows; the first six rows of each new column are
    NaN because the window is not yet full.

    The source columns are looked up by name, so the result does not depend
    on where those columns sit in the frame.

    Note: the frame is modified in place and the same object is returned.
    """
    for column in ('num_infections', 'num_hosp', 'num_uci', 'num_dead'):
        dataframe['ave_7_' + column] = dataframe[column].rolling(window=7).mean()

    return dataframe

In [6]:
def evolution(dataframe):
    """Aggregate ``dataframe`` per date and add cumulative and smoothed series.

    Parameters
    ----------
    dataframe : pandas.DataFrame
        Records with a ``date`` column and the count columns consumed by
        ``cumulative`` and ``mov_7_ave`` (``num_infections``, ``num_hosp``,
        ``num_uci``, ``num_dead``).

    Returns
    -------
    pandas.DataFrame
        One row per date holding the summed counts, followed by the columns
        added by ``cumulative`` and then by ``mov_7_ave``.
    """
    # Work on the frame that was passed in; the argument used to be ignored in
    # favour of the module-level ``db1``.
    # numeric_only=True keeps text columns (region, province, ...) out of the
    # sum, so only ``date`` and the count columns reach the steps below.
    bydate = dataframe.groupby('date').sum(numeric_only=True).reset_index()
    bydate = cumulative(bydate)
    bydate = mov_7_ave(bydate)

    return bydate

In [7]:
def clean_data():
    """Clean the module-level ``db1`` frame and rebind ``db1`` to the result.

    Steps, in order:
      1. keep only rows with ``num_infections > 0`` and renumber the index;
      2. parse ``date`` with ``pd.to_datetime``;
      3. rewrite the "Name, Prefix" style autonomous-region labels into
         natural word order;
      4. reduce bilingual province labels to a single name;
      5. relabel the ``'NC'`` marker in ``sex`` and ``age_interval``.

    Takes no arguments and returns ``None``; the result is published through
    the global ``db1``.
    """
    global db1

    region_names = {
        'Valenciana, Comunidad': 'Comunidad Valenciana',
        'Madrid, Comunidad de': 'Comunidad de Madrid',
        # Fixed: this label used to be written with a duplicated "de".
        'Murcia, Región de': 'Región de Murcia',
        'Navarra, Comunidad Foral de': 'Comunidad Foral de Navarra',
        'Asturias, Principado de': 'Principado de Asturias',
    }
    province_names = {
        'Alicante/Alacant': 'Alicante',
        'Castellón/Castelló': 'Castellón',
        'Araba/Álava': 'Araba',
        'Valencia/València': 'Valencia',
    }
    # NOTE(review): 'UKNOWN' looks like a misspelling of 'UNKNOWN'. It is kept
    # unchanged because code outside this view may filter on the exact label.
    not_reported = {'NC': 'UKNOWN'}

    # drop=True discards the old index directly instead of materialising it as
    # an 'index' column that then has to be removed again.
    cleaned = db1[db1.num_infections > 0].reset_index(drop=True)

    cleaned['date'] = pd.to_datetime(cleaned.date)
    cleaned['autonomous_region'] = cleaned.autonomous_region.replace(region_names)
    cleaned['province'] = cleaned.province.replace(province_names)
    cleaned['sex'] = cleaned.sex.replace(not_reported)
    cleaned['age_interval'] = cleaned.age_interval.replace(not_reported)

    db1 = cleaned

In [8]:
# Load the raw sample records. The path is absolute, so the file has to exist
# at exactly this location in the environment running the notebook.
csv_path = '/content/covid-19-sample.csv'
db1 = pd.read_csv(csv_path)

In [9]:
# Display the frame exactly as loaded, before clean_data() filters and relabels it.
db1

Unnamed: 0,date,autonomous_region,province,sex,age_interval,num_infections,num_hosp,num_uci,num_dead
0,2020-01-01,"Valenciana, Comunidad",Alicante/Alacant,H,0-9,0,0,0,0
1,2020-01-01,"Valenciana, Comunidad",Alicante/Alacant,H,10-19,0,0,0,0
2,2020-01-01,"Valenciana, Comunidad",Alicante/Alacant,H,20-29,0,0,0,0
3,2020-01-01,"Valenciana, Comunidad",Alicante/Alacant,H,30-39,0,0,0,0
4,2020-01-01,"Valenciana, Comunidad",Alicante/Alacant,H,40-49,0,0,0,0
...,...,...,...,...,...,...,...,...,...
822115,2021-06-10,Castilla y León,Zamora,NC,50-59,0,0,0,0
822116,2021-06-10,Castilla y León,Zamora,NC,60-69,0,0,0,0
822117,2021-06-10,Castilla y León,Zamora,NC,70-79,0,0,0,0
822118,2021-06-10,Castilla y León,Zamora,NC,80+,0,0,0,0


In [10]:
# Rebinds the global db1 to its cleaned version: rows without infections are
# dropped, dates are parsed and region/province/sex/age labels are rewritten.
clean_data()

In [11]:
# Per-date totals, extended with cumulative sums and 7-row rolling means.
bydate = evolution(db1)

# Infections: 3-day hold-out

In [26]:
# Smoothed daily infections, indexed by date. The first six rows are skipped:
# a 7-row rolling window yields no value until the seventh observation.
infections = bydate.set_index('date')[['ave_7_num_infections']].iloc[6:]

In [29]:
# Show the hold-out rows. `test` is assigned by the train/test split cell that
# follows in this file (In [27]), so that cell has to be executed before this one.
test

Unnamed: 0_level_0,ave_7_num_infections
date,Unnamed: 1_level_1
2021-06-08,3761.857143
2021-06-09,3593.714286
2021-06-10,3504.714286


In [27]:
# Hold out the last 3 rows for evaluation; everything before them is training data.
train = infections.iloc[:-3]
test = infections.iloc[-3:]

In [30]:
# Grid-search ARMA(p, q) orders: fit each candidate on `train`, forecast the
# hold-out positions and record (p, q, RMSE against `test`).
res = []
for p in range(18, 20):
  for q in range(10, 14):
    try:
      model = ARMA(train, order=(p, q)).fit(disp=False)
      forecast = model.predict(len(train), len(infections) - 1)
      # Arguments follow sklearn's (y_true, y_pred) order; the value is unchanged.
      res.append((p, q, mse(test, forecast) ** 0.5))
    except Exception:
      # Best-effort search: an order that cannot be fitted or scored is skipped.
      # Catching Exception instead of using a bare except keeps kernel
      # interrupts (KeyboardInterrupt) working during this slow loop.
      continue



In [31]:
# Keep every (p, q, rmse) entry whose RMSE equals the smallest one recorded.
error = [rmse for _, _, rmse in res]
lowest_error = min(error, default=None)
best = [entry for entry, rmse in zip(res, error) if rmse == lowest_error]

best

[(19, 12, 27.75666509249)]

# Hospitalizations (hosp)

# Hospitalizations: 5-day hold-out

In [12]:
# Smoothed daily hospitalisation counts, indexed by date. The first six rows
# are skipped: a 7-row rolling window yields no value until the seventh row.
hosp = bydate.set_index('date')[['ave_7_num_hosp']].iloc[6:]

In [13]:
# Hold out the last 5 rows for evaluation; everything before them is training data.
train = hosp.iloc[:-5]
test = hosp.iloc[-5:]

In [17]:
# Grid-search low ARMA orders (p in 1..6, q in 5..8): fit on `train`, forecast
# the hold-out positions and record (p, q, RMSE against `test`).
res = []
for p in range(1, 7):
  for q in range(5, 9):
    try:
      model = ARMA(train, order=(p, q)).fit(disp=False)
      forecast = model.predict(len(train), len(hosp) - 1)
      # Arguments follow sklearn's (y_true, y_pred) order; the value is unchanged.
      res.append((p, q, mse(test, forecast) ** 0.5))
    except Exception:
      # Best-effort search: an order that cannot be fitted or scored is skipped.
      # Catching Exception instead of using a bare except keeps kernel
      # interrupts (KeyboardInterrupt) working during this slow loop.
      continue



In [18]:
# Keep every (p, q, rmse) entry whose RMSE equals the smallest one recorded.
error = [rmse for _, _, rmse in res]
lowest_error = min(error, default=None)
best = [entry for entry, rmse in zip(res, error) if rmse == lowest_error]

best

[(2, 8, 23.05619907643238)]

In [34]:
# Grid-search high ARMA orders (p in 18..19, q in 10..13): fit on `train`,
# forecast the hold-out positions and record (p, q, RMSE against `test`).
res = []
for p in range(18, 20):
  for q in range(10, 14):
    try:
      model = ARMA(train, order=(p, q)).fit(disp=False)
      forecast = model.predict(len(train), len(hosp) - 1)
      # Arguments follow sklearn's (y_true, y_pred) order; the value is unchanged.
      res.append((p, q, mse(test, forecast) ** 0.5))
    except Exception:
      # Best-effort search: an order that cannot be fitted or scored is skipped.
      # Catching Exception instead of using a bare except keeps kernel
      # interrupts (KeyboardInterrupt) working during this slow loop.
      continue



In [35]:
# Keep every (p, q, rmse) entry whose RMSE equals the smallest one recorded.
error = [rmse for _, _, rmse in res]
lowest_error = min(error, default=None)
best = [entry for entry, rmse in zip(res, error) if rmse == lowest_error]

best

[(19, 10, 85.79565932357109)]

# Hospitalizations: 7-day hold-out

In [19]:
# Hold out the last 7 rows for evaluation; everything before them is training data.
train = hosp.iloc[:-7]
test = hosp.iloc[-7:]

In [21]:
# Grid-search low ARMA orders (p in 1..6, q in 5..8): fit on `train`, forecast
# the hold-out positions and record (p, q, RMSE against `test`).
res = []
for p in range(1, 7):
  for q in range(5, 9):
    try:
      model = ARMA(train, order=(p, q)).fit(disp=False)
      forecast = model.predict(len(train), len(hosp) - 1)
      # Arguments follow sklearn's (y_true, y_pred) order; the value is unchanged.
      res.append((p, q, mse(test, forecast) ** 0.5))
    except Exception:
      # Best-effort search: an order that cannot be fitted or scored is skipped.
      # Catching Exception instead of using a bare except keeps kernel
      # interrupts (KeyboardInterrupt) working during this slow loop.
      continue



In [22]:
# Keep every (p, q, rmse) entry whose RMSE equals the smallest one recorded.
error = [rmse for _, _, rmse in res]
lowest_error = min(error, default=None)
best = [entry for entry, rmse in zip(res, error) if rmse == lowest_error]

best

[(2, 8, 43.36127310072354)]

In [23]:
# Grid-search high ARMA orders (p in 18..19, q in 10..13): fit on `train`,
# forecast the hold-out positions and record (p, q, RMSE against `test`).
res = []
for p in range(18, 20):
  for q in range(10, 14):
    try:
      model = ARMA(train, order=(p, q)).fit(disp=False)
      forecast = model.predict(len(train), len(hosp) - 1)
      # Arguments follow sklearn's (y_true, y_pred) order; the value is unchanged.
      res.append((p, q, mse(test, forecast) ** 0.5))
    except Exception:
      # Best-effort search: an order that cannot be fitted or scored is skipped.
      # Catching Exception instead of using a bare except keeps kernel
      # interrupts (KeyboardInterrupt) working during this slow loop.
      continue

  newparams = ((1-np.exp(-params))/(1+np.exp(-params))).copy()
  newparams = ((1-np.exp(-params))/(1+np.exp(-params))).copy()
  tmp = ((1-np.exp(-params))/(1+np.exp(-params))).copy()
  tmp = ((1-np.exp(-params))/(1+np.exp(-params))).copy()
  newparams = ((1-np.exp(-params))/(1+np.exp(-params))).copy()
  newparams = ((1-np.exp(-params))/(1+np.exp(-params))).copy()
  tmp = ((1-np.exp(-params))/(1+np.exp(-params))).copy()
  tmp = ((1-np.exp(-params))/(1+np.exp(-params))).copy()


In [24]:
# Keep every (p, q, rmse) entry whose RMSE equals the smallest one recorded.
error = [rmse for _, _, rmse in res]
lowest_error = min(error, default=None)
best = [entry for entry, rmse in zip(res, error) if rmse == lowest_error]

best

[(19, 11, 15.347751701487805)]