# Imports

In [1]:
import pandas as pd
import numpy as np

In [2]:
import seaborn as sns
import matplotlib.pyplot as plt
%matplotlib inline
import pylab as plt

In [3]:
from statsmodels.tsa.arima_model import ARMA

In [4]:
from sklearn.metrics import mean_squared_error as mse

In [5]:
plt.rcParams['figure.figsize']=(20, 20)

# Functions

## Data cleaning

In [6]:
def clean_data():
    """Clean the module-level ``db1`` frame and rebind the global to the result.

    Steps, in order:
      1. Keep only rows with ``num_infections > 0`` and renumber the index.
      2. Parse the ``date`` column with ``pd.to_datetime``.
      3. Rewrite "Name, Article" style region labels and bilingual
         "Name/Name" province labels to a single plain form.
      4. Relabel the ``'NC'`` code in ``sex`` and ``age_interval``.

    Takes no arguments and returns ``None``; the cleaned frame is stored
    back into the global ``db1``.
    """
    global db1

    region_names = {
        'Valenciana, Comunidad': 'Comunidad Valenciana',
        'Madrid, Comunidad de': 'Comunidad de Madrid',
        # Fixed: the previous label read 'Región de de Murcia' (duplicated "de").
        'Murcia, Región de': 'Región de Murcia',
        'Navarra, Comunidad Foral de': 'Comunidad Foral de Navarra',
        'Asturias, Principado de': 'Principado de Asturias',
    }
    province_names = {
        'Alicante/Alacant': 'Alicante',
        'Castellón/Castelló': 'Castellón',
        'Araba/Álava': 'Araba',
        'Valencia/València': 'Valencia',
    }
    # NOTE(review): 'UKNOWN' looks like a misspelling of 'UNKNOWN'; kept as-is
    # because cells outside this view may filter on the existing label.
    not_reported = {'NC': 'UKNOWN'}

    # drop=True renumbers the rows without materialising the old index as an
    # 'index' column that then has to be dropped again.
    cleaned = db1[db1.num_infections > 0].reset_index(drop=True)

    cleaned['date'] = pd.to_datetime(cleaned.date)

    # Dictionary replace matches whole values exactly, like the former
    # ``x == '...'`` lambdas, but in a single pass per column.
    cleaned['autonomous_region'] = cleaned.autonomous_region.replace(region_names)
    cleaned['province'] = cleaned.province.replace(province_names)
    cleaned['sex'] = cleaned.sex.replace(not_reported)
    cleaned['age_interval'] = cleaned.age_interval.replace(not_reported)

    # Rebind only once everything succeeded, so a failure above cannot leave
    # the global half-cleaned.
    db1 = cleaned

## Cumulative

In [7]:
def cumulative(dataframe):
    """Return ``dataframe`` extended with running totals of the four counters.

    For each of ``num_infections``, ``num_hosp``, ``num_uci`` and ``num_dead``
    a ``cumu_<name>`` column holding the cumulative sum in row order is
    appended on the right. A new frame is returned; the input is not modified.
    """
    counters = ['num_infections', 'num_hosp', 'num_uci', 'num_dead']
    running_totals = dataframe[counters].cumsum().add_prefix('cumu_')
    return pd.concat([dataframe, running_totals], axis=1)

## Relative frequencies

In [8]:
def freq_rel(dataframe):
    """Add, in place, each row's share of its column total as a percentage.

    For every counter column a new column is written whose value is
    ``round(value / column_total, 3) * 100``:

    * ``num_infections`` -> ``'Number of infection (%)'``
    * ``num_hosp``       -> ``'Number of hospitalisation (%)'``
    * ``num_uci``        -> ``'Number of Intensive Care Unit (%)'``
    * ``num_dead``       -> ``'Number of deaths (%)'``

    The frame passed in is mutated; nothing is returned.
    """
    share_labels = {
        'num_infections': 'Number of infection (%)',
        'num_hosp': 'Number of hospitalisation (%)',
        'num_uci': 'Number of Intensive Care Unit (%)',
        'num_dead': 'Number of deaths (%)',
    }

    for source, label in share_labels.items():
        # The total is loop-invariant: compute it once per column instead of
        # once per row as the former comprehension did.
        total = dataframe[source].sum()
        dataframe[label] = [round(value / total, 3) * 100 for value in dataframe[source]]

In [9]:
def organise(column):
    """Aggregate the module-level ``db1`` by ``column`` and add percentage shares.

    Parameters
    ----------
    column : label or list of labels
        Passed straight to ``db1.groupby``.

    Returns
    -------
    pandas.DataFrame
        One row per group with the summed numeric columns, plus the four
        percentage columns written by ``freq_rel``.
    """
    # numeric_only=True keeps the aggregation to the counters; without it,
    # recent pandas versions try to sum the datetime/string columns as well.
    totals = db1.groupby(column).sum(numeric_only=True)
    freq_rel(totals)

    return totals

## Moving averages: 7-day moving average

In [10]:
def mov_7_ave(dataframe):
    """Add 7-row rolling means of the four counters to ``dataframe`` in place.

    For each of ``num_infections``, ``num_hosp``, ``num_uci`` and ``num_dead``
    an ``ave_7_<name>`` column is written containing
    ``rolling(window=7).mean()`` of that column; the first six rows are NaN
    because a full window is not yet available.

    The source columns are selected by name rather than by position, so the
    result does not depend on the column order of the frame passed in.

    Returns the same (mutated) frame for convenience.
    """
    for counter in ('num_infections', 'num_hosp', 'num_uci', 'num_dead'):
        dataframe['ave_7_' + counter] = dataframe[counter].rolling(window=7).mean()

    return dataframe

## Evolution

In [11]:
def evolution(dataframe):
    """Build the per-date time series used by the forecasting cells.

    Parameters
    ----------
    dataframe : pandas.DataFrame
        Frame with a ``date`` column and the four counter columns. It is
        grouped by ``date`` and its numeric columns are summed.

    Returns
    -------
    pandas.DataFrame
        One row per date with the summed counters, their running totals
        (from ``cumulative``) and their 7-row rolling means (from
        ``mov_7_ave``).

    Notes
    -----
    The argument is now actually used; previously the function ignored it and
    always read the global ``db1``.
    """
    # numeric_only=True restricts the aggregation to the counters so that
    # string columns are not carried into the per-date frame.
    daily = dataframe.groupby('date').sum(numeric_only=True).reset_index()
    daily = cumulative(daily)
    daily = mov_7_ave(daily)

    return daily

# Data

In [12]:
# Load the raw sample. clean_data() rebinds this module-level `db1` and
# organise() reads it, so this cell must run before either of them.
db1 =pd.read_csv('covid-19-sample.csv')

In [13]:
clean_data()

In [14]:
bydate = evolution(db1)

In [111]:
bydate.tail(10)

Unnamed: 0,date,num_infections,num_hosp,num_uci,num_dead,cumu_num_infections,cumu_num_hosp,cumu_num_uci,cumu_num_dead,ave_7_num_infections,ave_7_num_hosp,ave_7_num_uci,ave_7_num_dead
470,2021-06-01,5119,221,24,15,3678944,351328,33136,75641,4314.142857,226.142857,25.857143,16.142857
471,2021-06-02,5106,228,14,24,3684050,351556,33150,75665,4287.571429,225.0,23.0,16.142857
472,2021-06-03,4447,194,27,18,3688497,351750,33177,75683,4211.0,218.857143,22.571429,17.285714
473,2021-06-04,4457,186,17,18,3692954,351936,33194,75701,4115.571429,207.142857,22.285714,18.285714
474,2021-06-05,2414,144,14,15,3695368,352080,33208,75716,4048.428571,201.285714,21.857143,19.142857
475,2021-06-06,1693,144,17,10,3697061,352224,33225,75726,4015.142857,196.714286,20.857143,17.714286
476,2021-06-07,4142,218,12,13,3701203,352442,33237,75739,3911.142857,190.714286,17.857143,16.142857
477,2021-06-08,4074,174,13,19,3705277,352616,33250,75758,3761.857143,184.0,16.285714,16.714286
478,2021-06-09,3929,149,11,10,3709206,352765,33261,75768,3593.714286,172.714286,15.857143,14.714286
479,2021-06-10,3824,144,8,13,3713030,352909,33269,75781,3504.714286,165.571429,13.142857,14.0


# Number of infections

In [18]:
# 7-day average of infections indexed by date. The first six rows are skipped:
# rolling(window=7) has no full window for them yet.
infections = bydate.set_index('date')[['ave_7_num_infections']].iloc[6:]

## 1 day: found

In [20]:
train, test = infections[:-1], infections[-1:]

In [23]:
model=ARMA(train, order = (4, 2)).fit(disp=False)

statsmodels.tsa.arima_model.ARMA and statsmodels.tsa.arima_model.ARIMA have
been deprecated in favor of statsmodels.tsa.arima.model.ARIMA (note the .
between arima and model) and
statsmodels.tsa.SARIMAX. These will be removed after the 0.12 release.

statsmodels.tsa.arima.model.ARIMA makes use of the statespace framework and
is both well tested and maintained.

removed, use:




In [24]:
# Forecast the held-out positions len(train) .. len(infections)-1 and score
# them against `test` as RMSE (square root of the mean squared error).
mse(model.predict(len(train), len(infections)-1), test)**0.5

0.8948312256152349

## 3 days: check

In [64]:
train, test = infections[:-3], infections[-3:]

In [52]:
model=ARMA(train, order = (4, 12)).fit(disp=False) #4, 12

statsmodels.tsa.arima_model.ARMA and statsmodels.tsa.arima_model.ARIMA have
been deprecated in favor of statsmodels.tsa.arima.model.ARIMA (note the .
between arima and model) and
statsmodels.tsa.SARIMAX. These will be removed after the 0.12 release.

statsmodels.tsa.arima.model.ARIMA makes use of the statespace framework and
is both well tested and maintained.

removed, use:




In [53]:
mse(model.predict(len(train), len(infections)-1), test)**0.5

102.2607415602929

In [62]:
model=ARMA(train, order = (19, 7)).fit(disp=False)

statsmodels.tsa.arima_model.ARMA and statsmodels.tsa.arima_model.ARIMA have
been deprecated in favor of statsmodels.tsa.arima.model.ARIMA (note the .
between arima and model) and
statsmodels.tsa.SARIMAX. These will be removed after the 0.12 release.

statsmodels.tsa.arima.model.ARIMA makes use of the statespace framework and
is both well tested and maintained.

removed, use:


  newparams = ((1-np.exp(-params))/(1+np.exp(-params))).copy()
  newparams = ((1-np.exp(-params))/(1+np.exp(-params))).copy()
  tmp = ((1-np.exp(-params))/(1+np.exp(-params))).copy()
  tmp = ((1-np.exp(-params))/(1+np.exp(-params))).copy()


In [63]:
mse(model.predict(len(train), len(infections)-1), test)**0.5

139.8448187561136

In [65]:
model=ARMA(train, order = (19, 12)).fit(disp=False)

statsmodels.tsa.arima_model.ARMA and statsmodels.tsa.arima_model.ARIMA have
been deprecated in favor of statsmodels.tsa.arima.model.ARIMA (note the .
between arima and model) and
statsmodels.tsa.SARIMAX. These will be removed after the 0.12 release.

statsmodels.tsa.arima.model.ARIMA makes use of the statespace framework and
is both well tested and maintained.

removed, use:




In [66]:
mse(model.predict(len(train), len(infections)-1), test)**0.5

113.25477696833399

**The algorithm gives another solution that doesn't match the result!**

In [27]:
# NOTE(review): this cell's execution count (In [27]) predates the fits above
# (In [52] .. In [65]); its recorded value equals the (4, 12) result from
# In [53], not the (19, 12) model fitted last. Re-run in order to confirm.
mse(model.predict(len(train), len(infections)-1), test)**0.5

102.2607415602929

## 5 days: found

In [58]:
train, test = infections[:-5], infections[-5:]

In [70]:
model=ARMA(train, order = (19, 11)).fit(disp=False)

statsmodels.tsa.arima_model.ARMA and statsmodels.tsa.arima_model.ARIMA have
been deprecated in favor of statsmodels.tsa.arima.model.ARIMA (note the .
between arima and model) and
statsmodels.tsa.SARIMAX. These will be removed after the 0.12 release.

statsmodels.tsa.arima.model.ARIMA makes use of the statespace framework and
is both well tested and maintained.

removed, use:




In [71]:
mse(model.predict(len(train), len(infections)-1), test)**0.5

21.709001060458572

## 7 days: found

In [34]:
train, test = infections[:-7], infections[-7:]

In [35]:
model=ARMA(train, order = (19, 7)).fit(disp=False)

statsmodels.tsa.arima_model.ARMA and statsmodels.tsa.arima_model.ARIMA have
been deprecated in favor of statsmodels.tsa.arima.model.ARIMA (note the .
between arima and model) and
statsmodels.tsa.SARIMAX. These will be removed after the 0.12 release.

statsmodels.tsa.arima.model.ARIMA makes use of the statespace framework and
is both well tested and maintained.

removed, use:


  newparams = ((1-np.exp(-params))/(1+np.exp(-params))).copy()
  newparams = ((1-np.exp(-params))/(1+np.exp(-params))).copy()
  tmp = ((1-np.exp(-params))/(1+np.exp(-params))).copy()
  tmp = ((1-np.exp(-params))/(1+np.exp(-params))).copy()


In [36]:
mse(model.predict(len(train), len(infections)-1), test)**0.5

62.618719214126635

# Number of hospitalisations

In [49]:
# 7-day average of hospitalisations indexed by date. The first six rows are
# skipped: rolling(window=7) has no full window for them yet.
hosp = bydate.set_index('date')[['ave_7_num_hosp']].iloc[6:]

## 1 day: found

In [55]:
train, test = hosp[:-1], hosp[-1:]

In [56]:
model=ARMA(train, order = (2, 2)).fit(disp=False)

statsmodels.tsa.arima_model.ARMA and statsmodels.tsa.arima_model.ARIMA have
been deprecated in favor of statsmodels.tsa.arima.model.ARIMA (note the .
between arima and model) and
statsmodels.tsa.SARIMAX. These will be removed after the 0.12 release.

statsmodels.tsa.arima.model.ARIMA makes use of the statespace framework and
is both well tested and maintained.

removed, use:




In [57]:
mse(model.predict(len(train), len(hosp)-1), test)**0.5

0.4924252030553191

## 3 days: can be improved

In [67]:
train, test = hosp[:-3], hosp[-3:]

In [68]:
model=ARMA(train, order = (6, 5)).fit(disp=False)

statsmodels.tsa.arima_model.ARMA and statsmodels.tsa.arima_model.ARIMA have
been deprecated in favor of statsmodels.tsa.arima.model.ARIMA (note the .
between arima and model) and
statsmodels.tsa.SARIMAX. These will be removed after the 0.12 release.

statsmodels.tsa.arima.model.ARIMA makes use of the statespace framework and
is both well tested and maintained.

removed, use:




In [69]:
mse(model.predict(len(train), len(hosp)-1), test)**0.5

17.309328742227592

## 5 days: can be improved

In [72]:
train, test = hosp[:-5], hosp[-5:]

In [73]:
model=ARMA(train, order = (2, 8)).fit(disp=False)

statsmodels.tsa.arima_model.ARMA and statsmodels.tsa.arima_model.ARIMA have
been deprecated in favor of statsmodels.tsa.arima.model.ARIMA (note the .
between arima and model) and
statsmodels.tsa.SARIMAX. These will be removed after the 0.12 release.

statsmodels.tsa.arima.model.ARIMA makes use of the statespace framework and
is both well tested and maintained.

removed, use:




In [74]:
mse(model.predict(len(train), len(hosp)-1), test)**0.5

23.474666310864254

## 7 days

In [112]:
train, test = hosp[:-7], hosp[-7:]

In [113]:
model=ARMA(train, order = (19, 11)).fit(disp=False)

statsmodels.tsa.arima_model.ARMA and statsmodels.tsa.arima_model.ARIMA have
been deprecated in favor of statsmodels.tsa.arima.model.ARIMA (note the .
between arima and model) and
statsmodels.tsa.SARIMAX. These will be removed after the 0.12 release.

statsmodels.tsa.arima.model.ARIMA makes use of the statespace framework and
is both well tested and maintained.

removed, use:




In [114]:
mse(model.predict(len(train), len(hosp)-1), test)**0.5

17.752726136805787

# Number Intensive Care Units

In [75]:
# 7-day average of the `num_uci` counter indexed by date. The first six rows
# are skipped: rolling(window=7) has no full window for them yet.
uci = bydate.set_index('date')[['ave_7_num_uci']].iloc[6:]

## 1 day: found

In [76]:
train, test = uci[:-1], uci[-1:]

In [77]:
model=ARMA(train, order = (2, 4)).fit(disp=False)

statsmodels.tsa.arima_model.ARMA and statsmodels.tsa.arima_model.ARIMA have
been deprecated in favor of statsmodels.tsa.arima.model.ARIMA (note the .
between arima and model) and
statsmodels.tsa.SARIMAX. These will be removed after the 0.12 release.

statsmodels.tsa.arima.model.ARIMA makes use of the statespace framework and
is both well tested and maintained.

removed, use:




In [78]:
mse(model.predict(len(train), len(uci)-1), test)**0.5

1.890054432097731

## 3 days: found

In [79]:
train, test = uci[:-3], uci[-3:]

In [80]:
model=ARMA(train, order = (5, 5)).fit(disp=False)

statsmodels.tsa.arima_model.ARMA and statsmodels.tsa.arima_model.ARIMA have
been deprecated in favor of statsmodels.tsa.arima.model.ARIMA (note the .
between arima and model) and
statsmodels.tsa.SARIMAX. These will be removed after the 0.12 release.

statsmodels.tsa.arima.model.ARIMA makes use of the statespace framework and
is both well tested and maintained.

removed, use:




In [81]:
mse(model.predict(len(train), len(uci)-1), test)**0.5

0.5479489411117608

## 5 days

In [82]:
train, test = uci[:-5], uci[-5:]

In [83]:
model=ARMA(train, order = (4, 5)).fit(disp=False)

statsmodels.tsa.arima_model.ARMA and statsmodels.tsa.arima_model.ARIMA have
been deprecated in favor of statsmodels.tsa.arima.model.ARIMA (note the .
between arima and model) and
statsmodels.tsa.SARIMAX. These will be removed after the 0.12 release.

statsmodels.tsa.arima.model.ARIMA makes use of the statespace framework and
is both well tested and maintained.

removed, use:




In [84]:
mse(model.predict(len(train), len(uci)-1), test)**0.5

5.567187425260014

## 7 days

In [85]:
train, test = uci[:-7], uci[-7:]

In [86]:
model=ARMA(train, order = (2, 6)).fit(disp=False)

statsmodels.tsa.arima_model.ARMA and statsmodels.tsa.arima_model.ARIMA have
been deprecated in favor of statsmodels.tsa.arima.model.ARIMA (note the .
between arima and model) and
statsmodels.tsa.SARIMAX. These will be removed after the 0.12 release.

statsmodels.tsa.arima.model.ARIMA makes use of the statespace framework and
is both well tested and maintained.

removed, use:




In [87]:
mse(model.predict(len(train), len(uci)-1), test)**0.5

6.43941312604807

# Number of deaths

In [88]:
# 7-day average of deaths indexed by date. The first six rows are skipped:
# rolling(window=7) has no full window for them yet.
deaths = bydate.set_index('date')[['ave_7_num_dead']].iloc[6:]

## 1 day

In [89]:
train, test = deaths[:-1], deaths[-1:]

In [90]:
model=ARMA(train, order = (2, 3)).fit(disp=False)

statsmodels.tsa.arima_model.ARMA and statsmodels.tsa.arima_model.ARIMA have
been deprecated in favor of statsmodels.tsa.arima.model.ARIMA (note the .
between arima and model) and
statsmodels.tsa.SARIMAX. These will be removed after the 0.12 release.

statsmodels.tsa.arima.model.ARIMA makes use of the statespace framework and
is both well tested and maintained.

removed, use:




In [91]:
mse(model.predict(len(train), len(deaths)-1), test)**0.5

0.20906624231148285

## 3 days

In [92]:
train, test = deaths[:-3], deaths[-3:]

In [93]:
model=ARMA(train, order = (4, 5)).fit(disp=False)

statsmodels.tsa.arima_model.ARMA and statsmodels.tsa.arima_model.ARIMA have
been deprecated in favor of statsmodels.tsa.arima.model.ARIMA (note the .
between arima and model) and
statsmodels.tsa.SARIMAX. These will be removed after the 0.12 release.

statsmodels.tsa.arima.model.ARIMA makes use of the statespace framework and
is both well tested and maintained.

removed, use:




In [94]:
mse(model.predict(len(train), len(deaths)-1), test)**0.5

0.30310804371875627

## 5 days: found

In [99]:
train, test = deaths[:-5], deaths[-5:]

In [102]:
model=ARMA(train, order = (9, 6)).fit(disp=False)

statsmodels.tsa.arima_model.ARMA and statsmodels.tsa.arima_model.ARIMA have
been deprecated in favor of statsmodels.tsa.arima.model.ARIMA (note the .
between arima and model) and
statsmodels.tsa.SARIMAX. These will be removed after the 0.12 release.

statsmodels.tsa.arima.model.ARIMA makes use of the statespace framework and
is both well tested and maintained.

removed, use:




In [103]:
mse(model.predict(len(train), len(deaths)-1), test)**0.5

3.6016905827662167

## 7 days

In [122]:
train, test = deaths[:-7], deaths[-7:] # 19, 11 is the magic number

In [123]:
# NOTE(review): the recorded output of this cell ends in
# "ValueError: The computed initial AR coefficients are not stationary",
# so `model` is not rebound here; order (2, 7) is fitted further down instead.
model=ARMA(train, order = (19, 11)).fit(disp=False)

statsmodels.tsa.arima_model.ARMA and statsmodels.tsa.arima_model.ARIMA have
been deprecated in favor of statsmodels.tsa.arima.model.ARIMA (note the .
between arima and model) and
statsmodels.tsa.SARIMAX. These will be removed after the 0.12 release.

statsmodels.tsa.arima.model.ARIMA makes use of the statespace framework and
is both well tested and maintained.

removed, use:




ValueError: The computed initial AR coefficients are not stationary
You should induce stationarity, choose a different model order, or you can
pass your own start_params.

In [107]:
# NOTE(review): this cell's execution count (In [107]) predates the failed fit
# in In [123]; its recorded value matches the (2, 7) result in In [125], so it
# presumably reflects an earlier `model`. Re-run in order to confirm.
mse(model.predict(len(train), len(deaths)-1), test)**0.5

9.564724436904507

In [124]:
model=ARMA(train, order = (2, 7)).fit(disp=False)

statsmodels.tsa.arima_model.ARMA and statsmodels.tsa.arima_model.ARIMA have
been deprecated in favor of statsmodels.tsa.arima.model.ARIMA (note the .
between arima and model) and
statsmodels.tsa.SARIMAX. These will be removed after the 0.12 release.

statsmodels.tsa.arima.model.ARIMA makes use of the statespace framework and
is both well tested and maintained.

removed, use:




In [125]:
mse(model.predict(len(train), len(deaths)-1), test)**0.5

9.564724436904507