In [1]:
 import numpy as np
import pandas as pd
import matplotlib as mpl
import tensorflow as tf
import matplotlib.pyplot as plt
from sklearn.model_selection import train_test_split
from tensorflow.keras.preprocessing.sequence import TimeseriesGenerator as gen
from sklearn.preprocessing import MinMaxScaler, StandardScaler


col_list=["date","new_cases","positive_rate"]
dataset = pd.read_csv("new deaths dataset.csv",usecols=col_list)
dataset.describe()

Unnamed: 0,new_cases,positive_rate
count,500.0,500.0
mean,204.206,0.068814
std,296.233164,0.061634
min,0.0,0.0
25%,0.0,0.023
50%,89.0,0.045
75%,281.25,0.1
max,1583.0,0.319


In [2]:
dataset

Unnamed: 0,date,new_cases,positive_rate
0,2020-03-14,3.0,0.0
1,2020-03-15,3.0,0.0
2,2020-03-16,0.0,0.0
3,2020-03-17,1.0,0.0
4,2020-03-18,0.0,0.0
...,...,...,...
495,2021-07-22,0.0,0.0
496,2021-07-23,1090.0,0.0
497,2021-07-24,497.0,0.0
498,2021-07-25,423.0,0.0


In [3]:
dataset['date']=pd.to_datetime(dataset['date'], infer_datetime_format= True)
df= dataset.loc[:,dataset.columns !='date']

In [4]:
scaler= MinMaxScaler()

dataScaled=scaler.fit_transform(df)

In [5]:
dataScaled

array([[1.89513582e-03, 0.00000000e+00],
       [1.89513582e-03, 0.00000000e+00],
       [0.00000000e+00, 0.00000000e+00],
       [6.31711939e-04, 0.00000000e+00],
       [0.00000000e+00, 0.00000000e+00],
       [2.52684776e-03, 0.00000000e+00],
       [3.15855970e-03, 0.00000000e+00],
       [1.89513582e-03, 0.00000000e+00],
       [2.52684776e-03, 0.00000000e+00],
       [2.52684776e-03, 0.00000000e+00],
       [1.64245104e-02, 1.84952978e-01],
       [2.52684776e-02, 2.35109718e-01],
       [2.46367656e-02, 2.50783699e-01],
       [3.15855970e-03, 2.00626959e-01],
       [2.52684776e-03, 1.69278997e-01],
       [6.94883133e-03, 1.25391850e-01],
       [0.00000000e+00, 9.71786834e-02],
       [5.68540745e-03, 7.21003135e-02],
       [2.14782059e-02, 6.26959248e-02],
       [5.68540745e-03, 3.76175549e-02],
       [6.31711939e-04, 3.44827586e-02],
       [0.00000000e+00, 2.82131661e-02],
       [5.68540745e-03, 2.82131661e-02],
       [0.00000000e+00, 2.82131661e-02],
       [4.611497

In [6]:
features= dataScaled
target= dataScaled[:,0]

In [7]:
X_train, X_test, Y_train, Y_test = train_test_split(features, target, test_size= 0.10, random_state=123, shuffle=False) 

In [8]:
win_length= 9
batch_size= 1
num_features=2
train_generator= gen(X_train,Y_train,length=win_length,sampling_rate=1, batch_size= batch_size)
test_generator= gen(X_test,Y_test,length=win_length,sampling_rate=1, batch_size= batch_size)

In [9]:
train_generator[0]

(array([[[0.00189514, 0.        ],
         [0.00189514, 0.        ],
         [0.        , 0.        ],
         [0.00063171, 0.        ],
         [0.        , 0.        ],
         [0.00252685, 0.        ],
         [0.00315856, 0.        ],
         [0.00189514, 0.        ],
         [0.00252685, 0.        ]]]),
 array([0.00252685]))

In [10]:
model= tf.keras.Sequential()
model.add(tf.keras.layers.LSTM(128,input_shape=(win_length,num_features),return_sequences=True))
model.add(tf.keras.layers.LeakyReLU(alpha=0.5))
model.add(tf.keras.layers.LSTM(128,return_sequences=True))
model.add(tf.keras.layers.LeakyReLU(alpha=0.5))
model.add(tf.keras.layers.Dropout(0.3))
model.add(tf.keras.layers.LSTM(64,return_sequences=False))
model.add(tf.keras.layers.Dropout(0.3))
model.add(tf.keras.layers.Dense(1))

In [11]:
model.summary()

Model: "sequential"
_________________________________________________________________
Layer (type)                 Output Shape              Param #   
lstm (LSTM)                  (None, 9, 128)            67072     
_________________________________________________________________
leaky_re_lu (LeakyReLU)      (None, 9, 128)            0         
_________________________________________________________________
lstm_1 (LSTM)                (None, 9, 128)            131584    
_________________________________________________________________
leaky_re_lu_1 (LeakyReLU)    (None, 9, 128)            0         
_________________________________________________________________
dropout (Dropout)            (None, 9, 128)            0         
_________________________________________________________________
lstm_2 (LSTM)                (None, 64)                49408     
_________________________________________________________________
dropout_1 (Dropout)          (None, 64)                0

In [12]:
early_stopping= tf.keras.callbacks.EarlyStopping(monitor='val_loss',patience=2,mode='min')
model.compile(loss=tf.losses.MeanSquaredError(),optimizer=tf.optimizers.Adam(),metrics=[tf.metrics.MeanAbsoluteError()])
history=model.fit(train_generator,epochs=50,validation_data=test_generator,shuffle=False, callbacks=[early_stopping])

Epoch 1/50
Epoch 2/50
Epoch 3/50
Epoch 4/50
Epoch 5/50


In [13]:
model.evaluate_generator(test_generator, verbose=0)

Instructions for updating:
Please use Model.evaluate, which supports generators.


[0.03273167088627815, 0.11066210269927979]

In [14]:
model.evaluate_generator(test_generator, verbose=0)

[0.03273167088627815, 0.11066210269927979]

In [15]:
predictions=model.predict(test_generator)

In [16]:
predictions.shape[0]

41

In [17]:
predictions

array([[0.01759544],
       [0.01769169],
       [0.01764573],
       [0.01772415],
       [0.01760009],
       [0.01737597],
       [0.01729576],
       [0.01727613],
       [0.01726437],
       [0.01741517],
       [0.01727225],
       [0.01704935],
       [0.01701763],
       [0.01706746],
       [0.01728661],
       [0.01775923],
       [0.01787214],
       [0.01750956],
       [0.01721809],
       [0.01702482],
       [0.01690353],
       [0.016899  ],
       [0.01689902],
       [0.01688211],
       [0.01681495],
       [0.01640841],
       [0.01622978],
       [0.01616441],
       [0.01612034],
       [0.0158127 ],
       [0.01560752],
       [0.01560098],
       [0.01570542],
       [0.01575959],
       [0.01571576],
       [0.01572191],
       [0.01514144],
       [0.01480646],
       [0.01493114],
       [0.01561072],
       [0.0157766 ]], dtype=float32)

In [18]:
Y_test

array([0.        , 0.        , 0.08907138, 0.        , 0.0473784 ,
       0.03095389, 0.        , 0.        , 0.13013266, 0.07896399,
       0.        , 0.05622236, 0.        , 0.        , 0.09222994,
       0.11181301, 0.        , 0.01452937, 0.        , 0.        ,
       0.06948831, 0.06759318, 0.10486418, 0.17182565, 0.        ,
       0.        , 0.09665193, 0.15792798, 0.05369551, 0.        ,
       0.19330385, 0.16614024, 0.        , 0.1004422 , 0.28742893,
       0.        , 0.09033481, 0.        , 0.24384081, 0.        ,
       0.20277953, 0.24131396, 0.        , 0.2166772 , 0.        ,
       0.        , 0.68856601, 0.31396083, 0.26721415, 0.58938724])

In [19]:
X_test[:,1:][win_length:]

array([[0.05642633],
       [0.05642633],
       [0.05642633],
       [0.05329154],
       [0.05329154],
       [0.06896552],
       [0.06269592],
       [0.04702194],
       [0.05015674],
       [0.04075235],
       [0.03761755],
       [0.05015674],
       [0.04702194],
       [0.04388715],
       [0.07523511],
       [0.06896552],
       [0.06896552],
       [0.0846395 ],
       [0.10031348],
       [0.09717868],
       [0.0815047 ],
       [0.0815047 ],
       [0.10971787],
       [0.10658307],
       [0.10658307],
       [0.12225705],
       [0.11285266],
       [0.12539185],
       [0.09404389],
       [0.10344828],
       [0.09717868],
       [0.10971787],
       [0.10344828],
       [0.        ],
       [0.        ],
       [0.        ],
       [0.        ],
       [0.        ],
       [0.        ],
       [0.        ],
       [0.        ]])

In [20]:
df_pred= pd.concat([pd.DataFrame(predictions), pd.DataFrame(X_test[:,1:][win_length:])], axis=1)

In [21]:
rev_trans= scaler.inverse_transform(df_pred)

In [22]:
rev_trans

array([[2.78535818e+01, 1.80000000e-02],
       [2.80059519e+01, 1.80000000e-02],
       [2.79331931e+01, 1.80000000e-02],
       [2.80573278e+01, 1.70000000e-02],
       [2.78609414e+01, 1.70000000e-02],
       [2.75061580e+01, 2.20000000e-02],
       [2.73791927e+01, 2.00000000e-02],
       [2.73481148e+01, 1.50000000e-02],
       [2.73294916e+01, 1.60000000e-02],
       [2.75682194e+01, 1.30000000e-02],
       [2.73419700e+01, 1.20000000e-02],
       [2.69891208e+01, 1.60000000e-02],
       [2.69389008e+01, 1.50000000e-02],
       [2.70177927e+01, 1.40000000e-02],
       [2.73646975e+01, 2.40000000e-02],
       [2.81128670e+01, 2.20000000e-02],
       [2.82915973e+01, 2.20000000e-02],
       [2.77176292e+01, 2.70000000e-02],
       [2.72562374e+01, 3.20000000e-02],
       [2.69502823e+01, 3.10000000e-02],
       [2.67582834e+01, 2.60000000e-02],
       [2.67511242e+01, 2.60000000e-02],
       [2.67511478e+01, 3.50000000e-02],
       [2.67243748e+01, 3.40000000e-02],
       [2.661806

In [23]:
df_final = df[predictions.shape[0]*-1:]

In [24]:
df_final.count()

new_cases        41
positive_rate    41
dtype: int64

In [25]:
df_final['new_cases_pred'] = rev_trans[:,0]

A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  df_final['new_cases_pred'] = rev_trans[:,0]


In [26]:
df_final

Unnamed: 0,new_cases,positive_rate,new_cases_pred
459,125.0,0.018,27.853582
460,0.0,0.018,28.005952
461,89.0,0.018,27.933193
462,0.0,0.017,28.057328
463,0.0,0.017,27.860941
464,146.0,0.022,27.506158
465,177.0,0.02,27.379193
466,0.0,0.015,27.348115
467,23.0,0.016,27.329492
468,0.0,0.013,27.568219


In [27]:
from sklearn.metrics import mean_absolute_error
mae_new_cases=mean_absolute_error(df_final['new_cases'],df_final['new_cases_pred'])
print('Mean absolute error of new cases is:',mae_new_cases)

Mean absolute error of new cases is: 175.1780964114317


In [28]:
import math 
from statistics import mean
mean_new_cases = mean(df_final['new_cases'])
print(mean_new_cases)

180.58536585365854
