In [1]:
import pandas as pd
import numpy as np
import xml.etree.cElementTree as et
from datetime import datetime
from pandas.core.tools.datetimes import to_datetime
import matplotlib.pyplot as plt
from datetime import timedelta

In [2]:
def read_xml_data(filename, selected_items):
  tree=et.parse(filename)
  root=tree.getroot()
  #extract selected items
  for child in root:
    if child.tag in selected_items:
      df = pd.DataFrame()
      for elem in child:      
        df1 = pd.DataFrame(elem.attrib, index=[0])
        #df = df.append(df1)d
        df = pd.concat([df, df1])
      #First column is the timestamp (dayfirst)      
      #df.iloc[:,0] = pd.to_datetime(df.iloc[:,0], dayfirst=True)
      #write to csv file using the timestamp as index      
      df.to_csv(child.tag+'.csv', index=0)  

In [3]:
def read_ts_file(filename):
  #reads csv file where the first column is a timestamp and the index column
  df = pd.read_csv (filename, parse_dates=[0], dayfirst=True, index_col=0)
  return df

In [4]:
def align_timeseries(align_df, source_df, source_columns, default_vals, dest_columns=[], include_index=False):
  # Generates a new dataframe with source_df data aligned to align_df
  # source_columns : list of columns from source_df to be included in result
  # dest_columns = column names of source_columns, if ommited column names will be the same as in source_df
  #                if specified, the first column has to be the name of the index in the destination
  # include_index: if source index must be included in the result
  # default_vals = list of default values for source_columns
 
  df_align = align_df.copy()
  df_align = df_align.sort_index()
  df2 = source_df.copy()
  df2 = df2.sort_index()
  df_dup = pd.DataFrame()
  if (dest_columns==[]) : dest_columns = source_columns.copy()
  if (len(dest_columns) == len(source_columns)) : dest_columns = ['source_ts'] + dest_columns # add column for index
  #add blank cells in destination
  df_align[dest_columns] = [np.nan] + default_vals
  for i in range(len(df2)):
      ts2 = df2.index[i]
      ts1_loc = df_align.index.get_loc(ts2, method='nearest')
      ts1 = df_align.index[ts1_loc]
      vals = [ts2]+list(df2.loc[ts2, source_columns])
      ts_old = df_align.loc[ts1, dest_columns[0]]
      if not pd.isna(ts_old): #there are duplicates
        if (abs(ts1.value-ts2.value) > abs(ts1.value-ts_old.value)): 
          #difference is greater, do not replace        
          df_dup = df_dup.append(df_align.loc[ts1])
          df_dup.loc[ts1, dest_columns] = vals
        else: #replace
          df_dup = df_dup.append(df_align.loc[ts1])
          df_align.loc[ts1, dest_columns] = vals
        continue
      df_align.loc[ts1, dest_columns] = vals
  
  if not(include_index) : df_align = df_align.drop( columns = dest_columns[0])

  return df_align, df_dup

In [5]:
def timedf(df):
  #creates a 5 minute interval timeseries dataframe based in index of df
  # df must have a timestamp index
  #time_df: result dataframe with timestamp index
  timestamp = pd.date_range(start=df.index[0], end=df.index[-1]  + timedelta(minutes=4), freq='5T')
  time_df = pd.DataFrame({'timestamp':timestamp})
  time_df.set_index('timestamp', inplace=True)
  return time_df

In [6]:
def find_gaps(df, greaterthan=5, units='m'):
  # find gaps relative to index, index must be a datetime field
  # greaterthan is the number of time units to be considered a gap
  # units 'm'=minutes, 'h'=hours
  i = 0
  gaps_df = pd.DataFrame()
  while i < len(df) - 1:
    ts = df.index[i]
    next_ts = df.index[i+1]
    duration = next_ts - ts
    if duration > np.timedelta64(greaterthan, units): 
      begin_gap = ts
      end_gap = next_ts
      gaps_df = gaps_df.append({'From': begin_gap, 'To': end_gap, 'Duration': duration}, ignore_index=True)
    i = i + 1
  gaps_df.sort_values(by=['Duration'], ascending=False, inplace=True)
  return gaps_df

In [7]:
def read_files():
  #read glucose
  glucose_df = read_ts_file('glucose_level.csv')
  glucose_df.rename(columns={"ts": "timestamp", "value": "glucose"}, inplace=True)
  print(glucose_df)
  #read meal
  meal_df = read_ts_file('meal.csv')
  print(meal_df)
  #read bolus
  meal_df = read_ts_file('bolus.csv')
  print(meal_df)

In [8]:
def impute_mean(df, column, by='hour'):
  # impute with mean by hours
  # in the future by could be another mean grouping criterion
  df[by] = df.index.hour
  df[column] = df.groupby(by)[column].apply(lambda x: x.fillna(x.mean()))
  df.drop(by, axis=1, inplace=True)

## Read Training  Data

In [9]:
#selected_items = ['glucose_level','bolus','meal']
read_xml_data(filename='c://aadm/570-ws-training.xml', selected_items=['glucose_level','bolus','meal'])

In [10]:
glucose_train = read_ts_file('glucose_level.csv')
glucose_train.rename(columns={"ts": "timestamp", "value": "glucose"}, inplace=True)
glucose_train.describe()

Unnamed: 0,glucose
count,10982.0
mean,187.492989
std,62.328928
min,46.0
25%,142.0
50%,189.0
75%,232.0
max,377.0


# Explore gaps

In [11]:
#Finding the length of the complete time series
time_df = timedf(glucose_train)
print('Missing intervals: ', len(time_df) - len(glucose_train) )

Missing intervals:  629


In [12]:
# All gaps 
gaps_df = find_gaps(glucose_train)
#print(gaps_df)

  gaps_df = gaps_df.append({'From': begin_gap, 'To': end_gap, 'Duration': duration}, ignore_index=True)
  gaps_df = gaps_df.append({'From': begin_gap, 'To': end_gap, 'Duration': duration}, ignore_index=True)
  gaps_df = gaps_df.append({'From': begin_gap, 'To': end_gap, 'Duration': duration}, ignore_index=True)
  gaps_df = gaps_df.append({'From': begin_gap, 'To': end_gap, 'Duration': duration}, ignore_index=True)
  gaps_df = gaps_df.append({'From': begin_gap, 'To': end_gap, 'Duration': duration}, ignore_index=True)
  gaps_df = gaps_df.append({'From': begin_gap, 'To': end_gap, 'Duration': duration}, ignore_index=True)
  gaps_df = gaps_df.append({'From': begin_gap, 'To': end_gap, 'Duration': duration}, ignore_index=True)
  gaps_df = gaps_df.append({'From': begin_gap, 'To': end_gap, 'Duration': duration}, ignore_index=True)
  gaps_df = gaps_df.append({'From': begin_gap, 'To': end_gap, 'Duration': duration}, ignore_index=True)
  gaps_df = gaps_df.append({'From': begin_gap, 'To': end_gap, 'D

In [13]:
#Adding NA in the whole range of cgm-training
cgmtrainmiss=glucose_train.resample('5T').mean()
cgmtrainmiss.head()

Unnamed: 0_level_0,glucose
ts,Unnamed: 1_level_1
2021-12-07 16:25:00,101.0
2021-12-07 16:30:00,100.0
2021-12-07 16:35:00,100.0
2021-12-07 16:40:00,99.0
2021-12-07 16:45:00,98.0


In [14]:
#mean imputation
impute_mean(cgmtrainmiss, 'glucose')
cgmtrainclean=cgmtrainmiss
cgmtrainclean.head()

Unnamed: 0_level_0,glucose
ts,Unnamed: 1_level_1
2021-12-07 16:25:00,101.0
2021-12-07 16:30:00,100.0
2021-12-07 16:35:00,100.0
2021-12-07 16:40:00,99.0
2021-12-07 16:45:00,98.0


In [15]:
#cgmtrainclean= cgmtrainmiss.interpolate(method="polynomial",order=3)
#cgmtrainclean.describe()

In [16]:
# spline smoothing
import rpy2
import rpy2.rinterface
%load_ext rpy2.ipython
import rpy2.robjects as robjects
from rpy2.robjects.packages import importr
splines = importr('splines') 
x_train=np.arange(len(cgmtrainclean))
y_train=cgmtrainclean['glucose']
r_y = robjects.FloatVector(y_train)
r_x = robjects.FloatVector(x_train)
r_smooth_spline = robjects.r['smooth.spline'] #extract R function# run smoothing function
spline1 = r_smooth_spline(x=r_x,y=r_y, spar=.01)
ySpline=np.array(robjects.r['predict'](spline1,robjects.FloatVector(x_train)).rx2('y'))
# print(ySpline)
# plt.figure(figsize=(12,6))
# plt.scatter(x_train,y_train,c="blue")
# plt.plot(x_train,ySpline,c='red')

Unable to determine R home: [WinError 2] The system cannot find the file specified


In [17]:
#cgmtrainclean= cgmtrainmiss.interpolate(method="spline",order=3)
#cgmtrainclean.head()
#missing_minutes =list(df2sp2[df2sp2['glucose'].isna()].index)
#missing_minutes
#cgmtrainclean.info()

In [18]:
from tsmoothie.smoother import *
from tsmoothie.utils_func import create_windows
#help(SplineSmoother)

In [19]:
#Imputation using Kalman smoothing and spline smoothing
from tsmoothie.smoother import *
from tsmoothie.utils_func import create_windows
#smoother1 = SplineSmoother(n_knots=100, spline_type='cubic_spline')
#smoother1.smooth(cgmtrainclean[['glucose']].T)
smoother = KalmanSmoother(component='level_season', 
                          component_noise={'level':0.1, 'season':0.1},n_seasons=7)
smoother.smooth(cgmtrainmiss[['glucose']].T)
glucosekf=smoother.smooth_data[0]
cgmtrainclean=cgmtrainmiss
#cgmtrainclean['glucose']=glucosekf
cgmtrainclean['glucose']=ySpline
#smoother1.smooth_data[0].mean()

## Reading testing data

In [20]:
#selected_items = ['glucose_level',"bolus','meal']
read_xml_data(filename='c://aadm/570-ws-testing.xml', selected_items=['glucose_level','bolus','meal'])

In [21]:
glucose_test = read_ts_file('glucose_level.csv')
glucose_test.rename(columns={"ts": "timestamp", "value": "glucose"}, inplace=True)
glucose_test.min()

glucose    60
dtype: int64

In [22]:
#Finding the length of the complete time series
time_df = timedf(glucose_test)
print('Missing intervals: ', len(time_df) - len(glucose_test) )

Missing intervals:  135


In [23]:
# All gaps 
gaps_df = find_gaps(glucose_test)
#print(gaps_df)

  gaps_df = gaps_df.append({'From': begin_gap, 'To': end_gap, 'Duration': duration}, ignore_index=True)
  gaps_df = gaps_df.append({'From': begin_gap, 'To': end_gap, 'Duration': duration}, ignore_index=True)
  gaps_df = gaps_df.append({'From': begin_gap, 'To': end_gap, 'Duration': duration}, ignore_index=True)
  gaps_df = gaps_df.append({'From': begin_gap, 'To': end_gap, 'Duration': duration}, ignore_index=True)
  gaps_df = gaps_df.append({'From': begin_gap, 'To': end_gap, 'Duration': duration}, ignore_index=True)
  gaps_df = gaps_df.append({'From': begin_gap, 'To': end_gap, 'Duration': duration}, ignore_index=True)
  gaps_df = gaps_df.append({'From': begin_gap, 'To': end_gap, 'Duration': duration}, ignore_index=True)
  gaps_df = gaps_df.append({'From': begin_gap, 'To': end_gap, 'Duration': duration}, ignore_index=True)
  gaps_df = gaps_df.append({'From': begin_gap, 'To': end_gap, 'Duration': duration}, ignore_index=True)


In [24]:
#Adding NA in the whole range of cgm-testing
cgmtestmiss=glucose_test.resample('5T').mean()
cgmtestmiss.head()

Unnamed: 0_level_0,glucose
ts,Unnamed: 1_level_1
2022-01-17 00:00:00,135.0
2022-01-17 00:05:00,143.0
2022-01-17 00:10:00,152.0
2022-01-17 00:15:00,159.0
2022-01-17 00:20:00,166.0


In [25]:
#def impute_mean(df, column,by="hour"):
  # impute with mean by hours
  # in the future by could be another grouping criterion
  #df[by] = df.index.hour
  #df[by] = ['{}:{:02d}'.format(r.hour, (r.minute//10)*10) for r in df.index]
  #print(df)
  #df[column] = df.groupby(by)[column].apply(lambda x: x.fillna(x.aggregate(np.mean)))
  #df.drop(by, axis=1, inplace=True)
#impute_mean(cgm559trainmiss, 'glucose')
#impute_mean(cgm559testmiss, 'glucose')
#cgm559trainclean=cgm559trainmiss
#cgm559testclean=cgm559testmiss
#cgm559trainclean.info()

In [26]:
#mean imputation
impute_mean(cgmtestmiss, 'glucose')
cgmtestclean=cgmtestmiss
cgmtestclean.head()

Unnamed: 0_level_0,glucose
ts,Unnamed: 1_level_1
2022-01-17 00:00:00,135.0
2022-01-17 00:05:00,143.0
2022-01-17 00:10:00,152.0
2022-01-17 00:15:00,159.0
2022-01-17 00:20:00,166.0


In [27]:
# #spline imputation
import rpy2
import rpy2.rinterface
%load_ext rpy2.ipython
import rpy2.robjects as robjects
from rpy2.robjects.packages import importr
splines = importr('splines') 
x_train=np.arange(len(cgmtestclean))
y_train=cgmtestclean['glucose']
r_y = robjects.FloatVector(y_train)
r_x = robjects.FloatVector(x_train)
r_smooth_spline = robjects.r['smooth.spline'] #extract R function# run smoothing function
spline1 = r_smooth_spline(x=r_x,y=r_y, spar= .01)
ySpline=np.array(robjects.r['predict'](spline1,robjects.FloatVector(x_train)).rx2('y'))
#print(ySpline)
# plt.figure(figsize=(12,6))
# plt.scatter(x_train,y_train,c="blue")
# plt.plot(x_train,ySpline,c='red')

The rpy2.ipython extension is already loaded. To reload it, use:
  %reload_ext rpy2.ipython


In [28]:
#cgmtestclean= cgmtestmiss.interpolate(method="polynomial",order=3)
#missing_minutes =list(df2sp2[df2sp2['glucose'].isna()].index)
#missing_minutes
#cgmtestclean.describe()

In [29]:
#Imputation using Kalman filter
smoother1 = KalmanSmoother(component='level_season', 
                          component_noise={'level':0.1, 'season':0.1},n_seasons=7)
smoother1.smooth(cgmtestmiss[['glucose']].T)
# smoother1 = SplineSmoother(n_knots=100, spline_type='cubic_spline')
# smoother1.smooth(cgmtestclean[['glucose']].T)
glucosekf=smoother1.smooth_data[0]
cgmtestclean=cgmtestmiss
#cgmtestclean['glucose']=glucosekf
cgmtestclean['glucose']=ySpline
#smoother.smooth_data[0].mean()
#smoother.smooth_data[0].mean()

In [30]:
#Normalizing the data
from sklearn.preprocessing import MinMaxScaler
from sklearn.metrics import mean_squared_error
datatrain = np.array(cgmtrainclean.values.astype('float32'))
datatest = np.array(cgmtestclean.values.astype('float32'))
max=datatest.max(axis=0)[0]
min=datatest.min(axis=0)[0]
scaler = MinMaxScaler(feature_range=(0, 1))
datatrain = scaler.fit_transform(datatrain).flatten()
datatest = scaler.fit_transform(datatest).flatten()
#n = len(data)
train_data=pd.DataFrame(datatrain)
test_data=pd.DataFrame(datatest)
print(max,min)

377.3104 70.65777


In [31]:
print(train_data.shape)
print(test_data.shape)

(11611, 1)
(2880, 1)


In [32]:
# convert time series into supervised learning problem
def series_to_supervised(data, n_in=1, n_out=1, dropnan=True):
    n_vars=1 if type(data) is list else data.shape[1]
    df = pd.DataFrame(data)
    cols, names = list(), list()
    # input sequence (t-n, ... t-1)
    for i in range(n_in, 0, -1):
        cols.append(df.shift(i))
        names += [('var%d(t-%d)' % (j+1, i)) for j in range(n_vars)]
    # forecast sequence (t, t+1, ... t+n)
    for i in range(0, n_out):
        cols.append(df.shift(-i))
        if i == 0:
            names += [('var%d(t)' % (j+1)) for j in range(n_vars)]
        else:
            names += [('var%d(t+%d)' % (j+1, i)) for j in range(n_vars)]
    # put it all together
    agg = pd.concat(cols, axis=1)
    agg.columns = names
    # drop rows with NaN values
    if dropnan:
        agg.dropna(inplace=True)
    return agg

In [33]:
#Past history: One hour, Prediction horizon: 30 minutes 
n_lag=12
n_seq=6
epochs=50

In [34]:
data1=series_to_supervised(train_data, n_in=n_lag, n_out=n_seq, dropnan=True)
data2=series_to_supervised(test_data, n_in=n_lag, n_out=n_seq, dropnan=True)
train=data1.values
test=data2.values
#print(data1.shape)
print("test shape:",test.shape)

test shape: (2863, 18)


In [35]:
from keras.models import Sequential
from keras.layers import Dense
from keras.layers import Flatten
from keras.layers.convolutional import Conv1D
from keras.layers import TimeDistributed
from keras.layers.convolutional import MaxPooling1D
import datetime   
import math

In [36]:
n_features=1
timesteps=n_lag
X, y = train[:, 0:n_lag], train[:, n_lag:]
print(type(X))
print(X.shape)
print(y.shape)
rows_x = len(y)
#print("rows-x=",rows_x)
#print(trainX.shape)
X = np.reshape(X, (rows_x, timesteps, 1)) 
print(X.shape)
#print('train-train',X.shape)
#print('train-test',y.shape)
model = Sequential()
model.add(Conv1D(filters=64, kernel_size=2, activation='relu', input_shape=(timesteps, n_features)))
model.add(MaxPooling1D(pool_size=2))
model.add(Flatten())
model.add(Dense(50, activation='relu'))
model.add(Dense(n_seq))
model.summary()
model.compile(optimizer='adam', loss='mse')

<class 'numpy.ndarray'>
(11594, 12)
(11594, 6)
(11594, 12, 1)
Model: "sequential"
_________________________________________________________________
 Layer (type)                Output Shape              Param #   
 conv1d (Conv1D)             (None, 11, 64)            192       
                                                                 
 max_pooling1d (MaxPooling1D  (None, 5, 64)            0         
 )                                                               
                                                                 
 flatten (Flatten)           (None, 320)               0         
                                                                 
 dense (Dense)               (None, 50)                16050     
                                                                 
 dense_1 (Dense)             (None, 6)                 306       
                                                                 
Total params: 16,548
Trainable params: 16,548
Non-trainable 

In [37]:
import time
start_time = time.time()
model.fit(X, y, epochs=200,batch_size=16,validation_split=.2,verbose=2)
#model.fit(X, y, epochs=200,validation_split=.2,verbose=2)

Epoch 1/200
580/580 - 1s - loss: 0.0059 - val_loss: 7.0519e-04 - 1s/epoch - 2ms/step
Epoch 2/200
580/580 - 1s - loss: 2.2492e-04 - val_loss: 6.2560e-05 - 692ms/epoch - 1ms/step
Epoch 3/200
580/580 - 1s - loss: 5.6999e-05 - val_loss: 5.1126e-05 - 717ms/epoch - 1ms/step
Epoch 4/200
580/580 - 1s - loss: 6.3533e-05 - val_loss: 4.7421e-05 - 766ms/epoch - 1ms/step
Epoch 5/200
580/580 - 1s - loss: 5.9315e-05 - val_loss: 5.3373e-05 - 742ms/epoch - 1ms/step
Epoch 6/200
580/580 - 1s - loss: 6.0956e-05 - val_loss: 1.0538e-04 - 779ms/epoch - 1ms/step
Epoch 7/200
580/580 - 1s - loss: 5.6142e-05 - val_loss: 5.5279e-05 - 703ms/epoch - 1ms/step
Epoch 8/200
580/580 - 1s - loss: 6.1472e-05 - val_loss: 7.3827e-05 - 701ms/epoch - 1ms/step
Epoch 9/200
580/580 - 1s - loss: 5.7435e-05 - val_loss: 4.5032e-05 - 651ms/epoch - 1ms/step
Epoch 10/200
580/580 - 1s - loss: 5.9341e-05 - val_loss: 4.3999e-05 - 670ms/epoch - 1ms/step
Epoch 11/200
580/580 - 1s - loss: 6.8721e-05 - val_loss: 5.0867e-05 - 679ms/epoch - 1m

Epoch 90/200
580/580 - 1s - loss: 4.8479e-06 - val_loss: 3.7650e-06 - 773ms/epoch - 1ms/step
Epoch 91/200
580/580 - 1s - loss: 3.3275e-06 - val_loss: 4.3463e-06 - 779ms/epoch - 1ms/step
Epoch 92/200
580/580 - 1s - loss: 3.4329e-06 - val_loss: 3.0739e-06 - 895ms/epoch - 2ms/step
Epoch 93/200
580/580 - 1s - loss: 3.5776e-06 - val_loss: 8.1301e-06 - 875ms/epoch - 2ms/step
Epoch 94/200
580/580 - 1s - loss: 3.9595e-06 - val_loss: 4.2418e-06 - 865ms/epoch - 1ms/step
Epoch 95/200
580/580 - 1s - loss: 3.6894e-06 - val_loss: 1.9245e-05 - 1s/epoch - 2ms/step
Epoch 96/200
580/580 - 1s - loss: 3.3486e-06 - val_loss: 3.3590e-06 - 1s/epoch - 2ms/step
Epoch 97/200
580/580 - 1s - loss: 3.7037e-06 - val_loss: 2.6939e-05 - 1s/epoch - 2ms/step
Epoch 98/200
580/580 - 1s - loss: 3.1480e-06 - val_loss: 1.0835e-05 - 766ms/epoch - 1ms/step
Epoch 99/200
580/580 - 1s - loss: 5.6040e-06 - val_loss: 2.9224e-06 - 710ms/epoch - 1ms/step
Epoch 100/200
580/580 - 1s - loss: 2.5606e-06 - val_loss: 5.0554e-06 - 710ms/ep

Epoch 178/200
580/580 - 1s - loss: 2.3443e-06 - val_loss: 4.1377e-06 - 638ms/epoch - 1ms/step
Epoch 179/200
580/580 - 1s - loss: 3.4367e-06 - val_loss: 9.4126e-06 - 685ms/epoch - 1ms/step
Epoch 180/200
580/580 - 1s - loss: 2.6457e-06 - val_loss: 8.2429e-06 - 809ms/epoch - 1ms/step
Epoch 181/200
580/580 - 1s - loss: 2.3961e-06 - val_loss: 6.4546e-06 - 658ms/epoch - 1ms/step
Epoch 182/200
580/580 - 1s - loss: 3.2226e-06 - val_loss: 5.2582e-06 - 640ms/epoch - 1ms/step
Epoch 183/200
580/580 - 1s - loss: 2.1520e-06 - val_loss: 4.4613e-06 - 673ms/epoch - 1ms/step
Epoch 184/200
580/580 - 1s - loss: 3.2156e-06 - val_loss: 4.8934e-06 - 737ms/epoch - 1ms/step
Epoch 185/200
580/580 - 1s - loss: 1.9805e-06 - val_loss: 6.3774e-06 - 776ms/epoch - 1ms/step
Epoch 186/200
580/580 - 1s - loss: 3.2225e-06 - val_loss: 4.1480e-06 - 745ms/epoch - 1ms/step
Epoch 187/200
580/580 - 1s - loss: 2.3351e-06 - val_loss: 6.8548e-06 - 754ms/epoch - 1ms/step
Epoch 188/200
580/580 - 1s - loss: 3.6819e-06 - val_loss: 5.

<keras.callbacks.History at 0x1e74dec8430>

In [38]:
#Normalized Testing predictions 
xt=test[:,0:12]
xt=np.reshape(xt,(xt.shape[0],12,1))
a=model.predict(xt,batch_size=n_seq)
print(a)

[[0.41719362 0.41937268 0.41936922 0.41707924 0.4133876  0.4073137 ]
 [0.42792493 0.4304865  0.43074816 0.42917383 0.42638707 0.42131382]
 [0.43800023 0.44096506 0.44179028 0.44122413 0.43946993 0.43551514]
 ...
 [0.42865947 0.42607677 0.4223253  0.4179763  0.41269612 0.40621462]
 [0.43081295 0.42866522 0.42561936 0.42216274 0.4179967  0.41259214]
 [0.43434328 0.43310744 0.43130556 0.42924577 0.4267465  0.4230691 ]]


In [39]:
#Denormalizing prediction
forec1=a*(max-min)+min
print(forec1.shape)

(2863, 6)


In [40]:
#Denomalizing the actual values
test=pd.DataFrame(test)
testy=test.loc[:,12:18]
actual=testy*(max-min)+min
print(actual.shape)

(2863, 6)


In [41]:
#Computing the RMSE
forec1=np.array(forec1)
actual=np.array(actual)
diff=actual-forec1
#print(diff.shape)
#np.sqrt(np.mean((diff)**2,axis=0))
print("RMSE:",np.sqrt(np.mean((diff)**2,axis=0)).mean())

RMSE: 4.755633


In [42]:
print("--- %s seconds ---" % (time.time() - start_time))

--- 142.58808493614197 seconds ---
