In [29]:
import pandas as pd
import numpy as np
import math
import statistics
from sklearn.metrics import mean_squared_error
from sklearn.preprocessing import MinMaxScaler
import time
from google.colab import drive

from sklearn.model_selection import GridSearchCV
from sklearn.model_selection import RepeatedKFold
from sklearn.model_selection import learning_curve
from sklearn.kernel_ridge import KernelRidge
import matplotlib.pyplot as plt

# Mount Google Drive at /content/gdrive; the data and figure folders used by the
# later cells all live under that mount point.
drive.mount('/content/gdrive')

Drive already mounted at /content/gdrive; to attempt to forcibly remount, call drive.mount("/content/gdrive", force_remount=True).


In [30]:
# NOTE: both paths are relative ('gdrive/...') while the drive is mounted at the
# absolute path '/content/gdrive', so they only resolve when the working
# directory is /content (presumably Colab's default — confirm before running elsewhere).

# Folder with the wavelet-transformed simulated CGM traces. In the visible cells
# it is only referenced by the commented-out simulated-data code at the end.
data_path = 'gdrive/My Drive/Summer Research/Simulated CGM Data/Wavelet Transformed Data/'
# Folder with the cleaned D1NAMO glucose CSV files read by the run loop.
d1namo_data_path = 'gdrive/My Drive/Summer Research/Glucose/Diabetes/Cleaned Data/'

Normalization

In [31]:
def normalize(y):
  """Rescale a series to the [0, 1] range.

  Args:
    y: NumPy array of values; it is reshaped into a single column before scaling.

  Returns:
    A tuple `(y_scaled, scaler)`: the scaled values as an (n, 1) array and the
    fitted MinMaxScaler, which can later undo the scaling via `inverse_transform`.
  """
  column = y.reshape(-1, 1)
  unit_scaler = MinMaxScaler(feature_range=(0, 1)).fit(column)
  return unit_scaler.transform(column), unit_scaler

Fit the regression model and generate predictions for the test (forecast) window

In [32]:
def KRRModel(t, y_scaled, scaler, fn, train_size=None):
  """Fit a chi2-kernel ridge regression on the time index and predict through the forecast window.

  Args:
    t: 1-D NumPy array of sample indices; used as the single input feature.
    y_scaled: Scaled target values aligned with `t` (e.g. the first output of `normalize`).
    scaler: Fitted scaler whose `inverse_transform` maps predictions back to the original units.
    fn: Number of samples per forecast step; predictions extend 3*fn samples past the training span.
    train_size: Number of leading samples used for fitting. When omitted, the
      module-level `train_size` is used if one exists (this keeps existing
      callers working); otherwise it defaults to `len(t) - 3*fn`, i.e. the
      last 3*fn samples are held out.

  Returns:
    An (n, 1) array of predictions for `t[:train_size + 3*fn]`, in the original units.
  """
  horizon = 3 * fn
  if train_size is None:
    # Backward compatibility: the function used to read `train_size` straight from
    # notebook globals. Prefer passing it explicitly to avoid hidden state.
    train_size = globals().get('train_size', len(t) - horizon)

  model = KernelRidge(kernel='chi2', gamma=0.9) #chi2 has very high training accuracy

  # Repeated K-fold cross validation; the fixed random_state makes the splits reproducible.
  cv = RepeatedKFold(n_splits=5, n_repeats=3, random_state=1)

  # The grid holds a single alpha, so the search only cross-validates that one setting.
  grid = [
    {'alpha': [0.02]}
  ]

  search = GridSearchCV(model, grid, scoring='neg_root_mean_squared_error', cv=cv, n_jobs=-1, verbose=0)
  search.fit(t[0:train_size].reshape(-1,1), y_scaled[0:train_size])

  # Predict over the training span plus the 3*fn-sample forecast window.
  predictions = search.predict(t[:train_size + horizon].reshape(-1,1))
  # inverse_transform needs a 2-D column; the reshape is a no-op for (n, 1) targets
  # and prevents a crash if the targets were passed as a 1-D array.
  predictions = scaler.inverse_transform(np.asarray(predictions).reshape(-1, 1))
  return predictions

Performance

In [33]:
def performance(y, predictions, fn):
  """Save two figures comparing the actual series with the model output.

  The first figure shows the whole predicted span: actual values, the fitted
  part of the predictions (solid red) and the final 3*fn forecast samples
  (dashed red). The second figure is a close-up of the forecast window only.
  Both are written as PNG files named after the module-level `fnum`.

  Args:
    y: 1-D NumPy array of actual values, at least as long as `predictions`.
    predictions: NumPy array of model output whose last 3*fn entries are the forecast.
    fn: Number of samples per forecast step; the forecast window is 3*fn samples.

  Raises:
    ValueError: If the forecast window is empty or not shorter than `predictions`.
  """
  minutes_per_sample = 5  # the x-axes advance 5 minutes per sample
  # Factor applied before plotting on the axis labelled mg/dL
  # (presumably the inputs are in mmol/L — confirm against the data source).
  to_mg_dl = 18.016

  n_pred = len(predictions)
  horizon = 3 * fn
  if not 0 < horizon < n_pred:
    raise ValueError('forecast window (3*fn samples) must be non-empty and shorter than predictions')

  split = n_pred - horizon                       # index of the first forecast sample
  split_minutes = minutes_per_sample * split
  total_minutes = minutes_per_sample * n_pred
  horizon_minutes = minutes_per_sample * horizon  # 90 when fn == 6

  # NOTE: `fnum` is not a parameter; it is read from the notebook's global scope
  # (the run loop's current file number) to name the output files.
  out_dir = 'gdrive/My Drive/Summer Research/Figures/KRR/KRR testing D1NAMO/'

  # Figure 1: full span, fitted part vs. forecast.
  plt.figure(figsize=(16,9))
  plt.plot(range(0, total_minutes, minutes_per_sample), to_mg_dl*y[:n_pred],
           color='blue', label='Actual CGM', linewidth=2)
  plt.plot(range(0, split_minutes, minutes_per_sample), to_mg_dl*predictions[:split],
           color='red', linewidth=1)
  plt.plot(range(split_minutes, total_minutes, minutes_per_sample), to_mg_dl*predictions[split:],
           'r--', label='Forecasted CGM', linewidth=2)
  plt.title('CGM Prediction')
  plt.xlabel('Time (minutes)')
  plt.ylabel('CGM (mg/dL)')
  plt.legend()

  plt.savefig(out_dir+str(fnum)+'.png',
              bbox_inches='tight')
  plt.close()

  # Figure 2: close-up of the forecast window only.
  plt.figure(figsize=(16,9))
  plt.plot(range(0, horizon_minutes, minutes_per_sample), to_mg_dl*y[split:n_pred],
           color='blue', label='Actual CGM', linewidth=2)
  plt.plot(range(0, horizon_minutes, minutes_per_sample), to_mg_dl*predictions[split:],
           'r--', label='Forecasted CGM', linewidth=2)
  plt.title(f'CGM {horizon_minutes} Minute Forecast')
  plt.xlabel('Forecast ahead (minutes)')
  plt.ylabel('CGM (mg/dL)')
  plt.legend()

  plt.savefig(out_dir+str(fnum)+' closeup.png',
              bbox_inches='tight')
  plt.close()

In [34]:
def saveStats(y, predictions, fn):
  """Compute forecast error statistics for three cumulative horizons and save them as CSV.

  The forecast window is the last 3*fn entries of `predictions`. Statistics are
  computed over its first fn, 2*fn and 3*fn samples (30/60/90 minutes when
  fn == 6 at 5 minutes per sample). The table is written to a CSV file named
  after the module-level `fnum`.

  Args:
    y: Array of actual values, at least as long as `predictions`.
    predictions: Array of model output (1-D or a single column) whose last
      3*fn entries are the forecast.
    fn: Number of samples per forecast step.
  """
  minutes_per_sample = 5  # matches the 5-minute spacing used for the row labels

  n_pred = len(predictions)
  start = n_pred - 3*fn  # index of the first forecast sample

  # Flatten both series so element-wise differences cannot broadcast
  # an (n,) array against an (n, 1) column.
  actual = np.asarray(y, dtype=float).ravel()
  forecasted = np.asarray(predictions, dtype=float).ravel()

  rmses = []
  stds = []
  maes = []
  labels = []

  for step in range(1, 4):
    window = slice(start, start + step*fn)
    actual_win = actual[window]
    forecast_win = forecasted[window]

    rmses.append(math.sqrt(mean_squared_error(actual_win, forecast_win)))
    # Sample standard deviation of the actual values in the window.
    stds.append(statistics.stdev(actual_win))
    # Mean absolute error: average over all points in the window (divide by n, not n-1).
    maes.append(float(np.mean(np.abs(actual_win - forecast_win))))
    labels.append(f'{minutes_per_sample*fn*step} min')

  stats = {'RMSE':rmses, 'Standard Deviation':stds, 'MAE':maes}
  df = pd.DataFrame(stats)
  df.index = labels
  # NOTE: `fnum` is not a parameter; it is read from the notebook's global scope
  # (the run loop's current file number) to name the output file.
  df.to_csv('gdrive/My Drive/Summer Research/Figures/KRR/KRR testing D1NAMO/'+str(fnum)+'.csv')

Run KRR

In [35]:
# Process D1NAMO glucose files 1..9: fit on all but the last 3*fn samples,
# forecast those samples, then save the figures and the error table.
# `fnum` and `train_size` must remain module-level names because the helper
# functions defined in the earlier cells look them up as globals.
for fnum in range(1, 10):
  fn = 6  # samples per forecast step; the held-out window is 3*fn samples

  csv_file = d1namo_data_path + 'glucose (' + str(fnum) + ').csv'
  # Skip the header row and read only the third column (index 2).
  d1namo_data = np.loadtxt(csv_file, delimiter=',', skiprows=1, usecols=[2])

  length = len(d1namo_data)
  data_total = int(length)
  train_size = data_total - 3*fn

  t = np.arange(length)   # sample index used as the model's only feature
  y = d1namo_data.copy()  # work on a copy of the loaded series

  y_scaled, scaler = normalize(y)
  predictions = KRRModel(t, y_scaled, scaler, fn)
  performance(y, predictions, fn)
  saveStats(y, predictions, fn)

#For simulated CGM
#data_total = 11*512
#train_size = 10*512
#test_size = data_total - train_size
#t = np.array(list(range(data_total)))
#y = np.zeros(data_total)

#for i in range(int(data_total/512)):
#  y[range(512*i,512*(i+1))] = np.loadtxt(data_path+'adult#'+f'{fnum:03d}'+'_'+f'{(i+1):03d}'+'.csv', delimiter=',')