<a href="https://colab.research.google.com/github/adeethyiashankar/HRV-diabetes-classification/blob/main/Forecast/Prophet.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [None]:
from fbprophet import Prophet
from fbprophet.diagnostics import cross_validation, performance_metrics
from fbprophet.plot import plot_cross_validation_metric

import pandas as pd
import numpy as np
import itertools
import math
import statistics
from sklearn.metrics import mean_squared_error
from sklearn.preprocessing import MinMaxScaler
from google.colab import drive

import matplotlib.pyplot as plt

# Folder with the simulated CGM CSV files; data() reads 'adult#NNN.csv' and the
# 'Wavelet Transformed Data/' sub-folder from here.
data_path = 'gdrive/My Drive/Summer Research/Simulated CGM Data/'
# Folder with the D1NAMO CSV files; data() reads 'glucose (N).csv' from here.
d1namo_data_path = 'gdrive/My Drive/Summer Research/Glucose/Diabetes/Cleaned Data/'
# Root folder under which the cross-validation figures and summary CSVs are written.
figure_path = 'gdrive/My Drive/Summer Research/Figures/Prophet/'

# Mount Google Drive. NOTE(review): the three paths above are relative while the
# mount point is absolute, so they only resolve when the working directory is
# /content (presumably the Colab default) -- confirm before running elsewhere.
drive.mount('/content/gdrive')

Drive already mounted at /content/gdrive; to attempt to forcibly remount, call drive.mount("/content/gdrive", force_remount=True).


Data (returns X, data_total, train_size)

In [None]:
def data(c,fnum):
  """Load one subject's series as a two-column frame ready for Prophet.

  Parameters
  ----------
  c : str
      Dataset selector: 'd1namo' or 'simulated'.
  fnum : int
      Subject/file number used to build the CSV file name(s).

  Returns
  -------
  tuple
      (X, data_total, train_size). X has the columns 'ds' and 'y';
      data_total is the number of samples and train_size the number of
      samples designated for training.

  Raises
  ------
  ValueError
      If c is neither 'd1namo' nor 'simulated'.
  """
  # Reject unknown dataset names before touching the file system.
  if c not in ('d1namo', 'simulated'):
    raise ValueError('Parameter must be d1namo or simulated')

  if c == 'd1namo':
    raw = pd.read_csv(f'{d1namo_data_path}glucose ({fnum}).csv', skiprows=0)
    values = raw.iloc[:, 2]
    # The first two columns are joined with a space to form the 'ds' strings.
    stamps = raw.iloc[:, 0] + ' ' + raw.iloc[:, 1]
    data_total = len(values)
    # Everything except the last 18 samples is designated as training data.
    train_size = data_total - 18
  else:
    segment_len = 512
    data_total = 5 * segment_len
    train_size = 4 * segment_len
    subject = f'adult#{fnum:03d}'
    # 'ds' comes from the first column of the subject's main CSV.
    stamps = pd.read_csv(f'{data_path}{subject}.csv', skiprows=0).iloc[0:data_total, 0]
    # 'y' is stitched together from the numbered, header-less segment files.
    segments = [
      pd.read_csv(f'{data_path}Wavelet Transformed Data/{subject}_{k:03d}.csv',
                  index_col=None, header=None)
      for k in range(1, data_total // segment_len + 1)
    ]
    values = pd.concat(segments, axis=0, ignore_index=True)

  X = pd.concat((stamps, values), axis=1)
  X.columns = ['ds', 'y']
  return X, data_total, train_size

Fit data to Prophet

In [None]:
def fitProphet(X, cps):
  """Create a Prophet model and fit it to X.

  Parameters
  ----------
  X : DataFrame passed unchanged to Prophet's fit().
  cps : value forwarded as Prophet's changepoint_prior_scale.

  Returns
  -------
  Whatever Prophet.fit returns for the newly created model.
  """
  model = Prophet(changepoint_prior_scale=cps)
  fitted = model.fit(X)
  return fitted

Predict with Prophet

In [None]:
def predictProphet(prophet,periods,freq='5min'):
  """Forecast `periods` steps past the end of the fitted model's history.

  Parameters
  ----------
  prophet : fitted model exposing make_future_dataframe() and predict().
  periods : int
      Number of future steps to append after the history.
  freq : str, optional
      Spacing of the future steps; defaults to '5min'.

  Returns
  -------
  The forecast returned by prophet.predict() for history plus future steps.
  """
  # Use only the arguments: the earlier version read the globals data_total,
  # train_size and m, which do not exist on a fresh kernel.
  future = prophet.make_future_dataframe(periods=periods, freq=freq, include_history=True)
  return prophet.predict(future)

Run Prophet

In [None]:
def runProphet(c,fnum):
  """Load a subject, fit Prophet, forecast and plot the result.

  Parameters
  ----------
  c : str
      Dataset selector forwarded to data() ('d1namo' or 'simulated').
  fnum : int
      Subject/file number forwarded to data().

  Returns
  -------
  tuple
      (forecast, fig1, fig2): the forecast, the forecast plot and the
      components plot.
  """
  X, data_total, train_size = data(c,fnum)
  # NOTE(review): the model is fitted on all of X, not only the first
  # train_size rows -- confirm this is intended.
  prophet = fitProphet(X, 0.1)
  forecast = predictProphet(prophet,data_total-train_size)
  # Plot through the fitted model; the earlier code used an undefined name `fit`.
  fig1 = prophet.plot(forecast)
  fig2 = prophet.plot_components(forecast)
  return forecast, fig1, fig2

Run Prophet with Cross-validation

In [None]:
def CVProphet(c,fnum,period,horizon):
  """Cross-validate Prophet on one subject and summarise RMSE and MAE.

  Parameters
  ----------
  c : str
      Dataset selector forwarded to data().
  fnum : int
      Subject/file number forwarded to data().
  period : number of minutes between cross-validation cutoffs.
  horizon : number of minutes forecast from each cutoff.

  Returns
  -------
  tuple
      (fig_rmse, fig_mae, tuning_results). The figures are drawn from the
      cross-validation frame of the last parameter combination;
      tuning_results has one row per combination with 'rmse' and 'mae'.
  """
  def _average(column):
    # Plain arithmetic mean of the metric values.
    values = column.values
    return sum(values)/len(values)

  X, data_total, train_size = data(c,fnum)

  # Grid of Prophet keyword arguments to evaluate (currently one combination).
  param_grid = {'changepoint_prior_scale': [0.1]}
  names = param_grid.keys()
  all_params = [dict(zip(names, combo)) for combo in itertools.product(*param_grid.values())]

  rmses, maes = [], []
  for params in all_params:
    m = Prophet(**params).fit(X)
    df_cv = cross_validation(
      m,
      initial=f'{3*train_size}min',
      period=f'{period}min',
      horizon=f'{horizon}min',
      parallel='processes',
    )
    # rolling_window=1 is passed so the metrics are aggregated over the whole horizon.
    df_p = performance_metrics(df_cv, rolling_window=1)
    rmse_avg = _average(df_p['rmse'])
    mae_avg = _average(df_p['mae'])
    rmses.append(rmse_avg)
    maes.append(mae_avg)

  # df_cv here is the frame left over from the final loop iteration.
  fig_rmse = plot_cross_validation_metric(df_cv, metric='rmse')
  fig_mae = plot_cross_validation_metric(df_cv, metric='mae')

  tuning_results = pd.DataFrame(all_params)
  tuning_results['rmse'] = rmses
  tuning_results['mae'] = maes
  return fig_rmse, fig_mae, tuning_results

In [None]:
def runCVProphetForD1NAMO(minutes_ahead):
  """Cross-validate Prophet on D1NAMO subjects 1-9 and save the results.

  For every subject the RMSE and MAE figures are written as PNG files under
  figure_path + 'D1NAMO/<minutes_ahead> min ahead/', and one summary CSV with
  a row per subject is written under figure_path + 'D1NAMO/'.

  Parameters
  ----------
  minutes_ahead : forecast horizon in minutes, forwarded to CVProphet.
  """
  out_dir = figure_path+'D1NAMO/'
  fig_dir = out_dir+str(minutes_ahead)+' min ahead/'

  # Collect one record per subject and build the frame once at the end:
  # DataFrame.append was removed in pandas 2.0 and re-copies the frame per row.
  records = []
  for subject in range(1, 10):
    f1, f2, t = CVProphet('d1namo',subject,30,minutes_ahead)
    try:
      f1.savefig(fig_dir+str(subject)+' RMSE.png', bbox_inches='tight')
      f2.savefig(fig_dir+str(subject)+' MAE.png', bbox_inches='tight')
    finally:
      # Release the figures even if saving fails, so they do not pile up.
      plt.close(f1)
      plt.close(f2)
    records.append({
        'rmse':t['rmse'].loc[0],
        'mae':t['mae'].loc[0]
    })

  d1namo_csv = pd.DataFrame(records, columns=['rmse','mae'])
  d1namo_csv.to_csv(out_dir+'D1NAMO '+str(minutes_ahead)+' min ahead.csv', index=False)

In [None]:
# Run the D1NAMO cross-validation once per forecast horizon (in minutes).
for i in [30,60,90]:
  runCVProphetForD1NAMO(i)

INFO:fbprophet:Disabling yearly seasonality. Run prophet with yearly_seasonality=True to override this.
INFO:fbprophet:Disabling weekly seasonality. Run prophet with weekly_seasonality=True to override this.
INFO:fbprophet:Making 95 forecasts with cutoffs between 2014-10-04 17:24:02 and 2014-10-06 16:24:02
INFO:fbprophet:Applying in parallel with <concurrent.futures.process.ProcessPoolExecutor object at 0x7f0b32103f90>
INFO:fbprophet:Disabling yearly seasonality. Run prophet with yearly_seasonality=True to override this.
INFO:fbprophet:Disabling weekly seasonality. Run prophet with weekly_seasonality=True to override this.
INFO:fbprophet:Making 72 forecasts with cutoffs between 2014-10-03 23:17:03 and 2014-10-05 10:47:03
INFO:fbprophet:Applying in parallel with <concurrent.futures.process.ProcessPoolExecutor object at 0x7f0b307e43d0>
INFO:fbprophet:Disabling yearly seasonality. Run prophet with yearly_seasonality=True to override this.
INFO:fbprophet:Disabling weekly seasonality. Run p

In [None]:
def runCVProphetForSimulated(minutes_ahead):
  """Cross-validate Prophet on the simulated subjects and save the results.

  Subjects 1-20 are processed except subject 12. For every processed subject
  the RMSE and MAE figures are written as PNG files under
  figure_path + 'Simulated/<minutes_ahead> min ahead/', and one summary CSV
  with a row per processed subject is written under figure_path + 'Simulated/'.

  Parameters
  ----------
  minutes_ahead : forecast horizon in minutes, forwarded to CVProphet.
  """
  out_dir = figure_path+'Simulated/'
  fig_dir = out_dir+str(minutes_ahead)+' min ahead/'
  # NOTE(review): subject 12 is excluded; the reason is not visible here -- confirm.
  skipped_subjects = {12}

  # Collect one record per subject and build the frame once at the end:
  # DataFrame.append was removed in pandas 2.0 and re-copies the frame per row.
  records = []
  for subject in range(1, 21):
    if subject in skipped_subjects:
      continue
    f1, f2, t = CVProphet('simulated',subject,30,minutes_ahead)
    try:
      # NOTE(review): the D1NAMO runner saves with bbox_inches='tight'; this
      # one does not -- confirm whether the difference is intended.
      f1.savefig(fig_dir+str(subject)+' RMSE.png')
      f2.savefig(fig_dir+str(subject)+' MAE.png')
    finally:
      # Release the figures even if saving fails, so they do not pile up.
      plt.close(f1)
      plt.close(f2)
    records.append({
      'rmse':t['rmse'].loc[0],
      'mae':t['mae'].loc[0]
    })

  simulated_csv = pd.DataFrame(records, columns=['rmse','mae'])
  simulated_csv.to_csv(out_dir+'Simulated '+str(minutes_ahead)+' min ahead.csv', index=False)