# Train LSTM on CGM Data

This series of notebook cells loads CGM data from `cgm/data/Clarity_Export_Harlow_Iain_2025-05-20_233242.csv`, prepares it as sliding windows, and compares two recurrent architectures (LSTM and GRU) under different hyperparameter settings (hidden-unit counts) on the task of predicting the next glucose value.


In [None]:
# Add project root and import helper functions
import sys, os
sys.path.append(os.path.abspath('..'))  # ensure 'scripts' is on PYTHONPATH
from scripts.train_lstm_cgm import load_data, create_sequences

import pandas as pd
import numpy as np
from sklearn.preprocessing import MinMaxScaler
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import LSTM, GRU, Dense
from tensorflow.keras.callbacks import EarlyStopping
from tensorflow.keras.optimizers import Adam

# Experiment settings
# Input CSV, given relative to the notebook's working directory.
DATA_FILE = '../data/Clarity_Export_Harlow_Iain_2025-05-20_233242.csv'
# Window length: number of time steps in each input sequence.
SEQ_LENGTH = 24
# Candidate architectures as (layer type, hidden units) pairs, expanded into
# the dict form that the training loop reads via cfg['type'] / cfg['units'].
MODELS = [
    {'type': kind, 'units': width}
    for kind, width in (('LSTM', 50), ('LSTM', 100), ('GRU', 50))
]

# Load and scale
df = load_data(DATA_FILE)
values = df.values

# Fit the scaler on the leading (training) portion only. model.fit below is
# called with validation_split=0.2, which holds out the LAST 20% of samples, so
# fitting min/max on the full series would leak validation-period statistics
# into the training inputs.
# NOTE(review): assumes create_sequences emits windows in chronological row
# order, so the first 80% of rows all belong to training windows - confirm.
VAL_FRACTION = 0.2  # keep in sync with validation_split passed to model.fit
n_fit = max(1, int(len(values) * (1 - VAL_FRACTION)))
scaler = MinMaxScaler()
scaler.fit(values[:n_fit])
# Held-out values may land slightly outside [0, 1]; that is expected when the
# scaler has not seen them.
scaled = scaler.transform(values)

X, y = create_sequences(scaled, SEQ_LENGTH)
# Keras recurrent layers take (samples, time steps, features); one feature here.
X = X.reshape((len(X), SEQ_LENGTH, 1))

# Train, evaluate, and collect results
# Explicit name -> layer-class lookup: a misspelled or unsupported 'type'
# raises an error rather than silently falling back to GRU.
RNN_LAYERS = {'LSTM': LSTM, 'GRU': GRU}

results = []
for cfg in MODELS:
    rnn_type = cfg['type']
    if rnn_type not in RNN_LAYERS:
        raise ValueError(
            f"Unknown model type {rnn_type!r}; expected one of {sorted(RNN_LAYERS)}"
        )
    layer = RNN_LAYERS[rnn_type]

    # Single recurrent layer followed by a one-unit regression head.
    # NOTE(review): activation='relu' on LSTM/GRU rules out the cuDNN fast path
    # (Keras requires the default tanh for it) and can be less stable than
    # tanh - confirm this choice is intentional.
    model = Sequential()
    model.add(layer(cfg['units'], activation='relu', input_shape=(SEQ_LENGTH, 1)))
    model.add(Dense(1))
    model.compile(optimizer=Adam(1e-3), loss='mse')

    # Stop once validation loss has not improved for 5 consecutive epochs.
    es = EarlyStopping(monitor='val_loss', patience=5, restore_best_weights=True)
    history = model.fit(
        X, y,
        epochs=30,
        batch_size=32,
        validation_split=0.2,  # Keras holds out the last 20% of samples
        callbacks=[es],
        verbose=0
    )

    # Score each configuration by the lowest validation loss of any epoch.
    best_val = min(history.history['val_loss'])
    results.append({'model': f"{cfg['type']}_{cfg['units']}", 'val_loss': best_val})

# Compare
# One row per trained configuration, ordered best-first (lowest validation
# loss at index 0). pandas is already imported at the top of the cell, so it
# is not re-imported here.
results_df = pd.DataFrame(results)
results_df = results_df.sort_values('val_loss')
results_df = results_df.reset_index(drop=True)
# Bare final expression so the notebook renders the table as the cell output.
results_df