In [1]:
import pandas as pd
# CSV 
file_path = '/Users/bg.lim/Downloads/TAMU_Agri/New_CGM/pre_thre_glucose.csv'

# 
df = pd.read_csv(file_path, encoding="utf-8")

# Ideal Model that predict 15min after glucose

In [3]:
import pandas as pd
import numpy as np
from sklearn.ensemble import RandomForestRegressor
from sklearn.model_selection import KFold
from sklearn.metrics import mean_absolute_error, mean_squared_error, r2_score
from sklearn.preprocessing import StandardScaler

# 1. Sort data and create target column (15 min ahead)
df = df.sort_values(by=["StudyID", "Timestamp"])
df["Glucose_+15m"] = df.groupby("StudyID")["Glucose"].shift(-1)

# 2. Convert time string (e.g., '01:30') to float (1.5)
def time_str_to_float(time_str):
    h, m = map(int, time_str.split(":"))
    return h + m / 60

df["Time_24h"] = df["Time_24h"].apply(time_str_to_float)

# 3. Define selected features
features = [
    'Glucose',  'mean_glucose_previous_1day', 'Previous_1_Glucose',
    '15m_G_Diff_1', 'Previous_2_Glucose', '15m_G_Diff_2',
    'Previous_3_Glucose', '15m_G_Diff_3', 'Previous_4_Glucose', '15m_G_Diff_4',
    'Previous_5_Glucose', '15m_G_Diff_5', 'Previous_6_Glucose', '15m_G_Diff_6',
    'Previous_7_Glucose', '15m_G_Diff_7', 'Previous_8_Glucose', '15m_G_Diff_8',
    'Previous_9_Glucose', '15m_G_Diff_9', 'Previous_10_Glucose', 'is_weekend', 'Time_24h',
    'mean_intensity_1h', 'mean_intensity_2h', 'mean_intensity_3h', 'HR_mean_1h',
    'HR_std_1h', 'HR_slope_1h', 'HR_mean_2h', 'HR_std_2h', 'HR_slope_2h',
    'HR_mean_3h', 'HR_std_3h', 'HR_slope_3h', 'TimeInBed', 'Efficiency',
    'EMA_T_Diff.1', 'hungry_weighted', 'bored_weighted', 'How_stressed_weighted', 'How_anxious_weighted',
    'How_tired_weighted', 'Z_Previous_5', 'Z_Previous_6', 'Z_Previous_7', 'Z_Previous_8',
    'Z_Previous_9', 'Z_Previous_10', 'Z_Previous_4', '1_1.5h_pre_mean', '1_1.5h_pre_std',
    '1_1.5h_pre_slope', '1.5_2h_pre_mean', '1.5_2h_pre_std', '1.5_2h_pre_slope', '2_2.5h_pre_mean',
    '2_2.5h_pre_std', '2_2.5h_pre_slope', 'Glucose_q25_pre_1_2.5h', 'Glucose_median_pre_1_2.5h', 'Glucose_q75_pre_1_2.5h',
    'Glucose_min_pre_1_2.5h', 'Glucose_max_pre_1_2.5h', 'Glucose_iqr_pre_1_2.5h', 'Glucose_range_pre_1_2.5h', 'Glucose_spread_ratio_pre_1_2.5h',
    'Glucose_skew_hint_pre_1_2.5h', 'Glucose_std_pre_1_2.5h', 'Glucose_iqr_to_std_pre_1_2.5h', 'HR', 'Intensity',
    'HR_mean_5d', 'HR_std_5d', 'Intensity_mean_5d', 'Intensity_std_5d', 'Z_HR',
    'Z_Intensity',  'Glucose_pre_1h', 'Glucose_pre_1.5h', 'Glucose_pre_2h'
]

# 4. Drop rows with missing target or features
df = df[df["Glucose_+15m"].notnull()]
df = df.dropna(subset=features)

# 5. Scale feature matrix
X = df[features]
y = df["Glucose_+15m"].values
scaler = StandardScaler()
X_scaled = scaler.fit_transform(X)

# 6. 5-Fold CV
kf = KFold(n_splits=5, shuffle=True, random_state=42)
mae_list, rmse_list, r2_list = [], [], []

for train_idx, val_idx in kf.split(X_scaled):
    X_train, X_val = X_scaled[train_idx], X_scaled[val_idx]
    y_train, y_val = y[train_idx], y[val_idx]

    model = RandomForestRegressor(n_estimators=100, random_state=42, n_jobs=-1)
    model.fit(X_train, y_train)
    preds = model.predict(X_val)

    mae_list.append(mean_absolute_error(y_val, preds))
    rmse_list.append(np.sqrt(mean_squared_error(y_val, preds)))
    r2_list.append(r2_score(y_val, preds))

# 7. Print results
print("=== Glucose Prediction +15m ===")
print(f"MAE : {np.mean(mae_list):.2f}")
print(f"RMSE: {np.mean(rmse_list):.2f}")
print(f"R²  : {np.mean(r2_list):.2f}")


=== Glucose Prediction +15m ===
MAE : 3.72
RMSE: 5.48
R²  : 0.92


# glucose after 30min

In [16]:
import pandas as pd
import numpy as np
from sklearn.ensemble import RandomForestRegressor
from sklearn.model_selection import KFold
from sklearn.metrics import mean_absolute_error, mean_squared_error, r2_score
from sklearn.preprocessing import StandardScaler

# 1. Sort data and create target column (30 min ahead)
df = df.sort_values(by=["StudyID", "Timestamp"])
df["Glucose_+30m"] = df.groupby("StudyID")["Glucose"].shift(-2)

# 2. Convert time string (e.g., '01:30') to float (1.5)
def time_str_to_float(time_str):
    if isinstance(time_str, str):
        h, m = map(int, time_str.split(":"))
        return h + m / 60
    return time_str  # 이미 float이거나 NaN이면 그대로 반환

df['Time_24h'] = df['Time_24h'].apply(time_str_to_float)


# 3. Define selected features
features = [
    'Glucose',  'mean_glucose_previous_1day', 'Previous_1_Glucose',
    '15m_G_Diff_1', 'Previous_2_Glucose', '15m_G_Diff_2',
    'Previous_3_Glucose', '15m_G_Diff_3', 'Previous_4_Glucose', '15m_G_Diff_4',
    'Previous_5_Glucose', '15m_G_Diff_5', 'Previous_6_Glucose', '15m_G_Diff_6',
    'Previous_7_Glucose', '15m_G_Diff_7', 'Previous_8_Glucose', '15m_G_Diff_8',
    'Previous_9_Glucose', '15m_G_Diff_9', 'Previous_10_Glucose', 'is_weekend', 'Time_24h',
    'mean_intensity_1h', 'mean_intensity_2h', 'mean_intensity_3h', 'HR_mean_1h',
    'HR_std_1h', 'HR_slope_1h', 'HR_mean_2h', 'HR_std_2h', 'HR_slope_2h',
    'HR_mean_3h', 'HR_std_3h', 'HR_slope_3h', 'TimeInBed', 'Efficiency',
    'EMA_T_Diff.1', 'hungry_weighted', 'bored_weighted', 'How_stressed_weighted', 'How_anxious_weighted',
    'How_tired_weighted', 'Z_Previous_5', 'Z_Previous_6', 'Z_Previous_7', 'Z_Previous_8',
    'Z_Previous_9', 'Z_Previous_10', 'Z_Previous_4', '1_1.5h_pre_mean', '1_1.5h_pre_std',
    '1_1.5h_pre_slope', '1.5_2h_pre_mean', '1.5_2h_pre_std', '1.5_2h_pre_slope', '2_2.5h_pre_mean',
    '2_2.5h_pre_std', '2_2.5h_pre_slope', 'Glucose_q25_pre_1_2.5h', 'Glucose_median_pre_1_2.5h', 'Glucose_q75_pre_1_2.5h',
    'Glucose_min_pre_1_2.5h', 'Glucose_max_pre_1_2.5h', 'Glucose_iqr_pre_1_2.5h', 'Glucose_range_pre_1_2.5h', 'Glucose_spread_ratio_pre_1_2.5h',
    'Glucose_skew_hint_pre_1_2.5h', 'Glucose_std_pre_1_2.5h', 'Glucose_iqr_to_std_pre_1_2.5h', 'HR', 'Intensity',
    'HR_mean_5d', 'HR_std_5d', 'Intensity_mean_5d', 'Intensity_std_5d', 'Z_HR',
    'Z_Intensity',  'Glucose_pre_1h', 'Glucose_pre_1.5h', 'Glucose_pre_2h'
]

# 4. Drop rows with missing target or features
df = df[df["Glucose_+30m"].notnull()]
df = df.dropna(subset=features)

# 5. Scale feature matrix
X = df[features]
y = df["Glucose_+30m"].values
scaler = StandardScaler()
X_scaled = scaler.fit_transform(X)

# 6. 5-Fold CV
kf = KFold(n_splits=5, shuffle=True, random_state=42)
mae_list, rmse_list, r2_list = [], [], []

for train_idx, val_idx in kf.split(X_scaled):
    X_train, X_val = X_scaled[train_idx], X_scaled[val_idx]
    y_train, y_val = y[train_idx], y[val_idx]

    model = RandomForestRegressor(n_estimators=100, random_state=42, n_jobs=-1)
    model.fit(X_train, y_train)
    preds = model.predict(X_val)

    mae_list.append(mean_absolute_error(y_val, preds))
    rmse_list.append(np.sqrt(mean_squared_error(y_val, preds)))
    r2_list.append(r2_score(y_val, preds))

# 7. Print results
print("=== Glucose Prediction +30m ===")
print(f"MAE : {np.mean(mae_list):.2f}")
print(f"RMSE: {np.mean(rmse_list):.2f}")
print(f"R²  : {np.mean(r2_list):.2f}")


=== Glucose Prediction +30m ===
MAE : 7.33
RMSE: 10.86
R²  : 0.68


# glucose after 60min

In [18]:
import pandas as pd
import numpy as np
from sklearn.ensemble import RandomForestRegressor
from sklearn.model_selection import KFold
from sklearn.metrics import mean_absolute_error, mean_squared_error, r2_score
from sklearn.preprocessing import StandardScaler

# 1. Sort data and create target column (30 min ahead)
df = df.sort_values(by=["StudyID", "Timestamp"])
df["Glucose_+60m"] = df.groupby("StudyID")["Glucose"].shift(-4)

# 2. Convert time string (e.g., '01:30') to float (1.5)
def time_str_to_float(time_str):
    if isinstance(time_str, str):
        h, m = map(int, time_str.split(":"))
        return h + m / 60
    return time_str  # 이미 float이거나 NaN이면 그대로 반환

df['Time_24h'] = df['Time_24h'].apply(time_str_to_float)


# 3. Define selected features
features = [
    'Glucose',  'mean_glucose_previous_1day', 'Previous_1_Glucose',
    '15m_G_Diff_1', 'Previous_2_Glucose', '15m_G_Diff_2',
    'Previous_3_Glucose', '15m_G_Diff_3', 'Previous_4_Glucose', '15m_G_Diff_4',
    'Previous_5_Glucose', '15m_G_Diff_5', 'Previous_6_Glucose', '15m_G_Diff_6',
    'Previous_7_Glucose', '15m_G_Diff_7', 'Previous_8_Glucose', '15m_G_Diff_8',
    'Previous_9_Glucose', '15m_G_Diff_9', 'Previous_10_Glucose', 'is_weekend', 'Time_24h',
    'mean_intensity_1h', 'mean_intensity_2h', 'mean_intensity_3h', 'HR_mean_1h',
    'HR_std_1h', 'HR_slope_1h', 'HR_mean_2h', 'HR_std_2h', 'HR_slope_2h',
    'HR_mean_3h', 'HR_std_3h', 'HR_slope_3h', 'TimeInBed', 'Efficiency',
    'EMA_T_Diff.1', 'hungry_weighted', 'bored_weighted', 'How_stressed_weighted', 'How_anxious_weighted',
    'How_tired_weighted', 'Z_Previous_5', 'Z_Previous_6', 'Z_Previous_7', 'Z_Previous_8',
    'Z_Previous_9', 'Z_Previous_10', 'Z_Previous_4', '1_1.5h_pre_mean', '1_1.5h_pre_std',
    '1_1.5h_pre_slope', '1.5_2h_pre_mean', '1.5_2h_pre_std', '1.5_2h_pre_slope', '2_2.5h_pre_mean',
    '2_2.5h_pre_std', '2_2.5h_pre_slope', 'Glucose_q25_pre_1_2.5h', 'Glucose_median_pre_1_2.5h', 'Glucose_q75_pre_1_2.5h',
    'Glucose_min_pre_1_2.5h', 'Glucose_max_pre_1_2.5h', 'Glucose_iqr_pre_1_2.5h', 'Glucose_range_pre_1_2.5h', 'Glucose_spread_ratio_pre_1_2.5h',
    'Glucose_skew_hint_pre_1_2.5h', 'Glucose_std_pre_1_2.5h', 'Glucose_iqr_to_std_pre_1_2.5h', 'HR', 'Intensity',
    'HR_mean_5d', 'HR_std_5d', 'Intensity_mean_5d', 'Intensity_std_5d', 'Z_HR',
    'Z_Intensity',  'Glucose_pre_1h', 'Glucose_pre_1.5h', 'Glucose_pre_2h'
]

# 4. Drop rows with missing target or features
df = df[df["Glucose_+60m"].notnull()]
df = df.dropna(subset=features)

# 5. Scale feature matrix
X = df[features]
y = df["Glucose_+60m"].values
scaler = StandardScaler()
X_scaled = scaler.fit_transform(X)

# 6. 5-Fold CV
kf = KFold(n_splits=5, shuffle=True, random_state=42)
mae_list, rmse_list, r2_list = [], [], []

for train_idx, val_idx in kf.split(X_scaled):
    X_train, X_val = X_scaled[train_idx], X_scaled[val_idx]
    y_train, y_val = y[train_idx], y[val_idx]

    model = RandomForestRegressor(n_estimators=100, random_state=42, n_jobs=-1)
    model.fit(X_train, y_train)
    preds = model.predict(X_val)

    mae_list.append(mean_absolute_error(y_val, preds))
    rmse_list.append(np.sqrt(mean_squared_error(y_val, preds)))
    r2_list.append(r2_score(y_val, preds))

# 7. Print results
print("=== Glucose Prediction +60m ===")
print(f"MAE : {np.mean(mae_list):.2f}")
print(f"RMSE: {np.mean(rmse_list):.2f}")
print(f"R²  : {np.mean(r2_list):.2f}")


=== Glucose Prediction +60m ===
MAE : 8.75
RMSE: 12.85
R²  : 0.55
