In [None]:
import pandas as pd
import numpy as np
from sklearn import linear_model 
from math import sqrt
from scipy.stats import zscore
from scipy.stats import pearsonr
import matplotlib.pyplot as plt
import warnings

In [None]:
### import CGM data
# Open the study workbook and load the sheet whose name is held in `blood_analyte`.
filename = "ExcelData/CGM_insulin_TG_data_v6.xlsx"
xl = pd.ExcelFile(filename)
blood_analyte = "CGM"
data = xl.parse(blood_analyte)
# Sorted unique values of the "Patient_ID" column.
subjects = np.unique(data["Patient_ID"])

In [None]:
### baseline correction

# Keep columns 2..34 of the loaded sheet and discard every row that has a
# missing value in any of them.
glucose = data.iloc[:, 2:35].dropna()

def remove_offset(glucose, n_baseline=3):
    """Subtract a per-row baseline from every value in the row.

    The baseline of a row is the mean of its first ``n_baseline`` columns.

    Parameters
    ----------
    glucose : pandas.DataFrame
        One row per recording; columns are consecutive readings.
    n_baseline : int, optional
        Number of leading columns averaged to form the baseline (default 3).

    Returns
    -------
    numpy.ndarray
        Array with the same shape as ``glucose`` holding the
        baseline-corrected values.

    Raises
    ------
    ValueError
        If ``n_baseline`` is smaller than 1 (the baseline would be undefined).
    """
    if n_baseline < 1:
        raise ValueError("n_baseline must be at least 1")
    baseline = glucose.iloc[:, :n_baseline].mean(axis=1).to_numpy()
    # Column vector so the subtraction broadcasts across each row.
    return glucose.to_numpy() - baseline[:, np.newaxis]

# Subtract each row's baseline; from here on `glucose` is a NumPy array, not a DataFrame.
glucose = remove_offset(glucose)

In [None]:
# Amount of each macronutrient for the nine study meals; position k in every
# array refers to the same meal k.
protein_gt = np.array([15, 30, 60, 30, 30, 60, 15, 30, 30])
CHO_gt = np.array([52.25, 94.75, 179.75, 52.25, 179.75, 94.75, 94.75, 94.75, 94.75])
fat_gt = np.array([13, 26, 52, 26, 26, 26, 26, 52, 13])

# gt_dict: subject ID -> positions in the *_gt arrays of the meals that the
# subject has a recording for.
gt_dict = {
    "38A": [0, 1, 2, 3, 4, 5, 6, 8],
    "38B": list(range(1, 9)),
    "38C": [0, 2, 3, 4, 5, 6, 7, 8],
    "38D": list(range(0, 9)),
    "38E": list(range(0, 9)),
    "38F": [0, 1, 3, 4, 5, 6, 7, 8],
    "38H": list(range(0, 9)),
    "38I": [0, 1, 2, 3, 4, 6, 7, 8],
    "38J": list(range(0, 9)),
    "38L": [1, 2, 3, 5, 6, 7, 8],
    "38M": [0, 1, 2, 3, 4, 5, 6, 8],
    "38N": list(range(0, 9)),
    "38O": [0, 1, 2, 3, 5, 7],
    "38P": list(range(0, 9)),
    "38Q": [0, 2, 3, 4, 5, 6, 7, 8],
}

# gl_dict: subject ID -> row positions of that subject's recordings in
# `glucose`. Each subject occupies one consecutive run of rows, one row per
# meal listed in gt_dict, in gt_dict order:
# 38A -> rows 0-7, 38B -> rows 8-15, ..., 38Q -> rows 115-122.
gl_dict = {}
_next_row = 0
for _subject_id, _meals in gt_dict.items():
    _stop_row = _next_row + len(_meals)
    gl_dict[_subject_id] = list(range(_next_row, _stop_row))
    _next_row = _stop_row

In [None]:
### z-score normalization
# The baseline-corrected values are wrapped in a DataFrame so rows can be
# addressed by position.
glucose = pd.DataFrame(glucose)

# Standardize one subject at a time: with zscore's default axis, each column
# is standardized across that subject's rows only.
for subject, rows in gl_dict.items():
    glucose.iloc[rows] = zscore(glucose.iloc[rows])

In [None]:
def get_label(macro_gt, macro):
    """Map macronutrient amounts to class labels 1, 2, 3.

    ``macro`` selects the amount table: 'protein', 'CHO' or 'fat'. Amounts
    that match none of the three levels are skipped, so the result may be
    shorter than ``macro_gt``. Any other ``macro`` yields an empty list.
    """
    if macro == 'protein':
        levels = (15.0, 30.0, 60.0)
    elif macro == 'CHO':
        levels = (52.25, 94.75, 179.75)
    elif macro == 'fat':
        levels = (13.0, 26.0, 52.0)
    else:
        return []

    index = []
    for amount in macro_gt:
        # Label k belongs to the k-th level (1-based); the first match wins.
        for label, level in enumerate(levels, start=1):
            if amount == level:
                index.append(label)
                break
    return index

def return_macro(labels, macro):
    """Map class labels 1, 2, 3 back to macronutrient amounts.

    ``macro`` selects the amount table: 'protein', 'CHO' or 'fat'. Labels
    other than 1, 2 or 3 are skipped, so the result may be shorter than
    ``labels``. Any other ``macro`` yields an empty list.
    """
    if macro == 'protein':
        amounts = (15.0, 30.0, 60.0)
    elif macro == 'CHO':
        amounts = (52.25, 94.75, 179.75)
    elif macro == 'fat':
        amounts = (13.0, 26.0, 52.0)
    else:
        return []

    values = []
    for label in labels:
        # The k-th amount (1-based) corresponds to label k.
        for code, amount in enumerate(amounts, start=1):
            if label == code:
                values.append(amount)
                break
    return values

In [None]:
### Predict macronutrients using the proposed sparse coding approach
#
# Nested leave-one-subject-out evaluation, repeated for each macronutrient:
#   * outer loop: hold out one test subject;
#   * inner loop: hold out each remaining subject in turn as validation
#     subject and score every candidate Lasso `alpha` on it;
#   * refit with the best `alpha` on all non-test subjects and score the
#     test subject.
all_subjects = [*gt_dict.keys()]
total_subjects = len(all_subjects)
alpha_params = [0.05, 0.5, 1, 1.5, 2.5, 5, 10, 15]
macro_names = ["Carbohydrates", "Proteins", "Fats"]


def _stack_subjects(subject_ids, macro_gt):
    """Return (glucose rows, amounts) of the given subjects, stacked in order."""
    frames = [glucose.iloc[gl_dict[all_subjects[s]]] for s in subject_ids]
    amounts = [macro_gt[gt_dict[all_subjects[s]]] for s in subject_ids]
    # Build once with concat: DataFrame.append was removed in pandas 2.0.
    stacked = pd.concat(frames) if frames else pd.DataFrame({})
    # The leading empty float array keeps the result float-typed, even for
    # integer amount tables or an empty subject list.
    return stacked, np.concatenate([np.empty((0), float)] + amounts)


def _sparse_code_predict(train_data, train_labels, samples, alpha, max_iter):
    """Predict one amount per row of ``samples``.

    Each sample row is expressed as a sparse, non-negative combination of the
    training rows (Lasso with ``positive=True``). The coefficients are then
    rescaled to sum to 1 and applied to the training amounts.
    """
    # Training rows become the columns (features) of the design matrix.
    dictionary = np.transpose(train_data.to_numpy())
    label_row = np.reshape(train_labels, (1, -1))
    predictions = []
    for row in samples.to_numpy():
        mdl = linear_model.Lasso(alpha=alpha, max_iter=max_iter, positive=True)
        mdl.fit(dictionary, np.reshape(row, (-1, 1)))
        if sum(mdl.coef_ != 0):
            mdl.coef_ = mdl.coef_ / sum(mdl.coef_)
        # NOTE(review): predict() also adds the intercept that Lasso fitted on
        # the glucose sample; np.dot(mdl.coef_, train_labels) would give the
        # pure weighted average of the training amounts - confirm which one
        # is intended.
        predictions.append(mdl.predict(label_row)[0])
    return predictions


def _nrmse(pred, labels):
    """Root mean square of the relative errors (pred - labels) / labels."""
    return sqrt(np.sum(((np.asarray(pred) - labels) / labels) ** 2) / len(pred))


# The amount tables are listed in the same order as `macro_names`.
for i, macro_gt in enumerate([CHO_gt, protein_gt, fat_gt]):
    sub_rmse = []
    gnd_trth = []
    predicted = []

    for test_subject in range(total_subjects):
        non_test_subjects = [s for s in range(total_subjects) if s != test_subject]

        # ---- inner loop: score every alpha on every validation subject ----
        alpha_dict = {}
        for val_subject in non_test_subjects:
            train_subjects = [s for s in non_test_subjects if s != val_subject]
            train_data, train_labels = _stack_subjects(train_subjects, macro_gt)
            val_data = glucose.iloc[gl_dict[all_subjects[val_subject]]]
            val_labels = macro_gt[gt_dict[all_subjects[val_subject]]]

            for alpha in alpha_params:
                val_pred = _sparse_code_predict(
                    train_data, train_labels, val_data, alpha, max_iter=1000
                )
                val_rmse = _nrmse(val_pred, val_labels)
                # Dividing by the (constant) number of training subjects only
                # rescales the score; it does not affect which alpha wins.
                alpha_dict.setdefault(alpha, []).append(val_rmse / len(train_subjects))

        # ---- pick the alpha with the lowest mean validation error ----
        min_alpha = 0
        min_value = np.inf

        print(test_subject)
        for alpha in alpha_params:
            if np.mean(alpha_dict[alpha]) < min_value:
                min_value = np.mean(alpha_dict[alpha])
                min_alpha = alpha

        # ---- final fit: train on every subject except the test subject ----
        # The list is rebuilt explicitly; the `train_subjects` left over from
        # the last validation fold also excludes that fold's validation
        # subject.
        train_subjects = non_test_subjects
        train_data, train_labels = _stack_subjects(train_subjects, macro_gt)

        test_data = glucose.iloc[gl_dict[all_subjects[test_subject]]]
        test_labels = macro_gt[gt_dict[all_subjects[test_subject]]]

        pred = _sparse_code_predict(
            train_data, train_labels, test_data, min_alpha, max_iter=10000
        )
        predicted.extend(pred)
        gnd_trth.extend(test_labels)
        sub_rmse.append(_nrmse(pred, test_labels))
    print(f"{macro_names[i]}, Mean NRMSE: {np.mean(sub_rmse)}, Std. NRMSE: {np.std(sub_rmse)}")