## Import libraries

In [2]:
import pickle
import torch
from torch.utils.data import Dataset, DataLoader

import time

import torch
import numpy as np
from transformers import AutoTokenizer, AutoModelForCausalLM, BitsAndBytesConfig, GPTQConfig, AutoConfig, AutoModelForSeq2SeqLM
from sklearn.metrics import mean_squared_error
from tqdm import tqdm
from chronos import ChronosPipeline, ChronosModel, ChronosConfig
import psutil

## Dataloaders

In [3]:
class TimeSeriesDataset(Dataset):
    """Sliding-window dataset over a collection of dataframes with a 'CGM' column.

    Every dataframe is cut into overlapping windows (stride 1) of
    ``context_length + prediction_length`` consecutive rows. One sample is the
    pair ``(context, target)``: the first ``context_length`` values of a window
    and the ``prediction_length`` values that immediately follow them.
    Windows never span two dataframes.

    Args:
        data: Iterable of dataframes; each must support ``len(df)`` and
            ``df['CGM'].values``.
        context_length: Number of past values in each context.
        prediction_length: Number of future values in each target.
    """

    def __init__(self, data, context_length, prediction_length):
        self.data = data
        self.context_length = context_length
        self.prediction_length = prediction_length

    def _num_windows(self, df):
        """Return how many complete windows fit in ``df`` (never negative)."""
        # A frame shorter than one window would otherwise produce a negative
        # count, which shrinks __len__ and shifts the index mapping of every
        # frame that comes after it.
        return max(0, len(df) - self.context_length - self.prediction_length + 1)

    def __len__(self):
        """Total number of windows across all dataframes."""
        return sum(self._num_windows(df) for df in self.data)

    def __getitem__(self, idx):
        """Return ``(context, target)`` tensors for the global window index ``idx``.

        Negative indices count from the end, as with a list.

        Raises:
            IndexError: If ``idx`` is outside ``[-len(self), len(self))``.
        """
        if idx < 0:
            idx += len(self)
        if idx < 0:
            raise IndexError("Index out of range in dataset")

        # Walk the frames, subtracting each frame's window count until the
        # index falls inside the current frame.
        remaining = idx
        for df in self.data:
            window_count = self._num_windows(df)
            if remaining < window_count:
                values = df['CGM'].values
                split = remaining + self.context_length
                context = values[remaining:split]
                target = values[split:split + self.prediction_length]
                return torch.tensor(context), torch.tensor(target)
            remaining -= window_count
        raise IndexError("Index out of range in dataset")


In [4]:
def create_dataloaders(dataset_name, data, split_ratios, context_window, prediction_window):
    """Split every series chronologically and wrap the parts in TimeSeriesDataset objects.

    Despite the name, this returns datasets, not ``DataLoader`` instances.

    For each series ``data[i]``:
      * series with at most ``context_window + prediction_window`` rows are skipped;
      * series too short to give all three splits at least one window
        (fewer than ``3 * sample_size + 1`` rows) go entirely to training;
      * otherwise the rows are cut, in order, into train / validation / test
        slices sized from ``split_ratios``. Validation and test each get at
        least one sample, taken from the training share when their computed
        share rounds down to zero.

    Args:
        dataset_name: Label used only in the progress message.
        data: Indexable collection of dataframes (accessed as ``data[i]``).
        split_ratios: Three fractions ``(train, val, test)`` that sum to 1.
        context_window: Context length passed to ``TimeSeriesDataset``.
        prediction_window: Prediction length passed to ``TimeSeriesDataset``.

    Returns:
        Tuple ``(train_dataset, val_dataset, test_dataset)``.

    Raises:
        AssertionError: If the three ratios do not sum to 1.
    """
    # Load the datasets
    print(f"Loading {dataset_name} datasets...")

    train_ratio, val_ratio, test_ratio = split_ratios

    # Compare with a tolerance: sums of decimal fractions are not exact in
    # binary floating point (0.7 + 0.2 + 0.1 == 0.9999999999999999), so an
    # exact == 1 check rejects perfectly valid ratios.
    assert abs(train_ratio + val_ratio + test_ratio - 1.0) < 1e-9, \
        "split_ratios must sum to 1"

    train_data, val_data, test_data = [], [], []
    sample_size = context_window + prediction_window

    for i in range(len(data)):
        series = data[i]
        num_rows = len(series)

        if num_rows <= sample_size:
            # Too short to produce a training sample; skipped, as before.
            continue

        if num_rows < 3*sample_size + 1:
            # Not enough rows for three non-empty splits: train only.
            train_data.append(series)
            continue

        # Rows left to distribute once every split has its minimum length.
        available_samples = (num_rows - 3*sample_size - 1)
        training_samples = int(available_samples * train_ratio)
        validation_samples = int(available_samples * val_ratio)
        testing_samples = int(available_samples * test_ratio)

        # Guarantee at least one validation and one test sample.
        if validation_samples == 0:
            training_samples -= 1
            validation_samples += 1
        if testing_samples == 0:
            training_samples -= 1
            testing_samples += 1

        train_end = training_samples + sample_size + 1
        val_end = training_samples + validation_samples + 2*sample_size + 2

        train_data.append(series.iloc[:train_end])
        val_data.append(series.iloc[train_end:val_end])
        test_data.append(series.iloc[val_end:])

    # TimeSeries Dataloaders
    train_dataset = TimeSeriesDataset(train_data, context_window, prediction_window)
    val_dataset = TimeSeriesDataset(val_data, context_window, prediction_window)
    test_dataset = TimeSeriesDataset(test_data, context_window, prediction_window)

    print(f"Number of training examples: {len(train_dataset)}")
    print(f"Number of validation examples: {len(val_dataset)}")
    print(f"Number of test examples: {len(test_dataset)}")

    return train_dataset, val_dataset, test_dataset

In [18]:
# Function to load model with quantization
def load_model(model_name, quantization_bits=None, device_map="cuda:0"):
    """Load a Chronos pipeline, optionally with a bitsandbytes quantization config.

    Args:
        model_name: Model identifier or path handed to
            ``ChronosPipeline.from_pretrained``.
        quantization_bits: ``8`` or ``4`` selects the matching
            ``BitsAndBytesConfig``. Any other value, including ``None``, loads
            the model without a quantization config.
        device_map: Device placement forwarded to ``from_pretrained``. Defaults
            to ``"cuda:0"``, the value that used to be hard-coded, so existing
            calls behave as before.

    Returns:
        The pipeline object returned by ``ChronosPipeline.from_pretrained``.
    """
    load_kwargs = {"low_cpu_mem_usage": True, "device_map": device_map}

    if quantization_bits == 8:
        # NOTE(review): a threshold of 0.0 differs from the library default —
        # confirm this is intentional.
        load_kwargs["quantization_config"] = BitsAndBytesConfig(
            load_in_8bit=True,
            llm_int8_threshold=0.0
        )
    elif quantization_bits == 4:
        load_kwargs["quantization_config"] = BitsAndBytesConfig(
            load_in_4bit=True,
            bnb_4bit_compute_dtype=torch.bfloat16,
            bnb_4bit_use_double_quant=True,
            bnb_4bit_quant_storage=torch.bfloat16
        )

    # A single call site: the quantization config is only passed when one was built.
    return ChronosPipeline.from_pretrained(model_name, **load_kwargs)

# Root-mean-square error between two equally shaped collections
def calculate_rmse(predictions, targets):
    """Return the square root of the mean squared error of the two inputs."""
    mse = mean_squared_error(predictions, targets)
    return np.sqrt(mse)

def mean_absolute_percentage_error(predictions,targets):
    """Return the mean absolute percentage error, in percent.

    Computes ``mean(|targets - predictions| / |targets|) * 100`` over all elements.

    Args:
        predictions: Array-like of predicted values (arrays or plain sequences).
        targets: Array-like of reference values, same shape as ``predictions``.
            Note the argument order: predictions first, targets second.

    Returns:
        The error as a float percentage. A target equal to zero makes the
        corresponding term infinite or NaN, because the error is divided by it.
    """
    # Coerce to arrays so plain Python lists work; arrays pass through unchanged.
    predictions = np.asarray(predictions)
    targets = np.asarray(targets)
    return np.mean(np.abs((targets - predictions) / targets)) * 100

# Function to print the dtype, count, and model size of parameters
def print_model_dtype_and_size(model):
    """Print, per parameter dtype, the element count and storage size of ``model``.

    One line is printed per dtype, followed by the total element count and the
    total size in MB (1 MB = 1024**2 bytes).

    Args:
        model: Object exposing ``parameters()`` that yields tensors.

    Returns:
        None. The report is written to standard output.
    """
    # dtype -> [element count, size in bytes]
    per_dtype = {}
    total_parameters = 0

    for param in model.parameters():
        count_param = param.numel()
        total_parameters += count_param
        stats = per_dtype.setdefault(param.dtype, [0, 0])
        stats[0] += count_param
        # Ask the tensor for its element width instead of using a fixed lookup
        # table, which raised KeyError for any dtype it did not list.
        stats[1] += count_param * param.element_size()

    total_size = 0
    for dtype, (count, size) in per_dtype.items():
        total_size += size
        print(f"Dtype: {dtype}, Count: {count}, Size: {size / (1024 ** 2):.2f} MB")

    print(f"Total parameters: {total_parameters}")
    print(f"Total model size: {total_size / (1024 ** 2):.2f} MB")

# Function to perform inference and measure time with tqdm progress bar
def test_model(pipeline, dataloader, setting_name, batch_size, num_samples=10):
    """Run ``pipeline`` over ``dataloader`` and print RMSE, MAPE and timing.

    For every batch the pipeline draws ``num_samples`` forecasts, the median
    over the sample dimension is taken as the point forecast, and the elapsed
    time is normalised per sample path and per series in the batch. After the
    loop a single summary line is printed.

    Args:
        pipeline: Object with ``predict(inputs, prediction_length=..., num_samples=...)``.
        dataloader: Iterable of ``(inputs, targets)`` tensor batches.
            Assumes both are 2-D with the batch on the first axis — TODO confirm.
        setting_name: Label printed in front of the metrics.
        batch_size: Kept for backward compatibility. The timing now uses the
            actual size of each batch, so a smaller final batch no longer
            skews the average.
        num_samples: Number of sample paths requested per series.

    Returns:
        Tuple ``(all_inputs, all_outputs)``. Both are always empty lists: the
        function never stores inputs or outputs in them.
    """
    all_predictions = []
    all_targets = []
    inference_times = []
    all_inputs = []
    all_outputs = []

    with torch.no_grad():
        for inputs, targets in tqdm(dataloader, desc="Testing"):
            # Take the horizon and batch size from the batch itself rather
            # than from notebook globals or the nominal batch size.
            horizon = targets.shape[-1]
            current_batch_size = inputs.shape[0]

            # perf_counter is a monotonic clock intended for interval timing.
            start_time = time.perf_counter()
            outputs = pipeline.predict(inputs, prediction_length=horizon, num_samples=num_samples)
            end_time = time.perf_counter()

            # Median across dim 1 (presumably the sample-path dimension) is
            # used as the point forecast.
            median_quantile = torch.quantile(outputs, q=0.5, dim=1)

            batch_time = (end_time - start_time)/num_samples
            inference_times.append(batch_time/current_batch_size)

            all_predictions.append(median_quantile.numpy())
            all_targets.append(targets.numpy())

    all_predictions = np.concatenate(all_predictions, axis=0)
    all_targets = np.concatenate(all_targets, axis=0)

    # Align the forecast layout with the targets before scoring.
    aligned_predictions = all_predictions.reshape(all_targets.shape[0], all_targets.shape[1])
    mape = mean_absolute_percentage_error(aligned_predictions, all_targets)
    rmse = calculate_rmse(aligned_predictions, all_targets)
    avg_inference_time = np.mean(inference_times)

    # The time is printed in milliseconds (seconds * 1e3).
    print(f"{setting_name}. RMSE: {rmse:.2f}, MAPE: {mape:.2f}, Avg Inference Time: {(avg_inference_time*1e3):.3f}")

    return all_inputs, all_outputs

In [6]:
# Parameters
# Window lengths are expressed in time steps; the series has 4 readings per hour.
num_samples_per_hour = 4
context_window = 24 * num_samples_per_hour  # 24 hours of data, 4 points per hour (every 15 minutes) = 96 steps
prediction_window = 3 * num_samples_per_hour  # 3 hours of data, 4 points per hour = 12 steps

## Model configuration

In [7]:
# dataset options 'ShanghaiT1DM','ShanghaiT2DM','Patient0', 'Dinamo_Shanghai_T1DM'
#
# Each option below pairs a dataset key with a base checkpoint and one or more
# fine-tuned checkpoints. Leave exactly one option uncommented.

##### Option 1
#dataset = 'Dinamo_Shanghai_T1DM'
#base_model = "amazon/chronos-t5-tiny"
#finetuned_model = 'moschouChry/finetuned-chronos-tiny-type-1'

##### Option 2
#dataset = 'Dinamo_Shanghai_T1DM'
#base_model = "amazon/chronos-t5-small"
#finetuned_model = 'moschouChry/finetuned-chronos-small-type-1'

##### Option 3
#dataset = 'ShanghaiT2DM'
#base_model = "amazon/chronos-t5-tiny"
#finetuned_model = 'moschouChry/finetuned-chronos-tiny-type-2'

##### Option 4
#dataset = 'ShanghaiT2DM'
#base_model = "amazon/chronos-t5-small"
#finetuned_model = 'moschouChry/finetuned-chronos-small-type-2'

##### Option 5
#dataset = 'Patient0'
#base_model = "amazon/chronos-t5-tiny"
#finetuned_model = 'moschouChry/finetuned-chronos-tiny-patient-0'

##### Option 6
# NOTE(review): identical to Option 5 — possibly meant to name a different
# checkpoint; confirm or remove.
#dataset = 'Patient0'
#base_model = "amazon/chronos-t5-tiny"
#finetuned_model = 'moschouChry/finetuned-chronos-tiny-patient-0'

##### Option 7
# Active configuration: the Patient0 data with the tiny base checkpoint and
# four fine-tuned checkpoints.
# NOTE(review): finetuned_model_1..3 point at repositories named "type-1_*"
# although the dataset is 'Patient0' — confirm these are the intended checkpoints.
dataset = 'Patient0'
base_model = "amazon/chronos-t5-tiny"
finetuned_model =  'moschouChry/finetuned-chronos-tiny-patient-0'
finetuned_model_1 = 'moschouChry/finetuned-chronos-tiny-type-1_no_lora'
finetuned_model_2 = 'moschouChry/finetuned-chronos-tiny-type-1_lora_option_1'
finetuned_model_3 = 'moschouChry/finetuned-chronos-tiny-type-1_lora_option_2'

# Load the dictionary from the file
# SECURITY: pickle.load can execute arbitrary code while deserializing; only
# open 'dataset_dictionary.pkl' when it comes from a trusted source.
with open('dataset_dictionary.pkl', 'rb') as file:
    loaded_data_dict = pickle.load(file)

# Pick the entry stored under the configured dataset key (raises KeyError if
# the key is absent) and keep the key itself as the dataset's display name.
data = loaded_data_dict[dataset]
dataset_name = dataset

In [8]:
# Fractions of each series assigned to the train / validation / test splits.
train_ratio, val_ratio, test_ratio = 0.9, 0.05, 0.05
split_ratios = [train_ratio, val_ratio, test_ratio]

# Build the three windowed datasets from the loaded series.
train_dataset, val_dataset, test_dataset = create_dataloaders(
    dataset_name,
    data,
    split_ratios,
    context_window,
    prediction_window,
)

Loading ShanghaiT2DM datasets...
Number of training examples: 69537
Number of validation examples: 3982
Number of test examples: 3920


In [9]:
# Number of windows per batch, shared by all three loaders.
batch_size = 512

# One loader per split, built in the order train, test, validation.
train_dataloader, test_dataloader, val_dataloader = (
    DataLoader(split, batch_size=batch_size)
    for split in (train_dataset, test_dataset, val_dataset)
)

In [None]:
# Evaluation settings: label -> [model id, quantization bits (None, 8 or 4)].
# Quantized runs can be added with entries such as
#   "Chronos_finetuned Type 2 8 bit": [finetuned_model, 8]
#   "Chronos_finetuned Type 2 4 bit": [finetuned_model, 4]
#
# finetuned_model_2 is the "...lora_option_1" checkpoint and finetuned_model_3
# the "...lora_option_2" one, so they are paired with the "Lora 1" and "Lora 2"
# labels respectively (the two were previously swapped).
# NOTE(review): the "Type 1" label is attached to the patient-0 checkpoint and
# the "Patient 0 (No Lora)" label to a repository named "type-1_no_lora" —
# confirm that these labels describe the checkpoints they report on.
settings = {
    "Chronos_Original": [base_model, None],
    "Chronos_finetuned Type 1": [finetuned_model, None],
    "Chronos_finetuned Patient 0 (No Lora)": [finetuned_model_1, None],
    "Chronos_finetuned Patient 0 (Lora 1)": [finetuned_model_2, None],
    "Chronos_finetuned Patient 0 (Lora 2)": [finetuned_model_3, None],
}

# Settings evaluated with 50 sample paths per forecast; all others use 20.
settings_with_50_samples = {
    "Chronos_finetuned Patient 0 (No Lora)",
    "Chronos_finetuned Patient 0 (Lora 1)",
    "Chronos_finetuned Patient 0 (Lora 2)",
}

for setting_name, (model_name, quantization_bits) in settings.items():

    # Drop the reference to the previously loaded model first; while it is
    # still referenced, empty_cache() cannot release the memory it occupies.
    pipeline = None
    torch.cuda.empty_cache()

    print(setting_name)
    NUM_SAMPLES = 50 if setting_name in settings_with_50_samples else 20  # default 20

    pipeline = load_model(model_name, quantization_bits)
    print_model_dtype_and_size(pipeline.model)

    test_model(pipeline, test_dataloader, setting_name, batch_size, NUM_SAMPLES)