# Blood Glucose Prediction
This notebook uses historical blood glucose readings, insulin dosage, carbohydrate intake, and smartwatch activity data to predict future blood glucose.

In [15]:
import numpy as np
import pandas as pd
import polars as pl
import matplotlib.pyplot as plt
import sys
import os
sys.path.append(os.path.abspath(os.path.join(os.getcwd(), '..')))
from helper.helper import float_to_time, time_to_float, float_time_range
import random
# Set seed for repeatability
def seed_everything(seed):
    """Seed NumPy's and Python's global random number generators.

    Both generators receive the same ``seed``, so code that draws from
    ``np.random`` or ``random`` afterwards produces the same sequence
    on every run.

    Args:
        seed: Value forwarded unchanged to ``np.random.seed`` and
            ``random.seed``.
    """
    # NumPy first, then the standard library generator.
    for apply_seed in (np.random.seed, random.seed):
        apply_seed(seed)
seed_everything(seed=1024)

In [16]:
# Dtypes for the identifier, time-of-day and target columns of train.csv.
train_schema = {
    'id': pl.String(),
    'p_num': pl.String(),
    'time': pl.Time(),
    'bg+1:00': pl.Float64(),
}

# Dtype class for each lagged measurement family. The insertion order here is
# the order in which a family's column is registered for every time offset.
lagged_dtypes = {
    'bg': pl.Float64,
    'insulin': pl.Float64,
    'carbs': pl.Float64,
    'hr': pl.Float64,
    'steps': pl.Float64,
    'cals': pl.Float64,
    'activity': pl.String,
}

# Register one '<family>-<offset>' column per family for every offset label
# produced by float_time_range; each column gets its own dtype instance.
for measurement_time in float_time_range(5.55, 0.00, -0.05):
    train_schema.update({
        f'{family}-{measurement_time}': dtype_cls()
        for family, dtype_cls in lagged_dtypes.items()
    })

# Names of all lagged step-count columns, used for the integer cast below.
step_columns = [
    f'steps-{offset}' for offset in float_time_range(5.55, 0.00, -0.05)
]

# Load the training set with the explicit dtypes; the listed tokens
# (including empty and single-space fields) are read as nulls.
train_df = pl.read_csv(
    '../../data/train.csv',
    schema_overrides=pl.Schema(train_schema),
    null_values=['', ' ', 'null', 'NaN', 'None'],
)

# Step counts are parsed as Float64 above and converted to Int64 here.
# NOTE(review): presumably the CSV stores counts with a decimal point, which
# is why they are not read as integers directly -- confirm against the data.
steps_as_int = pl.col(step_columns).cast(pl.Int64)
train_df = train_df.with_columns(steps_as_int)

# Last expression of the cell: displays the dataframe in the notebook.
train_df
    
# train_df[step_columns].top_k(10,by='steps-5:55')

id,p_num,time,bg-5:55,bg-5:50,bg-5:45,bg-5:40,bg-5:35,bg-5:30,bg-5:25,bg-5:20,bg-5:15,bg-5:10,bg-5:05,bg-5:00,bg-4:55,bg-4:50,bg-4:45,bg-4:40,bg-4:35,bg-4:30,bg-4:25,bg-4:20,bg-4:15,bg-4:10,bg-4:05,bg-4:00,bg-3:55,bg-3:50,bg-3:45,bg-3:40,bg-3:35,bg-3:30,bg-3:25,bg-3:20,bg-3:15,bg-3:10,…,activity-2:55,activity-2:50,activity-2:45,activity-2:40,activity-2:35,activity-2:30,activity-2:25,activity-2:20,activity-2:15,activity-2:10,activity-2:05,activity-2:00,activity-1:55,activity-1:50,activity-1:45,activity-1:40,activity-1:35,activity-1:30,activity-1:25,activity-1:20,activity-1:15,activity-1:10,activity-1:05,activity-1:00,activity-0:55,activity-0:50,activity-0:45,activity-0:40,activity-0:35,activity-0:30,activity-0:25,activity-0:20,activity-0:15,activity-0:10,activity-0:05,activity-0:00,bg+1:00
str,str,time,f64,f64,f64,f64,f64,f64,f64,f64,f64,f64,f64,f64,f64,f64,f64,f64,f64,f64,f64,f64,f64,f64,f64,f64,f64,f64,f64,f64,f64,f64,f64,f64,f64,f64,…,str,str,str,str,str,str,str,str,str,str,str,str,str,str,str,str,str,str,str,str,str,str,str,str,str,str,str,str,str,str,str,str,str,str,str,str,f64
"""p01_0""","""p01""",06:10:00,,,9.6,,,9.7,,,9.2,,,8.7,,,8.4,,,8.1,,,8.3,,,9.6,,,11.1,,,11.8,,,12.8,,…,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,13.4
"""p01_1""","""p01""",06:25:00,,,9.7,,,9.2,,,8.7,,,8.4,,,8.1,,,8.3,,,9.6,,,11.1,,,11.8,,,12.8,,,13.9,,…,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,12.8
"""p01_2""","""p01""",06:40:00,,,9.2,,,8.7,,,8.4,,,8.1,,,8.3,,,9.6,,,11.1,,,11.8,,,12.8,,,13.9,,,14.2,,…,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,15.5
"""p01_3""","""p01""",06:55:00,,,8.7,,,8.4,,,8.1,,,8.3,,,9.6,,,11.1,,,11.8,,,12.8,,,13.9,,,14.2,,,14.2,,…,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,14.8
"""p01_4""","""p01""",07:10:00,,,8.4,,,8.1,,,8.3,,,9.6,,,11.1,,,11.8,,,12.8,,,13.9,,,14.2,,,14.2,,,15.4,,…,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,12.7
…,…,…,…,…,…,…,…,…,…,…,…,…,…,…,…,…,…,…,…,…,…,…,…,…,…,…,…,…,…,…,…,…,…,…,…,…,…,…,…,…,…,…,…,…,…,…,…,…,…,…,…,…,…,…,…,…,…,…,…,…,…,…,…,…,…,…,…,…,…,…,…,…,…,…
"""p12_25294""","""p12""",23:35:00,8.8,9.1,9.2,9.4,9.8,10.2,10.4,10.3,10.1,10.0,9.8,9.5,9.2,9.0,8.9,8.7,8.3,7.8,7.6,7.6,7.7,7.7,7.7,7.6,7.7,7.8,7.9,8.0,8.1,8.3,8.4,8.5,8.6,8.6,…,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,11.1
"""p12_25295""","""p12""",23:40:00,9.1,9.2,9.4,9.8,10.2,10.4,10.3,10.1,10.0,9.8,9.5,9.2,9.0,8.9,8.7,8.3,7.8,7.6,7.6,7.7,7.7,7.7,7.6,7.7,7.8,7.9,8.0,8.1,8.3,8.4,8.5,8.6,8.6,8.5,…,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,10.9
"""p12_25296""","""p12""",23:45:00,9.2,9.4,9.8,10.2,10.4,10.3,10.1,10.0,9.8,9.5,9.2,9.0,8.9,8.7,8.3,7.8,7.6,7.6,7.7,7.7,7.7,7.6,7.7,7.8,7.9,8.0,8.1,8.3,8.4,8.5,8.6,8.6,8.5,8.4,…,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,10.7
"""p12_25297""","""p12""",23:50:00,9.4,9.8,10.2,10.4,10.3,10.1,10.0,9.8,9.5,9.2,9.0,8.9,8.7,8.3,7.8,7.6,7.6,7.7,7.7,7.7,7.6,7.7,7.8,7.9,8.0,8.1,8.3,8.4,8.5,8.6,8.6,8.5,8.4,8.3,…,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,10.5


## Dataset Description

The dataset comes from a study that was set up as follows:
- Data was collected from young adults in the UK with type 1 diabetes, who used a continuous glucose monitor (CGM), an insulin pump and a smartwatch.
- These devices collected blood glucose readings, insulin dosage, carbohydrate intake, and activity data. 
- The data collected was aggregated to five-minute intervals and formatted into samples. 
- Each sample represents a point in time and includes the aggregated five-minute intervals from the previous six hours. 
- The aim is to predict the blood glucose reading an hour into the future, for each of these samples.

#### The Training Set
- Takes samples from the first three months of study data from nine of the participants and includes the future blood glucose value.
- These training samples appear in chronological order and overlap. 
#### The Testing Set
- Takes samples from the remainder of the study period from fifteen of the participants (so unseen participants appear in the testing set).
- These testing samples do not overlap and are in a random order to avoid data leakage.

### Complexities to be aware of:
- This is medical data so there are missing values and noise in the data
- The participants did not all use the same device models (CGM, insulin pump and smartwatch) so there may be differences in the collection method of the data
- Some participants in the test set do not appear in the training set

#### Files
* `activities.txt`: A list of activity names that appear in the `activity-X:XX` columns.
* `sample_submission.csv`: A sample submission file in the correct format.
* `test.csv`: The test set.
* `train.csv`: The training set.

### Training Features
| Feature | Description |
|---|---|
| `id` | row id consisting of participant number and a count for that participant |
| `p_num` | participant number |
| `time` | time of day in the format HH:MM:SS |
| `bg-X:XX` | blood glucose reading in mmol/L, X:XX(H:MM) time in the past (e.g. bg-2:35, would be the blood glucose reading from 2 hours and 35 minutes before the time value for that row), recorded by the continuous glucose monitor |
| `insulin-X:XX` | total insulin dose received in units in the last 5 minutes, X:XX(H:MM) time in the past (e.g. insulin-2:35, would be the total insulin dose received between 2 hours and 40 minutes and 2 hours and 35 minutes before the time value for that row), recorded by the insulin pump |
| `carbs-X:XX` | total carbohydrate value consumed in grammes in the last 5 minutes, X:XX(H:MM) time in the past (e.g. carbs-2:35, would be the total carbohydrate value consumed between 2 hours and 40 minutes and 2 hours and 35 minutes before the time value for that row), recorded by the participant |
| `hr-X:XX` | mean heart rate in beats per minute in the last 5 minutes, X:XX(H:MM) time in the past (e.g. hr-2:35, would be the mean heart rate between 2 hours and 40 minutes and 2 hours and 35 minutes before the time value for that row), recorded by the smartwatch |
| `steps-X:XX` | total steps walked in the last 5 minutes, X:XX(H:MM) time in the past (e.g. steps-2:35, would be the total steps walked between 2 hours and 40 minutes and 2 hours and 35 minutes before the time value for that row), recorded by the smartwatch |
| `cals-X:XX` | total calories burned in the last 5 minutes, X:XX(H:MM) time in the past (e.g. cals-2:35, would be the total calories burned between 2 hours and 40 minutes and 2 hours and 35 minutes before the time value for that row), calculated by the smartwatch |
| `activity-X:XX` | self-declared activity performed in the last 5 minutes, X:XX(H:MM) time in the past (e.g. activity-2:35, would show a string name of the activity performed between 2 hours and 40 minutes and 2 hours and 35 minutes before the time value for that row), set on the smartwatch |
| `bg+1:00` | blood glucose reading in mmol/L an hour in the future, this is the value you will be predicting (not provided in test.csv) |