In [1]:
import pandas as pd
import numpy as np
import sklearn

In [2]:
train_file = "./kaggle/brist1d/train.csv"
#test_file = "./kaggle/brist1d/test.csv"

df = pd.read_csv(train_file)
#df_test = pd.read_csv(test_file)

for col in df.columns:
    if df[col].dtype == 'object':
        df[col] = df[col].astype('category')

df.dtypes

  df = pd.read_csv(train_file)


id               category
p_num            category
time             category
bg-5:55           float64
bg-5:50           float64
                   ...   
activity-0:15    category
activity-0:10    category
activity-0:05    category
activity-0:00    category
bg+1:00           float64
Length: 508, dtype: object

## Data Cleaning - handling missing values

In [3]:
features = df.columns
bg_cols = [col for col in features if "bg" in col]
insulin_cols = [col for col in features if "insulin" in col]
carbs_cols = [col for col in features if "carbs" in col]
hr_cols = [col for col in features if "hr" in col]
steps_cols = [col for col in features if "steps" in col]
cals_cols = [col for col in features if "cals" in col]
activity_cols = [col for col in features if "activity" in col]

Fill missing values for features based on:
- mean value: blood glucose, carbs, cals
- zeroes: insulin
- forward filled (i.e. propagates the last observation forward): heart rate, steps

In [4]:
for feature in [*bg_cols, *carbs_cols, *cals_cols]:
    df[feature] = df[feature].fillna(df[feature].mean())

for feature in insulin_cols:
    df[feature] = df[feature].fillna(0)

for feature in [*hr_cols, *steps_cols]:
    df[feature] = df[feature].fillna(method="ffill")

df.isna().sum()

id                    0
p_num                 0
time                  0
bg-5:55               0
bg-5:50               0
                  ...  
activity-0:15    174293
activity-0:10    174287
activity-0:05    174271
activity-0:00    174287
bg+1:00               0
Length: 508, dtype: int64

### Fit a simple XGBoost regressor 

In [5]:
#df = df.drop(["p_num", "id", "time", *activity_cols], axis=1)
df = df.drop(["p_num", "id", "time"], axis=1)
X = df.drop("bg+1:00", axis=1)
y = df["bg+1:00"]
df.columns

Index(['bg-5:55', 'bg-5:50', 'bg-5:45', 'bg-5:40', 'bg-5:35', 'bg-5:30',
       'bg-5:25', 'bg-5:20', 'bg-5:15', 'bg-5:10',
       ...
       'activity-0:40', 'activity-0:35', 'activity-0:30', 'activity-0:25',
       'activity-0:20', 'activity-0:15', 'activity-0:10', 'activity-0:05',
       'activity-0:00', 'bg+1:00'],
      dtype='object', length=505)

In [6]:
from sklearn.model_selection import train_test_split
from sklearn.metrics import mean_squared_error, r2_score
from xgboost import XGBRegressor
from sklearn.preprocessing import StandardScaler

In [7]:
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)

# Convert activity columns to category type
for col in activity_cols:
    X_train[col] = X_train[col].astype('category')
    X_test[col] = X_test[col].astype('category')

NameError: name 'X' is not defined

In [8]:

# 3. Scale the features
scaler = StandardScaler()
#X_train_scaled = scaler.fit_transform(X_train)
#X_test_scaled = scaler.transform(X_test)
X_train_scaled = X_train
X_test_scaled = X_test

feature_types = ["c" if feat in activity_cols else "q" for feat in X_train.columns]
# 4. Initialize and train the XGBoost model
xgb_model = XGBRegressor(
    feature_types=feature_types,
    n_estimators=100,
    learning_rate=0.1,
    max_depth=5,
    min_child_weight=1,
    subsample=0.8,
    colsample_bytree=0.8,
    eval_metric='rmse',
    random_state=42,
    enable_categorical=True
)

# Train the model
xgb_model.fit(
    X_train_scaled, 
    y_train,
    eval_set=[(X_train_scaled, y_train), (X_test_scaled, y_test)],
    verbose=False
)

# 5. Make predictions
y_pred = xgb_model.predict(X_test_scaled)

# 6. Evaluate the model
mse = mean_squared_error(y_test, y_pred)
rmse = np.sqrt(mse)
r2 = r2_score(y_test, y_pred)

print(f"Model Performance Metrics:")
print(f"RMSE: {rmse:.4f}")
print(f"R² Score: {r2:.4f}")

Model Performance Metrics:
RMSE: 1.9605
R² Score: 0.5750


In [10]:
# X_testing = df_test.drop(["p_num", "id", "time", *activity_cols], axis=1)
X_testing = df_test.drop(["p_num", "id", "time"], axis=1)

for col in activity_cols:
    X_testing[col] = X_testing[col].astype('category')
    
y_testing_pred = xgb_model.predict(X_testing)
y_testing_pred

array([12.411012,  8.457562, 12.448939, ..., 10.205155,  9.483859,
       10.063545], dtype=float32)

### Writing the predictions to submission file
Submission file has format: p_number, prediction_value

In [11]:
print(df_test["id"].shape, y_testing_pred.shape, type(y_testing_pred))
#out = np.zeros((2, df_test["id"].shape[0]))
out = np.column_stack([df_test["id"], y_testing_pred])
out = pd.DataFrame(out, columns=["id", "bg+1:00"])

out.to_csv("submission.csv", index=False)


(3644,) (3644,) <class 'numpy.ndarray'>


## Iteration 2.0

In [5]:
import tqdm

import torch
import torch.nn as nn
import torch.optim as optim
import torch.nn.functional as F

import numpy as np
import matplotlib.pyplot as plt
from sklearn.preprocessing import MinMaxScaler

In [6]:
y = df["bg+1:00"]
df = df.drop("bg+1:00", axis=1)

df['time'] = pd.to_timedelta(df['time']).dt.total_seconds()

# for now we just ignore participant profiles
df.drop(columns=["p_num", "id", *activity_cols], inplace=True)

for t in df.dtypes:
    if t != "float64":
        print(t)

In [7]:
print("Data shape", df.shape)

Data shape (177024, 433)


### Normalize features

In [45]:
from sklearn.preprocessing import StandardScaler
import torch.utils
import torch.utils.data

# scale features
scaler = StandardScaler()
df = scaler.fit_transform(df)
y = scaler.fit_transform(np.reshape(y.values, (-1, 1)))

# transform to tensor and reshape
df_tensor = torch.Tensor(df)
y_tensor = torch.Tensor(y)
print(df_tensor.shape)
print(y_tensor.shape)

# create dataloader
dataset = torch.utils.data.TensorDataset(df_tensor, y_tensor)
dataloader = torch.utils.data.DataLoader(dataset, batch_size=1024, shuffle=True)


torch.Size([177024, 433])
torch.Size([177024, 1])


### RNN (LSTM)

In [46]:
class RNN(nn.Module):
    def __init__(self, input_size, hidden_size, num_layers, dropout=0.2):
        super(RNN, self).__init__()
        self.lstm = nn.LSTM(input_size=input_size, hidden_size=hidden_size, num_layers=num_layers, batch_first=True, dropout=dropout)
        self.activ = nn.ReLU()
        self.linear = nn.Linear(hidden_size, 1)

    def forward(self, x):
        out, _ = self.lstm(x)
        out = self.activ(out)
        pred = self.linear(out)

        return pred

model = RNN(input_size=df_tensor.size(1), hidden_size=256, num_layers=3)

In [47]:
optimizer = optim.Adam(model.parameters(), lr=0.005)
mse_loss = torch.nn.MSELoss()

In [48]:
losses = []

for i in tqdm.tqdm(range(10)):
    loss = 0
    for j, (data, target) in tqdm.tqdm(enumerate(dataloader)):
        optimizer.zero_grad()

        prediction = model.forward(data)[:, 0]
      
        print("Pred, target", prediction[0], target[0], prediction.shape, target.shape)

        batch_loss = mse_loss(prediction, target)
        batch_loss.backward()
        optimizer.step()

        loss += batch_loss.detach().item()
        print("Loss:", batch_loss.detach().item())
    losses.append(loss)
    print("Loss:", losses[-1])

  return F.mse_loss(input, target, reduction=self.reduction)


Pred, target tensor(0.0566, grad_fn=<SelectBackward0>) tensor([0.3748]) torch.Size([1024]) torch.Size([1024, 1])
Loss: nan




Pred, target tensor(nan, grad_fn=<SelectBackward0>) tensor([-0.3261]) torch.Size([1024]) torch.Size([1024, 1])
Loss: nan




Pred, target tensor(nan, grad_fn=<SelectBackward0>) tensor([-0.6932]) torch.Size([1024]) torch.Size([1024, 1])
Loss: nan
Pred, target tensor(nan, grad_fn=<SelectBackward0>) tensor([0.1412]) torch.Size([1024]) torch.Size([1024, 1])




Loss: nan
Pred, target tensor(nan, grad_fn=<SelectBackward0>) tensor([-0.9935]) torch.Size([1024]) torch.Size([1024, 1])




Loss: nan
Pred, target tensor(nan, grad_fn=<SelectBackward0>) tensor([0.1412]) torch.Size([1024]) torch.Size([1024, 1])




Loss: nan
Pred, target tensor(nan, grad_fn=<SelectBackward0>) tensor([2.7109]) torch.Size([1024]) torch.Size([1024, 1])




Loss: nan
Pred, target tensor(nan, grad_fn=<SelectBackward0>) tensor([1.3426]) torch.Size([1024]) torch.Size([1024, 1])




Loss: nan
Pred, target tensor(nan, grad_fn=<SelectBackward0>) tensor([-0.9935]) torch.Size([1024]) torch.Size([1024, 1])




Loss: nan
Pred, target tensor(nan, grad_fn=<SelectBackward0>) tensor([-1.0269]) torch.Size([1024]) torch.Size([1024, 1])




Loss: nan
Pred, target tensor(nan, grad_fn=<SelectBackward0>) tensor([0.2413]) torch.Size([1024]) torch.Size([1024, 1])




Loss: nan
Pred, target tensor(nan, grad_fn=<SelectBackward0>) tensor([-0.6932]) torch.Size([1024]) torch.Size([1024, 1])
Loss: nan




Pred, target tensor(nan, grad_fn=<SelectBackward0>) tensor([-0.2927]) torch.Size([1024]) torch.Size([1024, 1])
Loss: nan
Pred, target tensor(nan, grad_fn=<SelectBackward0>) tensor([0.6418]) torch.Size([1024]) torch.Size([1024, 1])




Loss: nan
Pred, target tensor(nan, grad_fn=<SelectBackward0>) tensor([-0.5263]) torch.Size([1024]) torch.Size([1024, 1])
Loss: nan




Pred, target tensor(nan, grad_fn=<SelectBackward0>) tensor([0.6751]) torch.Size([1024]) torch.Size([1024, 1])
Loss: nan
Pred, target tensor(nan, grad_fn=<SelectBackward0>) tensor([-1.2939]) torch.Size([1024]) torch.Size([1024, 1])




Loss: nan
Pred, target tensor(nan, grad_fn=<SelectBackward0>) tensor([-0.2927]) torch.Size([1024]) torch.Size([1024, 1])




Loss: nan
Pred, target tensor(nan, grad_fn=<SelectBackward0>) tensor([-0.6932]) torch.Size([1024]) torch.Size([1024, 1])
Loss: nan




Pred, target tensor(nan, grad_fn=<SelectBackward0>) tensor([1.0756]) torch.Size([1024]) torch.Size([1024, 1])
Loss: nan
Pred, target tensor(nan, grad_fn=<SelectBackward0>) tensor([1.0089]) torch.Size([1024]) torch.Size([1024, 1])




Loss: nan
Pred, target tensor(nan, grad_fn=<SelectBackward0>) tensor([-0.4929]) torch.Size([1024]) torch.Size([1024, 1])
Loss: nan




Pred, target tensor(nan, grad_fn=<SelectBackward0>) tensor([-0.5597]) torch.Size([1024]) torch.Size([1024, 1])
Loss: nan
Pred, target tensor(nan, grad_fn=<SelectBackward0>) tensor([0.4081]) torch.Size([1024]) torch.Size([1024, 1])




Loss: nan
Pred, target tensor(nan, grad_fn=<SelectBackward0>) tensor([1.6096]) torch.Size([1024]) torch.Size([1024, 1])




Loss: nan
Pred, target tensor(nan, grad_fn=<SelectBackward0>) tensor([-0.2593]) torch.Size([1024]) torch.Size([1024, 1])




Loss: nan
Pred, target tensor(nan, grad_fn=<SelectBackward0>) tensor([0.5083]) torch.Size([1024]) torch.Size([1024, 1])
Loss: nan




Pred, target tensor(nan, grad_fn=<SelectBackward0>) tensor([-0.4929]) torch.Size([1024]) torch.Size([1024, 1])
Loss: nan




Pred, target tensor(nan, grad_fn=<SelectBackward0>) tensor([0.4749]) torch.Size([1024]) torch.Size([1024, 1])
Loss: nan




Pred, target tensor(nan, grad_fn=<SelectBackward0>) tensor([-0.9602]) torch.Size([1024]) torch.Size([1024, 1])
Loss: nan




Pred, target tensor(nan, grad_fn=<SelectBackward0>) tensor([-1.0269]) torch.Size([1024]) torch.Size([1024, 1])
Loss: nan




Pred, target tensor(nan, grad_fn=<SelectBackward0>) tensor([-0.3261]) torch.Size([1024]) torch.Size([1024, 1])
Loss: nan




Pred, target tensor(nan, grad_fn=<SelectBackward0>) tensor([-1.2272]) torch.Size([1024]) torch.Size([1024, 1])
Loss: nan




Pred, target tensor(nan, grad_fn=<SelectBackward0>) tensor([-1.0269]) torch.Size([1024]) torch.Size([1024, 1])
Loss: nan




Pred, target tensor(nan, grad_fn=<SelectBackward0>) tensor([-0.7933]) torch.Size([1024]) torch.Size([1024, 1])
Loss: nan




Pred, target tensor(nan, grad_fn=<SelectBackward0>) tensor([-0.4262]) torch.Size([1024]) torch.Size([1024, 1])
Loss: nan




Pred, target tensor(nan, grad_fn=<SelectBackward0>) tensor([-0.6598]) torch.Size([1024]) torch.Size([1024, 1])
Loss: nan




Pred, target tensor(nan, grad_fn=<SelectBackward0>) tensor([-0.2927]) torch.Size([1024]) torch.Size([1024, 1])
Loss: nan




Pred, target tensor(nan, grad_fn=<SelectBackward0>) tensor([1.6430]) torch.Size([1024]) torch.Size([1024, 1])
Loss: nan




Pred, target tensor(nan, grad_fn=<SelectBackward0>) tensor([0.1078]) torch.Size([1024]) torch.Size([1024, 1])
Loss: nan




Pred, target tensor(nan, grad_fn=<SelectBackward0>) tensor([0.3748]) torch.Size([1024]) torch.Size([1024, 1])
Loss: nan




Pred, target tensor(nan, grad_fn=<SelectBackward0>) tensor([0.9087]) torch.Size([1024]) torch.Size([1024, 1])
Loss: nan




Pred, target tensor(nan, grad_fn=<SelectBackward0>) tensor([0.6751]) torch.Size([1024]) torch.Size([1024, 1])
Loss: nan




Pred, target tensor(nan, grad_fn=<SelectBackward0>) tensor([2.0434]) torch.Size([1024]) torch.Size([1024, 1])
Loss: nan




Pred, target tensor(nan, grad_fn=<SelectBackward0>) tensor([-0.7266]) torch.Size([1024]) torch.Size([1024, 1])
Loss: nan




Pred, target tensor(nan, grad_fn=<SelectBackward0>) tensor([-0.3261]) torch.Size([1024]) torch.Size([1024, 1])
Loss: nan




Pred, target tensor(nan, grad_fn=<SelectBackward0>) tensor([-0.1592]) torch.Size([1024]) torch.Size([1024, 1])
Loss: nan




Pred, target tensor(nan, grad_fn=<SelectBackward0>) tensor([0.3414]) torch.Size([1024]) torch.Size([1024, 1])
Loss: nan




Pred, target tensor(nan, grad_fn=<SelectBackward0>) tensor([0.7419]) torch.Size([1024]) torch.Size([1024, 1])
Loss: nan




Pred, target tensor(nan, grad_fn=<SelectBackward0>) tensor([-0.5931]) torch.Size([1024]) torch.Size([1024, 1])
Loss: nan




Pred, target tensor(nan, grad_fn=<SelectBackward0>) tensor([-0.3594]) torch.Size([1024]) torch.Size([1024, 1])
Loss: nan
Pred, target tensor(nan, grad_fn=<SelectBackward0>) tensor([-0.1258]) torch.Size([1024]) torch.Size([1024, 1])




Loss: nan
Pred, target tensor(nan, grad_fn=<SelectBackward0>) tensor([-0.7266]) torch.Size([1024]) torch.Size([1024, 1])




Loss: nan
Pred, target tensor(nan, grad_fn=<SelectBackward0>) tensor([-0.9268]) torch.Size([1024]) torch.Size([1024, 1])




Loss: nan
Pred, target tensor(nan, grad_fn=<SelectBackward0>) tensor([-0.3928]) torch.Size([1024]) torch.Size([1024, 1])




Loss: nan
Pred, target tensor(nan, grad_fn=<SelectBackward0>) tensor([0.8086]) torch.Size([1024]) torch.Size([1024, 1])




Loss: nan




Pred, target tensor(nan, grad_fn=<SelectBackward0>) tensor([0.0744]) torch.Size([1024]) torch.Size([1024, 1])
Loss: nan




Pred, target tensor(nan, grad_fn=<SelectBackward0>) tensor([-1.3940]) torch.Size([1024]) torch.Size([1024, 1])
Loss: nan




Pred, target tensor(nan, grad_fn=<SelectBackward0>) tensor([3.0113]) torch.Size([1024]) torch.Size([1024, 1])
Loss: nan




Pred, target tensor(nan, grad_fn=<SelectBackward0>) tensor([1.0422]) torch.Size([1024]) torch.Size([1024, 1])
Loss: nan




Pred, target tensor(nan, grad_fn=<SelectBackward0>) tensor([-0.2593]) torch.Size([1024]) torch.Size([1024, 1])
Loss: nan




Pred, target tensor(nan, grad_fn=<SelectBackward0>) tensor([-0.9602]) torch.Size([1024]) torch.Size([1024, 1])
Loss: nan




Pred, target tensor(nan, grad_fn=<SelectBackward0>) tensor([-0.1592]) torch.Size([1024]) torch.Size([1024, 1])
Loss: nan




Pred, target tensor(nan, grad_fn=<SelectBackward0>) tensor([-0.0925]) torch.Size([1024]) torch.Size([1024, 1])
Loss: nan




Pred, target tensor(nan, grad_fn=<SelectBackward0>) tensor([-1.6276]) torch.Size([1024]) torch.Size([1024, 1])
Loss: nan




Pred, target tensor(nan, grad_fn=<SelectBackward0>) tensor([2.2103]) torch.Size([1024]) torch.Size([1024, 1])
Loss: nan




Pred, target tensor(nan, grad_fn=<SelectBackward0>) tensor([-0.6932]) torch.Size([1024]) torch.Size([1024, 1])
Loss: nan




Pred, target tensor(nan, grad_fn=<SelectBackward0>) tensor([-0.7933]) torch.Size([1024]) torch.Size([1024, 1])
Loss: nan




Pred, target tensor(nan, grad_fn=<SelectBackward0>) tensor([-0.5263]) torch.Size([1024]) torch.Size([1024, 1])
Loss: nan




Pred, target tensor(nan, grad_fn=<SelectBackward0>) tensor([0.7419]) torch.Size([1024]) torch.Size([1024, 1])
Loss: nan




Pred, target tensor(nan, grad_fn=<SelectBackward0>) tensor([-0.3594]) torch.Size([1024]) torch.Size([1024, 1])
Loss: nan




Pred, target tensor(nan, grad_fn=<SelectBackward0>) tensor([1.3426]) torch.Size([1024]) torch.Size([1024, 1])
Loss: nan




Pred, target tensor(nan, grad_fn=<SelectBackward0>) tensor([-0.1592]) torch.Size([1024]) torch.Size([1024, 1])
Loss: nan




Pred, target tensor(nan, grad_fn=<SelectBackward0>) tensor([-0.6598]) torch.Size([1024]) torch.Size([1024, 1])
Loss: nan




Pred, target tensor(nan, grad_fn=<SelectBackward0>) tensor([-1.5275]) torch.Size([1024]) torch.Size([1024, 1])
Loss: nan




Pred, target tensor(nan, grad_fn=<SelectBackward0>) tensor([-0.2593]) torch.Size([1024]) torch.Size([1024, 1])
Loss: nan




Pred, target tensor(nan, grad_fn=<SelectBackward0>) tensor([-0.7933]) torch.Size([1024]) torch.Size([1024, 1])
Loss: nan




Pred, target tensor(nan, grad_fn=<SelectBackward0>) tensor([0.3748]) torch.Size([1024]) torch.Size([1024, 1])
Loss: nan


78it [00:18,  4.11it/s]
  0%|          | 0/10 [00:18<?, ?it/s]

Pred, target tensor(nan, grad_fn=<SelectBackward0>) tensor([-1.0937]) torch.Size([1024]) torch.Size([1024, 1])





KeyboardInterrupt: 