<a href="https://colab.research.google.com/github/haytham918/low-rank-expectile/blob/main/Low_Rank_Expectile.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [1]:
import pandas as pd
import numpy as np
import scipy
import matplotlib as plot
import torch
import torch.nn as nn
from sklearn.model_selection import train_test_split

In [2]:
df= pd.read_csv("/content/drive/MyDrive/low-rank-expectile/heartrate_seconds_merged.csv")
# Convert the 'Time' column to datetime format
df['Time'] = pd.to_datetime(df['Time'])


In [3]:
user_tenmin_df = df.groupby(['Id', pd.Grouper(key='Time', freq='5T')])['Value'].mean().unstack()
user_tenmin_matrix = user_tenmin_df.values
# print(user_tenmin_df)

In [4]:
nan_matrix = np.isnan(user_tenmin_matrix)
print("Matrix Entry Number: ", user_tenmin_matrix.shape[0] * user_tenmin_matrix.shape[1])
print("Nan Count: ", np.sum(nan_matrix))

Matrix Entry Number:  123718
Nan Count:  56200


In [86]:
# Split the data into trainig/validation and exclude missing values
train_data, val_data, train_mask, val_mask = train_test_split(user_tenmin_matrix, nan_matrix, test_size=0.2, random_state=445)


# Create Tensors based on train/val data
train_tensor = torch.tensor(train_data, dtype=torch.float32)
val_tensor = torch.tensor(val_data, dtype=torch.float32)


# Model definition
class LRModel(nn.Module):
  def __init__(self, number_users, number_times, rank):
    super().__init__()
    self.user_factors = nn.Embedding(number_users, rank)
    self.times_factors = nn.Embedding(number_times, rank)

    self.user_bias = nn.Embedding(number_users, 1)
    self.times_bias = nn.Embedding(number_times, 1)

    # Initializing the bias terms to zeros
    self.user_bias.weight.data.fill_(0.)
    self.times_bias.weight.data.fill_(0.)


  # Define forward propagation
  def forward(self, user, times):
    # print(self.user_factors(user).shape)
    # print(self.times_factors(times).shape)
    pred = self.user_factors(user) * self.times_factors(times)
    pred = pred.sum(1, keepdim=False)
    pred += self.user_bias(user).squeeze() + self.times_bias(times).squeeze()
    return pred

# Define Loss function excluding missing values
def loss_func(predicted, actual, mask):
    # print(predicted.shape, actual.shape, mask.shape)
    invert_mask = ~mask
    # print(predicted[invert_mask].shape, actual[invert_mask].shape)
    loss = nn.MSELoss()
    return loss(predicted[invert_mask].view(-1), actual[invert_mask].view(-1))

# Define parameters in our case
number_users, number_times = train_data.shape
rank = 6
model = LRModel(number_users, number_times, rank)

optimizer = torch.optim.Adam(model.parameters(), lr=0.01)

# Epochs and CheckpointPath
number_epochs = 800

global_best_loss = float('inf')
best_epoch = 0

print(val_mask.shape)


(3, 8837)


In [87]:
# Training
for epoch in range(number_epochs):
  user_indices = torch.arange(number_users).repeat_interleave(number_times)
  time_indices = torch.arange(number_times).repeat(number_users)
  output = model(user_indices, time_indices)

  # Calculate loss
  train_tensor_flat = train_tensor.view(-1)
  train_mask_flat = train_mask.reshape(-1)
  training_loss = loss_func(output, train_tensor_flat, train_mask_flat)

  # Backward
  optimizer.zero_grad()
  training_loss.backward()
  optimizer.step()

  with torch.no_grad():
    validation_num_user, validation_num_times = val_tensor.shape
    validation_user_indices = torch.arange(validation_num_user).repeat_interleave(validation_num_times)
    validation_time_indices = torch.arange(validation_num_times).repeat(validation_num_user)
    validation_output = model(validation_user_indices, validation_time_indices)
    validation_loss = loss_func(validation_output, val_tensor.view(-1), val_mask.reshape(-1))

  # print(f"Epoch [{epoch + 1}/{number_epochs}]: Training Loss: {training_loss.item()}; Validation Loss: {validation_loss.item()}")

  if validation_loss < global_best_loss:
      global_best_loss = validation_loss
      best_epoch = epoch
      torch.save({"Epoch": epoch, "Model_state_dict": model.state_dict(), "Optimizer_state_dict": optimizer.state_dict(), "Loss": validation_loss},
                 f"/content/drive/MyDrive/low-rank-expectile/checkpoints/model_checkpoint_epoch{epoch}.pt")

print("Best Validation Epoch: ", best_epoch + 1)
print("Best Validation Loss: ", global_best_loss)



Best Validation Epoch:  297
Best Validation Loss:  tensor(261.4429)
