In [1]:
import warnings 
warnings.filterwarnings('ignore')

import torch
import torch.nn as nn
from sklearn.metrics import mean_squared_error, mean_absolute_error
import matplotlib.pyplot as plt
from DataLoader import DataLoader
from model.MLP import MLP_Tuner
from model.CNNTransformer import CNNTransformer_Tuner

# Prefer the GPU when PyTorch can see one; otherwise stay on the CPU.
device = 'cuda' if torch.cuda.is_available() else 'cpu'

import kagglehub

# Download latest version of the GDSC dataset; `path` is the local directory
# kagglehub placed the files in.
path = kagglehub.dataset_download("samiraalipour/genomics-of-drug-sensitivity-in-cancer-gdsc")
print("Path to dataset files:", path)

# The four source files, listed in the positional order DataLoader receives them.
dataset_files = (
    'GDSC_DATASET.csv',
    'Compounds-annotation.csv',
    'GDSC2-dataset.csv',
    'Cell_Lines_Details.xlsx',
)
dataloader = DataLoader(*[f"{path}/{file_name}" for file_name in dataset_files])

# get_data() returns five values: the train/test inputs and targets, plus
# input_dim, which is handed to the model tuner later on.
(X_train_tensor, y_train_tensor,
 X_test_tensor, y_test_tensor,
 input_dim) = dataloader.get_data()

Path to dataset files: /home/andrew-root/.cache/kagglehub/datasets/samiraalipour/genomics-of-drug-sensitivity-in-cancer-gdsc/versions/2
Loading Done!
Preprocess Done!
Define Done!


## ML

In [2]:
# Initialize the tuner.
# input_dim is the last value returned by dataloader.get_data() in the setup cell.
MLP_tuner = MLP_Tuner(input_dim)

# Tune hyperparameters on the training tensors only; the returned object is
# kept as `best_model` and reused by the training-curve, plotting and
# evaluation cells below.
# NOTE(review): the search space and selection criterion live in
# model/MLP.py and are not visible from this notebook.
best_model = MLP_tuner.tune_hyperparameters(X_train_tensor, y_train_tensor)

  epoch    train_loss     dur
-------  ------------  ------
      1        [36m8.1154[0m  2.3832
  epoch    train_loss     dur
-------  ------------  ------
      1        [36m8.2055[0m  2.4672
  epoch    train_loss     dur
-------  ------------  ------
      1        [36m8.1401[0m  2.5905
  epoch    train_loss     dur
-------  ------------  ------
      1        [36m8.1118[0m  2.9730
      2        [36m8.0234[0m  2.9751
      2        [36m8.0891[0m  3.0020
  epoch    train_loss     dur
-------  ------------  ------
      1        [36m8.2082[0m  3.3321
      2        [36m8.0348[0m  3.2669
  epoch    train_loss     dur
-------  ------------  ------
      1        [36m8.1227[0m  3.4822
  epoch    train_loss     dur
-------  ------------  ------
      1        [36m8.1237[0m  3.5869
      2        [36m8.0240[0m  3.6825
      3        [36m8.0154[0m  3.6877
  epoch    train_loss     dur
-------  ------------  ------
      1        [36m8.1672[0m  3.8300
      3      

In [None]:
# Continue training the tuned model one epoch at a time and record the
# per-epoch MSE on the training tensors and on the held-out test tensors.
# Note that the "validation" curve is computed on X_test_tensor / y_test_tensor,
# i.e. the same data used for the final test metrics.
train_losses = []
val_losses = []

# Convert the targets once instead of on every iteration. .detach().cpu()
# keeps the conversion valid if the tensors live on a GPU or track gradients.
y_train_np = y_train_tensor.detach().cpu().numpy()
y_test_np = y_test_tensor.detach().cpu().numpy()

for epoch in range(best_model.max_epochs):
    # NOTE(review): the training log format indicates best_model is a skorch
    # net - confirm. In skorch, partial_fit() without `epochs` runs max_epochs
    # epochs per call, which would make this loop train max_epochs**2 epochs
    # and record only one point per max_epochs epochs. epochs=1 makes each
    # iteration correspond to exactly one epoch.
    best_model.partial_fit(X_train_tensor, y_train_tensor, epochs=1)

    train_pred = best_model.predict(X_train_tensor).squeeze()
    val_pred = best_model.predict(X_test_tensor).squeeze()

    train_loss = mean_squared_error(y_train_np, train_pred)
    val_loss = mean_squared_error(y_test_np, val_pred)
    train_losses.append(train_loss)
    val_losses.append(val_loss)

     31        8.0316  3.4749
     32        8.0321  3.4788
     33        8.0315  3.4772
     34        8.0321  3.3938
     35        [36m8.0309[0m  3.3925
     36        8.0318  3.3983
     37        8.0318  3.3969
     38        8.0327  3.3939
     39        8.0312  3.3940
     40        [36m8.0305[0m  3.3907
     41        8.0309  3.3937
     42        8.0311  3.3932
     43        8.0311  3.3925
     44        8.0307  3.3955
     45        8.0316  3.3996
     46        8.0305  3.4016
     47        8.0314  3.4187
     48        8.0308  3.4044
     49        8.0308  3.3989
     50        8.0309  3.4000
     51        [36m8.0300[0m  3.3980
     52        8.0315  3.3954
     53        [36m8.0299[0m  3.4855
     54        8.0316  3.4836
     55        8.0304  3.4896
     56        [36m8.0298[0m  3.4845
     57        8.0298  3.4113
     58        8.0308  3.3877
     59        [36m8.0297[0m  3.3875
     60        8.0304  3.3864
     61        8.0302  3.3964
     62        8

In [None]:
# Plot training and validation loss to check for overfitting.
# The x-axis is built from the number of losses actually recorded rather than
# from best_model.max_epochs, so the plot still works if the training loop was
# interrupted or max_epochs changed after the losses were collected.
epochs_recorded = range(1, len(train_losses) + 1)

fig, ax = plt.subplots(figsize=(10, 6))
ax.plot(epochs_recorded, train_losses, label='Training Loss')
ax.plot(epochs_recorded, val_losses, label='Validation Loss')
ax.set_xlabel('Epochs')
ax.set_ylabel('Loss (MSE)')
ax.set_title('Training and Validation Loss')
ax.legend()
ax.grid(True)
plt.show()

In [None]:
# Evaluate the best model on the test set and report RMSE, MAE and MSE.
with torch.no_grad():
    # no_grad is kept as a guard around inference; `predictions` stays a
    # squeezed torch tensor, as before, for any later cell that uses it.
    predictions = torch.as_tensor(best_model.predict(X_test_tensor)).squeeze()

# Flatten both sides to 1-D before comparing. The shape of y_test_tensor is
# not visible here; if it is (N, 1), comparing it against squeezed (N,)
# predictions with nn.MSELoss would silently broadcast to (N, N) and yield a
# wrong RMSE (the warning is hidden by warnings.filterwarnings('ignore')).
y_true = y_test_tensor.detach().cpu().numpy().reshape(-1)
y_pred = predictions.detach().cpu().numpy().reshape(-1)

# Calculate MSE and MAE, then derive RMSE from the same MSE so the three
# reported metrics are always mutually consistent.
mse = mean_squared_error(y_true, y_pred)
mae = mean_absolute_error(y_true, y_pred)
rmse = float(mse) ** 0.5

print(f"Test RMSE: {rmse:.4f}")
print(f"Test MAE: {mae:.4f}")
print(f"Test MSE: {mse:.4f}")