In [1]:
# Shell escape: print the GPU model, driver/CUDA version and current memory use.
!nvidia-smi

Fri Dec  6 17:40:51 2024       
+-----------------------------------------------------------------------------------------+
| NVIDIA-SMI 550.120                Driver Version: 550.120        CUDA Version: 12.4     |
|-----------------------------------------+------------------------+----------------------+
| GPU  Name                 Persistence-M | Bus-Id          Disp.A | Volatile Uncorr. ECC |
| Fan  Temp   Perf          Pwr:Usage/Cap |           Memory-Usage | GPU-Util  Compute M. |
|                                         |                        |               MIG M. |
|   0  NVIDIA GeForce RTX 3060        Off |   00000000:01:00.0 Off |                  N/A |
|  0%   38C    P8             21W /  170W |      36MiB /  12288MiB |      0%      Default |
|                                         |                        |                  N/A |
+-----------------------------------------+------------------------+----------------------+
                                                

In [2]:
import warnings 
warnings.filterwarnings('ignore')

import torch
import torch.nn as nn
from sklearn.metrics import mean_squared_error, mean_absolute_error
import matplotlib.pyplot as plt
from DataLoader import DataLoader
from model.MLP import MLP_Tuner
from model.CNNTransformer import CNNTransformer_Tuner

# Pick the compute device name: the GPU when PyTorch reports CUDA, the CPU otherwise.
if torch.cuda.is_available():
    device = 'cuda'
else:
    device = 'cpu'

import kagglehub

# Download the latest version of the GDSC dataset; `path` is its local directory.
path = kagglehub.dataset_download("samiraalipour/genomics-of-drug-sensitivity-in-cancer-gdsc")

print("Path to dataset files:", path)

# NOTE: this DataLoader is the project-local class (`from DataLoader import DataLoader`),
# not torch.utils.data.DataLoader. It receives the four dataset files in positional order.
dataloader = DataLoader(
    *(
        path + suffix
        for suffix in (
            '/GDSC_DATASET.csv',
            '/Compounds-annotation.csv',
            '/GDSC2-dataset.csv',
            '/Cell_Lines_Details.xlsx',
        )
    )
)

# Train/test tensors plus the feature width that the model tuners are built with.
(
    X_train_tensor,
    y_train_tensor,
    X_test_tensor,
    y_test_tensor,
    input_dim,
) = dataloader.get_data()

Path to dataset files: /home/andrew-root/.cache/kagglehub/datasets/samiraalipour/genomics-of-drug-sensitivity-in-cancer-gdsc/versions/2
Loading Done!
Preprocess Done!
Define Done!


## Deep Learning (CNN-Transformer)

In [3]:
# Initialize the tuner for the CNN-Transformer model. `input_dim` is the value
# returned by dataloader.get_data() in the setup cell.
CNNTransformer_tuner = CNNTransformer_Tuner(input_dim)

# Tune hyperparameters using the training split only (the test split is not passed in).
# `best_model` is what the later cells call partial_fit() / predict() on, so it is
# presumably the refitted best estimator — TODO confirm against model/CNNTransformer.py.
best_model = CNNTransformer_tuner.tune_hyperparameters(X_train_tensor, y_train_tensor)

  epoch    train_loss     dur
-------  ------------  ------
      1        [36m8.0740[0m  2.9341
  epoch    train_loss     dur
-------  ------------  ------
      1        [36m8.1329[0m  2.9242
  epoch    train_loss     dur
-------  ------------  ------
      1        [36m8.0846[0m  2.8205
  epoch    train_loss     dur
-------  ------------  ------
      1        [36m8.0769[0m  3.2774
  epoch    train_loss     dur
-------  ------------  ------
      1        [36m8.1276[0m  3.7434
      2        [36m8.0265[0m  3.4934
      2        [36m8.0812[0m  3.5717
  epoch    train_loss     dur
-------  ------------  ------
      1        [36m8.0866[0m  4.2404
      2        [36m8.0500[0m  3.9118
  epoch    train_loss     dur
-------  ------------  ------
      1        [36m8.1342[0m  4.7493
      2        [36m8.0177[0m  4.5035
  epoch    train_loss     dur
-------  ------------  ------
      1        [36m8.1776[0m  5.9158
      2        [36m8.0712[0m  5.3252
      3      

In [4]:
# Loss histories; each pass of the loop below contributes one entry to both lists.
train_losses, val_losses = [], []

for epoch in range(1): # best_model.max_epochs
    # Further training on the training split. The captured log resumes at a later
    # epoch number, so this presumably continues from the tuned weights — TODO confirm.
    best_model.partial_fit(X_train_tensor, y_train_tensor)

    # Predict the training split first, then the test split, with the updated model.
    train_pred, val_pred = (
        best_model.predict(split).squeeze()
        for split in (X_train_tensor, X_test_tensor)
    )

    # MSE per split. Note that the "validation" figure is measured on the test split.
    train_loss = mean_squared_error(y_train_tensor.numpy(), train_pred)
    val_loss = mean_squared_error(y_test_tensor.numpy(), val_pred)

    train_losses += [train_loss]
    val_losses += [val_loss]

     31        8.0288  3.4695
     32        8.0302  3.5436
     33        8.0286  3.5408
     34        8.0288  3.5408
     35        8.0294  3.5421
     36        8.0291  3.4661
     37        8.0286  3.5433
     38        8.0285  3.5390
     39        8.0286  3.5454
     40        8.0282  3.5453
     41        8.0303  3.5128
     42        8.0296  3.6360
     43        8.0285  3.6369
     44        8.0290  3.6365
     45        8.0290  3.6399
     46        8.0289  3.5651
     47        8.0285  3.6415
     48        8.0287  3.6379
     49        8.0289  3.6379
     50        [36m8.0274[0m  3.6395
     51        8.0284  3.5624
     52        8.0284  3.6369
     53        8.0289  3.6369
     54        8.0296  3.5649
     55        8.0285  3.7094
     56        8.0283  3.5679
     57        8.0286  3.5275
     58        8.0291  3.5876
     59        8.0288  3.5342
     60        8.0281  3.6121


In [5]:
# NOTE: this cell is inert. The plotting code below is wrapped in a string literal,
# so nothing is drawn and the cell merely echoes the string as its output.
# NOTE(review): the x-axis uses range(1, best_model.max_epochs + 1), while the loop that
# fills train_losses / val_losses runs range(1) iterations. The lengths only agree when
# max_epochs == 1, so reconcile the two before re-enabling the plot.
'''# Plot training and validation loss to check for overfitting
plt.figure(figsize=(10, 6))
plt.plot(range(1, best_model.max_epochs + 1), train_losses, label='Training Loss')
plt.plot(range(1, best_model.max_epochs + 1), val_losses, label='Validation Loss')
plt.xlabel('Epochs')
plt.ylabel('Loss (MSE)')
plt.title('Training and Validation Loss')
plt.legend()
plt.grid(True)
plt.show()'''

"# Plot training and validation loss to check for overfitting\nplt.figure(figsize=(10, 6))\nplt.plot(range(1, best_model.max_epochs + 1), train_losses, label='Training Loss')\nplt.plot(range(1, best_model.max_epochs + 1), val_losses, label='Validation Loss')\nplt.xlabel('Epochs')\nplt.ylabel('Loss (MSE)')\nplt.title('Training and Validation Loss')\nplt.legend()\nplt.grid(True)\nplt.show()"

In [6]:
# Evaluate the best model on the test set and report RMSE, MAE and MSE.
# NOTE(review): no explicit best_model.eval() call is made; predict() is presumably
# responsible for running the network in inference mode — TODO confirm.
with torch.no_grad():
    predictions = best_model.predict(X_test_tensor).squeeze()
    # Kept as a tensor so `predictions` has the same type as before for later cells.
    predictions = torch.tensor(predictions)

    # Score everything from the same pair of NumPy arrays. detach().cpu() makes the
    # conversion safe even if the target tensor lives on the GPU or tracks gradients.
    y_true = y_test_tensor.detach().cpu().numpy()
    y_pred = predictions.numpy()

    # Calculate MAE and MSE once, then derive RMSE from that MSE. The previous
    # nn.MSELoss path broadcasts when prediction and target shapes differ (its warning
    # is hidden by the notebook-wide warnings filter) and needs both tensors on the
    # same device, so RMSE could drift from the MSE printed next to it.
    mae = mean_absolute_error(y_true, y_pred)
    mse = mean_squared_error(y_true, y_pred)
    rmse = float(mse) ** 0.5

    print(f"Test RMSE: {rmse:.4f}")
    print(f"Test MAE: {mae:.4f}")
    print(f"Test MSE: {mse:.4f}")

Test RMSE: 2.8407
Test MAE: 2.1555
Test MSE: 8.0697


Test RMSE: 2.8407

Test MAE: 2.1555

Test MSE: 8.0697