In [1]:
# Shell escape: print the GPU model, driver/CUDA versions and current memory usage
# of the machine this kernel runs on, as a sanity check before training.
!nvidia-smi

Fri Dec  6 16:16:11 2024       
+-----------------------------------------------------------------------------------------+
| NVIDIA-SMI 550.120                Driver Version: 550.120        CUDA Version: 12.4     |
|-----------------------------------------+------------------------+----------------------+
| GPU  Name                 Persistence-M | Bus-Id          Disp.A | Volatile Uncorr. ECC |
| Fan  Temp   Perf          Pwr:Usage/Cap |           Memory-Usage | GPU-Util  Compute M. |
|                                         |                        |               MIG M. |
|   0  NVIDIA GeForce RTX 3060        Off |   00000000:01:00.0  On |                  N/A |
| 53%   48C    P2             50W /  170W |     194MiB /  12288MiB |     11%      Default |
|                                         |                        |                  N/A |
+-----------------------------------------+------------------------+----------------------+
                                                

In [2]:
# --- Imports and global configuration -------------------------------------
# The warning filter is installed before the heavy imports on purpose, so that
# import-time warnings stay silenced exactly as before.
# NOTE(review): this hides *every* warning for the rest of the notebook,
# including ones that signal real problems (e.g. torch's shape-broadcast
# warning in loss functions). Consider narrowing it to specific categories.
import warnings
warnings.filterwarnings('ignore')

# Standard library
import os

# Third-party
import kagglehub
import matplotlib.pyplot as plt
import torch
import torch.nn as nn
from sklearn.metrics import mean_squared_error, mean_absolute_error

# Project-local
from DataLoader import DataLoader
from model.MLP import MLP_Tuner
from model.CNNTransformer import CNNTransformer_Tuner

# Seed torch's global RNG so torch-driven randomness is repeatable across
# "Restart Kernel -> Run All".
# NOTE(review): randomness drawn from other libraries inside DataLoader or the
# tuners is not covered by this call - confirm whether they seed themselves.
SEED = 42
torch.manual_seed(SEED)

device = 'cuda' if torch.cuda.is_available() else 'cpu'

# Download latest version of the dataset (kagglehub caches it locally and
# returns the directory that holds the files).
path = kagglehub.dataset_download("samiraalipour/genomics-of-drug-sensitivity-in-cancer-gdsc")

print("Path to dataset files:", path)

# Build file paths with os.path.join instead of concatenating with '/', so the
# cell does not depend on the platform's path separator.
dataloader = DataLoader(
    os.path.join(path, 'GDSC_DATASET.csv'),
    os.path.join(path, 'Compounds-annotation.csv'),
    os.path.join(path, 'GDSC2-dataset.csv'),
    os.path.join(path, 'Cell_Lines_Details.xlsx'),
)

# get_data() is unpacked into the train/test tensors plus the value later
# passed to the model tuner as its input dimension.
X_train_tensor, y_train_tensor, X_test_tensor, y_test_tensor, input_dim = dataloader.get_data()

Path to dataset files: /home/andrew-root/.cache/kagglehub/datasets/samiraalipour/genomics-of-drug-sensitivity-in-cancer-gdsc/versions/2
Loading Done!
Preprocess Done!
Define Done!


## Deep learning (DL)

In [3]:
# Create the CNN-Transformer tuner, configured with the input dimension
# reported by the data-loading cell.
CNNTransformer_tuner = CNNTransformer_Tuner(input_dim)

# Run the hyperparameter search on the training tensors; whatever the tuner
# returns is kept as `best_model` for the training/evaluation cells below.
best_model = CNNTransformer_tuner.tune_hyperparameters(
    X_train_tensor,
    y_train_tensor,
)

  epoch    train_loss     dur
-------  ------------  ------
      1        [36m8.0902[0m  1.8236
  epoch    train_loss     dur
-------  ------------  ------
      1        [36m8.1320[0m  1.9274
  epoch    train_loss     dur
-------  ------------  ------
      1        [36m8.0729[0m  2.0535
      2        [36m8.0268[0m  1.7980
      2        [36m8.0789[0m  1.8036
      2        [36m8.0364[0m  1.8255
      3        [36m8.0157[0m  1.8203
      3        8.0877  1.8416
      3        [36m8.0319[0m  1.8199
      4        8.0188  1.8159
      4        [36m8.0742[0m  1.8158
      4        8.0353  1.8167
      5        [36m8.0154[0m  1.7995
      5        8.0775  1.8386
      5        [36m8.0286[0m  1.8305
      6        [36m8.0153[0m  1.8668
      6        [36m8.0695[0m  1.8545
      6        8.0312  1.8245
      7        8.0168  1.7975
      7        8.0789  1.8286
      7        [36m8.0281[0m  1.8240
      8        8.0185  1.8469
      8        8.0704  1.8440
    

In [None]:
# Record the MSE on the training and test splits after each training round so
# the two curves can be compared for overfitting.
# NOTE(review): the "validation" curve is computed on the test split.
train_losses = []
val_losses = []

# The targets do not change between rounds, so convert them to flat NumPy
# arrays once instead of on every iteration.
y_train_true = y_train_tensor.detach().cpu().numpy().reshape(-1)
y_val_true = y_test_tensor.detach().cpu().numpy().reshape(-1)

# NOTE(review): the recorded output of this cell shows many epochs logged for a
# single pass through this loop, so one partial_fit() call appears to train for
# several epochs - confirm before treating each list entry as one epoch.
for epoch in range(1): # best_model.max_epochs
    best_model.partial_fit(X_train_tensor, y_train_tensor)

    # reshape(-1) rather than squeeze(): it always yields a 1-D array (squeeze
    # turns a single-sample prediction into a 0-d value that sklearn rejects)
    # and guarantees predictions and targets have the same shape.
    train_pred = best_model.predict(X_train_tensor).reshape(-1)
    val_pred = best_model.predict(X_test_tensor).reshape(-1)

    train_loss = mean_squared_error(y_train_true, train_pred)
    val_loss = mean_squared_error(y_val_true, val_pred)
    train_losses.append(train_loss)
    val_losses.append(val_loss)

     31        8.0320  3.3407
     32        8.0333  3.3361
     33        8.0327  3.3384
     34        8.0315  3.2540
     35        8.0324  3.3078
     36        8.0317  3.3232
     37        8.0320  3.3226
     38        [36m8.0301[0m  3.3075
     39        8.0334  3.2392
     40        8.0318  3.3031
     41        8.0322  3.3073
     42        8.0332  3.2930
     43        8.0314  3.3637
     44        8.0404  3.3733
     45        8.0312  3.3510
     46        8.0314  3.2920
     47        8.0328  3.1518
     48        8.0357  3.3066
     49        8.0311  3.2277
     50        8.0323  3.3452
     51        8.0321  3.2586
     52        8.0312  3.2149
     53        8.0315  3.3725


In [None]:
# Plot training and validation loss to check for overfitting.
#
# The x-axis is derived from the number of recorded losses rather than from
# best_model.max_epochs: the loss lists get one entry per iteration of the
# training loop, so tying the axis to max_epochs makes matplotlib fail with an
# x/y length mismatch whenever the two numbers differ.
recorded_rounds = range(1, len(train_losses) + 1)

fig, ax = plt.subplots(figsize=(10, 6))
# Markers keep the curves visible even when only a single point was recorded.
ax.plot(recorded_rounds, train_losses, marker='o', label='Training Loss')
ax.plot(recorded_rounds, val_losses, marker='o', label='Validation Loss')
ax.set_xlabel('Epochs')
ax.set_ylabel('Loss (MSE)')
ax.set_title('Training and Validation Loss')
ax.legend()
ax.grid(True)
plt.show()

In [None]:
# Evaluate the best model on the test set and report RMSE, MAE and MSE.
#
# Predictions and targets are both flattened to 1-D before any metric is
# computed. Previously only the predictions were squeezed; if the target tensor
# carries a trailing singleton dimension (e.g. (N, 1) - TODO confirm its shape),
# nn.MSELoss broadcasts (N,) against (N, 1) into an (N, N) matrix and merely
# emits a warning, which this notebook suppresses globally. The RMSE would then
# silently disagree with the sklearn MSE printed next to it.
with torch.no_grad():
    predictions = torch.tensor(best_model.predict(X_test_tensor)).reshape(-1)

targets = y_test_tensor.detach().cpu().reshape(-1)

y_true = targets.numpy()
y_pred = predictions.numpy()

# Calculate MSE and MAE, then derive RMSE from the same MSE so the three
# reported numbers are guaranteed to be mutually consistent.
mse = mean_squared_error(y_true, y_pred)
mae = mean_absolute_error(y_true, y_pred)
rmse = float(mse) ** 0.5

print(f"Test RMSE: {rmse:.4f}")
print(f"Test MAE: {mae:.4f}")
print(f"Test MSE: {mse:.4f}")