In [None]:
# Report the NVIDIA GPU(s) visible to this runtime (IPython shell escape; needs `nvidia-smi` on PATH).
!nvidia-smi

In [None]:
import warnings

# Installed before the heavier imports below so that import-time warnings are
# silenced as well.
# NOTE(review): this hides *every* warning for the rest of the notebook,
# including ones that point at real problems (e.g. shape broadcasting in loss
# functions). Consider narrowing the filter once the notebook is stable.
warnings.filterwarnings('ignore')

import os
import random

import kagglehub
import matplotlib.pyplot as plt
import numpy as np
import torch
import torch.nn as nn
from sklearn.metrics import mean_squared_error, mean_absolute_error

# Project-local helpers. `DataLoader` here is the one from `utils`, not
# `torch.utils.data.DataLoader`.
from utils import DataLoader, MLP_Tuner, CNNTransformer_Tuner

# Seed the common RNGs up front so repeated "Restart & Run All" runs start from
# the same random state.
# NOTE(review): whether this makes the search fully reproducible depends on how
# `utils` draws its random numbers - confirm.
SEED = 42
random.seed(SEED)
np.random.seed(SEED)
torch.manual_seed(SEED)

device = 'cuda' if torch.cuda.is_available() else 'cpu'

# Download latest version
path = kagglehub.dataset_download("samiraalipour/genomics-of-drug-sensitivity-in-cancer-gdsc")

print("Path to dataset files:", path)

# Build the four input file paths with os.path.join instead of string
# concatenation so the separator is always correct for the host platform.
dataloader = DataLoader(
    os.path.join(path, 'GDSC_DATASET.csv'),
    os.path.join(path, 'Compounds-annotation.csv'),
    os.path.join(path, 'GDSC2-dataset.csv'),
    os.path.join(path, 'Cell_Lines_Details.xlsx'),
)
dataloader.load_data()
dataloader.preprocess_data()
dataloader.define_features_and_target()

# Convert the data to PyTorch tensors.
# `input_dim` is what the tuner classes are constructed with further down.
X_train_tensor, y_train_tensor, X_test_tensor, y_test_tensor, input_dim = dataloader.Convert_to_tensors()

## ML: MLP model

In [None]:
# Initialize the tuner
# `MLP_Tuner` comes from the project-local `utils` module and is constructed
# with the `input_dim` returned by `dataloader.Convert_to_tensors()`.
MLP_tuner = MLP_Tuner(input_dim)

# Tune hyperparameters
# Only the training tensors are handed to the search. The returned object is
# used by the following cells through `max_epochs`, `partial_fit` and `predict`.
best_model = MLP_tuner.tune_hyperparameters(X_train_tensor, y_train_tensor)

In [None]:
# Keep training the tuned model and record one train/held-out MSE pair per
# loop iteration; the two lists feed the loss-curve plot in the next cell.
#
# The curve stored in `val_losses` is measured on the *test* tensors - there is
# no separate validation split in this notebook.
#
# NOTE(review): if `best_model` is a skorch net, `partial_fit` runs
# `max_epochs` epochs per call by default, so this loop would train for
# max_epochs**2 epochs in total. If one epoch per iteration is intended,
# confirm against `utils` and pass `epochs=1`.

# The targets never change inside the loop, so convert them to NumPy once.
y_train_np = y_train_tensor.numpy()
y_test_np = y_test_tensor.numpy()

train_losses, val_losses = [], []

for epoch in range(best_model.max_epochs):
    best_model.partial_fit(X_train_tensor, y_train_tensor)
    train_losses.append(
        mean_squared_error(y_train_np, best_model.predict(X_train_tensor).squeeze())
    )
    val_losses.append(
        mean_squared_error(y_test_np, best_model.predict(X_test_tensor).squeeze())
    )

In [None]:
# Plot training and validation loss to check for overfitting.
#
# The x-axis is derived from the recorded histories rather than from
# `best_model.max_epochs`: the plot then still works when the training cell was
# interrupted part-way, and it does not depend on whichever model `best_model`
# currently points at.
fig, ax = plt.subplots(figsize=(10, 6))
ax.plot(range(1, len(train_losses) + 1), train_losses, label='Training Loss')
ax.plot(range(1, len(val_losses) + 1), val_losses, label='Validation Loss')
ax.set_xlabel('Epochs')
ax.set_ylabel('Loss (MSE)')
ax.set_title('Training and Validation Loss')
ax.legend()
ax.grid(True)
plt.show()

In [None]:
# Evaluate the best model on the test set: RMSE, MAE and MSE.
#
# Predictions and targets are both flattened to 1-D before any metric is
# computed. Comparing a squeezed (N,) prediction with an (N, 1) target through
# nn.MSELoss would broadcast to (N, N) and silently report a wrong RMSE (the
# warning about it is suppressed by the global warnings filter).
# RMSE is derived from the same MSE that is printed, so the two always agree.

# no_grad is kept as a cheap safeguard around inference.
with torch.no_grad():
    predictions = torch.tensor(best_model.predict(X_test_tensor)).reshape(-1)

# detach/cpu make the NumPy conversion safe even if the tensor lives on a GPU
# or carries gradient history.
targets = y_test_tensor.detach().cpu().reshape(-1)

y_true_np = targets.numpy()
y_pred_np = predictions.numpy()

# Calculate RMSE, MAE, and MSE
mse = mean_squared_error(y_true_np, y_pred_np)
mae = mean_absolute_error(y_true_np, y_pred_np)
rmse = float(mse) ** 0.5

print(f"Test RMSE: {rmse:.4f}")
print(f"Test MAE: {mae:.4f}")
print(f"Test MSE: {mse:.4f}")

## DL: CNN-Transformer model

In [None]:
# Initialize the tuner
# `CNNTransformer_Tuner` comes from the project-local `utils` module and is
# constructed with the same `input_dim` as the MLP tuner.
CNNTransformer_tuner = CNNTransformer_Tuner(input_dim)

# Tune hyperparameters
# NOTE: this rebinds `best_model`, so the tuned MLP from the previous section
# is no longer reachable under that name once this cell has run.
best_model = CNNTransformer_tuner.tune_hyperparameters(X_train_tensor, y_train_tensor)

In [None]:
# Continue training the tuned CNN-Transformer and log one train/held-out MSE
# pair per loop iteration for the loss-curve plot in the next cell.
#
# `val_losses` is computed on the *test* tensors; the notebook has no separate
# validation split.
#
# NOTE(review): if `best_model` is a skorch net, each `partial_fit` call runs
# `max_epochs` epochs by default, which would make this loop train for
# max_epochs**2 epochs. If a single epoch per iteration is wanted, confirm
# against `utils` and pass `epochs=1`.

# Loop-invariant NumPy views of the targets.
train_targets = y_train_tensor.numpy()
test_targets = y_test_tensor.numpy()

train_losses = []
val_losses = []

for epoch in range(best_model.max_epochs):
    best_model.partial_fit(X_train_tensor, y_train_tensor)

    fitted_train = best_model.predict(X_train_tensor).squeeze()
    fitted_test = best_model.predict(X_test_tensor).squeeze()

    train_losses.append(mean_squared_error(train_targets, fitted_train))
    val_losses.append(mean_squared_error(test_targets, fitted_test))

In [None]:
# Plot training and validation loss to check for overfitting.
#
# The epoch axis is built from the length of each recorded history instead of
# `best_model.max_epochs`, so a partially completed training run still plots
# and the figure cannot drift out of sync with the data it shows.
fig, ax = plt.subplots(figsize=(10, 6))
ax.plot(range(1, len(train_losses) + 1), train_losses, label='Training Loss')
ax.plot(range(1, len(val_losses) + 1), val_losses, label='Validation Loss')
ax.set_xlabel('Epochs')
ax.set_ylabel('Loss (MSE)')
ax.set_title('Training and Validation Loss')
ax.legend()
ax.grid(True)
plt.show()

In [None]:
# Evaluate the best model on the test set: RMSE, MAE and MSE.
#
# Both predictions and targets are flattened to 1-D first. Feeding a squeezed
# (N,) prediction and an (N, 1) target to nn.MSELoss broadcasts them to (N, N)
# and yields a wrong RMSE without any visible warning (warnings are globally
# suppressed in the setup cell).
# RMSE is taken as the square root of the reported MSE so the two are
# guaranteed to be consistent.

# no_grad is kept as a cheap safeguard around inference.
with torch.no_grad():
    predictions = torch.tensor(best_model.predict(X_test_tensor)).reshape(-1)

# detach/cpu make the NumPy conversion safe even if the tensor lives on a GPU
# or carries gradient history.
targets = y_test_tensor.detach().cpu().reshape(-1)

y_true_np = targets.numpy()
y_pred_np = predictions.numpy()

# Calculate RMSE, MAE, and MSE
mse = mean_squared_error(y_true_np, y_pred_np)
mae = mean_absolute_error(y_true_np, y_pred_np)
rmse = float(mse) ** 0.5

print(f"Test RMSE: {rmse:.4f}")
print(f"Test MAE: {mae:.4f}")
print(f"Test MSE: {mse:.4f}")