In [1]:
import gc, csv
import warnings
warnings.filterwarnings('ignore')

import torch
import torch.nn as nn
from sklearn.metrics import mean_squared_error, mean_absolute_error
import matplotlib.pyplot as plt
from DataLoader import DataLoader
from model.MLP import MLP_Tuner
from ML_model import DecisionTree_Tuner, CatBoost_Tuner, ElasticNet_Tuner, HistGradientBoosting_Tuner, KNN_Tuner, \
    Lasso_Tuner, LGBM_Tuner, LinearRegression_Tuner, RF_Tuner, Ridge_Tuner, SVR_Tuner, XGBRegressor_Tuner

device = 'cuda' if torch.cuda.is_available() else 'cpu'

import kagglehub

# Download latest version
path = kagglehub.dataset_download("samiraalipour/genomics-of-drug-sensitivity-in-cancer-gdsc")

print("Path to dataset files:", path)

dataloader = DataLoader(path + '/GDSC_DATASET.csv',
                        path + '/Compounds-annotation.csv',
                        path + '/GDSC2-dataset.csv',
                        path + '/Cell_Lines_Details.xlsx')

X_train_tensor, y_train_tensor, X_test_tensor, y_test_tensor, input_dim = dataloader.get_data()

'''queue = [MLP_Tuner, DecisionTree_Tuner, CatBoost_Tuner, ElasticNet_Tuner, HistGradientBoosting_Tuner, KNN_Tuner, \
    Lasso_Tuner, LGBM_Tuner, LinearRegression_Tuner, RF_Tuner, Ridge_Tuner, SVR_Tuner, XGBRegressor_Tuner]'''
    
queue = [XGBRegressor_Tuner]

Path to dataset files: /home/andrew-root/.cache/kagglehub/datasets/samiraalipour/genomics-of-drug-sensitivity-in-cancer-gdsc/versions/2
Loading Done!
Preprocess Done!
Define Done!


In [2]:
experimental_result = []

for i in range(len(queue)):
    # Initialize the tuner
    print("************************ " + str(queue[i]) + " ************************")
    tuner = queue[i](input_dim)

    # Tune hyperparameters
    best_model = tuner.tune_hyperparameters(X_train_tensor, y_train_tensor)
    
    # Evaluate the best model on the test set
    # train_losses = []
    # val_losses = []

    for epoch in range(best_model.max_epochs):
        best_model.partial_fit(X_train_tensor, y_train_tensor)
        train_pred = best_model.predict(X_train_tensor).squeeze()
        val_pred = best_model.predict(X_test_tensor).squeeze()
        '''train_loss = mean_squared_error(y_train_tensor.numpy(), train_pred)
        val_loss = mean_squared_error(y_test_tensor.numpy(), val_pred)
        train_losses.append(train_loss)
        val_losses.append(val_loss)'''
        
    with torch.no_grad():
        predictions = best_model.predict(X_test_tensor).squeeze()
        predictions = torch.tensor(predictions)    
    
        # Calculate RMSE, MAE, and MSE
        rmse = torch.sqrt(nn.MSELoss()(predictions, y_test_tensor)).item()
        mae = mean_absolute_error(y_test_tensor.numpy(), predictions.numpy())
        mse = mean_squared_error(y_test_tensor.numpy(), predictions.numpy())
        
        print(f"Test RMSE: {rmse:.4f}")
        print(f"Test MAE: {mae:.4f}")
        print(f"Test MSE: {mse:.4f}")
        
        experimental_result.append({"model": str(queue[i]), "rmse": rmse, "mae": mae, "mse": mse})

# 指定輸出檔案名稱
csv_filename = "model_results.csv"

# 將結果寫入 CSV 檔案
with open(csv_filename, mode="w", newline="", encoding="utf-8") as file:
    writer = csv.DictWriter(file, fieldnames=["model", "rmse", "mae", "mse"])
    writer.writeheader()  # 寫入標題
    writer.writerows(experimental_result)  # 寫入每行數據

print(f"CSV file '{csv_filename}' has been created!")

gc.collect()

************************ <class 'ML_model.XGBRegressor_Tuner'> ************************
Best Parameters: {'colsample_bytree': 1.0, 'learning_rate': 0.2, 'max_depth': 10, 'n_estimators': 300, 'subsample': 0.6}
Best Score: -0.04561541477839152


AttributeError: 'tuple' object has no attribute 'max_epochs'

In [None]:
# Initialize the tuner
MLP_tuner = MLP_Tuner(input_dim)

# Tune hyperparameters
best_model = MLP_tuner.tune_hyperparameters(X_train_tensor, y_train_tensor)

In [None]:
# Evaluate the best model on the test set
train_losses = []
val_losses = []

for epoch in range(best_model.max_epochs):
    best_model.partial_fit(X_train_tensor, y_train_tensor)
    train_pred = best_model.predict(X_train_tensor).squeeze()
    val_pred = best_model.predict(X_test_tensor).squeeze()
    train_loss = mean_squared_error(y_train_tensor.numpy(), train_pred)
    val_loss = mean_squared_error(y_test_tensor.numpy(), val_pred)
    train_losses.append(train_loss)
    val_losses.append(val_loss)

In [None]:
'''# Plot training and validation loss to check for overfitting
plt.figure(figsize=(10, 6))
plt.plot(range(1, best_model.max_epochs + 1), train_losses, label='Training Loss')
plt.plot(range(1, best_model.max_epochs + 1), val_losses, label='Validation Loss')
plt.xlabel('Epochs')
plt.ylabel('Loss (MSE)')
plt.title('Training and Validation Loss')
plt.legend()
plt.grid(True)
plt.show()'''

In [None]:
# Evaluate the best model on the test set
# best_model.eval()
with torch.no_grad():
    predictions = best_model.predict(X_test_tensor).squeeze()
    predictions = torch.tensor(predictions)    
    
    # Calculate RMSE, MAE, and MSE
    rmse = torch.sqrt(nn.MSELoss()(predictions, y_test_tensor)).item()
    mae = mean_absolute_error(y_test_tensor.numpy(), predictions.numpy())
    mse = mean_squared_error(y_test_tensor.numpy(), predictions.numpy())
    
    print(f"Test RMSE: {rmse:.4f}")
    print(f"Test MAE: {mae:.4f}")
    print(f"Test MSE: {mse:.4f}")