# Model training, validation, testing function

In [None]:
# Define Training and Validation (or Testing) Function

import torch
import torch.nn as nn
import torch.nn.functional as F
import torch.optim as optim
from torch.utils.data import DataLoader, TensorDataset
import tqdm
from sklearn.metrics import mean_squared_error, root_mean_squared_error
from sklearn.metrics import accuracy_score, auc, roc_curve, f1_score
from sklearn.metrics import precision_score, recall_score, confusion_matrix, roc_auc_score
import os
import numpy as np
import pandas as pd
from datetime import datetime

def model_training_validtion_or_testing(X_train, y_train, X_valid, y_valid, 
                             model_name, hyperparameters, task_type, loss_type, optimizer_type,
                             dl_learning_rate, epochs_num, batch_size, save_records=True, testing=False):
    """Train a model, score it on the validation (or testing) split and record the results.

    Args:
        X_train, y_train: Training features and targets. The deep-learning path
            converts them with ``torch.from_numpy``, so NumPy arrays are required there.
        X_valid, y_valid: Features and targets of the split used for scoring.
        model_name: One of the deep-learning names ("FM", "MLP", "DeepFM", "AFM",
            "DCN", "xDeepFM", "AutoInt", "AFN") or machine-learning names ("Linear",
            "KNN", "SVM", "DecisionTree", "RandomForest", "AdaBoost", "XGBoost").
        hyperparameters: Keyword arguments forwarded to the model constructor
            (ignored for "Linear"). Keys also name the record columns.
        task_type: "Regression" or "Classification". The classification metrics
            are the binary ones (specificity is recall of label 0).
        loss_type: "MSE"/"RMSE" for regression, "BCE"/"CE" for classification.
            Only used by deep-learning models.
        optimizer_type: "Adam", "Adagrad", "RMSprop", "Adadelta", "Adamax" or
            "Nadam". Only used by deep-learning models.
        dl_learning_rate: Learning rate handed to the optimizer.
        epochs_num: Number of training epochs.
        batch_size: Mini-batch size of the training loader.
        save_records: When True, results are appended to CSV files below
            ``{model_name}_result_records/``; when False nothing is written.
        testing: When True the scoring split is recorded as testing results
            instead of validation results.

    Returns:
        ``(training_records_df, validation_records_df)`` on success. For
        machine-learning models the first element is an explanatory string.
        ``0`` when the configuration is already recorded or is invalid (a message
        is printed). A message string when ``model_name`` is unknown.
    """
    if model_name in _DEEP_LEARNING_MODEL_NAMES:
        return _run_deep_learning_model(
            X_train, y_train, X_valid, y_valid, model_name, hyperparameters, task_type, loss_type,
            optimizer_type, dl_learning_rate, epochs_num, batch_size, save_records, testing)

    if model_name in _MACHINE_LEARNING_MODEL_NAMES:
        return _run_machine_learning_model(
            X_train, y_train, X_valid, y_valid, model_name, hyperparameters, task_type,
            save_records, testing)

    return f"Please select a model in {_DEEP_LEARNING_MODEL_NAMES} and {_MACHINE_LEARNING_MODEL_NAMES} !"


_DEEP_LEARNING_MODEL_NAMES = ["FM", "MLP", "DeepFM", "AFM", "DCN", "xDeepFM", "AutoInt", "AFN"]
_MACHINE_LEARNING_MODEL_NAMES = ["Linear", "KNN", "SVM", "DecisionTree", "RandomForest",
                                 "AdaBoost", "XGBoost"]
_REGRESSION_LOSS_TYPES = ["MSE", "RMSE"]
_CLASSIFICATION_LOSS_TYPES = ["CE", "BCE"]

# Accepted ``optimizer_type`` value -> attribute name on ``torch.optim``.
# Resolved lazily with getattr so only the requested optimizer has to exist;
# note that the PyTorch class is spelled ``NAdam``.
_OPTIMIZER_ATTRIBUTES = {
    "Adam": "Adam",
    "Adagrad": "Adagrad",
    "RMSprop": "RMSprop",
    "Adadelta": "Adadelta",
    "Adamax": "Adamax",
    "Nadam": "NAdam",
}

# Order matches the metric columns of the record files.
_METRIC_NAMES = ("rmse", "mse", "accuracy", "auc_score", "f1", "precision", "recall", "specificity")
_VALIDATION_METRIC_COLUMNS = ["RMSE", "MSE", "Accuracy", "AUC_score", "F1", "Precision", "Recall", "Specificity"]
_TRAINING_METRIC_COLUMNS = ["Avg_RMSE", "Avg_MSE", "Avg_Accuracy", "Avg_AUC_score",
                            "Avg_F1", "Avg_Precision", "Avg_Recall", "Avg_Specificity"]


def _create_record_file_if_missing(file_path, column_list):
    """Write a header-only CSV when ``file_path`` does not exist; return True if it was created."""
    if os.path.exists(file_path):
        return False
    pd.DataFrame(columns=column_list).to_csv(file_path, index=False, float_format='%.6f')
    return True


def _configuration_already_recorded(file_path, columns_to_compare, configuration_values):
    """Return True when a row with the same configuration values is already in the CSV."""
    recorded_rows = pd.read_csv(file_path)[columns_to_compare].values.tolist()
    # Round-trip through a DataFrame so the candidate gets the same value
    # conversion as the rows read back from disk.
    candidate_row = pd.DataFrame([configuration_values], columns=columns_to_compare).values.tolist()[0]
    return candidate_row in recorded_rows


def _results_file_path(folder_name, model_name, testing):
    """Return the path of the validation (or, when testing, the testing) record file."""
    result_kind = "testing" if testing else "validation"
    return os.path.join(folder_name, f'{model_name}_{result_kind}_result_records.csv')


def _training_configuration_is_valid(task_type, loss_type, optimizer_type):
    """Print a hint and return False when optimizer, task or loss type is unsupported."""
    if optimizer_type not in _OPTIMIZER_ATTRIBUTES:
        print(f"Please select an optimizer in {list(_OPTIMIZER_ATTRIBUTES)}")
        return False
    if task_type == "Regression":
        if loss_type not in _REGRESSION_LOSS_TYPES:
            print(f"Please make sure loss type is in {_REGRESSION_LOSS_TYPES}")
            return False
    elif task_type == "Classification":
        if loss_type not in _CLASSIFICATION_LOSS_TYPES:
            print(f"Please make sure loss type is in {_CLASSIFICATION_LOSS_TYPES}")
            return False
    else:
        print("Please make sure the task is regression or classification !")
        return False
    return True


def _build_deep_learning_model(model_name, hyperparameters):
    """Instantiate the requested network; class names are resolved only for the chosen model."""
    if model_name == "FM":
        return FactorizationMachine(**hyperparameters)
    if model_name == "MLP":
        return MultiLayerPerceptron(**hyperparameters)
    if model_name == "DeepFM":
        return DeepFM(**hyperparameters)
    if model_name == "AFM":
        return AttentionalFactorizationMachineModel(**hyperparameters)
    if model_name == "DCN":
        return DeepCrossNetworkModel(**hyperparameters)
    if model_name == "xDeepFM":
        return ExtremeDeepFactorizationMachineModel(**hyperparameters)
    if model_name == "AutoInt":
        return AutomaticFeatureInteractionModel(**hyperparameters)
    if model_name == "AFN":
        return AdaptiveFactorizationNetwork(**hyperparameters)
    raise ValueError(f"Please choose a model in {_DEEP_LEARNING_MODEL_NAMES} !")


def _build_machine_learning_model(model_name, hyperparameters, task_type):
    """Instantiate the regressor or classifier variant of the requested estimator."""
    regression = task_type == "Regression"
    if model_name == "Linear":
        # The linear models are built with their defaults; hyperparameters are not forwarded.
        return LinearRegression() if regression else LogisticRegression()
    if model_name == "KNN":
        return KNeighborsRegressor(**hyperparameters) if regression else KNeighborsClassifier(**hyperparameters)
    if model_name == "SVM":
        return SVR(**hyperparameters) if regression else SVC(**hyperparameters)
    if model_name == "DecisionTree":
        return DecisionTreeRegressor(**hyperparameters) if regression else DecisionTreeClassifier(**hyperparameters)
    if model_name == "RandomForest":
        return RandomForestRegressor(**hyperparameters) if regression else RandomForestClassifier(**hyperparameters)
    if model_name == "AdaBoost":
        return AdaBoostRegressor(**hyperparameters) if regression else AdaBoostClassifier(**hyperparameters)
    if model_name == "XGBoost":
        return XGBRegressor(**hyperparameters) if regression else XGBClassifier(**hyperparameters)
    raise ValueError(f"Please choose a model in {_MACHINE_LEARNING_MODEL_NAMES} !")


def _feature_index_tensor(features):
    """Return an int tensor of shape (rows, columns, 1) holding each column's index."""
    column_indices = torch.arange(features.size(1)).unsqueeze(0).unsqueeze(-1)
    return column_indices.repeat(features.size(0), 1, 1).int()


def _forward(model, model_name, xi, x):
    """Run the model; only DeepFM also receives the feature-index tensor."""
    if model_name == "DeepFM":
        return model(xi, x)
    return model(x)


def _compute_loss(outputs, targets, task_type, loss_type):
    """Return the differentiable loss for one batch of raw model outputs."""
    if task_type == "Regression":
        # Reshape the targets to the output shape so (N,) vs (N, 1) never broadcasts to (N, N).
        mse = nn.MSELoss()(outputs, targets.reshape(outputs.shape))
        if loss_type == "RMSE":
            eps = 1e-6  # keeps the sqrt gradient finite when the MSE is exactly zero
            return torch.sqrt(mse + eps)
        return mse

    if loss_type == "BCE":
        return nn.BCELoss()(torch.sigmoid(outputs), targets.reshape(outputs.shape))

    # "CE": one label per row must be given as integer class indices.
    if outputs.dim() == 2 and targets.numel() == outputs.size(0):
        targets = targets.reshape(-1).long()
    return nn.CrossEntropyLoss()(outputs, targets)


def _classification_scores_and_labels(outputs, loss_type):
    """Turn raw model outputs into (positive-class scores, hard labels) tensors."""
    if loss_type == "BCE":
        scores = torch.sigmoid(outputs).reshape(-1)
        return scores, (scores >= 0.5).long()

    if outputs.dim() != 2 or outputs.size(1) < 2:
        raise ValueError("CE loss expects model outputs of shape (batch, classes) with at least two classes")
    probabilities = torch.softmax(outputs, dim=1)
    return probabilities[:, 1], probabilities.argmax(dim=1)


def _classification_metrics(y_true, y_score, y_label):
    """Compute the binary classification metrics recorded in the result files."""
    # ROC AUC is undefined when only one class is present (common in small
    # batches); report NaN instead of aborting the run.
    both_classes_present = np.unique(y_true).size > 1
    return {
        "accuracy": accuracy_score(y_true, y_label),
        "auc_score": roc_auc_score(y_true, y_score) if both_classes_present else float("nan"),
        "f1": f1_score(y_true, y_label),
        "precision": precision_score(y_true, y_label),
        "recall": recall_score(y_true, y_label),
        "specificity": recall_score(y_true, y_label, pos_label=0),  # true negative rate
    }


def _evaluate_outputs(outputs, targets, task_type, loss_type):
    """Return every recorded metric for the given outputs; inapplicable ones stay 0.0."""
    metrics = dict.fromkeys(_METRIC_NAMES, 0.0)
    y_true = targets.detach().reshape(-1).numpy()
    if task_type == "Regression":
        y_pred = outputs.detach().reshape(-1).numpy()
        metrics["rmse"] = root_mean_squared_error(y_true, y_pred)
        metrics["mse"] = mean_squared_error(y_true, y_pred)
    else:
        scores, labels = _classification_scores_and_labels(outputs.detach(), loss_type)
        metrics.update(_classification_metrics(y_true, scores.numpy(), labels.numpy()))
    return metrics


def _mean_ignoring_nan(values):
    """Average the values that are not NaN; NaN when nothing is left to average."""
    usable = [value for value in values if not np.isnan(value)]
    return float(np.mean(usable)) if usable else float("nan")


def _run_deep_learning_model(X_train, y_train, X_valid, y_valid, model_name, hyperparameters,
                             task_type, loss_type, optimizer_type, dl_learning_rate,
                             epochs_num, batch_size, save_records, testing):
    """Train and score one of the PyTorch models; see the public entry point for the contract."""
    hyperparameter_columns = [f"{model_name}_{key}" for key in hyperparameters]
    configuration_columns = ["Task_type", "Loss_type", "Optimizer_type",
                             "DL_learning_rate", "Epochs_num", "Batch_size"]
    configuration_values = [task_type, loss_type, optimizer_type,
                            dl_learning_rate, epochs_num, batch_size]

    # Column lists are needed for the returned DataFrames even when nothing is saved.
    training_column_list = [
        "Timestamp", "Model_name", "Training_for", *hyperparameter_columns, *configuration_columns,
        "Epoch/Epochs_num", "Avg_Loss", *_TRAINING_METRIC_COLUMNS
    ]
    validation_column_list = [
        "Timestamp", "Model_name", *hyperparameter_columns, *configuration_columns,
        *_VALIDATION_METRIC_COLUMNS
    ]

    training_file_path = None
    validation_file_path = None

    if save_records:
        folder_name = f"{model_name}_result_records"
        os.makedirs(folder_name, exist_ok=True)

        training_file_path = os.path.join(folder_name, f'{model_name}_training_result_records.csv')
        _create_record_file_if_missing(training_file_path, training_column_list)

        validation_file_path = _results_file_path(folder_name, model_name, testing)
        if not _create_record_file_if_missing(validation_file_path, validation_column_list):
            # Skip configurations that already have a recorded result.
            if _configuration_already_recorded(
                    validation_file_path,
                    ["Model_name", *hyperparameter_columns, *configuration_columns],
                    [model_name, *hyperparameters.values(), *configuration_values]):
                return 0
    else:
        print("Training and validation results are not saved to csv !")

    if not _training_configuration_is_valid(task_type, loss_type, optimizer_type):
        return 0

    model = _build_deep_learning_model(model_name, hyperparameters)

    # Convert to float tensors and build the per-feature index tensors
    X_train_tensor = torch.from_numpy(X_train).float()
    y_train_tensor = torch.from_numpy(y_train).float()
    X_valid_tensor = torch.from_numpy(X_valid).float()
    y_valid_tensor = torch.from_numpy(y_valid).float()

    train_dataset = TensorDataset(_feature_index_tensor(X_train_tensor), X_train_tensor, y_train_tensor)
    train_loader = DataLoader(train_dataset, batch_size=batch_size, shuffle=True)
    validation_dataset = TensorDataset(_feature_index_tensor(X_valid_tensor), X_valid_tensor, y_valid_tensor)
    # The whole scoring split is evaluated as a single batch; order is irrelevant there.
    validation_loader = DataLoader(validation_dataset, batch_size=len(validation_dataset), shuffle=False)

    optimizer_class = getattr(optim, _OPTIMIZER_ATTRIBUTES[optimizer_type])
    optimizer = optimizer_class(model.parameters(), lr=dl_learning_rate)

    # Train the model
    training_rows = []
    training_for = "for_testing" if testing else "for_validation"

    print(f'Start training {model_name} model ...')
    for epoch in range(epochs_num):
        model.train()
        total_loss = 0.0
        total_batches = 0
        batch_metric_history = {name: [] for name in _METRIC_NAMES}

        for xi, x, y_true in train_loader:
            optimizer.zero_grad()
            outputs = _forward(model, model_name, xi, x)
            loss = _compute_loss(outputs, y_true, task_type, loss_type)
            loss.backward()
            optimizer.step()

            total_loss += loss.item()
            total_batches += 1

            # Metrics describe the same forward pass the loss was computed on.
            batch_metrics = _evaluate_outputs(outputs.detach(), y_true, task_type, loss_type)
            for name in _METRIC_NAMES:
                batch_metric_history[name].append(batch_metrics[name])

        avg_loss = total_loss / total_batches if total_batches else float("nan")
        training_rows.append([
            datetime.now().strftime('%Y-%m-%d %H:%M:%S'), model_name, training_for,
            *hyperparameters.values(), *configuration_values, f"{epoch + 1}th/{epochs_num}",
            avg_loss, *[_mean_ignoring_nan(batch_metric_history[name]) for name in _METRIC_NAMES]
        ])

    # Make predictions on the scoring split
    print("Start validating ...")
    model.eval()  # disable dropout / use running batch-norm statistics while scoring
    collected_outputs = []
    collected_targets = []
    with torch.no_grad():
        for xi, x, y_true in validation_loader:
            collected_outputs.append(_forward(model, model_name, xi, x))
            collected_targets.append(y_true)

    validation_metrics = _evaluate_outputs(
        torch.cat(collected_outputs), torch.cat(collected_targets), task_type, loss_type)

    validation_rows = [[
        datetime.now().strftime('%Y-%m-%d %H:%M:%S'), model_name, *hyperparameters.values(),
        *configuration_values, *[validation_metrics[name] for name in _METRIC_NAMES]
    ]]

    training_records_df = pd.DataFrame(training_rows, columns=training_column_list)
    validation_records_df = pd.DataFrame(validation_rows, columns=validation_column_list)

    # Append the results to the record files created above
    if save_records:
        training_records_df.to_csv(training_file_path, mode='a', header=False, index=False, float_format='%.6f')
        validation_records_df.to_csv(validation_file_path, mode='a', header=False, index=False, float_format='%.6f')

    return training_records_df, validation_records_df


def _run_machine_learning_model(X_train, y_train, X_valid, y_valid, model_name, hyperparameters,
                                task_type, save_records, testing):
    """Fit and score one of the estimator-style models; see the public entry point for the contract."""
    training_records_df = f"{model_name} has no training result records."
    hyperparameter_columns = [f"{model_name}_{key}" for key in hyperparameters]
    validation_column_list = [
        "Timestamp", "Model_name", *hyperparameter_columns, "Task_type", *_VALIDATION_METRIC_COLUMNS
    ]

    file_path = None

    if save_records:
        folder_name = f"{model_name}_result_records"
        os.makedirs(folder_name, exist_ok=True)

        file_path = _results_file_path(folder_name, model_name, testing)
        if not _create_record_file_if_missing(file_path, validation_column_list):
            # Skip configurations that already have a recorded result.
            if _configuration_already_recorded(
                    file_path,
                    ["Model_name", *hyperparameter_columns, "Task_type"],
                    [model_name, *hyperparameters.values(), task_type]):
                return 0
    else:
        print("Training and validation results are not saved to csv !")

    # Reject unknown tasks before spending time on fitting.
    if task_type not in ("Regression", "Classification"):
        print("Please make sure the task is regression or classification !")
        return 0

    model = _build_machine_learning_model(model_name, hyperparameters, task_type)

    # Train the model
    print(f'Start training {model_name} model ...')
    model.fit(X_train, y_train)

    # Make predictions on the scoring split
    print("Start validating ...")
    y_pred = model.predict(X_valid)

    metrics = dict.fromkeys(_METRIC_NAMES, 0.0)
    if task_type == "Regression":
        metrics["rmse"] = root_mean_squared_error(y_valid, y_pred)
        metrics["mse"] = mean_squared_error(y_valid, y_pred)
    else:
        # The predicted labels double as the AUC scores here.
        metrics.update(_classification_metrics(y_valid, y_pred, y_pred))

    validation_rows = [[
        datetime.now().strftime('%Y-%m-%d %H:%M:%S'), model_name, *hyperparameters.values(), task_type,
        *[metrics[name] for name in _METRIC_NAMES]
    ]]
    validation_records_df = pd.DataFrame(validation_rows, columns=validation_column_list)

    # Append the results to the record file created above
    if save_records:
        validation_records_df.to_csv(file_path, mode='a', header=False, index=False, float_format='%.6f')

    return training_records_df, validation_records_df

# (MLflow) Model training, validation, testing function

In [None]:
# from pathlib import Path
# import nbformat

# def load_notebook(notebook_path):
#     with open(notebook_path, 'r', encoding='utf-8') as f:
#         nb = nbformat.read(f, as_version=4)
#     code_cells = [cell.source for cell in nb.cells if cell.cell_type == 'code']
#     exec('\n'.join(code_cells), globals())

## Import the functions and variables defined in the utility notebook

# parent_directory = Path('../../../')
## parent_directory = Path('../../')
# data_preprocessing_utils_path = parent_directory / 'data_preprocessing_utils.ipynb'

# load_notebook(data_preprocessing_utils_path)

In [None]:
# Define Training and Validation (or Testing) Function with MLflow

import mlflow
from mlflow.models import infer_signature
import torch
import torch.nn as nn
import torch.nn.functional as F
import torch.optim as optim
from torchinfo import summary
from torch.utils.data import DataLoader, TensorDataset
import tqdm
from sklearn.metrics import mean_squared_error, root_mean_squared_error, mean_absolute_error
from sklearn.metrics import accuracy_score, auc, roc_curve, f1_score
from sklearn.metrics import precision_score, recall_score, confusion_matrix, roc_auc_score
import os
import numpy as np
import pandas as pd
from datetime import datetime

def mlflow_model_training_validtion_or_testing(X_train, y_train, X_valid, y_valid, 
                             model_name, hyperparameters, task_type, loss_type, optimizer_type,
                             dl_learning_rate, epochs_num, batch_size, 
                             train_metric=None, model_metric=None, save_records=True, testing=False):
    
    deep_learning_model_names = ["FM", "MLP", "DeepFM", "AFM", "DCN", "xDeepFM", "AutoInt", "AFN"]
    machine_learning_model_names = ["Linear", "KNN", "SVM", "DecisionTree", "RandomForest", 
                                    "AdaBoost", "XGBoost"]
    
    phase = "test" if testing else "eval"
    model = 0
    predictions = 0 # for logging model

    if model_name in deep_learning_model_names:

        if save_records:

            training_column_list = [
                "Timestamp", "Model_name", "Training_for", *[f"{model_name}_{key}" for key in list(hyperparameters.keys())], 
                "Task_type", "Loss_type", "Optimizer_type", "DL_learning_rate", "Epochs_num", "Batch_size", 
                "Epoch/Epochs_num", "Avg_Loss", "Avg_RMSE", "Avg_MSE", "Avg_MAE", "Avg_Accuracy", 
                "Avg_AUC_score", "Avg_F1", "Avg_Precision", "Avg_Recall", "Avg_Specificity"
            ]
            validation_column_list = [
                "Timestamp", "Model_name", *[f"{model_name}_{key}" for key in list(hyperparameters.keys())], 
                "Task_type", "Loss_type", "Optimizer_type", "DL_learning_rate", "Epochs_num", "Batch_size", 
                "Epoch/Epochs_num", "Loss", "RMSE", "MSE", "MAE", "Accuracy", "AUC_score", "F1", "Precision", "Recall", "Specificity"
            ]

            # Create folder if not exists
            folder_name = f"{model_name}_result_records"
            if not os.path.exists(folder_name):
                os.makedirs(folder_name)

            training_file_path = os.path.join(folder_name, f'{model_name}_training_result_records.csv')

            # Create record file if not exists
            if not os.path.exists(training_file_path):
                empty_df = pd.DataFrame(columns=training_column_list)
                empty_df.to_csv(training_file_path, index=False, float_format='%.6f')

            # If testing, then change validation results csv file path to testing results csv file path
            validation_file_path = os.path.join(folder_name, f'{model_name}_validation_result_records.csv')

            if testing:
                validation_file_path = os.path.join(folder_name, f'{model_name}_testing_result_records.csv')
            else: 
                validation_file_path = validation_file_path

            if not os.path.exists(validation_file_path):
                empty_df = pd.DataFrame(columns=validation_column_list)
                empty_df.to_csv(validation_file_path, index=False, float_format='%.6f')
            else:
                record_df = pd.read_csv(validation_file_path)
                columns_to_compare = ["Model_name", *[f"{model_name}_{key}" for key in list(hyperparameters.keys())],
                          "Task_type", "Loss_type", "Optimizer_type", "DL_learning_rate", "Epochs_num", "Batch_size"]
                record_df = record_df[columns_to_compare]
                record_df = record_df.values.tolist()
                record_df = [[str(value) for value in onelist] for onelist in record_df]
                temp_df_values = [[model_name, *hyperparameters.values(), task_type, 
                                   loss_type, optimizer_type, dl_learning_rate, epochs_num, batch_size]]
                temp_df = pd.DataFrame(temp_df_values, columns=columns_to_compare)
                temp_df = temp_df.values.tolist()[0]
                temp_df = [str(value) for value in temp_df]

                if temp_df in record_df:
                    print("Parameters already exists in validation or test results !")
                    return 0
        
        else:
            print("Training and validation results are not saved to csv !")
        
        run_name=0
        if testing:
            if train_metric != None and model_metric == None:
                run_name = f"Retrained {train_metric.uppper()} params"
            elif train_metric != None and model_metric != None:
                run_name = f"Retrained {train_metric.uppper()} and {model_metric.upper()} params"
            elif train_metric == None and model_metric == None:
                run_name = None
            else:
                print("If you want to set run name, please make sure train_metric is not None !")
                return 0
        else:
            run_name = None
        
        with mlflow.start_run(run_name=run_name) as run:
        
            # Log training parameters
            params = {
                **hyperparameters,
                "task_type": task_type,
                "loss_type": loss_type,
                "optimizer_type": optimizer_type,
                "dl_learning_rate": dl_learning_rate,
                "epochs_num": epochs_num,
                "batch_size": batch_size
            }
            mlflow.log_params(params)
    
        
            if model_name == "FM":
                model = FactorizationMachine(**hyperparameters)
            elif model_name == "MLP":
                model = MultiLayerPerceptron(**hyperparameters)
            elif model_name == "DeepFM":
                model = DeepFM(**hyperparameters)
            elif model_name == "AFM":
                model = AttentionalFactorizationMachineModel(**hyperparameters)
            elif model_name == "DCN":
                model = DeepCrossNetworkModel(**hyperparameters)
            elif model_name == "xDeepFM":
                model = ExtremeDeepFactorizationMachineModel(**hyperparameters)
            elif model_name == "AutoInt":
                model = AutomaticFeatureInteractionModel(**hyperparameters)
            elif model_name == "AFN":
                model = AdaptiveFactorizationNetwork(**hyperparameters)
            else:
                print(f"Please choose a model in {deep_learning_model_names} !")
            
            # Log model summary.
            with open(f"{model_name}_model_summary.txt", "w") as f:
                f.write(str(summary(model)))
            mlflow.log_artifact(f"{model_name}_model_summary.txt")

            # Convert to float tensor
            X_train_tensor = torch.from_numpy(X_train).float()
            Xi_train_tensor = torch.arange(X_train_tensor.size(1)).unsqueeze(0).unsqueeze(-1).repeat(X_train_tensor.size(0), 1, 1).int()
            y_train_tensor = torch.from_numpy(y_train).float()
            X_valid_tensor = torch.from_numpy(X_valid).float()
            Xi_valid_tensor = torch.arange(X_valid_tensor.size(1)).unsqueeze(0).unsqueeze(-1).repeat(X_valid_tensor.size(0), 1, 1).int()
            y_valid_tensor = torch.from_numpy(y_valid).float()

            # Convert data to DataLoader
            train_dataset = TensorDataset(Xi_train_tensor, X_train_tensor, y_train_tensor)
            train_loader = DataLoader(train_dataset, batch_size=batch_size, shuffle=True)
            validation_dataset = TensorDataset(Xi_valid_tensor, X_valid_tensor, y_valid_tensor)
            validation_loader = DataLoader(validation_dataset, batch_size=len(validation_data), shuffle=True)

            criterion = nn.MSELoss() # Use mean squared error loss as default criterion type
    #         metric_type = "RMSE" # Use RMSE as default metric type

            regression_loss_list = ["MSE, RMSE", "MAE"]
            classification_loss_list = ["CrossEntropy", "BinaryCrossEntropy"]
    #         classification_metric_list = ["Accuracy", "AUC", "ROC", "F1-score"]

            optimizer_type_list = ["Adam", "SGD", "Adagrad", "RMSprop", "Adadelta", "Adamax", "Nadam"]
            optimizer = optim.Adam(model.parameters(), lr=dl_learning_rate)  # Use Adam optimizer as default type

            if optimizer_type == "Adam":
                optimizer = optim.Adam(model.parameters(), lr=dl_learning_rate)
            elif optimizer_type == "SGD":
                optimizer = optim.SGD(model.parameters(), lr=dl_learning_rate)
            elif optimizer_type == "Adagrad":
                optimizer = optim.Adagrad(model.parameters(), lr=dl_learning_rate)
            elif optimizer_type == "RMSprop":
                optimizer = optim.RMSprop(model.parameters(), lr=dl_learning_rate)
            elif optimizer_type == "Adadelta":
                optimizer = optim.Adadelta(model.parameters(), lr=dl_learning_rate)
            elif optimizer_type == "Adamax":
                optimizer = optim.Adamax(model.parameters(), lr=dl_learning_rate)
            elif optimizer_type == "Nadam":
                optimizer = optim.Nadam(model.parameters(), lr=dl_learning_rate)
            else:
                print(f"Please select an optimizer in {optimizer_name_list}")
                return 0
            
            # Train and validate the model
            training_results_df = []
            validation_results_df = []

            print(f'Start training {model_name} model ...')
            for epoch in range(epochs_num):

                total_loss = 0.0
                total_batches = 0

                total_rmse = 0.0
                total_mse = 0.0
                total_mae = 0.0
                total_accuracy = 0.0
                total_auc_score = 0.0
                total_f1 = 0.0
                total_precision = 0.0
                total_recall = 0.0
                total_specificity = 0.0

                for t, (xi, x, y_true) in enumerate(train_loader):

                    optimizer.zero_grad()

                    y_pred = 0
                    if model_name == "DeepFM":
                        y_pred = model(xi, x)
                    elif model_name == "MLP":
                        y_pred = model(x)
                        y_pred = y_pred.view(-1)
                    else:    
                        y_pred = model(x)

                    loss = 0.0

                    rmse = 0.0
                    mse = 0.0
                    mae = 0.0
                    accuracy = 0.0
                    auc_score = 0.0
                    f1 = 0.0
                    precision = 0.0
                    recall = 0.0
                    specificity = 0.0

                    if task_type == "Regression":
                        criterion = nn.MSELoss()
                        if loss_type == "RMSE":
                            eps = 1e-6
                            if model_name == "FM":
                                loss = torch.sqrt(criterion(y_pred, y_true.view(-1, 1)) + eps)
                            else:
                                loss = torch.sqrt(criterion(y_pred, y_true.view(-1)) + eps)
                        elif loss_type == "MSE":
                            if model_name == "FM":
                                loss = criterion(y_pred, y_true.view(-1, 1))
                            else:        
                                loss = criterion(y_pred, y_true.view(-1))
                        else:
                            print(f"Please make sure loss type is in {regression_loss_list}")
                            return 0

                        y_true = y_true.detach().numpy()
                        y_pred = y_pred.detach().numpy()
                        y_pred = np.nan_to_num(y_pred, nan=0) # prevent error
                        predictions = y_pred

                        rmse = root_mean_squared_error(y_true, y_pred)
                        mse = mean_squared_error(y_true, y_pred)
                        mae = mean_absolute_error(y_true, y_pred)
                        
                        # log metrics at each step
#                         mlflow.log_metric("loss", f"{loss:4f}", step=t)
#                         mlflow.log_metric("rmse", f"{rmse:4f}", step=t)
#                         mlflow.log_metric("mse", f"{mse:4f}", step=t)

                    elif task_type == "Classification":
                        if loss_type == "BCE":
                            criterion = nn.BCELoss()
                            y_pred = torch.sigmoid(y_pred)
                        elif loss_type == "CE":
                            criterion = nn.CrossEntropyLoss()
                        else:
                            print(f"Please make sure loss type is in {classification_loss_list}")
                            return 0

                        loss = criterion(y_pred, y_true)

                        y_true = y_true.detach().numpy()
                        y_pred = y_pred.detach().numpy()
#                         y_pred = np.nan_to_num(y_pred, nan=0) # prevent error
                        predictions = y_pred # for logging model
                        
                        accuracy = accuracy_score(y_true, y_pred) # Accuracy
                        auc_score = roc_auc_score(y_true, y_pred) # AUC
                        f1 = f1_score(y_true, y_pred) # F1-score
                        precision = precision_score(y_true, y_pred) # Precision
                        recall = recall_score(y_true, y_pred) # Recall
                        specificity = recall_score(y_true, y_pred, pos_label=1) # Specificity (True Negative Rate)                        
                        
                        # log metrics at each step
#                         mlflow.log_metric("loss", f"{loss:4f}", step=t)
#                         mlflow.log_metric("accuracy", f"{accuracy:4f}", step=t)
#                         mlflow.log_metric("auc_score", f"{auc_score:4f}", step=t)
#                         mlflow.log_metric("f1", f"{f1:4f}", step=t)
#                         mlflow.log_metric("precision", f"{precision:4f}", step=t)
#                         mlflow.log_metric("recall", f"{recall:4f}", step=t)
#                         mlflow.log_metric("specificity", f"{specificity:4f}", step=t)
                        
                    else:
                        print("Please make sure the task is regression or classification !")
                        return 0

                    loss.backward()
                    optimizer.step()

                    total_loss += loss.item()
                    total_batches += 1

                    total_rmse += rmse
                    total_mse += mse
                    total_mae += mae
                    total_accuracy += accuracy
                    total_auc_score += auc_score
                    total_f1 += f1
                    total_precision += precision
                    total_recall += recall
                    total_specificity += specificity

                avg_loss = total_loss / total_batches

                avg_rmse = total_rmse / total_batches
                avg_mse = total_mse / total_batches
                avg_mae = total_mae / total_batches

                avg_accuracy = total_accuracy / total_batches
                avg_auc_score = total_auc_score / total_batches
                avg_f1 = total_f1 / total_batches
                avg_precision = total_precision / total_batches
                avg_recall = total_recall / total_batches
                avg_specificity = total_specificity / total_batches
                
                # log metrics at each epoch
                mlflow.log_metric("train_loss", f"{avg_loss:4f}", step=(epoch+1))
                mlflow.log_metric("train_rmse", f"{avg_rmse:4f}", step=(epoch+1))
                mlflow.log_metric("train_mse", f"{avg_mse:4f}", step=(epoch+1))
                mlflow.log_metric("train_mae", f"{avg_mae:4f}", step=(epoch+1))
                mlflow.log_metric("train_accuracy", f"{avg_accuracy:4f}", step=(epoch+1))
                mlflow.log_metric("train_auc_score", f"{avg_auc_score:4f}", step=(epoch+1))
                mlflow.log_metric("train_f1", f"{avg_f1:4f}", step=(epoch+1))
                mlflow.log_metric("train_precision", f"{avg_precision:4f}", step=(epoch+1))
                mlflow.log_metric("train_recall", f"{avg_recall:4f}", step=(epoch+1))
                mlflow.log_metric("train_specificity", f"{avg_specificity:4f}", step=(epoch+1))

                value_list = [
                    datetime.now().strftime('%Y-%m-%d %H:%M:%S'), model_name, 
                    "for_testing" if testing else "for_validation",
                    *hyperparameters.values(), task_type, loss_type,
                    optimizer_type, dl_learning_rate, epochs_num, batch_size, f"{epoch + 1}th/{epochs_num}", 
                    avg_loss, avg_rmse, avg_mse, avg_mae, avg_accuracy, avg_auc_score, 
                    avg_f1, avg_precision, avg_recall, avg_specificity
                ]
    #             print(f'Epoch {epoch + 1}/{epochs_num}, Average RMSE: {average_rmse:.4f}')

                training_results_df.append(value_list)

                # Make predictions on validation data --------------------------------------
                if testing:
                    print("Start testing ...")
                else:
                    print("Start validating ...")
                
#                 all_predictions = []

                y_pred = 0
                
                model.eval()
                with torch.no_grad():
                    for t, (xi, x, y_true) in enumerate(validation_loader):

#                         predictions = 0
                        if model_name == "DeepFM":
                            y_pred = model(xi, x)
                        elif model_name == "MLP":
                            y_pred = model(x)
                            y_pred = y_pred.view(-1)
                        else:
                            y_pred = model(x)

#                         all_predictions.append(y_pred.numpy())

#                 # Concatenate all predictions into a single numpy array
#                 all_predictions = np.concatenate(all_predictions)

#                 # Reshape all_predictions if needed
#                 y_pred = all_predictions.squeeze()

                # Calculate metric values
                loss = 0.0
                
                rmse = 0.0
                mse = 0.0
                mae = 0.0
                accuracy = 0.0
                auc_score = 0.0
                f1 = 0.0
                precision = 0.0
                recall = 0.0
                specificity = 0.0

                if task_type == "Regression":
                    criterion = nn.MSELoss()
                    if loss_type == "RMSE":
                        eps = 1e-6
                        if model_name == "FM":
                            loss = torch.sqrt(criterion(y_pred, y_true.view(-1, 1)) + eps)
                        else:
                            loss = torch.sqrt(criterion(y_pred, y_true.view(-1)) + eps)
                    elif loss_type == "MSE":
                        if model_name == "FM":
                            loss = criterion(y_pred, y_true.view(-1, 1))
                        else:        
                            loss = criterion(y_pred, y_true.view(-1))
                    else:
                        print(f"Please make sure loss type is in {regression_loss_list}")
                        return 0
                    
                    y_true = y_true.detach().numpy() # Convert PyTorch tensor to NumPy array
                    y_pred = y_pred.detach().numpy() # Convert PyTorch tensor to NumPy array
                    y_pred = np.nan_to_num(y_pred, nan=0) # prevent error
                    
                    rmse = root_mean_squared_error(y_true, y_pred)
                    mse = mean_squared_error(y_true, y_pred)
                    mae = mean_absolute_error(y_true, y_pred)

                elif task_type == "Classification":
                    if loss_type == "BCE":
                            criterion = nn.BCELoss()
                            y_pred = torch.sigmoid(y_pred)
                    elif loss_type == "CE":
                        criterion = nn.CrossEntropyLoss()
                    else:
                        print(f"Please make sure loss type is in {classification_loss_list}")
                        return 0

                    loss = criterion(y_pred, y_true)
                    
                    y_true = y_true.detach().numpy() # Convert PyTorch tensor to NumPy array
                    y_pred = y_pred.detach().numpy() # Convert PyTorch tensor to NumPy array
#                     y_pred = np.nan_to_num(y_pred, nan=0) # prevent error
                    
                    accuracy = accuracy_score(y_true, y_pred) # Accuracy
                    auc_score = roc_auc_score(y_true, y_pred) # AUC
                    f1 = f1_score(y_true, y_pred) # F1-score
                    precision = precision_score(y_true, y_pred) # Precision
                    recall = recall_score(y_true, y_pred) # Recall
                    specificity = recall_score(y_true, y_pred, pos_label=1) # Specificity (True Negative Rate)
                    
                else:
                    print("Please make sure the task is regression or classification !")
                    return 0

                # log metrics at each step
#                 mlflow.log_metric("eval_loss", f"{loss:4f}", step=t)
#                 mlflow.log_metric("eval_rmse", f"{rmse:4f}", step=t)
#                 mlflow.log_metric("eval_mse", f"{mse:4f}", step=t)
                    
                # log metrics at each epoch
                mlflow.log_metric(f"{phase}_loss", f"{loss:4f}", step=(epoch+1))
                mlflow.log_metric(f"{phase}_rmse", f"{rmse:4f}", step=(epoch+1))
                mlflow.log_metric(f"{phase}_mse", f"{mse:4f}", step=(epoch+1))
                mlflow.log_metric(f"{phase}_mae", f"{mae:4f}", step=(epoch+1))
                
                # log metrics at each step
#                 mlflow.log_metric("eval_loss", f"{loss:4f}", step=t)
#                 mlflow.log_metric("eval_accuracy", f"{accuracy:4f}", step=t)
#                 mlflow.log_metric("eval_auc_score", f"{auc_score:4f}", step=t)
#                 mlflow.log_metric("eval_f1", f"{f1:4f}", step=t)
#                 mlflow.log_metric("eval_precision", f"{precision:4f}", step=t)
#                 mlflow.log_metric("eval_recall", f"{recall:4f}", step=t)
#                 mlflow.log_metric("eval_specificity", f"{specificity:4f}", step=t)

                # log metrics at each epoch
#                 mlflow.log_metric("eval_loss", f"{loss:4f}", step=(epoch+1))
                mlflow.log_metric(f"{phase}_accuracy", f"{accuracy:4f}", step=(epoch+1))
                mlflow.log_metric(f"{phase}_auc_score", f"{auc_score:4f}", step=(epoch+1))
                mlflow.log_metric(f"{phase}_f1", f"{f1:4f}", step=(epoch+1))
                mlflow.log_metric(f"{phase}_precision", f"{precision:4f}", step=(epoch+1))
                mlflow.log_metric(f"{phase}_recall", f"{recall:4f}", step=(epoch+1))
                mlflow.log_metric(f"{phase}_specificity", f"{specificity:4f}", step=(epoch+1))
                
                value_list = [
                    datetime.now().strftime('%Y-%m-%d %H:%M:%S'), model_name, *hyperparameters.values(), 
                    task_type, loss_type, optimizer_type, dl_learning_rate, epochs_num, batch_size, 
                    f"{epoch + 1}th/{epochs_num}", loss, rmse, mse, mae, accuracy, auc_score, f1, precision, 
                    recall, specificity
                ]

                validation_results_df.append(value_list)
        #         print(f"RMSE on the validation set: {rmse}")

            # Save training and validation results to file
            training_records_df = pd.DataFrame(training_results_df, columns=training_column_list)
            validation_records_df = pd.DataFrame(validation_results_df, columns=validation_column_list)

            training_records_df.to_csv(training_file_path, mode='a', header=False, index=False, float_format='%.6f')
            validation_records_df.to_csv(validation_file_path, mode='a', header=False, index=False, float_format='%.6f')
                
            # Save the trained model to MLflow.
            input_example = X_train[0]
            signatures = infer_signature(input_example, predictions)
            mlflow.pytorch.log_model(model, f"{model_name}_model", signature=signatures)
#             mlflow.pytorch.log_model(model, f"{model_name}_model")
                
    elif model_name in machine_learning_model_names:

        if save_records:

#             training_records_df = f"{model_name} has no training result records."
            training_column_list = [
                "Timestamp", "Model_name", "Training_for", *[f"{model_name}_{key}" for key in list(hyperparameters.keys())], "Task_type",
                "RMSE", "MSE", "MAE", "Accuracy", "AUC_score", "F1", "Precision", "Recall", "Specificity"
            ]
            validation_column_list = [
                "Timestamp", "Model_name", *[f"{model_name}_{key}" for key in list(hyperparameters.keys())], "Task_type",
                "RMSE", "MSE", "MAE", "Accuracy", "AUC_score", "F1", "Precision", "Recall", "Specificity"
            ]

            folder_name = f"{model_name}_result_records"
            if not os.path.exists(folder_name):
                os.makedirs(folder_name)
            
            training_file_path = os.path.join(folder_name, f'{model_name}_training_result_records.csv')

            # Create record file if not exists
            if not os.path.exists(training_file_path):
                empty_df = pd.DataFrame(columns=training_column_list)
                empty_df.to_csv(training_file_path, index=False, float_format='%.6f')
            
            validation_file_path = os.path.join(folder_name, f'{model_name}_validation_result_records.csv')

            # If testing, then change validation results csv file path to testing results csv file path
            if testing:
                validation_file_path = os.path.join(folder_name, f'{model_name}_testing_result_records.csv')
            else: 
                validation_file_path = validation_file_path

            if not os.path.exists(validation_file_path):
                empty_df = pd.DataFrame(columns=validation_column_list)
                empty_df.to_csv(validation_file_path, index=False, float_format='%.6f')
            else:
                record_df = pd.read_csv(validation_file_path)
                columns_to_compare = ["Model_name", *[f"{model_name}_{key}" for key in list(hyperparameters.keys())], "Task_type"]
                record_df = record_df[columns_to_compare]
                record_df = record_df.values.tolist()
                record_df = [[str(value) for value in onelist] for onelist in record_df]
                temp_df_values = [[model_name, *hyperparameters.values(), task_type]]
                temp_df = pd.DataFrame(temp_df_values, columns=columns_to_compare)
                temp_df = temp_df.values.tolist()[0]
                temp_df = [str(value) for value in temp_df]

                if temp_df in record_df:
                    print("Parameters already exists in validation or test results !")
                    return 0

        else:
            print("Training and validation results are not saved to csv !")

        if model_name == "Linear":
            model = LinearRegression() if task_type == "Regression" else LogisticRegression()
        elif model_name == "KNN":
            model = KNeighborsRegressor(**hyperparameters) if task_type == "Regression" else KNeighborsClassifier(**hyperparameters)
        elif model_name == "SVM":
            model = SVR(**hyperparameters) if task_type == "Regression" else SVC(**hyperparameters)
        elif model_name == "DecisionTree":
            model = DecisionTreeRegressor(**hyperparameters) if task_type == "Regression" else DecisionTreeClassifier(**hyperparameters)
        elif model_name == "RandomForest":
            model = RandomForestRegressor(**hyperparameters) if task_type == "Regression" else RandomForestClassifier(**hyperparameters)
        elif model_name == "AdaBoost":
            model = AdaBoostRegressor(**hyperparameters) if task_type == "Regression" else AdaBoostClassifier(**hyperparameters)
        elif model_name == "XGBoost":
            model = XGBRegressor(**hyperparameters) if task_type == "Regression" else XGBClassifier(**hyperparameters)
        else:
            print(f"Please choose a model in {machine_learning_model_names} !")
        
        run_name=0
        if testing:
            if train_metric != None and model_metric == None:
                run_name = f"Retrained {train_metric.uppper()} params"
            elif train_metric != None and model_metric != None:
                run_name = f"Retrained {train_metric.uppper()} and {model_metric.upper()} params"
            elif train_metric == None and model_metric == None:
                run_name = None
            else:
                print("If you want to set run name, please make sure train_metric is not None !")
                return 0
        else:
            run_name = None
        
        with mlflow.start_run(run_name=run_name) as run:

            # Log training parameters
            params = {
                **hyperparameters,
                "task_type": task_type,
                "loss_type": loss_type,
                "optimizer_type": optimizer_type,
                "dl_learning_rate": dl_learning_rate,
                "epochs_num": epochs_num,
                "batch_size": batch_size
            }
            mlflow.log_params(params)

            # Train the model
            print(f'Start training {model_name} model ...')
            model.fit(X_train, y_train)

            # Make predictions on training data
            training_results_df = []
            y_pred = model.predict(X_train)
            y_pred = np.nan_to_num(y_pred, nan=0) # prevent error
            predictions = y_pred # for logging model
            
            # Calculate metric values
            rmse = 0.0
            mse = 0.0
            mae = 0.0
            accuracy = 0.0
            auc_score = 0.0
            f1 = 0.0
            precision = 0.0
            recall = 0.0
            specificity = 0.0

            if task_type == "Regression":
                rmse = root_mean_squared_error(y_train, y_pred)
                mse = mean_squared_error(y_train, y_pred)
                mae = mean_absolute_error(y_train, y_pred)
            elif task_type == "Classification":
                accuracy = accuracy_score(y_train, y_pred) # Accuracy
                auc_score = roc_auc_score(y_train, y_pred) # AUC
                f1 = f1_score(y_train, y_pred) # F1-score
                precision = precision_score(y_train, y_pred) # Precision
                recall = recall_score(y_train, y_pred) # Recall
                specificity = recall_score(y_train, y_pred, pos_label=1) # Specificity (True Negative Rate)
            else:
                print("Please make sure the task is regression or classification !")
                return 0

            mlflow.log_metric("train_rmse", f"{rmse:4f}")
            mlflow.log_metric("train_mse", f"{mse:4f}")
            mlflow.log_metric("train_mae", f"{mae:4f}")
            mlflow.log_metric("train_accuracy", f"{accuracy:4f}")
            mlflow.log_metric("train_auc_score", f"{auc_score:4f}")
            mlflow.log_metric("train_f1", f"{f1:4f}")
            mlflow.log_metric("train_precision", f"{precision:4f}")
            mlflow.log_metric("train_recall", f"{recall:4f}")
            mlflow.log_metric("train_specificity", f"{specificity:4f}")

            value_list = [
                datetime.now().strftime('%Y-%m-%d %H:%M:%S'), model_name,
                "for_testing" if testing else "for_validation",
                *hyperparameters.values(), task_type, 
                rmse, mse, mae, accuracy, auc_score, f1, precision, recall, specificity
            ]

            training_results_df.append(value_list)       
    #         print(f"RMSE on the validation set: {rmse}")

            # Save training results to file
            training_records_df = pd.DataFrame(training_results_df, columns=training_column_list)
            training_records_df.to_csv(training_file_path, mode='a', header=False, index=False, float_format='%.6f')

            # Make predictions on validation data -------------------------------------
            validation_results_df = []

            if testing:
                print("Start testing ...")
            else:
                print("Start validating ...")
                    
            y_pred = model.predict(X_valid)
            y_pred = np.nan_to_num(y_pred, nan=0) # prevent error
            # Calculate metric values
            rmse = 0.0
            mse = 0.0
            mae = 0.0
            accuracy = 0.0
            auc_score = 0.0
            f1 = 0.0
            precision = 0.0
            recall = 0.0
            specificity = 0.0

            if task_type == "Regression":
                rmse = root_mean_squared_error(y_valid, y_pred)
                mse = mean_squared_error(y_valid, y_pred)
                mae = mean_absolute_error(y_valid, y_pred)
            elif task_type == "Classification":
                accuracy = accuracy_score(y_valid, y_pred) # Accuracy
                auc_score = roc_auc_score(y_valid, y_pred) # AUC
                f1 = f1_score(y_valid, y_pred) # F1-score
                precision = precision_score(y_valid, y_pred) # Precision
                recall = recall_score(y_valid, y_pred) # Recall
                specificity = recall_score(y_valid, y_pred, pos_label=1) # Specificity (True Negative Rate)
            else:
                print("Please make sure the task is regression or classification !")
                return 0

            mlflow.log_metric(f"{phase}_rmse", f"{rmse:4f}")
            mlflow.log_metric(f"{phase}_mse", f"{mse:4f}")
            mlflow.log_metric(f"{phase}_mae", f"{mae:4f}")
            mlflow.log_metric(f"{phase}_accuracy", f"{accuracy:4f}")
            mlflow.log_metric(f"{phase}_auc_score", f"{auc_score:4f}")
            mlflow.log_metric(f"{phase}_f1", f"{f1:4f}")
            mlflow.log_metric(f"{phase}_precision", f"{precision:4f}")
            mlflow.log_metric(f"{phase}_recall", f"{recall:4f}")
            mlflow.log_metric(f"{phase}_specificity", f"{specificity:4f}")

            value_list = [
                datetime.now().strftime('%Y-%m-%d %H:%M:%S'), model_name, *hyperparameters.values(), task_type, 
                rmse, mse, mae, accuracy, auc_score, f1, precision, recall, specificity
            ]

            validation_results_df.append(value_list)       
    #         print(f"RMSE on the validation set: {rmse}")

            # Save validation results to file
            validation_records_df = pd.DataFrame(validation_results_df, columns=validation_column_list)
            validation_records_df.to_csv(validation_file_path, mode='a', header=False, index=False, float_format='%.6f')
            
            input_example = X_train[0]
            signatures = infer_signature(input_example, predictions)
            # Save the trained model to MLflow.
            if model_name in ["Linear", "KNN", "SVM", "DecisionTree", "RandomForest", "AdaBoost"]:
                mlflow.sklearn.log_model(model, f"{model_name}_model", signature=signatures)
#                 mlflow.sklearn.log_model(model, f"{model_name}_model")
            elif model_name == "XGBoost":
                mlflow.xgboost.log_model(model, f"{model_name}_model", signature=signatures)
#                 mlflow.xgboost.log_model(model, f"{model_name}_model")
            elif model_name == "CatBoost":
                mlflow.catboost.log_model(model, f"{model_name}_model", signature=signatures)
#                 mlflow.catboost.log_model(model, f"{model_name}_model")
            else:
                print(f"Model not in {machine_learning_model_names}, couldn't save the model !")
            
    else:
        return f"Please select a model in {deep_learning_model_names} and {machine_learning_model_names} !"

    # Save the trained model to MLflow.
    # mlflow.pytorch.log_model(model, "model")

    return training_records_df, validation_records_df