# Logistic Regression Model

In [5]:
import numpy as np

def calculate_iqs(true_genotypes, imputed_dosages):
    # Convert imputed dosages to discrete values
    imputed_discrete = np.round(imputed_dosages).astype(int)

    # Clip the imputed discrete values to be within the range of 0 to 2
    imputed_discrete = np.clip(imputed_discrete, 0, 2)

    # Create a contingency table
    contingency_table = np.zeros((3, 3), dtype=int)

    # Fill the contingency table
    for true_geno, imputed_geno in zip(true_genotypes, imputed_discrete):
        for true_allele, imputed_allele in zip(true_geno, imputed_geno):
            contingency_table[int(true_allele), int(imputed_allele)] += 1

    # Calculate the total number of genotypes
    total_genotypes = np.sum(contingency_table)

    # Calculate observed proportion of agreement (Po)
    observed_agreement = np.trace(contingency_table) / total_genotypes

    # Calculate marginal sums
    row_marginals = np.sum(contingency_table, axis=1)
    col_marginals = np.sum(contingency_table, axis=0)

    # Calculate chance agreement (Pc)
    chance_agreement = np.sum((row_marginals * col_marginals) / (total_genotypes ** 2))

    # Calculate IQS
    if chance_agreement == 1:  # To prevent division by zero in case of perfect chance agreement
        iqs_score = 0
    else:
        iqs_score = (observed_agreement - chance_agreement) / (1 - chance_agreement)

    return iqs_score

# Example usage:
true_genotypes = np.array([[0, 1, 2], [1, 2, 0], [2, 0, 1]])
imputed_dosages = np.array([[0.1, 1.2, 1.9], [1.0, 1.8, 0.3], [2.0, 0.5, 1.4]])

iqs_score = calculate_iqs(true_genotypes, imputed_dosages)
print(f"IQS Score: {iqs_score}")


IQS Score: 1.0


In [7]:
import numpy as np
import pandas as pd
import torch
import torch.nn as nn
import torch.optim as optim
from torch.utils.data import DataLoader, TensorDataset
from sklearn.model_selection import train_test_split, StratifiedKFold
from sklearn.metrics import precision_score, recall_score, f1_score, roc_auc_score, confusion_matrix, roc_curve
from sklearn.metrics import r2_score as sklearn_r2_score
import optuna
from matplotlib import pyplot as plt
import os
import csv

# Load and preprocess the data
data_directory = '../../../Data/Filtered_split_training_data/'
start = 1

# Initialize lists to store the performance metrics for each chromosome
accuracies = []
precisions = []
recalls = []
false_positive_rates = []
auc_rocs = []
r2_scores = []
iqs_scores = []

# Create folders for saving files
output_folder = "../../../Data/model_results/logistic_regression/"
model_folder = output_folder + "models/"
csv_folder = output_folder + "csv_files/"
curve_folder = output_folder + "roc_curves/"

os.makedirs(model_folder, exist_ok=True)
os.makedirs(csv_folder, exist_ok=True)
os.makedirs(curve_folder, exist_ok=True)

for chromosome_number in range(start, 23):
    # Create subfolders for the current chromosome
    chr_model_folder = model_folder + f"chr{chromosome_number}/"
    chr_csv_folder = csv_folder + f"chr{chromosome_number}/"
    chr_curve_folder = curve_folder + f"chr{chromosome_number}/"

    os.makedirs(chr_model_folder, exist_ok=True)
    os.makedirs(chr_csv_folder, exist_ok=True)
    os.makedirs(chr_curve_folder, exist_ok=True)

    file_name = data_directory + f"23AndMe_PRS313_merged_chr{chromosome_number}_matching_split.parquet"
    data = pd.read_parquet(file_name)


    # # Split the data into features and target
    # X = torch.tensor(data.filter(regex='^(?!.*Unknown)').values, dtype=torch.float32)
    # y = torch.tensor(data.filter(regex='Unknown').values, dtype=torch.float32)

    # print("Unknown PRS313 SNPs: ", y.shape[1])
    # print("Known PRS313 SNPs: ", data[[col for col in data.columns if "PRS313_Known" in col]].shape[1])
    # print("23AndMe SNPs with LD to Unknown PRS313 SNPs: ", data[[col for col in data.columns if "PRS313_" not in col]].shape[1])
    # print("Total SNPs used for Training: ", X.shape[1])

    # Split the data into features and target
    X = torch.tensor(data.filter(regex='^(?!.*PRS313_)').values, dtype=torch.float32)
    y = torch.tensor(data.filter(regex='PRS313_').values, dtype=torch.float32)

    print("Total SNPs: ", data.shape[1])
    print("PRS313 SNPs: ", y.shape[1])
    print("Total SNPs used for Training: ", X.shape[1])


    # Split the data into train-validation and test sets
    X_train_val, X_test, y_train_val, y_test = train_test_split(X, y, test_size=0.2, random_state=42)

    # Define the logistic regression model with lasso regularization
    class LogisticRegression(nn.Module):
        def __init__(self, input_dim, output_dim, l1_coef=0.0):
            super(LogisticRegression, self).__init__()
            self.linear = nn.Linear(input_dim, output_dim)
            self.sigmoid = nn.Sigmoid()
            self.l1_coef = l1_coef

        def forward(self, x):
            out = self.linear(x)
            out = self.sigmoid(out)
            return out

        def l1_loss(self):
            return self.l1_coef * torch.norm(self.linear.weight, p=1)
        
    # Set the device
    device = torch.device("cuda" if torch.cuda.is_available() else "cpu")

    # Set the hyperparameters for tuning
    input_dim = X_train_val.shape[1]
    output_dim = y_train_val.shape[1]
    num_epochs = 500
    batch_size = 128

    # Define the objective function for Optuna with cross-validation and early stopping
    def objective(trial):
        learning_rate = trial.suggest_float('learning_rate', 1e-4, 1e-1, log=True)
        l1_coef = trial.suggest_float('l1_coef', 1e-5, 1e-1, log=True)
        patience = trial.suggest_int('patience', 5, 20)
        batch_size = trial.suggest_categorical('batch_size', [32, 64, 128, 256])

        model = LogisticRegression(input_dim, output_dim, l1_coef).to(device)
        optimizer = optim.Adam(model.parameters(), lr=learning_rate)
        criterion = nn.BCELoss()
        scheduler = optim.lr_scheduler.ReduceLROnPlateau(optimizer, mode='min', factor=0.1, patience=5, verbose=False)

        skf = StratifiedKFold(n_splits=5, shuffle=True, random_state=42)
        fold_losses = []

        for fold, (train_idx, val_idx) in enumerate(skf.split(X_train_val, y_train_val.argmax(dim=1))):
            X_train, X_val = X_train_val[train_idx], X_train_val[val_idx]
            y_train, y_val = y_train_val[train_idx], y_train_val[val_idx]

            train_dataset = TensorDataset(X_train, y_train)
            train_loader = DataLoader(train_dataset, batch_size=batch_size, shuffle=True)

            best_val_loss = float('inf')
            counter = 0

            for epoch in range(num_epochs):
                train_loss = 0.0
                for batch_X, batch_y in train_loader:
                    batch_X, batch_y = batch_X.to(device), batch_y.to(device)

                    outputs = model(batch_X)
                    loss = criterion(outputs, batch_y) + model.l1_loss()

                    optimizer.zero_grad()
                    loss.backward()
                    optimizer.step()

                    train_loss += loss.item()

                train_loss /= len(train_loader)

                val_dataset = TensorDataset(X_val, y_val)
                val_loader = DataLoader(val_dataset, batch_size=batch_size)

                with torch.no_grad():
                    val_loss = 0.0
                    for batch_X, batch_y in val_loader:
                        batch_X, batch_y = batch_X.to(device), batch_y.to(device)
                        outputs = model(batch_X)
                        loss = criterion(outputs, batch_y) + model.l1_loss()
                        val_loss += loss.item()

                    val_loss /= len(val_loader)
                    scheduler.step(val_loss)

                    if val_loss < best_val_loss:
                        best_val_loss = val_loss
                        counter = 0
                    else:
                        counter += 1

                    if counter >= patience:
                        # print(f"Early stopping at epoch {epoch+1}")
                        break

            fold_losses.append(best_val_loss)

        return np.mean(fold_losses)

    # Create the "optuna_studies" folder if it doesn't exist
    os.makedirs("optuna_studies", exist_ok=True)

    # Create an Optuna study and optimize the hyperparameters
    study_name = f"chr{chromosome_number}_study"
    storage_name = f"sqlite:///optuna_studies/{study_name}.db"

    # Check if the study exists

    current_dir = os.getcwd()
    study_exists = os.path.exists(current_dir + f"/optuna_studies/{study_name}.db")
    
    if study_exists:
        # Load the existing study
        study = optuna.load_study(study_name=study_name, storage=storage_name)
    else:
        # Create a new study
        study = optuna.create_study(direction='minimize', study_name=study_name, storage=storage_name)

    study.optimize(objective, n_trials=10, n_jobs=-1)

    # Print the best hyperparameters and best value
    print(f"Chr {chromosome_number} - Best hyperparameters: {study.best_params}")
    print(f"Chr {chromosome_number} - Best value: {study.best_value:.4f}")

    # Train the final model with the best hyperparameters and early stopping
    best_learning_rate = study.best_params['learning_rate']
    best_l1_coef = study.best_params['l1_coef']
    best_patience = study.best_params['patience']
    best_batch_size = study.best_params['batch_size']

    model = LogisticRegression(input_dim, output_dim, best_l1_coef).to(device)
    optimizer = optim.Adam(model.parameters(), lr=best_learning_rate)
    criterion = nn.BCELoss()
    scheduler = optim.lr_scheduler.ReduceLROnPlateau(optimizer, mode='min', factor=0.1, patience=5, verbose=False)

    train_dataset = TensorDataset(X_train_val, y_train_val)
    train_loader = DataLoader(train_dataset, batch_size=best_batch_size, shuffle=True)

    best_train_loss = float('inf')
    counter = 0

    for epoch in range(num_epochs):
        train_loss = 0.0
        for batch_X, batch_y in train_loader:
            batch_X, batch_y = batch_X.to(device), batch_y.to(device)

            outputs = model(batch_X)
            loss = criterion(outputs, batch_y) + model.l1_loss()

            optimizer.zero_grad()
            loss.backward()
            optimizer.step()

            train_loss += loss.item()

        train_loss /= len(train_loader)
        print(f"Epoch [{epoch+1}/{num_epochs}], Train Loss: {train_loss:.4f}")

        if train_loss < best_train_loss:
            best_train_loss = train_loss
            counter = 0
        else:
            counter += 1

        if counter >= best_patience:
            print(f"Early stopping at epoch {epoch+1}")
            break

        scheduler.step(train_loss)

    # Save the final model
    model_save_path = chr_model_folder + f'final_model_chr{chromosome_number}.pth'
    torch.save(model.state_dict(), model_save_path)
    print(f"Final model saved at: {model_save_path}")

    # Evaluate the final model on the test set
    with torch.no_grad():
        test_outputs = model(X_test.to(device))
        test_preds = (test_outputs > 0.5).float()
        test_accuracy = float(((test_preds > 0.5) == y_test).float().mean())
        test_precision = precision_score(y_test.cpu().numpy(), test_preds.cpu().numpy(), average='micro')
        test_recall = recall_score(y_test.cpu().numpy(), test_preds.cpu().numpy(), average='micro')
        test_f1 = f1_score(y_test.cpu().numpy(), test_preds.cpu().numpy(), average='micro')
        test_roc_auc = roc_auc_score(y_test.cpu().numpy(), test_outputs.cpu().numpy(), average='micro')
        test_r2 = sklearn_r2_score(y_test.cpu().numpy(), test_outputs.cpu().numpy())
        test_iqs = calculate_iqs(y_test.cpu().numpy(), test_outputs.cpu().numpy())

        # Calculate false positive rate
        cm = confusion_matrix(y_test.cpu().numpy().ravel(), test_preds.cpu().numpy().ravel())
        tn, fp, fn, tp = cm.ravel()
        test_fpr = fp / (fp + tn)

        # Append performance metrics to the lists
        accuracies.append(test_accuracy)
        precisions.append(test_precision)
        recalls.append(test_recall)
        false_positive_rates.append(test_fpr)
        auc_rocs.append(test_roc_auc)
        r2_scores.append(test_r2)
        iqs_scores.append(test_iqs)

        # Calculate individual R^2 scores for each SNP
        individual_r2_scores = sklearn_r2_score(y_test.cpu().numpy(), test_outputs.cpu().numpy(), multioutput='raw_values')

        # Calculate individual IQS scores for each SNP
        individual_iqs_scores = np.array([calculate_iqs(y_test.cpu().numpy()[:, i].reshape(-1, 1), test_outputs.cpu().numpy()[:, i].reshape(-1, 1)) for i in range(y_test.shape[1])])

        # Get the names of the SNPs from the original dataframe
        snp_names = data.filter(regex='Unknown').columns

        # Save individual R^2 scores to a CSV file
        csv_file = chr_csv_folder + f'individual_r2_scores_chr{chromosome_number}.csv'

        with open(csv_file, 'w', newline='') as file:
            writer = csv.writer(file)
            writer.writerow(['SNP', 'R2 Score'])
            for snp, r2_score in zip(snp_names, individual_r2_scores):
                writer.writerow([snp, r2_score])

        print(f"Individual R^2 scores saved at: {csv_file}")

        # Save individual IQS scores to a CSV file
        iqs_csv_file = chr_csv_folder + f'individual_iqs_scores_chr{chromosome_number}.csv'

        with open(iqs_csv_file, 'w', newline='') as file:
            writer = csv.writer(file)
            writer.writerow(['SNP', 'IQS Score'])
            for snp, iqs_score in zip(snp_names, individual_iqs_scores):
                writer.writerow([snp, iqs_score])

        print(f"Individual IQS scores saved at: {iqs_csv_file}")

        # Save individual AUC ROC curves for each SNP
        for i, snp in enumerate(snp_names):
            try: 
                fpr, tpr, _ = roc_curve(y_test.cpu().numpy()[:, i], test_outputs.cpu().numpy()[:, i])
                plt.figure()
                plt.plot(fpr, tpr, label=f'AUC ROC = {roc_auc_score(y_test.cpu().numpy()[:, i], test_outputs.cpu().numpy()[:, i]):.4f}')
                plt.xlabel('False Positive Rate')
                plt.ylabel('True Positive Rate')
                plt.title(f'AUC ROC Curve - {snp}')
                plt.legend()
                
                curve_file = chr_curve_folder + f'auc_roc_curve_{snp}_chr{chromosome_number}.png'
                plt.savefig(curve_file)
                plt.close()
            except ValueError:
                # Save a placeholder image if there is insufficient data
                plt.figure()
                plt.axis('off')
                plt.text(0.5, 0.5, "Insufficient data for ROC curve", ha='center', va='center')
                curve_file = chr_curve_folder + f'auc_roc_curve_{snp}_chr{chromosome_number}.png'
                plt.savefig(curve_file)
                plt.close()

                print(f"Skipping SNP {snp} due to insufficient data")


        print(f"Individual AUC ROC curves saved in: {curve_folder}")

        # Create a DataFrame to store the performance metrics for each chromosome
        performance_df = pd.DataFrame({
            'Chromosome': list(range(start, chromosome_number + 1)),
            'R2 Score': r2_scores,
            'IQS Score': iqs_scores,

            'Accuracy': accuracies,
            # 'Precision': precisions,
            # 'Recall': recalls,
            # 'False Positive Rate': false_positive_rates,
            'AUC ROC': auc_rocs,
        })

        # Save the performance metrics to a CSV file
        performance_csv_file = csv_folder + 'performance_metrics.csv'
        performance_df.to_csv(performance_csv_file, index=False)
        print(f"Performance metrics saved at: {performance_csv_file}")

Total SNPs:  4056
PRS313 SNPs:  60
Total SNPs used for Training:  3996


[I 2024-06-18 11:44:23,327] Trial 84 finished with value: 0.18371078372001648 and parameters: {'learning_rate': 0.00023651832542293644, 'l1_coef': 1.6183137333100933e-05, 'patience': 8, 'batch_size': 256}. Best is trial 55 with value: 0.0865513201802969.
[I 2024-06-18 11:44:25,513] Trial 89 finished with value: 0.17923060953617095 and parameters: {'learning_rate': 0.00025997567252216097, 'l1_coef': 1.6320712506802015e-05, 'patience': 8, 'batch_size': 256}. Best is trial 55 with value: 0.0865513201802969.
[I 2024-06-18 11:44:26,057] Trial 82 finished with value: 0.1767739474773407 and parameters: {'learning_rate': 0.00027094527463766333, 'l1_coef': 1.6155766632101717e-05, 'patience': 8, 'batch_size': 256}. Best is trial 55 with value: 0.0865513201802969.
[I 2024-06-18 11:44:26,476] Trial 81 finished with value: 0.17002045959234238 and parameters: {'learning_rate': 0.00029573312329341097, 'l1_coef': 1.5081674735551774e-05, 'patience': 8, 'batch_size': 256}. Best is trial 55 with value: 0

Chr 1 - Best hyperparameters: {'learning_rate': 0.012254038414284674, 'l1_coef': 1.0101565691518604e-05, 'patience': 14, 'batch_size': 128}
Chr 1 - Best value: 0.0866
Epoch [1/500], Train Loss: 1.4850
Epoch [2/500], Train Loss: 0.8234
Epoch [3/500], Train Loss: 0.6218
Epoch [4/500], Train Loss: 0.5539
Epoch [5/500], Train Loss: 0.5144
Epoch [6/500], Train Loss: 0.4836
Epoch [7/500], Train Loss: 0.4656
Epoch [8/500], Train Loss: 0.4446
Epoch [9/500], Train Loss: 0.3014
Epoch [10/500], Train Loss: 0.1575
Epoch [11/500], Train Loss: 0.1416
Epoch [12/500], Train Loss: 0.1305
Epoch [13/500], Train Loss: 0.1247
Epoch [14/500], Train Loss: 0.1213
Epoch [15/500], Train Loss: 0.1177
Epoch [16/500], Train Loss: 0.1138
Epoch [17/500], Train Loss: 0.1109
Epoch [18/500], Train Loss: 0.1086
Epoch [19/500], Train Loss: 0.1062
Epoch [20/500], Train Loss: 0.1044
Epoch [21/500], Train Loss: 0.1026
Epoch [22/500], Train Loss: 0.1018
Epoch [23/500], Train Loss: 0.1007
Epoch [24/500], Train Loss: 0.0996
Ep

[I 2024-06-18 11:45:50,525] Trial 57 finished with value: 0.11485631838440895 and parameters: {'learning_rate': 0.016521549557693263, 'l1_coef': 1.8485221379272658e-05, 'patience': 12, 'batch_size': 128}. Best is trial 25 with value: 0.09493996109813452.
[I 2024-06-18 11:45:59,381] Trial 54 finished with value: 0.11340021379292012 and parameters: {'learning_rate': 0.01702029055657234, 'l1_coef': 1.7350493209451e-05, 'patience': 14, 'batch_size': 128}. Best is trial 25 with value: 0.09493996109813452.
[I 2024-06-18 11:46:02,917] Trial 52 finished with value: 0.1146919772028923 and parameters: {'learning_rate': 0.0161801200121631, 'l1_coef': 1.9402323060655733e-05, 'patience': 14, 'batch_size': 128}. Best is trial 25 with value: 0.09493996109813452.
[I 2024-06-18 11:46:03,743] Trial 59 finished with value: 0.10882821343839169 and parameters: {'learning_rate': 0.01610085996523582, 'l1_coef': 1.782756342700332e-05, 'patience': 14, 'batch_size': 128}. Best is trial 25 with value: 0.09493996

Chr 2 - Best hyperparameters: {'learning_rate': 0.02154387590823124, 'l1_coef': 1.1376943462129345e-05, 'patience': 17, 'batch_size': 128}
Chr 2 - Best value: 0.0949
Epoch [1/500], Train Loss: 1.9818
Epoch [2/500], Train Loss: 1.2570
Epoch [3/500], Train Loss: 1.0592
Epoch [4/500], Train Loss: 0.9875
Epoch [5/500], Train Loss: 0.9400
Epoch [6/500], Train Loss: 0.9310
Epoch [7/500], Train Loss: 0.9015
Epoch [8/500], Train Loss: 0.8749
Epoch [9/500], Train Loss: 0.8502
Epoch [10/500], Train Loss: 0.7036
Epoch [11/500], Train Loss: 0.3951
Epoch [12/500], Train Loss: 0.1729
Epoch [13/500], Train Loss: 0.1588
Epoch [14/500], Train Loss: 0.1511
Epoch [15/500], Train Loss: 0.1399
Epoch [16/500], Train Loss: 0.1144
Epoch [17/500], Train Loss: 0.1058
Epoch [18/500], Train Loss: 0.1011
Epoch [19/500], Train Loss: 0.0984
Epoch [20/500], Train Loss: 0.0963
Epoch [21/500], Train Loss: 0.0952
Epoch [22/500], Train Loss: 0.0950
Epoch [23/500], Train Loss: 0.0939
Epoch [24/500], Train Loss: 0.0915
Epo

[I 2024-06-18 11:48:48,386] Trial 54 finished with value: 0.046257991982357846 and parameters: {'learning_rate': 0.003725170108721471, 'l1_coef': 1.0410588177326541e-05, 'patience': 17, 'batch_size': 64}. Best is trial 4 with value: 0.04438067571474956.
[I 2024-06-18 11:49:00,952] Trial 59 finished with value: 0.046641488852245463 and parameters: {'learning_rate': 0.0037394428051745446, 'l1_coef': 1.0618844933444048e-05, 'patience': 17, 'batch_size': 64}. Best is trial 4 with value: 0.04438067571474956.
[I 2024-06-18 11:49:01,028] Trial 55 finished with value: 0.04912701744054045 and parameters: {'learning_rate': 0.003739549867485792, 'l1_coef': 1.0516936204202721e-05, 'patience': 17, 'batch_size': 64}. Best is trial 4 with value: 0.04438067571474956.
[I 2024-06-18 11:49:01,522] Trial 60 finished with value: 0.04651105201670101 and parameters: {'learning_rate': 0.003833431954175403, 'l1_coef': 1.0609556893970465e-05, 'patience': 17, 'batch_size': 64}. Best is trial 4 with value: 0.0443

Chr 3 - Best hyperparameters: {'learning_rate': 0.0014528755690903109, 'l1_coef': 1.074745971712647e-05, 'patience': 18, 'batch_size': 32}
Chr 3 - Best value: 0.0444
Epoch [1/500], Train Loss: 0.4344
Epoch [2/500], Train Loss: 0.3248
Epoch [3/500], Train Loss: 0.2665
Epoch [4/500], Train Loss: 0.2297
Epoch [5/500], Train Loss: 0.2025
Epoch [6/500], Train Loss: 0.1816
Epoch [7/500], Train Loss: 0.1657
Epoch [8/500], Train Loss: 0.1522
Epoch [9/500], Train Loss: 0.1418
Epoch [10/500], Train Loss: 0.1331
Epoch [11/500], Train Loss: 0.1249
Epoch [12/500], Train Loss: 0.1184
Epoch [13/500], Train Loss: 0.1132
Epoch [14/500], Train Loss: 0.1081
Epoch [15/500], Train Loss: 0.1036
Epoch [16/500], Train Loss: 0.0994
Epoch [17/500], Train Loss: 0.0958
Epoch [18/500], Train Loss: 0.0924
Epoch [19/500], Train Loss: 0.0895
Epoch [20/500], Train Loss: 0.0869
Epoch [21/500], Train Loss: 0.0841
Epoch [22/500], Train Loss: 0.0819
Epoch [23/500], Train Loss: 0.0798
Epoch [24/500], Train Loss: 0.0779
Epo

[I 2024-06-18 11:52:12,771] Trial 56 finished with value: 0.0798052256660802 and parameters: {'learning_rate': 0.006384330127793367, 'l1_coef': 2.3220731475718292e-05, 'patience': 15, 'batch_size': 64}. Best is trial 10 with value: 0.06264721751213073.
[I 2024-06-18 11:52:15,053] Trial 54 finished with value: 0.11812034866639545 and parameters: {'learning_rate': 0.0014794803256235752, 'l1_coef': 6.815614299528405e-05, 'patience': 15, 'batch_size': 64}. Best is trial 10 with value: 0.06264721751213073.
[I 2024-06-18 11:52:23,090] Trial 48 finished with value: 0.11650087514093943 and parameters: {'learning_rate': 0.0016948318506605812, 'l1_coef': 6.698567179962252e-05, 'patience': 15, 'batch_size': 64}. Best is trial 10 with value: 0.06264721751213073.
[I 2024-06-18 11:52:47,769] Trial 52 finished with value: 0.11725795758622033 and parameters: {'learning_rate': 0.00165296324488275, 'l1_coef': 7.296065732083937e-05, 'patience': 14, 'batch_size': 64}. Best is trial 10 with value: 0.062647

Chr 4 - Best hyperparameters: {'learning_rate': 0.028405218431256294, 'l1_coef': 1.82676872855965e-05, 'patience': 20, 'batch_size': 128}
Chr 4 - Best value: 0.0626
Epoch [1/500], Train Loss: 3.1886
Epoch [2/500], Train Loss: 1.9401
Epoch [3/500], Train Loss: 1.6592
Epoch [4/500], Train Loss: 1.5975
Epoch [5/500], Train Loss: 1.5638
Epoch [6/500], Train Loss: 1.3461
Epoch [7/500], Train Loss: 0.3583
Epoch [8/500], Train Loss: 0.1473
Epoch [9/500], Train Loss: 0.1189
Epoch [10/500], Train Loss: 0.1102
Epoch [11/500], Train Loss: 0.1031
Epoch [12/500], Train Loss: 0.0969
Epoch [13/500], Train Loss: 0.0925
Epoch [14/500], Train Loss: 0.0900
Epoch [15/500], Train Loss: 0.0874
Epoch [16/500], Train Loss: 0.0836
Epoch [17/500], Train Loss: 0.0812
Epoch [18/500], Train Loss: 0.0787
Epoch [19/500], Train Loss: 0.0779
Epoch [20/500], Train Loss: 0.0759
Epoch [21/500], Train Loss: 0.0748
Epoch [22/500], Train Loss: 0.0733
Epoch [23/500], Train Loss: 0.0725
Epoch [24/500], Train Loss: 0.0723
Epoc

[I 2024-06-18 11:56:19,004] Trial 16 finished with value: 0.10366071164608001 and parameters: {'learning_rate': 0.0023232412763265855, 'l1_coef': 1.1907966237269316e-05, 'patience': 5, 'batch_size': 256}. Best is trial 16 with value: 0.10366071164608001.
[I 2024-06-18 11:56:20,619] Trial 11 finished with value: 0.10538749992847443 and parameters: {'learning_rate': 0.0019171492641988116, 'l1_coef': 1.1324352108789524e-05, 'patience': 5, 'batch_size': 256}. Best is trial 16 with value: 0.10366071164608001.
[I 2024-06-18 11:56:23,841] Trial 12 finished with value: 0.11968241408467292 and parameters: {'learning_rate': 0.0019050776788936401, 'l1_coef': 1.8012014176749892e-05, 'patience': 5, 'batch_size': 256}. Best is trial 16 with value: 0.10366071164608001.
[I 2024-06-18 11:56:33,280] Trial 20 finished with value: 0.09828574880957604 and parameters: {'learning_rate': 0.002371235745312867, 'l1_coef': 1.1652150483740447e-05, 'patience': 5, 'batch_size': 256}. Best is trial 20 with value: 0.

Chr 5 - Best hyperparameters: {'learning_rate': 0.0020083341636623056, 'l1_coef': 1.0869851864798313e-05, 'patience': 15, 'batch_size': 256}
Chr 5 - Best value: 0.0937
Epoch [1/500], Train Loss: 0.6081
Epoch [2/500], Train Loss: 0.5121
Epoch [3/500], Train Loss: 0.4675
Epoch [4/500], Train Loss: 0.4366
Epoch [5/500], Train Loss: 0.4107
Epoch [6/500], Train Loss: 0.3894
Epoch [7/500], Train Loss: 0.3711
Epoch [8/500], Train Loss: 0.3542
Epoch [9/500], Train Loss: 0.3399
Epoch [10/500], Train Loss: 0.3277
Epoch [11/500], Train Loss: 0.3165
Epoch [12/500], Train Loss: 0.3058
Epoch [13/500], Train Loss: 0.2963
Epoch [14/500], Train Loss: 0.2871
Epoch [15/500], Train Loss: 0.2795
Epoch [16/500], Train Loss: 0.2722
Epoch [17/500], Train Loss: 0.2656
Epoch [18/500], Train Loss: 0.2595
Epoch [19/500], Train Loss: 0.2535
Epoch [20/500], Train Loss: 0.2482
Epoch [21/500], Train Loss: 0.2429
Epoch [22/500], Train Loss: 0.2381
Epoch [23/500], Train Loss: 0.2340
Epoch [24/500], Train Loss: 0.2297
E

[I 2024-06-18 12:01:56,405] Trial 28 finished with value: 0.1448026800384888 and parameters: {'learning_rate': 0.00012044600714687634, 'l1_coef': 1.3129473481457664e-05, 'patience': 15, 'batch_size': 32}. Best is trial 28 with value: 0.1448026800384888.
[I 2024-06-18 12:01:57,576] Trial 27 finished with value: 0.1374111989369759 and parameters: {'learning_rate': 0.00013023881858474157, 'l1_coef': 1.1452876176725031e-05, 'patience': 15, 'batch_size': 32}. Best is trial 27 with value: 0.1374111989369759.
[I 2024-06-18 12:02:04,329] Trial 24 finished with value: 0.14138092341331335 and parameters: {'learning_rate': 0.00016222939553995326, 'l1_coef': 1.4277410469897544e-05, 'patience': 15, 'batch_size': 32}. Best is trial 27 with value: 0.1374111989369759.
[I 2024-06-18 12:02:14,645] Trial 21 finished with value: 0.1376157751450172 and parameters: {'learning_rate': 0.00011154694309329122, 'l1_coef': 1.0653188846671136e-05, 'patience': 15, 'batch_size': 32}. Best is trial 27 with value: 0.1

Chr 6 - Best hyperparameters: {'learning_rate': 0.00011208164062831504, 'l1_coef': 1.0284689264438478e-05, 'patience': 15, 'batch_size': 32}
Chr 6 - Best value: 0.1329
Epoch [1/500], Train Loss: 0.5885
Epoch [2/500], Train Loss: 0.5362
Epoch [3/500], Train Loss: 0.5205
Epoch [4/500], Train Loss: 0.5089
Epoch [5/500], Train Loss: 0.4991
Epoch [6/500], Train Loss: 0.4899
Epoch [7/500], Train Loss: 0.4814
Epoch [8/500], Train Loss: 0.4736
Epoch [9/500], Train Loss: 0.4659
Epoch [10/500], Train Loss: 0.4585
Epoch [11/500], Train Loss: 0.4517
Epoch [12/500], Train Loss: 0.4449
Epoch [13/500], Train Loss: 0.4385
Epoch [14/500], Train Loss: 0.4321
Epoch [15/500], Train Loss: 0.4260
Epoch [16/500], Train Loss: 0.4205
Epoch [17/500], Train Loss: 0.4145
Epoch [18/500], Train Loss: 0.4093
Epoch [19/500], Train Loss: 0.4039
Epoch [20/500], Train Loss: 0.3990
Epoch [21/500], Train Loss: 0.3940
Epoch [22/500], Train Loss: 0.3894
Epoch [23/500], Train Loss: 0.3847
Epoch [24/500], Train Loss: 0.3801
E

[I 2024-06-18 12:04:37,214] Trial 14 finished with value: 0.2029388654690522 and parameters: {'learning_rate': 0.012217341685773455, 'l1_coef': 0.00024800926780902354, 'patience': 11, 'batch_size': 32}. Best is trial 9 with value: 0.12938632162717673.
[I 2024-06-18 12:04:41,128] Trial 19 finished with value: 0.08129436098612272 and parameters: {'learning_rate': 0.0122528085987631, 'l1_coef': 1.0198011241500657e-05, 'patience': 11, 'batch_size': 32}. Best is trial 19 with value: 0.08129436098612272.
[I 2024-06-18 12:04:50,779] Trial 15 finished with value: 0.21050595068014583 and parameters: {'learning_rate': 0.01012496453703742, 'l1_coef': 0.0002774996172998803, 'patience': 11, 'batch_size': 32}. Best is trial 19 with value: 0.08129436098612272.
[I 2024-06-18 12:04:55,243] Trial 16 finished with value: 0.08064935952425002 and parameters: {'learning_rate': 0.011097410078928614, 'l1_coef': 1.072551035716764e-05, 'patience': 11, 'batch_size': 32}. Best is trial 16 with value: 0.0806493595

Chr 7 - Best hyperparameters: {'learning_rate': 0.011097410078928614, 'l1_coef': 1.072551035716764e-05, 'patience': 11, 'batch_size': 32}
Chr 7 - Best value: 0.0806
Epoch [1/500], Train Loss: 0.4487
Epoch [2/500], Train Loss: 0.2323
Epoch [3/500], Train Loss: 0.1787
Epoch [4/500], Train Loss: 0.1501
Epoch [5/500], Train Loss: 0.1340
Epoch [6/500], Train Loss: 0.1221
Epoch [7/500], Train Loss: 0.1134
Epoch [8/500], Train Loss: 0.1065
Epoch [9/500], Train Loss: 0.1010
Epoch [10/500], Train Loss: 0.0955
Epoch [11/500], Train Loss: 0.0927
Epoch [12/500], Train Loss: 0.0887
Epoch [13/500], Train Loss: 0.0867
Epoch [14/500], Train Loss: 0.0832
Epoch [15/500], Train Loss: 0.0819
Epoch [16/500], Train Loss: 0.0798
Epoch [17/500], Train Loss: 0.0784
Epoch [18/500], Train Loss: 0.0786
Epoch [19/500], Train Loss: 0.0760
Epoch [20/500], Train Loss: 0.0756
Epoch [21/500], Train Loss: 0.0737
Epoch [22/500], Train Loss: 0.0729
Epoch [23/500], Train Loss: 0.0728
Epoch [24/500], Train Loss: 0.0723
Epoc

[I 2024-06-18 12:08:50,044] Trial 16 finished with value: 0.16171413702624185 and parameters: {'learning_rate': 0.010216466895955227, 'l1_coef': 5.7799715158518986e-05, 'patience': 20, 'batch_size': 64}. Best is trial 16 with value: 0.16171413702624185.
[I 2024-06-18 12:08:52,418] Trial 10 finished with value: 0.16542784060750687 and parameters: {'learning_rate': 0.008003044946376763, 'l1_coef': 6.33547015495335e-05, 'patience': 20, 'batch_size': 64}. Best is trial 16 with value: 0.16171413702624185.
[I 2024-06-18 12:08:59,011] Trial 18 finished with value: 0.16395897269248966 and parameters: {'learning_rate': 0.009838449467549658, 'l1_coef': 5.964117848342693e-05, 'patience': 19, 'batch_size': 64}. Best is trial 16 with value: 0.16171413702624185.
[I 2024-06-18 12:09:00,421] Trial 14 finished with value: 0.17000248900481635 and parameters: {'learning_rate': 0.00841112224204875, 'l1_coef': 7.04439209409591e-05, 'patience': 19, 'batch_size': 64}. Best is trial 16 with value: 0.161714137

Chr 8 - Best hyperparameters: {'learning_rate': 0.006998305486984654, 'l1_coef': 5.051659353995837e-05, 'patience': 19, 'batch_size': 64}
Chr 8 - Best value: 0.1518
Epoch [1/500], Train Loss: 0.5835
Epoch [2/500], Train Loss: 0.4033
Epoch [3/500], Train Loss: 0.3390
Epoch [4/500], Train Loss: 0.3086
Epoch [5/500], Train Loss: 0.2844
Epoch [6/500], Train Loss: 0.2668
Epoch [7/500], Train Loss: 0.2511
Epoch [8/500], Train Loss: 0.2409
Epoch [9/500], Train Loss: 0.2335
Epoch [10/500], Train Loss: 0.2244
Epoch [11/500], Train Loss: 0.2180
Epoch [12/500], Train Loss: 0.2130
Epoch [13/500], Train Loss: 0.2098
Epoch [14/500], Train Loss: 0.2044
Epoch [15/500], Train Loss: 0.1984
Epoch [16/500], Train Loss: 0.1956
Epoch [17/500], Train Loss: 0.1930
Epoch [18/500], Train Loss: 0.1903
Epoch [19/500], Train Loss: 0.1894
Epoch [20/500], Train Loss: 0.1855
Epoch [21/500], Train Loss: 0.1870
Epoch [22/500], Train Loss: 0.1847
Epoch [23/500], Train Loss: 0.1833
Epoch [24/500], Train Loss: 0.1776
Epoc

[I 2024-06-18 12:10:32,272] Trial 8 finished with value: 11.523130798339844 and parameters: {'learning_rate': 0.09926867945539508, 'l1_coef': 8.780327247739423e-05, 'patience': 19, 'batch_size': 256}. Best is trial 8 with value: 11.523130798339844.
[I 2024-06-18 12:10:39,547] Trial 7 finished with value: 0.1619011029601097 and parameters: {'learning_rate': 0.0024898795229624563, 'l1_coef': 1.35876606392741e-05, 'patience': 14, 'batch_size': 256}. Best is trial 7 with value: 0.1619011029601097.
[I 2024-06-18 12:11:07,244] Trial 9 finished with value: 0.5189666211605072 and parameters: {'learning_rate': 0.010492904081249617, 'l1_coef': 0.0634166459985364, 'patience': 16, 'batch_size': 128}. Best is trial 7 with value: 0.1619011029601097.
[I 2024-06-18 12:11:07,667] Trial 4 finished with value: 0.5186945080757142 and parameters: {'learning_rate': 0.03503731117279075, 'l1_coef': 0.014374944067070083, 'patience': 13, 'batch_size': 64}. Best is trial 7 with value: 0.1619011029601097.
[I 2024

Chr 9 - Best hyperparameters: {'learning_rate': 0.005943003216514158, 'l1_coef': 1.4184324432812446e-05, 'patience': 13, 'batch_size': 32}
Chr 9 - Best value: 0.1067
Epoch [1/500], Train Loss: 0.4544
Epoch [2/500], Train Loss: 0.2970
Epoch [3/500], Train Loss: 0.2429
Epoch [4/500], Train Loss: 0.2127
Epoch [5/500], Train Loss: 0.1920
Epoch [6/500], Train Loss: 0.1766
Epoch [7/500], Train Loss: 0.1651
Epoch [8/500], Train Loss: 0.1549
Epoch [9/500], Train Loss: 0.1471
Epoch [10/500], Train Loss: 0.1415
Epoch [11/500], Train Loss: 0.1358
Epoch [12/500], Train Loss: 0.1315
Epoch [13/500], Train Loss: 0.1276
Epoch [14/500], Train Loss: 0.1237
Epoch [15/500], Train Loss: 0.1209
Epoch [16/500], Train Loss: 0.1184
Epoch [17/500], Train Loss: 0.1157
Epoch [18/500], Train Loss: 0.1140
Epoch [19/500], Train Loss: 0.1115
Epoch [20/500], Train Loss: 0.1094
Epoch [21/500], Train Loss: 0.1070
Epoch [22/500], Train Loss: 0.1058
Epoch [23/500], Train Loss: 0.1059
Epoch [24/500], Train Loss: 0.1039
Epo

[I 2024-06-18 12:11:56,218] A new study created in RDB with name: chr10_study
[I 2024-06-18 12:12:23,249] Trial 3 finished with value: 0.8315802752971649 and parameters: {'learning_rate': 0.04316488646162698, 'l1_coef': 0.004020571974426502, 'patience': 6, 'batch_size': 256}. Best is trial 3 with value: 0.8315802752971649.
[I 2024-06-18 12:12:29,671] Trial 4 finished with value: 1.3295094311237334 and parameters: {'learning_rate': 0.02660875630854928, 'l1_coef': 0.0170194537146354, 'patience': 6, 'batch_size': 256}. Best is trial 3 with value: 0.8315802752971649.
[I 2024-06-18 12:13:03,086] Trial 0 finished with value: 0.28118076324462893 and parameters: {'learning_rate': 0.055332888810095765, 'l1_coef': 0.0004476137750530598, 'patience': 13, 'batch_size': 128}. Best is trial 0 with value: 0.28118076324462893.
[I 2024-06-18 12:13:33,451] Trial 6 finished with value: 0.5402462762135726 and parameters: {'learning_rate': 0.004251237361785462, 'l1_coef': 0.02924264918127103, 'patience': 11

Chr 10 - Best hyperparameters: {'learning_rate': 0.0024068053397211277, 'l1_coef': 3.542020357212867e-05, 'patience': 17, 'batch_size': 32}
Chr 10 - Best value: 0.1334
Epoch [1/500], Train Loss: 0.4991
Epoch [2/500], Train Loss: 0.3793
Epoch [3/500], Train Loss: 0.3239
Epoch [4/500], Train Loss: 0.2909
Epoch [5/500], Train Loss: 0.2663
Epoch [6/500], Train Loss: 0.2489
Epoch [7/500], Train Loss: 0.2345
Epoch [8/500], Train Loss: 0.2238
Epoch [9/500], Train Loss: 0.2140
Epoch [10/500], Train Loss: 0.2066
Epoch [11/500], Train Loss: 0.1995
Epoch [12/500], Train Loss: 0.1934
Epoch [13/500], Train Loss: 0.1893
Epoch [14/500], Train Loss: 0.1844
Epoch [15/500], Train Loss: 0.1800
Epoch [16/500], Train Loss: 0.1763
Epoch [17/500], Train Loss: 0.1733
Epoch [18/500], Train Loss: 0.1701
Epoch [19/500], Train Loss: 0.1681
Epoch [20/500], Train Loss: 0.1650
Epoch [21/500], Train Loss: 0.1636
Epoch [22/500], Train Loss: 0.1615
Epoch [23/500], Train Loss: 0.1595
Epoch [24/500], Train Loss: 0.1575
E

[I 2024-06-18 12:14:07,354] A new study created in RDB with name: chr11_study


Individual AUC ROC curves saved in: ../../../Data/model_results/logistic_regression/roc_curves/
Performance metrics saved at: ../../../Data/model_results/logistic_regression/csv_files/performance_metrics.csv
Total SNPs:  2550
PRS313 SNPs:  38
Total SNPs used for Training:  2512


[I 2024-06-18 12:15:05,088] Trial 4 finished with value: 0.5590310394763947 and parameters: {'learning_rate': 0.0017871461918697491, 'l1_coef': 0.09580086744294376, 'patience': 20, 'batch_size': 256}. Best is trial 4 with value: 0.5590310394763947.
[I 2024-06-18 12:15:32,691] Trial 0 finished with value: 0.714206212759018 and parameters: {'learning_rate': 0.0008616720743709761, 'l1_coef': 0.029945094962247513, 'patience': 5, 'batch_size': 128}. Best is trial 4 with value: 0.5590310394763947.
[I 2024-06-18 12:15:41,871] Trial 9 finished with value: 0.14558064682143074 and parameters: {'learning_rate': 0.044956640417592425, 'l1_coef': 0.00014219997677332013, 'patience': 7, 'batch_size': 64}. Best is trial 9 with value: 0.14558064682143074.
[I 2024-06-18 12:15:50,792] Trial 6 finished with value: 0.2547811672091484 and parameters: {'learning_rate': 0.00014842325354664542, 'l1_coef': 8.681198170156922e-05, 'patience': 20, 'batch_size': 256}. Best is trial 9 with value: 0.14558064682143074.

Chr 11 - Best hyperparameters: {'learning_rate': 0.001372434672947406, 'l1_coef': 1.7839007175765143e-05, 'patience': 10, 'batch_size': 64}
Chr 11 - Best value: 0.0590
Epoch [1/500], Train Loss: 0.5093
Epoch [2/500], Train Loss: 0.4159
Epoch [3/500], Train Loss: 0.3683
Epoch [4/500], Train Loss: 0.3323
Epoch [5/500], Train Loss: 0.3040
Epoch [6/500], Train Loss: 0.2818
Epoch [7/500], Train Loss: 0.2640
Epoch [8/500], Train Loss: 0.2489
Epoch [9/500], Train Loss: 0.2350
Epoch [10/500], Train Loss: 0.2235
Epoch [11/500], Train Loss: 0.2132
Epoch [12/500], Train Loss: 0.2037
Epoch [13/500], Train Loss: 0.1962
Epoch [14/500], Train Loss: 0.1893
Epoch [15/500], Train Loss: 0.1828
Epoch [16/500], Train Loss: 0.1767
Epoch [17/500], Train Loss: 0.1708
Epoch [18/500], Train Loss: 0.1661
Epoch [19/500], Train Loss: 0.1617
Epoch [20/500], Train Loss: 0.1574
Epoch [21/500], Train Loss: 0.1537
Epoch [22/500], Train Loss: 0.1504
Epoch [23/500], Train Loss: 0.1474
Epoch [24/500], Train Loss: 0.1439
E

[I 2024-06-18 12:17:07,243] A new study created in RDB with name: chr12_study


Individual AUC ROC curves saved in: ../../../Data/model_results/logistic_regression/roc_curves/
Performance metrics saved at: ../../../Data/model_results/logistic_regression/csv_files/performance_metrics.csv
Total SNPs:  1704
PRS313 SNPs:  34
Total SNPs used for Training:  1670


[I 2024-06-18 12:18:05,844] Trial 5 finished with value: 0.48401095569133756 and parameters: {'learning_rate': 0.04413119754639475, 'l1_coef': 0.009574731136781377, 'patience': 20, 'batch_size': 128}. Best is trial 5 with value: 0.48401095569133756.
[I 2024-06-18 12:18:08,116] Trial 2 finished with value: 0.16865556165575982 and parameters: {'learning_rate': 0.02541031364574953, 'l1_coef': 0.00010491748760437849, 'patience': 18, 'batch_size': 128}. Best is trial 2 with value: 0.16865556165575982.
[I 2024-06-18 12:18:26,508] Trial 3 finished with value: 0.18919315785169602 and parameters: {'learning_rate': 0.058028674999873915, 'l1_coef': 0.00015324608951669666, 'patience': 10, 'batch_size': 128}. Best is trial 2 with value: 0.16865556165575982.
[I 2024-06-18 12:18:41,749] Trial 1 finished with value: 0.10776926353573799 and parameters: {'learning_rate': 0.0014395840013284337, 'l1_coef': 1.0867185953519262e-05, 'patience': 18, 'batch_size': 256}. Best is trial 1 with value: 0.1077692635

Chr 12 - Best hyperparameters: {'learning_rate': 0.0014395840013284337, 'l1_coef': 1.0867185953519262e-05, 'patience': 18, 'batch_size': 256}
Chr 12 - Best value: 0.1078
Epoch [1/500], Train Loss: 0.5445
Epoch [2/500], Train Loss: 0.4869
Epoch [3/500], Train Loss: 0.4501
Epoch [4/500], Train Loss: 0.4291
Epoch [5/500], Train Loss: 0.4128
Epoch [6/500], Train Loss: 0.3997
Epoch [7/500], Train Loss: 0.3879
Epoch [8/500], Train Loss: 0.3771
Epoch [9/500], Train Loss: 0.3671
Epoch [10/500], Train Loss: 0.3583
Epoch [11/500], Train Loss: 0.3497
Epoch [12/500], Train Loss: 0.3414
Epoch [13/500], Train Loss: 0.3336
Epoch [14/500], Train Loss: 0.3263
Epoch [15/500], Train Loss: 0.3197
Epoch [16/500], Train Loss: 0.3130
Epoch [17/500], Train Loss: 0.3072
Epoch [18/500], Train Loss: 0.3016
Epoch [19/500], Train Loss: 0.2961
Epoch [20/500], Train Loss: 0.2910
Epoch [21/500], Train Loss: 0.2864
Epoch [22/500], Train Loss: 0.2816
Epoch [23/500], Train Loss: 0.2768
Epoch [24/500], Train Loss: 0.2729

[I 2024-06-18 12:20:07,502] A new study created in RDB with name: chr13_study


Individual AUC ROC curves saved in: ../../../Data/model_results/logistic_regression/roc_curves/
Performance metrics saved at: ../../../Data/model_results/logistic_regression/csv_files/performance_metrics.csv
Total SNPs:  304
PRS313 SNPs:  10
Total SNPs used for Training:  294


[I 2024-06-18 12:23:09,875] Trial 6 finished with value: 0.35048297941684725 and parameters: {'learning_rate': 0.0018807895726354535, 'l1_coef': 0.09610981970513735, 'patience': 12, 'batch_size': 256}. Best is trial 6 with value: 0.35048297941684725.
[I 2024-06-18 12:23:10,417] Trial 3 finished with value: 0.16070269346237182 and parameters: {'learning_rate': 0.007066957719541967, 'l1_coef': 0.00031266809480140994, 'patience': 19, 'batch_size': 256}. Best is trial 3 with value: 0.16070269346237182.
[I 2024-06-18 12:23:48,639] Trial 8 finished with value: 0.2476855754852295 and parameters: {'learning_rate': 0.0021282199415676595, 'l1_coef': 0.0034180019405649882, 'patience': 6, 'batch_size': 256}. Best is trial 3 with value: 0.16070269346237182.
[I 2024-06-18 12:24:00,818] Trial 7 finished with value: 0.32873142063617705 and parameters: {'learning_rate': 0.0013510730298077918, 'l1_coef': 0.019459737746326176, 'patience': 10, 'batch_size': 128}. Best is trial 3 with value: 0.160702693462

Chr 13 - Best hyperparameters: {'learning_rate': 0.000279730036447635, 'l1_coef': 1.9175629848811384e-05, 'patience': 19, 'batch_size': 64}
Chr 13 - Best value: 0.1135
Epoch [1/500], Train Loss: 0.5776
Epoch [2/500], Train Loss: 0.4336
Epoch [3/500], Train Loss: 0.3766
Epoch [4/500], Train Loss: 0.3506
Epoch [5/500], Train Loss: 0.3340
Epoch [6/500], Train Loss: 0.3234
Epoch [7/500], Train Loss: 0.3168
Epoch [8/500], Train Loss: 0.3095
Epoch [9/500], Train Loss: 0.3041
Epoch [10/500], Train Loss: 0.2997
Epoch [11/500], Train Loss: 0.2950
Epoch [12/500], Train Loss: 0.2911
Epoch [13/500], Train Loss: 0.2892
Epoch [14/500], Train Loss: 0.2863
Epoch [15/500], Train Loss: 0.2821
Epoch [16/500], Train Loss: 0.2801
Epoch [17/500], Train Loss: 0.2773
Epoch [18/500], Train Loss: 0.2746
Epoch [19/500], Train Loss: 0.2716
Epoch [20/500], Train Loss: 0.2704
Epoch [21/500], Train Loss: 0.2688
Epoch [22/500], Train Loss: 0.2652
Epoch [23/500], Train Loss: 0.2635
Epoch [24/500], Train Loss: 0.2610
E

[I 2024-06-18 12:24:30,589] A new study created in RDB with name: chr14_study


Individual AUC ROC curves saved in: ../../../Data/model_results/logistic_regression/roc_curves/
Performance metrics saved at: ../../../Data/model_results/logistic_regression/csv_files/performance_metrics.csv
Total SNPs:  918
PRS313 SNPs:  16
Total SNPs used for Training:  902


[I 2024-06-18 12:26:10,623] Trial 5 finished with value: 0.17179837673902512 and parameters: {'learning_rate': 0.0069193401475989074, 'l1_coef': 0.0003295158549997448, 'patience': 5, 'batch_size': 128}. Best is trial 5 with value: 0.17179837673902512.
[I 2024-06-18 12:26:12,154] Trial 1 finished with value: 0.059962207451462746 and parameters: {'learning_rate': 0.011496963189324617, 'l1_coef': 1.5360886664405904e-05, 'patience': 13, 'batch_size': 128}. Best is trial 1 with value: 0.059962207451462746.
[I 2024-06-18 12:26:28,062] Trial 3 finished with value: 0.07152817882597447 and parameters: {'learning_rate': 0.010230874250860153, 'l1_coef': 2.2036618579558507e-05, 'patience': 16, 'batch_size': 128}. Best is trial 1 with value: 0.059962207451462746.
[I 2024-06-18 12:26:46,337] Trial 0 finished with value: 0.2046399235725403 and parameters: {'learning_rate': 0.0046584544168202, 'l1_coef': 0.0006339367972791854, 'patience': 8, 'batch_size': 128}. Best is trial 1 with value: 0.0599622074

Chr 14 - Best hyperparameters: {'learning_rate': 0.011496963189324617, 'l1_coef': 1.5360886664405904e-05, 'patience': 13, 'batch_size': 128}
Chr 14 - Best value: 0.0600
Epoch [1/500], Train Loss: 0.5514
Epoch [2/500], Train Loss: 0.3333
Epoch [3/500], Train Loss: 0.2540
Epoch [4/500], Train Loss: 0.2079
Epoch [5/500], Train Loss: 0.1794
Epoch [6/500], Train Loss: 0.1591
Epoch [7/500], Train Loss: 0.1450
Epoch [8/500], Train Loss: 0.1320
Epoch [9/500], Train Loss: 0.1227
Epoch [10/500], Train Loss: 0.1137
Epoch [11/500], Train Loss: 0.1079
Epoch [12/500], Train Loss: 0.1016
Epoch [13/500], Train Loss: 0.0972
Epoch [14/500], Train Loss: 0.0934
Epoch [15/500], Train Loss: 0.0896
Epoch [16/500], Train Loss: 0.0871
Epoch [17/500], Train Loss: 0.0834
Epoch [18/500], Train Loss: 0.0809
Epoch [19/500], Train Loss: 0.0783
Epoch [20/500], Train Loss: 0.0751
Epoch [21/500], Train Loss: 0.0737
Epoch [22/500], Train Loss: 0.0714
Epoch [23/500], Train Loss: 0.0701
Epoch [24/500], Train Loss: 0.0688


[I 2024-06-18 12:27:18,909] A new study created in RDB with name: chr15_study


Individual AUC ROC curves saved in: ../../../Data/model_results/logistic_regression/roc_curves/
Performance metrics saved at: ../../../Data/model_results/logistic_regression/csv_files/performance_metrics.csv
Total SNPs:  620
PRS313 SNPs:  14
Total SNPs used for Training:  606


[I 2024-06-18 12:27:59,304] Trial 2 finished with value: 0.7909249633550643 and parameters: {'learning_rate': 0.05581513999350153, 'l1_coef': 0.021888442824926885, 'patience': 5, 'batch_size': 256}. Best is trial 2 with value: 0.7909249633550643.
[I 2024-06-18 12:28:01,302] Trial 1 finished with value: 0.08222407586872578 and parameters: {'learning_rate': 0.05642866013363076, 'l1_coef': 1.2780374992422405e-05, 'patience': 9, 'batch_size': 128}. Best is trial 1 with value: 0.08222407586872578.
[I 2024-06-18 12:29:10,914] Trial 4 finished with value: 0.4632629483938217 and parameters: {'learning_rate': 0.003959967752757223, 'l1_coef': 0.01464351935976468, 'patience': 11, 'batch_size': 256}. Best is trial 1 with value: 0.08222407586872578.
[I 2024-06-18 12:29:26,494] Trial 5 finished with value: 0.1082878717354366 and parameters: {'learning_rate': 0.00101408606705295, 'l1_coef': 2.017426294433991e-05, 'patience': 14, 'batch_size': 64}. Best is trial 1 with value: 0.08222407586872578.
[I 2

Chr 15 - Best hyperparameters: {'learning_rate': 0.05642866013363076, 'l1_coef': 1.2780374992422405e-05, 'patience': 9, 'batch_size': 128}
Chr 15 - Best value: 0.0822
Epoch [1/500], Train Loss: 0.8400
Epoch [2/500], Train Loss: 0.4196
Epoch [3/500], Train Loss: 0.3395
Epoch [4/500], Train Loss: 0.3057
Epoch [5/500], Train Loss: 0.2634
Epoch [6/500], Train Loss: 0.2426
Epoch [7/500], Train Loss: 0.2340
Epoch [8/500], Train Loss: 0.2179
Epoch [9/500], Train Loss: 0.2068
Epoch [10/500], Train Loss: 0.1999
Epoch [11/500], Train Loss: 0.1926
Epoch [12/500], Train Loss: 0.1841
Epoch [13/500], Train Loss: 0.1506
Epoch [14/500], Train Loss: 0.1310
Epoch [15/500], Train Loss: 0.1112
Epoch [16/500], Train Loss: 0.1007
Epoch [17/500], Train Loss: 0.0865
Epoch [18/500], Train Loss: 0.0839
Epoch [19/500], Train Loss: 0.0802
Epoch [20/500], Train Loss: 0.0775
Epoch [21/500], Train Loss: 0.0755
Epoch [22/500], Train Loss: 0.0735
Epoch [23/500], Train Loss: 0.0737
Epoch [24/500], Train Loss: 0.0728
Ep

[I 2024-06-18 12:29:54,458] A new study created in RDB with name: chr16_study


Individual AUC ROC curves saved in: ../../../Data/model_results/logistic_regression/roc_curves/
Performance metrics saved at: ../../../Data/model_results/logistic_regression/csv_files/performance_metrics.csv
Total SNPs:  1212
PRS313 SNPs:  28
Total SNPs used for Training:  1184


[I 2024-06-18 12:31:05,040] Trial 4 finished with value: 0.4093294978141785 and parameters: {'learning_rate': 0.010938333539073437, 'l1_coef': 0.002184288403028775, 'patience': 20, 'batch_size': 256}. Best is trial 4 with value: 0.4093294978141785.
[I 2024-06-18 12:31:06,672] Trial 3 finished with value: 0.7055125539119427 and parameters: {'learning_rate': 0.023854678810027622, 'l1_coef': 0.009085837311063506, 'patience': 6, 'batch_size': 32}. Best is trial 4 with value: 0.4093294978141785.
[I 2024-06-18 12:31:06,866] Trial 1 finished with value: 0.4301452413201332 and parameters: {'learning_rate': 0.0020546956998084825, 'l1_coef': 0.0025115154298340344, 'patience': 6, 'batch_size': 128}. Best is trial 4 with value: 0.4093294978141785.
[I 2024-06-18 12:31:22,537] Trial 6 finished with value: 0.300434535741806 and parameters: {'learning_rate': 0.0009134423117999843, 'l1_coef': 0.0007515362867752688, 'patience': 9, 'batch_size': 256}. Best is trial 6 with value: 0.300434535741806.
[I 202

Chr 16 - Best hyperparameters: {'learning_rate': 0.0002145507468295692, 'l1_coef': 2.987027699346444e-05, 'patience': 13, 'batch_size': 32}
Chr 16 - Best value: 0.1164
Epoch [1/500], Train Loss: 0.5234
Epoch [2/500], Train Loss: 0.4661
Epoch [3/500], Train Loss: 0.4478
Epoch [4/500], Train Loss: 0.4321
Epoch [5/500], Train Loss: 0.4191
Epoch [6/500], Train Loss: 0.4072
Epoch [7/500], Train Loss: 0.3957
Epoch [8/500], Train Loss: 0.3858
Epoch [9/500], Train Loss: 0.3761
Epoch [10/500], Train Loss: 0.3671
Epoch [11/500], Train Loss: 0.3583
Epoch [12/500], Train Loss: 0.3506
Epoch [13/500], Train Loss: 0.3433
Epoch [14/500], Train Loss: 0.3363
Epoch [15/500], Train Loss: 0.3296
Epoch [16/500], Train Loss: 0.3230
Epoch [17/500], Train Loss: 0.3174
Epoch [18/500], Train Loss: 0.3118
Epoch [19/500], Train Loss: 0.3065
Epoch [20/500], Train Loss: 0.3014
Epoch [21/500], Train Loss: 0.2964
Epoch [22/500], Train Loss: 0.2916
Epoch [23/500], Train Loss: 0.2871
Epoch [24/500], Train Loss: 0.2827
E

[I 2024-06-18 12:32:21,653] A new study created in RDB with name: chr17_study


Individual AUC ROC curves saved in: ../../../Data/model_results/logistic_regression/roc_curves/
Performance metrics saved at: ../../../Data/model_results/logistic_regression/csv_files/performance_metrics.csv
Total SNPs:  760
PRS313 SNPs:  18
Total SNPs used for Training:  742


[I 2024-06-18 12:33:14,270] Trial 4 finished with value: 0.036340780183672906 and parameters: {'learning_rate': 0.0370475206140463, 'l1_coef': 1.0852769314312855e-05, 'patience': 8, 'batch_size': 256}. Best is trial 4 with value: 0.036340780183672906.
[I 2024-06-18 12:33:35,567] Trial 2 finished with value: 0.03826434466128166 and parameters: {'learning_rate': 0.0036997686260389195, 'l1_coef': 1.0265277132651641e-05, 'patience': 5, 'batch_size': 32}. Best is trial 4 with value: 0.036340780183672906.
[I 2024-06-18 12:33:49,375] Trial 1 finished with value: 0.1528091617992946 and parameters: {'learning_rate': 0.003102010890004305, 'l1_coef': 0.0004818415981078156, 'patience': 6, 'batch_size': 64}. Best is trial 4 with value: 0.036340780183672906.
[I 2024-06-18 12:34:14,737] Trial 3 finished with value: 0.3092131957411766 and parameters: {'learning_rate': 0.005885993196287397, 'l1_coef': 0.015554906582993612, 'patience': 17, 'batch_size': 128}. Best is trial 4 with value: 0.03634078018367

Chr 17 - Best hyperparameters: {'learning_rate': 0.0370475206140463, 'l1_coef': 1.0852769314312855e-05, 'patience': 8, 'batch_size': 256}
Chr 17 - Best value: 0.0363
Epoch [1/500], Train Loss: 0.6761
Epoch [2/500], Train Loss: 0.4088
Epoch [3/500], Train Loss: 0.2454
Epoch [4/500], Train Loss: 0.1596
Epoch [5/500], Train Loss: 0.1190
Epoch [6/500], Train Loss: 0.1001
Epoch [7/500], Train Loss: 0.0878
Epoch [8/500], Train Loss: 0.0787
Epoch [9/500], Train Loss: 0.0722
Epoch [10/500], Train Loss: 0.0675
Epoch [11/500], Train Loss: 0.0636
Epoch [12/500], Train Loss: 0.0604
Epoch [13/500], Train Loss: 0.0575
Epoch [14/500], Train Loss: 0.0540
Epoch [15/500], Train Loss: 0.0518
Epoch [16/500], Train Loss: 0.0495
Epoch [17/500], Train Loss: 0.0481
Epoch [18/500], Train Loss: 0.0464
Epoch [19/500], Train Loss: 0.0449
Epoch [20/500], Train Loss: 0.0432
Epoch [21/500], Train Loss: 0.0418
Epoch [22/500], Train Loss: 0.0405
Epoch [23/500], Train Loss: 0.0392
Epoch [24/500], Train Loss: 0.0383
Epo

[I 2024-06-18 12:34:45,108] A new study created in RDB with name: chr18_study


Individual AUC ROC curves saved in: ../../../Data/model_results/logistic_regression/roc_curves/
Performance metrics saved at: ../../../Data/model_results/logistic_regression/csv_files/performance_metrics.csv
Total SNPs:  1178
PRS313 SNPs:  18
Total SNPs used for Training:  1160


[I 2024-06-18 12:36:35,532] Trial 6 finished with value: 0.5164165005087853 and parameters: {'learning_rate': 0.061295343930814226, 'l1_coef': 0.004610129002978482, 'patience': 18, 'batch_size': 128}. Best is trial 6 with value: 0.5164165005087853.
[I 2024-06-18 12:36:44,851] Trial 7 finished with value: 0.575837322643825 and parameters: {'learning_rate': 0.02458608161481166, 'l1_coef': 0.01905125767364561, 'patience': 14, 'batch_size': 64}. Best is trial 6 with value: 0.5164165005087853.
[I 2024-06-18 12:36:46,940] Trial 2 finished with value: 0.587911206483841 and parameters: {'learning_rate': 0.002671332204713552, 'l1_coef': 0.02896195453975584, 'patience': 9, 'batch_size': 256}. Best is trial 6 with value: 0.5164165005087853.
[I 2024-06-18 12:36:47,257] Trial 1 finished with value: 0.5758445409628061 and parameters: {'learning_rate': 0.0053941730725796545, 'l1_coef': 0.019374519831483885, 'patience': 8, 'batch_size': 32}. Best is trial 6 with value: 0.5164165005087853.
[I 2024-06-1

Chr 18 - Best hyperparameters: {'learning_rate': 0.0002883877331261967, 'l1_coef': 2.2053185487347745e-05, 'patience': 10, 'batch_size': 128}
Chr 18 - Best value: 0.1272
Epoch [1/500], Train Loss: 0.6201
Epoch [2/500], Train Loss: 0.5487
Epoch [3/500], Train Loss: 0.5219
Epoch [4/500], Train Loss: 0.5059
Epoch [5/500], Train Loss: 0.4948
Epoch [6/500], Train Loss: 0.4855
Epoch [7/500], Train Loss: 0.4773
Epoch [8/500], Train Loss: 0.4707
Epoch [9/500], Train Loss: 0.4641
Epoch [10/500], Train Loss: 0.4581
Epoch [11/500], Train Loss: 0.4519
Epoch [12/500], Train Loss: 0.4467
Epoch [13/500], Train Loss: 0.4412
Epoch [14/500], Train Loss: 0.4357
Epoch [15/500], Train Loss: 0.4305
Epoch [16/500], Train Loss: 0.4257
Epoch [17/500], Train Loss: 0.4209
Epoch [18/500], Train Loss: 0.4158
Epoch [19/500], Train Loss: 0.4119
Epoch [20/500], Train Loss: 0.4072
Epoch [21/500], Train Loss: 0.4032
Epoch [22/500], Train Loss: 0.3987
Epoch [23/500], Train Loss: 0.3950
Epoch [24/500], Train Loss: 0.3910

[I 2024-06-18 12:37:21,188] A new study created in RDB with name: chr19_study


Individual AUC ROC curves saved in: ../../../Data/model_results/logistic_regression/roc_curves/
Performance metrics saved at: ../../../Data/model_results/logistic_regression/csv_files/performance_metrics.csv
Total SNPs:  696
PRS313 SNPs:  14
Total SNPs used for Training:  682


[I 2024-06-18 12:38:29,586] Trial 0 finished with value: 0.5925331383943557 and parameters: {'learning_rate': 0.006858832732953398, 'l1_coef': 0.014649477422537407, 'patience': 5, 'batch_size': 128}. Best is trial 0 with value: 0.5925331383943557.
[I 2024-06-18 12:38:31,621] Trial 9 finished with value: 0.6982713103294372 and parameters: {'learning_rate': 0.015126462988827504, 'l1_coef': 0.021682178057474553, 'patience': 5, 'batch_size': 128}. Best is trial 0 with value: 0.5925331383943557.
[I 2024-06-18 12:38:32,236] Trial 5 finished with value: 0.11571979373693467 and parameters: {'learning_rate': 0.009134210488595348, 'l1_coef': 0.00017770448263949664, 'patience': 6, 'batch_size': 256}. Best is trial 5 with value: 0.11571979373693467.
[I 2024-06-18 12:38:52,618] Trial 1 finished with value: 0.3598858565092087 and parameters: {'learning_rate': 0.012353183367505644, 'l1_coef': 0.0037526926266675574, 'patience': 14, 'batch_size': 256}. Best is trial 5 with value: 0.11571979373693467.
[

Chr 19 - Best hyperparameters: {'learning_rate': 0.009134210488595348, 'l1_coef': 0.00017770448263949664, 'patience': 6, 'batch_size': 256}
Chr 19 - Best value: 0.1157
Epoch [1/500], Train Loss: 0.5906
Epoch [2/500], Train Loss: 0.4675
Epoch [3/500], Train Loss: 0.4032
Epoch [4/500], Train Loss: 0.3585
Epoch [5/500], Train Loss: 0.3231
Epoch [6/500], Train Loss: 0.2986
Epoch [7/500], Train Loss: 0.2788
Epoch [8/500], Train Loss: 0.2639
Epoch [9/500], Train Loss: 0.2506
Epoch [10/500], Train Loss: 0.2392
Epoch [11/500], Train Loss: 0.2291
Epoch [12/500], Train Loss: 0.2211
Epoch [13/500], Train Loss: 0.2141
Epoch [14/500], Train Loss: 0.2073
Epoch [15/500], Train Loss: 0.2011
Epoch [16/500], Train Loss: 0.1962
Epoch [17/500], Train Loss: 0.1911
Epoch [18/500], Train Loss: 0.1866
Epoch [19/500], Train Loss: 0.1829
Epoch [20/500], Train Loss: 0.1791
Epoch [21/500], Train Loss: 0.1769
Epoch [22/500], Train Loss: 0.1739
Epoch [23/500], Train Loss: 0.1701
Epoch [24/500], Train Loss: 0.1671
E

[I 2024-06-18 12:39:45,851] A new study created in RDB with name: chr20_study


Individual AUC ROC curves saved in: ../../../Data/model_results/logistic_regression/roc_curves/
Performance metrics saved at: ../../../Data/model_results/logistic_regression/csv_files/performance_metrics.csv
Total SNPs:  216
PRS313 SNPs:  8
Total SNPs used for Training:  208


[I 2024-06-18 12:40:17,003] Trial 1 finished with value: 0.09218241758644581 and parameters: {'learning_rate': 0.01528516426025495, 'l1_coef': 1.1188529679391656e-05, 'patience': 9, 'batch_size': 128}. Best is trial 1 with value: 0.09218241758644581.
[I 2024-06-18 12:40:20,611] Trial 7 finished with value: 0.2089025765657425 and parameters: {'learning_rate': 0.08308703436266394, 'l1_coef': 1.3002263069719351e-05, 'patience': 14, 'batch_size': 256}. Best is trial 1 with value: 0.09218241758644581.
[I 2024-06-18 12:40:34,783] Trial 0 finished with value: 0.0951929233968258 and parameters: {'learning_rate': 0.04450661484595062, 'l1_coef': 2.6094386175834953e-05, 'patience': 20, 'batch_size': 128}. Best is trial 1 with value: 0.09218241758644581.
[I 2024-06-18 12:40:38,060] Trial 4 finished with value: 0.15643969625234605 and parameters: {'learning_rate': 0.0711970484147741, 'l1_coef': 0.0005512454337160089, 'patience': 13, 'batch_size': 128}. Best is trial 1 with value: 0.0921824175864458

Chr 20 - Best hyperparameters: {'learning_rate': 0.01528516426025495, 'l1_coef': 1.1188529679391656e-05, 'patience': 9, 'batch_size': 128}
Chr 20 - Best value: 0.0922
Epoch [1/500], Train Loss: 0.2987
Epoch [2/500], Train Loss: 0.2337
Epoch [3/500], Train Loss: 0.2021
Epoch [4/500], Train Loss: 0.1794
Epoch [5/500], Train Loss: 0.1654
Epoch [6/500], Train Loss: 0.1551
Epoch [7/500], Train Loss: 0.1468
Epoch [8/500], Train Loss: 0.1399
Epoch [9/500], Train Loss: 0.1342
Epoch [10/500], Train Loss: 0.1288
Epoch [11/500], Train Loss: 0.1246
Epoch [12/500], Train Loss: 0.1220
Epoch [13/500], Train Loss: 0.1193
Epoch [14/500], Train Loss: 0.1150
Epoch [15/500], Train Loss: 0.1127
Epoch [16/500], Train Loss: 0.1103
Epoch [17/500], Train Loss: 0.1091
Epoch [18/500], Train Loss: 0.1057
Epoch [19/500], Train Loss: 0.1052
Epoch [20/500], Train Loss: 0.1034
Epoch [21/500], Train Loss: 0.1024
Epoch [22/500], Train Loss: 0.1008
Epoch [23/500], Train Loss: 0.1001
Epoch [24/500], Train Loss: 0.0989
Ep

[I 2024-06-18 12:41:23,910] A new study created in RDB with name: chr21_study


Individual AUC ROC curves saved in: ../../../Data/model_results/logistic_regression/roc_curves/
Performance metrics saved at: ../../../Data/model_results/logistic_regression/csv_files/performance_metrics.csv
Total SNPs:  188
PRS313 SNPs:  8
Total SNPs used for Training:  180


[I 2024-06-18 12:42:16,206] Trial 2 finished with value: 0.0930142675127302 and parameters: {'learning_rate': 0.014736038144745061, 'l1_coef': 7.112082725602636e-05, 'patience': 15, 'batch_size': 64}. Best is trial 2 with value: 0.0930142675127302.
[I 2024-06-18 12:42:33,863] Trial 3 finished with value: 0.373406446831567 and parameters: {'learning_rate': 0.007944531806017544, 'l1_coef': 0.04873923493121475, 'patience': 15, 'batch_size': 64}. Best is trial 2 with value: 0.0930142675127302.
[I 2024-06-18 12:43:07,865] Trial 5 finished with value: 0.09902706742286682 and parameters: {'learning_rate': 0.0020142264168256235, 'l1_coef': 7.99761713561818e-05, 'patience': 10, 'batch_size': 64}. Best is trial 2 with value: 0.0930142675127302.
[I 2024-06-18 12:43:09,658] Trial 7 finished with value: 0.08571214601397514 and parameters: {'learning_rate': 0.0024498760032761035, 'l1_coef': 1.4500821052766964e-05, 'patience': 5, 'batch_size': 256}. Best is trial 7 with value: 0.08571214601397514.
[I

Chr 21 - Best hyperparameters: {'learning_rate': 0.0024498760032761035, 'l1_coef': 1.4500821052766964e-05, 'patience': 5, 'batch_size': 256}
Chr 21 - Best value: 0.0857
Epoch [1/500], Train Loss: 0.5425
Epoch [2/500], Train Loss: 0.4060
Epoch [3/500], Train Loss: 0.3725
Epoch [4/500], Train Loss: 0.3580
Epoch [5/500], Train Loss: 0.3462
Epoch [6/500], Train Loss: 0.3365
Epoch [7/500], Train Loss: 0.3285
Epoch [8/500], Train Loss: 0.3219
Epoch [9/500], Train Loss: 0.3149
Epoch [10/500], Train Loss: 0.3087
Epoch [11/500], Train Loss: 0.3033
Epoch [12/500], Train Loss: 0.2979
Epoch [13/500], Train Loss: 0.2933
Epoch [14/500], Train Loss: 0.2883
Epoch [15/500], Train Loss: 0.2831
Epoch [16/500], Train Loss: 0.2788
Epoch [17/500], Train Loss: 0.2746
Epoch [18/500], Train Loss: 0.2706
Epoch [19/500], Train Loss: 0.2666
Epoch [20/500], Train Loss: 0.2629
Epoch [21/500], Train Loss: 0.2591
Epoch [22/500], Train Loss: 0.2559
Epoch [23/500], Train Loss: 0.2527
Epoch [24/500], Train Loss: 0.2493


[I 2024-06-18 12:43:56,543] A new study created in RDB with name: chr22_study


Individual AUC ROC curves saved in: ../../../Data/model_results/logistic_regression/roc_curves/
Performance metrics saved at: ../../../Data/model_results/logistic_regression/csv_files/performance_metrics.csv
Total SNPs:  1084
PRS313 SNPs:  22
Total SNPs used for Training:  1062


[I 2024-06-18 12:44:47,983] Trial 2 finished with value: 2.283341681957245 and parameters: {'learning_rate': 0.09474070974068888, 'l1_coef': 2.6601248821391705e-05, 'patience': 11, 'batch_size': 128}. Best is trial 2 with value: 2.283341681957245.
[I 2024-06-18 12:46:14,489] Trial 7 finished with value: 0.36068163743385906 and parameters: {'learning_rate': 0.002899901938520511, 'l1_coef': 0.01476076256388643, 'patience': 13, 'batch_size': 32}. Best is trial 7 with value: 0.36068163743385906.
[I 2024-06-18 12:46:20,381] Trial 5 finished with value: 0.35517905205488204 and parameters: {'learning_rate': 0.055626803048252724, 'l1_coef': 0.09561134654796818, 'patience': 15, 'batch_size': 128}. Best is trial 5 with value: 0.35517905205488204.
[I 2024-06-18 12:46:28,877] Trial 3 finished with value: 0.33546311350969166 and parameters: {'learning_rate': 0.062442912744401224, 'l1_coef': 0.00419050465414359, 'patience': 13, 'batch_size': 32}. Best is trial 3 with value: 0.33546311350969166.
[I 2

Chr 22 - Best hyperparameters: {'learning_rate': 0.0009574867321547664, 'l1_coef': 0.000185657780206275, 'patience': 17, 'batch_size': 32}
Chr 22 - Best value: 0.1480
Epoch [1/500], Train Loss: 0.4211
Epoch [2/500], Train Loss: 0.3464
Epoch [3/500], Train Loss: 0.3202
Epoch [4/500], Train Loss: 0.3010
Epoch [5/500], Train Loss: 0.2864
Epoch [6/500], Train Loss: 0.2740
Epoch [7/500], Train Loss: 0.2639
Epoch [8/500], Train Loss: 0.2546
Epoch [9/500], Train Loss: 0.2474
Epoch [10/500], Train Loss: 0.2408
Epoch [11/500], Train Loss: 0.2353
Epoch [12/500], Train Loss: 0.2300
Epoch [13/500], Train Loss: 0.2255
Epoch [14/500], Train Loss: 0.2214
Epoch [15/500], Train Loss: 0.2177
Epoch [16/500], Train Loss: 0.2143
Epoch [17/500], Train Loss: 0.2113
Epoch [18/500], Train Loss: 0.2087
Epoch [19/500], Train Loss: 0.2056
Epoch [20/500], Train Loss: 0.2038
Epoch [21/500], Train Loss: 0.2015
Epoch [22/500], Train Loss: 0.1994
Epoch [23/500], Train Loss: 0.1972
Epoch [24/500], Train Loss: 0.1956
Ep

In [10]:
import numpy as np
import pandas as pd
import torch
import torch.nn as nn
from sklearn.metrics import precision_score, recall_score, f1_score, roc_auc_score, confusion_matrix, r2_score as sklearn_r2_score
import os
import csv

# Define the logistic regression model class
class LogisticRegression(nn.Module):
    def __init__(self, input_dim, output_dim, l1_coef=0.0):
        super(LogisticRegression, self).__init__()
        self.linear = nn.Linear(input_dim, output_dim)
        self.sigmoid = nn.Sigmoid()
        self.l1_coef = l1_coef

    def forward(self, x):
        out = self.linear(x)
        out = self.sigmoid(out)
        return out

    def l1_loss(self):
        return self.l1_coef * torch.norm(self.linear.weight, p=1)

# Function to load and evaluate the model
def load_and_evaluate_model(chromosome_number, data_directory, model_folder):
    # Load the data
    file_name = data_directory + f"23AndMe_PRS313_merged_chr{chromosome_number}_matching_split.parquet"
    data = pd.read_parquet(file_name)

    # Split the data into features and target
    X = torch.tensor(data.filter(regex='^(?!.*PRS313_)').values, dtype=torch.float32)
    y = torch.tensor(data.filter(regex='PRS313_').values, dtype=torch.float32)

    # Split the data into train-validation and test sets
    X_train_val, X_test, y_train_val, y_test = train_test_split(X, y, test_size=0.2, random_state=42)

    # Set the device
    device = torch.device("cuda" if torch.cuda.is_available() else "cpu")

    # Load the saved model
    input_dim = X_train_val.shape[1]
    output_dim = y_train_val.shape[1]
    model_save_path = model_folder + f'chr{chromosome_number}/final_model_chr{chromosome_number}.pth'
    model = LogisticRegression(input_dim, output_dim).to(device)
    model.load_state_dict(torch.load(model_save_path))
    model.eval()

    # Evaluate the model on the test set
    with torch.no_grad():
        test_outputs = model(X_test.to(device))
        test_preds = (test_outputs > 0.5).float()
        test_accuracy = float(((test_preds > 0.5) == y_test).float().mean())
        test_precision = precision_score(y_test.cpu().numpy(), test_preds.cpu().numpy(), average='micro')
        test_recall = recall_score(y_test.cpu().numpy(), test_preds.cpu().numpy(), average='micro')
        test_f1 = f1_score(y_test.cpu().numpy(), test_preds.cpu().numpy(), average='micro')
        test_roc_auc = roc_auc_score(y_test.cpu().numpy(), test_outputs.cpu().numpy(), average='micro')
        test_r2 = sklearn_r2_score(y_test.cpu().numpy(), test_outputs.cpu().numpy())
        test_iqs = calculate_iqs(y_test.cpu().numpy(), test_outputs.cpu().numpy())

        # Calculate false positive rate
        cm = confusion_matrix(y_test.cpu().numpy().ravel(), test_preds.cpu().numpy().ravel())
        tn, fp, fn, tp = cm.ravel()
        test_fpr = fp / (fp + tn)

    return {
        'Chromosome': chromosome_number,
        'R2 Score': test_r2,
        'IQS Score': test_iqs,
        'Accuracy': test_accuracy,
        # 'Precision': test_precision,
        # 'Recall': test_recall,
        # 'False Positive Rate': test_fpr,
        'AUC ROC': test_roc_auc,
    }

# Main function to load and evaluate models for all chromosomes and save results to a CSV file
def evaluate_all_chromosomes_and_save_to_csv(start_chromosome, end_chromosome, data_directory, model_folder, csv_file):
    results = []
    for chromosome_number in range(start_chromosome, end_chromosome + 1):
        result = load_and_evaluate_model(chromosome_number, data_directory, model_folder)
        results.append(result)

    # Create a DataFrame to store the performance metrics for each chromosome
    performance_df = pd.DataFrame(results)

    # Save the performance metrics to a CSV file
    performance_df.to_csv(csv_file, index=False)
    print(f"Performance metrics saved at: {csv_file}")

# Example usage
start_chromosome = 1  # Replace with the starting chromosome number
end_chromosome = 22   # Replace with the ending chromosome number
data_directory = '../../../Data/Filtered_split_training_data/'
model_folder = "../../../Data/model_results/logistic_regression/models/"
csv_file = "../../../Data/model_results/logistic_regression/csv_files/performance_metrics.csv"

evaluate_all_chromosomes_and_save_to_csv(start_chromosome, end_chromosome, data_directory, model_folder, csv_file)


Performance metrics saved at: ../../../Data/model_results/logistic_regression/csv_files/performance_metrics.csv
