# Logistic Regression Model

In [16]:
import numpy as np

def calculate_iqs_phased(true_haplotypes, imputed_haplotypes, threshold=0.5):
    """
    Calculate the Imputation Quality Score (IQS) for phased haplotypes.

    The score is the chance-corrected agreement ``(Po - Pc) / (1 - Pc)``
    (the same form as Cohen's kappa) computed over every allele in the
    input, after the imputed probabilities are binarised with ``threshold``.

    Args:
        true_haplotypes (numpy.ndarray): Array of true alleles. Values are
            truncated to integers and used as row indices into a 2x2 table,
            so they are expected to be 0 or 1.
        imputed_haplotypes (numpy.ndarray): Array of imputed allele
            probabilities with the same shape as ``true_haplotypes``.
        threshold (float): Probabilities ``>= threshold`` are called as 1,
            everything else as 0 (default: 0.5).

    Returns:
        float: Imputation Quality Score (IQS). NaN is returned when the score
        is undefined: there are no alleles, or chance agreement is exactly 1
        (every true and every imputed allele falls in the same single class).

    Raises:
        ValueError: If the two inputs do not have the same shape.
        IndexError: If a true allele value indexes outside the 2x2 table.
    """
    true_haplotypes = np.asarray(true_haplotypes)
    imputed_haplotypes = np.asarray(imputed_haplotypes)

    # Check if the shapes of true and imputed haplotypes are the same
    if true_haplotypes.shape != imputed_haplotypes.shape:
        raise ValueError("Shape of true haplotypes and imputed haplotypes must be the same.")

    # Flatten both inputs to one allele per element; the table only needs
    # (true, imputed) pairs, not the haplotype layout.
    true_binary = true_haplotypes.astype(int).ravel()
    imputed_binary = (imputed_haplotypes >= threshold).astype(int).ravel()

    # Rows index the true allele, columns the imputed allele.
    # np.add.at accumulates repeated index pairs, unlike plain fancy-index +=.
    contingency_table = np.zeros((2, 2), dtype=int)
    np.add.at(contingency_table, (true_binary, imputed_binary), 1)

    total_alleles = contingency_table.sum()
    if total_alleles == 0:
        # No data: agreement is 0/0, so report it as undefined.
        return np.float64(np.nan)

    # Observed agreement (Po): fraction of alleles on the diagonal
    po = np.trace(contingency_table) / total_alleles

    # Expected agreement by chance (Pc) from the row/column marginals
    true_counts = contingency_table.sum(axis=1)
    imputed_counts = contingency_table.sum(axis=0)
    pc = np.sum(true_counts * imputed_counts) / (total_alleles ** 2)

    if pc == 1:
        # Denominator would be zero; the score is undefined here.
        return np.float64(np.nan)

    return (po - pc) / (1 - pc)

In [40]:
import numpy as np
import pandas as pd
import torch
import torch.nn as nn
import torch.optim as optim
from torch.utils.data import DataLoader, TensorDataset
from sklearn.model_selection import train_test_split, StratifiedKFold
from sklearn.metrics import precision_score, recall_score, f1_score, roc_auc_score, confusion_matrix, roc_curve
from sklearn.metrics import r2_score as sklearn_r2_score
import optuna
from matplotlib import pyplot as plt
import os
import csv

# Input location and the first chromosome number to process
data_directory = '../../Data/Filtered_phased_training_data/'
start = 1

# Per-chromosome test metrics; one value is appended per processed chromosome
accuracies, precisions, recalls = [], [], []
false_positive_rates, auc_rocs = [], []
r2_scores, iqs_scores = [], []

# Output tree: saved models, CSV reports and ROC curve images
output_folder = "../../Data/model_results/logistic_regression/"
model_folder = f"{output_folder}models/"
csv_folder = f"{output_folder}csv_files/"
curve_folder = f"{output_folder}roc_curves/"

for _folder in (model_folder, csv_folder, curve_folder):
    os.makedirs(_folder, exist_ok=True)

for chromosome_number in range(start, 23):
    # Each chromosome gets its own subfolder under every output root
    chr_suffix = f"chr{chromosome_number}/"
    chr_model_folder = model_folder + chr_suffix
    chr_csv_folder = csv_folder + chr_suffix
    chr_curve_folder = curve_folder + chr_suffix
    for chr_folder in (chr_model_folder, chr_csv_folder, chr_curve_folder):
        os.makedirs(chr_folder, exist_ok=True)

    file_name = f"{data_directory}23AndMe_PRS313_merged_chr{chromosome_number}_matching_split.parquet"
    data = pd.read_parquet(file_name)

    # Columns whose name contains "Unknown" are the targets; every other
    # column is used as an input feature.
    feature_frame = data.filter(regex='^(?!.*Unknown)')
    target_frame = data.filter(regex='Unknown')
    X = torch.tensor(feature_frame.values, dtype=torch.float32)
    y = torch.tensor(target_frame.values, dtype=torch.float32)

    # Report how the columns break down by naming convention
    known_columns = [col for col in data.columns if "PRS313_Known" in col]
    non_prs313_columns = [col for col in data.columns if "PRS313_" not in col]
    print("Unknown PRS313 SNPs: ", y.shape[1])
    print("Known PRS313 SNPs: ", data[known_columns].shape[1])
    print("23AndMe SNPs with LD to Unknown PRS313 SNPs: ", data[non_prs313_columns].shape[1])
    print("Total SNPs used for Training: ", X.shape[1])

    # Hold out 20% of the rows as the test set; the rest is used for
    # hyperparameter search and final training.
    X_train_val, X_test, y_train_val, y_test = train_test_split(X, y, test_size=0.2, random_state=42)

    # Define the logistic regression model with lasso regularization
    class LogisticRegression(nn.Module):
        """Single linear layer followed by a sigmoid, with an L1 weight penalty.

        Args:
            input_dim: Number of input features.
            output_dim: Number of outputs (one sigmoid unit each).
            l1_coef: Multiplier applied to the L1 norm of the layer weights
                in ``l1_loss``.
        """

        def __init__(self, input_dim, output_dim, l1_coef=0.0):
            super().__init__()
            self.l1_coef = l1_coef
            self.linear = nn.Linear(input_dim, output_dim)
            self.sigmoid = nn.Sigmoid()

        def forward(self, x):
            """Return the sigmoid of the linear layer's output for ``x``."""
            return self.sigmoid(self.linear(x))

        def l1_loss(self):
            """Return ``l1_coef`` times the L1 norm of the weights (bias excluded)."""
            weight_l1_norm = torch.norm(self.linear.weight, p=1)
            return self.l1_coef * weight_l1_norm
        
    # Run on the GPU when CUDA is available, otherwise on the CPU
    if torch.cuda.is_available():
        device = torch.device("cuda")
    else:
        device = torch.device("cpu")

    # Model dimensions come from the data; epoch cap and default batch size
    # are fixed here (the tuning and final-training code below use the batch
    # size chosen by the Optuna search instead).
    input_dim, output_dim = X_train_val.shape[1], y_train_val.shape[1]
    num_epochs, batch_size = 500, 128

    # Define the objective function for Optuna with cross-validation and early stopping
    def objective(trial):
        """Optuna objective: mean best validation loss over 5 CV folds.

        Samples learning rate, L1 coefficient, early-stopping patience and
        batch size from ``trial``, then trains one model per fold with early
        stopping on the validation loss.

        Args:
            trial: Optuna trial used to sample the hyperparameters.

        Returns:
            The mean, across folds, of the lowest validation loss (BCE plus
            L1 penalty) reached in each fold.
        """
        learning_rate = trial.suggest_float('learning_rate', 1e-4, 1e-1, log=True)
        l1_coef = trial.suggest_float('l1_coef', 1e-5, 1e-1, log=True)
        patience = trial.suggest_int('patience', 5, 20)
        batch_size = trial.suggest_categorical('batch_size', [32, 64, 128, 256])

        criterion = nn.BCELoss()

        # Folds are stratified on the index of the first maximal target column.
        skf = StratifiedKFold(n_splits=5, shuffle=True, random_state=42)
        fold_losses = []

        for train_idx, val_idx in skf.split(X_train_val, y_train_val.argmax(dim=1)):
            # Start every fold from a fresh model, optimizer and scheduler.
            # Reusing them across folds would let a fold be validated on rows
            # the model was already trained on in an earlier fold, and would
            # carry over a decayed learning rate.
            model = LogisticRegression(input_dim, output_dim, l1_coef).to(device)
            optimizer = optim.Adam(model.parameters(), lr=learning_rate)
            # `verbose` is deprecated in recent PyTorch releases; False was
            # its default, so it is simply omitted.
            scheduler = optim.lr_scheduler.ReduceLROnPlateau(optimizer, mode='min', factor=0.1, patience=5)

            X_train, X_val = X_train_val[train_idx], X_train_val[val_idx]
            y_train, y_val = y_train_val[train_idx], y_train_val[val_idx]

            train_loader = DataLoader(TensorDataset(X_train, y_train), batch_size=batch_size, shuffle=True)
            # The validation loader does not change between epochs, so it is
            # built once per fold.
            val_loader = DataLoader(TensorDataset(X_val, y_val), batch_size=batch_size)

            best_val_loss = float('inf')
            counter = 0

            for epoch in range(num_epochs):
                for batch_X, batch_y in train_loader:
                    batch_X, batch_y = batch_X.to(device), batch_y.to(device)

                    outputs = model(batch_X)
                    loss = criterion(outputs, batch_y) + model.l1_loss()

                    optimizer.zero_grad()
                    loss.backward()
                    optimizer.step()

                # Validation loss is the mean of the per-batch losses and
                # includes the L1 penalty, matching the training loss.
                val_loss = 0.0
                with torch.no_grad():
                    for batch_X, batch_y in val_loader:
                        batch_X, batch_y = batch_X.to(device), batch_y.to(device)
                        outputs = model(batch_X)
                        loss = criterion(outputs, batch_y) + model.l1_loss()
                        val_loss += loss.item()
                val_loss /= len(val_loader)

                scheduler.step(val_loss)

                if val_loss < best_val_loss:
                    best_val_loss = val_loss
                    counter = 0
                else:
                    counter += 1

                # Early stopping: no improvement for `patience` epochs in a row
                if counter >= patience:
                    break

            fold_losses.append(best_val_loss)

        return np.mean(fold_losses)

    # Create the "optuna_studies" folder if it doesn't exist
    os.makedirs("optuna_studies", exist_ok=True)

    # One SQLite-backed study per chromosome, stored under ./optuna_studies
    study_name = f"chr{chromosome_number}_study"
    storage_name = f"sqlite:///optuna_studies/{study_name}.db"

    # load_if_exists=True resumes the stored study when it is already in the
    # database and creates it otherwise. This replaces a manual check for the
    # .db file, which failed when the file existed but the study did not.
    study = optuna.create_study(
        direction='minimize',
        study_name=study_name,
        storage=storage_name,
        load_if_exists=True,
    )

    # Runs 50 additional trials on every execution, including on a resumed
    # study; n_jobs=-1 lets Optuna pick the number of parallel workers.
    study.optimize(objective, n_trials=50, n_jobs=-1)

    # Print the best hyperparameters and best value
    print(f"Chr {chromosome_number} - Best hyperparameters: {study.best_params}")
    print(f"Chr {chromosome_number} - Best value: {study.best_value:.4f}")

    # Train the final model on the full train+validation split using the
    # best hyperparameters found by the study.
    best_learning_rate = study.best_params['learning_rate']
    best_l1_coef = study.best_params['l1_coef']
    best_patience = study.best_params['patience']
    best_batch_size = study.best_params['batch_size']

    model = LogisticRegression(input_dim, output_dim, best_l1_coef).to(device)
    optimizer = optim.Adam(model.parameters(), lr=best_learning_rate)
    criterion = nn.BCELoss()
    # `verbose` is deprecated in recent PyTorch releases; False was its
    # default, so omitting it keeps the same behavior.
    scheduler = optim.lr_scheduler.ReduceLROnPlateau(optimizer, mode='min', factor=0.1, patience=5)

    train_dataset = TensorDataset(X_train_val, y_train_val)
    train_loader = DataLoader(train_dataset, batch_size=best_batch_size, shuffle=True)

    best_train_loss = float('inf')
    counter = 0

    for epoch in range(num_epochs):
        train_loss = 0.0
        for batch_X, batch_y in train_loader:
            batch_X, batch_y = batch_X.to(device), batch_y.to(device)

            outputs = model(batch_X)
            loss = criterion(outputs, batch_y) + model.l1_loss()

            optimizer.zero_grad()
            loss.backward()
            optimizer.step()

            train_loss += loss.item()

        # Mean of the per-batch losses (BCE plus L1 penalty) for this epoch
        train_loss /= len(train_loader)
        print(f"Epoch [{epoch+1}/{num_epochs}], Train Loss: {train_loss:.4f}")

        # No validation split is held out at this stage, so early stopping
        # and the LR scheduler both monitor the training loss.
        if train_loss < best_train_loss:
            best_train_loss = train_loss
            counter = 0
        else:
            counter += 1

        if counter >= best_patience:
            print(f"Early stopping at epoch {epoch+1}")
            break

        scheduler.step(train_loss)

    # Save the final model. The weights written are those of the last epoch
    # run, not of the epoch with the lowest training loss.
    model_save_path = chr_model_folder + f'final_model_chr{chromosome_number}.pth'
    torch.save(model.state_dict(), model_save_path)
    print(f"Final model saved at: {model_save_path}")

    # Evaluate the final model on the test set
    # Only the forward pass needs no_grad; outputs are moved to the CPU once
    # so every metric below works on CPU data regardless of `device`.
    with torch.no_grad():
        test_outputs = model(X_test.to(device)).cpu()
    test_preds = (test_outputs > 0.5).float()

    # Convert once instead of on every metric call
    y_test_cpu = y_test.cpu()
    y_true = y_test_cpu.numpy()
    y_prob = test_outputs.numpy()
    y_pred = test_preds.numpy()

    # Overall metrics, pooled over all target SNPs
    test_accuracy = float((test_preds == y_test_cpu).float().mean())
    test_precision = precision_score(y_true, y_pred, average='micro')
    test_recall = recall_score(y_true, y_pred, average='micro')
    test_f1 = f1_score(y_true, y_pred, average='micro')
    test_roc_auc = roc_auc_score(y_true, y_prob, average='micro')
    test_r2 = sklearn_r2_score(y_true, y_prob)
    test_iqs = calculate_iqs_phased(y_true, y_prob)

    # Calculate false positive rate from the flattened confusion matrix
    cm = confusion_matrix(y_true.ravel(), y_pred.ravel())
    tn, fp, fn, tp = cm.ravel()
    test_fpr = fp / (fp + tn)

    # Append performance metrics to the lists
    accuracies.append(test_accuracy)
    precisions.append(test_precision)
    recalls.append(test_recall)
    false_positive_rates.append(test_fpr)
    auc_rocs.append(test_roc_auc)
    r2_scores.append(test_r2)
    iqs_scores.append(test_iqs)

    # Calculate individual R^2 scores for each SNP
    individual_r2_scores = sklearn_r2_score(y_true, y_prob, multioutput='raw_values')

    # Calculate individual IQS scores for each SNP (one column at a time)
    individual_iqs_scores = np.array([
        calculate_iqs_phased(y_true[:, i].reshape(-1, 1), y_prob[:, i].reshape(-1, 1))
        for i in range(y_true.shape[1])
    ])

    # Get the names of the SNPs from the original dataframe
    snp_names = data.filter(regex='Unknown').columns

    # Save individual R^2 scores to a CSV file
    csv_file = chr_csv_folder + f'individual_r2_scores_chr{chromosome_number}.csv'

    with open(csv_file, 'w', newline='') as file:
        writer = csv.writer(file)
        writer.writerow(['SNP', 'R2 Score'])
        for snp, snp_r2 in zip(snp_names, individual_r2_scores):
            writer.writerow([snp, snp_r2])

    print(f"Individual R^2 scores saved at: {csv_file}")

    # Save individual IQS scores to a CSV file
    iqs_csv_file = chr_csv_folder + f'individual_iqs_scores_chr{chromosome_number}.csv'

    with open(iqs_csv_file, 'w', newline='') as file:
        writer = csv.writer(file)
        writer.writerow(['SNP', 'IQS Score'])
        for snp, snp_iqs in zip(snp_names, individual_iqs_scores):
            writer.writerow([snp, snp_iqs])

    print(f"Individual IQS scores saved at: {iqs_csv_file}")

    # Save individual AUC ROC curves for each SNP
    for i, snp in enumerate(snp_names):
        curve_file = chr_curve_folder + f'auc_roc_curve_{snp}_chr{chromosome_number}.png'

        # Compute everything that can raise before opening a figure, so a
        # failure never leaves a half-drawn figure open.
        try:
            fpr, tpr, _ = roc_curve(y_true[:, i], y_prob[:, i])
            snp_auc = roc_auc_score(y_true[:, i], y_prob[:, i])
        except ValueError:
            # Save a placeholder image if there is insufficient data
            plt.figure()
            plt.axis('off')
            plt.text(0.5, 0.5, "Insufficient data for ROC curve", ha='center', va='center')
            plt.savefig(curve_file)
            plt.close()

            print(f"Skipping SNP {snp} due to insufficient data")
            continue

        plt.figure()
        plt.plot(fpr, tpr, label=f'AUC ROC = {snp_auc:.4f}')
        plt.xlabel('False Positive Rate')
        plt.ylabel('True Positive Rate')
        plt.title(f'AUC ROC Curve - {snp}')
        plt.legend()
        plt.savefig(curve_file)
        plt.close()

    print(f"Individual AUC ROC curves saved in: {curve_folder}")

    # Rebuild the summary table after every chromosome so the CSV on disk
    # always reflects the chromosomes processed so far in this run.
    performance_df = pd.DataFrame({
        'Chromosome': list(range(start, chromosome_number + 1)),
        'Accuracy': accuracies,
        'Precision': precisions,
        'Recall': recalls,
        'False Positive Rate': false_positive_rates,
        'AUC ROC': auc_rocs,
        'R2 Score': r2_scores,
        'IQS Score': iqs_scores
    })

    # Save the performance metrics to a CSV file
    performance_csv_file = csv_folder + 'performance_metrics.csv'
    performance_df.to_csv(performance_csv_file, index=False)
    print(f"Performance metrics saved at: {performance_csv_file}")

Unknown PRS313 SNPs:  40
Known PRS313 SNPs:  20
23AndMe SNPs with LD to Unknown PRS313 SNPs:  1700
Total SNPs used for Training:  1720


[I 2024-04-29 15:17:11,337] Trial 19 finished with value: 0.32225602865219116 and parameters: {'learning_rate': 0.012522677556946608, 'l1_coef': 0.00040924418675837457, 'patience': 5, 'batch_size': 256}. Best is trial 11 with value: 0.10575263649225235.
[I 2024-04-29 15:17:59,164] Trial 14 finished with value: 0.42857140451669695 and parameters: {'learning_rate': 0.0020386706514414245, 'l1_coef': 0.0013907581070924482, 'patience': 7, 'batch_size': 128}. Best is trial 11 with value: 0.10575263649225235.
[I 2024-04-29 15:18:30,480] Trial 15 finished with value: 0.26101318299770354 and parameters: {'learning_rate': 0.002810366338554551, 'l1_coef': 0.00030448107177139704, 'patience': 10, 'batch_size': 128}. Best is trial 11 with value: 0.10575263649225235.
[I 2024-04-29 15:18:50,117] Trial 8 finished with value: 0.194367332641895 and parameters: {'learning_rate': 0.08752526442762275, 'l1_coef': 0.0001266706876516088, 'patience': 13, 'batch_size': 32}. Best is trial 11 with value: 0.1057526

Chr 1 - Best hyperparameters: {'learning_rate': 0.01823591759267881, 'l1_coef': 1.0049346589997416e-05, 'patience': 9, 'batch_size': 32}
Chr 1 - Best value: 0.0865
Epoch [1/500], Train Loss: 0.6078
Epoch [2/500], Train Loss: 0.2365
Epoch [3/500], Train Loss: 0.1788
Epoch [4/500], Train Loss: 0.1473
Epoch [5/500], Train Loss: 0.1346
Epoch [6/500], Train Loss: 0.1262
Epoch [7/500], Train Loss: 0.1224
Epoch [8/500], Train Loss: 0.1164
Epoch [9/500], Train Loss: 0.1078
Epoch [10/500], Train Loss: 0.1023
Epoch [11/500], Train Loss: 0.1006
Epoch [12/500], Train Loss: 0.1012
Epoch [13/500], Train Loss: 0.0985
Epoch [14/500], Train Loss: 0.1017
Epoch [15/500], Train Loss: 0.1041
Epoch [16/500], Train Loss: 0.0979
Epoch [17/500], Train Loss: 0.0983
Epoch [18/500], Train Loss: 0.0929
Epoch [19/500], Train Loss: 0.0936
Epoch [20/500], Train Loss: 0.0987
Epoch [21/500], Train Loss: 0.0966
Epoch [22/500], Train Loss: 0.0973
Epoch [23/500], Train Loss: 0.1004
Epoch [24/500], Train Loss: 0.0996
Epoch

[I 2024-04-29 15:42:01,660] Trial 1 finished with value: 0.42094608396291733 and parameters: {'learning_rate': 0.01961288896675574, 'l1_coef': 0.001601740272418487, 'patience': 9, 'batch_size': 128}. Best is trial 1 with value: 0.42094608396291733.
[I 2024-04-29 15:42:15,036] Trial 6 finished with value: 0.48818083852529526 and parameters: {'learning_rate': 0.009324259434815622, 'l1_coef': 0.0028534946619540092, 'patience': 11, 'batch_size': 128}. Best is trial 1 with value: 0.42094608396291733.
[I 2024-04-29 15:42:18,948] Trial 3 finished with value: 0.5323898077011109 and parameters: {'learning_rate': 0.07858175246218102, 'l1_coef': 0.015365060583572477, 'patience': 12, 'batch_size': 128}. Best is trial 1 with value: 0.42094608396291733.
[I 2024-04-29 15:42:23,012] Trial 5 finished with value: 0.532659575343132 and parameters: {'learning_rate': 0.018765258160924867, 'l1_coef': 0.045659657991690895, 'patience': 8, 'batch_size': 128}. Best is trial 1 with value: 0.42094608396291733.
[I

Chr 2 - Best hyperparameters: {'learning_rate': 0.008236314117298912, 'l1_coef': 1.0089572204728918e-05, 'patience': 10, 'batch_size': 32}
Chr 2 - Best value: 0.1196
Epoch [1/500], Train Loss: 0.4539
Epoch [2/500], Train Loss: 0.2691
Epoch [3/500], Train Loss: 0.2185
Epoch [4/500], Train Loss: 0.1887
Epoch [5/500], Train Loss: 0.1690
Epoch [6/500], Train Loss: 0.1574
Epoch [7/500], Train Loss: 0.1471
Epoch [8/500], Train Loss: 0.1398
Epoch [9/500], Train Loss: 0.1343
Epoch [10/500], Train Loss: 0.1292
Epoch [11/500], Train Loss: 0.1226
Epoch [12/500], Train Loss: 0.1189
Epoch [13/500], Train Loss: 0.1166
Epoch [14/500], Train Loss: 0.1141
Epoch [15/500], Train Loss: 0.1116
Epoch [16/500], Train Loss: 0.1105
Epoch [17/500], Train Loss: 0.1064
Epoch [18/500], Train Loss: 0.1053
Epoch [19/500], Train Loss: 0.1036
Epoch [20/500], Train Loss: 0.1017
Epoch [21/500], Train Loss: 0.1013
Epoch [22/500], Train Loss: 0.0990
Epoch [23/500], Train Loss: 0.0982
Epoch [24/500], Train Loss: 0.0967
Epo

[I 2024-04-29 15:58:58,399] A new study created in RDB with name: chr3_study
[I 2024-04-29 16:00:52,332] Trial 6 finished with value: 0.4874948740005493 and parameters: {'learning_rate': 0.04891006066191164, 'l1_coef': 0.03785721925871398, 'patience': 15, 'batch_size': 256}. Best is trial 6 with value: 0.4874948740005493.
[I 2024-04-29 16:01:00,750] Trial 8 finished with value: 0.3518802528197949 and parameters: {'learning_rate': 0.02791958078402292, 'l1_coef': 0.0021412104186589666, 'patience': 8, 'batch_size': 32}. Best is trial 8 with value: 0.3518802528197949.
[I 2024-04-29 16:01:18,888] Trial 1 finished with value: 0.48823182124357956 and parameters: {'learning_rate': 0.017418786326637868, 'l1_coef': 0.011412419109616913, 'patience': 15, 'batch_size': 32}. Best is trial 8 with value: 0.3518802528197949.
[I 2024-04-29 16:01:46,834] Trial 4 finished with value: 0.48902975746563504 and parameters: {'learning_rate': 0.003822443883350119, 'l1_coef': 0.023774135255901288, 'patience': 19

Chr 3 - Best hyperparameters: {'learning_rate': 0.01168727761615579, 'l1_coef': 1.1084312581961695e-05, 'patience': 15, 'batch_size': 64}
Chr 3 - Best value: 0.0453
Epoch [1/500], Train Loss: 0.4887
Epoch [2/500], Train Loss: 0.2192
Epoch [3/500], Train Loss: 0.1587
Epoch [4/500], Train Loss: 0.1279
Epoch [5/500], Train Loss: 0.1114
Epoch [6/500], Train Loss: 0.0983
Epoch [7/500], Train Loss: 0.0906
Epoch [8/500], Train Loss: 0.0810
Epoch [9/500], Train Loss: 0.0753
Epoch [10/500], Train Loss: 0.0711
Epoch [11/500], Train Loss: 0.0680
Epoch [12/500], Train Loss: 0.0655
Epoch [13/500], Train Loss: 0.0625
Epoch [14/500], Train Loss: 0.0592
Epoch [15/500], Train Loss: 0.0583
Epoch [16/500], Train Loss: 0.0561
Epoch [17/500], Train Loss: 0.0546
Epoch [18/500], Train Loss: 0.0543
Epoch [19/500], Train Loss: 0.0520
Epoch [20/500], Train Loss: 0.0505
Epoch [21/500], Train Loss: 0.0496
Epoch [22/500], Train Loss: 0.0488
Epoch [23/500], Train Loss: 0.0476
Epoch [24/500], Train Loss: 0.0470
Epoc

[I 2024-04-29 16:17:24,295] A new study created in RDB with name: chr4_study
[I 2024-04-29 16:18:59,683] Trial 8 finished with value: 0.3901371121406555 and parameters: {'learning_rate': 0.014818033106791374, 'l1_coef': 0.002210923233450108, 'patience': 16, 'batch_size': 256}. Best is trial 8 with value: 0.3901371121406555.
[I 2024-04-29 16:19:02,231] Trial 9 finished with value: 0.4684937306812831 and parameters: {'learning_rate': 0.005731056526514239, 'l1_coef': 0.0035936285594536737, 'patience': 9, 'batch_size': 64}. Best is trial 8 with value: 0.3901371121406555.
[I 2024-04-29 16:19:14,368] Trial 4 finished with value: 0.12821575552225112 and parameters: {'learning_rate': 0.011547835828017853, 'l1_coef': 9.622091745989452e-05, 'patience': 17, 'batch_size': 256}. Best is trial 4 with value: 0.12821575552225112.
[I 2024-04-29 16:19:34,516] Trial 5 finished with value: 0.5970595002174377 and parameters: {'learning_rate': 0.001695605234410547, 'l1_coef': 0.009942691126444544, 'patience

Chr 4 - Best hyperparameters: {'learning_rate': 0.010022005750497007, 'l1_coef': 1.0999251330619296e-05, 'patience': 13, 'batch_size': 64}
Chr 4 - Best value: 0.0564
Epoch [1/500], Train Loss: 0.5748
Epoch [2/500], Train Loss: 0.2917
Epoch [3/500], Train Loss: 0.2224
Epoch [4/500], Train Loss: 0.1800
Epoch [5/500], Train Loss: 0.1547
Epoch [6/500], Train Loss: 0.1389
Epoch [7/500], Train Loss: 0.1237
Epoch [8/500], Train Loss: 0.1157
Epoch [9/500], Train Loss: 0.1054
Epoch [10/500], Train Loss: 0.0996
Epoch [11/500], Train Loss: 0.0932
Epoch [12/500], Train Loss: 0.0883
Epoch [13/500], Train Loss: 0.0846
Epoch [14/500], Train Loss: 0.0804
Epoch [15/500], Train Loss: 0.0783
Epoch [16/500], Train Loss: 0.0743
Epoch [17/500], Train Loss: 0.0721
Epoch [18/500], Train Loss: 0.0701
Epoch [19/500], Train Loss: 0.0689
Epoch [20/500], Train Loss: 0.0665
Epoch [21/500], Train Loss: 0.0655
Epoch [22/500], Train Loss: 0.0629
Epoch [23/500], Train Loss: 0.0619
Epoch [24/500], Train Loss: 0.0602
Epo

[I 2024-04-29 16:34:09,637] A new study created in RDB with name: chr5_study


Unknown PRS313 SNPs:  54
Known PRS313 SNPs:  14
23AndMe SNPs with LD to Unknown PRS313 SNPs:  2600
Total SNPs used for Training:  2614


[I 2024-04-29 16:35:39,358] Trial 0 finished with value: 0.4416621685028076 and parameters: {'learning_rate': 0.006338269643865029, 'l1_coef': 0.0013978010256287382, 'patience': 8, 'batch_size': 256}. Best is trial 0 with value: 0.4416621685028076.
[I 2024-04-29 16:35:45,249] Trial 6 finished with value: 0.5240630030632019 and parameters: {'learning_rate': 0.006025645594656122, 'l1_coef': 0.01012664451885839, 'patience': 15, 'batch_size': 256}. Best is trial 0 with value: 0.4416621685028076.
[I 2024-04-29 16:36:01,538] Trial 3 finished with value: 0.5177002365772542 and parameters: {'learning_rate': 0.01339274208779705, 'l1_coef': 0.005646765587912697, 'patience': 9, 'batch_size': 32}. Best is trial 0 with value: 0.4416621685028076.
[I 2024-04-29 16:36:09,263] Trial 8 finished with value: 0.3304446518421173 and parameters: {'learning_rate': 0.004514031608388543, 'l1_coef': 0.0006234220787323494, 'patience': 11, 'batch_size': 128}. Best is trial 8 with value: 0.3304446518421173.
[I 2024

Chr 5 - Best hyperparameters: {'learning_rate': 0.010757867162206202, 'l1_coef': 1.0440017068267564e-05, 'patience': 9, 'batch_size': 64}
Chr 5 - Best value: 0.0727
Epoch [1/500], Train Loss: 0.6384
Epoch [2/500], Train Loss: 0.3042
Epoch [3/500], Train Loss: 0.2282
Epoch [4/500], Train Loss: 0.1901
Epoch [5/500], Train Loss: 0.1666
Epoch [6/500], Train Loss: 0.1502
Epoch [7/500], Train Loss: 0.1396
Epoch [8/500], Train Loss: 0.1283
Epoch [9/500], Train Loss: 0.1220
Epoch [10/500], Train Loss: 0.1161
Epoch [11/500], Train Loss: 0.1126
Epoch [12/500], Train Loss: 0.1063
Epoch [13/500], Train Loss: 0.1031
Epoch [14/500], Train Loss: 0.1000
Epoch [15/500], Train Loss: 0.0976
Epoch [16/500], Train Loss: 0.0949
Epoch [17/500], Train Loss: 0.0929
Epoch [18/500], Train Loss: 0.0920
Epoch [19/500], Train Loss: 0.0905
Epoch [20/500], Train Loss: 0.0884
Epoch [21/500], Train Loss: 0.0863
Epoch [22/500], Train Loss: 0.0854
Epoch [23/500], Train Loss: 0.0832
Epoch [24/500], Train Loss: 0.0819
Epoc

[I 2024-04-29 16:59:33,841] Trial 8 finished with value: 0.5444069325923919 and parameters: {'learning_rate': 0.03641086436501182, 'l1_coef': 0.006126571728054542, 'patience': 12, 'batch_size': 256}. Best is trial 0 with value: 0.28551640510559084.
[I 2024-04-29 16:59:49,330] Trial 7 finished with value: 0.5601417660713196 and parameters: {'learning_rate': 0.0034266786535780557, 'l1_coef': 0.037810092671732506, 'patience': 7, 'batch_size': 256}. Best is trial 0 with value: 0.28551640510559084.
[I 2024-04-29 17:00:01,998] Trial 2 finished with value: 0.5463922125952585 and parameters: {'learning_rate': 0.002021302445675407, 'l1_coef': 0.010771776168107157, 'patience': 8, 'batch_size': 64}. Best is trial 0 with value: 0.28551640510559084.
[I 2024-04-29 17:00:08,649] Trial 6 finished with value: 0.47329054795778713 and parameters: {'learning_rate': 0.0067575295573570685, 'l1_coef': 0.0019008427725244253, 'patience': 10, 'batch_size': 32}. Best is trial 0 with value: 0.28551640510559084.
[

Chr 6 - Best hyperparameters: {'learning_rate': 0.0005159914294373423, 'l1_coef': 1.0687801532020404e-05, 'patience': 8, 'batch_size': 64}
Chr 6 - Best value: 0.1476
Epoch [1/500], Train Loss: 0.5682
Epoch [2/500], Train Loss: 0.5109
Epoch [3/500], Train Loss: 0.4886
Epoch [4/500], Train Loss: 0.4691
Epoch [5/500], Train Loss: 0.4535
Epoch [6/500], Train Loss: 0.4380
Epoch [7/500], Train Loss: 0.4243
Epoch [8/500], Train Loss: 0.4118
Epoch [9/500], Train Loss: 0.4002
Epoch [10/500], Train Loss: 0.3904
Epoch [11/500], Train Loss: 0.3796
Epoch [12/500], Train Loss: 0.3712
Epoch [13/500], Train Loss: 0.3621
Epoch [14/500], Train Loss: 0.3541
Epoch [15/500], Train Loss: 0.3465
Epoch [16/500], Train Loss: 0.3392
Epoch [17/500], Train Loss: 0.3324
Epoch [18/500], Train Loss: 0.3267
Epoch [19/500], Train Loss: 0.3209
Epoch [20/500], Train Loss: 0.3150
Epoch [21/500], Train Loss: 0.3094
Epoch [22/500], Train Loss: 0.3042
Epoch [23/500], Train Loss: 0.2993
Epoch [24/500], Train Loss: 0.2945
Epo

[I 2024-04-29 17:14:13,008] A new study created in RDB with name: chr7_study


Individual AUC ROC curves saved in: ../../Data/model_results/logistic_regression/roc_curves/
Performance metrics saved at: ../../Data/model_results/logistic_regression/csv_files/performance_metrics.csv
Unknown PRS313 SNPs:  22
Known PRS313 SNPs:  6
23AndMe SNPs with LD to Unknown PRS313 SNPs:  946
Total SNPs used for Training:  952


[I 2024-04-29 17:15:12,481] Trial 3 finished with value: 0.20168656408786773 and parameters: {'learning_rate': 0.0320922575088999, 'l1_coef': 0.00025039684335643287, 'patience': 7, 'batch_size': 128}. Best is trial 3 with value: 0.20168656408786773.
[I 2024-04-29 17:15:40,436] Trial 6 finished with value: 0.19174406230449675 and parameters: {'learning_rate': 0.004022502634185784, 'l1_coef': 0.00018287038532150104, 'patience': 6, 'batch_size': 256}. Best is trial 6 with value: 0.19174406230449675.
[I 2024-04-29 17:15:51,977] Trial 8 finished with value: 0.3490034580230713 and parameters: {'learning_rate': 0.09058313635110345, 'l1_coef': 0.0017570162303808515, 'patience': 16, 'batch_size': 64}. Best is trial 6 with value: 0.19174406230449675.
[I 2024-04-29 17:16:05,420] Trial 0 finished with value: 0.14472352883645467 and parameters: {'learning_rate': 0.00415457257335683, 'l1_coef': 5.9010307078180496e-05, 'patience': 12, 'batch_size': 64}. Best is trial 0 with value: 0.14472352883645467

Chr 7 - Best hyperparameters: {'learning_rate': 0.00022929815822353796, 'l1_coef': 1.1733499979155006e-05, 'patience': 12, 'batch_size': 32}
Chr 7 - Best value: 0.1067
Epoch [1/500], Train Loss: 0.5506
Epoch [2/500], Train Loss: 0.4780
Epoch [3/500], Train Loss: 0.4621
Epoch [4/500], Train Loss: 0.4493
Epoch [5/500], Train Loss: 0.4375
Epoch [6/500], Train Loss: 0.4262
Epoch [7/500], Train Loss: 0.4164
Epoch [8/500], Train Loss: 0.4070
Epoch [9/500], Train Loss: 0.3982
Epoch [10/500], Train Loss: 0.3900
Epoch [11/500], Train Loss: 0.3820
Epoch [12/500], Train Loss: 0.3746
Epoch [13/500], Train Loss: 0.3673
Epoch [14/500], Train Loss: 0.3606
Epoch [15/500], Train Loss: 0.3538
Epoch [16/500], Train Loss: 0.3480
Epoch [17/500], Train Loss: 0.3419
Epoch [18/500], Train Loss: 0.3361
Epoch [19/500], Train Loss: 0.3307
Epoch [20/500], Train Loss: 0.3255
Epoch [21/500], Train Loss: 0.3201
Epoch [22/500], Train Loss: 0.3152
Epoch [23/500], Train Loss: 0.3107
Epoch [24/500], Train Loss: 0.3062
E

[I 2024-04-29 17:36:07,731] A new study created in RDB with name: chr8_study


Individual AUC ROC curves saved in: ../../Data/model_results/logistic_regression/roc_curves/
Performance metrics saved at: ../../Data/model_results/logistic_regression/csv_files/performance_metrics.csv
Unknown PRS313 SNPs:  28
Known PRS313 SNPs:  14
23AndMe SNPs with LD to Unknown PRS313 SNPs:  908
Total SNPs used for Training:  922


[I 2024-04-29 17:36:43,986] Trial 4 finished with value: 0.289887635409832 and parameters: {'learning_rate': 0.01790690715044604, 'l1_coef': 0.0004282566754737251, 'patience': 5, 'batch_size': 128}. Best is trial 4 with value: 0.289887635409832.
[I 2024-04-29 17:36:53,570] Trial 7 finished with value: 0.15082411020994185 and parameters: {'learning_rate': 0.0096923069763245, 'l1_coef': 3.4982941463216134e-05, 'patience': 5, 'batch_size': 256}. Best is trial 7 with value: 0.15082411020994185.
[I 2024-04-29 17:37:32,639] Trial 8 finished with value: 0.21456335932016374 and parameters: {'learning_rate': 0.06006630088632777, 'l1_coef': 0.00018072561401126532, 'patience': 16, 'batch_size': 256}. Best is trial 7 with value: 0.15082411020994185.
[I 2024-04-29 17:37:34,803] Trial 6 finished with value: 0.5038841935304494 and parameters: {'learning_rate': 0.021090994749825818, 'l1_coef': 0.0038948558449949607, 'patience': 9, 'batch_size': 32}. Best is trial 7 with value: 0.15082411020994185.
[I 

Chr 8 - Best hyperparameters: {'learning_rate': 0.0012197976599892346, 'l1_coef': 1.0397855148759009e-05, 'patience': 6, 'batch_size': 32}
Chr 8 - Best value: 0.1160
Epoch [1/500], Train Loss: 0.5154
Epoch [2/500], Train Loss: 0.4383
Epoch [3/500], Train Loss: 0.3924
Epoch [4/500], Train Loss: 0.3569
Epoch [5/500], Train Loss: 0.3292
Epoch [6/500], Train Loss: 0.3069
Epoch [7/500], Train Loss: 0.2878
Epoch [8/500], Train Loss: 0.2726
Epoch [9/500], Train Loss: 0.2578
Epoch [10/500], Train Loss: 0.2463
Epoch [11/500], Train Loss: 0.2363
Epoch [12/500], Train Loss: 0.2269
Epoch [13/500], Train Loss: 0.2188
Epoch [14/500], Train Loss: 0.2113
Epoch [15/500], Train Loss: 0.2046
Epoch [16/500], Train Loss: 0.1985
Epoch [17/500], Train Loss: 0.1926
Epoch [18/500], Train Loss: 0.1873
Epoch [19/500], Train Loss: 0.1831
Epoch [20/500], Train Loss: 0.1782
Epoch [21/500], Train Loss: 0.1740
Epoch [22/500], Train Loss: 0.1701
Epoch [23/500], Train Loss: 0.1669
Epoch [24/500], Train Loss: 0.1630
Epo

[I 2024-04-29 17:50:27,057] A new study created in RDB with name: chr9_study


Individual AUC ROC curves saved in: ../../Data/model_results/logistic_regression/roc_curves/
Performance metrics saved at: ../../Data/model_results/logistic_regression/csv_files/performance_metrics.csv
Unknown PRS313 SNPs:  16
Known PRS313 SNPs:  12
23AndMe SNPs with LD to Unknown PRS313 SNPs:  748
Total SNPs used for Training:  760


[I 2024-04-29 17:51:23,781] Trial 9 finished with value: 0.14986894503235818 and parameters: {'learning_rate': 0.010137114981788231, 'l1_coef': 2.8881708715997526e-05, 'patience': 13, 'batch_size': 256}. Best is trial 9 with value: 0.14986894503235818.
[I 2024-04-29 17:51:28,996] Trial 7 finished with value: 0.5691749657903399 and parameters: {'learning_rate': 0.019417578047721398, 'l1_coef': 0.02262553184260463, 'patience': 7, 'batch_size': 64}. Best is trial 9 with value: 0.14986894503235818.
[I 2024-04-29 17:51:58,480] Trial 3 finished with value: 0.5708930373191834 and parameters: {'learning_rate': 0.0015748954757609744, 'l1_coef': 0.035771838732392036, 'patience': 12, 'batch_size': 64}. Best is trial 9 with value: 0.14986894503235818.
[I 2024-04-29 17:52:59,376] Trial 4 finished with value: 0.15538781514534583 and parameters: {'learning_rate': 0.001986614333002369, 'l1_coef': 6.830271975067248e-05, 'patience': 18, 'batch_size': 32}. Best is trial 9 with value: 0.14986894503235818.

Chr 9 - Best hyperparameters: {'learning_rate': 0.05822594013574142, 'l1_coef': 1.5307383394547932e-05, 'patience': 14, 'batch_size': 256}
Chr 9 - Best value: 0.1107
Epoch [1/500], Train Loss: 3.2559
Epoch [2/500], Train Loss: 2.7893
Epoch [3/500], Train Loss: 2.2511
Epoch [4/500], Train Loss: 2.0736
Epoch [5/500], Train Loss: 1.9805
Epoch [6/500], Train Loss: 1.9376
Epoch [7/500], Train Loss: 1.9090
Epoch [8/500], Train Loss: 1.8927
Epoch [9/500], Train Loss: 1.8788
Epoch [10/500], Train Loss: 1.8706
Epoch [11/500], Train Loss: 1.8593
Epoch [12/500], Train Loss: 1.8601
Epoch [13/500], Train Loss: 1.8655
Epoch [14/500], Train Loss: 1.8530
Epoch [15/500], Train Loss: 1.8529
Epoch [16/500], Train Loss: 1.8448
Epoch [17/500], Train Loss: 1.8361
Epoch [18/500], Train Loss: 1.4134
Epoch [19/500], Train Loss: 1.0113
Epoch [20/500], Train Loss: 0.9970
Epoch [21/500], Train Loss: 0.9930
Epoch [22/500], Train Loss: 0.9919
Epoch [23/500], Train Loss: 0.9924
Epoch [24/500], Train Loss: 0.9757
Epo

[I 2024-04-29 18:00:56,722] A new study created in RDB with name: chr10_study


Individual AUC ROC curves saved in: ../../Data/model_results/logistic_regression/roc_curves/
Performance metrics saved at: ../../Data/model_results/logistic_regression/csv_files/performance_metrics.csv
Unknown PRS313 SNPs:  26
Known PRS313 SNPs:  10
23AndMe SNPs with LD to Unknown PRS313 SNPs:  1056
Total SNPs used for Training:  1066


[I 2024-04-29 18:02:18,365] Trial 3 finished with value: 0.5117952704429627 and parameters: {'learning_rate': 0.09179540036233391, 'l1_coef': 0.011793910523981571, 'patience': 17, 'batch_size': 256}. Best is trial 3 with value: 0.5117952704429627.
[I 2024-04-29 18:02:20,246] Trial 8 finished with value: 0.15496711432933807 and parameters: {'learning_rate': 0.05740666187583184, 'l1_coef': 8.51143407136203e-05, 'patience': 7, 'batch_size': 128}. Best is trial 8 with value: 0.15496711432933807.
[I 2024-04-29 18:02:31,128] Trial 2 finished with value: 0.24031963497400283 and parameters: {'learning_rate': 0.05266501640958999, 'l1_coef': 0.00040295397523053164, 'patience': 19, 'batch_size': 256}. Best is trial 8 with value: 0.15496711432933807.
[I 2024-04-29 18:03:10,169] Trial 11 finished with value: 0.4101851016283035 and parameters: {'learning_rate': 0.030377258822650636, 'l1_coef': 0.00251472051305718, 'patience': 7, 'batch_size': 128}. Best is trial 8 with value: 0.15496711432933807.
[I

Chr 10 - Best hyperparameters: {'learning_rate': 0.00022093387007935644, 'l1_coef': 1.0430280711765954e-05, 'patience': 5, 'batch_size': 32}
Chr 10 - Best value: 0.1004
Epoch [1/500], Train Loss: 0.5452
Epoch [2/500], Train Loss: 0.4747
Epoch [3/500], Train Loss: 0.4538
Epoch [4/500], Train Loss: 0.4376
Epoch [5/500], Train Loss: 0.4241
Epoch [6/500], Train Loss: 0.4119
Epoch [7/500], Train Loss: 0.4010
Epoch [8/500], Train Loss: 0.3906
Epoch [9/500], Train Loss: 0.3806
Epoch [10/500], Train Loss: 0.3719
Epoch [11/500], Train Loss: 0.3632
Epoch [12/500], Train Loss: 0.3549
Epoch [13/500], Train Loss: 0.3471
Epoch [14/500], Train Loss: 0.3398
Epoch [15/500], Train Loss: 0.3327
Epoch [16/500], Train Loss: 0.3258
Epoch [17/500], Train Loss: 0.3196
Epoch [18/500], Train Loss: 0.3134
Epoch [19/500], Train Loss: 0.3076
Epoch [20/500], Train Loss: 0.3017
Epoch [21/500], Train Loss: 0.2965
Epoch [22/500], Train Loss: 0.2914
Epoch [23/500], Train Loss: 0.2864
Epoch [24/500], Train Loss: 0.2818


[I 2024-04-29 18:18:52,582] A new study created in RDB with name: chr11_study
[I 2024-04-29 18:19:38,614] Trial 2 finished with value: 0.7207237940568192 and parameters: {'learning_rate': 0.014907400301912815, 'l1_coef': 0.006344133398932885, 'patience': 5, 'batch_size': 32}. Best is trial 2 with value: 0.7207237940568192.
[I 2024-04-29 18:20:22,936] Trial 9 finished with value: 0.9041348099708557 and parameters: {'learning_rate': 0.0040011771985099965, 'l1_coef': 0.05613908232806854, 'patience': 5, 'batch_size': 256}. Best is trial 2 with value: 0.7207237940568192.
[I 2024-04-29 18:20:39,014] Trial 0 finished with value: 0.2951857958521162 and parameters: {'learning_rate': 0.027670445267021417, 'l1_coef': 0.0007386161547168055, 'patience': 10, 'batch_size': 64}. Best is trial 0 with value: 0.2951857958521162.
[I 2024-04-29 18:20:43,092] Trial 6 finished with value: 0.5326493084430695 and parameters: {'learning_rate': 0.018683977609812503, 'l1_coef': 0.08611638791518988, 'patience': 15

Chr 11 - Best hyperparameters: {'learning_rate': 0.0002329404200995808, 'l1_coef': 1.2601501185021566e-05, 'patience': 9, 'batch_size': 32}
Chr 11 - Best value: 0.0578
Epoch [1/500], Train Loss: 0.5389
Epoch [2/500], Train Loss: 0.4823
Epoch [3/500], Train Loss: 0.4595
Epoch [4/500], Train Loss: 0.4408
Epoch [5/500], Train Loss: 0.4241
Epoch [6/500], Train Loss: 0.4089
Epoch [7/500], Train Loss: 0.3953
Epoch [8/500], Train Loss: 0.3820
Epoch [9/500], Train Loss: 0.3700
Epoch [10/500], Train Loss: 0.3587
Epoch [11/500], Train Loss: 0.3485
Epoch [12/500], Train Loss: 0.3385
Epoch [13/500], Train Loss: 0.3295
Epoch [14/500], Train Loss: 0.3209
Epoch [15/500], Train Loss: 0.3127
Epoch [16/500], Train Loss: 0.3050
Epoch [17/500], Train Loss: 0.2980
Epoch [18/500], Train Loss: 0.2909
Epoch [19/500], Train Loss: 0.2844
Epoch [20/500], Train Loss: 0.2785
Epoch [21/500], Train Loss: 0.2724
Epoch [22/500], Train Loss: 0.2666
Epoch [23/500], Train Loss: 0.2614
Epoch [24/500], Train Loss: 0.2562
E

[I 2024-04-29 18:40:52,446] A new study created in RDB with name: chr12_study
[I 2024-04-29 18:42:26,250] Trial 6 finished with value: 0.44912033081054686 and parameters: {'learning_rate': 0.0848232050382313, 'l1_coef': 0.020452292566714995, 'patience': 15, 'batch_size': 256}. Best is trial 6 with value: 0.44912033081054686.
[I 2024-04-29 18:42:29,376] Trial 5 finished with value: 0.44796993902751375 and parameters: {'learning_rate': 0.053894585924454494, 'l1_coef': 0.00848794238338786, 'patience': 15, 'batch_size': 64}. Best is trial 5 with value: 0.44796993902751375.
[I 2024-04-29 18:42:29,638] Trial 4 finished with value: 0.4483575001358986 and parameters: {'learning_rate': 0.0065601160997320156, 'l1_coef': 0.024457224100551952, 'patience': 16, 'batch_size': 128}. Best is trial 5 with value: 0.44796993902751375.
[I 2024-04-29 18:42:30,109] Trial 2 finished with value: 0.4488718816212246 and parameters: {'learning_rate': 0.05130635773065441, 'l1_coef': 0.04534621349647284, 'patience'

Chr 12 - Best hyperparameters: {'learning_rate': 0.013368341073967381, 'l1_coef': 1.0168876770412278e-05, 'patience': 9, 'batch_size': 128}
Chr 12 - Best value: 0.0591
Epoch [1/500], Train Loss: 0.5554
Epoch [2/500], Train Loss: 0.3039
Epoch [3/500], Train Loss: 0.2260
Epoch [4/500], Train Loss: 0.1852
Epoch [5/500], Train Loss: 0.1623
Epoch [6/500], Train Loss: 0.1472
Epoch [7/500], Train Loss: 0.1344
Epoch [8/500], Train Loss: 0.1245
Epoch [9/500], Train Loss: 0.1160
Epoch [10/500], Train Loss: 0.1085
Epoch [11/500], Train Loss: 0.1033
Epoch [12/500], Train Loss: 0.0993
Epoch [13/500], Train Loss: 0.0945
Epoch [14/500], Train Loss: 0.0905
Epoch [15/500], Train Loss: 0.0873
Epoch [16/500], Train Loss: 0.0840
Epoch [17/500], Train Loss: 0.0807
Epoch [18/500], Train Loss: 0.0785
Epoch [19/500], Train Loss: 0.0761
Epoch [20/500], Train Loss: 0.0739
Epoch [21/500], Train Loss: 0.0724
Epoch [22/500], Train Loss: 0.0710
Epoch [23/500], Train Loss: 0.0701
Epoch [24/500], Train Loss: 0.0679
E

[I 2024-04-29 18:54:13,743] A new study created in RDB with name: chr13_study


Individual AUC ROC curves saved in: ../../Data/model_results/logistic_regression/roc_curves/
Performance metrics saved at: ../../Data/model_results/logistic_regression/csv_files/performance_metrics.csv
Unknown PRS313 SNPs:  8
Known PRS313 SNPs:  2
23AndMe SNPs with LD to Unknown PRS313 SNPs:  210
Total SNPs used for Training:  212


[I 2024-04-29 18:54:37,426] Trial 6 finished with value: 0.1486952190215771 and parameters: {'learning_rate': 0.09263716433749118, 'l1_coef': 3.089659182141569e-05, 'patience': 5, 'batch_size': 32}. Best is trial 6 with value: 0.1486952190215771.
[I 2024-04-29 18:55:00,961] Trial 2 finished with value: 0.42754327058792113 and parameters: {'learning_rate': 0.04150924742347782, 'l1_coef': 0.015372443584776031, 'patience': 6, 'batch_size': 256}. Best is trial 6 with value: 0.1486952190215771.
[I 2024-04-29 18:55:48,327] Trial 5 finished with value: 0.4001734733581543 and parameters: {'learning_rate': 0.005602566443606198, 'l1_coef': 0.02790086075210445, 'patience': 10, 'batch_size': 64}. Best is trial 6 with value: 0.1486952190215771.
[I 2024-04-29 18:56:04,300] Trial 4 finished with value: 0.1912694309766476 and parameters: {'learning_rate': 0.009675183329935676, 'l1_coef': 0.0006248820067672184, 'patience': 7, 'batch_size': 32}. Best is trial 6 with value: 0.1486952190215771.
[I 2024-04

Chr 13 - Best hyperparameters: {'learning_rate': 0.030234974894438254, 'l1_coef': 1.0118528459470791e-05, 'patience': 11, 'batch_size': 64}
Chr 13 - Best value: 0.1120
Epoch [1/500], Train Loss: 0.3510
Epoch [2/500], Train Loss: 0.2215
Epoch [3/500], Train Loss: 0.1848
Epoch [4/500], Train Loss: 0.1673
Epoch [5/500], Train Loss: 0.1544
Epoch [6/500], Train Loss: 0.1453
Epoch [7/500], Train Loss: 0.1371
Epoch [8/500], Train Loss: 0.1329
Epoch [9/500], Train Loss: 0.1281
Epoch [10/500], Train Loss: 0.1286
Epoch [11/500], Train Loss: 0.1251
Epoch [12/500], Train Loss: 0.1238
Epoch [13/500], Train Loss: 0.1198
Epoch [14/500], Train Loss: 0.1194
Epoch [15/500], Train Loss: 0.1175
Epoch [16/500], Train Loss: 0.1155
Epoch [17/500], Train Loss: 0.1175
Epoch [18/500], Train Loss: 0.1137
Epoch [19/500], Train Loss: 0.1148
Epoch [20/500], Train Loss: 0.1167
Epoch [21/500], Train Loss: 0.1157
Epoch [22/500], Train Loss: 0.1114
Epoch [23/500], Train Loss: 0.1133
Epoch [24/500], Train Loss: 0.1123
E

[I 2024-04-29 19:04:29,044] A new study created in RDB with name: chr14_study


Individual AUC ROC curves saved in: ../../Data/model_results/logistic_regression/roc_curves/
Performance metrics saved at: ../../Data/model_results/logistic_regression/csv_files/performance_metrics.csv
Unknown PRS313 SNPs:  8
Known PRS313 SNPs:  8
23AndMe SNPs with LD to Unknown PRS313 SNPs:  312
Total SNPs used for Training:  320


[I 2024-04-29 19:05:20,192] Trial 5 finished with value: 0.09430188345057625 and parameters: {'learning_rate': 0.03309263668434581, 'l1_coef': 5.417479221911823e-05, 'patience': 11, 'batch_size': 64}. Best is trial 5 with value: 0.09430188345057625.
[I 2024-04-29 19:05:43,606] Trial 4 finished with value: 0.09585341326892376 and parameters: {'learning_rate': 0.004456766601075676, 'l1_coef': 1.5001014940584e-05, 'patience': 15, 'batch_size': 128}. Best is trial 5 with value: 0.09430188345057625.
[I 2024-04-29 19:06:06,865] Trial 7 finished with value: 0.11026291652367666 and parameters: {'learning_rate': 0.0018564886307087314, 'l1_coef': 0.00011413880094708647, 'patience': 8, 'batch_size': 32}. Best is trial 5 with value: 0.09430188345057625.
[I 2024-04-29 19:06:08,468] Trial 1 finished with value: 0.298729008436203 and parameters: {'learning_rate': 0.005442104572157073, 'l1_coef': 0.0024768101906878235, 'patience': 10, 'batch_size': 64}. Best is trial 5 with value: 0.09430188345057625.

Chr 14 - Best hyperparameters: {'learning_rate': 0.04900486554871739, 'l1_coef': 1.160024455652371e-05, 'patience': 20, 'batch_size': 64}
Chr 14 - Best value: 0.0734
Epoch [1/500], Train Loss: 0.4548
Epoch [2/500], Train Loss: 0.1747
Epoch [3/500], Train Loss: 0.1319
Epoch [4/500], Train Loss: 0.1136
Epoch [5/500], Train Loss: 0.1084
Epoch [6/500], Train Loss: 0.1006
Epoch [7/500], Train Loss: 0.0992
Epoch [8/500], Train Loss: 0.0911
Epoch [9/500], Train Loss: 0.0863
Epoch [10/500], Train Loss: 0.0780
Epoch [11/500], Train Loss: 0.0761
Epoch [12/500], Train Loss: 0.0750
Epoch [13/500], Train Loss: 0.0716
Epoch [14/500], Train Loss: 0.0737
Epoch [15/500], Train Loss: 0.0711
Epoch [16/500], Train Loss: 0.0687
Epoch [17/500], Train Loss: 0.0691
Epoch [18/500], Train Loss: 0.0675
Epoch [19/500], Train Loss: 0.0661
Epoch [20/500], Train Loss: 0.0638
Epoch [21/500], Train Loss: 0.0671
Epoch [22/500], Train Loss: 0.0628
Epoch [23/500], Train Loss: 0.0607
Epoch [24/500], Train Loss: 0.0632
Epo

[I 2024-04-29 19:14:46,761] A new study created in RDB with name: chr15_study


Individual AUC ROC curves saved in: ../../Data/model_results/logistic_regression/roc_curves/
Performance metrics saved at: ../../Data/model_results/logistic_regression/csv_files/performance_metrics.csv
Unknown PRS313 SNPs:  10
Known PRS313 SNPs:  4
23AndMe SNPs with LD to Unknown PRS313 SNPs:  380
Total SNPs used for Training:  384


[I 2024-04-29 19:15:55,686] Trial 5 finished with value: 0.4349869183131627 and parameters: {'learning_rate': 0.05575593969568602, 'l1_coef': 0.009221470770799675, 'patience': 10, 'batch_size': 64}. Best is trial 5 with value: 0.4349869183131627.
[I 2024-04-29 19:16:15,413] Trial 0 finished with value: 0.41417612944330484 and parameters: {'learning_rate': 0.07041363904302442, 'l1_coef': 0.007610379973855361, 'patience': 16, 'batch_size': 64}. Best is trial 0 with value: 0.41417612944330484.
[I 2024-04-29 19:16:39,779] Trial 2 finished with value: 0.09571805332715695 and parameters: {'learning_rate': 0.002803754974969709, 'l1_coef': 7.612007805032404e-05, 'patience': 8, 'batch_size': 32}. Best is trial 2 with value: 0.09571805332715695.
[I 2024-04-29 19:17:02,388] Trial 8 finished with value: 0.1758909970521927 and parameters: {'learning_rate': 0.017839079072242126, 'l1_coef': 0.0007128267633340507, 'patience': 11, 'batch_size': 32}. Best is trial 2 with value: 0.09571805332715695.
[I 2

Chr 15 - Best hyperparameters: {'learning_rate': 0.00031046985754389273, 'l1_coef': 1.7067190785696914e-05, 'patience': 5, 'batch_size': 32}
Chr 15 - Best value: 0.0729
Epoch [1/500], Train Loss: 0.5543
Epoch [2/500], Train Loss: 0.4629
Epoch [3/500], Train Loss: 0.4338
Epoch [4/500], Train Loss: 0.4134
Epoch [5/500], Train Loss: 0.3959
Epoch [6/500], Train Loss: 0.3817
Epoch [7/500], Train Loss: 0.3687
Epoch [8/500], Train Loss: 0.3563
Epoch [9/500], Train Loss: 0.3458
Epoch [10/500], Train Loss: 0.3358
Epoch [11/500], Train Loss: 0.3260
Epoch [12/500], Train Loss: 0.3173
Epoch [13/500], Train Loss: 0.3085
Epoch [14/500], Train Loss: 0.3005
Epoch [15/500], Train Loss: 0.2933
Epoch [16/500], Train Loss: 0.2867
Epoch [17/500], Train Loss: 0.2803
Epoch [18/500], Train Loss: 0.2738
Epoch [19/500], Train Loss: 0.2679
Epoch [20/500], Train Loss: 0.2621
Epoch [21/500], Train Loss: 0.2567
Epoch [22/500], Train Loss: 0.2515
Epoch [23/500], Train Loss: 0.2469
Epoch [24/500], Train Loss: 0.2424


[I 2024-04-29 19:31:03,669] A new study created in RDB with name: chr16_study


Individual AUC ROC curves saved in: ../../Data/model_results/logistic_regression/roc_curves/
Performance metrics saved at: ../../Data/model_results/logistic_regression/csv_files/performance_metrics.csv
Unknown PRS313 SNPs:  22
Known PRS313 SNPs:  6
23AndMe SNPs with LD to Unknown PRS313 SNPs:  684
Total SNPs used for Training:  690


[I 2024-04-29 19:32:06,262] Trial 3 finished with value: 0.4193107753992081 and parameters: {'learning_rate': 0.01308536146198817, 'l1_coef': 0.0035502151983793874, 'patience': 7, 'batch_size': 256}. Best is trial 3 with value: 0.4193107753992081.
[I 2024-04-29 19:32:17,605] Trial 4 finished with value: 0.4690867021679878 and parameters: {'learning_rate': 0.00404585431135319, 'l1_coef': 0.012058178007171006, 'patience': 7, 'batch_size': 128}. Best is trial 3 with value: 0.4193107753992081.
[I 2024-04-29 19:32:35,704] Trial 9 finished with value: 0.0910138359436622 and parameters: {'learning_rate': 0.0020616770987948213, 'l1_coef': 1.3005129000927816e-05, 'patience': 12, 'batch_size': 32}. Best is trial 9 with value: 0.0910138359436622.
[I 2024-04-29 19:32:54,319] Trial 5 finished with value: 0.4687295243144035 and parameters: {'learning_rate': 0.0022222263447522934, 'l1_coef': 0.030521662585361195, 'patience': 7, 'batch_size': 128}. Best is trial 9 with value: 0.0910138359436622.
[I 20

Chr 16 - Best hyperparameters: {'learning_rate': 0.0003280120292913132, 'l1_coef': 1.0215158769106902e-05, 'patience': 14, 'batch_size': 32}
Chr 16 - Best value: 0.0885
Epoch [1/500], Train Loss: 0.5113
Epoch [2/500], Train Loss: 0.4429
Epoch [3/500], Train Loss: 0.4234
Epoch [4/500], Train Loss: 0.4068
Epoch [5/500], Train Loss: 0.3918
Epoch [6/500], Train Loss: 0.3788
Epoch [7/500], Train Loss: 0.3664
Epoch [8/500], Train Loss: 0.3554
Epoch [9/500], Train Loss: 0.3450
Epoch [10/500], Train Loss: 0.3351
Epoch [11/500], Train Loss: 0.3259
Epoch [12/500], Train Loss: 0.3175
Epoch [13/500], Train Loss: 0.3093
Epoch [14/500], Train Loss: 0.3015
Epoch [15/500], Train Loss: 0.2944
Epoch [16/500], Train Loss: 0.2879
Epoch [17/500], Train Loss: 0.2811
Epoch [18/500], Train Loss: 0.2753
Epoch [19/500], Train Loss: 0.2696
Epoch [20/500], Train Loss: 0.2643
Epoch [21/500], Train Loss: 0.2586
Epoch [22/500], Train Loss: 0.2540
Epoch [23/500], Train Loss: 0.2493
Epoch [24/500], Train Loss: 0.2447


[I 2024-04-29 19:49:00,315] A new study created in RDB with name: chr17_study


Individual AUC ROC curves saved in: ../../Data/model_results/logistic_regression/roc_curves/
Performance metrics saved at: ../../Data/model_results/logistic_regression/csv_files/performance_metrics.csv
Unknown PRS313 SNPs:  14
Known PRS313 SNPs:  4
23AndMe SNPs with LD to Unknown PRS313 SNPs:  338
Total SNPs used for Training:  342


[I 2024-04-29 19:49:46,390] Trial 7 finished with value: 0.10023323318787983 and parameters: {'learning_rate': 0.009301793128644315, 'l1_coef': 0.00027987758277683145, 'patience': 6, 'batch_size': 64}. Best is trial 7 with value: 0.10023323318787983.
[I 2024-04-29 19:49:55,041] Trial 9 finished with value: 0.07243624201842716 and parameters: {'learning_rate': 0.03265620201609559, 'l1_coef': 0.00010993681014909236, 'patience': 9, 'batch_size': 64}. Best is trial 9 with value: 0.07243624201842716.
[I 2024-04-29 19:49:57,818] Trial 6 finished with value: 0.03745094425976277 and parameters: {'learning_rate': 0.03339398065686498, 'l1_coef': 1.2812549284855398e-05, 'patience': 16, 'batch_size': 64}. Best is trial 6 with value: 0.03745094425976277.
[I 2024-04-29 19:50:01,923] Trial 0 finished with value: 0.13237580209970473 and parameters: {'learning_rate': 0.06368970746430995, 'l1_coef': 0.0009671661438484299, 'patience': 8, 'batch_size': 128}. Best is trial 6 with value: 0.03745094425976277

Chr 17 - Best hyperparameters: {'learning_rate': 0.01875699494745296, 'l1_coef': 1.0107606908789018e-05, 'patience': 16, 'batch_size': 64}
Chr 17 - Best value: 0.0373
Epoch [1/500], Train Loss: 0.1989
Epoch [2/500], Train Loss: 0.0943
Epoch [3/500], Train Loss: 0.0715
Epoch [4/500], Train Loss: 0.0598
Epoch [5/500], Train Loss: 0.0530
Epoch [6/500], Train Loss: 0.0488
Epoch [7/500], Train Loss: 0.0453
Epoch [8/500], Train Loss: 0.0426
Epoch [9/500], Train Loss: 0.0408
Epoch [10/500], Train Loss: 0.0395
Epoch [11/500], Train Loss: 0.0384
Epoch [12/500], Train Loss: 0.0367
Epoch [13/500], Train Loss: 0.0357
Epoch [14/500], Train Loss: 0.0351
Epoch [15/500], Train Loss: 0.0341
Epoch [16/500], Train Loss: 0.0333
Epoch [17/500], Train Loss: 0.0333
Epoch [18/500], Train Loss: 0.0331
Epoch [19/500], Train Loss: 0.0328
Epoch [20/500], Train Loss: 0.0314
Epoch [21/500], Train Loss: 0.0310
Epoch [22/500], Train Loss: 0.0309
Epoch [23/500], Train Loss: 0.0303
Epoch [24/500], Train Loss: 0.0308
Ep

  iqs = (po - pc) / (1 - pc)


Individual R^2 scores saved at: ../../Data/model_results/logistic_regression/csv_files/chr17/individual_r2_scores_chr17.csv
Individual IQS scores saved at: ../../Data/model_results/logistic_regression/csv_files/chr17/individual_iqs_scores_chr17.csv




Skipping SNP chr17_40744470_G_A_PRS313_Unknown_maternal due to insufficient data
Individual AUC ROC curves saved in: ../../Data/model_results/logistic_regression/roc_curves/
Performance metrics saved at: ../../Data/model_results/logistic_regression/csv_files/performance_metrics.csv
Unknown PRS313 SNPs:  14
Known PRS313 SNPs:  4
23AndMe SNPs with LD to Unknown PRS313 SNPs:  530
Total SNPs used for Training:  534


[I 2024-04-29 19:59:21,297] A new study created in RDB with name: chr18_study
[I 2024-04-29 20:00:21,467] Trial 7 finished with value: 0.12149006985127926 and parameters: {'learning_rate': 0.022083494763382873, 'l1_coef': 7.677008929427689e-05, 'patience': 7, 'batch_size': 128}. Best is trial 7 with value: 0.12149006985127926.
[I 2024-04-29 20:00:42,230] Trial 5 finished with value: 0.5792624422482082 and parameters: {'learning_rate': 0.007530846927999032, 'l1_coef': 0.09571298372232974, 'patience': 10, 'batch_size': 64}. Best is trial 7 with value: 0.12149006985127926.
[I 2024-04-29 20:00:46,892] Trial 2 finished with value: 0.5799273282289505 and parameters: {'learning_rate': 0.007428490956064944, 'l1_coef': 0.01972277369696378, 'patience': 20, 'batch_size': 128}. Best is trial 7 with value: 0.12149006985127926.
[I 2024-04-29 20:00:48,157] Trial 3 finished with value: 0.5789377093315125 and parameters: {'learning_rate': 0.03499297451550964, 'l1_coef': 0.07647830193101936, 'patience':

Chr 18 - Best hyperparameters: {'learning_rate': 0.06940442887866152, 'l1_coef': 2.1499468833674833e-05, 'patience': 17, 'batch_size': 32}
Chr 18 - Best value: 0.0839
Epoch [1/500], Train Loss: 0.4538
Epoch [2/500], Train Loss: 0.1771
Epoch [3/500], Train Loss: 0.1533
Epoch [4/500], Train Loss: 0.1456
Epoch [5/500], Train Loss: 0.1374
Epoch [6/500], Train Loss: 0.1348
Epoch [7/500], Train Loss: 0.1366
Epoch [8/500], Train Loss: 0.1360
Epoch [9/500], Train Loss: 0.1434
Epoch [10/500], Train Loss: 0.1477
Epoch [11/500], Train Loss: 0.1390
Epoch [12/500], Train Loss: 0.1398
Epoch [13/500], Train Loss: 0.1004
Epoch [14/500], Train Loss: 0.0880
Epoch [15/500], Train Loss: 0.0835
Epoch [16/500], Train Loss: 0.0807
Epoch [17/500], Train Loss: 0.0780
Epoch [18/500], Train Loss: 0.0767
Epoch [19/500], Train Loss: 0.0752
Epoch [20/500], Train Loss: 0.0741
Epoch [21/500], Train Loss: 0.0734
Epoch [22/500], Train Loss: 0.0728
Epoch [23/500], Train Loss: 0.0724
Epoch [24/500], Train Loss: 0.0714
Ep

[I 2024-04-29 20:13:00,089] A new study created in RDB with name: chr19_study


Individual AUC ROC curves saved in: ../../Data/model_results/logistic_regression/roc_curves/
Performance metrics saved at: ../../Data/model_results/logistic_regression/csv_files/performance_metrics.csv
Unknown PRS313 SNPs:  14
Known PRS313 SNPs:  0
23AndMe SNPs with LD to Unknown PRS313 SNPs:  546
Total SNPs used for Training:  546


[I 2024-04-29 20:13:41,623] Trial 5 finished with value: 0.056032163091003895 and parameters: {'learning_rate': 0.010365152464200503, 'l1_coef': 1.2520309440952282e-05, 'patience': 7, 'batch_size': 128}. Best is trial 5 with value: 0.056032163091003895.
[I 2024-04-29 20:14:00,204] Trial 3 finished with value: 0.5410230142729623 and parameters: {'learning_rate': 0.01374941928863871, 'l1_coef': 0.01671100000883803, 'patience': 7, 'batch_size': 64}. Best is trial 5 with value: 0.056032163091003895.
[I 2024-04-29 20:14:35,533] Trial 10 finished with value: 0.07156809195876121 and parameters: {'learning_rate': 0.04097569500985306, 'l1_coef': 4.060128772944876e-05, 'patience': 12, 'batch_size': 256}. Best is trial 5 with value: 0.056032163091003895.
[I 2024-04-29 20:15:02,355] Trial 9 finished with value: 0.14486465305089952 and parameters: {'learning_rate': 0.0063126544599693395, 'l1_coef': 0.00048926426455746, 'patience': 8, 'batch_size': 128}. Best is trial 5 with value: 0.056032163091003

Chr 19 - Best hyperparameters: {'learning_rate': 0.07098016523175216, 'l1_coef': 1.0298921090840162e-05, 'patience': 11, 'batch_size': 128}
Chr 19 - Best value: 0.0443
Epoch [1/500], Train Loss: 1.0180
Epoch [2/500], Train Loss: 0.3405
Epoch [3/500], Train Loss: 0.2416
Epoch [4/500], Train Loss: 0.2077
Epoch [5/500], Train Loss: 0.1901
Epoch [6/500], Train Loss: 0.1786
Epoch [7/500], Train Loss: 0.1713
Epoch [8/500], Train Loss: 0.1683
Epoch [9/500], Train Loss: 0.1588
Epoch [10/500], Train Loss: 0.1526
Epoch [11/500], Train Loss: 0.1479
Epoch [12/500], Train Loss: 0.1413
Epoch [13/500], Train Loss: 0.1376
Epoch [14/500], Train Loss: 0.1350
Epoch [15/500], Train Loss: 0.1255
Epoch [16/500], Train Loss: 0.1224
Epoch [17/500], Train Loss: 0.1186
Epoch [18/500], Train Loss: 0.1125
Epoch [19/500], Train Loss: 0.1099
Epoch [20/500], Train Loss: 0.1051
Epoch [21/500], Train Loss: 0.0975
Epoch [22/500], Train Loss: 0.0776
Epoch [23/500], Train Loss: 0.0611
Epoch [24/500], Train Loss: 0.0450
E

[I 2024-04-29 20:24:18,507] A new study created in RDB with name: chr20_study


Individual AUC ROC curves saved in: ../../Data/model_results/logistic_regression/roc_curves/
Performance metrics saved at: ../../Data/model_results/logistic_regression/csv_files/performance_metrics.csv
Unknown PRS313 SNPs:  6
Known PRS313 SNPs:  2
23AndMe SNPs with LD to Unknown PRS313 SNPs:  114
Total SNPs used for Training:  116


[I 2024-04-29 20:25:28,482] Trial 3 finished with value: 0.3367856392493615 and parameters: {'learning_rate': 0.006824558695075257, 'l1_coef': 0.09944667346778027, 'patience': 5, 'batch_size': 32}. Best is trial 3 with value: 0.3367856392493615.
[I 2024-04-29 20:25:32,170] Trial 0 finished with value: 0.3106903821229935 and parameters: {'learning_rate': 0.03240973392742578, 'l1_coef': 0.010631284966781798, 'patience': 14, 'batch_size': 256}. Best is trial 0 with value: 0.3106903821229935.
[I 2024-04-29 20:25:39,967] Trial 6 finished with value: 0.2673515021800995 and parameters: {'learning_rate': 0.02638680338298886, 'l1_coef': 0.0033481866648036327, 'patience': 13, 'batch_size': 256}. Best is trial 6 with value: 0.2673515021800995.
[I 2024-04-29 20:25:52,812] Trial 9 finished with value: 0.31321465969085693 and parameters: {'learning_rate': 0.06698621399554416, 'l1_coef': 0.09852802867893738, 'patience': 19, 'batch_size': 256}. Best is trial 6 with value: 0.2673515021800995.
[I 2024-0

Chr 20 - Best hyperparameters: {'learning_rate': 0.011727770284572503, 'l1_coef': 1.0588262176994207e-05, 'patience': 9, 'batch_size': 128}
Chr 20 - Best value: 0.1148
Epoch [1/500], Train Loss: 0.3698
Epoch [2/500], Train Loss: 0.2776
Epoch [3/500], Train Loss: 0.2524
Epoch [4/500], Train Loss: 0.2355
Epoch [5/500], Train Loss: 0.2198
Epoch [6/500], Train Loss: 0.2075
Epoch [7/500], Train Loss: 0.1965
Epoch [8/500], Train Loss: 0.1874
Epoch [9/500], Train Loss: 0.1809
Epoch [10/500], Train Loss: 0.1759
Epoch [11/500], Train Loss: 0.1692
Epoch [12/500], Train Loss: 0.1646
Epoch [13/500], Train Loss: 0.1602
Epoch [14/500], Train Loss: 0.1571
Epoch [15/500], Train Loss: 0.1536
Epoch [16/500], Train Loss: 0.1496
Epoch [17/500], Train Loss: 0.1475
Epoch [18/500], Train Loss: 0.1451
Epoch [19/500], Train Loss: 0.1437
Epoch [20/500], Train Loss: 0.1415
Epoch [21/500], Train Loss: 0.1397
Epoch [22/500], Train Loss: 0.1379
Epoch [23/500], Train Loss: 0.1372
Epoch [24/500], Train Loss: 0.1356
E

[I 2024-04-29 20:35:36,855] A new study created in RDB with name: chr21_study


Individual AUC ROC curves saved in: ../../Data/model_results/logistic_regression/roc_curves/
Performance metrics saved at: ../../Data/model_results/logistic_regression/csv_files/performance_metrics.csv
Unknown PRS313 SNPs:  6
Known PRS313 SNPs:  4
23AndMe SNPs with LD to Unknown PRS313 SNPs:  78
Total SNPs used for Training:  82


[I 2024-04-29 20:36:32,538] Trial 1 finished with value: 0.07891834434121847 and parameters: {'learning_rate': 0.0324643186931176, 'l1_coef': 0.00026455472499356054, 'patience': 12, 'batch_size': 128}. Best is trial 1 with value: 0.07891834434121847.
[I 2024-04-29 20:36:49,522] Trial 0 finished with value: 0.08611242243876824 and parameters: {'learning_rate': 0.015971993834067895, 'l1_coef': 0.0004398579217543572, 'patience': 8, 'batch_size': 32}. Best is trial 1 with value: 0.07891834434121847.
[I 2024-04-29 20:37:13,780] Trial 5 finished with value: 0.13180186823010445 and parameters: {'learning_rate': 0.012846776509940115, 'l1_coef': 0.0011413456629516359, 'patience': 17, 'batch_size': 128}. Best is trial 1 with value: 0.07891834434121847.
[I 2024-04-29 20:37:46,406] Trial 7 finished with value: 0.2226619057930433 and parameters: {'learning_rate': 0.03928548980286149, 'l1_coef': 0.003989013564118772, 'patience': 20, 'batch_size': 32}. Best is trial 1 with value: 0.07891834434121847.

Chr 21 - Best hyperparameters: {'learning_rate': 0.07634654140233994, 'l1_coef': 1.331717466479133e-05, 'patience': 19, 'batch_size': 64}
Chr 21 - Best value: 0.0401
Epoch [1/500], Train Loss: 0.2650
Epoch [2/500], Train Loss: 0.0955
Epoch [3/500], Train Loss: 0.0732
Epoch [4/500], Train Loss: 0.0609
Epoch [5/500], Train Loss: 0.0566
Epoch [6/500], Train Loss: 0.0523
Epoch [7/500], Train Loss: 0.0503
Epoch [8/500], Train Loss: 0.0464
Epoch [9/500], Train Loss: 0.0467
Epoch [10/500], Train Loss: 0.0460
Epoch [11/500], Train Loss: 0.0442
Epoch [12/500], Train Loss: 0.0433
Epoch [13/500], Train Loss: 0.0424
Epoch [14/500], Train Loss: 0.0422
Epoch [15/500], Train Loss: 0.0420
Epoch [16/500], Train Loss: 0.0423
Epoch [17/500], Train Loss: 0.0421
Epoch [18/500], Train Loss: 0.0405
Epoch [19/500], Train Loss: 0.0393
Epoch [20/500], Train Loss: 0.0397
Epoch [21/500], Train Loss: 0.0403
Epoch [22/500], Train Loss: 0.0420
Epoch [23/500], Train Loss: 0.0398
Epoch [24/500], Train Loss: 0.0404
Epo

[I 2024-04-29 20:49:53,613] A new study created in RDB with name: chr22_study


Individual AUC ROC curves saved in: ../../Data/model_results/logistic_regression/roc_curves/
Performance metrics saved at: ../../Data/model_results/logistic_regression/csv_files/performance_metrics.csv
Unknown PRS313 SNPs:  16
Known PRS313 SNPs:  6
23AndMe SNPs with LD to Unknown PRS313 SNPs:  772
Total SNPs used for Training:  778


[I 2024-04-29 20:51:35,386] Trial 6 finished with value: 0.3216399495418255 and parameters: {'learning_rate': 0.0036843250580409466, 'l1_coef': 0.004277796589249102, 'patience': 10, 'batch_size': 32}. Best is trial 6 with value: 0.3216399495418255.
[I 2024-04-29 20:51:50,377] Trial 2 finished with value: 0.36244591176509855 and parameters: {'learning_rate': 0.021896072433815455, 'l1_coef': 0.09018192881124869, 'patience': 18, 'batch_size': 256}. Best is trial 6 with value: 0.3216399495418255.
[I 2024-04-29 20:52:49,264] Trial 3 finished with value: 0.23542637079954148 and parameters: {'learning_rate': 0.002061411685453651, 'l1_coef': 0.001157162313014243, 'patience': 7, 'batch_size': 256}. Best is trial 3 with value: 0.23542637079954148.
[I 2024-04-29 20:52:54,122] Trial 1 finished with value: 0.08027062576550704 and parameters: {'learning_rate': 0.0597263960351438, 'l1_coef': 2.5664944761999238e-05, 'patience': 13, 'batch_size': 32}. Best is trial 1 with value: 0.08027062576550704.
[I

Chr 22 - Best hyperparameters: {'learning_rate': 0.02943043957881216, 'l1_coef': 1.0232605917875621e-05, 'patience': 12, 'batch_size': 32}
Chr 22 - Best value: 0.0679
Epoch [1/500], Train Loss: 0.3801
Epoch [2/500], Train Loss: 0.1965
Epoch [3/500], Train Loss: 0.1698
Epoch [4/500], Train Loss: 0.1514
Epoch [5/500], Train Loss: 0.1297
Epoch [6/500], Train Loss: 0.0986
Epoch [7/500], Train Loss: 0.0921
Epoch [8/500], Train Loss: 0.0858
Epoch [9/500], Train Loss: 0.0784
Epoch [10/500], Train Loss: 0.0793
Epoch [11/500], Train Loss: 0.0795
Epoch [12/500], Train Loss: 0.0724
Epoch [13/500], Train Loss: 0.0716
Epoch [14/500], Train Loss: 0.0735
Epoch [15/500], Train Loss: 0.0705
Epoch [16/500], Train Loss: 0.0696
Epoch [17/500], Train Loss: 0.0672
Epoch [18/500], Train Loss: 0.0664
Epoch [19/500], Train Loss: 0.0628
Epoch [20/500], Train Loss: 0.0620
Epoch [21/500], Train Loss: 0.0613
Epoch [22/500], Train Loss: 0.0617
Epoch [23/500], Train Loss: 0.0631
Epoch [24/500], Train Loss: 0.0694
Ep

<Figure size 640x480 with 0 Axes>

In [None]:
import numpy as np

def calculate_iqs_unphased(true_genotypes, imputed_genotypes, threshold=0.5):
    """
    Calculate the Imputation Quality Score (IQS) for unphased genotypes.

    The score is the chance-corrected agreement ``(po - pc) / (1 - pc)`` computed
    from a 3x3 contingency table of true vs. imputed genotype classes (0, 1, 2).

    Args:
        true_genotypes (array-like): True genotypes with values 0, 1, or 2. Any shape is accepted
            (the data is flattened); non-integer values are truncated towards zero.
        imputed_genotypes (array-like): Imputed genotype dosages (floating-point, nominally between
            0 and 2) with the same shape as ``true_genotypes``. Dosages that discretise outside
            0..2 are clipped to the nearest valid genotype.
        threshold (float): Fractional part at or above which a dosage is rounded up to the next
            genotype (default: 0.5, i.e. round half up). Must satisfy ``0 < threshold <= 1``.

    Returns:
        float: Imputation Quality Score (IQS). NaN when the score is undefined, i.e. the input is
        empty or every true and imputed genotype falls in the same single class (pc == 1).

    Raises:
        ValueError: If the shapes differ, ``threshold`` is outside (0, 1], either array contains
            NaN/inf, or a true genotype is outside 0..2.
    """
    n_classes = 3  # genotype classes 0, 1 and 2

    if not 0 < threshold <= 1:
        raise ValueError("threshold must be in the interval (0, 1].")

    true_arr = np.asarray(true_genotypes, dtype=float)
    imputed_arr = np.asarray(imputed_genotypes, dtype=float)

    # Check if the shapes of true and imputed genotypes are the same
    if true_arr.shape != imputed_arr.shape:
        raise ValueError("Shape of true genotypes and imputed genotypes must be the same.")

    true_flat = true_arr.ravel()
    imputed_flat = imputed_arr.ravel()

    # With no observations the agreement is undefined
    if true_flat.size == 0:
        return np.float64(np.nan)

    if not (np.all(np.isfinite(true_flat)) and np.all(np.isfinite(imputed_flat))):
        raise ValueError("Genotype arrays must not contain NaN or infinite values.")

    true_classes = true_flat.astype(int)
    if np.any((true_classes < 0) | (true_classes >= n_classes)):
        raise ValueError("True genotypes must take values 0, 1, or 2.")

    # Convert imputed dosages to discrete genotypes based on the threshold:
    # round up when the fractional part reaches the threshold, otherwise round down.
    lower = np.floor(imputed_flat)
    discretised = lower + ((imputed_flat - lower) >= threshold)
    # Clip so that slight over/undershoot cannot index outside the table
    # (a negative index would otherwise silently wrap to the genotype-2 column).
    imputed_classes = np.clip(discretised, 0, n_classes - 1).astype(int)

    # Build the contingency table (rows: true genotype, columns: imputed genotype)
    contingency_table = np.bincount(
        true_classes * n_classes + imputed_classes,
        minlength=n_classes * n_classes,
    ).reshape(n_classes, n_classes)

    # Total number of genotype calls
    total_calls = int(contingency_table.sum())

    # Expected agreement by chance (Pc), kept as exact integers until the division
    true_counts = contingency_table.sum(axis=1)
    imputed_counts = contingency_table.sum(axis=0)
    chance_pairs = int(np.dot(true_counts, imputed_counts))

    # pc == 1 happens only when all calls sit in one diagonal cell; the score is then 0/0
    if chance_pairs == total_calls ** 2:
        return np.float64(np.nan)

    # Observed agreement (Po)
    po = np.trace(contingency_table) / total_calls
    pc = np.float64(chance_pairs) / np.float64(total_calls ** 2)

    # Calculate the Imputation Quality Score (IQS)
    iqs = (po - pc) / (1 - pc)

    return iqs