# Logistic Regression Model Unphased

In [5]:
import numpy as np

def calculate_iqs_unphased(true_genotypes, imputed_genotypes):
    """
    Calculate the Imputation Quality Score (IQS) for ununphased genotypes.

    Args:
        true_genotypes (numpy.ndarray): 2D array of true genotypes, where each genotype is represented by values 0, 1, or 2.
        imputed_genotypes (numpy.ndarray): 2D array of imputed genotypes, where each genotype is represented by floating-point values between 0 and 2.
        threshold (float): Threshold for converting imputed probabilities to discrete genotypes (default: 0.5).

    Returns:
        float: Imputation Quality Score (IQS).
    """
    # Check if the shapes of true and imputed genotypes are the same
    if true_genotypes.shape != imputed_genotypes.shape:
        raise ValueError("Shape of true genotypes and imputed genotypes must be the same.")

    # Convert imputed probabilities to discrete genotypes based on the threshold
    imputed_discrete = np.round(imputed_genotypes).astype(int)

    # Create a contingency table
    contingency_table = np.zeros((3, 3), dtype=int)

    # Fill the contingency table
    for true_geno, imputed_geno in zip(true_genotypes, imputed_discrete):
        for true_allele, imputed_allele in zip(true_geno, imputed_geno):
            contingency_table[int(true_allele), int(imputed_allele)] += 1

    # Calculate the total number of alleles
    total_alleles = np.sum(contingency_table)

    # Calculate the observed agreement (Po)
    po = np.sum(np.diag(contingency_table)) / total_alleles

    # Calculate the expected agreement by chance (Pc)
    true_counts = np.sum(contingency_table, axis=1)
    imputed_counts = np.sum(contingency_table, axis=0)
    pc = np.sum(true_counts * imputed_counts) / (total_alleles ** 2)

    # Calculate the Imputation Quality Score (IQS)
    iqs = (po - pc) / (1 - pc)

    return iqs

In [7]:
import numpy as np
import pandas as pd
import torch
import torch.nn as nn
import torch.optim as optim
from torch.utils.data import DataLoader, TensorDataset
from sklearn.model_selection import train_test_split, StratifiedKFold
from sklearn.metrics import precision_score, recall_score, f1_score, roc_auc_score, confusion_matrix, roc_curve
from sklearn.metrics import r2_score as sklearn_r2_score
import optuna
from matplotlib import pyplot as plt
import os
import csv

# Load and preprocess the data
data_directory = '../../Data/Filtered_unphased_training_data_union/'
start = 1

# Initialize lists to store the performance metrics for each chromosome
accuracies = []
precisions = []
recalls = []
false_positive_rates = []
auc_rocs = []
r2_scores = []
iqs_scores = []

# Create folders for saving files
output_folder = "../../Data/model_results_unphased_all_PRS/logistic_regression/"
model_folder = output_folder + "models_unphased/"
csv_folder = output_folder + "csv_files/"
curve_folder = output_folder + "roc_curves/"

os.makedirs(model_folder, exist_ok=True)
os.makedirs(csv_folder, exist_ok=True)
os.makedirs(curve_folder, exist_ok=True)

for chromosome_number in range(start, 23):
    # Create subfolders for the current chromosome
    chr_model_folder = model_folder + f"chr{chromosome_number}/"
    chr_csv_folder = csv_folder + f"chr{chromosome_number}/"
    chr_curve_folder = curve_folder + f"chr{chromosome_number}/"

    os.makedirs(chr_model_folder, exist_ok=True)
    os.makedirs(chr_csv_folder, exist_ok=True)
    os.makedirs(chr_curve_folder, exist_ok=True)

    file_name = data_directory + f"23AndMe_PRS313_merged_chr{chromosome_number}_matching_combined.parquet"
    data = pd.read_parquet(file_name)


    # Split the data into features and target
    X = torch.tensor(data.filter(regex='^(?!.*PRS)').values, dtype=torch.float32)
    y = torch.tensor(data.filter(regex='PRS').values, dtype=torch.float32)

    print("PRS313 SNPs: ", y.shape[1])
    print("Total SNPs used for Training: ", X.shape[1])
    print("Total number of data points: ", data.shape[1])


    # Split the data into train-validation and test sets
    X_train_val, X_test, y_train_val, y_test = train_test_split(X, y, test_size=0.2, random_state=42)

    # Define the logistic regression model with lasso regularization
    class LogisticRegression(nn.Module):
        def __init__(self, input_dim, output_dim, l1_coef=0.0):
            super(LogisticRegression, self).__init__()
            self.linear = nn.Linear(input_dim, output_dim)
            self.sigmoid = nn.Sigmoid()
            self.l1_coef = l1_coef

        def forward(self, x):
            out = self.linear(x)
            out = self.sigmoid(out)
            return out

        def l1_loss(self):
            return self.l1_coef * torch.norm(self.linear.weight, p=1)
        
    # Set the device
    device = torch.device("cuda" if torch.cuda.is_available() else "cpu")

    # Set the hyperparameters for tuning
    input_dim = X_train_val.shape[1]
    output_dim = y_train_val.shape[1]
    num_epochs = 500
    batch_size = 128

    # Define the objective function for Optuna with cross-validation and early stopping
    def objective(trial):
        learning_rate = trial.suggest_float('learning_rate', 1e-4, 1e-1, log=True)
        l1_coef = trial.suggest_float('l1_coef', 1e-5, 1e-1, log=True)
        patience = trial.suggest_int('patience', 5, 20)
        batch_size = trial.suggest_categorical('batch_size', [32, 64, 128, 256])

        model = LogisticRegression(input_dim, output_dim, l1_coef).to(device)
        optimizer = optim.Adam(model.parameters(), lr=learning_rate)
        criterion = nn.BCELoss()
        scheduler = optim.lr_scheduler.ReduceLROnPlateau(optimizer, mode='min', factor=0.1, patience=5, verbose=False)

        skf = StratifiedKFold(n_splits=5, shuffle=True, random_state=42)
        fold_losses = []

        for fold, (train_idx, val_idx) in enumerate(skf.split(X_train_val, y_train_val.argmax(dim=1))):
            X_train, X_val = X_train_val[train_idx], X_train_val[val_idx]
            y_train, y_val = y_train_val[train_idx], y_train_val[val_idx]

            train_dataset = TensorDataset(X_train, y_train)
            train_loader = DataLoader(train_dataset, batch_size=batch_size, shuffle=True)

            best_val_loss = float('inf')
            counter = 0

            for epoch in range(num_epochs):
                train_loss = 0.0
                for batch_X, batch_y in train_loader:
                    batch_X, batch_y = batch_X.to(device), batch_y.to(device)

                    outputs = model(batch_X)
                    loss = criterion(outputs, batch_y) + model.l1_loss()

                    optimizer.zero_grad()
                    loss.backward()
                    optimizer.step()

                    train_loss += loss.item()

                train_loss /= len(train_loader)

                val_dataset = TensorDataset(X_val, y_val)
                val_loader = DataLoader(val_dataset, batch_size=batch_size)

                with torch.no_grad():
                    val_loss = 0.0
                    for batch_X, batch_y in val_loader:
                        batch_X, batch_y = batch_X.to(device), batch_y.to(device)
                        outputs = model(batch_X)
                        loss = criterion(outputs, batch_y) + model.l1_loss()
                        val_loss += loss.item()

                    val_loss /= len(val_loader)
                    scheduler.step(val_loss)

                    if val_loss < best_val_loss:
                        best_val_loss = val_loss
                        counter = 0
                    else:
                        counter += 1

                    if counter >= patience:
                        # print(f"Early stopping at epoch {epoch+1}")
                        break

            fold_losses.append(best_val_loss)

        return np.mean(fold_losses)

    # Create the "optuna_studies" folder if it doesn't exist
    os.makedirs("optuna_studies", exist_ok=True)

    # Create an Optuna study and optimize the hyperparameters
    study_name = f"unphased_full_23andMe_chr{chromosome_number}_study"
    storage_name = f"sqlite:///optuna_studies/{study_name}.db"

    # Check if the study exists

    current_dir = os.getcwd()
    study_exists = os.path.exists(current_dir + f"/optuna_studies/{study_name}.db")
    
    if study_exists:
        # Load the existing study
        study = optuna.load_study(study_name=study_name, storage=storage_name)
    else:
        # Create a new study
        study = optuna.create_study(direction='minimize', study_name=study_name, storage=storage_name)

    study.optimize(objective, n_trials=50, n_jobs=-1)

    # Print the best hyperparameters and best value
    print(f"Chr {chromosome_number} - Best hyperparameters: {study.best_params}")
    print(f"Chr {chromosome_number} - Best value: {study.best_value:.4f}")

    # Train the final model with the best hyperparameters and early stopping
    best_learning_rate = study.best_params['learning_rate']
    best_l1_coef = study.best_params['l1_coef']
    best_patience = study.best_params['patience']
    best_batch_size = study.best_params['batch_size']

    model = LogisticRegression(input_dim, output_dim, best_l1_coef).to(device)
    optimizer = optim.Adam(model.parameters(), lr=best_learning_rate)
    criterion = nn.BCELoss()
    scheduler = optim.lr_scheduler.ReduceLROnPlateau(optimizer, mode='min', factor=0.1, patience=5, verbose=False)

    train_dataset = TensorDataset(X_train_val, y_train_val)
    train_loader = DataLoader(train_dataset, batch_size=best_batch_size, shuffle=True)

    best_train_loss = float('inf')
    counter = 0

    for epoch in range(num_epochs):
        train_loss = 0.0
        for batch_X, batch_y in train_loader:
            batch_X, batch_y = batch_X.to(device), batch_y.to(device)

            outputs = model(batch_X)
            loss = criterion(outputs, batch_y) + model.l1_loss()

            optimizer.zero_grad()
            loss.backward()
            optimizer.step()

            train_loss += loss.item()

        train_loss /= len(train_loader)
        print(f"Epoch [{epoch+1}/{num_epochs}], Train Loss: {train_loss:.4f}")

        if train_loss < best_train_loss:
            best_train_loss = train_loss
            counter = 0
        else:
            counter += 1

        if counter >= best_patience:
            print(f"Early stopping at epoch {epoch+1}")
            break

        scheduler.step(train_loss)

    # Save the final model
    model_save_path = chr_model_folder + f'final_model_chr{chromosome_number}.pth'
    torch.save(model.state_dict(), model_save_path)
    print(f"Final model saved at: {model_save_path}")

    # Evaluate the final model on the test set
    with torch.no_grad():
        test_outputs = model(X_test.to(device))
        test_preds = (test_outputs > 0.5).float()
        test_accuracy = float(((test_preds > 0.5) == y_test).float().mean())
        test_precision = precision_score(y_test.cpu().numpy(), test_preds.cpu().numpy(), average='micro')
        test_recall = recall_score(y_test.cpu().numpy(), test_preds.cpu().numpy(), average='micro')
        test_f1 = f1_score(y_test.cpu().numpy(), test_preds.cpu().numpy(), average='micro')
        test_roc_auc = roc_auc_score(y_test.cpu().numpy(), test_outputs.cpu().numpy(), average='micro')
        test_r2 = sklearn_r2_score(y_test.cpu().numpy(), test_outputs.cpu().numpy())
        test_iqs = calculate_iqs_unphased(y_test.cpu().numpy(), test_outputs.cpu().numpy())

        # Calculate false positive rate
        cm = confusion_matrix(y_test.cpu().numpy().ravel(), test_preds.cpu().numpy().ravel())
        tn, fp, fn, tp = cm.ravel()
        test_fpr = fp / (fp + tn)

        # Append performance metrics to the lists
        accuracies.append(test_accuracy)
        precisions.append(test_precision)
        recalls.append(test_recall)
        false_positive_rates.append(test_fpr)
        auc_rocs.append(test_roc_auc)
        r2_scores.append(test_r2)
        iqs_scores.append(test_iqs)

        # Calculate individual R^2 scores for each SNP
        individual_r2_scores = sklearn_r2_score(y_test.cpu().numpy(), test_outputs.cpu().numpy(), multioutput='raw_values')

        # Calculate individual IQS scores for each SNP
        individual_iqs_scores = np.array([calculate_iqs_unphased(y_test.cpu().numpy()[:, i].reshape(-1, 1), test_outputs.cpu().numpy()[:, i].reshape(-1, 1)) for i in range(y_test.shape[1])])

        # Get the names of the SNPs from the original dataframe
        snp_names = data.filter(regex='Unknown').columns

        # Save individual R^2 scores to a CSV file
        csv_file = chr_csv_folder + f'individual_r2_scores_chr{chromosome_number}.csv'

        with open(csv_file, 'w', newline='') as file:
            writer = csv.writer(file)
            writer.writerow(['SNP', 'R2 Score'])
            for snp, r2_score in zip(snp_names, individual_r2_scores):
                writer.writerow([snp, r2_score])

        print(f"Individual R^2 scores saved at: {csv_file}")

        # Save individual IQS scores to a CSV file
        iqs_csv_file = chr_csv_folder + f'individual_iqs_scores_chr{chromosome_number}.csv'

        with open(iqs_csv_file, 'w', newline='') as file:
            writer = csv.writer(file)
            writer.writerow(['SNP', 'IQS Score'])
            for snp, iqs_score in zip(snp_names, individual_iqs_scores):
                writer.writerow([snp, iqs_score])

        print(f"Individual IQS scores saved at: {iqs_csv_file}")

        # Save individual AUC ROC curves for each SNP
        for i, snp in enumerate(snp_names):
            try: 
                fpr, tpr, _ = roc_curve(y_test.cpu().numpy()[:, i], test_outputs.cpu().numpy()[:, i])
                plt.figure()
                plt.plot(fpr, tpr, label=f'AUC ROC = {roc_auc_score(y_test.cpu().numpy()[:, i], test_outputs.cpu().numpy()[:, i]):.4f}')
                plt.xlabel('False Positive Rate')
                plt.ylabel('True Positive Rate')
                plt.title(f'AUC ROC Curve - {snp}')
                plt.legend()
                
                curve_file = chr_curve_folder + f'auc_roc_curve_{snp}_chr{chromosome_number}.png'
                plt.savefig(curve_file)
                plt.close()
            except ValueError:
                # Save a placeholder image if there is insufficient data
                plt.figure()
                plt.axis('off')
                plt.text(0.5, 0.5, "Insufficient data for ROC curve", ha='center', va='center')
                curve_file = chr_curve_folder + f'auc_roc_curve_{snp}_chr{chromosome_number}.png'
                plt.savefig(curve_file)
                plt.close()

                print(f"Skipping SNP {snp} due to insufficient data")


        print(f"Individual AUC ROC curves saved in: {curve_folder}")

        # Create a DataFrame to store the performance metrics for each chromosome
        performance_df = pd.DataFrame({
            'Chromosome': list(range(start, chromosome_number + 1)),
            'Accuracy': accuracies,
            'Precision': precisions,
            'Recall': recalls,
            'False Positive Rate': false_positive_rates,
            'AUC ROC': auc_rocs,
            'R2 Score': r2_scores,
            'IQS Score': iqs_scores
        })

        # Save the performance metrics to a CSV file
        performance_csv_file = csv_folder + 'performance_metrics.csv'
        performance_df.to_csv(performance_csv_file, index=False)
        print(f"Performance metrics saved at: {performance_csv_file}")

PRS313 SNPs:  30
Total SNPs used for Training:  899
Total number of data points:  929


[I 2024-05-01 22:35:44,271] Trial 31 finished with value: 0.23869766294956202 and parameters: {'learning_rate': 0.00046618282217675543, 'l1_coef': 3.396736296361112e-05, 'patience': 18, 'batch_size': 32}. Best is trial 23 with value: 0.21533680076782521.
[I 2024-05-01 22:35:50,665] Trial 29 finished with value: 0.2430205228237005 and parameters: {'learning_rate': 0.0004732787587089775, 'l1_coef': 4.0839782147331704e-05, 'patience': 18, 'batch_size': 32}. Best is trial 23 with value: 0.21533680076782521.
[I 2024-05-01 22:35:57,363] Trial 30 finished with value: 0.23566499604628635 and parameters: {'learning_rate': 0.0004751003952521202, 'l1_coef': 3.3434276272614254e-05, 'patience': 18, 'batch_size': 32}. Best is trial 23 with value: 0.21533680076782521.
[I 2024-05-01 22:36:00,420] Trial 26 finished with value: 0.24412241807350746 and parameters: {'learning_rate': 0.00041979787968619473, 'l1_coef': 3.706922865854204e-05, 'patience': 18, 'batch_size': 32}. Best is trial 23 with value: 0.

Chr 1 - Best hyperparameters: {'learning_rate': 0.0002735965461351243, 'l1_coef': 1.1907411265535051e-05, 'patience': 11, 'batch_size': 32}
Chr 1 - Best value: 0.2153
Epoch [1/500], Train Loss: 0.5450
Epoch [2/500], Train Loss: 0.4828
Epoch [3/500], Train Loss: 0.4648
Epoch [4/500], Train Loss: 0.4520
Epoch [5/500], Train Loss: 0.4414
Epoch [6/500], Train Loss: 0.4323
Epoch [7/500], Train Loss: 0.4240
Epoch [8/500], Train Loss: 0.4167
Epoch [9/500], Train Loss: 0.4094
Epoch [10/500], Train Loss: 0.4025
Epoch [11/500], Train Loss: 0.3960
Epoch [12/500], Train Loss: 0.3902
Epoch [13/500], Train Loss: 0.3842
Epoch [14/500], Train Loss: 0.3789
Epoch [15/500], Train Loss: 0.3738
Epoch [16/500], Train Loss: 0.3690
Epoch [17/500], Train Loss: 0.3640
Epoch [18/500], Train Loss: 0.3597
Epoch [19/500], Train Loss: 0.3554
Epoch [20/500], Train Loss: 0.3510
Epoch [21/500], Train Loss: 0.3472
Epoch [22/500], Train Loss: 0.3433
Epoch [23/500], Train Loss: 0.3397
Epoch [24/500], Train Loss: 0.3362
Ep

[I 2024-05-01 22:47:47,824] A new study created in RDB with name: unphased_full_23andMe_chr2_study


Individual AUC ROC curves saved in: ../../Data/model_results_unphased/logistic_regression/roc_curves/
Performance metrics saved at: ../../Data/model_results_unphased/logistic_regression/csv_files/performance_metrics.csv
PRS313 SNPs:  21
Total SNPs used for Training:  608
Total number of data points:  629


[I 2024-05-01 22:48:11,941] Trial 6 finished with value: 0.5903141796588898 and parameters: {'learning_rate': 0.07058893891218238, 'l1_coef': 8.63212948781524e-05, 'patience': 7, 'batch_size': 256}. Best is trial 6 with value: 0.5903141796588898.
[I 2024-05-01 22:48:20,491] Trial 9 finished with value: 0.2697315735476358 and parameters: {'learning_rate': 0.043099580404332975, 'l1_coef': 0.00038838215072498577, 'patience': 5, 'batch_size': 64}. Best is trial 9 with value: 0.2697315735476358.
[I 2024-05-01 22:48:50,207] Trial 2 finished with value: 0.48126552999019623 and parameters: {'learning_rate': 0.05880969160056573, 'l1_coef': 0.014396346870948757, 'patience': 14, 'batch_size': 256}. Best is trial 9 with value: 0.2697315735476358.
[I 2024-05-01 22:48:58,049] Trial 4 finished with value: 0.489489971101284 and parameters: {'learning_rate': 0.0039833370511761605, 'l1_coef': 0.05735786386396264, 'patience': 8, 'batch_size': 128}. Best is trial 9 with value: 0.2697315735476358.
[I 2024-

Chr 2 - Best hyperparameters: {'learning_rate': 0.016950899718852348, 'l1_coef': 1.0411996344469413e-05, 'patience': 20, 'batch_size': 64}
Chr 2 - Best value: 0.1462
Epoch [1/500], Train Loss: 0.5204
Epoch [2/500], Train Loss: 0.3065
Epoch [3/500], Train Loss: 0.2603
Epoch [4/500], Train Loss: 0.2303
Epoch [5/500], Train Loss: 0.2160
Epoch [6/500], Train Loss: 0.2040
Epoch [7/500], Train Loss: 0.1951
Epoch [8/500], Train Loss: 0.1847
Epoch [9/500], Train Loss: 0.1790
Epoch [10/500], Train Loss: 0.1745
Epoch [11/500], Train Loss: 0.1705
Epoch [12/500], Train Loss: 0.1672
Epoch [13/500], Train Loss: 0.1682
Epoch [14/500], Train Loss: 0.1610
Epoch [15/500], Train Loss: 0.1570
Epoch [16/500], Train Loss: 0.1540
Epoch [17/500], Train Loss: 0.1526
Epoch [18/500], Train Loss: 0.1554
Epoch [19/500], Train Loss: 0.1511
Epoch [20/500], Train Loss: 0.1467
Epoch [21/500], Train Loss: 0.1498
Epoch [22/500], Train Loss: 0.1461
Epoch [23/500], Train Loss: 0.1439
Epoch [24/500], Train Loss: 0.1448
Epo

  iqs = (po - pc) / (1 - pc)


Skipping SNP chr2_217955896_GA_G_PRS313_Unknown_combined due to insufficient data
Individual AUC ROC curves saved in: ../../Data/model_results_unphased/logistic_regression/roc_curves/
Performance metrics saved at: ../../Data/model_results_unphased/logistic_regression/csv_files/performance_metrics.csv


[I 2024-05-01 22:57:30,571] A new study created in RDB with name: unphased_full_23andMe_chr3_study


PRS313 SNPs:  16
Total SNPs used for Training:  947
Total number of data points:  963


[I 2024-05-01 22:57:55,403] Trial 5 finished with value: 0.20662130543163845 and parameters: {'learning_rate': 0.022237615450329613, 'l1_coef': 0.00017690166102499598, 'patience': 6, 'batch_size': 64}. Best is trial 5 with value: 0.20662130543163845.
[I 2024-05-01 22:58:11,792] Trial 6 finished with value: 0.4383753925561905 and parameters: {'learning_rate': 0.023777909548230114, 'l1_coef': 0.0034194364383762664, 'patience': 6, 'batch_size': 128}. Best is trial 5 with value: 0.20662130543163845.
[I 2024-05-01 22:59:02,435] Trial 10 finished with value: 0.15343958213925363 and parameters: {'learning_rate': 0.04114686181219268, 'l1_coef': 6.919700742297884e-05, 'patience': 16, 'batch_size': 128}. Best is trial 10 with value: 0.15343958213925363.
[I 2024-05-01 22:59:04,785] Trial 4 finished with value: 0.16739339679479598 and parameters: {'learning_rate': 0.02701173563298861, 'l1_coef': 0.00010226821583208718, 'patience': 20, 'batch_size': 256}. Best is trial 10 with value: 0.153439582139

Chr 3 - Best hyperparameters: {'learning_rate': 0.00018920791905328765, 'l1_coef': 1.516420570921819e-05, 'patience': 10, 'batch_size': 32}
Chr 3 - Best value: 0.1376
Epoch [1/500], Train Loss: 0.5633
Epoch [2/500], Train Loss: 0.4988
Epoch [3/500], Train Loss: 0.4751
Epoch [4/500], Train Loss: 0.4586
Epoch [5/500], Train Loss: 0.4453
Epoch [6/500], Train Loss: 0.4334
Epoch [7/500], Train Loss: 0.4225
Epoch [8/500], Train Loss: 0.4134
Epoch [9/500], Train Loss: 0.4042
Epoch [10/500], Train Loss: 0.3958
Epoch [11/500], Train Loss: 0.3877
Epoch [12/500], Train Loss: 0.3802
Epoch [13/500], Train Loss: 0.3731
Epoch [14/500], Train Loss: 0.3666
Epoch [15/500], Train Loss: 0.3597
Epoch [16/500], Train Loss: 0.3542
Epoch [17/500], Train Loss: 0.3482
Epoch [18/500], Train Loss: 0.3424
Epoch [19/500], Train Loss: 0.3373
Epoch [20/500], Train Loss: 0.3316
Epoch [21/500], Train Loss: 0.3269
Epoch [22/500], Train Loss: 0.3219
Epoch [23/500], Train Loss: 0.3174
Epoch [24/500], Train Loss: 0.3131
Ep

[I 2024-05-01 23:12:58,563] A new study created in RDB with name: unphased_full_23andMe_chr4_study


Individual AUC ROC curves saved in: ../../Data/model_results_unphased/logistic_regression/roc_curves/
Performance metrics saved at: ../../Data/model_results_unphased/logistic_regression/csv_files/performance_metrics.csv
PRS313 SNPs:  11
Total SNPs used for Training:  1250
Total number of data points:  1261


[I 2024-05-01 23:13:58,551] Trial 6 finished with value: 0.6014754712581635 and parameters: {'learning_rate': 0.007970713368009046, 'l1_coef': 0.016625634265583437, 'patience': 10, 'batch_size': 256}. Best is trial 6 with value: 0.6014754712581635.
[I 2024-05-01 23:14:02,655] Trial 0 finished with value: 0.17399817556142808 and parameters: {'learning_rate': 0.00722364277588652, 'l1_coef': 0.0001500657296059251, 'patience': 12, 'batch_size': 256}. Best is trial 0 with value: 0.17399817556142808.
[I 2024-05-01 23:14:08,383] Trial 2 finished with value: 0.47130376696586607 and parameters: {'learning_rate': 0.015351369765233472, 'l1_coef': 0.006559149720303834, 'patience': 16, 'batch_size': 128}. Best is trial 0 with value: 0.17399817556142808.
[I 2024-05-01 23:14:08,972] Trial 3 finished with value: 1.0062456863267082 and parameters: {'learning_rate': 0.055072247345776315, 'l1_coef': 0.00039306925219795454, 'patience': 11, 'batch_size': 64}. Best is trial 0 with value: 0.17399817556142808

Chr 4 - Best hyperparameters: {'learning_rate': 0.0017558824520097246, 'l1_coef': 3.2949370797177006e-05, 'patience': 14, 'batch_size': 32}
Chr 4 - Best value: 0.1063
Epoch [1/500], Train Loss: 0.5394
Epoch [2/500], Train Loss: 0.4349
Epoch [3/500], Train Loss: 0.3780
Epoch [4/500], Train Loss: 0.3372
Epoch [5/500], Train Loss: 0.3065
Epoch [6/500], Train Loss: 0.2818
Epoch [7/500], Train Loss: 0.2613
Epoch [8/500], Train Loss: 0.2467
Epoch [9/500], Train Loss: 0.2335
Epoch [10/500], Train Loss: 0.2226
Epoch [11/500], Train Loss: 0.2113
Epoch [12/500], Train Loss: 0.2025
Epoch [13/500], Train Loss: 0.1965
Epoch [14/500], Train Loss: 0.1892
Epoch [15/500], Train Loss: 0.1827
Epoch [16/500], Train Loss: 0.1795
Epoch [17/500], Train Loss: 0.1712
Epoch [18/500], Train Loss: 0.1664
Epoch [19/500], Train Loss: 0.1618
Epoch [20/500], Train Loss: 0.1585
Epoch [21/500], Train Loss: 0.1543
Epoch [22/500], Train Loss: 0.1507
Epoch [23/500], Train Loss: 0.1474
Epoch [24/500], Train Loss: 0.1448
Ep

[I 2024-05-01 23:24:30,780] A new study created in RDB with name: unphased_full_23andMe_chr5_study


Individual AUC ROC curves saved in: ../../Data/model_results_unphased/logistic_regression/roc_curves/
Performance metrics saved at: ../../Data/model_results_unphased/logistic_regression/csv_files/performance_metrics.csv
PRS313 SNPs:  34
Total SNPs used for Training:  1255
Total number of data points:  1289


[I 2024-05-01 23:24:59,157] Trial 1 finished with value: 17.857323455810548 and parameters: {'learning_rate': 0.09780523884281815, 'l1_coef': 1.2583043144615872e-05, 'patience': 7, 'batch_size': 256}. Best is trial 1 with value: 17.857323455810548.
[I 2024-05-01 23:24:59,516] Trial 6 finished with value: 16.12308692932129 and parameters: {'learning_rate': 0.08474361493847583, 'l1_coef': 1.187511127849756e-05, 'patience': 6, 'batch_size': 256}. Best is trial 6 with value: 16.12308692932129.
[I 2024-05-01 23:25:30,492] Trial 5 finished with value: 0.5623886687414987 and parameters: {'learning_rate': 0.007641588401460431, 'l1_coef': 0.0047119091694144905, 'patience': 7, 'batch_size': 64}. Best is trial 5 with value: 0.5623886687414987.
[I 2024-05-01 23:25:32,449] Trial 3 finished with value: 0.3296238467097282 and parameters: {'learning_rate': 0.08048656407252053, 'l1_coef': 0.000374342985938701, 'patience': 9, 'batch_size': 128}. Best is trial 3 with value: 0.3296238467097282.
[I 2024-05

Chr 5 - Best hyperparameters: {'learning_rate': 0.006175753624595355, 'l1_coef': 1.632282423878164e-05, 'patience': 17, 'batch_size': 256}
Chr 5 - Best value: 0.1959
Epoch [1/500], Train Loss: 0.6486
Epoch [2/500], Train Loss: 0.5205
Epoch [3/500], Train Loss: 0.4695
Epoch [4/500], Train Loss: 0.4319
Epoch [5/500], Train Loss: 0.4062
Epoch [6/500], Train Loss: 0.3843
Epoch [7/500], Train Loss: 0.3664
Epoch [8/500], Train Loss: 0.3511
Epoch [9/500], Train Loss: 0.3374
Epoch [10/500], Train Loss: 0.3255
Epoch [11/500], Train Loss: 0.3155
Epoch [12/500], Train Loss: 0.3067
Epoch [13/500], Train Loss: 0.2989
Epoch [14/500], Train Loss: 0.2902
Epoch [15/500], Train Loss: 0.2841
Epoch [16/500], Train Loss: 0.2779
Epoch [17/500], Train Loss: 0.2721
Epoch [18/500], Train Loss: 0.2664
Epoch [19/500], Train Loss: 0.2618
Epoch [20/500], Train Loss: 0.2567
Epoch [21/500], Train Loss: 0.2523
Epoch [22/500], Train Loss: 0.2487
Epoch [23/500], Train Loss: 0.2454
Epoch [24/500], Train Loss: 0.2418
Epo

[I 2024-05-01 23:36:23,489] A new study created in RDB with name: unphased_full_23andMe_chr6_study


Individual AUC ROC curves saved in: ../../Data/model_results_unphased/logistic_regression/roc_curves/
Performance metrics saved at: ../../Data/model_results_unphased/logistic_regression/csv_files/performance_metrics.csv
PRS313 SNPs:  20
Total SNPs used for Training:  619
Total number of data points:  639


[I 2024-05-01 23:37:02,512] Trial 7 finished with value: 0.6497740626335144 and parameters: {'learning_rate': 0.03298751206929459, 'l1_coef': 0.011192500686418371, 'patience': 6, 'batch_size': 128}. Best is trial 7 with value: 0.6497740626335144.
[I 2024-05-01 23:37:05,076] Trial 8 finished with value: 0.483114954829216 and parameters: {'learning_rate': 0.017525446229979005, 'l1_coef': 0.003478358275726014, 'patience': 5, 'batch_size': 256}. Best is trial 8 with value: 0.483114954829216.
[I 2024-05-01 23:37:43,005] Trial 2 finished with value: 0.22227719724178313 and parameters: {'learning_rate': 0.0017528804512805985, 'l1_coef': 0.00010287805778372903, 'patience': 6, 'batch_size': 128}. Best is trial 2 with value: 0.22227719724178313.
[I 2024-05-01 23:38:05,684] Trial 11 finished with value: 0.19437559843063354 and parameters: {'learning_rate': 0.003530703793682379, 'l1_coef': 3.121121722202165e-05, 'patience': 14, 'batch_size': 128}. Best is trial 11 with value: 0.19437559843063354.


Chr 6 - Best hyperparameters: {'learning_rate': 0.009355606222633771, 'l1_coef': 1.064624334740972e-05, 'patience': 10, 'batch_size': 64}
Chr 6 - Best value: 0.1559
Epoch [1/500], Train Loss: 0.4956
Epoch [2/500], Train Loss: 0.3395
Epoch [3/500], Train Loss: 0.2871
Epoch [4/500], Train Loss: 0.2576
Epoch [5/500], Train Loss: 0.2357
Epoch [6/500], Train Loss: 0.2220
Epoch [7/500], Train Loss: 0.2118
Epoch [8/500], Train Loss: 0.2010
Epoch [9/500], Train Loss: 0.1951
Epoch [10/500], Train Loss: 0.1877
Epoch [11/500], Train Loss: 0.1840
Epoch [12/500], Train Loss: 0.1803
Epoch [13/500], Train Loss: 0.1758
Epoch [14/500], Train Loss: 0.1712
Epoch [15/500], Train Loss: 0.1705
Epoch [16/500], Train Loss: 0.1672
Epoch [17/500], Train Loss: 0.1617
Epoch [18/500], Train Loss: 0.1615
Epoch [19/500], Train Loss: 0.1599
Epoch [20/500], Train Loss: 0.1586
Epoch [21/500], Train Loss: 0.1558
Epoch [22/500], Train Loss: 0.1538
Epoch [23/500], Train Loss: 0.1531
Epoch [24/500], Train Loss: 0.1506
Epoc

[I 2024-05-01 23:46:47,082] A new study created in RDB with name: unphased_full_23andMe_chr7_study


Individual AUC ROC curves saved in: ../../Data/model_results_unphased/logistic_regression/roc_curves/
Performance metrics saved at: ../../Data/model_results_unphased/logistic_regression/csv_files/performance_metrics.csv
PRS313 SNPs:  14
Total SNPs used for Training:  451
Total number of data points:  465


[I 2024-05-01 23:47:45,355] Trial 6 finished with value: 0.5852889929498944 and parameters: {'learning_rate': 0.0015328305217893487, 'l1_coef': 0.014883217275816325, 'patience': 5, 'batch_size': 64}. Best is trial 6 with value: 0.5852889929498944.
[I 2024-05-01 23:47:45,541] Trial 4 finished with value: 0.34082130193710325 and parameters: {'learning_rate': 0.03525410670458659, 'l1_coef': 0.0005831042549032662, 'patience': 7, 'batch_size': 256}. Best is trial 4 with value: 0.34082130193710325.
[I 2024-05-01 23:47:49,176] Trial 5 finished with value: 0.23308630841118952 and parameters: {'learning_rate': 0.035614348621650825, 'l1_coef': 2.5911919176472674e-05, 'patience': 15, 'batch_size': 64}. Best is trial 5 with value: 0.23308630841118952.
[I 2024-05-01 23:48:13,272] Trial 3 finished with value: 0.23323389612711393 and parameters: {'learning_rate': 0.0029142950408033047, 'l1_coef': 1.2907782471947815e-05, 'patience': 18, 'batch_size': 32}. Best is trial 5 with value: 0.2330863084111895

Chr 7 - Best hyperparameters: {'learning_rate': 0.012846110188472303, 'l1_coef': 1.0897159611229411e-05, 'patience': 10, 'batch_size': 64}
Chr 7 - Best value: 0.2221
Epoch [1/500], Train Loss: 0.5212
Epoch [2/500], Train Loss: 0.3948
Epoch [3/500], Train Loss: 0.3450
Epoch [4/500], Train Loss: 0.3183
Epoch [5/500], Train Loss: 0.2956
Epoch [6/500], Train Loss: 0.2825
Epoch [7/500], Train Loss: 0.2720
Epoch [8/500], Train Loss: 0.2647
Epoch [9/500], Train Loss: 0.2581
Epoch [10/500], Train Loss: 0.2523
Epoch [11/500], Train Loss: 0.2485
Epoch [12/500], Train Loss: 0.2447
Epoch [13/500], Train Loss: 0.2398
Epoch [14/500], Train Loss: 0.2393
Epoch [15/500], Train Loss: 0.2377
Epoch [16/500], Train Loss: 0.2334
Epoch [17/500], Train Loss: 0.2304
Epoch [18/500], Train Loss: 0.2272
Epoch [19/500], Train Loss: 0.2267
Epoch [20/500], Train Loss: 0.2241
Epoch [21/500], Train Loss: 0.2226
Epoch [22/500], Train Loss: 0.2237
Epoch [23/500], Train Loss: 0.2196
Epoch [24/500], Train Loss: 0.2206
Epo

[I 2024-05-01 23:55:28,798] A new study created in RDB with name: unphased_full_23andMe_chr8_study


Individual AUC ROC curves saved in: ../../Data/model_results_unphased/logistic_regression/roc_curves/
Performance metrics saved at: ../../Data/model_results_unphased/logistic_regression/csv_files/performance_metrics.csv
PRS313 SNPs:  21
Total SNPs used for Training:  433
Total number of data points:  454


[I 2024-05-01 23:56:19,961] Trial 5 finished with value: 0.6256189346313477 and parameters: {'learning_rate': 0.010973024291466764, 'l1_coef': 0.035586432785081334, 'patience': 5, 'batch_size': 256}. Best is trial 5 with value: 0.6256189346313477.
[I 2024-05-01 23:56:44,669] Trial 3 finished with value: 0.5427202999591827 and parameters: {'learning_rate': 0.005834974023570938, 'l1_coef': 0.08486395033056966, 'patience': 18, 'batch_size': 256}. Best is trial 3 with value: 0.5427202999591827.
[I 2024-05-01 23:56:48,340] Trial 1 finished with value: 0.529572319984436 and parameters: {'learning_rate': 0.004210343214803222, 'l1_coef': 0.009998662586007746, 'patience': 9, 'batch_size': 256}. Best is trial 1 with value: 0.529572319984436.
[I 2024-05-01 23:56:48,506] Trial 9 finished with value: 0.34033633172512057 and parameters: {'learning_rate': 0.07834233983623591, 'l1_coef': 0.0003448983887646344, 'patience': 18, 'batch_size': 256}. Best is trial 9 with value: 0.34033633172512057.
[I 2024

Chr 8 - Best hyperparameters: {'learning_rate': 0.030673539645964285, 'l1_coef': 1.72701683025551e-05, 'patience': 14, 'batch_size': 64}
Chr 8 - Best value: 0.2455
Epoch [1/500], Train Loss: 0.6367
Epoch [2/500], Train Loss: 0.4235
Epoch [3/500], Train Loss: 0.3909
Epoch [4/500], Train Loss: 0.3683
Epoch [5/500], Train Loss: 0.3316
Epoch [6/500], Train Loss: 0.2980
Epoch [7/500], Train Loss: 0.2871
Epoch [8/500], Train Loss: 0.2758
Epoch [9/500], Train Loss: 0.2708
Epoch [10/500], Train Loss: 0.2761
Epoch [11/500], Train Loss: 0.2739
Epoch [12/500], Train Loss: 0.2723
Epoch [13/500], Train Loss: 0.2665
Epoch [14/500], Train Loss: 0.2667
Epoch [15/500], Train Loss: 0.2646
Epoch [16/500], Train Loss: 0.2628
Epoch [17/500], Train Loss: 0.2656
Epoch [18/500], Train Loss: 0.2594
Epoch [19/500], Train Loss: 0.2589
Epoch [20/500], Train Loss: 0.2573
Epoch [21/500], Train Loss: 0.2758
Epoch [22/500], Train Loss: 0.2665
Epoch [23/500], Train Loss: 0.2654
Epoch [24/500], Train Loss: 0.2549
Epoch

[I 2024-05-02 00:05:19,504] A new study created in RDB with name: unphased_full_23andMe_chr9_study


Individual AUC ROC curves saved in: ../../Data/model_results_unphased/logistic_regression/roc_curves/
Performance metrics saved at: ../../Data/model_results_unphased/logistic_regression/csv_files/performance_metrics.csv
PRS313 SNPs:  14
Total SNPs used for Training:  390
Total number of data points:  404


[I 2024-05-02 00:06:19,281] Trial 2 finished with value: 0.48859723657369614 and parameters: {'learning_rate': 0.02818206155934389, 'l1_coef': 0.003971857485063151, 'patience': 14, 'batch_size': 128}. Best is trial 2 with value: 0.48859723657369614.
[I 2024-05-02 00:06:19,395] Trial 3 finished with value: 0.2575140718902861 and parameters: {'learning_rate': 0.0033013585698940836, 'l1_coef': 1.852256658866105e-05, 'patience': 16, 'batch_size': 64}. Best is trial 3 with value: 0.2575140718902861.
[I 2024-05-02 00:06:23,039] Trial 1 finished with value: 0.5239649444818497 and parameters: {'learning_rate': 0.0062780998872387864, 'l1_coef': 0.016675780627409285, 'patience': 9, 'batch_size': 128}. Best is trial 3 with value: 0.2575140718902861.
[I 2024-05-02 00:06:27,139] Trial 8 finished with value: 0.5210845172405243 and parameters: {'learning_rate': 0.07638154062413109, 'l1_coef': 0.01055350643085741, 'patience': 17, 'batch_size': 256}. Best is trial 3 with value: 0.2575140718902861.
[I 2

Chr 9 - Best hyperparameters: {'learning_rate': 0.004852861570618455, 'l1_coef': 1.0255320838677007e-05, 'patience': 11, 'batch_size': 32}
Chr 9 - Best value: 0.2366
Epoch [1/500], Train Loss: 0.4823
Epoch [2/500], Train Loss: 0.3947
Epoch [3/500], Train Loss: 0.3590
Epoch [4/500], Train Loss: 0.3335
Epoch [5/500], Train Loss: 0.3159
Epoch [6/500], Train Loss: 0.3039
Epoch [7/500], Train Loss: 0.2972
Epoch [8/500], Train Loss: 0.2838
Epoch [9/500], Train Loss: 0.2775
Epoch [10/500], Train Loss: 0.2721
Epoch [11/500], Train Loss: 0.2676
Epoch [12/500], Train Loss: 0.2624
Epoch [13/500], Train Loss: 0.2582
Epoch [14/500], Train Loss: 0.2551
Epoch [15/500], Train Loss: 0.2525
Epoch [16/500], Train Loss: 0.2492
Epoch [17/500], Train Loss: 0.2488
Epoch [18/500], Train Loss: 0.2453
Epoch [19/500], Train Loss: 0.2429
Epoch [20/500], Train Loss: 0.2401
Epoch [21/500], Train Loss: 0.2390
Epoch [22/500], Train Loss: 0.2360
Epoch [23/500], Train Loss: 0.2359
Epoch [24/500], Train Loss: 0.2337
Epo

[I 2024-05-02 00:15:01,950] A new study created in RDB with name: unphased_full_23andMe_chr10_study


Individual AUC ROC curves saved in: ../../Data/model_results_unphased/logistic_regression/roc_curves/
Performance metrics saved at: ../../Data/model_results_unphased/logistic_regression/csv_files/performance_metrics.csv
PRS313 SNPs:  18
Total SNPs used for Training:  590
Total number of data points:  608


[I 2024-05-02 00:15:54,869] Trial 4 finished with value: 0.7798356592655182 and parameters: {'learning_rate': 0.012357351799371707, 'l1_coef': 0.0734838846756544, 'patience': 6, 'batch_size': 256}. Best is trial 4 with value: 0.7798356592655182.
[I 2024-05-02 00:15:55,631] Trial 9 finished with value: 0.21279608011245726 and parameters: {'learning_rate': 0.015892160126141593, 'l1_coef': 0.00011224188264173098, 'patience': 11, 'batch_size': 256}. Best is trial 9 with value: 0.21279608011245726.
[I 2024-05-02 00:15:56,445] Trial 3 finished with value: 0.5160074234008789 and parameters: {'learning_rate': 0.0853197955207981, 'l1_coef': 0.011834392620449765, 'patience': 7, 'batch_size': 256}. Best is trial 9 with value: 0.21279608011245726.
[I 2024-05-02 00:16:43,671] Trial 6 finished with value: 0.1668602862037145 and parameters: {'learning_rate': 0.0016288173319657757, 'l1_coef': 1.3790417432102332e-05, 'patience': 19, 'batch_size': 32}. Best is trial 6 with value: 0.1668602862037145.
[I 

Chr 10 - Best hyperparameters: {'learning_rate': 0.0007930217227108054, 'l1_coef': 1.0147575362540668e-05, 'patience': 15, 'batch_size': 64}
Chr 10 - Best value: 0.1654
Epoch [1/500], Train Loss: 0.5385
Epoch [2/500], Train Loss: 0.4597
Epoch [3/500], Train Loss: 0.4359
Epoch [4/500], Train Loss: 0.4206
Epoch [5/500], Train Loss: 0.4057
Epoch [6/500], Train Loss: 0.3938
Epoch [7/500], Train Loss: 0.3825
Epoch [8/500], Train Loss: 0.3727
Epoch [9/500], Train Loss: 0.3635
Epoch [10/500], Train Loss: 0.3547
Epoch [11/500], Train Loss: 0.3466
Epoch [12/500], Train Loss: 0.3395
Epoch [13/500], Train Loss: 0.3334
Epoch [14/500], Train Loss: 0.3259
Epoch [15/500], Train Loss: 0.3196
Epoch [16/500], Train Loss: 0.3143
Epoch [17/500], Train Loss: 0.3089
Epoch [18/500], Train Loss: 0.3032
Epoch [19/500], Train Loss: 0.2987
Epoch [20/500], Train Loss: 0.2937
Epoch [21/500], Train Loss: 0.2892
Epoch [22/500], Train Loss: 0.2848
Epoch [23/500], Train Loss: 0.2805
Epoch [24/500], Train Loss: 0.2774


[I 2024-05-02 00:28:00,256] A new study created in RDB with name: unphased_full_23andMe_chr11_study


Individual AUC ROC curves saved in: ../../Data/model_results_unphased/logistic_regression/roc_curves/
Performance metrics saved at: ../../Data/model_results_unphased/logistic_regression/csv_files/performance_metrics.csv
PRS313 SNPs:  19
Total SNPs used for Training:  900
Total number of data points:  919


[I 2024-05-02 00:28:48,787] Trial 1 finished with value: 1.6022169879504613 and parameters: {'learning_rate': 0.08106720871068203, 'l1_coef': 0.02851510047259923, 'patience': 6, 'batch_size': 64}. Best is trial 1 with value: 1.6022169879504613.
[I 2024-05-02 00:29:06,414] Trial 2 finished with value: 0.5094608217477798 and parameters: {'learning_rate': 0.009719835251371104, 'l1_coef': 0.031888781856404785, 'patience': 7, 'batch_size': 256}. Best is trial 2 with value: 0.5094608217477798.
[I 2024-05-02 00:29:13,940] Trial 7 finished with value: 0.22876508086919783 and parameters: {'learning_rate': 0.03843728343498024, 'l1_coef': 0.0006035764756057384, 'patience': 15, 'batch_size': 128}. Best is trial 7 with value: 0.22876508086919783.
[I 2024-05-02 00:29:14,113] Trial 4 finished with value: 0.1395937129855156 and parameters: {'learning_rate': 0.02759666239950039, 'l1_coef': 0.00012966957333261422, 'patience': 10, 'batch_size': 64}. Best is trial 4 with value: 0.1395937129855156.
[I 2024

Chr 11 - Best hyperparameters: {'learning_rate': 0.002667397857914855, 'l1_coef': 1.0730768195156911e-05, 'patience': 13, 'batch_size': 32}
Chr 11 - Best value: 0.0762
Epoch [1/500], Train Loss: 0.4335
Epoch [2/500], Train Loss: 0.3336
Epoch [3/500], Train Loss: 0.2815
Epoch [4/500], Train Loss: 0.2487
Epoch [5/500], Train Loss: 0.2235
Epoch [6/500], Train Loss: 0.2036
Epoch [7/500], Train Loss: 0.1880
Epoch [8/500], Train Loss: 0.1758
Epoch [9/500], Train Loss: 0.1639
Epoch [10/500], Train Loss: 0.1557
Epoch [11/500], Train Loss: 0.1481
Epoch [12/500], Train Loss: 0.1406
Epoch [13/500], Train Loss: 0.1343
Epoch [14/500], Train Loss: 0.1290
Epoch [15/500], Train Loss: 0.1242
Epoch [16/500], Train Loss: 0.1193
Epoch [17/500], Train Loss: 0.1157
Epoch [18/500], Train Loss: 0.1124
Epoch [19/500], Train Loss: 0.1095
Epoch [20/500], Train Loss: 0.1069
Epoch [21/500], Train Loss: 0.1032
Epoch [22/500], Train Loss: 0.1009
Epoch [23/500], Train Loss: 0.0981
Epoch [24/500], Train Loss: 0.0963
E

[I 2024-05-02 00:39:37,966] A new study created in RDB with name: unphased_full_23andMe_chr12_study


Individual AUC ROC curves saved in: ../../Data/model_results_unphased/logistic_regression/roc_curves/
Performance metrics saved at: ../../Data/model_results_unphased/logistic_regression/csv_files/performance_metrics.csv
PRS313 SNPs:  17
Total SNPs used for Training:  624
Total number of data points:  641


[I 2024-05-02 00:40:10,762] Trial 8 finished with value: 0.35217066407203673 and parameters: {'learning_rate': 0.04234962107279656, 'l1_coef': 0.0006062698492656181, 'patience': 5, 'batch_size': 256}. Best is trial 8 with value: 0.35217066407203673.
[I 2024-05-02 00:40:34,850] Trial 0 finished with value: 0.5730809390544891 and parameters: {'learning_rate': 0.015854926955274743, 'l1_coef': 0.015293991843307427, 'patience': 11, 'batch_size': 256}. Best is trial 8 with value: 0.35217066407203673.
[I 2024-05-02 00:40:39,271] Trial 7 finished with value: 0.5696351187569755 and parameters: {'learning_rate': 0.04838761585638373, 'l1_coef': 0.012357068564439806, 'patience': 7, 'batch_size': 64}. Best is trial 8 with value: 0.35217066407203673.
[I 2024-05-02 00:40:49,242] Trial 3 finished with value: 0.22321803134221296 and parameters: {'learning_rate': 0.001865598518848829, 'l1_coef': 1.331522473509812e-05, 'patience': 8, 'batch_size': 32}. Best is trial 3 with value: 0.22321803134221296.
[I 

Chr 12 - Best hyperparameters: {'learning_rate': 0.01866771437914333, 'l1_coef': 1.028544190002222e-05, 'patience': 14, 'batch_size': 32}
Chr 12 - Best value: 0.1993
Epoch [1/500], Train Loss: 0.5164
Epoch [2/500], Train Loss: 0.3355
Epoch [3/500], Train Loss: 0.2869
Epoch [4/500], Train Loss: 0.2612
Epoch [5/500], Train Loss: 0.2523
Epoch [6/500], Train Loss: 0.2421
Epoch [7/500], Train Loss: 0.2322
Epoch [8/500], Train Loss: 0.2259
Epoch [9/500], Train Loss: 0.2186
Epoch [10/500], Train Loss: 0.2258
Epoch [11/500], Train Loss: 0.2155
Epoch [12/500], Train Loss: 0.2205
Epoch [13/500], Train Loss: 0.2096
Epoch [14/500], Train Loss: 0.2083
Epoch [15/500], Train Loss: 0.2063
Epoch [16/500], Train Loss: 0.2038
Epoch [17/500], Train Loss: 0.2070
Epoch [18/500], Train Loss: 0.2023
Epoch [19/500], Train Loss: 0.2078
Epoch [20/500], Train Loss: 0.2010
Epoch [21/500], Train Loss: 0.2034
Epoch [22/500], Train Loss: 0.2070
Epoch [23/500], Train Loss: 0.2023
Epoch [24/500], Train Loss: 0.1976
Epo

[I 2024-05-02 00:50:02,574] A new study created in RDB with name: unphased_full_23andMe_chr13_study


Individual AUC ROC curves saved in: ../../Data/model_results_unphased/logistic_regression/roc_curves/
Performance metrics saved at: ../../Data/model_results_unphased/logistic_regression/csv_files/performance_metrics.csv
PRS313 SNPs:  5
Total SNPs used for Training:  107
Total number of data points:  112


[I 2024-05-02 00:50:31,174] Trial 8 finished with value: 0.11301147448165076 and parameters: {'learning_rate': 0.027439042302910208, 'l1_coef': 3.54732934313976e-05, 'patience': 7, 'batch_size': 64}. Best is trial 8 with value: 0.11301147448165076.
[I 2024-05-02 00:50:47,453] Trial 2 finished with value: 0.10634376624455819 and parameters: {'learning_rate': 0.03984260543028238, 'l1_coef': 1.1885223094048135e-05, 'patience': 11, 'batch_size': 32}. Best is trial 2 with value: 0.10634376624455819.
[I 2024-05-02 00:50:48,848] Trial 0 finished with value: 0.125721180668244 and parameters: {'learning_rate': 0.022892651851553803, 'l1_coef': 0.00014813026706890703, 'patience': 7, 'batch_size': 32}. Best is trial 2 with value: 0.10634376624455819.
[I 2024-05-02 00:50:59,580] Trial 9 finished with value: 0.11026702970266342 and parameters: {'learning_rate': 0.08695272127595947, 'l1_coef': 4.570043201350424e-05, 'patience': 20, 'batch_size': 128}. Best is trial 2 with value: 0.10634376624455819.


Chr 13 - Best hyperparameters: {'learning_rate': 0.03984260543028238, 'l1_coef': 1.1885223094048135e-05, 'patience': 11, 'batch_size': 32}
Chr 13 - Best value: 0.1063
Epoch [1/500], Train Loss: 0.2385
Epoch [2/500], Train Loss: 0.1557
Epoch [3/500], Train Loss: 0.1378
Epoch [4/500], Train Loss: 0.1289
Epoch [5/500], Train Loss: 0.1284
Epoch [6/500], Train Loss: 0.1231
Epoch [7/500], Train Loss: 0.1206
Epoch [8/500], Train Loss: 0.1226
Epoch [9/500], Train Loss: 0.1198
Epoch [10/500], Train Loss: 0.1194
Epoch [11/500], Train Loss: 0.1202
Epoch [12/500], Train Loss: 0.1184
Epoch [13/500], Train Loss: 0.1232
Epoch [14/500], Train Loss: 0.1189
Epoch [15/500], Train Loss: 0.1251
Epoch [16/500], Train Loss: 0.1177
Epoch [17/500], Train Loss: 0.1137
Epoch [18/500], Train Loss: 0.1145
Epoch [19/500], Train Loss: 0.1132
Epoch [20/500], Train Loss: 0.1142
Epoch [21/500], Train Loss: 0.1181
Epoch [22/500], Train Loss: 0.1148
Epoch [23/500], Train Loss: 0.1157
Epoch [24/500], Train Loss: 0.1137
Ep

[I 2024-05-02 00:59:21,487] A new study created in RDB with name: unphased_full_23andMe_chr14_study


Individual AUC ROC curves saved in: ../../Data/model_results_unphased/logistic_regression/roc_curves/
Performance metrics saved at: ../../Data/model_results_unphased/logistic_regression/csv_files/performance_metrics.csv
PRS313 SNPs:  8
Total SNPs used for Training:  156
Total number of data points:  164


[I 2024-05-02 00:59:44,153] Trial 7 finished with value: 0.23655600398778914 and parameters: {'learning_rate': 0.08711335753712536, 'l1_coef': 1.3439269257624567e-05, 'patience': 7, 'batch_size': 256}. Best is trial 7 with value: 0.23655600398778914.
[I 2024-05-02 01:00:06,321] Trial 0 finished with value: 0.23945876955986023 and parameters: {'learning_rate': 0.08786064242446812, 'l1_coef': 3.753241328355683e-05, 'patience': 15, 'batch_size': 256}. Best is trial 7 with value: 0.23655600398778914.
[I 2024-05-02 01:00:08,256] Trial 1 finished with value: 0.24196629896759986 and parameters: {'learning_rate': 0.010168448826053611, 'l1_coef': 1.4159644452853219e-05, 'patience': 14, 'batch_size': 128}. Best is trial 7 with value: 0.23655600398778914.
[I 2024-05-02 01:00:30,489] Trial 9 finished with value: 0.4932334567819322 and parameters: {'learning_rate': 0.011032944077553926, 'l1_coef': 0.05475297778058169, 'patience': 7, 'batch_size': 64}. Best is trial 7 with value: 0.23655600398778914

Chr 14 - Best hyperparameters: {'learning_rate': 0.007008886346494741, 'l1_coef': 1.1625969051564308e-05, 'patience': 9, 'batch_size': 32}
Chr 14 - Best value: 0.2350
Epoch [1/500], Train Loss: 0.4366
Epoch [2/500], Train Loss: 0.3530
Epoch [3/500], Train Loss: 0.3207
Epoch [4/500], Train Loss: 0.3015
Epoch [5/500], Train Loss: 0.2897
Epoch [6/500], Train Loss: 0.2790
Epoch [7/500], Train Loss: 0.2719
Epoch [8/500], Train Loss: 0.2670
Epoch [9/500], Train Loss: 0.2620
Epoch [10/500], Train Loss: 0.2574
Epoch [11/500], Train Loss: 0.2552
Epoch [12/500], Train Loss: 0.2521
Epoch [13/500], Train Loss: 0.2512
Epoch [14/500], Train Loss: 0.2481
Epoch [15/500], Train Loss: 0.2460
Epoch [16/500], Train Loss: 0.2443
Epoch [17/500], Train Loss: 0.2435
Epoch [18/500], Train Loss: 0.2418
Epoch [19/500], Train Loss: 0.2402
Epoch [20/500], Train Loss: 0.2404
Epoch [21/500], Train Loss: 0.2383
Epoch [22/500], Train Loss: 0.2380
Epoch [23/500], Train Loss: 0.2374
Epoch [24/500], Train Loss: 0.2368
Ep

[I 2024-05-02 01:07:44,751] A new study created in RDB with name: unphased_full_23andMe_chr15_study


Individual AUC ROC curves saved in: ../../Data/model_results_unphased/logistic_regression/roc_curves/
Performance metrics saved at: ../../Data/model_results_unphased/logistic_regression/csv_files/performance_metrics.csv
PRS313 SNPs:  7
Total SNPs used for Training:  193
Total number of data points:  200


[I 2024-05-02 01:08:41,540] Trial 2 finished with value: 0.1952238385166441 and parameters: {'learning_rate': 0.005444836485985091, 'l1_coef': 0.0001418289913964557, 'patience': 8, 'batch_size': 64}. Best is trial 2 with value: 0.1952238385166441.
[I 2024-05-02 01:08:50,668] Trial 4 finished with value: 0.2130939321858542 and parameters: {'learning_rate': 0.023875378013001894, 'l1_coef': 0.00031146161152856674, 'patience': 17, 'batch_size': 64}. Best is trial 2 with value: 0.1952238385166441.
[I 2024-05-02 01:09:00,142] Trial 1 finished with value: 0.17883538878881014 and parameters: {'learning_rate': 0.012555991261510302, 'l1_coef': 6.395965604258952e-05, 'patience': 10, 'batch_size': 32}. Best is trial 1 with value: 0.17883538878881014.
[I 2024-05-02 01:09:21,214] Trial 3 finished with value: 0.19025035947561264 and parameters: {'learning_rate': 0.002451283248471429, 'l1_coef': 3.258187551225192e-05, 'patience': 13, 'batch_size': 256}. Best is trial 1 with value: 0.17883538878881014.

Chr 15 - Best hyperparameters: {'learning_rate': 0.022031365291809546, 'l1_coef': 3.002232713735617e-05, 'patience': 7, 'batch_size': 32}
Chr 15 - Best value: 0.1658
Epoch [1/500], Train Loss: 0.3545
Epoch [2/500], Train Loss: 0.2417
Epoch [3/500], Train Loss: 0.2143
Epoch [4/500], Train Loss: 0.2038
Epoch [5/500], Train Loss: 0.2017
Epoch [6/500], Train Loss: 0.1918
Epoch [7/500], Train Loss: 0.1893
Epoch [8/500], Train Loss: 0.1810
Epoch [9/500], Train Loss: 0.1857
Epoch [10/500], Train Loss: 0.1832
Epoch [11/500], Train Loss: 0.1806
Epoch [12/500], Train Loss: 0.1767
Epoch [13/500], Train Loss: 0.1754
Epoch [14/500], Train Loss: 0.1788
Epoch [15/500], Train Loss: 0.1753
Epoch [16/500], Train Loss: 0.1710
Epoch [17/500], Train Loss: 0.1796
Epoch [18/500], Train Loss: 0.1742
Epoch [19/500], Train Loss: 0.1705
Epoch [20/500], Train Loss: 0.1706
Epoch [21/500], Train Loss: 0.1729
Epoch [22/500], Train Loss: 0.1732
Epoch [23/500], Train Loss: 0.1711
Epoch [24/500], Train Loss: 0.1716
Epo

[I 2024-05-02 01:16:11,494] A new study created in RDB with name: unphased_full_23andMe_chr16_study


Individual AUC ROC curves saved in: ../../Data/model_results_unphased/logistic_regression/roc_curves/
Performance metrics saved at: ../../Data/model_results_unphased/logistic_regression/csv_files/performance_metrics.csv
PRS313 SNPs:  14
Total SNPs used for Training:  347
Total number of data points:  361


[I 2024-05-02 01:17:06,115] Trial 5 finished with value: 0.16933158082621438 and parameters: {'learning_rate': 0.0014529765727989098, 'l1_coef': 2.7896299070105322e-05, 'patience': 5, 'batch_size': 64}. Best is trial 5 with value: 0.16933158082621438.
[I 2024-05-02 01:17:21,888] Trial 3 finished with value: 0.49863145053386687 and parameters: {'learning_rate': 0.010314184678317188, 'l1_coef': 0.0931848801214141, 'patience': 14, 'batch_size': 256}. Best is trial 5 with value: 0.16933158082621438.
[I 2024-05-02 01:17:38,914] Trial 10 finished with value: 0.14978709295392037 and parameters: {'learning_rate': 0.048881214618777445, 'l1_coef': 3.048339757256684e-05, 'patience': 10, 'batch_size': 256}. Best is trial 10 with value: 0.14978709295392037.
[I 2024-05-02 01:17:52,360] Trial 2 finished with value: 0.2119875199519671 and parameters: {'learning_rate': 0.005799073518017184, 'l1_coef': 0.00026127616950602473, 'patience': 20, 'batch_size': 32}. Best is trial 10 with value: 0.149787092953

Chr 16 - Best hyperparameters: {'learning_rate': 0.009124351332848587, 'l1_coef': 1.8130607864447716e-05, 'patience': 12, 'batch_size': 32}
Chr 16 - Best value: 0.1372
Epoch [1/500], Train Loss: 0.4014
Epoch [2/500], Train Loss: 0.2756
Epoch [3/500], Train Loss: 0.2341
Epoch [4/500], Train Loss: 0.2103
Epoch [5/500], Train Loss: 0.1973
Epoch [6/500], Train Loss: 0.1836
Epoch [7/500], Train Loss: 0.1757
Epoch [8/500], Train Loss: 0.1707
Epoch [9/500], Train Loss: 0.1660
Epoch [10/500], Train Loss: 0.1603
Epoch [11/500], Train Loss: 0.1571
Epoch [12/500], Train Loss: 0.1533
Epoch [13/500], Train Loss: 0.1509
Epoch [14/500], Train Loss: 0.1488
Epoch [15/500], Train Loss: 0.1469
Epoch [16/500], Train Loss: 0.1457
Epoch [17/500], Train Loss: 0.1418
Epoch [18/500], Train Loss: 0.1416
Epoch [19/500], Train Loss: 0.1421
Epoch [20/500], Train Loss: 0.1399
Epoch [21/500], Train Loss: 0.1384
Epoch [22/500], Train Loss: 0.1370
Epoch [23/500], Train Loss: 0.1355
Epoch [24/500], Train Loss: 0.1370
E

[I 2024-05-02 01:25:52,527] A new study created in RDB with name: unphased_full_23andMe_chr17_study


Individual AUC ROC curves saved in: ../../Data/model_results_unphased/logistic_regression/roc_curves/
Performance metrics saved at: ../../Data/model_results_unphased/logistic_regression/csv_files/performance_metrics.csv
PRS313 SNPs:  9
Total SNPs used for Training:  179
Total number of data points:  188


[I 2024-05-02 01:26:14,490] Trial 4 finished with value: 0.20164496824145317 and parameters: {'learning_rate': 0.013233610405715912, 'l1_coef': 9.70437928706397e-05, 'patience': 5, 'batch_size': 128}. Best is trial 4 with value: 0.20164496824145317.
[I 2024-05-02 01:26:25,767] Trial 6 finished with value: 0.4454460272422204 and parameters: {'learning_rate': 0.04572159424258445, 'l1_coef': 0.01643480516912265, 'patience': 5, 'batch_size': 32}. Best is trial 4 with value: 0.20164496824145317.
[I 2024-05-02 01:26:50,365] Trial 3 finished with value: 0.2138426572084427 and parameters: {'learning_rate': 0.009565124757238258, 'l1_coef': 0.00014980992072759856, 'patience': 18, 'batch_size': 256}. Best is trial 4 with value: 0.20164496824145317.
[I 2024-05-02 01:26:58,873] Trial 2 finished with value: 0.37153413551194325 and parameters: {'learning_rate': 0.08640782120723212, 'l1_coef': 0.09311061222606244, 'patience': 8, 'batch_size': 64}. Best is trial 4 with value: 0.20164496824145317.
[I 20

Chr 17 - Best hyperparameters: {'learning_rate': 0.02295343800050915, 'l1_coef': 1.4340142668628905e-05, 'patience': 10, 'batch_size': 128}
Chr 17 - Best value: 0.1796
Epoch [1/500], Train Loss: 0.3824
Epoch [2/500], Train Loss: 0.2665
Epoch [3/500], Train Loss: 0.2339
Epoch [4/500], Train Loss: 0.2208
Epoch [5/500], Train Loss: 0.2118
Epoch [6/500], Train Loss: 0.2048
Epoch [7/500], Train Loss: 0.1997
Epoch [8/500], Train Loss: 0.1971
Epoch [9/500], Train Loss: 0.1957
Epoch [10/500], Train Loss: 0.1914
Epoch [11/500], Train Loss: 0.1908
Epoch [12/500], Train Loss: 0.1900
Epoch [13/500], Train Loss: 0.1858
Epoch [14/500], Train Loss: 0.1864
Epoch [15/500], Train Loss: 0.1843
Epoch [16/500], Train Loss: 0.1848
Epoch [17/500], Train Loss: 0.1835
Epoch [18/500], Train Loss: 0.1853
Epoch [19/500], Train Loss: 0.1825
Epoch [20/500], Train Loss: 0.1808
Epoch [21/500], Train Loss: 0.1799
Epoch [22/500], Train Loss: 0.1805
Epoch [23/500], Train Loss: 0.1798
Epoch [24/500], Train Loss: 0.1773
E

[I 2024-05-02 01:34:08,603] A new study created in RDB with name: unphased_full_23andMe_chr18_study


Individual AUC ROC curves saved in: ../../Data/model_results_unphased/logistic_regression/roc_curves/
Performance metrics saved at: ../../Data/model_results_unphased/logistic_regression/csv_files/performance_metrics.csv
PRS313 SNPs:  9
Total SNPs used for Training:  268
Total number of data points:  277


[I 2024-05-02 01:34:38,985] Trial 7 finished with value: 0.21506836967808857 and parameters: {'learning_rate': 0.09647382981280382, 'l1_coef': 6.491100536461145e-05, 'patience': 9, 'batch_size': 64}. Best is trial 7 with value: 0.21506836967808857.
[I 2024-05-02 01:34:54,170] Trial 2 finished with value: 0.49566978720518257 and parameters: {'learning_rate': 0.05112453730409569, 'l1_coef': 0.0053862352594389925, 'patience': 6, 'batch_size': 32}. Best is trial 7 with value: 0.21506836967808857.
[I 2024-05-02 01:35:26,855] Trial 4 finished with value: 0.3548539638519287 and parameters: {'learning_rate': 0.07807752398159735, 'l1_coef': 0.00187217447239103, 'patience': 13, 'batch_size': 256}. Best is trial 7 with value: 0.21506836967808857.
[I 2024-05-02 01:35:56,311] Trial 6 finished with value: 0.5671021580696106 and parameters: {'learning_rate': 0.0015118713044495666, 'l1_coef': 0.03094490847218931, 'patience': 7, 'batch_size': 256}. Best is trial 7 with value: 0.21506836967808857.
[I 20

Chr 18 - Best hyperparameters: {'learning_rate': 0.005557531822799773, 'l1_coef': 1.016657878194119e-05, 'patience': 16, 'batch_size': 128}
Chr 18 - Best value: 0.1866
Epoch [1/500], Train Loss: 0.5216
Epoch [2/500], Train Loss: 0.4152
Epoch [3/500], Train Loss: 0.3734
Epoch [4/500], Train Loss: 0.3414
Epoch [5/500], Train Loss: 0.3183
Epoch [6/500], Train Loss: 0.3008
Epoch [7/500], Train Loss: 0.2872
Epoch [8/500], Train Loss: 0.2752
Epoch [9/500], Train Loss: 0.2656
Epoch [10/500], Train Loss: 0.2567
Epoch [11/500], Train Loss: 0.2489
Epoch [12/500], Train Loss: 0.2432
Epoch [13/500], Train Loss: 0.2373
Epoch [14/500], Train Loss: 0.2325
Epoch [15/500], Train Loss: 0.2271
Epoch [16/500], Train Loss: 0.2246
Epoch [17/500], Train Loss: 0.2198
Epoch [18/500], Train Loss: 0.2162
Epoch [19/500], Train Loss: 0.2142
Epoch [20/500], Train Loss: 0.2113
Epoch [21/500], Train Loss: 0.2086
Epoch [22/500], Train Loss: 0.2049
Epoch [23/500], Train Loss: 0.2031
Epoch [24/500], Train Loss: 0.2014
E

[I 2024-05-02 01:44:45,523] A new study created in RDB with name: unphased_full_23andMe_chr19_study


Individual AUC ROC curves saved in: ../../Data/model_results_unphased/logistic_regression/roc_curves/
Performance metrics saved at: ../../Data/model_results_unphased/logistic_regression/csv_files/performance_metrics.csv
PRS313 SNPs:  7
Total SNPs used for Training:  281
Total number of data points:  288


[I 2024-05-02 01:45:33,210] Trial 8 finished with value: 0.08914412204176188 and parameters: {'learning_rate': 0.004832207332405041, 'l1_coef': 5.907585266876678e-05, 'patience': 8, 'batch_size': 128}. Best is trial 8 with value: 0.08914412204176188.
[I 2024-05-02 01:45:36,730] Trial 0 finished with value: 0.06355574131011962 and parameters: {'learning_rate': 0.014921364512615158, 'l1_coef': 1.8112586298436225e-05, 'patience': 13, 'batch_size': 256}. Best is trial 0 with value: 0.06355574131011962.
[I 2024-05-02 01:45:44,049] Trial 1 finished with value: 0.06276330879101386 and parameters: {'learning_rate': 0.04678448807685112, 'l1_coef': 5.228599269657289e-05, 'patience': 16, 'batch_size': 32}. Best is trial 1 with value: 0.06276330879101386.
[I 2024-05-02 01:46:04,040] Trial 4 finished with value: 0.18014510401657652 and parameters: {'learning_rate': 0.008886322621215118, 'l1_coef': 0.001593872257444184, 'patience': 8, 'batch_size': 64}. Best is trial 1 with value: 0.0627633087910138

Chr 19 - Best hyperparameters: {'learning_rate': 0.09278707674061928, 'l1_coef': 1.2849231282738609e-05, 'patience': 19, 'batch_size': 256}
Chr 19 - Best value: 0.0513
Epoch [1/500], Train Loss: 1.4504
Epoch [2/500], Train Loss: 0.6577
Epoch [3/500], Train Loss: 0.4281
Epoch [4/500], Train Loss: 0.3414
Epoch [5/500], Train Loss: 0.2945
Epoch [6/500], Train Loss: 0.2775
Epoch [7/500], Train Loss: 0.2678
Epoch [8/500], Train Loss: 0.2573
Epoch [9/500], Train Loss: 0.2498
Epoch [10/500], Train Loss: 0.2440
Epoch [11/500], Train Loss: 0.2420
Epoch [12/500], Train Loss: 0.2380
Epoch [13/500], Train Loss: 0.2335
Epoch [14/500], Train Loss: 0.2297
Epoch [15/500], Train Loss: 0.2308
Epoch [16/500], Train Loss: 0.2303
Epoch [17/500], Train Loss: 0.2285
Epoch [18/500], Train Loss: 0.2247
Epoch [19/500], Train Loss: 0.2235
Epoch [20/500], Train Loss: 0.2203
Epoch [21/500], Train Loss: 0.2190
Epoch [22/500], Train Loss: 0.2176
Epoch [23/500], Train Loss: 0.2108
Epoch [24/500], Train Loss: 0.2096
E

[I 2024-05-02 01:54:01,019] A new study created in RDB with name: unphased_full_23andMe_chr20_study


Individual AUC ROC curves saved in: ../../Data/model_results_unphased/logistic_regression/roc_curves/
Performance metrics saved at: ../../Data/model_results_unphased/logistic_regression/csv_files/performance_metrics.csv
PRS313 SNPs:  4
Total SNPs used for Training:  58
Total number of data points:  62


[I 2024-05-02 01:54:52,318] Trial 2 finished with value: 0.1681298641221864 and parameters: {'learning_rate': 0.004781028321883543, 'l1_coef': 2.143023124007869e-05, 'patience': 8, 'batch_size': 64}. Best is trial 2 with value: 0.1681298641221864.
[I 2024-05-02 01:54:58,576] Trial 4 finished with value: 0.22930612042546272 and parameters: {'learning_rate': 0.07656393175325839, 'l1_coef': 0.024316822124985527, 'patience': 14, 'batch_size': 128}. Best is trial 2 with value: 0.1681298641221864.
[I 2024-05-02 01:55:03,997] Trial 0 finished with value: 0.1866744575592188 and parameters: {'learning_rate': 0.03380388120817048, 'l1_coef': 0.0006877252949502024, 'patience': 12, 'batch_size': 32}. Best is trial 2 with value: 0.1681298641221864.
[I 2024-05-02 01:55:07,209] Trial 9 finished with value: 0.23882942646741867 and parameters: {'learning_rate': 0.03886008532493944, 'l1_coef': 0.042702141863285026, 'patience': 18, 'batch_size': 256}. Best is trial 2 with value: 0.1681298641221864.
[I 202

Chr 20 - Best hyperparameters: {'learning_rate': 0.017207813849325133, 'l1_coef': 2.725422987746258e-05, 'patience': 12, 'batch_size': 32}
Chr 20 - Best value: 0.1636
Epoch [1/500], Train Loss: 0.2316
Epoch [2/500], Train Loss: 0.1926
Epoch [3/500], Train Loss: 0.1871
Epoch [4/500], Train Loss: 0.1796
Epoch [5/500], Train Loss: 0.1780
Epoch [6/500], Train Loss: 0.1757
Epoch [7/500], Train Loss: 0.1791
Epoch [8/500], Train Loss: 0.1722
Epoch [9/500], Train Loss: 0.1727
Epoch [10/500], Train Loss: 0.1705
Epoch [11/500], Train Loss: 0.1693
Epoch [12/500], Train Loss: 0.1707
Epoch [13/500], Train Loss: 0.1692
Epoch [14/500], Train Loss: 0.1691
Epoch [15/500], Train Loss: 0.1694
Epoch [16/500], Train Loss: 0.1683
Epoch [17/500], Train Loss: 0.1673
Epoch [18/500], Train Loss: 0.1688
Epoch [19/500], Train Loss: 0.1668
Epoch [20/500], Train Loss: 0.1659
Epoch [21/500], Train Loss: 0.1661
Epoch [22/500], Train Loss: 0.1671
Epoch [23/500], Train Loss: 0.1665
Epoch [24/500], Train Loss: 0.1664
Ep

[I 2024-05-02 02:02:46,373] A new study created in RDB with name: unphased_full_23andMe_chr21_study


Individual AUC ROC curves saved in: ../../Data/model_results_unphased/logistic_regression/roc_curves/
Performance metrics saved at: ../../Data/model_results_unphased/logistic_regression/csv_files/performance_metrics.csv
PRS313 SNPs:  4
Total SNPs used for Training:  40
Total number of data points:  44


[I 2024-05-02 02:03:22,066] Trial 0 finished with value: 0.28262099410806385 and parameters: {'learning_rate': 0.06751618474006733, 'l1_coef': 0.001083509359569771, 'patience': 7, 'batch_size': 64}. Best is trial 0 with value: 0.28262099410806385.
[I 2024-05-02 02:03:32,013] Trial 4 finished with value: 0.23866723867563105 and parameters: {'learning_rate': 0.005823973408969031, 'l1_coef': 1.8641432905580242e-05, 'patience': 5, 'batch_size': 32}. Best is trial 4 with value: 0.23866723867563105.
[I 2024-05-02 02:03:48,795] Trial 8 finished with value: 0.2527904689311981 and parameters: {'learning_rate': 0.006938271169306284, 'l1_coef': 5.861222775669067e-05, 'patience': 11, 'batch_size': 256}. Best is trial 4 with value: 0.23866723867563105.
[I 2024-05-02 02:03:52,882] Trial 1 finished with value: 0.4971566007687495 and parameters: {'learning_rate': 0.002026275433383259, 'l1_coef': 0.08848661445652929, 'patience': 5, 'batch_size': 32}. Best is trial 4 with value: 0.23866723867563105.
[I 

Chr 21 - Best hyperparameters: {'learning_rate': 0.09547670009471211, 'l1_coef': 1.3395529903067892e-05, 'patience': 9, 'batch_size': 128}
Chr 21 - Best value: 0.2235
Epoch [1/500], Train Loss: 0.4789
Epoch [2/500], Train Loss: 0.3276
Epoch [3/500], Train Loss: 0.2856
Epoch [4/500], Train Loss: 0.2656
Epoch [5/500], Train Loss: 0.2564
Epoch [6/500], Train Loss: 0.2534
Epoch [7/500], Train Loss: 0.2525
Epoch [8/500], Train Loss: 0.2464
Epoch [9/500], Train Loss: 0.2451
Epoch [10/500], Train Loss: 0.2428
Epoch [11/500], Train Loss: 0.2451
Epoch [12/500], Train Loss: 0.2411
Epoch [13/500], Train Loss: 0.2450
Epoch [14/500], Train Loss: 0.2401
Epoch [15/500], Train Loss: 0.2426
Epoch [16/500], Train Loss: 0.2407
Epoch [17/500], Train Loss: 0.2411
Epoch [18/500], Train Loss: 0.2397
Epoch [19/500], Train Loss: 0.2425
Epoch [20/500], Train Loss: 0.2471
Epoch [21/500], Train Loss: 0.2463
Epoch [22/500], Train Loss: 0.2454
Epoch [23/500], Train Loss: 0.2397
Epoch [24/500], Train Loss: 0.2382
Ep

[I 2024-05-02 02:12:08,398] A new study created in RDB with name: unphased_full_23andMe_chr22_study


PRS313 SNPs:  11
Total SNPs used for Training:  432
Total number of data points:  443


[I 2024-05-02 02:12:59,112] Trial 8 finished with value: 0.16584223806858062 and parameters: {'learning_rate': 0.011125304316617835, 'l1_coef': 1.6327967248551786e-05, 'patience': 16, 'batch_size': 128}. Best is trial 8 with value: 0.16584223806858062.
[I 2024-05-02 02:13:48,462] Trial 1 finished with value: 0.2559344969689846 and parameters: {'learning_rate': 0.00547531691429216, 'l1_coef': 0.0009086678995717588, 'patience': 9, 'batch_size': 128}. Best is trial 8 with value: 0.16584223806858062.
[I 2024-05-02 02:13:49,248] Trial 9 finished with value: 0.42428505420684814 and parameters: {'learning_rate': 0.00514155912267641, 'l1_coef': 0.07735223991676295, 'patience': 11, 'batch_size': 64}. Best is trial 8 with value: 0.16584223806858062.
[I 2024-05-02 02:14:14,928] Trial 0 finished with value: 0.421038662470304 and parameters: {'learning_rate': 0.0645237597421749, 'l1_coef': 0.019060836173213268, 'patience': 17, 'batch_size': 32}. Best is trial 8 with value: 0.16584223806858062.
[I 2

Chr 22 - Best hyperparameters: {'learning_rate': 0.021540268092518598, 'l1_coef': 1.08778317424011e-05, 'patience': 13, 'batch_size': 32}
Chr 22 - Best value: 0.1491
Epoch [1/500], Train Loss: 0.3518
Epoch [2/500], Train Loss: 0.2395
Epoch [3/500], Train Loss: 0.2081
Epoch [4/500], Train Loss: 0.1957
Epoch [5/500], Train Loss: 0.1884
Epoch [6/500], Train Loss: 0.1801
Epoch [7/500], Train Loss: 0.1722
Epoch [8/500], Train Loss: 0.1706
Epoch [9/500], Train Loss: 0.1672
Epoch [10/500], Train Loss: 0.1650
Epoch [11/500], Train Loss: 0.1606
Epoch [12/500], Train Loss: 0.1620
Epoch [13/500], Train Loss: 0.1604
Epoch [14/500], Train Loss: 0.1660
Epoch [15/500], Train Loss: 0.1602
Epoch [16/500], Train Loss: 0.1577
Epoch [17/500], Train Loss: 0.1587
Epoch [18/500], Train Loss: 0.1589
Epoch [19/500], Train Loss: 0.1615
Epoch [20/500], Train Loss: 0.1579
Epoch [21/500], Train Loss: 0.1528
Epoch [22/500], Train Loss: 0.1505
Epoch [23/500], Train Loss: 0.1511
Epoch [24/500], Train Loss: 0.1507
Epo

  iqs = (po - pc) / (1 - pc)


Skipping SNP chr22_29135543_G_A_PRS313_Unknown_combined due to insufficient data
Individual AUC ROC curves saved in: ../../Data/model_results_unphased/logistic_regression/roc_curves/
Performance metrics saved at: ../../Data/model_results_unphased/logistic_regression/csv_files/performance_metrics.csv




<Figure size 640x480 with 0 Axes>

<Figure size 640x480 with 0 Axes>

# ALL PRS Masked

In [4]:
import numpy as np
import pandas as pd
import torch
import torch.nn as nn
import torch.optim as optim
from torch.utils.data import DataLoader, TensorDataset
from sklearn.model_selection import train_test_split, StratifiedKFold
from sklearn.metrics import precision_score, recall_score, f1_score, roc_auc_score, confusion_matrix, roc_curve
from sklearn.metrics import r2_score as sklearn_r2_score
import optuna
from matplotlib import pyplot as plt
import os
import csv

# Load and preprocess the data
data_directory = '../../Data/Filtered_unphased_training_data_union/'
start = 1

# Initialize lists to store the performance metrics for each chromosome
accuracies = []
precisions = []
recalls = []
false_positive_rates = []
auc_rocs = []
r2_scores = []
iqs_scores = []

# Create folders for saving files
output_folder = "../../Data/model_results_unphased_all_PRS/logistic_regression/"
model_folder = output_folder + "models_unphased/"
csv_folder = output_folder + "csv_files/"
curve_folder = output_folder + "roc_curves/"

os.makedirs(model_folder, exist_ok=True)
os.makedirs(csv_folder, exist_ok=True)
os.makedirs(curve_folder, exist_ok=True)

for chromosome_number in range(start, 23):
    # Create subfolders for the current chromosome
    chr_model_folder = model_folder + f"chr{chromosome_number}/"
    chr_csv_folder = csv_folder + f"chr{chromosome_number}/"
    chr_curve_folder = curve_folder + f"chr{chromosome_number}/"

    os.makedirs(chr_model_folder, exist_ok=True)
    os.makedirs(chr_csv_folder, exist_ok=True)
    os.makedirs(chr_curve_folder, exist_ok=True)

    file_name = data_directory + f"23AndMe_PRS313_merged_chr{chromosome_number}_matching_combined.parquet"
    data = pd.read_parquet(file_name)


    # Split the data into features and target
    X = torch.tensor(data.filter(regex='^(?!.*PRS313_)').values, dtype=torch.float32)
    y = torch.tensor(data.filter(regex='PRS313_').values, dtype=torch.float32)


    print("Total SNPs: ", data.shape[1])
    print("PRS313 SNPs: ", y.shape[1])
    # print("Known PRS313 SNPs: ", data[[col for col in data.columns if "PRS313_Known" in col]].shape[1])
    # print("23AndMe SNPs with LD to Unknown PRS313 SNPs: ", data[[col for col in data.columns if "PRS313_" not in col]].shape[1])
    print("Total SNPs used for Training: ", X.shape[1])

    # Split the data into train-validation and test sets
    X_train_val, X_test, y_train_val, y_test = train_test_split(X, y, test_size=0.2, random_state=42)

    # Define the logistic regression model with lasso regularization
    class LogisticRegression(nn.Module):
        def __init__(self, input_dim, output_dim, l1_coef=0.0):
            super(LogisticRegression, self).__init__()
            self.linear = nn.Linear(input_dim, output_dim)
            self.sigmoid = nn.Sigmoid()
            self.l1_coef = l1_coef

        def forward(self, x):
            out = self.linear(x)
            out = self.sigmoid(out)
            return out

        def l1_loss(self):
            return self.l1_coef * torch.norm(self.linear.weight, p=1)
        
    # Set the device
    device = torch.device("cuda" if torch.cuda.is_available() else "cpu")

    # Set the hyperparameters for tuning
    input_dim = X_train_val.shape[1]
    output_dim = y_train_val.shape[1]
    num_epochs = 500
    batch_size = 128

    # Define the objective function for Optuna with cross-validation and early stopping
    def objective(trial):
        learning_rate = trial.suggest_float('learning_rate', 1e-4, 1e-1, log=True)
        l1_coef = trial.suggest_float('l1_coef', 1e-5, 1e-1, log=True)
        patience = trial.suggest_int('patience', 5, 20)
        batch_size = trial.suggest_categorical('batch_size', [32, 64, 128, 256])

        model = LogisticRegression(input_dim, output_dim, l1_coef).to(device)
        optimizer = optim.Adam(model.parameters(), lr=learning_rate)
        criterion = nn.BCELoss()
        scheduler = optim.lr_scheduler.ReduceLROnPlateau(optimizer, mode='min', factor=0.1, patience=5, verbose=False)

        skf = StratifiedKFold(n_splits=5, shuffle=True, random_state=42)
        fold_losses = []

        for fold, (train_idx, val_idx) in enumerate(skf.split(X_train_val, y_train_val.argmax(dim=1))):
            X_train, X_val = X_train_val[train_idx], X_train_val[val_idx]
            y_train, y_val = y_train_val[train_idx], y_train_val[val_idx]

            train_dataset = TensorDataset(X_train, y_train)
            train_loader = DataLoader(train_dataset, batch_size=batch_size, shuffle=True)

            best_val_loss = float('inf')
            counter = 0

            for epoch in range(num_epochs):
                train_loss = 0.0
                for batch_X, batch_y in train_loader:
                    batch_X, batch_y = batch_X.to(device), batch_y.to(device)

                    outputs = model(batch_X)
                    loss = criterion(outputs, batch_y) + model.l1_loss()

                    optimizer.zero_grad()
                    loss.backward()
                    optimizer.step()

                    train_loss += loss.item()

                train_loss /= len(train_loader)

                val_dataset = TensorDataset(X_val, y_val)
                val_loader = DataLoader(val_dataset, batch_size=batch_size)

                with torch.no_grad():
                    val_loss = 0.0
                    for batch_X, batch_y in val_loader:
                        batch_X, batch_y = batch_X.to(device), batch_y.to(device)
                        outputs = model(batch_X)
                        loss = criterion(outputs, batch_y) + model.l1_loss()
                        val_loss += loss.item()

                    val_loss /= len(val_loader)
                    scheduler.step(val_loss)

                    if val_loss < best_val_loss:
                        best_val_loss = val_loss
                        counter = 0
                    else:
                        counter += 1

                    if counter >= patience:
                        # print(f"Early stopping at epoch {epoch+1}")
                        break

            fold_losses.append(best_val_loss)

        return np.mean(fold_losses)

    # Create the "optuna_studies" folder if it doesn't exist
    os.makedirs("optuna_studies", exist_ok=True)

    # Create an Optuna study and optimize the hyperparameters
    study_name = f"chr{chromosome_number}_study"
    storage_name = f"sqlite:///optuna_studies/{study_name}.db"

    # Check if the study exists

    current_dir = os.getcwd()
    study_exists = os.path.exists(current_dir + f"/optuna_studies/{study_name}.db")
    
    if study_exists:
        # Load the existing study
        study = optuna.load_study(study_name=study_name, storage=storage_name)
    else:
        # Create a new study
        study = optuna.create_study(direction='minimize', study_name=study_name, storage=storage_name)

    study.optimize(objective, n_trials=50, n_jobs=-1)

    # Print the best hyperparameters and best value
    print(f"Chr {chromosome_number} - Best hyperparameters: {study.best_params}")
    print(f"Chr {chromosome_number} - Best value: {study.best_value:.4f}")

    # Train the final model with the best hyperparameters and early stopping
    best_learning_rate = study.best_params['learning_rate']
    best_l1_coef = study.best_params['l1_coef']
    best_patience = study.best_params['patience']
    best_batch_size = study.best_params['batch_size']

    model = LogisticRegression(input_dim, output_dim, best_l1_coef).to(device)
    optimizer = optim.Adam(model.parameters(), lr=best_learning_rate)
    criterion = nn.BCELoss()
    scheduler = optim.lr_scheduler.ReduceLROnPlateau(optimizer, mode='min', factor=0.1, patience=5, verbose=False)

    train_dataset = TensorDataset(X_train_val, y_train_val)
    train_loader = DataLoader(train_dataset, batch_size=best_batch_size, shuffle=True)

    best_train_loss = float('inf')
    counter = 0

    for epoch in range(num_epochs):
        train_loss = 0.0
        for batch_X, batch_y in train_loader:
            batch_X, batch_y = batch_X.to(device), batch_y.to(device)

            outputs = model(batch_X)
            loss = criterion(outputs, batch_y) + model.l1_loss()

            optimizer.zero_grad()
            loss.backward()
            optimizer.step()

            train_loss += loss.item()

        train_loss /= len(train_loader)
        # print(f"Epoch [{epoch+1}/{num_epochs}], Train Loss: {train_loss:.4f}")

        if train_loss < best_train_loss:
            best_train_loss = train_loss
            counter = 0
        else:
            counter += 1

        if counter >= best_patience:
            # print(f"Early stopping at epoch {epoch+1}")
            break

        scheduler.step(train_loss)

    # Save the final model
    model_save_path = chr_model_folder + f'final_model_chr{chromosome_number}.pth'
    torch.save(model.state_dict(), model_save_path)
    print(f"Final model saved at: {model_save_path}")

    # Evaluate the final model on the test set
    with torch.no_grad():
        test_outputs = model(X_test.to(device))
        test_preds = (test_outputs > 0.5).float()
        test_accuracy = float(((test_preds > 0.5) == y_test).float().mean())
        test_precision = precision_score(y_test.cpu().numpy(), test_preds.cpu().numpy(), average='micro')
        test_recall = recall_score(y_test.cpu().numpy(), test_preds.cpu().numpy(), average='micro')
        test_f1 = f1_score(y_test.cpu().numpy(), test_preds.cpu().numpy(), average='micro')
        test_roc_auc = roc_auc_score(y_test.cpu().numpy(), test_outputs.cpu().numpy(), average='micro')
        test_r2 = sklearn_r2_score(y_test.cpu().numpy(), test_outputs.cpu().numpy())
        test_iqs = calculate_iqs_unphased(y_test.cpu().numpy(), test_outputs.cpu().numpy())

        # Calculate false positive rate
        cm = confusion_matrix(y_test.cpu().numpy().ravel(), test_preds.cpu().numpy().ravel())
        tn, fp, fn, tp = cm.ravel()
        test_fpr = fp / (fp + tn)

        # Append performance metrics to the lists
        accuracies.append(test_accuracy)
        precisions.append(test_precision)
        recalls.append(test_recall)
        false_positive_rates.append(test_fpr)
        auc_rocs.append(test_roc_auc)
        r2_scores.append(test_r2)
        iqs_scores.append(test_iqs)

        # Calculate individual R^2 scores for each SNP
        individual_r2_scores = sklearn_r2_score(y_test.cpu().numpy(), test_outputs.cpu().numpy(), multioutput='raw_values')

        # Calculate individual IQS scores for each SNP
        individual_iqs_scores = np.array([calculate_iqs_unphased(y_test.cpu().numpy()[:, i].reshape(-1, 1), test_outputs.cpu().numpy()[:, i].reshape(-1, 1)) for i in range(y_test.shape[1])])

        # Get the names of the SNPs from the original dataframe
        snp_names = data.filter(regex='Unknown').columns

        # Save individual R^2 scores to a CSV file
        csv_file = chr_csv_folder + f'individual_r2_scores_chr{chromosome_number}.csv'

        with open(csv_file, 'w', newline='') as file:
            writer = csv.writer(file)
            writer.writerow(['SNP', 'R2 Score'])
            for snp, r2_score in zip(snp_names, individual_r2_scores):
                writer.writerow([snp, r2_score])

        print(f"Individual R^2 scores saved at: {csv_file}")

        # Save individual IQS scores to a CSV file
        iqs_csv_file = chr_csv_folder + f'individual_iqs_scores_chr{chromosome_number}.csv'

        with open(iqs_csv_file, 'w', newline='') as file:
            writer = csv.writer(file)
            writer.writerow(['SNP', 'IQS Score'])
            for snp, iqs_score in zip(snp_names, individual_iqs_scores):
                writer.writerow([snp, iqs_score])

        print(f"Individual IQS scores saved at: {iqs_csv_file}")

        # Save individual AUC ROC curves for each SNP
        for i, snp in enumerate(snp_names):
            try: 
                fpr, tpr, _ = roc_curve(y_test.cpu().numpy()[:, i], test_outputs.cpu().numpy()[:, i])
                plt.figure()
                plt.plot(fpr, tpr, label=f'AUC ROC = {roc_auc_score(y_test.cpu().numpy()[:, i], test_outputs.cpu().numpy()[:, i]):.4f}')
                plt.xlabel('False Positive Rate')
                plt.ylabel('True Positive Rate')
                plt.title(f'AUC ROC Curve - {snp}')
                plt.legend()
                
                curve_file = chr_curve_folder + f'auc_roc_curve_{snp}_chr{chromosome_number}.png'
                plt.savefig(curve_file)
                plt.close()
            except ValueError:
                # Save a placeholder image if there is insufficient data
                plt.figure()
                plt.axis('off')
                plt.text(0.5, 0.5, "Insufficient data for ROC curve", ha='center', va='center')
                curve_file = chr_curve_folder + f'auc_roc_curve_{snp}_chr{chromosome_number}.png'
                plt.savefig(curve_file)
                plt.close()

                print(f"Skipping SNP {snp} due to insufficient data")


        print(f"Individual AUC ROC curves saved in: {curve_folder}")

        # Create a DataFrame to store the performance metrics for each chromosome
        performance_df = pd.DataFrame({
            'Chromosome': list(range(start, chromosome_number + 1)),
            'Accuracy': accuracies,
            'Precision': precisions,
            'Recall': recalls,
            'False Positive Rate': false_positive_rates,
            'AUC ROC': auc_rocs,
            'R2 Score': r2_scores,
            'IQS Score': iqs_scores
        })

        # Save the performance metrics to a CSV file
        performance_csv_file = csv_folder + 'performance_metrics.csv'
        performance_df.to_csv(performance_csv_file, index=False)
        print(f"Performance metrics saved at: {performance_csv_file}")

Total SNPs:  929
PRS313 SNPs:  30
Total SNPs used for Training:  899


[I 2024-05-01 22:32:19,345] Trial 10 finished with value: 0.21762877447264534 and parameters: {'learning_rate': 0.0020859788421430012, 'l1_coef': 1.2064301402304712e-05, 'patience': 17, 'batch_size': 64}. Best is trial 10 with value: 0.21762877447264534.


Chr 1 - Best hyperparameters: {'learning_rate': 0.0020859788421430012, 'l1_coef': 1.2064301402304712e-05, 'patience': 17, 'batch_size': 64}
Chr 1 - Best value: 0.2176
Final model saved at: ../../Data/model_results_unphased_all_PRS/logistic_regression/models_unphased/chr1/final_model_chr1.pth


NameError: name 'calculate_iqs_unphased' is not defined

In [3]:
# Loop through all the training datasets and document the PRS313 SNPs in each dataset. Save this to a CSV file.

import pandas as pd
import os

data_directory = '../../Data/Filtered_unphased_training_data_union/'
output_folder = "../../Data/model_results_unphased_all_PRS/logistic_regression/csv_files/"

# Create the output folder if it doesn't exist
os.makedirs(output_folder, exist_ok=True)

# Initialize a list to store the PRS313 SNPs in each dataset
prs313_snps = []

for chromosome_number in range(1, 23):
    file_name = data_directory + \
        f"23AndMe_PRS313_merged_chr{chromosome_number}_matching_combined.parquet"
    data = pd.read_parquet(file_name)

    prs313_snps.append(data.filter(regex='PRS313_').columns)

# Create a DataFrame to store the PRS313 SNPs in each dataset
prs313_df = pd.DataFrame({
    'Chromosome': list(range(1, 23)),
    'PRS313 SNPs': prs313_snps,
    "Number of PRS313 SNPs": [len(snps) for snps in prs313_snps]
})

# Save the PRS313 SNPs to a CSV file
prs313_csv_file = output_folder + 'prs313_snps.csv'
prs313_df.to_csv(prs313_csv_file, index=False)
print(f"PRS313 SNPs saved at: {prs313_csv_file}")

# Print the total number of PRS313 SNPs in all datasets
total_prs313_snps = sum(prs313_df["Number of PRS313 SNPs"])
print(f"Total number of PRS313 SNPs: {total_prs313_snps}")



PRS313 SNPs saved at: ../../Data/model_results_unphased_all_PRS/logistic_regression/csv_files/prs313_snps.csv
Total number of PRS313 SNPs: 313


In [14]:
# Load the PRS313 xlsx
prs313_file = "../../Data/PRS313.xlsx"

# Load the PRS313 data
prs313_data = pd.read_excel(prs313_file)

# Get the number of PRS313 SNPs per chromosome
prs313_snps_per_chromosome = prs313_data.groupby("Chromosome")["SNPa"].count()
prs313_snps_per_chromosome

Chromosome
1     30
2     21
3     16
4     11
5     34
6     20
7     14
8     21
9     14
10    18
11    19
12    17
13     5
14     8
15     7
16    14
17     9
18     9
19     7
20     4
21     4
22    11
Name: SNPa, dtype: int64

In [15]:
prs313_df["Number of PRS313 SNPs"]

0     30
1     21
2     16
3     11
4     34
5     20
6     14
7     21
8     14
9     18
10    19
11    17
12     5
13     8
14     7
15    14
16     9
17     9
18     7
19     4
20     5
21    11
Name: Number of PRS313 SNPs, dtype: int64