# Logistic Regression

In [1]:
import torch
import torch.nn as nn
import os
import pandas as pd

# Multinomial logistic regression: one linear head per class, softmax over classes
class MulticlassLogisticRegression(nn.Module):
    """Per-class linear heads followed by a softmax across the class axis.

    Args:
        input_dim: Number of input features fed to every linear head.
        output_dim: Number of outputs produced by every linear head.
        num_classes: Number of linear heads (size of the softmax axis).
        l1_coef: L1 coefficient. It is only stored on the instance; this
            class does not compute a penalty from it.
    """

    def __init__(self, input_dim, output_dim, num_classes=3, l1_coef=0.0):
        super().__init__()
        self.l1_coef = l1_coef
        self.num_classes = num_classes
        # The attribute must stay named ``linear``: the state_dict keys
        # ("linear.<k>.weight" / "linear.<k>.bias") are derived from it.
        heads = []
        for _ in range(num_classes):
            heads.append(nn.Linear(input_dim, output_dim))
        self.linear = nn.ModuleList(heads)

    def forward(self, x):
        """Return class probabilities with the classes on the last axis.

        Each head maps ``x`` to ``output_dim`` values; the head outputs are
        stacked on a new trailing axis and normalised with softmax, so the
        result has shape ``(..., output_dim, num_classes)``.
        """
        per_class_logits = [head(x) for head in self.linear]
        logits = torch.stack(per_class_logits, dim=-1)
        return torch.softmax(logits, dim=-1)

# Convert the per-chromosome logistic regression checkpoints to ONNX format.
# For every chromosome from `start` to 22 this reads the training table to
# size the model, loads the saved state dict on CPU and exports it as ONNX.
data_directory = '../../Data/Filtered_unphased_training_data_union_final/'
start = 1
model_folder = "../../Data/model_results_unphased_all_PRS/logistic_regression/models_unphased/"
onnx_folder = "../../Data/model_results_unphased_all_PRS/logistic_regression/onnx_models/"

# Every ONNX file goes into the same flat folder, so create it once up front.
os.makedirs(onnx_folder, exist_ok=True)

for chromosome_number in range(start, 23):
    # Paths for the current chromosome
    chr_model_folder = model_folder + f"chr{chromosome_number}/"
    chr_onnx_folder = onnx_folder

    # Load data to determine the model dimensions. Only the column counts are
    # needed, so the table is not converted into a tensor:
    #   inputs  = columns whose name does not contain "PRS313_"
    #   outputs = columns whose name contains "PRS313_"
    file_name = data_directory + f"23AndMe_PRS313_merged_chr{chromosome_number}_matching_combined.parquet"
    data = pd.read_parquet(file_name)
    input_dim = data.filter(regex='^(?!.*PRS313_)').shape[1]
    output_dim = data.filter(regex='PRS313_').shape[1]

    # Define and load the model
    model = MulticlassLogisticRegression(input_dim, output_dim)
    model_save_path = chr_model_folder + f'final_model_chr{chromosome_number}.pth'
    # NOTE(review): torch.load unpickles the file; only load checkpoints that
    # come from a trusted source.
    model.load_state_dict(torch.load(model_save_path, map_location=torch.device('cpu')))
    model.eval()

    # Convert to ONNX. The dummy input only drives tracing; axis 0 is marked
    # dynamic so the exported graph is not locked to a batch size of 1.
    dummy_input = torch.randn(1, input_dim)
    onnx_save_path = chr_onnx_folder + f'final_model_chr{chromosome_number}.onnx'
    torch.onnx.export(
        model,
        dummy_input,
        onnx_save_path,
        input_names=['input'],
        output_names=['output'],
        dynamic_axes={'input': {0: 'batch_size'}, 'output': {0: 'batch_size'}},
    )
    print(f"Model saved to ONNX format at: {onnx_save_path}")


Model saved to ONNX format at: ../../Data/model_results_unphased_all_PRS/logistic_regression/onnx_models/final_model_chr1.onnx
Model saved to ONNX format at: ../../Data/model_results_unphased_all_PRS/logistic_regression/onnx_models/final_model_chr2.onnx
Model saved to ONNX format at: ../../Data/model_results_unphased_all_PRS/logistic_regression/onnx_models/final_model_chr3.onnx
Model saved to ONNX format at: ../../Data/model_results_unphased_all_PRS/logistic_regression/onnx_models/final_model_chr4.onnx
Model saved to ONNX format at: ../../Data/model_results_unphased_all_PRS/logistic_regression/onnx_models/final_model_chr5.onnx
Model saved to ONNX format at: ../../Data/model_results_unphased_all_PRS/logistic_regression/onnx_models/final_model_chr6.onnx
Model saved to ONNX format at: ../../Data/model_results_unphased_all_PRS/logistic_regression/onnx_models/final_model_chr7.onnx
Model saved to ONNX format at: ../../Data/model_results_unphased_all_PRS/logistic_regression/onnx_models/final_model_chr8.onnx

# Linear Regression

In [2]:
import torch
import torch.nn as nn
import os
import pandas as pd

# Linear regression model: a single affine layer plus an L1 weight penalty helper
class LinearRegression(nn.Module):
    """Single ``nn.Linear`` layer with an optional L1 penalty on its weights.

    Args:
        input_dim: Number of input features.
        output_dim: Number of regression outputs.
        l1_coef: Multiplier applied to the L1 norm of the weight matrix in
            ``l1_loss``.
    """

    def __init__(self, input_dim, output_dim, l1_coef=0.0):
        super().__init__()
        self.l1_coef = l1_coef
        # The attribute must stay named ``linear``: the state_dict keys
        # ("linear.weight" / "linear.bias") are derived from it.
        self.linear = nn.Linear(input_dim, output_dim)

    def forward(self, x):
        """Apply the linear layer to ``x`` and return the raw predictions."""
        return self.linear(x)

    def l1_loss(self):
        """Return ``l1_coef`` times the L1 norm of the layer's weights.

        The bias is not included in the penalty.
        """
        weight_l1 = self.linear.weight.norm(p=1)
        return self.l1_coef * weight_l1

# Convert the per-chromosome linear regression checkpoints to ONNX format.
# For every chromosome from `start` to 22 this reads the training table to
# size the model, loads the saved state dict on CPU and exports it as ONNX.
data_directory = '../../Data/Filtered_unphased_training_data_union_final/'
start = 1
model_folder = "../../Data/model_results_unphased_all_PRS/linear_regression/models_unphased/"
onnx_folder = "../../Data/model_results_unphased_all_PRS/linear_regression/onnx_models/"

# Every ONNX file goes into the same flat folder, so create it once up front.
os.makedirs(onnx_folder, exist_ok=True)

for chromosome_number in range(start, 23):
    # Paths for the current chromosome
    chr_model_folder = model_folder + f"chr{chromosome_number}/"
    chr_onnx_folder = onnx_folder

    # Load data to determine the model dimensions. Only the column counts are
    # needed, so the table is not converted into a tensor:
    #   inputs  = columns whose name does not contain "PRS313_"
    #   outputs = columns whose name contains "PRS313_"
    file_name = data_directory + f"23AndMe_PRS313_merged_chr{chromosome_number}_matching_combined.parquet"
    data = pd.read_parquet(file_name)
    input_dim = data.filter(regex='^(?!.*PRS313_)').shape[1]
    output_dim = data.filter(regex='PRS313_').shape[1]

    # Define and load the model
    model = LinearRegression(input_dim, output_dim)
    model_save_path = chr_model_folder + f'final_model_chr{chromosome_number}.pth'
    # NOTE(review): torch.load unpickles the file; only load checkpoints that
    # come from a trusted source.
    model.load_state_dict(torch.load(model_save_path, map_location=torch.device('cpu')))
    model.eval()

    # Convert to ONNX. The dummy input only drives tracing; axis 0 is marked
    # dynamic so the exported graph is not locked to a batch size of 1.
    dummy_input = torch.randn(1, input_dim)
    onnx_save_path = chr_onnx_folder + f'final_model_chr{chromosome_number}.onnx'
    torch.onnx.export(
        model,
        dummy_input,
        onnx_save_path,
        input_names=['input'],
        output_names=['output'],
        dynamic_axes={'input': {0: 'batch_size'}, 'output': {0: 'batch_size'}},
    )
    print(f"Model saved to ONNX format at: {onnx_save_path}")


Model saved to ONNX format at: ../../Data/model_results_unphased_all_PRS/linear_regression/onnx_models/final_model_chr1.onnx
Model saved to ONNX format at: ../../Data/model_results_unphased_all_PRS/linear_regression/onnx_models/final_model_chr2.onnx
Model saved to ONNX format at: ../../Data/model_results_unphased_all_PRS/linear_regression/onnx_models/final_model_chr3.onnx
Model saved to ONNX format at: ../../Data/model_results_unphased_all_PRS/linear_regression/onnx_models/final_model_chr4.onnx
Model saved to ONNX format at: ../../Data/model_results_unphased_all_PRS/linear_regression/onnx_models/final_model_chr5.onnx
Model saved to ONNX format at: ../../Data/model_results_unphased_all_PRS/linear_regression/onnx_models/final_model_chr6.onnx
Model saved to ONNX format at: ../../Data/model_results_unphased_all_PRS/linear_regression/onnx_models/final_model_chr7.onnx
Model saved to ONNX format at: ../../Data/model_results_unphased_all_PRS/linear_regression/onnx_models/final_model_chr8.onnx
