In [None]:
import os

import polars as pl
import torch
from sklearn.model_selection import train_test_split

from src.data.preprocessing.preprocessor import Preprocessor, RuleTransform
from src.data.torch_tabular_dataset import TorchTabularDataset
from src.experiments.utils import (
    make_experiment_binary_perceptrone,
    make_experiments_binary_perceptrone,
)

# Single seed reused for the train/test split and for every experiment call.
RANDOM_SEED = 42

# Reproducibility setup. PyTorch's deterministic mode requires a fixed cuBLAS
# workspace size (":16:8" or ":4096:8") for CUDA matrix multiplications, so
# the variable is set here, ahead of the first `.cuda()` call in this notebook.
os.environ.update({"CUBLAS_WORKSPACE_CONFIG": ":16:8"})
# Turn off the cuDNN autotuner and request deterministic algorithms.
torch.backends.cudnn.benchmark = False
torch.use_deterministic_algorithms(True)

In [None]:
# Label rule: the "Diagnosis" column is handled by the "BinaryTarget" transformer.
target_rule = RuleTransform(transformer="BinaryTarget", columns=["Diagnosis"])

# Input rule: the columns listed here are handled by "NumericFeature".
# The list order is kept exactly as it was originally written.
# NOTE(review): "Ethnicity" and "EducationLevel" look like coded categories
# rather than measurements - confirm that a numeric treatment is intended.
numeric_columns = [
    "Age",
    "Ethnicity",
    "EducationLevel",
    "BMI",
    "AlcoholConsumption",
    "PhysicalActivity",
    "DietQuality",
    "SleepQuality",
    "SystolicBP",
    "DiastolicBP",
    "CholesterolTotal",
    "CholesterolLDL",
    "CholesterolHDL",
    "CholesterolTriglycerides",
    "MMSE",
    "FunctionalAssessment",
    "ADL",
    "Gender",
    "Smoking",
    "FamilyHistoryAlzheimers",
    "CardiovascularDisease",
    "Diabetes",
    "Depression",
    "HeadInjury",
    "Hypertension",
    "MemoryComplaints",
    "BehavioralProblems",
    "Confusion",
    "Disorientation",
    "PersonalityChanges",
    "DifficultyCompletingTasks",
    "Forgetfulness",
]
numeric_rule = RuleTransform(transformer="NumericFeature", columns=numeric_columns)

# Combine both rules (target first, then features) into one preprocessor.
preprocessor = Preprocessor.from_rules(target_rule, numeric_rule)

In [None]:
# Read the raw table and hold out 20% of the rows for testing (seeded split).
df = pl.read_csv("../data/alzheimers_disease_data.csv")
df_train, df_test = train_test_split(
    df,
    test_size=0.2,
    random_state=RANDOM_SEED,
)

# Fit the preprocessing on the training part only, then apply the already
# fitted preprocessor to the test part.
df_train_transformed = preprocessor.fit_transform(data=df_train)
df_test_transformed = preprocessor.transform(data=df_test)

# Wrap each transformed frame together with the preprocessor metadata and call
# `.cuda()` on it (presumably moving the data to the GPU - confirm in
# TorchTabularDataset). The train dataset is built first, then the test one.
dataset_train, dataset_test = (
    TorchTabularDataset(df=frame, metadata=preprocessor.metadata).cuda()
    for frame in (df_train_transformed, df_test_transformed)
)

In [None]:
# Single-configuration run: one hidden layer with 20 units.
# NOTE(review): the meaning of the individual options is defined in
# src.experiments.utils.make_experiment_binary_perceptrone - confirm there.
result = make_experiment_binary_perceptrone(
    # data
    dataset_train=dataset_train,
    dataset_test=dataset_test,
    # architecture
    n_hidden=1,
    dim_hidden=20,
    # weight-decay values passed for the "classic" variant
    weight_decays_classic=[0, 0.0001, 0.001, 0.01, 0.1],
    # inference settings passed for the "bayesian" variant
    sample_size_inference_bayesian=1000,
    batch_size_inference_bayesian=64,
    # reproducibility and logging switches
    random_seed=RANDOM_SEED,
    log_loss_train=False,
    log_loss_init=False,
)

In [None]:
# Show the value returned by make_experiment_binary_perceptrone (bound to `result` in the previous cell).
result

In [None]:
# Sweep over hidden widths 1..64 (inclusive) with a single hidden layer; the
# function is given "../experiments/alzheimers_disease/" as its save location.
# NOTE(review): unlike the single run, log_loss_init / log_loss_train are not
# passed here, so the function's defaults apply - confirm that is intended.
make_experiments_binary_perceptrone(
    # data
    dataset_train=dataset_train,
    dataset_test=dataset_test,
    # architecture grid
    n_hidden=1,
    dims_hidden=[*range(1, 65)],
    # weight-decay values passed for the "classic" variant
    weight_decays_classic=[0, 0.0001, 0.001, 0.01, 0.1],
    # inference settings passed for the "bayesian" variant
    sample_size_inference_bayesian=1000,
    batch_size_inference_bayesian=64,
    # reproducibility and output location
    random_seed=RANDOM_SEED,
    path_to_save="../experiments/alzheimers_disease/",
)