In [1]:
!pip install optuna
!pip install catboost
!pip install imblearn
!pip install xgboost

# Data Manipulation and Visualization
import pandas as pd
import numpy as np
import seaborn as sns
import matplotlib.pyplot as plt

# Machine Learning Libraries
from sklearn.preprocessing import StandardScaler, LabelEncoder, OneHotEncoder
from sklearn.impute import SimpleImputer
from sklearn.model_selection import train_test_split, GridSearchCV, StratifiedKFold
from sklearn.linear_model import LogisticRegression
from sklearn.ensemble import RandomForestClassifier, VotingClassifier
from sklearn.neighbors import KNeighborsClassifier
from sklearn.naive_bayes import GaussianNB
from sklearn.neural_network import MLPClassifier
from sklearn.svm import SVC
from sklearn.metrics import accuracy_score, f1_score, classification_report
from sklearn.feature_selection import RFECV
from sklearn.model_selection import RandomizedSearchCV
from scipy.stats import uniform, randint
from imblearn.over_sampling import SMOTE
from catboost import CatBoostClassifier
from xgboost import XGBClassifier

# Deep Learning Libraries
import torch
import torch.nn as nn
import torch.optim as optim
from torch.utils.data import DataLoader, TensorDataset

# Hyperparameter Optimization
import optuna

# Utilities
import os
import time
import pickle

# Google Colab and Paths
from google.colab import drive
drive.mount('/content/drive')
dataset_path = '/content/drive/My Drive/Diabetes Dataset/diabetes_dataset.csv'
save_dir = '/content/drive/My Drive/Diabetes Dataset/optuna_studies'

Collecting optuna
  Downloading optuna-3.6.1-py3-none-any.whl.metadata (17 kB)
Collecting alembic>=1.5.0 (from optuna)
  Downloading alembic-1.13.2-py3-none-any.whl.metadata (7.4 kB)
Collecting colorlog (from optuna)
  Downloading colorlog-6.8.2-py3-none-any.whl.metadata (10 kB)
Collecting Mako (from alembic>=1.5.0->optuna)
  Downloading Mako-1.3.5-py3-none-any.whl.metadata (2.9 kB)
Downloading optuna-3.6.1-py3-none-any.whl (380 kB)
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m380.1/380.1 kB[0m [31m8.0 MB/s[0m eta [36m0:00:00[0m
[?25hDownloading alembic-1.13.2-py3-none-any.whl (232 kB)
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m233.0/233.0 kB[0m [31m8.6 MB/s[0m eta [36m0:00:00[0m
[?25hDownloading colorlog-6.8.2-py3-none-any.whl (11 kB)
Downloading Mako-1.3.5-py3-none-any.whl (78 kB)
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m78.6/78.6 kB[0m [31m1.8 MB/s[0m eta [36m0:00:00[0m
[?25hInstalling collected packages: Mak

# Loading the Dataset

In [2]:
df = pd.read_csv(dataset_path)
# Binary encoding for gender and location
binary_encoder = OneHotEncoder(drop='if_binary', sparse_output=False)
binary_encoded = binary_encoder.fit_transform(df[['gender', 'location']])
binary_encoded_df = pd.DataFrame(binary_encoded, columns=binary_encoder.get_feature_names_out(['gender', 'location']))
df = pd.concat([df, binary_encoded_df], axis=1)
df.drop(columns=['gender', 'location'], inplace=True)

# Since smoking_history has multiple categories, we will use label encoding
label_encoder = LabelEncoder()
df['smoking_history'] = label_encoder.fit_transform(df['smoking_history'])
scaler = StandardScaler()
num_features = ['age', 'bmi', 'hbA1c_level', 'blood_glucose_level']
df[num_features] = scaler.fit_transform(df[num_features])

# Split the features from the label
X = df.drop(columns=['diabetes'])
y = df['diabetes']

# Split the data into training and testing sets
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, stratify=y)

smote = SMOTE()
X_train_smote, y_train_smote = smote.fit_resample(X_train, y_train)
# Remove the 'year', 'race', and 'location_' columns
columns_to_remove = ['year']+ [col for col in X.columns if col.startswith('race:')] + [col for col in X.columns if col.startswith('location_')]
X_train_cleaned = X_train_smote.drop(columns=columns_to_remove)
X_test_cleaned = X_test.drop(columns=columns_to_remove)


Results

In [None]:
model_results = {
    "GaussianNB": {
        "accuracy": 0.75,
        "precision": {"class_0": 0.99, "class_1": 0.24},
        "recall": {"class_0": 0.73, "class_1": 0.94},
        "f1_score": {"class_0": 0.84, "class_1": 0.39},
        "best_cv_score": 0.8560346281260178,
        "best_params": {'var_smoothing': 1e-06}
    },
    "KNN": {
        "accuracy": 0.93,
        "precision": {"class_0": 0.98, "class_1": 0.54},
        "recall": {"class_0": 0.94, "class_1": 0.79},
        "f1_score": {"class_0": 0.96, "class_1": 0.64},
        "best_cv_score": 0.9467395686567859,
        "best_params": {'metric': 'manhattan', 'n_neighbors': 3, 'weights': 'distance'}
    },
    "CatBoost": {
        "accuracy": 0.97,
        "precision": {"class_0": 0.97, "class_1": 0.96},
        "recall": {"class_0": 1.00, "class_1": 0.70},
        "f1_score": {"class_0": 0.99, "class_1": 0.81},
        "best_cv_score": 0.9769942405184306,
        "best_params": {'depth': 6, 'iterations': 200, 'learning_rate': 0.1}
    },
    "RandomForest": {
        "accuracy": 0.96,
        "precision": {"class_0": 0.98, "class_1": 0.79},
        "recall": {"class_0": 0.98, "class_1": 0.74},
        "f1_score": {"class_0": 0.98, "class_1": 0.76},
        "best_cv_score": 0.9729655120292324,
        "best_params": {'bootstrap': False, 'max_depth': 30, 'min_samples_leaf': 1, 'min_samples_split': 5, 'n_estimators': 200}
    },
    "XGB": {
        "accuracy": 0.97,
        "precision": {"class_0": 0.97, "class_1": 0.94},
        "recall": {"class_0": 1.00, "class_1": 0.71},
        "f1_score": {"class_0": 0.98, "class_1": 0.81},
        "best_cv_score": 0.9760651261380138,
        "best_params": {'colsample_bytree': 0.6448155608672209, 'gamma': 0.07957111980914833, 'learning_rate': 0.10694704332753689, 'max_depth': 7, 'min_child_weight': 3, 'n_estimators': 233, 'reg_alpha': 0.09750671629451425, 'reg_lambda': 1.4907487792587846, 'subsample': 0.8891584386487102}
    },
    "LogisticRegression": {
        "accuracy": 0.88,
        "precision": {"class_0": 0.99, "class_1": 0.41},
        "recall": {"class_0": 0.89, "class_1": 0.87},
        "f1_score": {"class_0": 0.93, "class_1": 0.56},
        "best_cv_score": 0.886275468881269,
        "best_params": {'C': 0.01, 'solver': 'liblinear'}
    }
}

# 2. MODEL SHOWDOWN

In [None]:
# Define the LogisticRegression model
log_reg = LogisticRegression(max_iter=1000)

# Define the parameter grid
log_reg_param_grid = {
    'C': [0.01, 0.1, 1, 10, 100],
    'solver': ['liblinear', 'saga']
}

# Create the GridSearchCV object
grid_search = GridSearchCV(estimator=log_reg, param_grid=log_reg_param_grid, cv=5, n_jobs=-1, scoring='f1')

# Fit the model
grid_search.fit(X_train_cleaned, y_train_smote)

# Get the best estimator
best_log_reg = grid_search.best_estimator_

# Evaluate the model
y_pred = best_log_reg.predict(X_test_cleaned)
print(classification_report(y_test, y_pred))

# Optionally, print the best parameters and best score
print("Best parameters found: ", grid_search.best_params_)
print("Best cross-validation score: ", grid_search.best_score_)

              precision    recall  f1-score   support

           0       0.99      0.89      0.93     18300
           1       0.41      0.87      0.56      1700

    accuracy                           0.88     20000
   macro avg       0.70      0.88      0.75     20000
weighted avg       0.94      0.88      0.90     20000

Best parameters found:  {'C': 0.01, 'solver': 'liblinear'}
Best cross-validation score:  0.886275468881269


In [None]:


# Define the parameter distribution
xgb_param_dist = {
    'n_estimators': randint(100, 300),
    'max_depth': randint(4, 8),
    'learning_rate': uniform(0.01, 0.1),
    'subsample': uniform(0.6, 0.4),
    'colsample_bytree': uniform(0.6, 0.4),
    'gamma': uniform(0, 0.2),
    'min_child_weight': randint(1, 5),
    'reg_alpha': uniform(0, 0.1),
    'reg_lambda': uniform(1, 1)
}

# Create the RandomizedSearchCV object
random_search_xgb = RandomizedSearchCV(estimator=xgb, param_distributions=xgb_param_dist, n_iter=100, cv=5, n_jobs=-1, scoring='f1', random_state=42)

# Fit the model
random_search_xgb.fit(X_train_cleaned, y_train_smote)

# Get the best estimator
best_xgb = random_search_xgb.best_estimator_

# Evaluate the model
y_pred_xgb = best_xgb.predict(X_test_cleaned)
print(classification_report(y_test, y_pred_xgb))

# Optionally, print the best parameters and best score
print("Best parameters found: ", random_search_xgb.best_params_)
print("Best cross-validation score: ", random_search_xgb.best_score_)

Parameters: { "use_label_encoder" } are not used.



              precision    recall  f1-score   support

           0       0.97      1.00      0.98     18300
           1       0.94      0.71      0.81      1700

    accuracy                           0.97     20000
   macro avg       0.95      0.85      0.90     20000
weighted avg       0.97      0.97      0.97     20000

Best parameters found:  {'colsample_bytree': 0.6448155608672209, 'gamma': 0.07957111980914833, 'learning_rate': 0.10694704332753689, 'max_depth': 7, 'min_child_weight': 3, 'n_estimators': 233, 'reg_alpha': 0.09750671629451425, 'reg_lambda': 1.4907487792587846, 'subsample': 0.8891584386487102}
Best cross-validation score:  0.9760651261380138


In [None]:
# Define the RandomForestClassifier model
rf = RandomForestClassifier()

# Define the extended parameter grid
rf_param_grid = {
    'n_estimators': [100, 200, 300],
    'max_depth': [10, 20, 30],
    'min_samples_split': [2, 5, 10],
    'min_samples_leaf': [1, 2, 4],
    'bootstrap': [True, False]
}

# Create the GridSearchCV object
grid_search = GridSearchCV(estimator=rf, param_grid=rf_param_grid, cv=5, n_jobs=-1, scoring='f1')

# Fit the model
grid_search.fit(X_train_cleaned, y_train_smote)

# Get the best estimator
best_rf = grid_search.best_estimator_

# Evaluate the model
y_pred = best_rf.predict(X_test_cleaned)
print(classification_report(y_test, y_pred))

# Optionally, print the best parameters and best score
print("Best parameters found: ", grid_search.best_params_)
print("Best cross-validation score: ", grid_search.best_score_)

              precision    recall  f1-score   support

           0       0.98      0.98      0.98     18300
           1       0.79      0.74      0.76      1700

    accuracy                           0.96     20000
   macro avg       0.88      0.86      0.87     20000
weighted avg       0.96      0.96      0.96     20000

Best parameters found:  {'bootstrap': False, 'max_depth': 30, 'min_samples_leaf': 1, 'min_samples_split': 5, 'n_estimators': 200}
Best cross-validation score:  0.9729655120292324


In [None]:
# Define the CatBoostClassifier
catboost = CatBoostClassifier(verbose=0)

# Define the parameter grid
catboost_param_grid = {
    'iterations': [100, 200],
    'depth': [4, 6],
    'learning_rate': [0.01, 0.1]
}

# Create the GridSearchCV object
grid_search = GridSearchCV(estimator=catboost, param_grid=catboost_param_grid, cv=5, n_jobs=-1, scoring='f1')

# Fit the model
grid_search.fit(X_train_cleaned, y_train_smote)

# Get the best estimator
best_catboost = grid_search.best_estimator_

# Evaluate the model
y_pred = best_catboost.predict(X_test_cleaned)
print(classification_report(y_test, y_pred))

# Optionally, print the best parameters and best score
print("Best parameters found: ", grid_search.best_params_)
print("Best cross-validation score: ", grid_search.best_score_)

              precision    recall  f1-score   support

           0       0.97      1.00      0.99     18300
           1       0.96      0.70      0.81      1700

    accuracy                           0.97     20000
   macro avg       0.97      0.85      0.90     20000
weighted avg       0.97      0.97      0.97     20000

Best parameters found:  {'depth': 6, 'iterations': 200, 'learning_rate': 0.1}
Best cross-validation score:  0.9769942405184306


In [None]:
# Define the KNeighborsClassifier model
knn = KNeighborsClassifier()

# Define the parameter grid
knn_param_grid = {
    'n_neighbors': [3, 5, 7, 10, 20, 40],
    'weights': ['uniform', 'distance'],
    'metric': ['euclidean', 'manhattan']
}

# Create the GridSearchCV object
grid_search = GridSearchCV(estimator=knn, param_grid=knn_param_grid, cv=5, n_jobs=-1, scoring='f1')

# Fit the model
grid_search.fit(X_train_cleaned, y_train_smote)

# Get the best estimator
best_knn = grid_search.best_estimator_

# Evaluate the model
y_pred = best_knn.predict(X_test_cleaned)
print(classification_report(y_test, y_pred))

# Optionally, print the best parameters and best score
print("Best parameters found: ", grid_search.best_params_)
print("Best cross-validation score: ", grid_search.best_score_)

              precision    recall  f1-score   support

           0       0.98      0.94      0.96     18300
           1       0.54      0.79      0.64      1700

    accuracy                           0.93     20000
   macro avg       0.76      0.86      0.80     20000
weighted avg       0.94      0.93      0.93     20000

Best parameters found:  {'metric': 'manhattan', 'n_neighbors': 3, 'weights': 'distance'}
Best cross-validation score:  0.9467395686567859


In [None]:
# Define the GaussianNB model
gnb = GaussianNB()

# Define the parameter grid
gnb_param_grid = {
    'var_smoothing': np.logspace(-9, -6, 4)
}

# Create the GridSearchCV object
grid_search = GridSearchCV(estimator=gnb, param_grid=gnb_param_grid, cv=5, n_jobs=-1, scoring='f1')

# Fit the model
grid_search.fit(X_train_cleaned, y_train_smote)

# Get the best estimator
best_gnb = grid_search.best_estimator_

# Evaluate the model
y_pred = best_gnb.predict(X_test_cleaned)
print(classification_report(y_test, y_pred))

# Optionally, print the best parameters and best score
print("Best parameters found: ", grid_search.best_params_)
print("Best cross-validation score: ", grid_search.best_score_)

              precision    recall  f1-score   support

           0       0.99      0.73      0.84     18300
           1       0.24      0.94      0.39      1700

    accuracy                           0.75     20000
   macro avg       0.62      0.84      0.61     20000
weighted avg       0.93      0.75      0.80     20000

Best parameters found:  {'var_smoothing': 1e-06}
Best cross-validation score:  0.8560346281260178


NEURAL NETS ON DRUGS

In [None]:
# Check if GPU is available
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")

# Convert the data to PyTorch tensors and move them to the GPU
X_train_tensor = torch.tensor(X_train_cleaned.values, dtype=torch.float32).to(device)
y_train_tensor = torch.tensor(y_train_smote.values, dtype=torch.float32).to(device)
X_test_tensor = torch.tensor(X_test_cleaned.values, dtype=torch.float32).to(device)
y_test_tensor = torch.tensor(y_test.values, dtype=torch.float32).to(device)

# Create DataLoader for training and testing
train_dataset = TensorDataset(X_train_tensor, y_train_tensor)
test_dataset = TensorDataset(X_test_tensor, y_test_tensor)
train_loader = DataLoader(train_dataset, batch_size=32, shuffle=True)
test_loader = DataLoader(test_dataset, batch_size=32, shuffle=False)

# Define the neural network class
class DiabetesNN(nn.Module):
    def __init__(self, input_dim, hidden_dim, output_dim, n_layers, dropout_rate, activation):
        super(DiabetesNN, self).__init__()
        layers = []
        layers.append(nn.Linear(input_dim, hidden_dim))
        layers.append(activation)
        layers.append(nn.Dropout(dropout_rate))
        for _ in range(n_layers - 1):
            layers.append(nn.Linear(hidden_dim, hidden_dim))
            layers.append(activation)
            layers.append(nn.Dropout(dropout_rate))
        layers.append(nn.Linear(hidden_dim, output_dim))
        self.network = nn.Sequential(*layers)

    def forward(self, x):
        return self.network(x)

# Define the objective function for Optuna
def objective(trial):
    # Hyperparameters to tune
    hidden_dim = trial.suggest_categorical('hidden_dim', [32 * i for i in range(1, 9)])  # Multiples of 32 up to 32*8
    n_layers = trial.suggest_int('n_layers', 1, 5)
    dropout_rate = trial.suggest_float('dropout_rate', 0.1, 0.5)
    learning_rate = trial.suggest_float('learning_rate', 1e-5, 1e-2)
    activation_name = trial.suggest_categorical('activation', ['ReLU', 'LeakyReLU', 'Tanh'])
    optimizer_name = trial.suggest_categorical('optimizer', ['Adam', 'SGD', 'RMSprop'])

    # Activation function
    if activation_name == 'ReLU':
        activation = nn.ReLU()
    elif activation_name == 'LeakyReLU':
        activation = nn.LeakyReLU()
    elif activation_name == 'Tanh':
        activation = nn.Tanh()

    # Initialize the model
    model = DiabetesNN(input_dim=X_train_tensor.shape[1], hidden_dim=hidden_dim, output_dim=1, n_layers=n_layers, dropout_rate=dropout_rate, activation=activation).to(device)
    criterion = nn.BCEWithLogitsLoss()

    # Optimizer
    if optimizer_name == 'Adam':
        optimizer = optim.Adam(model.parameters(), lr=learning_rate)
    elif optimizer_name == 'SGD':
        optimizer = optim.SGD(model.parameters(), lr=learning_rate)
    elif optimizer_name == 'RMSprop':
        optimizer = optim.RMSprop(model.parameters(), lr=learning_rate)

    # Training loop
    model.train()
    for epoch in range(25):  # You can increase the number of epochs
        for X_batch, y_batch in train_loader:
            optimizer.zero_grad()
            outputs = model(X_batch).squeeze()
            loss = criterion(outputs, y_batch)
            loss.backward()
            optimizer.step()

    # Evaluation
    model.eval()
    y_pred = []
    with torch.no_grad():
        for X_batch, _ in test_loader:
            outputs = model(X_batch).squeeze()
            preds = torch.round(torch.sigmoid(outputs))
            y_pred.extend(preds.cpu().numpy())  # Move predictions back to CPU for evaluation

    # Move y_test_tensor back to CPU for evaluation
    f1 = f1_score(y_test_tensor.cpu().numpy(), y_pred)
    return f1

# Create a study object and optimize the objective function
study = optuna.create_study(direction='maximize', storage=f'sqlite:///{save_dir}/example_study.db', study_name='example_study', load_if_exists=True)
study.optimize(objective, n_trials=50)

# Save the study results to a file
with open(os.path.join(save_dir, "study.pkl"), "wb") as f:
    pickle.dump(study, f)

# Load the study results from a file
with open(os.path.join(save_dir, "study.pkl"), "rb") as f:
    study = pickle.load(f)

# Display the best trial
print("Best trial:")
trial = study.best_trial
print(f"  Value: {trial.value}")
print("  Params: ")
for key, value in trial.params.items():
    print(f"    {key}: {value}")

# Function to resume the study
def resume_study(study_name, storage):
    study = optuna.load_study(study_name=study_name, storage=storage)
    study.optimize(objective, n_trials=100)
    return study

# Resume the study
study = resume_study("example_study", f'sqlite:///{save_dir}/example_study.db')

# Train the final model with the best hyperparameters
best_params = study.best_params_
activation = nn.ReLU() if best_params['activation'] == 'ReLU' else nn.LeakyReLU() if best_params['activation'] == 'LeakyReLU' else nn.Tanh()
model = DiabetesNN(input_dim=X_train_tensor.shape[1], hidden_dim=best_params['hidden_dim'], output_dim=1, n_layers=best_params['n_layers'], dropout_rate=best_params['dropout_rate'], activation=activation).to(device)
criterion = nn.BCEWithLogitsLoss()
optimizer = optim.Adam(model.parameters(), lr=best_params['learning_rate']) if best_params['optimizer'] == 'Adam' else optim.SGD(model.parameters(), lr=best_params['learning_rate']) if best_params['optimizer'] == 'SGD' else optim.RMSprop(model.parameters(), lr=best_params['learning_rate'])

# Training loop with gradient clipping
model.train()
for epoch in range(20):  # You can increase the number of epochs
    for X_batch, y_batch in train_loader:
        optimizer.zero_grad()
        outputs = model(X_batch).squeeze()
        loss = criterion(outputs, y_batch)
        loss.backward()

        # Gradient clipping
        torch.nn.utils.clip_grad_norm_(model.parameters(), max_norm=1.0)

        optimizer.step()

# Evaluation of the final model
model.eval()
y_pred = []
with torch.no_grad():
    for X_batch, _ in test_loader:
        outputs = model(X_batch).squeeze()
        preds = torch.round(torch.sigmoid(outputs))
        y_pred.extend(preds.cpu().numpy())  # Move predictions back to CPU for evaluation

accuracy = accuracy_score(y_test_tensor.cpu().numpy(), y_pred)  # Move y_test_tensor back to CPU for evaluation
f1 = f1_score(y_test_tensor.cpu().numpy(), y_pred)  # Move y_test_tensor back to CPU for evaluation
print("Final Model Accuracy: ", accuracy)
print("Final Model F1 Score: ", f1)

[I 2024-07-26 05:14:05,876] Using an existing study with name 'example_study' instead of creating a new one.
[I 2024-07-26 05:18:09,415] Trial 44 finished with value: 0.5871913580246914 and parameters: {'hidden_dim': 96, 'n_layers': 3, 'dropout_rate': 0.12025313227122837, 'learning_rate': 0.004336429107743327, 'activation': 'Tanh', 'optimizer': 'SGD'}. Best is trial 24 with value: 0.6571815718157181.
[I 2024-07-26 05:22:08,865] Trial 45 finished with value: 0.6024566192240203 and parameters: {'hidden_dim': 192, 'n_layers': 3, 'dropout_rate': 0.11603338633603141, 'learning_rate': 0.003182738519228813, 'activation': 'ReLU', 'optimizer': 'RMSprop'}. Best is trial 24 with value: 0.6571815718157181.
[I 2024-07-26 05:26:27,808] Trial 46 finished with value: 0.5576616474756421 and parameters: {'hidden_dim': 256, 'n_layers': 3, 'dropout_rate': 0.17126796848121623, 'learning_rate': 0.005836536400903046, 'activation': 'ReLU', 'optimizer': 'Adam'}. Best is trial 24 with value: 0.6571815718157181.

Train with best pars

In [None]:

# Manually input the best parameters
hidden_dim = 82
n_layers = 3
dropout_rate = 0.44302401141611214
learning_rate = 0.008185942513111595
activation = nn.LeakyReLU()
optimizer_name = 'RMSprop'

# Initialize the model
model = DiabetesNN(input_dim=X_train_tensor.shape[1], hidden_dim=hidden_dim, output_dim=1, n_layers=n_layers, dropout_rate=dropout_rate, activation=activation).to(device)
criterion = nn.BCEWithLogitsLoss()

# Optimizer
if optimizer_name == 'Adam':
    optimizer = optim.Adam(model.parameters(), lr=learning_rate)
elif optimizer_name == 'SGD':
    optimizer = optim.SGD(model.parameters(), lr=learning_rate)
elif optimizer_name == 'RMSprop':
    optimizer = optim.RMSprop(model.parameters(), lr=learning_rate)

# Early stopping and learning rate scheduler
early_stopping_patience = 5
lr_scheduler_patience = 3
lr_scheduler_factor = 0.1

best_val_loss = float('inf')
early_stopping_counter = 0

scheduler = optim.lr_scheduler.ReduceLROnPlateau(optimizer, mode='min', factor=lr_scheduler_factor, patience=lr_scheduler_patience, verbose=True)

# Training loop with early stopping and learning rate reduction
model.train()
for epoch in range(200):  # You can increase the number of epochs
    model.train()
    train_loss = 0.0
    train_correct = 0
    train_total = 0
    for X_batch, y_batch in train_loader:
        optimizer.zero_grad()
        outputs = model(X_batch).squeeze()
        loss = criterion(outputs, y_batch)
        loss.backward()

        # Gradient clipping
        torch.nn.utils.clip_grad_norm_(model.parameters(), max_norm=1.0)

        optimizer.step()

        train_loss += loss.item()
        preds = torch.round(torch.sigmoid(outputs))
        train_correct += (preds == y_batch).sum().item()
        train_total += y_batch.size(0)

    train_loss /= len(train_loader)
    train_accuracy = train_correct / train_total

    # Validation
    model.eval()
    val_loss = 0.0
    val_correct = 0
    val_total = 0
    with torch.no_grad():
        for X_batch, y_batch in test_loader:
            outputs = model(X_batch).squeeze()
            loss = criterion(outputs, y_batch)
            val_loss += loss.item()
            preds = torch.round(torch.sigmoid(outputs))
            val_correct += (preds == y_batch).sum().item()
            val_total += y_batch.size(0)

    val_loss /= len(test_loader)
    val_accuracy = val_correct / val_total
    scheduler.step(val_loss)

    print(f"Epoch {epoch+1}, Train Loss: {train_loss}, Train Accuracy: {train_accuracy}, Validation Loss: {val_loss}, Validation Accuracy: {val_accuracy}")

    # Early stopping
    if val_loss < best_val_loss:
        best_val_loss = val_loss
        early_stopping_counter = 0
        # Save the best model
        torch.save(model.state_dict(), os.path.join(save_dir, "best_model.pth"))
    else:
        early_stopping_counter += 1

    if early_stopping_counter >= early_stopping_patience:
        print("Early stopping triggered")
        break

# Load the best model
model.load_state_dict(torch.load(os.path.join(save_dir, "best_model.pth")))

# Evaluation of the final model
model.eval()
y_pred = []
with torch.no_grad():
    for X_batch, _ in test_loader:
        outputs = model(X_batch).squeeze()
        preds = torch.round(torch.sigmoid(outputs))
        y_pred.extend(preds.cpu().numpy())  # Move predictions back to CPU for evaluation

accuracy = accuracy_score(y_test_tensor.cpu().numpy(), y_pred)  # Move y_test_tensor back to CPU for evaluation
f1 = f1_score(y_test_tensor.cpu().numpy(), y_pred)  # Move y_test_tensor back to CPU for evaluation
print("Final Model Accuracy: ", accuracy)
print("Final Model F1 Score: ", f1)