# Libraries

In [None]:
!pip install catboost

In [None]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import MinMaxScaler, StandardScaler
from sklearn.metrics import classification_report
from sklearn.tree import DecisionTreeClassifier
from sklearn.ensemble import RandomForestClassifier, GradientBoostingClassifier
from sklearn.neighbors import KNeighborsClassifier
from sklearn.linear_model import LogisticRegression
from sklearn.preprocessing import OneHotEncoder
from sklearn.svm import SVC
from xgboost import XGBClassifier
from catboost import CatBoostClassifier
from sklearn.model_selection import GridSearchCV
from imblearn.over_sampling import SMOTE
from sklearn.ensemble import StackingClassifier
#from sklearn.pipeline import Pipeline
from imblearn.pipeline import Pipeline
from sklearn.metrics import accuracy_score, precision_score, recall_score, f1_score, roc_auc_score
import joblib
import tensorflow as tf
from tensorflow import keras

%matplotlib inline
import warnings
warnings.filterwarnings("ignore")
sns.set_theme(context='notebook', palette='muted', style='darkgrid')

In [None]:
main_df = pd.read_csv("/content/alzheimers_disease_data.csv")
main_df.head().T

In [None]:
from google.colab import drive
drive.mount('/content/drive')

In [None]:
main_df.info()

In [None]:
main_df.describe().T

In [None]:
sum(main_df.duplicated())

In [None]:
len(main_df.columns)

In [None]:
# Remove the PatientID and DoctorInCharge columns before analysis.
# Reassigning (instead of inplace=True) together with errors='ignore' keeps the
# cell idempotent: re-running it no longer raises a KeyError.
main_df = main_df.drop(columns=['PatientID', 'DoctorInCharge'], errors='ignore')
len(main_df.columns)

<div style="padding: 20px; border-radius: 10px; box-shadow: 0 2px 4px 0 rgba(0, 0, 0, 0.1); border: 2px solid #888888;">
    <h1 style="font-size: 24px; font-family: 'Arial'; color: #c77220"><b>Initial Data Exploration Summary</b></h1>
    <ul style="font-size: 20px; font-family: 'Arial'; line-height: 1.5em;">
        <li>The dataset contains a total of <strong>2,149 observations</strong>.</li>
        <li>All values in the dataset are <strong>non-null</strong> and <strong>numerical</strong>.</li>
        <li>There are <strong>no duplicate</strong> records.</li>
        <li>After removing the `DoctorInCharge` and `PatientID` columns, the dataset consists of <strong>33 features</strong>.</li>
    </ul>
</div>

# Data Visualization

In [None]:
# A column with more than 10 distinct values is treated as numerical.
unique_counts = main_df.nunique()
numerical_columns = unique_counts[unique_counts > 10].index.to_list()

# Every remaining column, apart from the 'Diagnosis' target, is categorical.
categorical_columns = (
    main_df.columns
    .difference(numerical_columns)
    .difference(['Diagnosis'])
    .to_list()
)

In [None]:
# Tick labels for each categorical column, listed in the order of its integer codes.
binary_columns = [
    'Smoking', 'FamilyHistoryAlzheimers', 'CardiovascularDisease', 'Diabetes',
    'Depression', 'HeadInjury', 'Hypertension', 'MemoryComplaints',
    'BehavioralProblems', 'Confusion', 'Disorientation', 'PersonalityChanges',
    'DifficultyCompletingTasks', 'Forgetfulness',
]
custom_labels = {
    'Gender': ['Male', 'Female'],
    'Ethnicity': ['Caucasian', 'African American', 'Asian', 'Other'],
    'EducationLevel': ['None', 'High School', 'Bachelor\'s', 'Higher'],
    **{binary_column: ['No', 'Yes'] for binary_column in binary_columns},
}

# One countplot per categorical column, with readable tick labels.
for column in categorical_columns:
    fig, ax = plt.subplots(figsize=(8, 5))
    sns.countplot(data=main_df, x=column, ax=ax)
    ax.set_title(f'Countplot of {column}')

    # Replace the numeric codes on the x-axis with their labels.
    labels = custom_labels[column]
    ticks = range(len(labels))
    ax.set_xticks(ticks)
    ax.set_xticklabels(labels)

    plt.show()

<div style="padding: 20px; border-radius: 10px; box-shadow: 0 2px 4px 0 rgba(0, 0, 0, 0.1); border: 2px solid #888888;">
    <h1 style="font-size: 24px; font-family: 'Arial'; color: #c77220"><b>Observations from Visualization of Categorical Features</b></h1>
    <ul style="font-size: 20px; font-family: 'Arial'; line-height: 1.5em;">
        <li>Overall, the dataset predominantly consists of individuals <strong>without disease or health problems</strong>.</li>
        <li>The most represented demographic is <strong>Caucasian</strong>.</li>
        <li><strong>High school</strong> graduates constitute the largest educational group, closely followed by individuals with a <strong>bachelor's</strong> degree.</li>
        <li>Notably, both <strong>females</strong> and <strong>males</strong> are equally represented across the dataset.</li>
    </ul>
</div>

In [None]:
# One histogram (20 bins, with a KDE overlay) per numerical column.
for column in numerical_columns:
    fig, ax = plt.subplots(figsize=(8, 5))
    sns.histplot(data=main_df, x=column, kde=True, bins=20, ax=ax)
    ax.set_title(f'Distribution of {column}')
    plt.show()

<div style="padding: 20px; border-radius: 10px; box-shadow: 0 2px 4px 0 rgba(0, 0, 0, 0.1); border: 2px solid #888888;">
    <h1 style="font-size: 24px; font-family: 'Arial'; color: #c77220"><b>Observations from Visualization of Numerical Features</b></h1>
    <ul style="font-size: 20px; font-family: 'Arial'; line-height: 1.5em;">
        <li>Most of the columns show a <strong>fairly uniform</strong> distribution. </li>
        <li>The `MMSE` (Mini-Mental State Examination) scores appear to follow a <strong>bimodal</strong> distribution, indicating two distinct groups within the data.</li>
    </ul>
</div>

In [None]:
# Compute the correlation matrix once and reuse it for the mask and the plot.
corr_matrix = main_df.corr()

# Hide the upper triangle (diagonal included) since the matrix is symmetric.
mask = np.triu(np.ones_like(corr_matrix, dtype=bool))

plt.figure(figsize=(12, 10))
sns.heatmap(corr_matrix, mask=mask, cmap="coolwarm", cbar_kws={"shrink": .5})

plt.show()

The heatmap reveals that the features do not have any strong correlations among themselves. However, there are five columns that show a correlation **with the target variable**.

In [None]:
# Pearson correlation of every feature with the target. The target's
# self-correlation is removed by label, so the result does not depend on
# 'Diagnosis' being the last column of the frame.
correlations = (
    main_df.corr(numeric_only=True)['Diagnosis']
    .drop('Diagnosis')
    .sort_values()
)

# Set the size of the figure
plt.figure(figsize=(20, 7))

# Bar plot of the coefficients, sorted from most negative to most positive
ax = correlations.plot(kind='bar', width=0.7)

# Fix the y-axis to the full [-1, 1] range so bar heights are comparable
ax.set(ylim=[-1, 1], ylabel='Pearson Correlation', xlabel='Features',
       title='Pearson Correlation with Diagnosis')

# Rotate x-axis labels for better readability
ax.set_xticklabels(correlations.index, rotation=45, ha='right')

plt.tight_layout()
plt.show()

<div style="padding: 20px; border-radius: 10px; box-shadow: 0 2px 4px 0 rgba(0, 0, 0, 0.1); border: 2px solid #888888;">
    <h1 style="font-size: 24px; font-family: 'Arial'; color: #c77220"><b>Observations from Exploring Correlations</b></h1>
    <ul style="font-size: 20px; font-family: 'Arial'; line-height: 1.5em;">
        <li>As observed, there are five columns correlated with the target variable.</li>
        <li>Three numerical features—`Functional Assessment`, `ADL` (Activities of Daily Living), and `MMSE` (Mini-Mental State Examination)—are <strong>negatively</strong> correlated with the `diagnosis of Alzheimer's disease`, with correlation coefficients of -0.36, -0.33, and -0.24 respectively. This indicates that lower scores in these assessments are associated with a higher likelihood of an Alzheimer's diagnosis.</li>
        <li>Additionally, two categorical variables—`Behavioral Problems` and `Memory Complaints`—are <strong>positively</strong> correlated with the `diagnosis`, with correlation coefficients of 0.22 and 0.30 respectively. This means the presence of these issues is associated with a higher likelihood of an Alzheimer's diagnosis, highlighting their significance in the diagnostic process.</li>
    </ul>
</div>

In [None]:
# Count occurrences of each Diagnosis class. The labels are taken from the
# same Series as the counts (sorted by class value), so a wedge can never be
# paired with the wrong label. value_counts() alone orders by frequency, which
# only matches a hard-coded [0, 1] while class 0 is the majority.
diagnosis_counts = main_df['Diagnosis'].value_counts().sort_index()
categories = diagnosis_counts.index.tolist()
counts = diagnosis_counts.tolist()

# Choose a color palette from Seaborn for the pie chart
colors = sns.color_palette("muted")

# Plot the pie chart with the share of each Diagnosis class
plt.figure(figsize=(6, 6))
plt.pie(counts, labels=categories, autopct='%1.1f%%', startangle=140, colors=colors)
plt.title('Diagnosis Distribution')
plt.show()

In [None]:
main_df['Diagnosis'].value_counts()

<div style="padding: 20px; border-radius: 10px; box-shadow: 0 2px 4px 0 rgba(0, 0, 0, 0.1); border: 2px solid #888888;">
    <h1 style="font-size: 24px; font-family: 'Arial'; color: #c77220"><b>Observation from the Target Distribution</b></h1>
    <ul style="font-size: 20px; font-family: 'Arial'; line-height: 1.5em;">
        <li>The target variable is <strong>moderately imbalanced</strong>, with 65% of instances being 0 and 35% being 1.</li>
    </ul>
</div>

# Data Preprocessing

In [None]:
# Recode the integer Gender codes to labels in one vectorized call.
# Assigning the result back avoids chained `inplace=True` on a column
# selection, which does not modify main_df under pandas Copy-on-Write.
# Values that are already labels are left untouched, so the cell can be re-run.
main_df['Gender'] = main_df['Gender'].replace({0: 'Male', 1: 'Female'})

In [None]:
# Recode the integer Ethnicity codes to labels in one vectorized call.
# Assigning the result back avoids chained `inplace=True` on a column
# selection, which does not modify main_df under pandas Copy-on-Write.
# Values that are already labels are left untouched, so the cell can be re-run.
main_df['Ethnicity'] = main_df['Ethnicity'].replace({
    0: 'Caucasian',
    1: 'African American',
    2: 'Asian',
    3: 'Other',
})

In [None]:
# Recode the integer EducationLevel codes to labels in one vectorized call.
# Assigning the result back avoids chained `inplace=True` on a column
# selection, which does not modify main_df under pandas Copy-on-Write.
# Values that are already labels are left untouched, so the cell can be re-run.
main_df['EducationLevel'] = main_df['EducationLevel'].replace({
    0: 'None',
    1: 'High School',
    2: 'Bachelor\'s',
    3: 'Higher',
})

In [None]:
main_df.head()

In [None]:
# One-hot encode Gender: the source column is removed and the encoded
# indicator columns are appended at the end of the frame.
ohe = OneHotEncoder(handle_unknown='ignore', sparse_output=False)
ohe.set_output(transform="pandas")
ohe_transform = ohe.fit_transform(main_df[['Gender']])
main_df = pd.concat([main_df.drop(columns=['Gender']), ohe_transform], axis=1)
main_df.head()

In [None]:
# One-hot encode Ethnicity: the source column is removed and the encoded
# indicator columns are appended at the end of the frame.
ohe = OneHotEncoder(handle_unknown='ignore', sparse_output=False)
ohe.set_output(transform="pandas")
ohe_transform = ohe.fit_transform(main_df[['Ethnicity']])
main_df = pd.concat([main_df.drop(columns=['Ethnicity']), ohe_transform], axis=1)
main_df.head()

In [None]:
# One-hot encode EducationLevel: the source column is removed and the encoded
# indicator columns are appended at the end of the frame.
ohe = OneHotEncoder(handle_unknown='ignore', sparse_output=False)
ohe.set_output(transform="pandas")
ohe_transform = ohe.fit_transform(main_df[['EducationLevel']])
main_df = pd.concat([main_df.drop(columns=['EducationLevel']), ohe_transform], axis=1)
main_df.head()

In [None]:
# Print the distinct values of every column as a sanity check after encoding.
for column in main_df.columns:
    unique_values = main_df[column].unique()
    print(f"Unique values in column '{column}':", unique_values, "", sep="\n")

In [None]:
# Target vector first, then the feature matrix (every column except the target).
y = main_df['Diagnosis']
X = main_df.drop('Diagnosis', axis=1)

In [None]:
# Stratified, shuffled 80/20 train/test split with a fixed seed.
X_train, X_test, y_train, y_test = train_test_split(
    X,
    y,
    test_size=0.2,
    stratify=y,
    shuffle=True,
    random_state=4646,
)

# Scaling Columns

In [None]:
# Columns passed to the scalers below; both names refer to the same list object.
columns_to_scale = continuous_features = [
    "Age",
    "BMI",
    "AlcoholConsumption",
    "PhysicalActivity",
    "DietQuality",
    "SleepQuality",
    "SystolicBP",
    "DiastolicBP",
    "CholesterolTotal",
    "CholesterolLDL",
    "CholesterolHDL",
    "CholesterolTriglycerides",
    "MMSE",
    "FunctionalAssessment",
    "ADL",
]


# Standard Scaling

In [None]:
# Standardize only the continuous columns and keep every other feature.
# The scaled values are written back INTO the copies; rebinding the names to
# the scaler output would throw the copies away and drop every column that is
# not in columns_to_scale.
scaler = StandardScaler()

# Fit on the training split only, so no test statistics leak into the scaler.
X_train_standardized = X_train.copy()
X_train_standardized[columns_to_scale] = scaler.fit_transform(X_train[columns_to_scale])

X_test_standardized = X_test.copy()
X_test_standardized[columns_to_scale] = scaler.transform(X_test[columns_to_scale])

## Resampling

In [None]:
# Oversample the minority class of the standardized training data.
# A fixed random_state makes the synthetic samples reproducible across runs.
smote = SMOTE(sampling_strategy='minority', random_state=4646)
X_train_resampled, y_train_resampled = smote.fit_resample(X_train_standardized, y_train)

In [None]:
# Oversample the minority class of the unscaled training data.
# A fixed random_state makes the synthetic samples reproducible across runs.
smote = SMOTE(sampling_strategy='minority', random_state=4646)
X_train_resampled2, y_train_resampled2 = smote.fit_resample(X_train, y_train)

In [None]:
y_train_resampled.value_counts()

In [None]:
y_train_resampled2.value_counts()

## Training-01

In [None]:
best_estimators = {}

In [None]:
# Hyperparameter grids for the models trained on the standardized data
param_grids = {
    'K-Nearest Neighbors': {'n_neighbors': [3, 5, 7]},
    'Logistic Regression': {'C': [0.1, 1, 10]},
    'Support Vector Machine': {'C': [0.1, 1, 10], 'gamma': [0.1, 1, 'scale', 'auto']}
}

# Classification models instantiated with default parameters
models = {
    'K-Nearest Neighbors': KNeighborsClassifier(),
    'Logistic Regression': LogisticRegression(),
    'Support Vector Machine': SVC()
}

# Tune each model with 5-fold GridSearchCV (F1), then evaluate on the test set
for name, model in models.items():
    grid_search = GridSearchCV(model, param_grids[name], cv = 5, scoring = 'f1')
    grid_search.fit(X_train_resampled, y_train_resampled)
    best_model = grid_search.best_estimator_
    best_estimators[name] = best_model

    y_pred = best_model.predict(X_test_standardized)
    report = classification_report(y_test, y_pred)
    print(f'{name} Classification Report:\n{report}\nBest Parameters: {grid_search.best_params_}\n')

    # ROC AUC needs a continuous score, not hard labels: use class-1
    # probabilities when the model exposes them, otherwise the decision function.
    if hasattr(best_model, 'predict_proba'):
        y_score = best_model.predict_proba(X_test_standardized)[:, 1]
    else:
        y_score = best_model.decision_function(X_test_standardized)

    # Store the result under its own name. Assigning it to `roc_auc_score`
    # would shadow the imported function and raise a TypeError on the next
    # iteration and in every later cell that calls it.
    model_roc_auc = roc_auc_score(y_test, y_score)
    print(f'ROC AUC Score: {model_roc_auc}\n')

In [None]:
# Hyperparameter grids for the tree-based models (trained on unscaled data)
param_grids = {
    'Decision Tree': {'max_depth': [3, 5, 7, 12, None]},
    'Random Forest': {'n_estimators': [50, 100, 200], 'max_depth': [3, 5, 7, 12, None]},
    'XGBoost': {'n_estimators': [50, 100, 200], 'learning_rate': [0.01, 0.1, 1], 'max_depth': [3, 5, 7]},
    'GradientBoostingClassifier': {'n_estimators': [100, 200], 'learning_rate': [0.01, 0.1, 0.5]},
    'CatBoost': {'iterations': [50, 100, 200], 'learning_rate': [0.01, 0.1, 1]}
}

# Every model is seeded (previously only GradientBoostingClassifier was), so
# the selected hyperparameters and reported scores are reproducible.
models = {
    'Decision Tree': DecisionTreeClassifier(random_state=4646),
    'Random Forest': RandomForestClassifier(random_state=4646),
    'XGBoost': XGBClassifier(random_state=4646),
    'GradientBoostingClassifier': GradientBoostingClassifier(random_state=4646),
    'CatBoost': CatBoostClassifier(verbose=0, random_seed=4646)
}

# Tune each model with 5-fold GridSearchCV (F1), then evaluate on the test set
for name, model in models.items():
    grid_search = GridSearchCV(model, param_grids[name], cv = 5, scoring = 'f1')
    grid_search.fit(X_train_resampled2, y_train_resampled2)
    best_model = grid_search.best_estimator_
    best_estimators[name] = best_model

    y_pred = best_model.predict(X_test)
    report = classification_report(y_test, y_pred)
    print(f'{name} Classification Report:\n{report}\nBest Parameters: {grid_search.best_params_}\n')

## Ensemble Learning

### Pipelines

In [None]:
def _resampling_pipeline(step_name, estimator, scale=False):
    """Build an imblearn Pipeline: [StandardScaler ->] SMOTE -> estimator.

    Args:
        step_name: Name of the final (model) step in the pipeline.
        estimator: The classifier placed at the end of the pipeline.
        scale: When True, a StandardScaler step is inserted before SMOTE.

    Returns:
        The assembled, unfitted Pipeline.
    """
    steps = []
    if scale:
        steps.append(('scaler', StandardScaler()))
    # Seeded SMOTE so that resampling inside the pipeline is reproducible.
    steps.append(('smote', SMOTE(sampling_strategy='minority', random_state=4646)))
    steps.append((step_name, estimator))
    return Pipeline(steps)


# Tree-based models: resampling only.
catBoost_pipeline = _resampling_pipeline('CatBoost', best_estimators['CatBoost'])
xgboost_pipeline = _resampling_pipeline('XGBoost', best_estimators['XGBoost'])
gradientBoosting_pipeline = _resampling_pipeline(
    'GradientBoostingClassifier', best_estimators['GradientBoostingClassifier']
)

# SVM, KNN and logistic regression: standardize, then resample.
svm_pipeline = _resampling_pipeline('SVM', best_estimators['Support Vector Machine'], scale=True)
knn_pipeline = _resampling_pipeline('KNN', best_estimators['K-Nearest Neighbors'], scale=True)
logistic_regression_pipeline = _resampling_pipeline(
    'LogisticRegression', best_estimators['Logistic Regression'], scale=True
)

In [None]:
# Base learners: four resampling pipelines
estimators = [
    ('CatBoost', catBoost_pipeline),
    ('XGBoost', xgboost_pipeline),
    ('GradientBoostingClassifier', gradientBoosting_pipeline),
    ('Support Vector Machine', svm_pipeline)
]

# Meta-model: gradient boosting on the base learners' outputs
final_estimator = GradientBoostingClassifier()

# Build and fit the stack (5-fold CV for the meta-features)
stacking_model = StackingClassifier(estimators=estimators, final_estimator=final_estimator, cv=5)
stacking_model.fit(X_train, y_train)

# Classification report on the held-out test data
y_pred = stacking_model.predict(X_test)
report = classification_report(y_test, y_pred)
print(f'Stacking Classifier Classification Report:\n{report}\n')

# ROC AUC from class-1 probabilities, when the stack exposes predict_proba
y_pred_stacking = stacking_model.predict(X_test)
y_pred_proba_stacking = (
    stacking_model.predict_proba(X_test)[:, 1]
    if hasattr(stacking_model, "predict_proba")
    else None
)

if y_pred_proba_stacking is None:
    stack_roc_auc_score = 'N/A'
else:
    stack_roc_auc_score = roc_auc_score(y_test, y_pred_proba_stacking)
print(f'Stacking Classifier ROC AUC Score: {stack_roc_auc_score}')

In [None]:
# Base learners: CatBoost and XGBoost resampling pipelines
estimators = [
    ('CatBoost', catBoost_pipeline),
    ('XGBoost', xgboost_pipeline)
]

# Meta-model: logistic regression with default settings
final_estimator = LogisticRegression()

# Build and fit the stack (5-fold CV for the meta-features)
stacking_model = StackingClassifier(estimators=estimators, final_estimator=final_estimator, cv=5)
stacking_model.fit(X_train, y_train)

# Classification report on the held-out test data
y_pred = stacking_model.predict(X_test)
report = classification_report(y_test, y_pred)
print(f'Stacking Classifier Classification Report:\n{report}\n')

# ROC AUC from class-1 probabilities, when the stack exposes predict_proba
y_pred_stacking = stacking_model.predict(X_test)
y_pred_proba_stacking = (
    stacking_model.predict_proba(X_test)[:, 1]
    if hasattr(stacking_model, "predict_proba")
    else None
)

if y_pred_proba_stacking is None:
    stack_roc_auc_score = 'N/A'
else:
    stack_roc_auc_score = roc_auc_score(y_test, y_pred_proba_stacking)
print(f'Stacking Classifier ROC AUC Score: {stack_roc_auc_score}')

In [None]:
# Base learners: CatBoost and XGBoost resampling pipelines
estimators = [
    ('CatBoost', catBoost_pipeline),
    ('XGBoost', xgboost_pipeline)
]

# Meta-model: 3-nearest-neighbours classifier
final_estimator = KNeighborsClassifier(n_neighbors=3)

# Build and fit the stack (5-fold CV for the meta-features)
stacking_model = StackingClassifier(estimators=estimators, final_estimator=final_estimator, cv=5)
stacking_model.fit(X_train, y_train)

# Classification report on the held-out test data
y_pred = stacking_model.predict(X_test)
report = classification_report(y_test, y_pred)
print(f'Stacking Classifier Classification Report:\n{report}\n')

# ROC AUC from class-1 probabilities, when the stack exposes predict_proba
y_pred_stacking = stacking_model.predict(X_test)
y_pred_proba_stacking = (
    stacking_model.predict_proba(X_test)[:, 1]
    if hasattr(stacking_model, "predict_proba")
    else None
)

if y_pred_proba_stacking is None:
    stack_roc_auc_score = 'N/A'
else:
    stack_roc_auc_score = roc_auc_score(y_test, y_pred_proba_stacking)
print(f'Stacking Classifier ROC AUC Score: {stack_roc_auc_score}')

In [None]:
# Base learners: CatBoost and XGBoost resampling pipelines
estimators = [
    ('CatBoost', catBoost_pipeline),
    ('XGBoost', xgboost_pipeline)
]

# Meta-model: SVC with C=1.0 and gamma='auto'
final_estimator = SVC(C=1.0, gamma='auto')

# Build and fit the stack (5-fold CV for the meta-features)
stacking_model = StackingClassifier(estimators=estimators, final_estimator=final_estimator, cv=5)
stacking_model.fit(X_train, y_train)

# Classification report on the held-out test data
y_pred = stacking_model.predict(X_test)
report = classification_report(y_test, y_pred)
print(f'Stacking Classifier Classification Report:\n{report}\n')

# ROC AUC from class-1 probabilities; 'N/A' is printed when the stack does
# not expose predict_proba.
y_pred_stacking = stacking_model.predict(X_test)
y_pred_proba_stacking = (
    stacking_model.predict_proba(X_test)[:, 1]
    if hasattr(stacking_model, "predict_proba")
    else None
)

if y_pred_proba_stacking is None:
    stack_roc_auc_score = 'N/A'
else:
    stack_roc_auc_score = roc_auc_score(y_test, y_pred_proba_stacking)
print(f'Stacking Classifier ROC AUC Score: {stack_roc_auc_score}')

In [None]:
# Base learners: CatBoost and XGBoost resampling pipelines
estimators = [
    ('CatBoost', catBoost_pipeline),
    ('XGBoost', xgboost_pipeline)
]

# Meta-model: logistic regression with C=0.1
final_estimator = LogisticRegression(C=0.1)

# Build and fit the stack (5-fold CV for the meta-features)
stacking_model = StackingClassifier(estimators=estimators, final_estimator=final_estimator, cv=5)
stacking_model.fit(X_train, y_train)

# Classification report on the held-out test data
y_pred = stacking_model.predict(X_test)
report = classification_report(y_test, y_pred)
print(f'Stacking Classifier Classification Report:\n{report}\n')

# ROC AUC from class-1 probabilities, when the stack exposes predict_proba
y_pred_stacking = stacking_model.predict(X_test)
y_pred_proba_stacking = (
    stacking_model.predict_proba(X_test)[:, 1]
    if hasattr(stacking_model, "predict_proba")
    else None
)

if y_pred_proba_stacking is None:
    stack_roc_auc_score = 'N/A'
else:
    stack_roc_auc_score = roc_auc_score(y_test, y_pred_proba_stacking)
print(f'Stacking Classifier ROC AUC Score: {stack_roc_auc_score}')

## Save

In [None]:
# Destination file for the serialized stacking model
model_path = '/content/Stack_std_LR_model3.joblib'

# Persist whichever `stacking_model` was fitted most recently
joblib.dump(stacking_model, model_path)

print(f"Model saved successfully at: {model_path}")

## Load

In [None]:
# Load the file written by the Save cell above. The previous path
# ('/content/stack_model2.joblib') is never written by this notebook, so the
# load failed on a fresh Restart & Run All.
# NOTE(review): joblib.load unpickles arbitrary objects; only load trusted files.
model_path = '/content/Stack_std_LR_model3.joblib'
loaded_model = joblib.load(model_path)

In [None]:
import matplotlib.pyplot as plt
from sklearn.metrics import confusion_matrix
import seaborn as sns

# Confusion matrix of the reloaded model on the test split
y_pred = loaded_model.predict(X_test)
cm = confusion_matrix(y_test, y_pred)

fig, ax = plt.subplots(figsize=(8, 6))
class_labels = loaded_model.classes_
sns.heatmap(
    cm,
    annot=True,
    fmt='d',
    cmap='Blues',
    xticklabels=class_labels,
    yticklabels=class_labels,
    ax=ax,
)
ax.set_xlabel('Predicted Label')
ax.set_ylabel('True Label')
ax.set_title('Confusion Matrix')
plt.show()

In [None]:
import matplotlib.pyplot as plt
from sklearn.metrics import confusion_matrix
import seaborn as sns

# NOTE(review): this cell is an exact duplicate of the preceding confusion
# matrix cell; consider deleting it.
y_pred = loaded_model.predict(X_test)
cm = confusion_matrix(y_test, y_pred)

fig, ax = plt.subplots(figsize=(8, 6))
class_labels = loaded_model.classes_
sns.heatmap(
    cm,
    annot=True,
    fmt='d',
    cmap='Blues',
    xticklabels=class_labels,
    yticklabels=class_labels,
    ax=ax,
)
ax.set_xlabel('Predicted Label')
ax.set_ylabel('True Label')
ax.set_title('Confusion Matrix')
plt.show()

In [None]:
import matplotlib.pyplot as plt
from sklearn.metrics import confusion_matrix
import seaborn as sns

# NOTE(review): this cell is an exact duplicate of the two preceding confusion
# matrix cells; consider deleting it.
y_pred = loaded_model.predict(X_test)
cm = confusion_matrix(y_test, y_pred)

fig, ax = plt.subplots(figsize=(8, 6))
class_labels = loaded_model.classes_
sns.heatmap(
    cm,
    annot=True,
    fmt='d',
    cmap='Blues',
    xticklabels=class_labels,
    yticklabels=class_labels,
    ax=ax,
)
ax.set_xlabel('Predicted Label')
ax.set_ylabel('True Label')
ax.set_title('Confusion Matrix')
plt.show()

## Test Loading

In [None]:
# Classification report of the reloaded model on the test data
y_pred = loaded_model.predict(X_test)
report = classification_report(y_test, y_pred)
print(f'Stacking Classifier Classification Report:\n{report}\n')

# ROC AUC from class-1 probabilities, when the model exposes predict_proba
y_pred_stacking = loaded_model.predict(X_test)
y_pred_proba_stacking = (
    loaded_model.predict_proba(X_test)[:, 1]
    if hasattr(loaded_model, "predict_proba")
    else None
)

if y_pred_proba_stacking is None:
    stack_roc_auc_score = 'N/A'
else:
    stack_roc_auc_score = roc_auc_score(y_test, y_pred_proba_stacking)
print(f'Stacking Classifier ROC AUC Score: {stack_roc_auc_score}')

# SHAP

In [None]:
import numpy as np
import shap

# Base estimators without the SMOTE pipelines used in the earlier stacks.
# NOTE(review): the hyperparameters are hard-coded here; presumably they mirror
# the grid-search winners — confirm against best_estimators.
estimators = [
    ('CatBoost', CatBoostClassifier(iterations=200, learning_rate=0.1, verbose=False)),
    ('XGBoost', XGBClassifier(n_estimators=200, learning_rate=0.1, max_depth=3))
]

# Meta-model: logistic regression with C=0.1
final_estimator = LogisticRegression(C=0.1)

# Stacking classifier whose meta-features are the base estimators' predict_proba outputs
stacking_model = StackingClassifier(
    estimators=estimators,
    final_estimator=final_estimator,
    cv=5,
    stack_method='predict_proba'  # Use probability predictions for stacking
)

# Fit on the already SMOTE-resampled (unscaled) training data; this rebinds
# `stacking_model`, replacing the stack fitted in the earlier cells.
stacking_model.fit(X_train_resampled2, y_train_resampled2)

In [None]:
# Classification report of the stack on the test data
y_pred = stacking_model.predict(X_test)
report = classification_report(y_test, y_pred)
print(f'Stacking Classifier Classification Report:\n{report}\n')

# ROC AUC from class-1 probabilities, when the stack exposes predict_proba
y_pred_stacking = stacking_model.predict(X_test)
y_pred_proba_stacking = (
    stacking_model.predict_proba(X_test)[:, 1]
    if hasattr(stacking_model, "predict_proba")
    else None
)

if y_pred_proba_stacking is None:
    stack_roc_auc_score = 'N/A'
else:
    stack_roc_auc_score = roc_auc_score(y_test, y_pred_proba_stacking)
print(f'Stacking Classifier ROC AUC Score: {stack_roc_auc_score}')

In [None]:
X_train.columns

In [None]:
import shap
shap.initjs()

In [None]:
def predict_proba_wrapper(X):
    """Return the stacking model's positive-class probability for each row of X.

    KernelExplainer may pass a plain NumPy array; it is wrapped in a DataFrame
    carrying X_test's column names before being handed to the model.
    """
    if isinstance(X, np.ndarray):
        X = pd.DataFrame(X, columns=X_test.columns)
    return stacking_model.predict_proba(X)[:, 1]  # Return only positive class probabilities

# Create a background dataset for SHAP
n_background = 100  # Number of background samples
background_data = shap.sample(X_train_resampled2, n_background)

# Initialize KernelExplainer with the wrapper function
explainer = shap.KernelExplainer(predict_proba_wrapper, background_data)

# Calculate SHAP values for a small test sample
n_samples = 10  # Number of test samples to explain
test_sample = X_test.iloc[:n_samples]
shap_values = explainer.shap_values(test_sample)

# Summary plot. show=False stops summary_plot from rendering the figure itself,
# so the title and tight_layout below apply to the SHAP plot instead of a new
# empty figure; plot_size=None keeps the figsize chosen here.
plt.figure(figsize=(12, 8))
shap.summary_plot(shap_values, test_sample, feature_names=X_test.columns,
                  plot_size=None, show=False)
plt.title("SHAP Summary Plot for Stacking Model")
plt.tight_layout()
plt.show()

# Feature importance bar plot (same show=False / plot_size=None handling)
plt.figure(figsize=(12, 8))
shap.summary_plot(shap_values, test_sample, feature_names=X_test.columns,
                  plot_type="bar", plot_size=None, show=False)
plt.title("Feature Importance Based on SHAP Values")
plt.tight_layout()
plt.show()

# Mean absolute SHAP value per feature, largest first
feature_importance = pd.DataFrame({
    'Feature': X_test.columns,
    'Importance': np.abs(shap_values).mean(0)
})
feature_importance = feature_importance.sort_values('Importance', ascending=False)

print("\nTop 10 Most Important Features:")
print(feature_importance.head(10))

# ANN

In [None]:
# First split: 70% training data, 30% held out (stratified, seeded).
# Note that this rebinds X_train / X_test / y_train / y_test used above.
X_train, X_test, y_train, y_test = train_test_split(
    X,
    y,
    test_size=0.3,
    stratify=y,
    shuffle=True,
    random_state=4646,
)

# Second split of the held-out part: 70% validation, 30% test.
X_valid, X_test, y_valid, y_test = train_test_split(
    X_test,
    y_test,
    test_size=0.3,
    stratify=y_test,
    shuffle=True,
    random_state=4646,
)

## Standard Scaling

In [None]:
# Standardize only the continuous columns and keep every other feature.
# The scaled values are written back INTO the copies; rebinding the names to
# the scaler output would throw the copies away and drop every column that is
# not in columns_to_scale.
scaler = StandardScaler()

# Fit on the training split only; validation and test reuse its statistics.
X_train_standardized = X_train.copy()
X_train_standardized[columns_to_scale] = scaler.fit_transform(X_train[columns_to_scale])

X_test_standardized = X_test.copy()
X_test_standardized[columns_to_scale] = scaler.transform(X_test[columns_to_scale])

X_valid_standardized = X_valid.copy()
X_valid_standardized[columns_to_scale] = scaler.transform(X_valid[columns_to_scale])

In [None]:
# Oversample the minority class of the standardized ANN training data.
# A fixed random_state makes the synthetic samples reproducible across runs.
smote = SMOTE(sampling_strategy='minority', random_state=4646)
X_train_resampled, y_train_resampled = smote.fit_resample(X_train_standardized, y_train)
y_train_resampled.value_counts()

In [None]:
def plot_history(history):
  """Plot training vs. validation loss and accuracy side by side.

  Args:
    history: Object whose `.history` dict holds the per-epoch series
      'loss', 'val_loss', 'accuracy' and 'val_accuracy'.
  """
  fig, (ax1, ax2) = plt.subplots(1, 2, figsize=(10, 4))
  ax1.plot(history.history['loss'], label='loss')
  ax1.plot(history.history['val_loss'], label='val_loss')
  ax1.set_xlabel('Epoch')
  ax1.set_ylabel('Binary crossentropy')
  ax1.legend()  # without a legend the two curves cannot be told apart
  ax1.grid(True)

  ax2.plot(history.history['accuracy'], label='accuracy')
  ax2.plot(history.history['val_accuracy'], label='val_accuracy')
  ax2.set_xlabel('Epoch')
  ax2.set_ylabel('Accuracy')
  ax2.legend()
  ax2.grid(True)

  plt.show()

In [None]:
def train_model(X_train, y_train, num_nodes, dropout_prob, lr, batch_size, epochs, validation_data=None):
  """Build, compile and fit a two-hidden-layer binary classifier.

  Args:
    X_train: Training features; `X_train.shape[1]` sets the input width.
    y_train: Training labels.
    num_nodes: Number of units in each of the two hidden Dense layers.
    dropout_prob: Dropout rate applied after each hidden layer.
    lr: Learning rate for the Adam optimizer.
    batch_size: Mini-batch size passed to `fit`.
    epochs: Number of training epochs.
    validation_data: Optional `(X_valid, y_valid)` tuple. When omitted, the
      notebook-level `X_valid_standardized` / `y_valid` are used, which is the
      behavior existing callers rely on.

  Returns:
    Tuple `(nn_model, history)`: the fitted model and the object returned by `fit`.
  """
  if validation_data is None:
    # Backward-compatible default: fall back to the notebook globals.
    validation_data = (X_valid_standardized, y_valid)

  nn_model = tf.keras.Sequential([
      tf.keras.layers.Dense(num_nodes, activation='relu', input_shape=(X_train.shape[1],)),
      tf.keras.layers.Dropout(dropout_prob),
      tf.keras.layers.Dense(num_nodes, activation='relu'),
      tf.keras.layers.Dropout(dropout_prob),
      tf.keras.layers.Dense(1, activation='sigmoid')
  ])

  nn_model.compile(optimizer=tf.keras.optimizers.Adam(lr), loss='binary_crossentropy',
                  metrics=['accuracy'])
  history = nn_model.fit(
    X_train, y_train, epochs=epochs, batch_size=batch_size, validation_data=validation_data, verbose=0
  )

  return nn_model, history

In [None]:
# Exhaustive search over 3 widths x 2 dropout rates x 3 learning rates x
# 3 batch sizes (54 models, 100 epochs each). The model with the lowest
# validation loss is kept in `least_loss_model`.
least_val_loss = float('inf')
least_loss_model = None
epochs=100
for num_nodes in [52, 78, 32]:
  for dropout_prob in[0, 0.2]:
    for lr in [0.01, 0.005, 0.001]:
      for batch_size in [32, 64, 128]:
        print(f"{num_nodes} nodes, dropout {dropout_prob}, lr {lr}, batch size {batch_size}")
        model, history = train_model(X_train_resampled, y_train_resampled, num_nodes, dropout_prob, lr, batch_size, epochs)
        plot_history(history)
        # evaluate() returns [loss, accuracy] here (the model is compiled with
        # metrics=['accuracy']), so index 0 is the validation loss.
        val_loss = model.evaluate(X_valid_standardized, y_valid)[0]
        if val_loss < least_val_loss:
          least_val_loss = val_loss
          least_loss_model = model

In [None]:
# Threshold the sigmoid outputs at 0.5 and flatten to a 1-D array of 0/1 labels.
y_pred = (least_loss_model.predict(X_test_standardized) > 0.5).astype(int).ravel()

In [None]:
print(classification_report(y_test, y_pred))

## Save

In [None]:
path = '/content/ann_standardized.keras'
least_loss_model.save(path)

## Load

In [None]:
loaded_model = tf.keras.models.load_model(path)

# Min-Max Scaling

In [None]:
# Min-max scale only the continuous columns and keep every other feature.
# The scaled values are written back INTO the copies; rebinding the names to
# the scaler output would throw the copies away and drop every column that is
# not in columns_to_scale.
scaler = MinMaxScaler()

# Fit on the training split only, so no test statistics leak into the scaler.
X_train_normalized = X_train.copy()
X_train_normalized[columns_to_scale] = scaler.fit_transform(X_train[columns_to_scale])

X_test_normalized = X_test.copy()
X_test_normalized[columns_to_scale] = scaler.transform(X_test[columns_to_scale])

## Resampling

In [None]:
# Oversample the minority class of the min-max-scaled training data.
# A fixed random_state makes the synthetic samples reproducible across runs.
smote = SMOTE(sampling_strategy='minority', random_state=4646)
X_train_resampled, y_train_resampled = smote.fit_resample(X_train_normalized, y_train)

In [None]:
# Oversample the minority class of the unscaled training data.
# A fixed random_state makes the synthetic samples reproducible across runs.
smote = SMOTE(sampling_strategy='minority', random_state=4646)
X_train_resampled2, y_train_resampled2 = smote.fit_resample(X_train, y_train)

## Training-01

In [None]:
best_estimators2 = {}

In [None]:
# Hyperparameter grids for the models trained on the min-max-scaled data
param_grids = {
    'K-Nearest Neighbors': {'n_neighbors': [3, 5, 7]},
    'Logistic Regression': {'C': [0.1, 1, 10]},
    'Support Vector Machine': {'C': [0.1, 1, 10], 'gamma': [0.1, 1, 50, 100, 'scale', 'auto']}
}

# Classification models instantiated with default parameters
models = {
    'K-Nearest Neighbors': KNeighborsClassifier(),
    'Logistic Regression': LogisticRegression(),
    'Support Vector Machine': SVC()
}

# Tune each model with 5-fold GridSearchCV (F1), store the winner in
# best_estimators2, and report its test-set performance.
for name, model in models.items():
    grid_search = GridSearchCV(estimator=model, param_grid=param_grids[name], scoring='f1', cv=5)
    grid_search.fit(X_train_resampled, y_train_resampled)
    best_model = grid_search.best_estimator_

    y_pred = best_model.predict(X_test_normalized)
    report = classification_report(y_test, y_pred)

    best_estimators1 = {name: grid_search.best_estimator_}
    best_estimators2.update(best_estimators1)
    print(f'{name} Classification Report:\n{report}\nBest Parameters: {grid_search.best_params_}\n')

In [None]:
# Hyperparameter grids for the tree-based models (trained on unscaled data)
param_grids = {
    'Decision Tree': {'max_depth': [3, 5, 7, 12, None]},
    'Random Forest': {'n_estimators': [50, 100, 200], 'max_depth': [3, 5, 7, 12, None]},
    'XGBoost': {'n_estimators': [50, 100, 200], 'learning_rate': [0.01, 0.1, 1], 'max_depth': [3, 5, 7]},
    'GradientBoostingClassifier': {'n_estimators': [100, 200], 'learning_rate': [0.01, 0.1, 0.5]},
    'CatBoost': {'iterations': [50, 100, 200], 'learning_rate': [0.01, 0.1, 1]}
}

# Every model is seeded (previously only GradientBoostingClassifier was), so
# the selected hyperparameters and reported scores are reproducible.
models = {
    'Decision Tree': DecisionTreeClassifier(random_state=4646),
    'Random Forest': RandomForestClassifier(random_state=4646),
    'XGBoost': XGBClassifier(random_state=4646),
    'GradientBoostingClassifier': GradientBoostingClassifier(random_state=4646),
    'CatBoost': CatBoostClassifier(verbose=0, random_seed=4646)
}

# Tune each model with 5-fold GridSearchCV (F1), then evaluate on the test set
for name, model in models.items():
    grid_search = GridSearchCV(model, param_grids[name], cv = 5, scoring = 'f1')
    grid_search.fit(X_train_resampled2, y_train_resampled2)
    best_model = grid_search.best_estimator_
    best_estimators2[name] = best_model

    y_pred = best_model.predict(X_test)
    report = classification_report(y_test, y_pred)
    print(f'{name} Classification Report:\n{report}\nBest Parameters: {grid_search.best_params_}\n')

## Ensemble Learning

In [None]:
# Base learners of the stack: four pipelines defined elsewhere in the notebook.
estimators = [
    ('CatBoost', catBoost_pipeline),
    ('XGBoost', xgboost_pipeline),
    ('GradientBoostingClassifier', gradientBoosting_pipeline),
    ('Support Vector Machine', svm_pipeline)
]

# Meta-model that combines the base learners' outputs.
final_estimator = GradientBoostingClassifier(random_state=4646)

# Build the stacking classifier (5-fold CV is used internally to produce the
# meta-model's training inputs) and fit it on the training split.
stacking_model = StackingClassifier(estimators=estimators, final_estimator=final_estimator, cv=5)
stacking_model.fit(X_train, y_train)

# Predict the test split once and report per-class metrics.
y_pred = stacking_model.predict(X_test)
report = classification_report(y_test, y_pred)
print(f'Stacking Classifier Classification Report:\n{report}\n')

# Reuse the predictions above instead of running a second, identical
# inference pass over the test set.
y_pred_stacking = y_pred

# ROC AUC needs the positive-class probability; fall back to 'N/A' when the
# stack does not expose predict_proba.
if hasattr(stacking_model, "predict_proba"):
    y_pred_proba_stacking = stacking_model.predict_proba(X_test)[:, 1]
    stack_roc_auc_score = roc_auc_score(y_test, y_pred_proba_stacking)
else:
    y_pred_proba_stacking = None
    stack_roc_auc_score = 'N/A'

print(f'Stacking Classifier ROC AUC Score: {stack_roc_auc_score}')

In [None]:
# Base learners of the stack: two pipelines defined elsewhere in the notebook.
estimators = [
    ('CatBoost', catBoost_pipeline),
    ('XGBoost', xgboost_pipeline)
]

# Meta-model that combines the base learners' outputs.
final_estimator = LogisticRegression()

# Build the stacking classifier (5-fold CV is used internally to produce the
# meta-model's training inputs) and fit it on the training split.
stacking_model = StackingClassifier(estimators=estimators, final_estimator=final_estimator, cv=5)
stacking_model.fit(X_train, y_train)

# Predict the test split once and report per-class metrics.
y_pred = stacking_model.predict(X_test)
report = classification_report(y_test, y_pred)
print(f'Stacking Classifier Classification Report:\n{report}\n')

# Reuse the predictions above instead of running a second, identical
# inference pass over the test set.
y_pred_stacking = y_pred

# ROC AUC needs the positive-class probability; fall back to 'N/A' when the
# stack does not expose predict_proba.
if hasattr(stacking_model, "predict_proba"):
    y_pred_proba_stacking = stacking_model.predict_proba(X_test)[:, 1]
    stack_roc_auc_score = roc_auc_score(y_test, y_pred_proba_stacking)
else:
    y_pred_proba_stacking = None
    stack_roc_auc_score = 'N/A'

print(f'Stacking Classifier ROC AUC Score: {stack_roc_auc_score}')

### Confusion Matrix

In [None]:
import matplotlib.pyplot as plt
from sklearn.metrics import confusion_matrix
import seaborn as sns

# Confusion matrix of the current y_pred against y_test, drawn as an annotated
# heatmap whose tick labels are the stacking model's class labels.
cm = confusion_matrix(y_test, y_pred)
plt.figure(figsize=(8, 6))
sns.heatmap(
    cm,
    annot=True,
    fmt='d',
    cmap='Blues',
    xticklabels=stacking_model.classes_,
    yticklabels=stacking_model.classes_,
).set(xlabel='Predicted Label', ylabel='True Label', title='Confusion Matrix')
plt.show()

### K-Fold Cross Validation

In [None]:
from sklearn.model_selection import StratifiedKFold, cross_val_score, KFold

# Stratified, shuffled 5-fold split with the notebook's fixed seed.
k = 5
skf = StratifiedKFold(n_splits=k, shuffle=True, random_state=4646)

# Accuracy of the stacking model on each fold of the full X / y data.
scores = cross_val_score(stacking_model, X, y, scoring='accuracy', cv=skf)

print(f"Accuracy Scores for each fold: {scores}")
print(f"Mean Accuracy: {np.mean(scores):.2f}")
print(f"Standard Deviation: {np.std(scores):.2f}")

In [None]:
from sklearn.metrics import roc_curve, auc

# ROC curve of the stacking model on the test split, treating label 1 as the
# positive class. (The former `fpr = {}` / `tpr = {}` / `roc_auc = {}` lines
# were dead: all three names are assigned right here.)
fpr, tpr, _ = roc_curve(y_test, y_pred_proba_stacking, pos_label=1)
roc_auc = auc(fpr, tpr)

plt.figure(figsize=(8, 6))
plt.plot(fpr, tpr, color='blue', lw=2, label=f'ROC curve (area = {roc_auc:.2f})')
# Diagonal reference line.
plt.plot([0, 1], [0, 1], color='gray', linestyle='--', lw=1, label='Random guess')
plt.xlabel('False Positive Rate')
plt.ylabel('True Positive Rate')
plt.title('Receiver Operating Characteristic (ROC) Curve')
plt.legend(loc='lower right')
plt.grid(True)
plt.show()

### Save Model

In [None]:
# Destination file for the serialized stacking ensemble.
model_path = '/content/Stack_model2.joblib'

# Persist the fitted estimator with joblib, then confirm where it was written.
joblib.dump(value=stacking_model, filename=model_path)

print(f"Model saved successfully at: {model_path}")

In [None]:
# Show the shape of the resampled training features as the cell output.
X_train_resampled.shape

# ANN

In [None]:
# Stage 1: stratified 70/30 split of the full data into train and hold-out.
X_train, X_test, y_train, y_test = train_test_split(
    X,
    y,
    test_size=0.3,
    stratify=y,
    shuffle=True,
    random_state=4646,
)

# Stage 2: stratified split of the hold-out; 70% of it becomes the validation
# set and the remaining 30% is kept as the final test set.
X_valid, X_test, y_valid, y_test = train_test_split(
    X_test,
    y_test,
    test_size=0.3,
    stratify=y_test,
    shuffle=True,
    random_state=4646,
)

In [None]:
# Fit the min-max scaler on the training split only, then apply that same
# fitted transform to the test and validation splits.
scaler = MinMaxScaler()

# The previous `X_*.copy()` assignments were dropped: each copy was overwritten
# by the scaler output on the very next line and never used.
# NOTE(review): each *_normalized array contains only `columns_to_scale`; any
# feature outside that list is not passed on to the network. Confirm this is
# intended (the discarded `.copy()` calls hint that a column-wise assignment
# may have been meant).
X_train_normalized = scaler.fit_transform(X_train[columns_to_scale])
X_test_normalized = scaler.transform(X_test[columns_to_scale])
X_valid_normalized = scaler.transform(X_valid[columns_to_scale])

In [None]:
# Oversample the minority class of the (normalized) training split only.
# The sampler is seeded with the notebook's seed so the synthetic samples, and
# therefore the trained networks, are reproducible across kernel restarts.
smote = SMOTE(sampling_strategy='minority', random_state=4646)
X_train_resampled, y_train_resampled = smote.fit_resample(X_train_normalized, y_train)
# Class counts after resampling, shown as the cell output.
y_train_resampled.value_counts()

In [None]:
def plot_history(history):
  """Plot training vs. validation loss and accuracy side by side.

  Args:
    history: object whose ``history`` attribute is a mapping with the keys
      'loss', 'val_loss', 'accuracy' and 'val_accuracy', each a per-epoch
      sequence (the object returned by ``nn_model.fit`` in ``train_model``).
  """
  fig, (ax1, ax2) = plt.subplots(1, 2, figsize=(10, 4))

  # Left panel: loss curves.
  ax1.plot(history.history['loss'], label='loss')
  ax1.plot(history.history['val_loss'], label='val_loss')
  ax1.set_xlabel('Epoch')
  ax1.set_ylabel('Binary crossentropy')
  ax1.grid(True)
  # Without a legend the labels above are never rendered and the two curves
  # cannot be told apart.
  ax1.legend()

  # Right panel: accuracy curves.
  ax2.plot(history.history['accuracy'], label='accuracy')
  ax2.plot(history.history['val_accuracy'], label='val_accuracy')
  ax2.set_xlabel('Epoch')
  ax2.set_ylabel('Accuracy')
  ax2.grid(True)
  ax2.legend()

  plt.show()

In [None]:
def train_model(X_train, y_train, num_nodes, dropout_prob, lr, batch_size, epochs, validation_data=None):
  """Build, compile and fit a two-hidden-layer binary classifier.

  The network is Dense(num_nodes, relu) -> Dropout -> Dense(num_nodes, relu)
  -> Dropout -> Dense(1, sigmoid), trained with Adam on binary cross-entropy
  and tracking accuracy.

  Args:
    X_train: training features; ``X_train.shape[1]`` sets the input width.
    y_train: training labels.
    num_nodes: number of units in each hidden layer.
    dropout_prob: dropout rate applied after each hidden layer.
    lr: learning rate passed to the Adam optimizer.
    batch_size: mini-batch size for fitting.
    epochs: number of training epochs.
    validation_data: optional ``(X_valid, y_valid)`` pair evaluated after each
      epoch. When omitted, the notebook-level ``X_valid_normalized`` and
      ``y_valid`` are used, which is the original behavior.

  Returns:
    Tuple ``(nn_model, history)``: the fitted model and the object returned
    by ``fit``.
  """
  if validation_data is None:
    # Fall back to the notebook globals so existing callers keep working.
    validation_data = (X_valid_normalized, y_valid)

  nn_model = tf.keras.Sequential([
      tf.keras.layers.Dense(num_nodes, activation='relu', input_shape=(X_train.shape[1],)),
      tf.keras.layers.Dropout(dropout_prob),
      tf.keras.layers.Dense(num_nodes, activation='relu'),
      tf.keras.layers.Dropout(dropout_prob),
      tf.keras.layers.Dense(1, activation='sigmoid')
  ])

  nn_model.compile(optimizer=tf.keras.optimizers.Adam(lr), loss='binary_crossentropy',
                  metrics=['accuracy'])
  # verbose=0 keeps the per-epoch log out of the notebook output.
  history = nn_model.fit(
    X_train, y_train, epochs=epochs, batch_size=batch_size, validation_data=validation_data, verbose=0
  )

  return nn_model, history

In [None]:
# Exhaustive sweep over hidden-layer width, dropout rate, learning rate and
# batch size; the network with the lowest validation loss is kept.
least_val_loss = float('inf')
least_loss_model = None
epochs = 100
for num_nodes in (52, 78, 32):
  for dropout_prob in (0, 0.2):
    for lr in (0.01, 0.005, 0.001):
      for batch_size in (32, 64, 128):
        print(f"{num_nodes} nodes, dropout {dropout_prob}, lr {lr}, batch size {batch_size}")
        model, history = train_model(X_train_resampled, y_train_resampled, num_nodes, dropout_prob, lr, batch_size, epochs)
        plot_history(history)
        # First element of evaluate()'s result is the loss.
        val_loss = model.evaluate(X_valid_normalized, y_valid)[0]
        if not (val_loss < least_val_loss):
          continue
        least_val_loss, least_loss_model = val_loss, model

In [None]:
# Threshold the best network's sigmoid outputs at 0.5 and flatten them into a
# 1-D array of 0/1 labels.
y_pred = (least_loss_model.predict(X_test_normalized) > 0.5).astype(int).reshape(-1)

In [None]:
# Per-class precision/recall/F1 of the current y_pred against y_test.
print(classification_report(y_test, y_pred))

# Deployment

In [None]:
!pip install flask-ngrok flask-cors pyngrok shap pandas

In [None]:
!pip install flask-ngrok flask shap pandas scikit-learn joblib

In [None]:
#%%writefile app.py
import pandas as pd
import shap
import numpy as np
from flask import Flask, request, render_template_string, redirect, url_for
from pyngrok import ngrok
import joblib
import threading

In [None]:
# Serialized model served by the web app.
# NOTE(review): this path differs from the file written in the "Save Model"
# section ('/content/Stack_model2.joblib'); confirm which artifact is meant.
model_path = '/content/Stack_std_LR_model3.joblib'
try:
    loaded_model = joblib.load(model_path)
except FileNotFoundError:
    print(f"Error: Model file not found at {model_path}")
    # `exit()` shuts the kernel down under IPython and reports success (status
    # 0) when run as a script. Raising SystemExit stops execution here with a
    # failure status instead.
    raise SystemExit(1)


app = Flask(__name__)


# Names of the form fields, in the exact order their values are assembled into
# the feature vector handed to the model. Do not reorder.
# NOTE(review): presumably this must match the column order used when the
# model was trained — verify against the training frame.
FEATURE_NAMES = [
    # Demographics and lifestyle
    'Age',
    'BMI',
    'Smoking',
    'AlcoholConsumption',
    'PhysicalActivity',
    'DietQuality',
    'SleepQuality',
    # Medical history
    'FamilyHistoryAlzheimers',
    'CardiovascularDisease',
    'Diabetes',
    'Depression',
    'HeadInjury',
    'Hypertension',
    # Clinical measurements
    'SystolicBP',
    'DiastolicBP',
    'CholesterolTotal',
    'CholesterolLDL',
    'CholesterolHDL',
    'CholesterolTriglycerides',
    # Cognitive and functional assessments, symptoms
    'MMSE',
    'FunctionalAssessment',
    'MemoryComplaints',
    'BehavioralProblems',
    'ADL',
    'Confusion',
    'Disorientation',
    'PersonalityChanges',
    'DifficultyCompletingTasks',
    'Forgetfulness',
    # One-hot encoded categorical fields
    'Gender_Female',
    'Gender_Male',
    'Ethnicity_African American',
    'Ethnicity_Asian',
    'Ethnicity_Caucasian',
    'Ethnicity_Other',
    "EducationLevel_Bachelor's",
    'EducationLevel_High School',
    'EducationLevel_Higher',
    'EducationLevel_None',
]


@app.route('/')
def index():
    """Send visitors of the root URL to the prediction form endpoint."""
    form_url = url_for('predict_form')
    return redirect(form_url)

@app.route('/predict', methods=['GET', 'POST'])
def predict_form():
    """
    Serve the risk-assessment form (GET) and score a submission (POST).

    On POST, every field listed in FEATURE_NAMES is read from the form and
    converted to float, in that order, and the resulting vector is passed to
    ``loaded_model.predict`` and ``loaded_model.predict_proba``. The response
    is a small HTML page with the verdict and the positive-class probability.
    Any exception raised while reading or scoring the input is reported on an
    HTML error page; the exception text is HTML-escaped first because it can
    contain raw user input.

    Returns:
        An HTML string (result page, error page, or the rendered form).
    """
    # Function-scope import: only the error path below needs it.
    from html import escape

    if request.method == 'POST':
        try:
            # Order matters: values are collected in FEATURE_NAMES order.
            features = [float(request.form[name]) for name in FEATURE_NAMES]

            # NOTE(review): the model receives a plain nested list without
            # column names — confirm the saved pipeline does not select
            # columns by name.
            prediction = loaded_model.predict([features])
            prediction_proba = loaded_model.predict_proba([features])[0][1]

            if prediction[0] == 1:
                result = f"Alzheimer ALERT!! (Risk Score: {prediction_proba:.2f})"
                result_color = '#dc3545'
            else:
                result = f"No significant risk of Alzheimer's detected. (Risk Score: {prediction_proba:.2f})"
                result_color = '#28a745'

            return f'''
                <!DOCTYPE html>
                <html>
                <head><title>Prediction Result</title></head>
                <body style="font-family: 'Segoe UI', sans-serif; background-color: #f0f2f5; display: flex; align-items: center; justify-content: center; height: 100vh; margin: 0;">
                    <div style="max-width: 600px; margin: 40px auto; padding: 30px; background: #fff; border-radius: 10px; box-shadow: 0 4px 20px rgba(0,0,0,0.1); text-align: center;">
                        <h2 style="color: {result_color}; font-size: 1.8rem;">Prediction Result</h2>
                        <p style="font-size: 1.2rem; margin: 20px 0;">{result}</p>
                        <a href="/predict" style="display: inline-block; padding: 12px 25px; background: #007bff; color: white; text-decoration: none; border-radius: 5px; transition: background 0.3s;">Make another prediction</a>
                    </div>
                </body>
                </html>
            '''

        except Exception as e:
            # A failed float() conversion echoes the submitted text in its
            # message, so the exception string is attacker-controlled. Escape
            # it before embedding it in HTML to prevent reflected XSS.
            error_message = f"Error processing your request: {escape(str(e))}. Please ensure all fields are filled with valid numbers."
            return f'''
                 <!DOCTYPE html>
                <html>
                <head><title>Error</title></head>
                <body style="font-family: 'Segoe UI', sans-serif; background-color: #f0f2f5; display: flex; align-items: center; justify-content: center; height: 100vh; margin: 0;">
                    <div style="max-width: 600px; margin: 40px auto; padding: 30px; background: #fff; border-radius: 10px; box-shadow: 0 4px 20px rgba(0,0,0,0.1); text-align: center;">
                        <h2 style="color: #dc3545; font-size: 1.8rem;">An Error Occurred</h2>
                        <p style="font-size: 1.2rem; margin: 20px 0; color: #555;">{error_message}</p>
                        <a href="/predict" style="display: inline-block; padding: 12px 25px; background: #007bff; color: white; text-decoration: none; border-radius: 5px; transition: background 0.3s;">Go back to the form</a>
                    </div>
                </body>
                </html>
            '''

    # GET (or any non-POST request): build and return the input form.
    styles = """
    <style>
        * { box-sizing: border-box; font-family: 'Segoe UI', Tahoma, Geneva, Verdana, sans-serif; }
        body { background-color: #f0f2f5; margin: 0; padding: 20px; color: #333; }
        .container { max-width: 1200px; margin: 0 auto; background: white; border-radius: 10px; box-shadow: 0 0 20px rgba(0,0,0,0.1); overflow: hidden; }
        header { background: linear-gradient(135deg, #1a6ea0, #3498db); color: white; padding: 30px 20px; text-align: center; }
        header h1 { margin: 0; font-size: 2.2rem; }
        header p { opacity: 0.9; max-width: 700px; margin: 10px auto 0; }
        .form-container { padding: 30px; }
        .form-grid { display: grid; grid-template-columns: repeat(auto-fit, minmax(300px, 1fr)); gap: 20px 40px;}
        .form-section { margin-bottom: 25px; }
        .form-section h3 { margin-top: 0; padding-bottom: 10px; border-bottom: 2px solid #3498db; color: #2c3e50; }
        .form-group { margin-bottom: 20px; }
        label { display: block; margin-bottom: 8px; font-weight: 500; color: #2c3e50; }
        input[type="text"] { width: 100%; padding: 12px 15px; border: 1px solid #ddd; border-radius: 5px; font-size: 1rem; transition: all 0.3s; }
        input[type="text"]:focus { border-color: #3498db; outline: none; box-shadow: 0 0 0 3px rgba(52, 152, 219, 0.2); }
        .hint { font-size: 0.85rem; color: #6c757d; margin-top: 5px; }
        .submit-container { text-align: center; margin-top: 30px; padding-top: 20px; border-top: 1px solid #eee; }
        input[type="submit"] { background: linear-gradient(135deg, #28a745, #218838); color: white; border: none; padding: 14px 35px; font-size: 1.1rem; border-radius: 5px; cursor: pointer; transition: all 0.3s; }
        input[type="submit"]:hover { transform: translateY(-2px); box-shadow: 0 5px 15px rgba(0,0,0,0.15); }
        .footer-note { text-align: center; margin-top: 20px; color: #6c757d; font-size: 0.9rem; padding: 0 20px 20px 20px; }
    </style>
    """

    def form_field(name, label, hint):
        """Return the HTML for one required text input with its label and hint."""
        return f'''
        <div class="form-group">
            <label for="{name}">{label}</label>
            <input type="text" id="{name}" name="{name}" required>
            <div class="hint">{hint}</div>
        </div>
        '''

    form_html = f"""
    <!DOCTYPE html>
    <html>
    <head>
        <title>Alzheimer's Disease Prediction</title>
        {styles}
    </head>
    <body>
        <div class="container">
            <header>
                <h1>Alzheimer's Disease Risk Assessment</h1>
                <p>Fill in the health information below to assess your risk. This tool provides an estimate and is not a medical diagnosis.</p>
            </header>

            <div class="form-container">
                <form method="post">
                    <div class="form-grid">

                        <div class="form-section">
                            <h3>Personal & Demographic</h3>
                            {form_field('Age', 'Age', 'In years (e.g., 75)')}
                            {form_field('Gender_Male', 'Gender: Male', 'Enter 1 for Male, 0 for otherwise')}
                            {form_field('Gender_Female', 'Gender: Female', 'Enter 1 for Female, 0 for otherwise')}
                            {form_field("EducationLevel_Bachelor's", "Education: Bachelor's", 'Enter 1 if yes, 0 if no')}
                            {form_field('EducationLevel_High School', 'Education: High School', 'Enter 1 if yes, 0 if no')}
                            {form_field('EducationLevel_Higher', 'Education: Higher Degree', 'Enter 1 if yes, 0 if no')}
                            {form_field('EducationLevel_None', 'Education: None', 'Enter 1 if yes, 0 if no')}
                            {form_field('Ethnicity_Caucasian', 'Ethnicity: Caucasian', 'Enter 1 if yes, 0 if no')}
                            {form_field('Ethnicity_African American', 'Ethnicity: African American', 'Enter 1 if yes, 0 if no')}
                            {form_field('Ethnicity_Asian', 'Ethnicity: Asian', 'Enter 1 if yes, 0 if no')}
                            {form_field('Ethnicity_Other', 'Ethnicity: Other', 'Enter 1 if yes, 0 if no')}
                        </div>

                        <div class="form-section">
                            <h3>Health Metrics</h3>
                            {form_field('BMI', 'Body Mass Index (BMI)', 'e.g., 22.5')}
                            {form_field('SystolicBP', 'Systolic Blood Pressure', 'Top number (e.g., 120 mmHg)')}
                            {form_field('DiastolicBP', 'Diastolic Blood Pressure', 'Bottom number (e.g., 80 mmHg)')}
                            {form_field('CholesterolTotal', 'Total Cholesterol', 'e.g., 180 mg/dL')}
                            {form_field('CholesterolLDL', 'LDL Cholesterol ("Bad")', 'e.g., 110 mg/dL')}
                            {form_field('CholesterolHDL', 'HDL Cholesterol ("Good")', 'e.g., 50 mg/dL')}
                            {form_field('CholesterolTriglycerides', 'Triglycerides', 'e.g., 150 mg/dL')}
                            {form_field('MMSE', 'MMSE Score', 'Mini-Mental State Examination (0-30)')}
                            {form_field('ADL', 'Activities of Daily Living (ADL)', 'Score from 0-10')}
                            {form_field('FunctionalAssessment', 'Functional Assessment', 'Score from 0-10')}
                        </div>

                        <div class="form-section">
                            <h3>Lifestyle & Medical History</h3>
                            {form_field('Smoking', 'Currently Smoking', 'Enter 1 for yes, 0 for no')}
                            {form_field('AlcoholConsumption', 'Alcohol Consumption', 'Scale from 0-20')}
                            {form_field('PhysicalActivity', 'Physical Activity Level', 'Scale from 0-10')}
                            {form_field('DietQuality', 'Diet Quality', 'Scale from 0-10')}
                            {form_field('SleepQuality', 'Sleep Quality', 'Scale from 4-10')}
                            {form_field('FamilyHistoryAlzheimers', "Family History of Alzheimer's", 'Enter 1 if yes, 0 if no')}
                            {form_field('CardiovascularDisease', 'History of Cardiovascular Disease', 'Enter 1 if yes, 0 if no')}
                            {form_field('Diabetes', 'History of Diabetes', 'Enter 1 if yes, 0 if no')}
                            {form_field('Hypertension', 'History of Hypertension', 'Enter 1 if yes, 0 if no')}
                            {form_field('Depression', 'History of Depression', 'Enter 1 if yes, 0 if no')}
                            {form_field('HeadInjury', 'History of Significant Head Injury', 'Enter 1 if yes, 0 if no')}
                        </div>

                        <div class="form-section">
                            <h3>Cognitive & Behavioral Symptoms</h3>
                            {form_field('MemoryComplaints', 'Subjective Memory Complaints', 'Enter 1 if yes, 0 if no')}
                            {form_field('Forgetfulness', 'Observable Forgetfulness', 'Enter 1 if yes, 0 if no')}
                            {form_field('DifficultyCompletingTasks', 'Difficulty with Familiar Tasks', 'Enter 1 if yes, 0 if no')}
                            {form_field('Confusion', 'Episodes of Confusion', 'Enter 1 if yes, 0 if no')}
                            {form_field('Disorientation', 'Episodes of Disorientation', 'Enter 1 if yes, 0 if no')}
                            {form_field('PersonalityChanges', 'Observed Personality Changes', 'Enter 1 if yes, 0 if no')}
                            {form_field('BehavioralProblems', 'Observed Behavioral Problems', 'Enter 1 if yes, 0 if no')}
                        </div>

                    </div>

                    <div class="submit-container">
                        <input type="submit" value="Assess Alzheimer's Risk">
                    </div>
                </form>
            </div>
             <div class="footer-note">
                <p><strong>Disclaimer:</strong> This tool is for informational purposes only and does not constitute a medical diagnosis. Consult a healthcare professional for any health concerns.</p>
            </div>
        </div>
    </body>
    </html>
    """
    return render_template_string(form_html)

def start_flask(port=5000):
    """Run the Flask development server; blocks until the server stops.

    Args:
        port: TCP port to listen on. Defaults to 5000, the port the ngrok
            tunnel in the launcher section connects to.
    """
    app.run(port=port)

if __name__ == '__main__':
    import os

    # The ngrok authtoken is a credential and must not live in the notebook.
    # Provide it through the NGROK_AUTH_TOKEN environment variable instead.
    # NOTE(review): a token was previously hard-coded on this line; treat it
    # as leaked and revoke it in the ngrok dashboard.
    NGROK_AUTH_TOKEN = os.environ.get("NGROK_AUTH_TOKEN", "")

    if not NGROK_AUTH_TOKEN:
        print("Error: ngrok authtoken is missing. Please set the NGROK_AUTH_TOKEN environment variable.")
    else:
        ngrok.set_auth_token(NGROK_AUTH_TOKEN)

        # Run Flask in a daemon thread so this cell can open the tunnel and
        # then wait on the server.
        flask_thread = threading.Thread(target=start_flask, daemon=True)
        flask_thread.start()

        try:
            # Start ngrok tunnel to the local Flask port.
            public_url = ngrok.connect(5000)
            print("="*50)
            print(f"✅ The Alzheimer's Prediction App is LIVE!")
            print(f"   Public URL: {public_url.public_url}")
            print("="*50)
            print("(Press CTRL+C to shut down the server)")

            # Block until the server thread ends or the user interrupts.
            flask_thread.join()

        except KeyboardInterrupt:
            print("\n shutting down server and ngrok tunnel...")
            ngrok.kill()
        except Exception as e:
            print(f"An error occurred: {e}")
            ngrok.kill()