In [9]:
from IPython.core.interactiveshell import InteractiveShell
InteractiveShell.ast_node_interactivity = "all"

In [10]:
import numpy as np
import pandas as pd
df = pd.read_csv('lung cancer survey.csv')
df_no_na = df.dropna()

df_age = df_no_na[df_no_na["AGE"] > 21]
# Use df_age for all models, where clustering algorithm models, such as Kmeans, randomforest, decision tree as they would have already categorise an age threshold within the model
df_age

# Use df_cluster only for logistic regression, lasso, ridge and elastic net - cross comparison of these models with df_age
df_cluster = df_age.copy()
df_cluster['cluster'] = df_cluster['AGE'].apply(lambda x: 1 if x >= 61 else 0)
df_cluster = df_cluster.drop("AGE", axis = 1)
df_cluster

Unnamed: 0,GENDER,AGE,SMOKING,YELLOW_FINGERS,ANXIETY,PEER_PRESSURE,CHRONIC DISEASE,FATIGUE,ALLERGY,WHEEZING,ALCOHOL CONSUMING,COUGHING,SHORTNESS OF BREATH,SWALLOWING DIFFICULTY,CHEST PAIN,LUNG_CANCER
0,0.0,61.0,0.0,0.0,1.0,1.0,0.0,1.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0,1.0
1,1.0,70.0,1.0,1.0,0.0,0.0,1.0,0.0,1.0,1.0,0.0,1.0,1.0,0.0,0.0,1.0
2,1.0,59.0,0.0,0.0,0.0,0.0,1.0,1.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0,0.0
3,1.0,54.0,0.0,0.0,0.0,1.0,0.0,1.0,0.0,1.0,1.0,1.0,1.0,0.0,1.0,1.0
4,0.0,54.0,1.0,0.0,0.0,1.0,0.0,1.0,1.0,0.0,1.0,0.0,0.0,1.0,0.0,1.0
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
8996,1.0,62.0,0.0,1.0,1.0,1.0,0.0,1.0,0.0,0.0,0.0,0.0,0.0,1.0,1.0,1.0
8997,0.0,71.0,1.0,1.0,1.0,0.0,0.0,0.0,1.0,0.0,1.0,1.0,1.0,0.0,1.0,1.0
8998,1.0,63.0,1.0,0.0,0.0,1.0,0.0,0.0,0.0,0.0,0.0,1.0,1.0,0.0,0.0,0.0
8999,1.0,70.0,1.0,1.0,0.0,0.0,1.0,1.0,1.0,0.0,1.0,0.0,1.0,0.0,1.0,1.0


Unnamed: 0,GENDER,SMOKING,YELLOW_FINGERS,ANXIETY,PEER_PRESSURE,CHRONIC DISEASE,FATIGUE,ALLERGY,WHEEZING,ALCOHOL CONSUMING,COUGHING,SHORTNESS OF BREATH,SWALLOWING DIFFICULTY,CHEST PAIN,LUNG_CANCER,cluster
0,0.0,0.0,0.0,1.0,1.0,0.0,1.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0,1.0,1
1,1.0,1.0,1.0,0.0,0.0,1.0,0.0,1.0,1.0,0.0,1.0,1.0,0.0,0.0,1.0,1
2,1.0,0.0,0.0,0.0,0.0,1.0,1.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0,0.0,0
3,1.0,0.0,0.0,0.0,1.0,0.0,1.0,0.0,1.0,1.0,1.0,1.0,0.0,1.0,1.0,0
4,0.0,1.0,0.0,0.0,1.0,0.0,1.0,1.0,0.0,1.0,0.0,0.0,1.0,0.0,1.0,0
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
8996,1.0,0.0,1.0,1.0,1.0,0.0,1.0,0.0,0.0,0.0,0.0,0.0,1.0,1.0,1.0,1
8997,0.0,1.0,1.0,1.0,0.0,0.0,0.0,1.0,0.0,1.0,1.0,1.0,0.0,1.0,1.0,1
8998,1.0,1.0,0.0,0.0,1.0,0.0,0.0,0.0,0.0,0.0,1.0,1.0,0.0,0.0,0.0,1
8999,1.0,1.0,1.0,0.0,0.0,1.0,1.0,1.0,0.0,1.0,0.0,1.0,0.0,1.0,1.0,1


## model training

### non-PCA

In [3]:
from sklearn.model_selection import train_test_split
from sklearn.neighbors import KNeighborsClassifier
from sklearn.metrics import accuracy_score, precision_score, recall_score, f1_score
from sklearn.preprocessing import StandardScaler

X = df_age[['GENDER', 'AGE', 'SMOKING', 'YELLOW_FINGERS', 'ANXIETY',
       'PEER_PRESSURE', 'CHRONIC DISEASE', 'FATIGUE ', 'ALLERGY ', 'WHEEZING',
       'ALCOHOL CONSUMING', 'COUGHING', 'SHORTNESS OF BREATH',
       'SWALLOWING DIFFICULTY', 'CHEST PAIN']].values
y = df_age['LUNG_CANCER'].values

X_train, X_val, y_train, y_val = train_test_split(X, y, test_size=0.2, random_state=888)

# Standardize the data (since KNN is sensitive to scale)
scaler = StandardScaler()
X_train = scaler.fit_transform(X_train)
X_val = scaler.transform(X_val)

k = 3  # Number of neighbors
knn_classifier = KNeighborsClassifier(n_neighbors=k)

# Fit the model
knn_classifier.fit(X_train, y_train)

# Predict on the validation set
y_pred = knn_classifier.predict(X_val)

# Calculate accuracy, precision, recall, and F1-score
accuracy = accuracy_score(y_val, y_pred)
precision = precision_score(y_val, y_pred)
recall = recall_score(y_val, y_pred)
f1 = f1_score(y_val, y_pred)

# Display the results
print(f"Accuracy: {accuracy:.4f}")
print(f"Precision: {precision:.4f}")
print(f"Recall: {recall:.4f}")
print(f"F1 Score: {f1:.4f}")


Accuracy: 0.8383
Precision: 0.8641
Recall: 0.9455
F1 Score: 0.9030


#### try to beat baseline above

In [21]:
##### maximise accuracy

In [4]:
from sklearn.model_selection import train_test_split, GridSearchCV, KFold
from sklearn.neighbors import KNeighborsClassifier
from sklearn.metrics import accuracy_score, precision_score, recall_score, f1_score
from sklearn.preprocessing import StandardScaler
from sklearn.pipeline import Pipeline

X_train, X_val, y_train, y_val = train_test_split(X, y, test_size=0.2, random_state=888)

# Create a pipeline to standardize the data and apply KNeighborsClassifier
pipeline = Pipeline([
    ('scaler', StandardScaler()),  # Standardizes the data
    ('knn', KNeighborsClassifier())  # KNN Classifier without setting neighbors yet
])

# Define the parameter grid for GridSearchCV
param_grid = {
    'knn__n_neighbors': list(range(1, 21))  # Search for the best k in the range 1 to 20
}

# Define the custom K-Fold cross-validation strategy
kf = KFold(n_splits=5, shuffle=True, random_state=888)  # 5-fold CV with shuffling

# Initialize GridSearchCV with the custom KFold cross-validation
grid_search = GridSearchCV(pipeline, param_grid, cv=kf, scoring='accuracy')

# Fit the GridSearchCV to find the best k
grid_search.fit(X_train, y_train)

# Get the best parameter for k
best_k = grid_search.best_params_['knn__n_neighbors']

# Retrieve best model
best_knn_classifier =  grid_search.best_estimator_

# Use best model to predict on the validation set
y_pred = best_knn_classifier.predict(X_val)

# Calculate metrics
accuracy = accuracy_score(y_val, y_pred)
precision = precision_score(y_val, y_pred)
recall = recall_score(y_val, y_pred)
f1 = f1_score(y_val, y_pred)

# Display the results
print(f"Optimal number of neighbors (k): {best_k}")
print(f"Accuracy on validation set with k={best_k}: {accuracy:.4f}")
print(f"Precision on validation set with k={best_k}: {precision:.4f}")
print(f"Recall on validation set with k={best_k}: {recall:.4f}")
print(f"F1 Score on validation set with k={best_k}: {f1:.4f}")

Optimal number of neighbors (k): 18
Accuracy on validation set with k=18: 0.8589
Precision on validation set with k=18: 0.8557
Recall on validation set with k=18: 0.9895
F1 Score on validation set with k=18: 0.9177


##### maximise recall

In [5]:
from sklearn.model_selection import train_test_split, GridSearchCV, KFold
from sklearn.neighbors import KNeighborsClassifier
from sklearn.metrics import accuracy_score, precision_score, recall_score, f1_score
from sklearn.preprocessing import StandardScaler
from sklearn.pipeline import Pipeline

X_train, X_val, y_train, y_val = train_test_split(X, y, test_size=0.2, random_state=888)

# Create a pipeline to standardize the data and apply KNeighborsClassifier
pipeline = Pipeline([
    ('scaler', StandardScaler()),  # Standardizes the data
    ('knn', KNeighborsClassifier())  # KNN Classifier without setting neighbors yet
])

# Define the parameter grid for GridSearchCV
param_grid = {
    'knn__n_neighbors': list(range(1, 21))  # Search for the best k in the range 1 to 20
}

# Define the custom K-Fold cross-validation strategy
kf = KFold(n_splits=5, shuffle=True, random_state=888)  # 5-fold CV with shuffling

# Initialize GridSearchCV with the custom KFold cross-validation
grid_search = GridSearchCV(pipeline, param_grid, cv=kf, scoring='recall')

# Fit the GridSearchCV to find the best k
grid_search.fit(X_train, y_train)

# Get the best parameter for k
best_k = grid_search.best_params_['knn__n_neighbors']

# Retrieve best model
best_knn_classifier =  grid_search.best_estimator_

# Use best model to predict on the validation set
y_pred = best_knn_classifier.predict(X_val)

# Calculate metrics
accuracy = accuracy_score(y_val, y_pred)
precision = precision_score(y_val, y_pred)
recall = recall_score(y_val, y_pred)
f1 = f1_score(y_val, y_pred)

# Display the results
print(f"Optimal number of neighbors (k): {best_k}")
print(f"Accuracy on validation set with k={best_k}: {accuracy:.4f}")
print(f"Precision on validation set with k={best_k}: {precision:.4f}")
print(f"Recall on validation set with k={best_k}: {recall:.4f}")
print(f"F1 Score on validation set with k={best_k}: {f1:.4f}")

Optimal number of neighbors (k): 19
Accuracy on validation set with k=19: 0.8556
Precision on validation set with k=19: 0.8509
Recall on validation set with k=19: 0.9923
F1 Score on validation set with k=19: 0.9162


In [6]:
#### maximise f1

In [7]:
from sklearn.model_selection import train_test_split, GridSearchCV, KFold
from sklearn.neighbors import KNeighborsClassifier
from sklearn.metrics import accuracy_score, precision_score, recall_score, f1_score
from sklearn.preprocessing import StandardScaler
from sklearn.pipeline import Pipeline

X_train, X_val, y_train, y_val = train_test_split(X, y, test_size=0.2, random_state=888)

# Create a pipeline to standardize the data and apply KNeighborsClassifier
pipeline = Pipeline([
    ('scaler', StandardScaler()),  # Standardizes the data
    ('knn', KNeighborsClassifier())  # KNN Classifier without setting neighbors yet
])

# Define the parameter grid for GridSearchCV
param_grid = {
    'knn__n_neighbors': list(range(1, 21))  # Search for the best k in the range 1 to 20
}

# Define the custom K-Fold cross-validation strategy
kf = KFold(n_splits=5, shuffle=True, random_state=888)  # 5-fold CV with shuffling

# Initialize GridSearchCV with the custom KFold cross-validation
grid_search = GridSearchCV(pipeline, param_grid, cv=kf, scoring='f1')

# Fit the GridSearchCV to find the best k
grid_search.fit(X_train, y_train)

# Get the best parameter for k
best_k = grid_search.best_params_['knn__n_neighbors']

# Retrieve best model
best_knn_classifier =  grid_search.best_estimator_

# Use best model to predict on the validation set
y_pred = best_knn_classifier.predict(X_val)

# Calculate metrics
accuracy = accuracy_score(y_val, y_pred)
precision = precision_score(y_val, y_pred)
recall = recall_score(y_val, y_pred)
f1 = f1_score(y_val, y_pred)

# Display the results
print(f"Optimal number of neighbors (k): {best_k}")
print(f"Accuracy on validation set with k={best_k}: {accuracy:.4f}")
print(f"Precision on validation set with k={best_k}: {precision:.4f}")
print(f"Recall on validation set with k={best_k}: {recall:.4f}")
print(f"F1 Score on validation set with k={best_k}: {f1:.4f}")

Optimal number of neighbors (k): 18
Accuracy on validation set with k=18: 0.8589
Precision on validation set with k=18: 0.8557
Recall on validation set with k=18: 0.9895
F1 Score on validation set with k=18: 0.9177


Recall (Sensitivity):
Reason: In medical diagnostics, especially for severe conditions like cancer, false negatives (missing a cancer diagnosis) can have serious consequences. Prioritizing recall ensures that more true positives are detected, meaning fewer cancer cases are missed.

In [8]:
## accuracy increase by 0.02,precision remained, recall increased by 0.04, f1-score increased by 0.02

### pairwise features

In [9]:
import pandas as pd
from itertools import product

feature_names = [col for col in df_age.columns if col != "LUNG_CANCER"]

df_pairwise = df_age.copy()
# Generate all combinations of interaction terms for these features
for i in range(len(feature_names)):
    for j in range(i + 1, len(feature_names)):
        new_column_name = f'{feature_names[i]}_{feature_names[j]}'
        df_pairwise[new_column_name] = df_pairwise[feature_names[i]] * df_pairwise[feature_names[j]]

df_pairwise

  df_pairwise[new_column_name] = df_pairwise[feature_names[i]] * df_pairwise[feature_names[j]]
  df_pairwise[new_column_name] = df_pairwise[feature_names[i]] * df_pairwise[feature_names[j]]
  df_pairwise[new_column_name] = df_pairwise[feature_names[i]] * df_pairwise[feature_names[j]]
  df_pairwise[new_column_name] = df_pairwise[feature_names[i]] * df_pairwise[feature_names[j]]
  df_pairwise[new_column_name] = df_pairwise[feature_names[i]] * df_pairwise[feature_names[j]]
  df_pairwise[new_column_name] = df_pairwise[feature_names[i]] * df_pairwise[feature_names[j]]


Unnamed: 0,GENDER,AGE,SMOKING,YELLOW_FINGERS,ANXIETY,PEER_PRESSURE,CHRONIC DISEASE,FATIGUE,ALLERGY,WHEEZING,...,ALCOHOL CONSUMING_COUGHING,ALCOHOL CONSUMING_SHORTNESS OF BREATH,ALCOHOL CONSUMING_SWALLOWING DIFFICULTY,ALCOHOL CONSUMING_CHEST PAIN,COUGHING_SHORTNESS OF BREATH,COUGHING_SWALLOWING DIFFICULTY,COUGHING_CHEST PAIN,SHORTNESS OF BREATH_SWALLOWING DIFFICULTY,SHORTNESS OF BREATH_CHEST PAIN,SWALLOWING DIFFICULTY_CHEST PAIN
0,0.0,61.0,0.0,0.0,1.0,1.0,0.0,1.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
1,1.0,70.0,1.0,1.0,0.0,0.0,1.0,0.0,1.0,1.0,...,0.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0,0.0,0.0
2,1.0,59.0,0.0,0.0,0.0,0.0,1.0,1.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
3,1.0,54.0,0.0,0.0,0.0,1.0,0.0,1.0,0.0,1.0,...,1.0,1.0,0.0,1.0,1.0,0.0,1.0,0.0,1.0,0.0
4,0.0,54.0,1.0,0.0,0.0,1.0,0.0,1.0,1.0,0.0,...,0.0,0.0,1.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
8996,1.0,62.0,0.0,1.0,1.0,1.0,0.0,1.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0
8997,0.0,71.0,1.0,1.0,1.0,0.0,0.0,0.0,1.0,0.0,...,1.0,1.0,0.0,1.0,1.0,0.0,1.0,0.0,1.0,0.0
8998,1.0,63.0,1.0,0.0,0.0,1.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0,0.0,0.0
8999,1.0,70.0,1.0,1.0,0.0,0.0,1.0,1.0,1.0,0.0,...,0.0,1.0,0.0,1.0,0.0,0.0,0.0,0.0,1.0,0.0


In [10]:
y = df_pairwise['LUNG_CANCER'].values
X = df_pairwise.drop("LUNG_CANCER", axis = 1).values

# Split the data into training and validation sets (80% training, 20% validation)
X_train, X_val, y_train, y_val = train_test_split(X, y, test_size=0.2, random_state=888)

scaler = StandardScaler()
X_train = scaler.fit_transform(X_train)
X_val = scaler.transform(X_val)

k = 3  # Number of neighbors
knn_classifier = KNeighborsClassifier(n_neighbors=k)

# Fit the model
knn_classifier.fit(X_train, y_train)

# Predict on the validation set
y_pred = knn_classifier.predict(X_val)

# Calculate accuracy, precision, recall, and F1-score
accuracy = accuracy_score(y_val, y_pred)
precision = precision_score(y_val, y_pred)
recall = recall_score(y_val, y_pred)
f1 = f1_score(y_val, y_pred)

# Display the results
print(f"Accuracy: {accuracy:.4f}")
print(f"Precision: {precision:.4f}")
print(f"Recall: {recall:.4f}")
print(f"F1 Score: {f1:.4f}")


Accuracy: 0.8311
Precision: 0.8677
Recall: 0.9295
F1 Score: 0.8975


In [11]:
# Create a pipeline to standardize the data and apply KNeighborsClassifier
pipeline = Pipeline([
    ('scaler', StandardScaler()),  # Standardizes the data
    ('knn', KNeighborsClassifier())  # KNN Classifier without setting neighbors yet
])

# Define the parameter grid for GridSearchCV
param_grid = {
    'knn__n_neighbors': list(range(1, 21))  # Search for the best k in the range 1 to 20
}

# Define the custom K-Fold cross-validation strategy
kf = KFold(n_splits=5, shuffle=True, random_state=888)  # 5-fold CV with shuffling

# Initialize GridSearchCV with the custom KFold cross-validation
grid_search = GridSearchCV(pipeline, param_grid, cv=kf, scoring='accuracy')

# Fit the GridSearchCV to find the best k
grid_search.fit(X_train, y_train)

# Get the best parameter for k
best_k = grid_search.best_params_['knn__n_neighbors']

# Retrieve best model
best_knn_classifier =  grid_search.best_estimator_

# Use best model to predict on the validation set
y_pred = best_knn_classifier.predict(X_val)

# Calculate metrics
accuracy = accuracy_score(y_val, y_pred)
precision = precision_score(y_val, y_pred)
recall = recall_score(y_val, y_pred)
f1 = f1_score(y_val, y_pred)

# Display the results
print(f"Optimal number of neighbors (k): {best_k}")
print(f"Accuracy on validation set with k={best_k}: {accuracy:.4f}")
print(f"Precision on validation set with k={best_k}: {precision:.4f}")
print(f"Recall on validation set with k={best_k}: {recall:.4f}")
print(f"F1 Score on validation set with k={best_k}: {f1:.4f}")

Optimal number of neighbors (k): 20
Accuracy on validation set with k=20: 0.8622
Precision on validation set with k=20: 0.8650
Recall on validation set with k=20: 0.9797
F1 Score on validation set with k=20: 0.9188


In [12]:
from sklearn.model_selection import train_test_split, GridSearchCV, KFold
from sklearn.neighbors import KNeighborsClassifier
from sklearn.metrics import accuracy_score, precision_score, recall_score, f1_score
from sklearn.preprocessing import StandardScaler
from sklearn.pipeline import Pipeline

X_train, X_val, y_train, y_val = train_test_split(X, y, test_size=0.2, random_state=888)

# Create a pipeline to standardize the data and apply KNeighborsClassifier
pipeline = Pipeline([
    ('scaler', StandardScaler()),  # Standardizes the data
    ('knn', KNeighborsClassifier())  # KNN Classifier without setting neighbors yet
])

# Define the parameter grid for GridSearchCV
param_grid = {
    'knn__n_neighbors': list(range(1, 21))  # Search for the best k in the range 1 to 20
}

# Define the custom K-Fold cross-validation strategy
kf = KFold(n_splits=5, shuffle=True, random_state=888)  # 5-fold CV with shuffling

# Initialize GridSearchCV with the custom KFold cross-validation
grid_search = GridSearchCV(pipeline, param_grid, cv=kf, scoring='recall')

# Fit the GridSearchCV to find the best k
grid_search.fit(X_train, y_train)

# Get the best parameter for k
best_k = grid_search.best_params_['knn__n_neighbors']

# Retrieve best model
best_knn_classifier =  grid_search.best_estimator_

# Use best model to predict on the validation set
y_pred = best_knn_classifier.predict(X_val)

# Calculate metrics
accuracy = accuracy_score(y_val, y_pred)
precision = precision_score(y_val, y_pred)
recall = recall_score(y_val, y_pred)
f1 = f1_score(y_val, y_pred)

# Display the results
print(f"Optimal number of neighbors (k): {best_k}")
print(f"Accuracy on validation set with k={best_k}: {accuracy:.4f}")
print(f"Precision on validation set with k={best_k}: {precision:.4f}")
print(f"Recall on validation set with k={best_k}: {recall:.4f}")
print(f"F1 Score on validation set with k={best_k}: {f1:.4f}")

Optimal number of neighbors (k): 19
Accuracy on validation set with k=19: 0.8628
Precision on validation set with k=19: 0.8624
Recall on validation set with k=19: 0.9846
F1 Score on validation set with k=19: 0.9195


In [13]:
from sklearn.model_selection import train_test_split, GridSearchCV, KFold
from sklearn.neighbors import KNeighborsClassifier
from sklearn.metrics import accuracy_score, precision_score, recall_score, f1_score
from sklearn.preprocessing import StandardScaler
from sklearn.pipeline import Pipeline

X_train, X_val, y_train, y_val = train_test_split(X, y, test_size=0.2, random_state=888)

# Create a pipeline to standardize the data and apply KNeighborsClassifier
pipeline = Pipeline([
    ('scaler', StandardScaler()),  # Standardizes the data
    ('knn', KNeighborsClassifier())  # KNN Classifier without setting neighbors yet
])

# Define the parameter grid for GridSearchCV
param_grid = {
    'knn__n_neighbors': list(range(1, 21))  # Search for the best k in the range 1 to 20
}

# Define the custom K-Fold cross-validation strategy
kf = KFold(n_splits=5, shuffle=True, random_state=888)  # 5-fold CV with shuffling

# Initialize GridSearchCV with the custom KFold cross-validation
grid_search = GridSearchCV(pipeline, param_grid, cv=kf, scoring='f1')

# Fit the GridSearchCV to find the best k
grid_search.fit(X_train, y_train)

# Get the best parameter for k
best_k = grid_search.best_params_['knn__n_neighbors']

# Retrieve best model
best_knn_classifier =  grid_search.best_estimator_

# Use best model to predict on the validation set
y_pred = best_knn_classifier.predict(X_val)

# Calculate metrics
accuracy = accuracy_score(y_val, y_pred)
precision = precision_score(y_val, y_pred)
recall = recall_score(y_val, y_pred)
f1 = f1_score(y_val, y_pred)

# Display the results
print(f"Optimal number of neighbors (k): {best_k}")
print(f"Accuracy on validation set with k={best_k}: {accuracy:.4f}")
print(f"Precision on validation set with k={best_k}: {precision:.4f}")
print(f"Recall on validation set with k={best_k}: {recall:.4f}")
print(f"F1 Score on validation set with k={best_k}: {f1:.4f}")

Optimal number of neighbors (k): 20
Accuracy on validation set with k=20: 0.8622
Precision on validation set with k=20: 0.8650
Recall on validation set with k=20: 0.9797
F1 Score on validation set with k=20: 0.9188


In [35]:
### PCA + Pairwise

In [18]:
pipeline = Pipeline([
    ('scaler', StandardScaler()),
    ('pca', PCA()),
    ('knn', KNeighborsClassifier()) 
])

param_grid = {
    'pca__n_components': list(range(1,15)),
    'knn__n_neighbors': list(range(1, 21))  # Search for the best k in the range 1 to 20
}


kf = KFold(n_splits=5, shuffle=True, random_state=888)  # 5-fold CV with shuffling

# Initialize GridSearchCV with the custom KFold cross-validation
grid_search = GridSearchCV(pipeline, param_grid, cv=kf, scoring='accuracy')

# Fit the GridSearchCV to find the best parameters
grid_search.fit(X_train, y_train)

# Get the best parameters for k and number of components
best_k = grid_search.best_params_['knn__n_neighbors']
best_n_components = grid_search.best_params_['pca__n_components']

# Retrieve the best model
best_knn_classifier = grid_search.best_estimator_

# Use the best model to predict on the validation set
y_pred = best_knn_classifier.predict(X_val)

# Calculate metrics
accuracy = accuracy_score(y_val, y_pred)
precision = precision_score(y_val, y_pred)
recall = recall_score(y_val, y_pred)
f1 = f1_score(y_val, y_pred)

# Display the results
print(f"Optimal number of neighbors (k): {best_k}")
print(f"Optimal number of PCA components: {best_n_components}")
print(f"Accuracy on validation set with k={best_k}, n_components={best_n_components}: {accuracy:.4f}")
print(f"Precision on validation set: {precision:.4f}")
print(f"Recall on validation set: {recall:.4f}")
print(f"F1 Score on validation set: {f1:.4f}")

  _data = np.array(data, dtype=dtype, copy=copy,


Optimal number of neighbors (k): 18
Optimal number of PCA components: 14
Accuracy on validation set with k=18, n_components=14: 0.8661
Precision on validation set: 0.8678
Recall on validation set: 0.9811
F1 Score on validation set: 0.9210


In [19]:
# Create a pipeline to standardize the data, apply PCA, and then KNeighborsClassifier
pipeline = Pipeline([
    ('scaler', StandardScaler()),
    ('pca', PCA()),
    ('knn', KNeighborsClassifier()) 
])

param_grid = {
    'pca__n_components': list(range(1,15)),
    'knn__n_neighbors': list(range(1, 21))  # Search for the best k in the range 1 to 20
}


kf = KFold(n_splits=5, shuffle=True, random_state=888)  # 5-fold CV with shuffling

# Initialize GridSearchCV with the custom KFold cross-validation
grid_search = GridSearchCV(pipeline, param_grid, cv=kf, scoring='recall')

# Fit the GridSearchCV to find the best parameters
grid_search.fit(X_train, y_train)

# Get the best parameters for k and number of components
best_k = grid_search.best_params_['knn__n_neighbors']
best_n_components = grid_search.best_params_['pca__n_components']

# Retrieve the best model
best_knn_classifier = grid_search.best_estimator_

# Use the best model to predict on the validation set
y_pred = best_knn_classifier.predict(X_val)

# Calculate metrics
accuracy = accuracy_score(y_val, y_pred)
precision = precision_score(y_val, y_pred)
recall = recall_score(y_val, y_pred)
f1 = f1_score(y_val, y_pred)

# Display the results
print(f"Optimal number of neighbors (k): {best_k}")
print(f"Optimal number of PCA components: {best_n_components}")
print(f"Accuracy on validation set with k={best_k}, n_components={best_n_components}: {accuracy:.4f}")
print(f"Precision on validation set: {precision:.4f}")
print(f"Recall on validation set: {recall:.4f}")
print(f"F1 Score on validation set: {f1:.4f}")

  _data = np.array(data, dtype=dtype, copy=copy,


Optimal number of neighbors (k): 19
Optimal number of PCA components: 1
Accuracy on validation set with k=19, n_components=1: 0.7928
Precision on validation set: 0.7970
Recall on validation set: 0.9923
F1 Score on validation set: 0.8840


In [20]:
pipeline = Pipeline([
    ('scaler', StandardScaler()),
    ('pca', PCA()),
    ('knn', KNeighborsClassifier()) 
])

param_grid = {
    'pca__n_components': list(range(1,15)),
    'knn__n_neighbors': list(range(1, 21))  # Search for the best k in the range 1 to 20
}


kf = KFold(n_splits=5, shuffle=True, random_state=888)  # 5-fold CV with shuffling

# Initialize GridSearchCV with the custom KFold cross-validation
grid_search = GridSearchCV(pipeline, param_grid, cv=kf, scoring='f1')

# Fit the GridSearchCV to find the best parameters
grid_search.fit(X_train, y_train)

# Get the best parameters for k and number of components
best_k = grid_search.best_params_['knn__n_neighbors']
best_n_components = grid_search.best_params_['pca__n_components']

# Retrieve the best model
best_knn_classifier = grid_search.best_estimator_

# Use the best model to predict on the validation set
y_pred = best_knn_classifier.predict(X_val)

# Calculate metrics
accuracy = accuracy_score(y_val, y_pred)
precision = precision_score(y_val, y_pred)
recall = recall_score(y_val, y_pred)
f1 = f1_score(y_val, y_pred)

# Display the results
print(f"Optimal number of neighbors (k): {best_k}")
print(f"Optimal number of PCA components: {best_n_components}")
print(f"Accuracy on validation set with k={best_k}, n_components={best_n_components}: {accuracy:.4f}")
print(f"Precision on validation set: {precision:.4f}")
print(f"Recall on validation set: {recall:.4f}")
print(f"F1 Score on validation set: {f1:.4f}")

  _data = np.array(data, dtype=dtype, copy=copy,


Optimal number of neighbors (k): 18
Optimal number of PCA components: 14
Accuracy on validation set with k=18, n_components=14: 0.8661
Precision on validation set: 0.8678
Recall on validation set: 0.9811
F1 Score on validation set: 0.9210


### with PCA

In [15]:
from sklearn.decomposition import PCA
from sklearn.model_selection import train_test_split, GridSearchCV, KFold
from sklearn.neighbors import KNeighborsClassifier
from sklearn.metrics import accuracy_score, precision_score, recall_score, f1_score
from sklearn.preprocessing import StandardScaler
from sklearn.pipeline import Pipeline

# Split the dataset into training and validation sets
X_train, X_val, y_train, y_val = train_test_split(X, y, test_size=0.2, random_state=888)

# Create a pipeline to standardize the data, apply PCA, and then KNeighborsClassifier
pipeline = Pipeline([
    ('scaler', StandardScaler()),
    ('pca', PCA()),
    ('knn', KNeighborsClassifier()) 
])

param_grid = {
    'pca__n_components': list(range(1,15)),
    'knn__n_neighbors': list(range(1, 21))  # Search for the best k in the range 1 to 20
}


kf = KFold(n_splits=5, shuffle=True, random_state=888)  # 5-fold CV with shuffling

# Initialize GridSearchCV with the custom KFold cross-validation
grid_search = GridSearchCV(pipeline, param_grid, cv=kf, scoring='accuracy')

# Fit the GridSearchCV to find the best parameters
grid_search.fit(X_train, y_train)

# Get the best parameters for k and number of components
best_k = grid_search.best_params_['knn__n_neighbors']
best_n_components = grid_search.best_params_['pca__n_components']

# Retrieve the best model
best_knn_classifier = grid_search.best_estimator_

# Use the best model to predict on the validation set
y_pred = best_knn_classifier.predict(X_val)

# Calculate metrics
accuracy = accuracy_score(y_val, y_pred)
precision = precision_score(y_val, y_pred)
recall = recall_score(y_val, y_pred)
f1 = f1_score(y_val, y_pred)

# Display the results
print(f"Optimal number of neighbors (k): {best_k}")
print(f"Optimal number of PCA components: {best_n_components}")
print(f"Accuracy on validation set with k={best_k}, n_components={best_n_components}: {accuracy:.4f}")
print(f"Precision on validation set: {precision:.4f}")
print(f"Recall on validation set: {recall:.4f}")
print(f"F1 Score on validation set: {f1:.4f}")

Optimal number of neighbors (k): 18
Optimal number of PCA components: 14
Accuracy on validation set with k=18, n_components=14: 0.8661
Precision on validation set: 0.8678
Recall on validation set: 0.9811
F1 Score on validation set: 0.9210


In [16]:
from sklearn.decomposition import PCA
from sklearn.model_selection import train_test_split, GridSearchCV, KFold
from sklearn.neighbors import KNeighborsClassifier
from sklearn.metrics import accuracy_score, precision_score, recall_score, f1_score
from sklearn.preprocessing import StandardScaler
from sklearn.pipeline import Pipeline

# Split the dataset into training and validation sets
X_train, X_val, y_train, y_val = train_test_split(X, y, test_size=0.2, random_state=888)

# Create a pipeline to standardize the data, apply PCA, and then KNeighborsClassifier
pipeline = Pipeline([
    ('scaler', StandardScaler()),
    ('pca', PCA()),
    ('knn', KNeighborsClassifier()) 
])

param_grid = {
    'pca__n_components': list(range(1,15)),
    'knn__n_neighbors': list(range(1, 21))  # Search for the best k in the range 1 to 20
}


kf = KFold(n_splits=5, shuffle=True, random_state=888)  # 5-fold CV with shuffling

# Initialize GridSearchCV with the custom KFold cross-validation
grid_search = GridSearchCV(pipeline, param_grid, cv=kf, scoring='recall')

# Fit the GridSearchCV to find the best parameters
grid_search.fit(X_train, y_train)

# Get the best parameters for k and number of components
best_k = grid_search.best_params_['knn__n_neighbors']
best_n_components = grid_search.best_params_['pca__n_components']

# Retrieve the best model
best_knn_classifier = grid_search.best_estimator_

# Use the best model to predict on the validation set
y_pred = best_knn_classifier.predict(X_val)

# Calculate metrics
accuracy = accuracy_score(y_val, y_pred)
precision = precision_score(y_val, y_pred)
recall = recall_score(y_val, y_pred)
f1 = f1_score(y_val, y_pred)

# Display the results
print(f"Optimal number of neighbors (k): {best_k}")
print(f"Optimal number of PCA components: {best_n_components}")
print(f"Accuracy on validation set with k={best_k}, n_components={best_n_components}: {accuracy:.4f}")
print(f"Precision on validation set: {precision:.4f}")
print(f"Recall on validation set: {recall:.4f}")
print(f"F1 Score on validation set: {f1:.4f}")

  _data = np.array(data, dtype=dtype, copy=copy,


Optimal number of neighbors (k): 19
Optimal number of PCA components: 1
Accuracy on validation set with k=19, n_components=1: 0.7928
Precision on validation set: 0.7970
Recall on validation set: 0.9923
F1 Score on validation set: 0.8840


In [17]:
from sklearn.decomposition import PCA
from sklearn.model_selection import train_test_split, GridSearchCV, KFold
from sklearn.neighbors import KNeighborsClassifier
from sklearn.metrics import accuracy_score, precision_score, recall_score, f1_score
from sklearn.preprocessing import StandardScaler
from sklearn.pipeline import Pipeline

# Split the dataset into training and validation sets
X_train, X_val, y_train, y_val = train_test_split(X, y, test_size=0.2, random_state=888)

# Create a pipeline to standardize the data, apply PCA, and then KNeighborsClassifier
pipeline = Pipeline([
    ('scaler', StandardScaler()),
    ('pca', PCA()),
    ('knn', KNeighborsClassifier()) 
])

param_grid = {
    'pca__n_components': list(range(1,15)),
    'knn__n_neighbors': list(range(1, 21))  # Search for the best k in the range 1 to 20
}


kf = KFold(n_splits=5, shuffle=True, random_state=888)  # 5-fold CV with shuffling

# Initialize GridSearchCV with the custom KFold cross-validation
grid_search = GridSearchCV(pipeline, param_grid, cv=kf, scoring='f1')

# Fit the GridSearchCV to find the best parameters
grid_search.fit(X_train, y_train)

# Get the best parameters for k and number of components
best_k = grid_search.best_params_['knn__n_neighbors']
best_n_components = grid_search.best_params_['pca__n_components']

# Retrieve the best model
best_knn_classifier = grid_search.best_estimator_

# Use the best model to predict on the validation set
y_pred = best_knn_classifier.predict(X_val)

# Calculate metrics
accuracy = accuracy_score(y_val, y_pred)
precision = precision_score(y_val, y_pred)
recall = recall_score(y_val, y_pred)
f1 = f1_score(y_val, y_pred)

# Display the results
print(f"Optimal number of neighbors (k): {best_k}")
print(f"Optimal number of PCA components: {best_n_components}")
print(f"Accuracy on validation set with k={best_k}, n_components={best_n_components}: {accuracy:.4f}")
print(f"Precision on validation set: {precision:.4f}")
print(f"Recall on validation set: {recall:.4f}")
print(f"F1 Score on validation set: {f1:.4f}")

Optimal number of neighbors (k): 18
Optimal number of PCA components: 14
Accuracy on validation set with k=18, n_components=14: 0.8661
Precision on validation set: 0.8678
Recall on validation set: 0.9811
F1 Score on validation set: 0.9210


### Neural Network

In [15]:
df_age_scaled

Unnamed: 0,GENDER,SMOKING,YELLOW_FINGERS,ANXIETY,PEER_PRESSURE,CHRONIC DISEASE,FATIGUE,ALLERGY,WHEEZING,ALCOHOL CONSUMING,COUGHING,SHORTNESS OF BREATH,SWALLOWING DIFFICULTY,CHEST PAIN,LUNG_CANCER,scaled_age
0,0.0,0.0,0.0,1.0,1.0,0.0,1.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0,1.0,0.025497
1,1.0,1.0,1.0,0.0,0.0,1.0,0.0,1.0,1.0,0.0,1.0,1.0,0.0,0.0,1.0,0.819532
2,1.0,0.0,0.0,0.0,0.0,1.0,1.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0,0.0,-0.150955
3,1.0,0.0,0.0,0.0,1.0,0.0,1.0,0.0,1.0,1.0,1.0,1.0,0.0,1.0,1.0,-0.592085
4,0.0,1.0,0.0,0.0,1.0,0.0,1.0,1.0,0.0,1.0,0.0,0.0,1.0,0.0,1.0,-0.592085
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
8996,1.0,0.0,1.0,1.0,1.0,0.0,1.0,0.0,0.0,0.0,0.0,0.0,1.0,1.0,1.0,0.113723
8997,0.0,1.0,1.0,1.0,0.0,0.0,0.0,1.0,0.0,1.0,1.0,1.0,0.0,1.0,1.0,0.907758
8998,1.0,1.0,0.0,0.0,1.0,0.0,0.0,0.0,0.0,0.0,1.0,1.0,0.0,0.0,0.0,0.201950
8999,1.0,1.0,1.0,0.0,0.0,1.0,1.0,1.0,0.0,1.0,0.0,1.0,0.0,1.0,1.0,0.819532


In [22]:
df_age_scaled.columns

Index(['GENDER', 'AGE', 'SMOKING', 'YELLOW_FINGERS', 'ANXIETY',
       'PEER_PRESSURE', 'CHRONIC DISEASE', 'FATIGUE ', 'ALLERGY ', 'WHEEZING',
       'ALCOHOL CONSUMING', 'COUGHING', 'SHORTNESS OF BREATH',
       'SWALLOWING DIFFICULTY', 'CHEST PAIN', 'LUNG_CANCER'],
      dtype='object')

In [33]:
from sklearn.preprocessing import StandardScaler
df_age_scaled = df_age.copy()
X = df_age_scaled.drop('LUNG_CANCER', axis=1).values
y = df_age_scaled['LUNG_CANCER'].values

X_train, X_val, y_train, y_val = train_test_split(X, y, test_size=0.2, random_state=888)

train_df = pd.DataFrame(np.hstack((X_train, y_train.reshape(-1, 1))), 
                        columns=['GENDER', 'AGE', 'SMOKING', 'YELLOW_FINGERS', 
                                 'ANXIETY', 'PEER_PRESSURE', 'CHRONIC DISEASE', 
                                 'FATIGUE', 'ALLERGY', 'WHEEZING', 
                                 'ALCOHOL CONSUMING', 'COUGHING', 
                                 'SHORTNESS OF BREATH', 'SWALLOWING DIFFICULTY', 
                                 'CHEST PAIN', 'LUNG_CANCER'])

# Combine X_val and y_val
val_df = pd.DataFrame(np.hstack((X_val, y_val.reshape(-1, 1))), 
                      columns=['GENDER', 'AGE', 'SMOKING', 'YELLOW_FINGERS', 
                               'ANXIETY', 'PEER_PRESSURE', 'CHRONIC DISEASE', 
                               'FATIGUE', 'ALLERGY', 'WHEEZING', 
                               'ALCOHOL CONSUMING', 'COUGHING', 
                               'SHORTNESS OF BREATH', 'SWALLOWING DIFFICULTY', 
                               'CHEST PAIN', 'LUNG_CANCER'])

scaler = StandardScaler()
X_train_scaled = scaler.fit_transform(train_df['AGE'].values.reshape(-1, 1))
train_df['scaled_age'] = X_train_scaled
X_val_scaled = scaler.transform(val_df['AGE'].values.reshape(-1, 1))
val_df['scaled_age'] = X_val_scaled

# Optionally drop the original 'AGE' column if you don't need it
train_df = train_df.drop('AGE', axis=1)
val_df = val_df.drop('AGE', axis=1)

X_train = train_df.drop('LUNG_CANCER', axis=1).values
X_val = val_df.drop('LUNG_CANCER', axis=1).values
y_train =  train_df['LUNG_CANCER'].values
y_val = val_df['LUNG_CANCER'].values

In [32]:
train_df
val_df

Unnamed: 0,GENDER,SMOKING,YELLOW_FINGERS,ANXIETY,PEER_PRESSURE,CHRONIC DISEASE,FATIGUE,ALLERGY,WHEEZING,ALCOHOL CONSUMING,COUGHING,SHORTNESS OF BREATH,SWALLOWING DIFFICULTY,CHEST PAIN,LUNG_CANCER,scaled_age
0,1.0,1.0,0.0,1.0,1.0,0.0,0.0,1.0,0.0,1.0,1.0,0.0,1.0,0.0,1.0,-1.301718
1,1.0,1.0,0.0,1.0,1.0,0.0,0.0,1.0,0.0,0.0,1.0,0.0,0.0,1.0,1.0,0.816531
2,1.0,1.0,1.0,1.0,0.0,1.0,1.0,0.0,0.0,1.0,0.0,1.0,0.0,1.0,1.0,-1.301718
3,1.0,1.0,1.0,0.0,0.0,0.0,1.0,1.0,1.0,0.0,1.0,0.0,1.0,0.0,1.0,1.434353
4,1.0,0.0,1.0,0.0,1.0,0.0,1.0,1.0,0.0,1.0,0.0,1.0,0.0,0.0,1.0,-1.301718
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
7195,1.0,0.0,1.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0,0.0,0.0,1.0,1.0,0.728270
7196,0.0,1.0,1.0,0.0,1.0,0.0,1.0,0.0,0.0,1.0,1.0,1.0,0.0,0.0,1.0,-1.301718
7197,1.0,0.0,0.0,1.0,1.0,1.0,0.0,0.0,0.0,1.0,0.0,1.0,0.0,1.0,0.0,-1.125197
7198,0.0,1.0,0.0,1.0,0.0,0.0,1.0,1.0,1.0,1.0,1.0,1.0,0.0,0.0,1.0,-0.242593


Unnamed: 0,GENDER,SMOKING,YELLOW_FINGERS,ANXIETY,PEER_PRESSURE,CHRONIC DISEASE,FATIGUE,ALLERGY,WHEEZING,ALCOHOL CONSUMING,COUGHING,SHORTNESS OF BREATH,SWALLOWING DIFFICULTY,CHEST PAIN,LUNG_CANCER,scaled_age
0,0.0,1.0,0.0,0.0,1.0,1.0,1.0,0.0,1.0,0.0,0.0,1.0,0.0,1.0,1.0,1.434353
1,0.0,0.0,1.0,1.0,0.0,0.0,0.0,0.0,0.0,1.0,0.0,1.0,0.0,1.0,1.0,-0.154333
2,0.0,0.0,1.0,1.0,1.0,0.0,1.0,1.0,0.0,0.0,1.0,1.0,0.0,1.0,1.0,-1.301718
3,1.0,0.0,0.0,0.0,0.0,1.0,0.0,1.0,1.0,1.0,0.0,0.0,0.0,0.0,1.0,-1.036937
4,0.0,1.0,0.0,0.0,1.0,1.0,1.0,1.0,0.0,1.0,1.0,1.0,0.0,1.0,1.0,-0.595635
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
1795,0.0,0.0,0.0,1.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0,1.0,0.0,1.0,0.0,0.551750
1796,1.0,0.0,0.0,1.0,0.0,1.0,0.0,0.0,1.0,1.0,1.0,0.0,0.0,0.0,0.0,0.286969
1797,1.0,1.0,0.0,0.0,1.0,0.0,0.0,1.0,1.0,0.0,1.0,1.0,1.0,1.0,0.0,1.434353
1798,1.0,1.0,1.0,0.0,1.0,1.0,1.0,0.0,0.0,0.0,0.0,0.0,1.0,1.0,1.0,-1.301718


In [38]:
model = models.Sequential([
    layers.Input(shape=(15,)),                 # Input layer for 15 features
    layers.Dense(8, activation='sigmoid'),   # Hidden layer with 8 neurons (relu activation)
    layers.Dense(1, activation='sigmoid')
    layers.Dense(1, activation='sigmoid')
])

# Compile the model
sgd = tf.keras.optimizers.SGD(learning_rate=0.1)
model.compile(optimizer=sgd, 
              loss='binary_crossentropy',   # Loss function for binary-class classification
              metrics=['accuracy'])              # Track accuracy as performance metric during training

# Train the model for 2000 epochs (each epoch uses the full training set, i.e. BGD)
model.fit(X_train, y_train, epochs=2000, batch_size=len(X_train), verbose=0)

# Display optimized weights and biases after training (useful for analysis)
for layer in model.layers:
    weights, biases = layer.get_weights()
    print(f"Weights for {layer.name}:\n{weights}\nBiases:\n{biases}")

# Evaluate model on validation data (i.e., calculate loss function & accuracy performance metric of fitted model on validation data)
val_loss, val_accuracy = model.evaluate(X_val, y_val, verbose=0)
print(f"Validation Loss: {val_loss:.4f}, Validation Accuracy: {val_accuracy:.4f}")

<keras.src.callbacks.history.History at 0x1b6ca20e390>

Weights for dense_8:
[[ 1.44867122e-01 -4.47106436e-02 -2.37367861e-03 -1.51904255e-01
  -1.96627572e-01 -3.71458173e-01  6.51687920e-01 -3.28415632e-01]
 [-3.76124978e-01  3.88570607e-01 -1.85373411e-01 -2.96321005e-01
   4.17557001e-01 -2.68580496e-01  2.59127796e-01  6.55222014e-02]
 [ 5.74352182e-02  8.65518689e-01 -6.25172183e-02 -2.28844017e-01
   2.64114831e-02  4.68278974e-01 -4.46272761e-01  1.93985865e-01]
 [-3.67997855e-01 -3.22583802e-02 -4.33414541e-02  2.22781077e-01
  -2.71222973e-03  3.22268277e-01 -1.62842959e-01 -6.32605925e-02]
 [-2.31015116e-01  2.92921215e-01 -1.88144639e-01  2.54982024e-01
  -5.88596426e-02 -8.88469219e-02  2.30993375e-01  1.68371692e-01]
 [ 1.21563777e-01 -2.59301096e-01  3.23387027e-01 -1.37126088e-01
  -3.70939285e-01 -4.68861312e-02 -7.19710588e-02  9.69186053e-02]
 [-4.02828783e-01  2.71896608e-02  2.88351506e-01 -3.75079513e-01
   3.29863459e-01 -6.42393604e-02 -1.81511849e-01  2.49066368e-01]
 [-3.24581176e-01  2.50635296e-01 -4.11944181e-0

In [40]:
import numpy as np
from sklearn.metrics import accuracy_score, precision_score, recall_score, f1_score

y_pred_prob = model.predict(X_val)  # Get predicted probabilities
y_pred = (y_pred_prob > 0.5).astype(int)  # Convert probabilities to binary predictions

# Calculate precision, recall, and F1-score
precision = precision_score(y_val, y_pred, zero_division=0)
recall = recall_score(y_val, y_pred, zero_division=0)
f1 = f1_score(y_val, y_pred, zero_division=0)

# Display all the results
print(f"Validation Loss: {val_loss:.4f}, Validation Accuracy: {val_accuracy:.4f}")
print(f"Precision: {precision:.4f}")
print(f"Recall: {recall:.4f}")
print(f"F1 Score: {f1:.4f}")

[1m57/57[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 1ms/step
Validation Loss: 0.4219, Validation Accuracy: 0.8711
Precision: 0.8610
Recall: 0.9979
F1 Score: 0.9244


In [52]:
random.seed(888)
tf.random.set_seed(888)
np.random.seed(888)


import numpy as np
from sklearn.metrics import accuracy_score, precision_score, recall_score, f1_score
from keras import models, layers
import tensorflow as tf

# Function to create the model
def create_model(num_hidden_layers=1, learning_rate=0.01, optimizer='sgd',activation='relu'):
    model = models.Sequential()
    model.add(layers.Input(shape=(15,)))  # Input layer

    for _ in range(num_hidden_layers):
        model.add(layers.Dense(8, activation=activation))  # Hidden layers with specified activation

    model.add(layers.Dense(1, activation='sigmoid'))  # Output layer

    # Compile the model
    if optimizer == 'sgd':
        opt = tf.keras.optimizers.SGD(learning_rate=learning_rate)
    elif optimizer == 'adam':
        opt = tf.keras.optimizers.Adam(learning_rate=learning_rate)
    elif optimizer == 'rmsprop':
        opt = tf.keras.optimizers.RMSprop(learning_rate=learning_rate)
    elif optimizer == 'adagrad':
        opt = tf.keras.optimizers.Adagrad(learning_rate=learning_rate)
    else:
        raise ValueError("Optimizer not recognized.")
    
    model.compile(optimizer=opt, 
                  loss='binary_crossentropy', 
                  metrics=['accuracy'])
    return model

# Define hyperparameter grids
num_hidden_layers_options = [1, 2, 3]  # Number of hidden layers
learning_rate_options = [0.01, 0.1, 0.5]  # Learning rates
optimizers = ['sgd', 'adam','rmsprop','adagrad']  # Optimizers
activation_functions = ['relu', 'tanh', 'sigmoid']  # Activation functions

# Store the best score and parameters
best_score = 0
best_params = {}

# Iterate over hyperparameter combinations
for num_hidden_layers in num_hidden_layers_options:
    for learning_rate in learning_rate_options:
        for optimizer in optimizers:
            for activation in activation_functions:
                print(f"Training with: {num_hidden_layers} hidden layers, "
                      f"learning rate = {learning_rate}, optimizer = {optimizer}, "
                      f"activation = {activation}")

                # Create and train the model
                model = create_model(num_hidden_layers, learning_rate, optimizer, activation)
                model.fit(X_train, y_train, epochs=2000, batch_size=len(X_train), verbose=0)

                # Evaluate model on validation data
                val_loss, val_accuracy = model.evaluate(X_val, y_val, verbose=0)
                
                # Get predicted probabilities and convert to binary predictions
                y_pred_prob = model.predict(X_val)
                y_pred = (y_pred_prob > 0.5).astype(int)

                # Calculate precision, recall, and F1-score
                precision = precision_score(y_val, y_pred, zero_division=0)
                recall = recall_score(y_val, y_pred, zero_division=0)
                f1 = f1_score(y_val, y_pred, zero_division=0)

                # Display metrics for this iteration
                print(f"Validation Loss: {val_loss:.4f}, Validation Accuracy: {val_accuracy:.4f}")
                print(f"Precision: {precision:.4f}, Recall: {recall:.4f}, F1 Score: {f1:.4f}")

                # Save the best parameters based on validation accuracy
                if val_accuracy > best_score:
                    best_score = val_accuracy
                    best_params = {
                        'num_hidden_layers': num_hidden_layers,
                        'learning_rate': learning_rate,
                        'optimizer': optimizer,
                        'activation': activation
                    }

# Display the best parameters
print(f"Best Validation Accuracy: {best_score:.4f} with parameters: {best_params}")

Training with: 1 hidden layers, learning rate = 0.01, optimizer = sgd, activation = relu


<keras.src.callbacks.history.History at 0x1b6d225fb90>

[1m57/57[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 2ms/step
Validation Loss: 0.4324, Validation Accuracy: 0.8250
Precision: 0.8187, Recall: 1.0000, F1 Score: 0.9003
Training with: 1 hidden layers, learning rate = 0.01, optimizer = sgd, activation = tanh


<keras.src.callbacks.history.History at 0x1b6d3965c10>

[1m57/57[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 2ms/step
Validation Loss: 0.4494, Validation Accuracy: 0.7928
Precision: 0.7922, Recall: 1.0000, F1 Score: 0.8841
Training with: 1 hidden layers, learning rate = 0.01, optimizer = sgd, activation = sigmoid


<keras.src.callbacks.history.History at 0x1b6d3860200>

[1m57/57[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 1ms/step
Validation Loss: 0.5036, Validation Accuracy: 0.7900
Precision: 0.7900, Recall: 1.0000, F1 Score: 0.8827
Training with: 1 hidden layers, learning rate = 0.01, optimizer = adam, activation = relu


<keras.src.callbacks.history.History at 0x1b6dc0a8b60>

[1m57/57[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 2ms/step
Validation Loss: 0.3175, Validation Accuracy: 0.9039
Precision: 0.8930, Recall: 0.9979, F1 Score: 0.9425
Training with: 1 hidden layers, learning rate = 0.01, optimizer = adam, activation = tanh


<keras.src.callbacks.history.History at 0x1b6dbdb2120>

[1m57/57[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 1ms/step  
Validation Loss: 0.3298, Validation Accuracy: 0.9017
Precision: 0.8918, Recall: 0.9965, F1 Score: 0.9412
Training with: 1 hidden layers, learning rate = 0.01, optimizer = adam, activation = sigmoid


<keras.src.callbacks.history.History at 0x1b6dc2587d0>

[1m57/57[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 1ms/step
Validation Loss: 0.3230, Validation Accuracy: 0.9000
Precision: 0.8920, Recall: 0.9937, F1 Score: 0.9401
Training with: 1 hidden layers, learning rate = 0.01, optimizer = rmsprop, activation = relu


<keras.src.callbacks.history.History at 0x1b6dc5b00e0>

[1m57/57[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 2ms/step
Validation Loss: 0.3189, Validation Accuracy: 0.9000
Precision: 0.8925, Recall: 0.9930, F1 Score: 0.9401
Training with: 1 hidden layers, learning rate = 0.01, optimizer = rmsprop, activation = tanh


<keras.src.callbacks.history.History at 0x1b6dc896480>

[1m57/57[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 2ms/step  
Validation Loss: 0.3424, Validation Accuracy: 0.8972
Precision: 0.8868, Recall: 0.9972, F1 Score: 0.9388
Training with: 1 hidden layers, learning rate = 0.01, optimizer = rmsprop, activation = sigmoid


<keras.src.callbacks.history.History at 0x1b6dcb509e0>

[1m57/57[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 2ms/step
Validation Loss: 0.3176, Validation Accuracy: 0.9000
Precision: 0.8920, Recall: 0.9937, F1 Score: 0.9401
Training with: 1 hidden layers, learning rate = 0.01, optimizer = adagrad, activation = relu


<keras.src.callbacks.history.History at 0x1b6dce1c8c0>

[1m57/57[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 2ms/step
Validation Loss: 0.4223, Validation Accuracy: 0.8656
Precision: 0.8563, Recall: 0.9972, F1 Score: 0.9214
Training with: 1 hidden layers, learning rate = 0.01, optimizer = adagrad, activation = tanh


<keras.src.callbacks.history.History at 0x1b6de09f1a0>

[1m57/57[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 2ms/step
Validation Loss: 0.4162, Validation Accuracy: 0.8611
Precision: 0.8513, Recall: 0.9986, F1 Score: 0.9191
Training with: 1 hidden layers, learning rate = 0.01, optimizer = adagrad, activation = sigmoid


<keras.src.callbacks.history.History at 0x1b6de343650>

[1m57/57[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 2ms/step
Validation Loss: 0.4925, Validation Accuracy: 0.7900
Precision: 0.7900, Recall: 1.0000, F1 Score: 0.8827
Training with: 1 hidden layers, learning rate = 0.1, optimizer = sgd, activation = relu


<keras.src.callbacks.history.History at 0x1b6de2ee630>

[1m57/57[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 2ms/step
Validation Loss: 0.3448, Validation Accuracy: 0.9033
Precision: 0.8929, Recall: 0.9972, F1 Score: 0.9422
Training with: 1 hidden layers, learning rate = 0.1, optimizer = sgd, activation = tanh


<keras.src.callbacks.history.History at 0x1b6df848b30>

[1m57/57[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 2ms/step
Validation Loss: 0.3520, Validation Accuracy: 0.9000
Precision: 0.8901, Recall: 0.9965, F1 Score: 0.9403
Training with: 1 hidden layers, learning rate = 0.1, optimizer = sgd, activation = sigmoid


<keras.src.callbacks.history.History at 0x1b6dfa8cce0>

[1m57/57[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 3ms/step
Validation Loss: 0.4083, Validation Accuracy: 0.8700
Precision: 0.8604, Recall: 0.9972, F1 Score: 0.9238
Training with: 1 hidden layers, learning rate = 0.1, optimizer = adam, activation = relu


<keras.src.callbacks.history.History at 0x1b6de5fe360>

[1m57/57[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 2ms/step
Validation Loss: 0.3346, Validation Accuracy: 0.9039
Precision: 0.8935, Recall: 0.9972, F1 Score: 0.9425
Training with: 1 hidden layers, learning rate = 0.1, optimizer = adam, activation = tanh


<keras.src.callbacks.history.History at 0x1b6dcd98650>

[1m57/57[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 2ms/step
Validation Loss: 0.3743, Validation Accuracy: 0.8917
Precision: 0.8885, Recall: 0.9866, F1 Score: 0.9350
Training with: 1 hidden layers, learning rate = 0.1, optimizer = adam, activation = sigmoid


<keras.src.callbacks.history.History at 0x1b6d20d78c0>

[1m57/57[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 2ms/step
Validation Loss: 0.3280, Validation Accuracy: 0.9017
Precision: 0.8927, Recall: 0.9951, F1 Score: 0.9411
Training with: 1 hidden layers, learning rate = 0.1, optimizer = rmsprop, activation = relu


<keras.src.callbacks.history.History at 0x1b6d39fe180>

[1m57/57[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 2ms/step
Validation Loss: 0.4380, Validation Accuracy: 0.8972
Precision: 0.8849, Recall: 1.0000, F1 Score: 0.9389
Training with: 1 hidden layers, learning rate = 0.1, optimizer = rmsprop, activation = tanh


<keras.src.callbacks.history.History at 0x1b6d62b84a0>

[1m57/57[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 2ms/step
Validation Loss: 0.3292, Validation Accuracy: 0.9011
Precision: 0.8932, Recall: 0.9937, F1 Score: 0.9407
Training with: 1 hidden layers, learning rate = 0.1, optimizer = rmsprop, activation = sigmoid


<keras.src.callbacks.history.History at 0x1b6dc896000>

[1m57/57[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 2ms/step
Validation Loss: 0.3512, Validation Accuracy: 0.8989
Precision: 0.8870, Recall: 0.9993, F1 Score: 0.9398
Training with: 1 hidden layers, learning rate = 0.1, optimizer = adagrad, activation = relu


<keras.src.callbacks.history.History at 0x1b6d383e510>

[1m57/57[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 1ms/step
Validation Loss: 0.3245, Validation Accuracy: 0.9033
Precision: 0.8934, Recall: 0.9965, F1 Score: 0.9422
Training with: 1 hidden layers, learning rate = 0.1, optimizer = adagrad, activation = tanh


<keras.src.callbacks.history.History at 0x1b6dcdb63c0>

[1m57/57[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 2ms/step
Validation Loss: 0.3391, Validation Accuracy: 0.9017
Precision: 0.8922, Recall: 0.9958, F1 Score: 0.9412
Training with: 1 hidden layers, learning rate = 0.1, optimizer = adagrad, activation = sigmoid


<keras.src.callbacks.history.History at 0x1b6dc6c54c0>

[1m57/57[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 2ms/step
Validation Loss: 0.3723, Validation Accuracy: 0.8944
Precision: 0.8850, Recall: 0.9958, F1 Score: 0.9371
Training with: 1 hidden layers, learning rate = 0.5, optimizer = sgd, activation = relu


<keras.src.callbacks.history.History at 0x1b6e0e2bdd0>

[1m57/57[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 2ms/step
Validation Loss: 0.3178, Validation Accuracy: 0.9028
Precision: 0.8934, Recall: 0.9958, F1 Score: 0.9418
Training with: 1 hidden layers, learning rate = 0.5, optimizer = sgd, activation = tanh


<keras.src.callbacks.history.History at 0x1b6e203d520>

[1m57/57[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 2ms/step
Validation Loss: 0.3310, Validation Accuracy: 0.9033
Precision: 0.8934, Recall: 0.9965, F1 Score: 0.9422
Training with: 1 hidden layers, learning rate = 0.5, optimizer = sgd, activation = sigmoid


<keras.src.callbacks.history.History at 0x1b6e2277fb0>

[1m57/57[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 1ms/step
Validation Loss: 0.3464, Validation Accuracy: 0.9028
Precision: 0.8929, Recall: 0.9965, F1 Score: 0.9418
Training with: 1 hidden layers, learning rate = 0.5, optimizer = adam, activation = relu


<keras.src.callbacks.history.History at 0x1b6e24e9100>

[1m57/57[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 1ms/step
Validation Loss: 0.5151, Validation Accuracy: 0.7900
Precision: 0.7900, Recall: 1.0000, F1 Score: 0.8827
Training with: 1 hidden layers, learning rate = 0.5, optimizer = adam, activation = tanh


<keras.src.callbacks.history.History at 0x1b6e3910140>

[1m57/57[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 2ms/step
Validation Loss: 0.3025, Validation Accuracy: 0.9039
Precision: 0.8930, Recall: 0.9979, F1 Score: 0.9425
Training with: 1 hidden layers, learning rate = 0.5, optimizer = adam, activation = sigmoid


<keras.src.callbacks.history.History at 0x1b6e0e99be0>

[1m57/57[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 2ms/step
Validation Loss: 0.3236, Validation Accuracy: 0.9033
Precision: 0.8925, Recall: 0.9979, F1 Score: 0.9422
Training with: 1 hidden layers, learning rate = 0.5, optimizer = rmsprop, activation = relu


<keras.src.callbacks.history.History at 0x1b6dc4b40e0>

[1m57/57[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 2ms/step
Validation Loss: 0.5254, Validation Accuracy: 0.7900
Precision: 0.7900, Recall: 1.0000, F1 Score: 0.8827
Training with: 1 hidden layers, learning rate = 0.5, optimizer = rmsprop, activation = tanh


<keras.src.callbacks.history.History at 0x1b6d24af4d0>

[1m57/57[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 3ms/step
Validation Loss: 0.4130, Validation Accuracy: 0.8856
Precision: 0.8933, Recall: 0.9712, F1 Score: 0.9306
Training with: 1 hidden layers, learning rate = 0.5, optimizer = rmsprop, activation = sigmoid


<keras.src.callbacks.history.History at 0x1b6d4df5b20>

[1m57/57[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 2ms/step
Validation Loss: 0.4625, Validation Accuracy: 0.8911
Precision: 0.8793, Recall: 0.9993, F1 Score: 0.9355
Training with: 1 hidden layers, learning rate = 0.5, optimizer = adagrad, activation = relu


<keras.src.callbacks.history.History at 0x1b6dcce0800>

[1m57/57[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 2ms/step
Validation Loss: 0.3179, Validation Accuracy: 0.9028
Precision: 0.8924, Recall: 0.9972, F1 Score: 0.9419
Training with: 1 hidden layers, learning rate = 0.5, optimizer = adagrad, activation = tanh


<keras.src.callbacks.history.History at 0x1b6dfb615b0>

[1m57/57[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 2ms/step
Validation Loss: 0.3216, Validation Accuracy: 0.9028
Precision: 0.8924, Recall: 0.9972, F1 Score: 0.9419
Training with: 1 hidden layers, learning rate = 0.5, optimizer = adagrad, activation = sigmoid


<keras.src.callbacks.history.History at 0x1b6dbf8ecc0>

[1m57/57[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 3ms/step
Validation Loss: 0.3290, Validation Accuracy: 0.9028
Precision: 0.8934, Recall: 0.9958, F1 Score: 0.9418
Training with: 2 hidden layers, learning rate = 0.01, optimizer = sgd, activation = relu


<keras.src.callbacks.history.History at 0x1b6dbe3ea80>

[1m57/57[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 2ms/step
Validation Loss: 0.4696, Validation Accuracy: 0.7900
Precision: 0.7900, Recall: 1.0000, F1 Score: 0.8827
Training with: 2 hidden layers, learning rate = 0.01, optimizer = sgd, activation = tanh


<keras.src.callbacks.history.History at 0x1b6de741be0>

[1m57/57[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 3ms/step
Validation Loss: 0.3973, Validation Accuracy: 0.8700
Precision: 0.8622, Recall: 0.9944, F1 Score: 0.9236
Training with: 2 hidden layers, learning rate = 0.01, optimizer = sgd, activation = sigmoid


<keras.src.callbacks.history.History at 0x1b6dfb9b980>

[1m57/57[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 2ms/step
Validation Loss: 0.5141, Validation Accuracy: 0.7900
Precision: 0.7900, Recall: 1.0000, F1 Score: 0.8827
Training with: 2 hidden layers, learning rate = 0.01, optimizer = adam, activation = relu


<keras.src.callbacks.history.History at 0x1b6e4e6ddc0>

[1m57/57[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 2ms/step
Validation Loss: 0.4145, Validation Accuracy: 0.9000
Precision: 0.8916, Recall: 0.9944, F1 Score: 0.9402
Training with: 2 hidden layers, learning rate = 0.01, optimizer = adam, activation = tanh


<keras.src.callbacks.history.History at 0x1b6e62918e0>

[1m57/57[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 2ms/step
Validation Loss: 0.3347, Validation Accuracy: 0.8994
Precision: 0.8900, Recall: 0.9958, F1 Score: 0.9399
Training with: 2 hidden layers, learning rate = 0.01, optimizer = adam, activation = sigmoid


<keras.src.callbacks.history.History at 0x1b6e7667bf0>

[1m57/57[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 2ms/step
Validation Loss: 0.3095, Validation Accuracy: 0.9006
Precision: 0.8916, Recall: 0.9951, F1 Score: 0.9405
Training with: 2 hidden layers, learning rate = 0.01, optimizer = rmsprop, activation = relu


<keras.src.callbacks.history.History at 0x1b6df7e9940>

[1m57/57[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 2ms/step
Validation Loss: 0.3227, Validation Accuracy: 0.8983
Precision: 0.8918, Recall: 0.9916, F1 Score: 0.9391
Training with: 2 hidden layers, learning rate = 0.01, optimizer = rmsprop, activation = tanh


<keras.src.callbacks.history.History at 0x1b6d4c12ff0>

[1m57/57[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 2ms/step
Validation Loss: 0.3312, Validation Accuracy: 0.9006
Precision: 0.8906, Recall: 0.9965, F1 Score: 0.9406
Training with: 2 hidden layers, learning rate = 0.01, optimizer = rmsprop, activation = sigmoid


<keras.src.callbacks.history.History at 0x1b6d2514fe0>

[1m57/57[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 1ms/step
Validation Loss: 0.3061, Validation Accuracy: 0.9028
Precision: 0.8929, Recall: 0.9965, F1 Score: 0.9418
Training with: 2 hidden layers, learning rate = 0.01, optimizer = adagrad, activation = relu


<keras.src.callbacks.history.History at 0x1b6de5afb90>

[1m57/57[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 2ms/step
Validation Loss: 0.4195, Validation Accuracy: 0.8572
Precision: 0.8473, Recall: 0.9993, F1 Score: 0.9171
Training with: 2 hidden layers, learning rate = 0.01, optimizer = adagrad, activation = tanh


<keras.src.callbacks.history.History at 0x1b6e27eb410>

[1m57/57[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 2ms/step
Validation Loss: 0.3945, Validation Accuracy: 0.8789
Precision: 0.8698, Recall: 0.9958, F1 Score: 0.9285
Training with: 2 hidden layers, learning rate = 0.01, optimizer = adagrad, activation = sigmoid


<keras.src.callbacks.history.History at 0x1b6de741d30>

[1m57/57[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 2ms/step
Validation Loss: 0.5036, Validation Accuracy: 0.7900
Precision: 0.7900, Recall: 1.0000, F1 Score: 0.8827
Training with: 2 hidden layers, learning rate = 0.1, optimizer = sgd, activation = relu


<keras.src.callbacks.history.History at 0x1b6dcf5b860>

[1m57/57[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 2ms/step
Validation Loss: 0.3252, Validation Accuracy: 0.9000
Precision: 0.8916, Recall: 0.9944, F1 Score: 0.9402
Training with: 2 hidden layers, learning rate = 0.1, optimizer = sgd, activation = tanh


<keras.src.callbacks.history.History at 0x1b6e78f0e90>

[1m57/57[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 2ms/step
Validation Loss: 0.3267, Validation Accuracy: 0.9039
Precision: 0.8935, Recall: 0.9972, F1 Score: 0.9425
Training with: 2 hidden layers, learning rate = 0.1, optimizer = sgd, activation = sigmoid


<keras.src.callbacks.history.History at 0x1b6e8c4f380>

[1m57/57[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 2ms/step
Validation Loss: 0.4887, Validation Accuracy: 0.7900
Precision: 0.7900, Recall: 1.0000, F1 Score: 0.8827
Training with: 2 hidden layers, learning rate = 0.1, optimizer = adam, activation = relu


<keras.src.callbacks.history.History at 0x1b6e9ed0cb0>

[1m57/57[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 2ms/step
Validation Loss: 0.3364, Validation Accuracy: 0.9017
Precision: 0.8908, Recall: 0.9979, F1 Score: 0.9413
Training with: 2 hidden layers, learning rate = 0.1, optimizer = adam, activation = tanh


<keras.src.callbacks.history.History at 0x1b6ea2eaf00>

[1m57/57[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 1ms/step  
Validation Loss: 0.3996, Validation Accuracy: 0.8972
Precision: 0.8902, Recall: 0.9923, F1 Score: 0.9385
Training with: 2 hidden layers, learning rate = 0.1, optimizer = adam, activation = sigmoid


<keras.src.callbacks.history.History at 0x1b6eb727aa0>

[1m57/57[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 2ms/step
Validation Loss: 0.3674, Validation Accuracy: 0.8872
Precision: 0.8920, Recall: 0.9754, F1 Score: 0.9318
Training with: 2 hidden layers, learning rate = 0.1, optimizer = rmsprop, activation = relu


<keras.src.callbacks.history.History at 0x1b6ecb88e60>

[1m57/57[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 2ms/step
Validation Loss: 0.5528, Validation Accuracy: 0.8989
Precision: 0.8929, Recall: 0.9909, F1 Score: 0.9393
Training with: 2 hidden layers, learning rate = 0.1, optimizer = rmsprop, activation = tanh


<keras.src.callbacks.history.History at 0x1b6eceecb90>

[1m57/57[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 2ms/step
Validation Loss: 0.3739, Validation Accuracy: 0.8889
Precision: 0.8804, Recall: 0.9944, F1 Score: 0.9339
Training with: 2 hidden layers, learning rate = 0.1, optimizer = rmsprop, activation = sigmoid


<keras.src.callbacks.history.History at 0x1b6e247c890>

[1m57/57[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 2ms/step
Validation Loss: 0.3447, Validation Accuracy: 0.8983
Precision: 0.8909, Recall: 0.9930, F1 Score: 0.9391
Training with: 2 hidden layers, learning rate = 0.1, optimizer = adagrad, activation = relu


<keras.src.callbacks.history.History at 0x1b6dc10cfe0>

[1m57/57[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 2ms/step
Validation Loss: 0.3139, Validation Accuracy: 0.9022
Precision: 0.8928, Recall: 0.9958, F1 Score: 0.9415
Training with: 2 hidden layers, learning rate = 0.1, optimizer = adagrad, activation = tanh


<keras.src.callbacks.history.History at 0x1b6dc0aa870>

[1m57/57[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 2ms/step
Validation Loss: 0.3145, Validation Accuracy: 0.9033
Precision: 0.8925, Recall: 0.9979, F1 Score: 0.9422
Training with: 2 hidden layers, learning rate = 0.1, optimizer = adagrad, activation = sigmoid


<keras.src.callbacks.history.History at 0x1b6d225eb70>

[1m57/57[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 2ms/step
Validation Loss: 0.3615, Validation Accuracy: 0.8967
Precision: 0.8877, Recall: 0.9951, F1 Score: 0.9383
Training with: 2 hidden layers, learning rate = 0.5, optimizer = sgd, activation = relu


<keras.src.callbacks.history.History at 0x1b6e7806990>

[1m57/57[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 2ms/step
Validation Loss: 0.3161, Validation Accuracy: 0.9011
Precision: 0.8902, Recall: 0.9979, F1 Score: 0.9410
Training with: 2 hidden layers, learning rate = 0.5, optimizer = sgd, activation = tanh


<keras.src.callbacks.history.History at 0x1b6e63e76e0>

[1m57/57[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 2ms/step
Validation Loss: 0.3128, Validation Accuracy: 0.9017
Precision: 0.8903, Recall: 0.9986, F1 Score: 0.9413
Training with: 2 hidden layers, learning rate = 0.5, optimizer = sgd, activation = sigmoid


<keras.src.callbacks.history.History at 0x1b6ea0e2990>

[1m57/57[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 2ms/step
Validation Loss: 0.3368, Validation Accuracy: 0.9022
Precision: 0.8928, Recall: 0.9958, F1 Score: 0.9415
Training with: 2 hidden layers, learning rate = 0.5, optimizer = adam, activation = relu


<keras.src.callbacks.history.History at 0x1b6e221e060>

[1m57/57[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 2ms/step
Validation Loss: 0.5151, Validation Accuracy: 0.7900
Precision: 0.7900, Recall: 1.0000, F1 Score: 0.8827
Training with: 2 hidden layers, learning rate = 0.5, optimizer = adam, activation = tanh


<keras.src.callbacks.history.History at 0x1b6ecf4fc50>

[1m57/57[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 1ms/step
Validation Loss: 0.5140, Validation Accuracy: 0.7900
Precision: 0.7900, Recall: 1.0000, F1 Score: 0.8827
Training with: 2 hidden layers, learning rate = 0.5, optimizer = adam, activation = sigmoid


<keras.src.callbacks.history.History at 0x1b6ef0ae330>

[1m57/57[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 2ms/step
Validation Loss: 0.5151, Validation Accuracy: 0.7900
Precision: 0.7900, Recall: 1.0000, F1 Score: 0.8827
Training with: 2 hidden layers, learning rate = 0.5, optimizer = rmsprop, activation = relu


<keras.src.callbacks.history.History at 0x1b6ef0cf4a0>

[1m57/57[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 2ms/step
Validation Loss: 0.5151, Validation Accuracy: 0.7900
Precision: 0.7900, Recall: 1.0000, F1 Score: 0.8827
Training with: 2 hidden layers, learning rate = 0.5, optimizer = rmsprop, activation = tanh


<keras.src.callbacks.history.History at 0x1b6ef81d820>

[1m57/57[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 2ms/step
Validation Loss: 0.8260, Validation Accuracy: 0.2100
Precision: 0.0000, Recall: 0.0000, F1 Score: 0.0000
Training with: 2 hidden layers, learning rate = 0.5, optimizer = rmsprop, activation = sigmoid


<keras.src.callbacks.history.History at 0x1b6ee01c1d0>

[1m57/57[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 1ms/step
Validation Loss: 0.4167, Validation Accuracy: 0.8633
Precision: 0.8534, Recall: 0.9986, F1 Score: 0.9203
Training with: 2 hidden layers, learning rate = 0.5, optimizer = adagrad, activation = relu


<keras.src.callbacks.history.History at 0x1b6e3c7d430>

[1m57/57[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 2ms/step
Validation Loss: 0.3165, Validation Accuracy: 0.9028
Precision: 0.8934, Recall: 0.9958, F1 Score: 0.9418
Training with: 2 hidden layers, learning rate = 0.5, optimizer = adagrad, activation = tanh


<keras.src.callbacks.history.History at 0x1b6e21b8890>

[1m57/57[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 2ms/step
Validation Loss: 0.3145, Validation Accuracy: 0.9039
Precision: 0.8915, Recall: 1.0000, F1 Score: 0.9427
Training with: 2 hidden layers, learning rate = 0.5, optimizer = adagrad, activation = sigmoid


<keras.src.callbacks.history.History at 0x1b6dc203920>

[1m57/57[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 2ms/step
Validation Loss: 0.3157, Validation Accuracy: 0.9033
Precision: 0.8934, Recall: 0.9965, F1 Score: 0.9422
Training with: 3 hidden layers, learning rate = 0.01, optimizer = sgd, activation = relu


<keras.src.callbacks.history.History at 0x1b6ea0bd580>

[1m57/57[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 2ms/step
Validation Loss: 0.4362, Validation Accuracy: 0.8089
Precision: 0.8052, Recall: 1.0000, F1 Score: 0.8921
Training with: 3 hidden layers, learning rate = 0.01, optimizer = sgd, activation = tanh


<keras.src.callbacks.history.History at 0x1b6eb596180>

[1m57/57[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 2ms/step
Validation Loss: 0.4796, Validation Accuracy: 0.7900
Precision: 0.7903, Recall: 0.9993, F1 Score: 0.8826
Training with: 3 hidden layers, learning rate = 0.01, optimizer = sgd, activation = sigmoid


<keras.src.callbacks.history.History at 0x1b6ecb20920>

[1m57/57[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 3ms/step
Validation Loss: 0.5150, Validation Accuracy: 0.7900
Precision: 0.7900, Recall: 1.0000, F1 Score: 0.8827
Training with: 3 hidden layers, learning rate = 0.01, optimizer = adam, activation = relu


<keras.src.callbacks.history.History at 0x1b6e76adc10>

[1m57/57[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 2ms/step
Validation Loss: 0.3250, Validation Accuracy: 0.9028
Precision: 0.8924, Recall: 0.9972, F1 Score: 0.9419
Training with: 3 hidden layers, learning rate = 0.01, optimizer = adam, activation = tanh


<keras.src.callbacks.history.History at 0x1b6ecd292e0>

[1m57/57[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 2ms/step
Validation Loss: 0.3880, Validation Accuracy: 0.8939
Precision: 0.8893, Recall: 0.9887, F1 Score: 0.9364
Training with: 3 hidden layers, learning rate = 0.01, optimizer = adam, activation = sigmoid


<keras.src.callbacks.history.History at 0x1b6f1ff7bf0>

[1m57/57[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 2ms/step
Validation Loss: 0.3175, Validation Accuracy: 0.8994
Precision: 0.8905, Recall: 0.9951, F1 Score: 0.9399
Training with: 3 hidden layers, learning rate = 0.01, optimizer = rmsprop, activation = relu


<keras.src.callbacks.history.History at 0x1b6f3504530>

[1m57/57[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 2ms/step
Validation Loss: 0.3263, Validation Accuracy: 0.9000
Precision: 0.8920, Recall: 0.9937, F1 Score: 0.9401
Training with: 3 hidden layers, learning rate = 0.01, optimizer = rmsprop, activation = tanh


<keras.src.callbacks.history.History at 0x1b6f487a5d0>

[1m57/57[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 2ms/step
Validation Loss: 0.3471, Validation Accuracy: 0.8994
Precision: 0.8900, Recall: 0.9958, F1 Score: 0.9399
Training with: 3 hidden layers, learning rate = 0.01, optimizer = rmsprop, activation = sigmoid


<keras.src.callbacks.history.History at 0x1b6f4c74ad0>

[1m57/57[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 2ms/step
Validation Loss: 0.3058, Validation Accuracy: 0.9022
Precision: 0.8928, Recall: 0.9958, F1 Score: 0.9415
Training with: 3 hidden layers, learning rate = 0.01, optimizer = adagrad, activation = relu


<keras.src.callbacks.history.History at 0x1b6eba669f0>

[1m57/57[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 2ms/step
Validation Loss: 0.3999, Validation Accuracy: 0.8722
Precision: 0.8625, Recall: 0.9972, F1 Score: 0.9250
Training with: 3 hidden layers, learning rate = 0.01, optimizer = adagrad, activation = tanh


<keras.src.callbacks.history.History at 0x1b6df9f85f0>

[1m57/57[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 2ms/step
Validation Loss: 0.3746, Validation Accuracy: 0.8900
Precision: 0.8811, Recall: 0.9951, F1 Score: 0.9346
Training with: 3 hidden layers, learning rate = 0.01, optimizer = adagrad, activation = sigmoid


<keras.src.callbacks.history.History at 0x1b6d39fdf40>

[1m57/57[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 2ms/step
Validation Loss: 0.5148, Validation Accuracy: 0.7900
Precision: 0.7900, Recall: 1.0000, F1 Score: 0.8827
Training with: 3 hidden layers, learning rate = 0.1, optimizer = sgd, activation = relu


<keras.src.callbacks.history.History at 0x1b6d2226990>

[1m57/57[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 2ms/step
Validation Loss: 0.3549, Validation Accuracy: 0.8989
Precision: 0.8880, Recall: 0.9979, F1 Score: 0.9397
Training with: 3 hidden layers, learning rate = 0.1, optimizer = sgd, activation = tanh


<keras.src.callbacks.history.History at 0x1b6dfbb9af0>

[1m57/57[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 2ms/step
Validation Loss: 0.3138, Validation Accuracy: 0.9028
Precision: 0.8924, Recall: 0.9972, F1 Score: 0.9419
Training with: 3 hidden layers, learning rate = 0.1, optimizer = sgd, activation = sigmoid


<keras.src.callbacks.history.History at 0x1b6f09da7b0>

[1m57/57[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 2ms/step
Validation Loss: 0.5141, Validation Accuracy: 0.7900
Precision: 0.7900, Recall: 1.0000, F1 Score: 0.8827
Training with: 3 hidden layers, learning rate = 0.1, optimizer = adam, activation = relu


<keras.src.callbacks.history.History at 0x1b6e4e078f0>

[1m57/57[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 2ms/step
Validation Loss: 0.3277, Validation Accuracy: 0.8983
Precision: 0.8928, Recall: 0.9902, F1 Score: 0.9390
Training with: 3 hidden layers, learning rate = 0.1, optimizer = adam, activation = tanh


<keras.src.callbacks.history.History at 0x1b6f1dbd7c0>

[1m57/57[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 2ms/step
Validation Loss: 0.3866, Validation Accuracy: 0.9033
Precision: 0.8920, Recall: 0.9986, F1 Score: 0.9423
Training with: 3 hidden layers, learning rate = 0.1, optimizer = adam, activation = sigmoid


<keras.src.callbacks.history.History at 0x1b6f72504a0>

[1m57/57[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 2ms/step
Validation Loss: 0.3473, Validation Accuracy: 0.8928
Precision: 0.8907, Recall: 0.9852, F1 Score: 0.9356
Training with: 3 hidden layers, learning rate = 0.1, optimizer = rmsprop, activation = relu


<keras.src.callbacks.history.History at 0x1b6f76248c0>

[1m57/57[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 2ms/step
Validation Loss: 0.4504, Validation Accuracy: 0.8978
Precision: 0.8883, Recall: 0.9958, F1 Score: 0.9390
Training with: 3 hidden layers, learning rate = 0.1, optimizer = rmsprop, activation = tanh


<keras.src.callbacks.history.History at 0x1b6f8b154c0>

[1m57/57[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 2ms/step
Validation Loss: 0.3363, Validation Accuracy: 0.9011
Precision: 0.8902, Recall: 0.9979, F1 Score: 0.9410
Training with: 3 hidden layers, learning rate = 0.1, optimizer = rmsprop, activation = sigmoid


<keras.src.callbacks.history.History at 0x1b6f9ec79b0>

[1m57/57[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 2ms/step
Validation Loss: 0.3337, Validation Accuracy: 0.8956
Precision: 0.8842, Recall: 0.9986, F1 Score: 0.9379
Training with: 3 hidden layers, learning rate = 0.1, optimizer = adagrad, activation = relu


<keras.src.callbacks.history.History at 0x1b6fa278e60>

[1m57/57[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 2ms/step
Validation Loss: 0.3144, Validation Accuracy: 0.9022
Precision: 0.8928, Recall: 0.9958, F1 Score: 0.9415
Training with: 3 hidden layers, learning rate = 0.1, optimizer = adagrad, activation = tanh


<keras.src.callbacks.history.History at 0x1b6f1d5dc70>

[1m57/57[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 2ms/step
Validation Loss: 0.3090, Validation Accuracy: 0.9017
Precision: 0.8913, Recall: 0.9972, F1 Score: 0.9413
Training with: 3 hidden layers, learning rate = 0.1, optimizer = adagrad, activation = sigmoid


<keras.src.callbacks.history.History at 0x1b6df8d0980>

[1m57/57[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 2ms/step
Validation Loss: 0.3829, Validation Accuracy: 0.8822
Precision: 0.8725, Recall: 0.9965, F1 Score: 0.9304
Training with: 3 hidden layers, learning rate = 0.5, optimizer = sgd, activation = relu


<keras.src.callbacks.history.History at 0x1b6e0edb5f0>

[1m57/57[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 2ms/step
Validation Loss: 0.3177, Validation Accuracy: 0.8967
Precision: 0.8843, Recall: 1.0000, F1 Score: 0.9386
Training with: 3 hidden layers, learning rate = 0.5, optimizer = sgd, activation = tanh


<keras.src.callbacks.history.History at 0x1b6ef0716d0>

[1m57/57[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 2ms/step
Validation Loss: 0.3172, Validation Accuracy: 0.8978
Precision: 0.8869, Recall: 0.9979, F1 Score: 0.9391
Training with: 3 hidden layers, learning rate = 0.5, optimizer = sgd, activation = sigmoid


<keras.src.callbacks.history.History at 0x1b6eb608470>

[1m57/57[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 2ms/step
Validation Loss: 0.3370, Validation Accuracy: 0.9017
Precision: 0.8922, Recall: 0.9958, F1 Score: 0.9412
Training with: 3 hidden layers, learning rate = 0.5, optimizer = adam, activation = relu


<keras.src.callbacks.history.History at 0x1b6f360e270>

[1m57/57[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 2ms/step
Validation Loss: 0.5151, Validation Accuracy: 0.7900
Precision: 0.7900, Recall: 1.0000, F1 Score: 0.8827
Training with: 3 hidden layers, learning rate = 0.5, optimizer = adam, activation = tanh


<keras.src.callbacks.history.History at 0x1b6f613e6c0>

[1m57/57[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 2ms/step
Validation Loss: 0.5153, Validation Accuracy: 0.7900
Precision: 0.7900, Recall: 1.0000, F1 Score: 0.8827
Training with: 3 hidden layers, learning rate = 0.5, optimizer = adam, activation = sigmoid


<keras.src.callbacks.history.History at 0x1b6f60876b0>

[1m57/57[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 3ms/step
Validation Loss: 0.3051, Validation Accuracy: 0.9050
Precision: 0.8931, Recall: 0.9993, F1 Score: 0.9432
Training with: 3 hidden layers, learning rate = 0.5, optimizer = rmsprop, activation = relu


<keras.src.callbacks.history.History at 0x1b6fc5be480>

[1m57/57[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 2ms/step
Validation Loss: 0.5151, Validation Accuracy: 0.7900
Precision: 0.7900, Recall: 1.0000, F1 Score: 0.8827
Training with: 3 hidden layers, learning rate = 0.5, optimizer = rmsprop, activation = tanh


<keras.src.callbacks.history.History at 0x1b6fc97dbe0>

[1m57/57[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 3ms/step
Validation Loss: 0.5366, Validation Accuracy: 0.7900
Precision: 0.7900, Recall: 1.0000, F1 Score: 0.8827
Training with: 3 hidden layers, learning rate = 0.5, optimizer = rmsprop, activation = sigmoid


<keras.src.callbacks.history.History at 0x1b6fdd41be0>

[1m57/57[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 2ms/step
Validation Loss: 0.5254, Validation Accuracy: 0.7900
Precision: 0.7900, Recall: 1.0000, F1 Score: 0.8827
Training with: 3 hidden layers, learning rate = 0.5, optimizer = adagrad, activation = relu


<keras.src.callbacks.history.History at 0x1b6fe123500>

[1m57/57[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 4ms/step
Validation Loss: 0.3108, Validation Accuracy: 0.9033
Precision: 0.8925, Recall: 0.9979, F1 Score: 0.9422
Training with: 3 hidden layers, learning rate = 0.5, optimizer = adagrad, activation = tanh


<keras.src.callbacks.history.History at 0x1b6ff4a33b0>

[1m57/57[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 2ms/step
Validation Loss: 0.3168, Validation Accuracy: 0.9022
Precision: 0.8913, Recall: 0.9979, F1 Score: 0.9416
Training with: 3 hidden layers, learning rate = 0.5, optimizer = adagrad, activation = sigmoid


<keras.src.callbacks.history.History at 0x1b6f2014e90>

[1m57/57[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 2ms/step
Validation Loss: 0.3099, Validation Accuracy: 0.9033
Precision: 0.8929, Recall: 0.9972, F1 Score: 0.9422
Best Validation Accuracy: 0.9050 with parameters: {'num_hidden_layers': 3, 'learning_rate': 0.5, 'optimizer': 'adam', 'activation': 'sigmoid'}


In [17]:
import numpy as np
from sklearn.metrics import accuracy_score, precision_score, recall_score, f1_score
from keras import models, layers
import tensorflow as tf
import random
from sklearn.model_selection import train_test_split

# Set seeds for reproducibility
random.seed(999)
tf.random.set_seed(999)
np.random.seed(999)

from sklearn.preprocessing import StandardScaler
df_age_scaled = df_age.copy()
X = df_age_scaled.drop('LUNG_CANCER', axis=1).values
y = df_age_scaled['LUNG_CANCER'].values

X_train, X_val, y_train, y_val = train_test_split(X, y, test_size=0.2, random_state=999)


model = models.Sequential([
    layers.Input(shape=(15,)),                 # Input layer for 15 features
    layers.Dense(8, activation='sigmoid'),  
    layers.Dense(8, activation='sigmoid'),
    layers.Dense(1, activation='sigmoid'),
])

# Compile the model
opt = tf.keras.optimizers.Adagrad(learning_rate=0.5)
model.compile(optimizer=opt, 
              loss='binary_crossentropy',   # Loss function for binary-class classification
              metrics=['accuracy'])              # Track accuracy as performance metric during training

# Train the model for 2000 epochs (each epoch uses the full training set, i.e. BGD)
model.fit(X_train, y_train, epochs=2000, batch_size=len(X_train), verbose=0)

# Display optimized weights and biases after training (useful for analysis)
for layer in model.layers:
    weights, biases = layer.get_weights()
    print(f"Weights for {layer.name}:\n{weights}\nBiases:\n{biases}")

# Evaluate model on validation data (i.e., calculate loss function & accuracy performance metric of fitted model on validation data)
val_loss, val_accuracy = model.evaluate(X_val, y_val, verbose=0)
print(f"Validation Loss: {val_loss:.4f}, Validation Accuracy: {val_accuracy:.4f}")

# Get predicted probabilities and convert to binary predictions
y_pred_prob = model.predict(X_val)
y_pred = (y_pred_prob > 0.5).astype(int)

# Calculate precision, recall, and F1-score
precision = precision_score(y_val, y_pred, zero_division=0)
recall = recall_score(y_val, y_pred, zero_division=0)
f1 = f1_score(y_val, y_pred, zero_division=0)

# Display metrics for this model
print(f"Validation Loss: {val_loss:.4f}, Validation Accuracy: {val_accuracy:.4f}")
print(f"Precision: {precision:.4f}, Recall: {recall:.4f}, F1 Score: {f1:.4f}")

<keras.src.callbacks.history.History at 0x28b326bc950>

Weights for dense_18:
[[ 2.91461408e-01 -1.32055506e-01  4.60511237e-01 -1.79489717e-01
   3.69956672e-01 -3.73686647e+00  2.97818005e-01  3.04175973e-01]
 [-3.68170023e-01  2.40529060e-01 -4.39015597e-01 -2.00770527e-01
  -4.31229204e-01  3.54164317e-02 -4.36963737e-01  3.12815726e-01]
 [-2.62970269e-01  3.02383341e-02 -3.83269161e-01 -4.20703590e-01
   5.08247197e-01  1.41545677e+00 -3.02256495e-01  1.66263580e-01]
 [ 1.01934791e-01 -9.24689919e-02  3.47698405e-02 -2.64150470e-01
  -6.79847598e-02  5.91204357e+00  1.13648772e-01 -4.53297913e-01]
 [-7.05936551e-02  3.34141076e-01 -3.63175452e-01 -3.12635064e-01
   2.89185107e-01 -3.13429683e-01  1.39420211e-01  4.00422394e-01]
 [-4.48926002e-01 -1.31803170e-01  1.17181428e-02  4.24719244e-01
  -4.99302268e-01  8.71758699e-01  2.71649361e-02  5.92690930e-02]
 [ 2.01948345e-01 -2.95041502e-01  3.30424696e-01 -1.86098725e-01
  -4.02902663e-01 -3.17089647e-01  3.27551603e-01 -2.68281519e-01]
 [ 8.28257799e-02  3.19959313e-01 -5.25905713e-

In [19]:
model = models.Sequential([
    layers.Input(shape=(15,)),                 # Input layer for 15 features
    layers.Dense(8, activation='relu'),  
    layers.Dense(8, activation='relu'),
    layers.Dense(8, activation='relu'),
    layers.Dense(1, activation='sigmoid'),
])

# Compile the model
opt = tf.keras.optimizers.RMSprop(learning_rate=0.5)
model.compile(optimizer=opt, 
              loss='binary_crossentropy',   # Loss function for binary-class classification
              metrics=['accuracy'])              # Track accuracy as performance metric during training

# Train the model for 2000 epochs (each epoch uses the full training set, i.e. BGD)
model.fit(X_train, y_train, epochs=2000, batch_size=len(X_train), verbose=0)

# Display optimized weights and biases after training (useful for analysis)
for layer in model.layers:
    weights, biases = layer.get_weights()
    print(f"Weights for {layer.name}:\n{weights}\nBiases:\n{biases}")

# Evaluate model on validation data (i.e., calculate loss function & accuracy performance metric of fitted model on validation data)
val_loss, val_accuracy = model.evaluate(X_val, y_val, verbose=0)
print(f"Validation Loss: {val_loss:.4f}, Validation Accuracy: {val_accuracy:.4f}")

# Get predicted probabilities and convert to binary predictions
y_pred_prob = model.predict(X_val)
y_pred = (y_pred_prob > 0.5).astype(int)

# Calculate precision, recall, and F1-score
precision = precision_score(y_val, y_pred, zero_division=0)
recall = recall_score(y_val, y_pred, zero_division=0)
f1 = f1_score(y_val, y_pred, zero_division=0)

# Display metrics for this model
print(f"Validation Loss: {val_loss:.4f}, Validation Accuracy: {val_accuracy:.4f}")
print(f"Precision: {precision:.4f}, Recall: {recall:.4f}, F1 Score: {f1:.4f}")

<keras.src.callbacks.history.History at 0x28b30dfe960>

Weights for dense_25:
[[-4.13610101e-01 -1.07718921e+00  2.67355323e-01 -1.75665915e-01
   6.90819621e-02  1.01803541e-01 -2.37419933e-01 -1.03221989e+00]
 [-5.78494892e-02 -1.23818922e+00 -3.87658238e-01 -7.60924220e-02
  -2.18136638e-01 -3.69764388e-01 -9.38527435e-02 -1.07158446e+00]
 [-6.83042765e-01 -8.74702394e-01 -9.65039432e-02  4.01674688e-01
  -1.11358583e-01 -2.90986478e-01  4.73052114e-02 -9.23569560e-01]
 [-1.86836398e+00  4.35417086e-01  2.56793320e-01 -4.63939309e-01
   4.88798082e-01 -2.71769583e-01 -2.68443799e+00  1.01668358e+00]
 [-5.61010361e-01 -1.35631454e+00 -1.06856495e-01  1.12082779e-01
  -3.21099818e-01 -2.07663476e-02  8.04104507e-02 -1.09161508e+00]
 [-1.15155399e+00 -1.57637930e+00 -1.09778196e-01 -4.38383013e-01
   2.66855061e-01  5.60151935e-02 -8.69712234e-02 -6.54828310e-01]
 [-9.38837230e-01 -9.93765533e-01  2.06665993e-01 -3.04169297e-01
   2.37070322e-02 -3.03938419e-01 -8.43465328e-04 -1.03611052e+00]
 [-8.94713759e-01 -1.37599802e+00  4.69580710e-

### Method 1) try to get feature importance, then determine optimal k
### Method 2) will try the other way as well. determine optimal k, then feature importance
### what is permutation importance?  The permutation feature importance is defined to be the decrease in a model score when a single feature value is randomly shuffled 1.

##### Method 1

In [7]:
df_age.columns

Index(['GENDER', 'AGE', 'SMOKING', 'YELLOW_FINGERS', 'ANXIETY',
       'PEER_PRESSURE', 'CHRONIC DISEASE', 'FATIGUE ', 'ALLERGY ', 'WHEEZING',
       'ALCOHOL CONSUMING', 'COUGHING', 'SHORTNESS OF BREATH',
       'SWALLOWING DIFFICULTY', 'CHEST PAIN', 'LUNG_CANCER'],
      dtype='object')

In [9]:
from sklearn.neighbors import KNeighborsClassifier
from sklearn.inspection import permutation_importance
from sklearn.model_selection import train_test_split

X = df_age[['GENDER', 'AGE', 'SMOKING', 'YELLOW_FINGERS', 'ANXIETY',
       'PEER_PRESSURE', 'CHRONIC DISEASE', 'FATIGUE ', 'ALLERGY ', 'WHEEZING',
       'ALCOHOL CONSUMING', 'COUGHING', 'SHORTNESS OF BREATH',
       'SWALLOWING DIFFICULTY', 'CHEST PAIN']]
y = df_age['LUNG_CANCER'].values

# Split dataset into training and testing sets
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=888)

# Fit the KNN model
knn = KNeighborsClassifier(n_neighbors=5)
knn.fit(X_train, y_train)

# Apply permutation importance
result = permutation_importance(knn, X_test, y_test, n_repeats=10, random_state=888)

# Display importance
importance = result.importances_mean
print(importance)

[ 0.01772222  0.00994444  0.00655556  0.03572222 -0.00466667  0.00205556
 -0.00116667  0.00138889  0.005       0.01094444  0.03194444 -0.00094444
  0.00077778  0.00833333 -0.00244444]


In [17]:
feature_names = ['GENDER', 'AGE', 'SMOKING', 'YELLOW_FINGERS', 'ANXIETY',
       'PEER_PRESSURE', 'CHRONIC DISEASE', 'FATIGUE ', 'ALLERGY ', 'WHEEZING',
       'ALCOHOL CONSUMING', 'COUGHING', 'SHORTNESS OF BREATH',
       'SWALLOWING DIFFICULTY', 'CHEST PAIN']

df_feature_importance = pd.DataFrame({
    'Feature': feature_names,
    'Importance': importance
})

# Display the DataFrame
df_feature_importance.sort_values(by = 'Importance', ascending = False)

Unnamed: 0,Feature,Importance
3,YELLOW_FINGERS,0.035722
10,ALCOHOL CONSUMING,0.031944
0,GENDER,0.017722
9,WHEEZING,0.010944
1,AGE,0.009944
13,SWALLOWING DIFFICULTY,0.008333
2,SMOKING,0.006556
8,ALLERGY,0.005
5,PEER_PRESSURE,0.002056
7,FATIGUE,0.001389


##### Order of importance: yellow fingers, alcohol consuming, gender, wheezing, age
kiv swallowing difficulty, smoking, allergy, peer pressure, fatigue, shortness of breath
negative variables: coughing, chronic disease, chest pain, anxiety, can try to exclude these first, might be negatively affecting model accuracy