In [129]:
# Install TensorFlow into the running kernel's environment via the %pip magic.
# NOTE(review): no version is pinned, so a later re-run may install a different
# release than the one that produced the outputs below — consider pinning.
%pip install tensorflow

Note: you may need to restart the kernel to use updated packages.


## **Import Required Libraries**

In [130]:
import os
from pathlib import Path

import pandas as pd
import numpy as np
from sklearn.preprocessing import StandardScaler, OneHotEncoder, OrdinalEncoder
from sklearn.preprocessing import LabelEncoder
from sklearn.compose import ColumnTransformer
from sklearn.pipeline import Pipeline
from imblearn.over_sampling import SMOTE
from sklearn.metrics import classification_report,accuracy_score,f1_score,precision_score,recall_score
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Dense, Dropout,BatchNormalization
from tensorflow.keras.layers import Input
from tensorflow.keras.utils import to_categorical
from tensorflow.keras.utils import set_random_seed

## **Load the Datasets**

In [131]:
# Folder that holds train_data.csv and test_data.csv. The default is the
# original author's location; set the DATA_DIR environment variable to run the
# notebook on another machine without editing this cell.
DATA_DIR = Path(os.environ.get(
    "DATA_DIR",
    "C:\\UOC pdf\\4th Year\\Machine Learning-02\\Data-Analysis-Project-2\\data",
))

# Read the already-split training and testing sets.
train_data = pd.read_csv(DATA_DIR / "train_data.csv")
test_data = pd.read_csv(DATA_DIR / "test_data.csv")

In [132]:
# Remove the "Unnamed: 0" column (presumably a row index saved with the CSV —
# confirm). errors="ignore" keeps this cell idempotent: running it a second
# time, when the column is already gone, no longer raises KeyError.
train_data = train_data.drop(columns=["Unnamed: 0"], errors="ignore")
test_data = test_data.drop(columns=["Unnamed: 0"], errors="ignore")

## **Split into X_train, X_test, Y_train, Y_test**

In [133]:
# Separate the predictors from the "diagnosis" label in each split.
target_col = "diagnosis"

X_train, Y_train = train_data.drop(columns=[target_col]), train_data[target_col]
X_test, Y_test = test_data.drop(columns=[target_col]), test_data[target_col]

In [134]:
# Show (feature-matrix shape, target shape) for each split.
train_shapes = (X_train.shape, Y_train.shape)
test_shapes = (X_test.shape, Y_test.shape)

print(f"Shape of the Training set:{train_shapes}")
print(f"Shape of the Testing set:{test_shapes}")

Shape of the Training set:((700, 15), (700,))
Shape of the Testing set:((300, 15), (300,))


## **Build the Pipeline**

In [135]:
# Column names grouped by dtype family.
# NOTE(review): any nominal/ordinal feature stored with a numeric dtype would
# also be picked up by num_cols — verify against the data.
num_cols = X_train.select_dtypes(include=["number"]).columns  # numeric columns
cat_cols = X_train.select_dtypes(include=["object", "category"]).columns  # object / category columns

In [136]:
# Categorical features with no inherent order.
nominal_cols = [
    "gender",
    "pem_present",
    "meditation_or_mindfulness",
]

# Categorical features treated as ordered.
ordinal_cols = [
    "work_status",
    "social_activity_level",
    "exercise_frequency",
]

#### **Define the Pipeline**

In [137]:
# Numeric features: standardise with StandardScaler.
numerical_pipeline=Pipeline(steps=[
    ("scaler",StandardScaler())])

# Ordered categoricals: map each category to an integer code.
# Categories not seen during fit are encoded as -1 instead of raising, so this
# encoder tolerates unseen values just like the one-hot encoder below.
# NOTE(review): no explicit `categories=` order is supplied, so the codes follow
# the encoder's default (sorted) category order, which may not match the real
# ranking of these levels — confirm and pass the intended order if needed.
ordinal_pipeline=Pipeline(steps=[
    ("Ordinal Encoder",OrdinalEncoder(handle_unknown="use_encoded_value",unknown_value=-1))])

# Unordered categoricals: dense one-hot columns; unseen categories become all zeros.
nominal_pipeline=Pipeline(steps=[
    ("One-Hot Encoder",OneHotEncoder(sparse_output=False,handle_unknown="ignore"))])

#### **Combine Pipeline**

In [138]:
# (name, pipeline, columns) triples: each column group goes through its own pipeline.
column_pipelines = [
    ("Numerical Pipeline", numerical_pipeline, num_cols),
    ("Ordinal Pipeline", ordinal_pipeline, ordinal_cols),
    ("Nominal Pipeline", nominal_pipeline, nominal_cols),
]

# Columns not listed in any group are discarded (remainder="drop").
preprocessors = ColumnTransformer(
    transformers=column_pipelines,
    remainder="drop",
    n_jobs=-1,
    verbose=True,
)

#### **Execute the Pipeline**

In [139]:
# Fit every transformer on the training split only, then encode it.
X_train_encoded=preprocessors.fit_transform(X_train)
# Encode the test split with the already-fitted transformers (no refitting).
X_test_encoded=preprocessors.transform(X_test)

In [140]:
# Output column labels produced by the fitted ColumnTransformer
feature_names = preprocessors.get_feature_names_out()

# Wrap both encoded arrays in DataFrames that carry those labels
X_train_encoded_df, X_test_encoded_df = (
    pd.DataFrame(encoded, columns=feature_names)
    for encoded in (X_train_encoded, X_test_encoded)
)

## **Apply SMOTE**

In [141]:
## Oversample the preprocessed training data with SMOTE (the test split is left untouched).
## random_state=42 makes the resampling repeatable.
## NOTE(review): the encoded frame includes one-hot and ordinal-coded columns; SMOTE
## may produce non-integer values in those columns — confirm whether a
## categorical-aware variant is needed.
sm = SMOTE(random_state=42)
X_train_sm, y_train_sm = sm.fit_resample(X_train_encoded_df, Y_train)



In [144]:
# Wrap the resampled target in a one-column DataFrame so it can be addressed as
# y_train_sm["diagnosis"]. The name is rebound to a different type here.
y_train_sm=pd.DataFrame(y_train_sm,columns=["diagnosis"])

## **Prepare the Target Variable**

In [145]:
## Prepare target variable
num_classes = len(np.unique(y_train_sm))

# Integer-encode the labels whenever they are not already numeric. Testing for
# "non-numeric" rather than only object/category also covers pandas' dedicated
# string dtypes, which would otherwise reach to_categorical un-encoded.
if not pd.api.types.is_numeric_dtype(y_train_sm["diagnosis"]):
    le = LabelEncoder()
    y_train_sm["diagnosis"] = le.fit_transform(y_train_sm["diagnosis"])
    # Reuse the encoder fitted on the training labels so both splits share one
    # label -> integer mapping. Y_test is rebound to the encoded array.
    Y_test = le.transform(Y_test)

# Convert to one-hot for multiclass
if num_classes > 2:
    y_train_cat = to_categorical(y_train_sm, num_classes)
    y_test_cat = to_categorical(Y_test, num_classes)
else:
    # Two classes: the labels are used as-is for the single-unit sigmoid output.
    y_train_cat = y_train_sm
    y_test_cat = Y_test

## **Build the ANN model**

In [146]:
# Seed Python, NumPy and TensorFlow so weight initialisation, dropout masks and
# batch shuffling are repeatable on a fresh kernel (42 matches the SMOTE seed).
set_random_seed(42)

model = Sequential()

# Declare the input width with an explicit Input layer. Passing `input_shape`
# to the first Dense layer is deprecated in Keras 3 and emits a UserWarning.
model.add(Input(shape=(X_train_sm.shape[1],)))

# First block: Dense(128) -> BatchNorm -> Dense(64, ReLU) -> Dropout(0.3)
# NOTE(review): Dense(128) has no activation, so it is linear up to the batch
# norm; an activation after BatchNormalization may have been intended — confirm.
model.add(Dense(128))
model.add(BatchNormalization())
model.add(Dense(64, activation='relu'))
model.add(Dropout(0.3))

# Second block: Dense(32) -> BatchNorm -> Dense(32, ReLU) -> Dropout(0.2)
# NOTE(review): same activation-less Dense -> BatchNorm pattern as above.
model.add(Dense(32))
model.add(BatchNormalization())
model.add(Dense(32, activation='relu'))
model.add(Dropout(0.2))

# Output layer: one softmax unit per class for multiclass, otherwise a single sigmoid unit
model.add(Dense(num_classes if num_classes > 2 else 1, activation='softmax' if num_classes > 2 else 'sigmoid'))

  super().__init__(activity_regularizer=activity_regularizer, **kwargs)


## **Compile the ANN**

In [147]:
# Choose the loss that pairs with the output layer: categorical cross-entropy
# for more than two classes, binary cross-entropy otherwise.
is_multiclass = num_classes > 2
loss_name = "categorical_crossentropy" if is_multiclass else "binary_crossentropy"

model.compile(optimizer="adam", loss=loss_name, metrics=["accuracy"])

## **Train the Model**

In [148]:
 ##Train the model
# NOTE(review): the test split is passed as validation data, so the per-epoch
# val_* figures are test-set figures — confirm this is intended, or hold out a
# separate validation split.
fit_options = {
    "validation_data": (X_test_encoded_df, y_test_cat),
    "epochs": 50,
    "batch_size": 32,
    "verbose": 1,
}

# Train on the SMOTE-resampled data; `history` keeps the per-epoch metrics.
history = model.fit(X_train_sm, y_train_cat, **fit_options)

Epoch 1/50
[1m26/26[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m3s[0m 15ms/step - accuracy: 0.4448 - loss: 1.1236 - val_accuracy: 0.8100 - val_loss: 0.7382
Epoch 2/50
[1m26/26[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 5ms/step - accuracy: 0.7379 - loss: 0.6375 - val_accuracy: 0.8967 - val_loss: 0.4803
Epoch 3/50
[1m26/26[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 5ms/step - accuracy: 0.8126 - loss: 0.4417 - val_accuracy: 0.9300 - val_loss: 0.3121
Epoch 4/50
[1m26/26[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 6ms/step - accuracy: 0.8798 - loss: 0.3193 - val_accuracy: 0.9333 - val_loss: 0.2509
Epoch 5/50
[1m26/26[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 6ms/step - accuracy: 0.8748 - loss: 0.3253 - val_accuracy: 0.9367 - val_loss: 0.2138
Epoch 6/50
[1m26/26[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 6ms/step - accuracy: 0.8732 - loss: 0.2775 - val_accuracy: 0.9200 - val_loss: 0.1992
Epoch 7/50
[1m26/26[0m [32m━━━━━━━━━

## **Model Evaluation**

In [149]:
def _probs_to_labels(probs):
    """Turn model output probabilities into integer class labels.

    Multiclass (num_classes > 2): index of the largest probability per row.
    Otherwise: threshold the single sigmoid output at 0.5 and flatten to 1-D.
    """
    if num_classes > 2:
        return np.argmax(probs, axis=1)
    return (probs > 0.5).astype(int).flatten()


# Raw probabilities for the test split and the resampled training split
y_pred_probs = model.predict(X_test_encoded_df)
y_pred_train_probs = model.predict(X_train_sm)

# Class labels derived from those probabilities
y_pred = _probs_to_labels(y_pred_probs)
y_pred_train = _probs_to_labels(y_pred_train_probs)

[1m10/10[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 14ms/step
[1m26/26[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 2ms/step 


#### **Get the Evaluation Metrics for Testing Data**

In [150]:
# Weighted-average F1, precision and recall on the test split
f1_test, precision_test, recall_test = (
    scorer(Y_test, y_pred, average="weighted")
    for scorer in (f1_score, precision_score, recall_score)
)

# Plain accuracy on the test split
accuracy_test = accuracy_score(Y_test, y_pred)

In [151]:
# Print the test-split metrics, one per line, rounded to four decimals
test_report = (
    ("Accuracy:  ", accuracy_test),
    ("Precision: ", precision_test),
    ("F1 Score:  ", f1_test),
    ("Recall Score: ", recall_test),
)
print("\n".join(f"{label}{score:.4f}" for label, score in test_report))

Accuracy:  0.9300
Precision: 0.9302
F1 Score:  0.9276
Recall Score: 0.9300


#### **Get the Evaluation Metrics for Training Data**

In [152]:
# Weighted-average F1, precision and recall on the SMOTE-resampled training split
f1_train, precision_train, recall_train = (
    scorer(y_train_sm, y_pred_train, average="weighted")
    for scorer in (f1_score, precision_score, recall_score)
)

# Plain accuracy on the resampled training split
accuracy_train = accuracy_score(y_train_sm, y_pred_train)

In [153]:
# Print the training-split metrics, one per line, rounded to four decimals
train_report = (
    ("Accuracy:  ", accuracy_train),
    ("Precision: ", precision_train),
    ("F1 Score:  ", f1_train),
    ("Recall Score:", recall_train),
)
print("\n".join(f"{label}{score:.4f}" for label, score in train_report))

Accuracy:  0.9758
Precision: 0.9769
F1 Score:  0.9758
Recall Score:0.9758


## **Model Logging and Tracking using MLflow**

In [154]:
import mlflow

In [155]:
# Point MLflow at the local tracking server and select (or create) the experiment.
mlflow.set_tracking_uri("http://127.0.0.1:5000/")
mlflow.set_experiment("Artificial-Neural-Network-Classifier")

with mlflow.start_run(run_name="Artificial Neural Network Classifier Hyperparameter Tune"):
    ## Hyperparameters
    mlflow.log_param("Epochs", 50)
    mlflow.log_param("Batch size", 32)
    # NOTE(review): the model also contains a second Dropout(0.2) layer that is
    # not recorded here.
    mlflow.log_param("Dropout", 0.3)
    # The output activation follows the model definition: softmax when there are
    # more than two classes, sigmoid otherwise, so the logged value stays truthful.
    mlflow.log_param(
        "Activation Functions",
        "relu,softmax" if num_classes > 2 else "relu,sigmoid",
    )

    ### Metrics computed on the test split
    mlflow.log_metric("f1_score_test", f1_test)
    mlflow.log_metric("precision_test", precision_test)
    mlflow.log_metric("accuracy_test", accuracy_test)
    mlflow.log_metric("recall_test", recall_test)

    # Log the model
    # NOTE(review): "Aritificial" looks like a typo for "Artificial"; it is left
    # unchanged because the artifact path may already be referenced — confirm.
    mlflow.keras.log_model(model, "Aritificial_Neural_Network_Classifier")

2025/07/25 22:42:24 INFO mlflow.tracking.fluent: Experiment with name 'Artificial-Neural-Network-Classifier' does not exist. Creating a new experiment.


🏃 View run Artificial Neural Network Classifier Hyperparameter Tune at: http://127.0.0.1:5000/#/experiments/517660236777405868/runs/2669c079a82d435d8d8ef30319902730
🧪 View experiment at: http://127.0.0.1:5000/#/experiments/517660236777405868
