In [None]:
import os
os.environ['TF_ENABLE_ONEDNN_OPTS'] = '0'
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import LabelEncoder, OneHotEncoder, StandardScaler
from sklearn.datasets import make_classification
import numpy as np
import tensorflow as tf
from tensorflow import keras
from keras.models import Sequential
from keras.layers import Dense, Dropout, Input
from keras.utils import to_categorical
from ucimlrepo import fetch_ucirepo 
import matplotlib.pyplot as plt
import seaborn as sns
from sklearn.metrics import classification_report, confusion_matrix


In [2]:
dry_bean = fetch_ucirepo(id=602) # fetch dataset
X = dry_bean.data.features # data (as pandas dataframes) 
y = dry_bean.data.targets 

# print("--- Loading Data ---")
# print(f"Shape of features (X): {X.shape}")
# print(f"Shape of target (y): {y.shape}")
# print("\nUnique classes:", y['Class'].unique())
# print(f"\nNumber of unique classes: {y['Class'].nunique()}")

In [3]:
onehot_encoder = OneHotEncoder(sparse_output=False)
y_onehot = onehot_encoder.fit_transform(y)

# print("\n--- Data Encoding ---")
# print("Shape of one-hot encoded y:", y_onehot.shape)
# print("Example of original y:", y.iloc[0].values)
# print("Example of one-hot encoded y:", y_onehot[0])

In [4]:
X_train, X_test, y_train, y_test = train_test_split(
    X, y_onehot, test_size=0.2, random_state=42, stratify=y
)

# print("\n--- Data Splitting ---")
# print("Training features shape:", X_train.shape)
# print("Testing features shape:", X_test.shape)
# print("Training labels shape:", y_train.shape)
# print("Testing labels shape:", y_test.shape)

In [5]:
scaler = StandardScaler()
X_train_scaled = scaler.fit_transform(X_train)
X_test_scaled = scaler.transform(X_test)

# print("\n--- Feature Scaling ---")
# print(f"Mean of a feature in scaled training data (should be ~0): {X_train_scaled[:, 0].mean():.4f}")
# print(f"Std dev of a feature in scaled training data (should be ~1): {X_train_scaled[:, 0].std():.4f}")
# print(f"Mean of a feature in scaled test data (will not be exactly 0): {X_test_scaled[:, 0].mean():.4f}")


In [6]:
input_features = X_train_scaled.shape[1] 
output_classes = y_train.shape[1]
model = Sequential()
model.add(Dense(128, input_shape=(input_features,), activation='relu'))
model.add(Dense(64, activation='relu'))
model.add(Dropout(0.3))
model.add(Dense(output_classes, activation='softmax'))

# print("\n--- Model Definition ---")
# model.summary()


  super().__init__(activity_regularizer=activity_regularizer, **kwargs)


In [7]:
# -------------------- Model Compilation -------------------- #
model.compile(
    optimizer='adam',
    loss='categorical_crossentropy',
    metrics=['accuracy']
)

# print("\n--- Model Compilation ---")
# print(f"Optimizer: {model.optimizer.get_config()['name']}")
# print(f"Loss Function: {model.loss}")
# print(f"Metrics: {model.metrics_names}")



In [8]:
# -------------------- Model Training -------------------- #
epochs = 50
batch_size = 32
print("\n--- Starting Model Training ---")
history = model.fit(
    X_train_scaled,
    y_train,
    epochs=epochs,
    batch_size=batch_size,
    validation_data=(X_test_scaled, y_test),
    verbose=1
)
print("\n--- Model Training Finished ---")


--- Starting Model Training ---
Epoch 1/50
[1m341/341[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 2ms/step - accuracy: 0.8398 - loss: 0.4897 - val_accuracy: 0.9108 - val_loss: 0.2368
Epoch 2/50
[1m341/341[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 1ms/step - accuracy: 0.9186 - loss: 0.2415 - val_accuracy: 0.9185 - val_loss: 0.2143
Epoch 3/50
[1m341/341[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 1ms/step - accuracy: 0.9211 - loss: 0.2302 - val_accuracy: 0.9251 - val_loss: 0.2112
Epoch 4/50
[1m341/341[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 1ms/step - accuracy: 0.9254 - loss: 0.2202 - val_accuracy: 0.9225 - val_loss: 0.2108
Epoch 5/50
[1m341/341[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 1ms/step - accuracy: 0.9264 - loss: 0.2133 - val_accuracy: 0.9251 - val_loss: 0.2095
Epoch 6/50
[1m341/341[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 1ms/step - accuracy: 0.9271 - loss: 0.2057 - val_accuracy: 0.9229 - val_loss: 0.

In [9]:
# Evaluate the model on the test data using `evaluate()`
print("\n--- Evaluating Model on Test Data ---")
test_loss, test_accuracy = model.evaluate(X_test_scaled, y_test, verbose=0)
print(f"Test Loss: {test_loss:.4f}")
print(f"Test Accuracy: {test_accuracy*100:.2f}%")


--- Evaluating Model on Test Data ---
Test Loss: 0.2018
Test Accuracy: 92.80%


In [10]:
# Predict class probabilities
y_pred_proba = model.predict(X_test_scaled)

# Convert one-hot back to class indices
y_true = np.argmax(y_test, axis=1)
y_pred = np.argmax(y_pred_proba, axis=1)

print("\n--- Classification Report ---")
print(classification_report(y_true, y_pred))

print("\n--- Confusion Matrix ---")
print(confusion_matrix(y_true, y_pred))

[1m86/86[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 1ms/step  

--- Classification Report ---
              precision    recall  f1-score   support

           0       0.92      0.93      0.93       265
           1       1.00      1.00      1.00       104
           2       0.96      0.94      0.95       326
           3       0.92      0.92      0.92       709
           4       0.96      0.96      0.96       386
           5       0.93      0.97      0.95       406
           6       0.88      0.87      0.87       527

    accuracy                           0.93      2723
   macro avg       0.94      0.94      0.94      2723
weighted avg       0.93      0.93      0.93      2723


--- Confusion Matrix ---
[[246   0  11   1   0   1   6]
 [  0 104   0   0   0   0   0]
 [ 10   0 307   0   5   2   2]
 [  0   0   0 651   2  16  40]
 [  2   0   2   5 370   0   7]
 [  3   0   0   3   0 392   8]
 [  5   0   0  45  10  10 457]]
