
# Assignment: Artificial Neural Network (ANN) Classification on the Sonar Dataset



In [15]:

!pip -q install tensorflow keras-tuner

import pandas as pd
import numpy as np

import matplotlib.pyplot as plt

from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler, LabelEncoder
from sklearn.metrics import accuracy_score, precision_score, recall_score, f1_score, classification_report, confusion_matrix

import tensorflow as tf
from tensorflow import keras
from tensorflow.keras import layers

# Keras Tuner for structured hyperparameter tuning
import keras_tuner as kt

print("TensorFlow version:", tf.__version__)


TensorFlow version: 2.19.0


In [16]:
# Read the sonar CSV from the working directory into a DataFrame.
df = pd.read_csv("sonardataset.csv")

# Report (rows, columns), then show the first five records as the cell output.
print("Shape:", df.shape)
df.head(5)

Shape: (208, 61)


Unnamed: 0,x_1,x_2,x_3,x_4,x_5,x_6,x_7,x_8,x_9,x_10,...,x_52,x_53,x_54,x_55,x_56,x_57,x_58,x_59,x_60,Y
0,0.02,0.0371,0.0428,0.0207,0.0954,0.0986,0.1539,0.1601,0.3109,0.2111,...,0.0027,0.0065,0.0159,0.0072,0.0167,0.018,0.0084,0.009,0.0032,R
1,0.0453,0.0523,0.0843,0.0689,0.1183,0.2583,0.2156,0.3481,0.3337,0.2872,...,0.0084,0.0089,0.0048,0.0094,0.0191,0.014,0.0049,0.0052,0.0044,R
2,0.0262,0.0582,0.1099,0.1083,0.0974,0.228,0.2431,0.3771,0.5598,0.6194,...,0.0232,0.0166,0.0095,0.018,0.0244,0.0316,0.0164,0.0095,0.0078,R
3,0.01,0.0171,0.0623,0.0205,0.0205,0.0368,0.1098,0.1276,0.0598,0.1264,...,0.0121,0.0036,0.015,0.0085,0.0073,0.005,0.0044,0.004,0.0117,R
4,0.0762,0.0666,0.0481,0.0394,0.059,0.0649,0.1209,0.2467,0.3564,0.4459,...,0.0031,0.0054,0.0105,0.011,0.0015,0.0072,0.0048,0.0107,0.0094,R


In [17]:
# Dataset dimensions: rows are samples, columns are features plus the label.
n_rows = df.shape[0]
n_cols = df.shape[1]
print("Number of samples:", n_rows)
print("Number of columns:", n_cols)

# Name of the label column; later cells look the target up through this.
target_col_name = "Y"
print("Target column name:", target_col_name)

# Class balance of the label column.
print("\nClass counts:")
print(df.loc[:, target_col_name].value_counts())

Number of samples: 208
Number of columns: 61
Target column name: Y

Class counts:
Y
M    111
R     97
Name: count, dtype: int64


In [18]:
# Count NaNs per column once and reuse the result for both reports below.
missing_per_column = df.isna().sum()

print("Missing values per column (first 10 columns):")
print(missing_per_column.head(10))

# Summing the per-column counts gives the total over the whole frame.
print("\nTotal missing values:", missing_per_column.sum())


Missing values per column (first 10 columns):
x_1     0
x_2     0
x_3     0
x_4     0
x_5     0
x_6     0
x_7     0
x_8     0
x_9     0
x_10    0
dtype: int64

Total missing values: 0


In [19]:
# Separate features from the label by column *name* so this cell depends only
# on `target_col_name` (set in the overview cell) and not on any positional
# index variable that may not exist on a fresh kernel.
assert target_col_name in df.columns, f"missing target column: {target_col_name!r}"

X = df.drop(columns=[target_col_name])  # new frame holding every non-label column
y = df[target_col_name].copy()          # label Series, detached from df

print("X shape:", X.shape)
print("y shape:", y.shape)
X.head()


X shape: (208, 60)
y shape: (208,)


Unnamed: 0,x_1,x_2,x_3,x_4,x_5,x_6,x_7,x_8,x_9,x_10,...,x_51,x_52,x_53,x_54,x_55,x_56,x_57,x_58,x_59,x_60
0,0.02,0.0371,0.0428,0.0207,0.0954,0.0986,0.1539,0.1601,0.3109,0.2111,...,0.0232,0.0027,0.0065,0.0159,0.0072,0.0167,0.018,0.0084,0.009,0.0032
1,0.0453,0.0523,0.0843,0.0689,0.1183,0.2583,0.2156,0.3481,0.3337,0.2872,...,0.0125,0.0084,0.0089,0.0048,0.0094,0.0191,0.014,0.0049,0.0052,0.0044
2,0.0262,0.0582,0.1099,0.1083,0.0974,0.228,0.2431,0.3771,0.5598,0.6194,...,0.0033,0.0232,0.0166,0.0095,0.018,0.0244,0.0316,0.0164,0.0095,0.0078
3,0.01,0.0171,0.0623,0.0205,0.0205,0.0368,0.1098,0.1276,0.0598,0.1264,...,0.0241,0.0121,0.0036,0.015,0.0085,0.0073,0.005,0.0044,0.004,0.0117
4,0.0762,0.0666,0.0481,0.0394,0.059,0.0649,0.1209,0.2467,0.3564,0.4459,...,0.0156,0.0031,0.0054,0.0105,0.011,0.0015,0.0072,0.0048,0.0107,0.0094


In [20]:
# Turn the string class labels into integer codes.
le = LabelEncoder()
y_enc = le.fit_transform(y)

# Show which integer each original class label was assigned.
class_to_code = {cls: code for cls, code in zip(le.classes_, le.transform(le.classes_))}
print("Label mapping:", class_to_code)
print("Encoded y sample:", y_enc[:10])


Label mapping: {'M': np.int64(0), 'R': np.int64(1)}
Encoded y sample: [1 1 1 1 1 1 1 1 1 1]


In [21]:
# Hold out 20% of the samples for testing. The split is stratified on the
# encoded labels and uses a fixed random_state so it is repeatable.
split_parts = train_test_split(
    X,
    y_enc,
    test_size=0.2,
    stratify=y_enc,
    random_state=42,
)
X_train, X_test, y_train, y_test = split_parts

print("Train:", X_train.shape, "Test:", X_test.shape)


Train: (166, 60) Test: (42, 60)


In [22]:
# Fit the scaler on the training split only, then apply the same fitted
# transform to both splits so no test-set statistics leak into training.
scaler = StandardScaler().fit(X_train)
X_train_scaled = scaler.transform(X_train)
X_test_scaled = scaler.transform(X_test)

print("Scaled sample (first row, first 10 features):")
print(X_train_scaled[0, :10])


Scaled sample (first row, first 10 features):
[ 0.92218084 -0.30070762  0.49511043  2.0668377   2.08665107  1.43764398
  1.6350301  -0.66867259 -0.44725823 -0.01794878]


# 2) Baseline ANN Model

In [23]:
def build_baseline_model(input_dim):
    """Create and compile the single-hidden-layer baseline classifier.

    Architecture: ``input_dim`` inputs -> Dense(32, ReLU) -> Dense(1, sigmoid).
    The model is compiled with Adam (learning rate 0.001), binary
    cross-entropy loss, and accuracy as the tracked metric.

    Args:
        input_dim: Number of features per input sample.

    Returns:
        The compiled ``keras.Sequential`` model.
    """
    net = keras.Sequential()
    net.add(layers.Input(shape=(input_dim,)))
    net.add(layers.Dense(32, activation="relu"))
    net.add(layers.Dense(1, activation="sigmoid"))

    adam = keras.optimizers.Adam(learning_rate=0.001)
    net.compile(optimizer=adam, loss="binary_crossentropy", metrics=["accuracy"])
    return net

# The input layer width is the number of scaled feature columns.
n_input_features = X_train_scaled.shape[1]
baseline_model = build_baseline_model(n_input_features)
baseline_model.summary()


In [24]:
# Stop training once validation loss has gone 10 epochs without improving,
# and restore the weights from the best epoch seen.
early_stop = keras.callbacks.EarlyStopping(monitor="val_loss", patience=10, restore_best_weights=True)

# Training settings: up to 100 epochs, mini-batches of 16, and 20% of the
# training rows held out by Keras for validation.
baseline_fit_options = dict(
    validation_split=0.2,
    epochs=100,
    batch_size=16,
    callbacks=[early_stop],
    verbose=1,
)
history = baseline_model.fit(X_train_scaled, y_train, **baseline_fit_options)


Epoch 1/100
[1m9/9[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m3s[0m 89ms/step - accuracy: 0.4984 - loss: 0.7670 - val_accuracy: 0.5294 - val_loss: 0.6897
Epoch 2/100
[1m9/9[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 29ms/step - accuracy: 0.5816 - loss: 0.6545 - val_accuracy: 0.5882 - val_loss: 0.6263
Epoch 3/100
[1m9/9[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 64ms/step - accuracy: 0.6717 - loss: 0.5974 - val_accuracy: 0.6471 - val_loss: 0.5739
Epoch 4/100
[1m9/9[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 42ms/step - accuracy: 0.7689 - loss: 0.4975 - val_accuracy: 0.7353 - val_loss: 0.5291
Epoch 5/100
[1m9/9[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 32ms/step - accuracy: 0.7807 - loss: 0.4941 - val_accuracy: 0.7647 - val_loss: 0.4929
Epoch 6/100
[1m9/9[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 18ms/step - accuracy: 0.8277 - loss: 0.4213 - val_accuracy: 0.7647 - val_loss: 0.4601
Epoch 7/100
[1m9/9[0m [32m━━━━━━━━━━━

In [25]:
# Predicted probabilities on the test split, flattened to 1-D, then turned
# into hard 0/1 labels at the 0.5 threshold.
y_prob_base = baseline_model.predict(X_test_scaled).reshape(-1)
y_pred_base = (y_prob_base >= 0.5).astype(int)

# Apply each scoring function to the same (truth, prediction) pair.
acc_base, prec_base, rec_base, f1_base = (
    score_fn(y_test, y_pred_base)
    for score_fn in (accuracy_score, precision_score, recall_score, f1_score)
)

print("Baseline Metrics")
for caption, score in (
    ("Accuracy :", acc_base),
    ("Precision:", prec_base),
    ("Recall   :", rec_base),
    ("F1-score :", f1_base),
):
    print(caption, score)

# Per-class breakdown, labelled with the original class names.
report_base = classification_report(y_test, y_pred_base, target_names=le.classes_)
print("\nClassification Report:\n", report_base)
print("Confusion Matrix:\n", confusion_matrix(y_test, y_pred_base))


[1m2/2[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 177ms/step
Baseline Metrics
Accuracy : 0.8333333333333334
Precision: 0.9333333333333333
Recall   : 0.7
F1-score : 0.8

Classification Report:
               precision    recall  f1-score   support

           M       0.78      0.95      0.86        22
           R       0.93      0.70      0.80        20

    accuracy                           0.83        42
   macro avg       0.86      0.83      0.83        42
weighted avg       0.85      0.83      0.83        42

Confusion Matrix:
 [[21  1]
 [ 6 14]]


# 3) Hyperparameter Tuning (Random Search via KerasTuner)

In [26]:
def build_tunable_model(hp):
    """Build and compile a dense binary classifier from sampled hyperparameters.

    Hyperparameters requested from ``hp`` (in this order):
      * ``num_layers``: 1 to 3 hidden layers
      * ``activation``: "relu" or "tanh", shared by every hidden layer
      * ``units_<i>``: 16 to 128 in steps of 16, one per hidden layer
      * ``dropout_<i>``: 0.0, 0.1, 0.2 or 0.3, one per hidden layer
        (a Dropout layer is only inserted when the rate is above zero)
      * ``learning_rate``: 1e-2, 1e-3 or 1e-4 for the Adam optimizer

    The input width is read from the notebook-level ``X_train_scaled`` array.

    Args:
        hp: Hyperparameter container passed in by the tuner.

    Returns:
        A compiled ``keras.Sequential`` model ending in one sigmoid unit.
    """
    n_features = X_train_scaled.shape[1]
    stack = [layers.Input(shape=(n_features,))]

    depth = hp.Int("num_layers", min_value=1, max_value=3, step=1)
    hidden_activation = hp.Choice("activation", values=["relu", "tanh"])

    for layer_idx in range(depth):
        width = hp.Int(f"units_{layer_idx}", min_value=16, max_value=128, step=16)
        stack.append(layers.Dense(width, activation=hidden_activation))

        drop = hp.Choice(f"dropout_{layer_idx}", values=[0.0, 0.1, 0.2, 0.3])
        if drop > 0:
            stack.append(layers.Dropout(drop))

    # Single sigmoid unit for the binary output.
    stack.append(layers.Dense(1, activation="sigmoid"))

    step_size = hp.Choice("learning_rate", values=[1e-2, 1e-3, 1e-4])

    net = keras.Sequential(stack)
    net.compile(
        optimizer=keras.optimizers.Adam(learning_rate=step_size),
        loss="binary_crossentropy",
        metrics=["accuracy"],
    )
    return net


In [27]:
# Random search over the space defined in build_tunable_model, keeping the
# configuration with the best validation accuracy.
#   seed=42        -> the sampled hyperparameter combinations are repeatable.
#   overwrite=True -> discard any earlier results stored under
#                     kt_dir/sonar_ann; otherwise a re-run would resume from
#                     (or simply reuse) stale trials instead of searching again.
tuner = kt.RandomSearch(
    build_tunable_model,
    objective="val_accuracy",
    max_trials=10,
    executions_per_trial=1,
    seed=42,
    directory="kt_dir",
    project_name="sonar_ann",
    overwrite=True,
)

# Each trial trains for at most 60 epochs with early stopping; 20% of the
# training rows are held out for validation.
tuner.search(
    X_train_scaled, y_train,
    validation_split=0.2,
    epochs=60,
    batch_size=16,
    callbacks=[early_stop],
    verbose=1
)

# Report the winning configuration, one hyperparameter per line.
best_hp = tuner.get_best_hyperparameters(num_trials=1)[0]
print("Best Hyperparameters:")
for hp_name, hp_value in best_hp.values.items():
    print(f"  {hp_name}: {hp_value}")


Trial 10 Complete [00h 00m 14s]
val_accuracy: 0.8823529481887817

Best val_accuracy So Far: 0.9117646813392639
Total elapsed time: 00h 01m 44s
Best Hyperparameters:
  num_layers: 1
  activation: relu
  units_0: 48
  dropout_0: 0.1
  learning_rate: 0.001


In [28]:
# Rebuild a fresh, untrained model from the best hyperparameters found.
tuned_model = tuner.hypermodel.build(best_hp)
tuned_model.summary()

# Retrain it with the same settings as the baseline: up to 100 epochs,
# batches of 16, 20% validation hold-out, and early stopping.
tuned_fit_options = dict(
    validation_split=0.2,
    epochs=100,
    batch_size=16,
    callbacks=[early_stop],
    verbose=1,
)
history_tuned = tuned_model.fit(X_train_scaled, y_train, **tuned_fit_options)


Epoch 1/100
[1m9/9[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 31ms/step - accuracy: 0.4452 - loss: 0.8842 - val_accuracy: 0.2647 - val_loss: 0.9415
Epoch 2/100
[1m9/9[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 13ms/step - accuracy: 0.5553 - loss: 0.7445 - val_accuracy: 0.3529 - val_loss: 0.7433
Epoch 3/100
[1m9/9[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 11ms/step - accuracy: 0.7331 - loss: 0.5800 - val_accuracy: 0.6471 - val_loss: 0.6259
Epoch 4/100
[1m9/9[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 11ms/step - accuracy: 0.8002 - loss: 0.5055 - val_accuracy: 0.6765 - val_loss: 0.5592
Epoch 5/100
[1m9/9[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 11ms/step - accuracy: 0.7710 - loss: 0.5043 - val_accuracy: 0.7647 - val_loss: 0.5188
Epoch 6/100
[1m9/9[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 11ms/step - accuracy: 0.7824 - loss: 0.4922 - val_accuracy: 0.7647 - val_loss: 0.4925
Epoch 7/100
[1m9/9[0m [32m━━━━━━━━━━━

In [29]:
# Predicted probabilities on the test split, flattened to 1-D, then turned
# into hard 0/1 labels at the 0.5 threshold.
y_prob_tuned = tuned_model.predict(X_test_scaled).reshape(-1)
y_pred_tuned = (y_prob_tuned >= 0.5).astype(int)

# Apply each scoring function to the same (truth, prediction) pair.
acc_tuned, prec_tuned, rec_tuned, f1_tuned = (
    score_fn(y_test, y_pred_tuned)
    for score_fn in (accuracy_score, precision_score, recall_score, f1_score)
)

print("Tuned Metrics")
for caption, score in (
    ("Accuracy :", acc_tuned),
    ("Precision:", prec_tuned),
    ("Recall   :", rec_tuned),
    ("F1-score :", f1_tuned),
):
    print(caption, score)

# Per-class breakdown, labelled with the original class names.
report_tuned = classification_report(y_test, y_pred_tuned, target_names=le.classes_)
print("\nClassification Report:\n", report_tuned)
print("Confusion Matrix:\n", confusion_matrix(y_test, y_pred_tuned))


[1m2/2[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 165ms/step
Tuned Metrics
Accuracy : 0.8333333333333334
Precision: 0.9333333333333333
Recall   : 0.7
F1-score : 0.8

Classification Report:
               precision    recall  f1-score   support

           M       0.78      0.95      0.86        22
           R       0.93      0.70      0.80        20

    accuracy                           0.83        42
   macro avg       0.86      0.83      0.83        42
weighted avg       0.85      0.83      0.83        42

Confusion Matrix:
 [[21  1]
 [ 6 14]]


# 4) Comparison and short report

In [30]:
# One row per model, with the test-set metrics side by side.
comparison = pd.DataFrame(
    [
        {
            "Model": "Baseline ANN",
            "Accuracy": acc_base,
            "Precision": prec_base,
            "Recall": rec_base,
            "F1-score": f1_base,
        },
        {
            "Model": "Tuned ANN",
            "Accuracy": acc_tuned,
            "Precision": prec_tuned,
            "Recall": rec_tuned,
            "F1-score": f1_tuned,
        },
    ],
    columns=["Model", "Accuracy", "Precision", "Recall", "F1-score"],
)
comparison


Unnamed: 0,Model,Accuracy,Precision,Recall,F1-score
0,Baseline ANN,0.833333,0.933333,0.7,0.8
1,Tuned ANN,0.833333,0.933333,0.7,0.8
