In [1]:
import pandas as pd
import numpy as np
import tensorflow as tf
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import LabelEncoder, MinMaxScaler
from sklearn.metrics import classification_report
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Dense, Dropout
from tensorflow.keras.utils import to_categorical

# Load the dataset directly by its name
data = pd.read_csv('Alphabets_data.csv')

# Show the first few rows to make sure it's loaded
print("Dataset loaded successfully!")
print(data.head())


Dataset loaded successfully!
  letter  xbox  ybox  width  height  onpix  xbar  ybar  x2bar  y2bar  xybar  \
0      T     2     8      3       5      1     8    13      0      6      6   
1      I     5    12      3       7      2    10     5      5      4     13   
2      D     4    11      6       8      6    10     6      2      6     10   
3      N     7    11      6       6      3     5     9      4      6      4   
4      G     2     1      3       1      1     8     6      6      6      6   

   x2ybar  xy2bar  xedge  xedgey  yedge  yedgex  
0      10       8      0       8      0       8  
1       3       9      2       8      4      10  
2       3       7      3       7      3       9  
3       4      10      6      10      2       8  
4       5       9      1       7      5      10  


In [2]:
# --- 1. Data Exploration ---
print("Dataset Information:")
data.info()

print("\nStatistical Summary:")
print(data.describe())

# Summarize key features
num_samples = data.shape[0]
num_features = data.shape[1] - 1  # Subtracting the target column 'letter'
num_classes = data['letter'].nunique()

print(f"\nNumber of Samples: {num_samples}")
print(f"Number of Features: {num_features}")
print(f"Number of Classes: {num_classes}")

# --- 2. Data Preprocessing ---
# Check for missing values
print("\nMissing Values Check:")
print(data.isnull().sum())
# No missing values found, so no imputation is needed.

# Separate features (X) and target (y)
X = data.drop('letter', axis=1)
y = data['letter']

# Normalize the feature data to a range of 0-1
scaler = MinMaxScaler()
X_scaled = scaler.fit_transform(X)

# Encode the categorical target labels into integers
encoder = LabelEncoder()
y_encoded = encoder.fit_transform(y)

# Convert integer labels to one-hot encoded vectors
y_one_hot = to_categorical(y_encoded)

print("\nShape of scaled features (X):", X_scaled.shape)
print("Shape of one-hot encoded target (y):", y_one_hot.shape)


Dataset Information:
<class 'pandas.core.frame.DataFrame'>
RangeIndex: 20000 entries, 0 to 19999
Data columns (total 17 columns):
 #   Column  Non-Null Count  Dtype 
---  ------  --------------  ----- 
 0   letter  20000 non-null  object
 1   xbox    20000 non-null  int64 
 2   ybox    20000 non-null  int64 
 3   width   20000 non-null  int64 
 4   height  20000 non-null  int64 
 5   onpix   20000 non-null  int64 
 6   xbar    20000 non-null  int64 
 7   ybar    20000 non-null  int64 
 8   x2bar   20000 non-null  int64 
 9   y2bar   20000 non-null  int64 
 10  xybar   20000 non-null  int64 
 11  x2ybar  20000 non-null  int64 
 12  xy2bar  20000 non-null  int64 
 13  xedge   20000 non-null  int64 
 14  xedgey  20000 non-null  int64 
 15  yedge   20000 non-null  int64 
 16  yedgex  20000 non-null  int64 
dtypes: int64(16), object(1)
memory usage: 2.6+ MB

Statistical Summary:
               xbox          ybox         width       height         onpix  \
count  20000.000000  20000.000000  

In [3]:
# Split the dataset into 80% training and 20% testing
X_train, X_test, y_train, y_test = train_test_split(
    X_scaled, y_one_hot, test_size=0.2, random_state=42, stratify=y_one_hot
)

print("Training data shape:", X_train.shape)
print("Test data shape:", X_test.shape)

# Define the basic ANN model
basic_model = Sequential([
    # Input layer and one hidden layer with 128 neurons and ReLU activation
    Dense(128, activation='relu', input_shape=(X_train.shape[1],)),
    # Output layer with 26 neurons (for 26 alphabets) and softmax activation
    Dense(num_classes, activation='softmax')
])

# Compile the model
basic_model.compile(
    optimizer='adam',
    loss='categorical_crossentropy',
    metrics=['accuracy']
)

# Print the model summary
print("Basic ANN Model Summary:")
basic_model.summary()

# Train the model
print("\nTraining the basic model...")
history_basic = basic_model.fit(
    X_train, y_train,
    epochs=50,
    batch_size=64,
    validation_split=0.1,
    verbose=1
)

# Evaluate the model on the test set
print("\nEvaluating the basic model...")
loss, accuracy = basic_model.evaluate(X_test, y_test, verbose=0)
print(f"Test Accuracy (Basic Model): {accuracy:.4f}")

# Make predictions and generate a classification report
y_pred_basic_prob = basic_model.predict(X_test)
y_pred_basic = np.argmax(y_pred_basic_prob, axis=1)
y_test_labels = np.argmax(y_test, axis=1)

print("\nClassification Report (Basic Model):")
print(classification_report(y_test_labels, y_pred_basic, target_names=encoder.classes_, zero_division=0))


Training data shape: (16000, 16)
Test data shape: (4000, 16)
Basic ANN Model Summary:


  super().__init__(activity_regularizer=activity_regularizer, **kwargs)



Training the basic model...
Epoch 1/50
[1m225/225[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m2s[0m 4ms/step - accuracy: 0.1498 - loss: 3.1469 - val_accuracy: 0.4781 - val_loss: 2.5801
Epoch 2/50
[1m225/225[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 2ms/step - accuracy: 0.4795 - loss: 2.4210 - val_accuracy: 0.5481 - val_loss: 1.9616
Epoch 3/50
[1m225/225[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 2ms/step - accuracy: 0.5519 - loss: 1.9067 - val_accuracy: 0.5813 - val_loss: 1.6384
Epoch 4/50
[1m225/225[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 4ms/step - accuracy: 0.6145 - loss: 1.6045 - val_accuracy: 0.6400 - val_loss: 1.4513
Epoch 5/50
[1m225/225[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 2ms/step - accuracy: 0.6434 - loss: 1.4365 - val_accuracy: 0.6494 - val_loss: 1.3315
Epoch 6/50
[1m225/225[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 2ms/step - accuracy: 0.6622 - loss: 1.3228 - val_accuracy: 0.6656 - val_loss: 1.2529

In [4]:
!pip install -q keras-tuner

import keras_tuner as kt

def build_model(hp):
    model = Sequential()

    # Tune the number of units in the first dense layer
    hp_units = hp.Int('units_1', min_value=32, max_value=512, step=32)
    model.add(Dense(units=hp_units, activation='relu', input_shape=(X_train.shape[1],)))

    # Tune the number of hidden layers and their units
    for i in range(hp.Int('num_layers', 1, 3)):
        model.add(Dense(
            units=hp.Int(f'units_{i+2}', min_value=32, max_value=256, step=32),
            activation=hp.Choice('activation', ['relu', 'tanh'])
        ))

    # Add a dropout layer to prevent overfitting
    model.add(Dropout(rate=hp.Float('dropout', 0.1, 0.5, step=0.1)))

    model.add(Dense(num_classes, activation='softmax'))

    # Tune the learning rate for the optimizer
    hp_learning_rate = hp.Choice('learning_rate', values=[1e-2, 1e-3, 1e-4])

    model.compile(
        optimizer=tf.keras.optimizers.Adam(learning_rate=hp_learning_rate),
        loss='categorical_crossentropy',
        metrics=['accuracy']
    )
    return model

# Instantiate the tuner
tuner = kt.RandomSearch(
    build_model,
    objective='val_accuracy',
    max_trials=10,  # Number of hyperparameter combinations to try
    executions_per_trial=2, # Number of models to train for each combination
    directory='tuner_dir',
    project_name='alphabet_classification'
)

print("KerasTuner setup complete.")
tuner.search_space_summary()


[?25l   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m0.0/129.1 kB[0m [31m?[0m eta [36m-:--:--[0m[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m129.1/129.1 kB[0m [31m3.6 MB/s[0m eta [36m0:00:00[0m
[?25hKerasTuner setup complete.
Search space summary
Default search space size: 6
units_1 (Int)
{'default': None, 'conditions': [], 'min_value': 32, 'max_value': 512, 'step': 32, 'sampling': 'linear'}
num_layers (Int)
{'default': None, 'conditions': [], 'min_value': 1, 'max_value': 3, 'step': 1, 'sampling': 'linear'}
units_2 (Int)
{'default': None, 'conditions': [], 'min_value': 32, 'max_value': 256, 'step': 32, 'sampling': 'linear'}
activation (Choice)
{'default': 'relu', 'conditions': [], 'values': ['relu', 'tanh'], 'ordered': False}
dropout (Float)
{'default': 0.1, 'conditions': [], 'min_value': 0.1, 'max_value': 0.5, 'step': 0.1, 'sampling': 'linear'}
learning_rate (Choice)
{'default': 0.01, 'conditions': [], 'values': [0.01, 0.001, 0.0001], 'ordered':

  super().__init__(activity_regularizer=activity_regularizer, **kwargs)


In [5]:
# Start the hyperparameter search
print("\nStarting hyperparameter search...")
tuner.search(X_train, y_train, epochs=20, validation_split=0.2)

# Get the optimal hyperparameters
best_hps = tuner.get_best_hyperparameters(num_trials=1)[0]

print(f"""
Best Hyperparameters Found:
- Units in first layer: {best_hps.get('units_1')}
- Number of hidden layers: {best_hps.get('num_layers')}
- Activation function: {best_hps.get('activation')}
- Dropout rate: {best_hps.get('dropout')}
- Learning rate: {best_hps.get('learning_rate')}
""")


Trial 10 Complete [00h 01m 03s]
val_accuracy: 0.8876562416553497

Best val_accuracy So Far: 0.8876562416553497
Total elapsed time: 00h 12m 34s

Best Hyperparameters Found:
- Units in first layer: 480
- Number of hidden layers: 2
- Activation function: tanh
- Dropout rate: 0.30000000000000004
- Learning rate: 0.01



In [7]:
# Build the model with the best hyperparameters and train it
tuned_model = tuner.get_best_models(num_models=1)[0]
history_tuned = tuned_model.fit(
    X_train, y_train,
    epochs=50,
    batch_size=64,
    validation_split=0.1,
    verbose=1
)

# Evaluate the tuned model
print("\nEvaluating the tuned model...")
loss_tuned, accuracy_tuned = tuned_model.evaluate(X_test, y_test, verbose=0)
print(f"Test Accuracy (Tuned Model): {accuracy_tuned:.4f}")

# Make predictions and generate a classification report
y_pred_tuned_prob = tuned_model.predict(X_test)
y_pred_tuned = np.argmax(y_pred_tuned_prob, axis=1)

print("\nClassification Report (Tuned Model):")
print(classification_report(y_test_labels, y_pred_tuned, target_names=encoder.classes_, zero_division=0))

  super().__init__(activity_regularizer=activity_regularizer, **kwargs)
  saveable.load_own_variables(weights_store.get(inner_path))


Epoch 1/50
[1m225/225[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m2s[0m 4ms/step - accuracy: 0.8720 - loss: 0.4083 - val_accuracy: 0.8913 - val_loss: 0.3571
Epoch 2/50
[1m225/225[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 3ms/step - accuracy: 0.8796 - loss: 0.3948 - val_accuracy: 0.9125 - val_loss: 0.3077
Epoch 3/50
[1m225/225[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 3ms/step - accuracy: 0.8880 - loss: 0.3704 - val_accuracy: 0.8994 - val_loss: 0.3241
Epoch 4/50
[1m225/225[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 3ms/step - accuracy: 0.8907 - loss: 0.3516 - val_accuracy: 0.8994 - val_loss: 0.3077
Epoch 5/50
[1m225/225[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 3ms/step - accuracy: 0.8909 - loss: 0.3458 - val_accuracy: 0.9137 - val_loss: 0.2895
Epoch 6/50
[1m225/225[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 3ms/step - accuracy: 0.8887 - loss: 0.3804 - val_accuracy: 0.9075 - val_loss: 0.2880
Epoch 7/50
[1m225/225[0m 

**Discuss the performance differences between the model with default hyperparameters and the tuned model, emphasizing the effects of hyperparameter tuning.**

The transition from the basic model to the tuned model demonstrates the critical importance of hyperparameter tuning in machine learning. The default model serves as a solid starting point, but its "one-size-fits-all" architecture is rarely optimal. Through hyperparameter tuning, we systematically explore different configurations—adjusting the network's depth and width, the learning rate, and regularization techniques like dropout. This process allows the model to better adapt to the specific patterns in the dataset. As a result, the tuned model almost always shows superior performance, with higher accuracy, precision, and recall, because it has found an architecture that more effectively balances the trade-off between learning from the data (low bias) and generalizing to new, unseen examples (low variance)