# **1. Data Exploration and Preprocessing**

In [1]:
import pandas as pd
import numpy as np

In [2]:
df = pd.read_csv('Alphabets_data.csv')

In [3]:
df.head()

Unnamed: 0,letter,xbox,ybox,width,height,onpix,xbar,ybar,x2bar,y2bar,xybar,x2ybar,xy2bar,xedge,xedgey,yedge,yedgex
0,T,2,8,3,5,1,8,13,0,6,6,10,8,0,8,0,8
1,I,5,12,3,7,2,10,5,5,4,13,3,9,2,8,4,10
2,D,4,11,6,8,6,10,6,2,6,10,3,7,3,7,3,9
3,N,7,11,6,6,3,5,9,4,6,4,4,10,6,10,2,8
4,G,2,1,3,1,1,8,6,6,6,6,5,9,1,7,5,10


In [4]:
# Summarize the key features of the dataset
print("Number of samples:", df.shape[0])
print("Number of features:", df.shape[1] - 1)  # excluding the target variable
print("Number of classes:", len(np.unique(df['letter'])))


Number of samples: 20000
Number of features: 16
Number of classes: 26


In [5]:
#checking missing values
df.isnull().sum()

letter    0
xbox      0
ybox      0
width     0
height    0
onpix     0
xbar      0
ybar      0
x2bar     0
y2bar     0
xybar     0
x2ybar    0
xy2bar    0
xedge     0
xedgey    0
yedge     0
yedgex    0
dtype: int64

In [6]:
#Data Normalization
from sklearn.preprocessing import StandardScaler

In [7]:
# Split the dataset into features (X) and target (y)
X = df.drop('letter', axis=1)
y = df['letter']

In [8]:
# Normalize the features(X)
scaler = StandardScaler()
X_scaled = scaler.fit_transform(X)

In [9]:
X_scaled

array([[-1.0576983 ,  0.29187713, -1.05327668, ..., -0.21908163,
        -1.4381527 ,  0.12291107],
       [ 0.51038497,  1.5023577 , -1.05327668, ..., -0.21908163,
         0.12008142,  1.35944092],
       [-0.01230945,  1.19973756,  0.43590966, ..., -0.8656262 ,
        -0.26947711,  0.74117599],
       ...,
       [ 1.03307939,  0.59449727,  0.43590966, ...,  2.36709667,
        -0.65903564, -2.35014863],
       [-1.0576983 , -1.22122359, -0.55688123, ...,  0.42746295,
         0.50963994,  0.12291107],
       [-0.01230945,  0.59449727,  0.43590966, ..., -0.8656262 ,
        -0.65903564,  0.12291107]])

# **2. Model Implementation**

In [10]:
#Split the Dataset
from sklearn.model_selection import train_test_split
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)

from sklearn.preprocessing import LabelEncoder

#encoding the target y
label_encoder = LabelEncoder()
y_train = label_encoder.fit_transform(y_train)
y_test = label_encoder.transform(y_test)

In [11]:
#Construct the ANN Model
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Dense

In [12]:
#Define the model
model = Sequential()
model.add(Dense(64, input_dim=X_train.shape[1], activation='relu'))  # Hidden layer with 64 neurons
model.add(Dense(32, activation='relu'))  # Another hidden layer with 32 neurons
model.add(Dense(len(y.unique()), activation='softmax'))  # Output layer for multi-class classification
model.summary()

  super().__init__(activity_regularizer=activity_regularizer, **kwargs)


In [13]:
#compile the model
from keras.optimizers import Adam,RMSprop,SGD
model.compile(optimizer='adam', loss='sparse_categorical_crossentropy', metrics=['accuracy'])

In [14]:
#Train the Model
history = model.fit(X_train, y_train, epochs=10, validation_data=(X_test, y_test))

Epoch 1/10
[1m500/500[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m7s[0m 7ms/step - accuracy: 0.2607 - loss: 2.6514 - val_accuracy: 0.6423 - val_loss: 1.2871
Epoch 2/10
[1m500/500[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m3s[0m 6ms/step - accuracy: 0.6618 - loss: 1.2162 - val_accuracy: 0.7017 - val_loss: 1.0271
Epoch 3/10
[1m500/500[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m3s[0m 5ms/step - accuracy: 0.7255 - loss: 0.9737 - val_accuracy: 0.7517 - val_loss: 0.8851
Epoch 4/10
[1m500/500[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m3s[0m 6ms/step - accuracy: 0.7526 - loss: 0.8803 - val_accuracy: 0.7577 - val_loss: 0.8403
Epoch 5/10
[1m500/500[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m3s[0m 6ms/step - accuracy: 0.7585 - loss: 0.8213 - val_accuracy: 0.7732 - val_loss: 0.7849
Epoch 6/10
[1m500/500[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m3s[0m 6ms/step - accuracy: 0.7772 - loss: 0.7616 - val_accuracy: 0.7803 - val_loss: 0.7543
Epoch 7/10
[1m500/500[0m 

# **3. Hyperparameter Tuning**

In [15]:
!pip install scikeras


Collecting scikeras
  Downloading scikeras-0.13.0-py3-none-any.whl (26 kB)
Installing collected packages: scikeras
Successfully installed scikeras-0.13.0


In [16]:
from scikeras.wrappers import KerasClassifier 
from sklearn.model_selection import GridSearchCV


In [17]:
# Define the hyperparameter
param_grid = {
    'epochs': [10, 15], # Pass epochs directly
    'batch_size': [64, 128], # Pass batch_size directly
    'model__optimizer': ['adam', 'sgd', 'rmsprop'], # Keep model__ prefix for arguments specific to create_model function
    'model__activation': ['relu', 'tanh', 'sigmoid'] # Keep model__ prefix for arguments specific to create_model function
}

In [18]:
# Define the model architecture as a function
def create_model(optimizer='adam', activation='relu'):
    from tensorflow import keras # Import keras inside the function to avoid potential conflicts
    from keras.models import Sequential
    from keras.layers import Dense

    model = Sequential()
    model.add(Dense(64, activation=activation, input_shape=(X_scaled.shape[1],)))
    model.add(Dense(32, activation=activation))
    model.add(Dense(len(np.unique(y_num)), activation='softmax'))
    model.compile(loss='sparse_categorical_crossentropy', optimizer=optimizer, metrics=['accuracy'])
    return model

In [19]:
# Perform hyperparameter tuning
keras_model = KerasClassifier(model=create_model)
grid_search = GridSearchCV(keras_model, param_grid, cv=3)
grid_search.fit(X_train, y_train, epochs=5, batch_size=128) 

  super().__init__(activity_regularizer=activity_regularizer, **kwargs)
  super().__init__(activity_regularizer=activity_regularizer, **kwargs)
  super().__init__(activity_regularizer=activity_regularizer, **kwargs)
  super().__init__(activity_regularizer=activity_regularizer, **kwargs)
  super().__init__(activity_regularizer=activity_regularizer, **kwargs)
  super().__init__(activity_regularizer=activity_regularizer, **kwargs)
  super().__init__(activity_regularizer=activity_regularizer, **kwargs)
  super().__init__(activity_regularizer=activity_regularizer, **kwargs)
  super().__init__(activity_regularizer=activity_regularizer, **kwargs)
  super().__init__(activity_regularizer=activity_regularizer, **kwargs)
  super().__init__(activity_regularizer=activity_regularizer, **kwargs)
  super().__init__(activity_regularizer=activity_regularizer, **kwargs)
  super().__init__(activity_regularizer=activity_regularizer, **kwargs)
  super().__init__(activity_regularizer=activity_regularizer, **

  super().__init__(activity_regularizer=activity_regularizer, **kwargs)
  super().__init__(activity_regularizer=activity_regularizer, **kwargs)
  super().__init__(activity_regularizer=activity_regularizer, **kwargs)
  super().__init__(activity_regularizer=activity_regularizer, **kwargs)
  super().__init__(activity_regularizer=activity_regularizer, **kwargs)
  super().__init__(activity_regularizer=activity_regularizer, **kwargs)
  super().__init__(activity_regularizer=activity_regularizer, **kwargs)
  super().__init__(activity_regularizer=activity_regularizer, **kwargs)
  super().__init__(activity_regularizer=activity_regularizer, **kwargs)
  super().__init__(activity_regularizer=activity_regularizer, **kwargs)
  super().__init__(activity_regularizer=activity_regularizer, **kwargs)
  super().__init__(activity_regularizer=activity_regularizer, **kwargs)
  super().__init__(activity_regularizer=activity_regularizer, **kwargs)
  super().__init__(activity_regularizer=activity_regularizer, **

  super().__init__(activity_regularizer=activity_regularizer, **kwargs)
  super().__init__(activity_regularizer=activity_regularizer, **kwargs)
  super().__init__(activity_regularizer=activity_regularizer, **kwargs)
  super().__init__(activity_regularizer=activity_regularizer, **kwargs)
  super().__init__(activity_regularizer=activity_regularizer, **kwargs)
  super().__init__(activity_regularizer=activity_regularizer, **kwargs)
  super().__init__(activity_regularizer=activity_regularizer, **kwargs)
  super().__init__(activity_regularizer=activity_regularizer, **kwargs)
  super().__init__(activity_regularizer=activity_regularizer, **kwargs)
  super().__init__(activity_regularizer=activity_regularizer, **kwargs)
  super().__init__(activity_regularizer=activity_regularizer, **kwargs)
  super().__init__(activity_regularizer=activity_regularizer, **kwargs)
  super().__init__(activity_regularizer=activity_regularizer, **kwargs)
  super().__init__(activity_regularizer=activity_regularizer, **

  super().__init__(activity_regularizer=activity_regularizer, **kwargs)
  super().__init__(activity_regularizer=activity_regularizer, **kwargs)
  super().__init__(activity_regularizer=activity_regularizer, **kwargs)
  super().__init__(activity_regularizer=activity_regularizer, **kwargs)
  super().__init__(activity_regularizer=activity_regularizer, **kwargs)
  super().__init__(activity_regularizer=activity_regularizer, **kwargs)
  super().__init__(activity_regularizer=activity_regularizer, **kwargs)
  super().__init__(activity_regularizer=activity_regularizer, **kwargs)
  super().__init__(activity_regularizer=activity_regularizer, **kwargs)
  super().__init__(activity_regularizer=activity_regularizer, **kwargs)
  super().__init__(activity_regularizer=activity_regularizer, **kwargs)
  super().__init__(activity_regularizer=activity_regularizer, **kwargs)
  super().__init__(activity_regularizer=activity_regularizer, **kwargs)
  super().__init__(activity_regularizer=activity_regularizer, **

  super().__init__(activity_regularizer=activity_regularizer, **kwargs)


ValueError: 
All the 108 fits failed.
It is very likely that your model is misconfigured.
You can try to debug the error by setting error_score='raise'.

Below are more details about the failures:
--------------------------------------------------------------------------------
108 fits failed with the following error:
Traceback (most recent call last):
  File "C:\Users\Dell\anaconda3\lib\site-packages\sklearn\model_selection\_validation.py", line 888, in _fit_and_score
    estimator.fit(X_train, y_train, **fit_params)
  File "C:\Users\Dell\anaconda3\lib\site-packages\scikeras\wrappers.py", line 1501, in fit
    super().fit(X=X, y=y, sample_weight=sample_weight, **kwargs)
  File "C:\Users\Dell\anaconda3\lib\site-packages\scikeras\wrappers.py", line 770, in fit
    self._fit(
  File "C:\Users\Dell\anaconda3\lib\site-packages\scikeras\wrappers.py", line 925, in _fit
    X, y = self._initialize(X, y)
  File "C:\Users\Dell\anaconda3\lib\site-packages\scikeras\wrappers.py", line 862, in _initialize
    self.model_ = self._build_keras_model()
  File "C:\Users\Dell\anaconda3\lib\site-packages\scikeras\wrappers.py", line 433, in _build_keras_model
    model = final_build_fn(**build_params)
  File "C:\Users\Dell\AppData\Local\Temp\ipykernel_14652\2173566413.py", line 10, in create_model
    model.add(Dense(len(np.unique(y_num)), activation='softmax'))
NameError: name 'y_num' is not defined


In [None]:
# Print the best hyperparameters and the corresponding score
print("Best hyperparameters:", grid_search.best_params_)
print("Best score:", grid_search.best_score_)

# **4.Evaluation**

In [None]:
from sklearn.metrics import accuracy_score, precision_score, recall_score, f1_score

In [None]:
y_pred = grid_search.best_estimator_.predict(X_test)
print("Accuracy:", accuracy_score(y_test, y_pred))
print("Precision:", precision_score(y_test, y_pred, average='macro'))
print("Recall:", recall_score(y_test, y_pred, average='macro'))
print("F1-score:", f1_score(y_test, y_pred, average='macro'))

In [None]:
# Compare the performance of the default model and the tuned model
print("\n")
default_model = create_model()
default_model.fit(X_train, y_train, epochs=10, batch_size=128, validation_data=(X_test, y_test))
y_pred_default = default_model.predict(X_test)

In [None]:
# Convert probabilities to class labels using argmax
y_pred_default = np.argmax(y_pred_default, axis=1)  # Select the class with the highest probability

print("\nDefault model accuracy:", accuracy_score(y_test, y_pred_default))
print("Default model precision:", precision_score(y_test, y_pred_default, average='macro'))
print("Default model recall:", recall_score(y_test, y_pred_default, average='macro'))
print("Default model F1-score:", f1_score(y_test, y_pred_default, average='macro'))