# ARTIFICIAL NEURAL NETWORKS

In [24]:
import pandas as pd
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler, LabelEncoder
import numpy as np

In [26]:
# Load the dataset
data = pd.read_csv('Alphabets_data.csv')

In [45]:
# Explore the dataset
print(data.head())

  letter  xbox  ybox  width  height  onpix  xbar  ybar  x2bar  y2bar  xybar  \
0      T     2     8      3       5      1     8    13      0      6      6   
1      I     5    12      3       7      2    10     5      5      4     13   
2      D     4    11      6       8      6    10     6      2      6     10   
3      N     7    11      6       6      3     5     9      4      6      4   
4      G     2     1      3       1      1     8     6      6      6      6   

   x2ybar  xy2bar  xedge  xedgey  yedge  yedgex  
0      10       8      0       8      0       8  
1       3       9      2       8      4      10  
2       3       7      3       7      3       9  
3       4      10      6      10      2       8  
4       5       9      1       7      5      10  


In [47]:
print(data.info())

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 20000 entries, 0 to 19999
Data columns (total 17 columns):
 #   Column  Non-Null Count  Dtype 
---  ------  --------------  ----- 
 0   letter  20000 non-null  object
 1   xbox    20000 non-null  int64 
 2   ybox    20000 non-null  int64 
 3   width   20000 non-null  int64 
 4   height  20000 non-null  int64 
 5   onpix   20000 non-null  int64 
 6   xbar    20000 non-null  int64 
 7   ybar    20000 non-null  int64 
 8   x2bar   20000 non-null  int64 
 9   y2bar   20000 non-null  int64 
 10  xybar   20000 non-null  int64 
 11  x2ybar  20000 non-null  int64 
 12  xy2bar  20000 non-null  int64 
 13  xedge   20000 non-null  int64 
 14  xedgey  20000 non-null  int64 
 15  yedge   20000 non-null  int64 
 16  yedgex  20000 non-null  int64 
dtypes: int64(16), object(1)
memory usage: 2.6+ MB
None


In [49]:
print(data.describe())

               xbox          ybox         width       height         onpix  \
count  20000.000000  20000.000000  20000.000000  20000.00000  20000.000000   
mean       4.023550      7.035500      5.121850      5.37245      3.505850   
std        1.913212      3.304555      2.014573      2.26139      2.190458   
min        0.000000      0.000000      0.000000      0.00000      0.000000   
25%        3.000000      5.000000      4.000000      4.00000      2.000000   
50%        4.000000      7.000000      5.000000      6.00000      3.000000   
75%        5.000000      9.000000      6.000000      7.00000      5.000000   
max       15.000000     15.000000     15.000000     15.00000     15.000000   

               xbar          ybar         x2bar         y2bar         xybar  \
count  20000.000000  20000.000000  20000.000000  20000.000000  20000.000000   
mean       6.897600      7.500450      4.628600      5.178650      8.282050   
std        2.026035      2.325354      2.699968      2.38082

In [30]:
# Check for missing values
print(data.isnull().sum())

letter    0
xbox      0
ybox      0
width     0
height    0
onpix     0
xbar      0
ybar      0
x2bar     0
y2bar     0
xybar     0
x2ybar    0
xy2bar    0
xedge     0
xedgey    0
yedge     0
yedgex    0
dtype: int64


In [32]:
# Normalize the features
scaler = StandardScaler()
X = data.drop('letter', axis=1)  # Assuming 'label' is the target column
y = data['letter']

In [34]:
# Convert string labels to integers
label_encoder = LabelEncoder()
y_encoded = label_encoder.fit_transform(y)

X_scaled = scaler.fit_transform(X)

In [36]:
# Split the dataset into training and test sets
X_train, X_test, y_train, y_test = train_test_split(X_scaled, y_encoded, test_size=0.2, random_state=42)

# Model Implementation and Training

In [38]:
import tensorflow as tf
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Dense

In [40]:
# Function to create the ANN model
def create_model(hidden_layers, neurons, activation='relu'):
    model = Sequential()
    model.add(Dense(neurons, activation=activation, input_shape=(X_train.shape[1],)))
    
    for _ in range(hidden_layers - 1):
        model.add(Dense(neurons, activation=activation))
    
    model.add(Dense(len(np.unique(y_encoded)), activation='softmax'))  # Output layer for multi-class classification
    model.compile(optimizer='adam', loss='sparse_categorical_crossentropy', metrics=['accuracy'])
    return model


In [8]:
# Hyperparameter tuning
results = []

# Define hyperparameter options
hidden_layers_options = [1, 2]
neurons_options = [32, 64, 128]
activation_options = ['relu', 'tanh']

for hidden_layers in hidden_layers_options:
    for neurons in neurons_options:
        for activation in activation_options:
            print(f'Training model with {hidden_layers} hidden layers, {neurons} neurons, activation: {activation}')
            model = create_model(hidden_layers, neurons, activation)
            model.fit(X_train, y_train, epochs=50, batch_size=32, verbose=0)
            loss, accuracy = model.evaluate(X_test, y_test, verbose=0)
            results.append((hidden_layers, neurons, activation, accuracy))

Training model with 1 hidden layers, 32 neurons, activation: relu


  super().__init__(activity_regularizer=activity_regularizer, **kwargs)


Training model with 1 hidden layers, 32 neurons, activation: tanh
Training model with 1 hidden layers, 64 neurons, activation: relu
Training model with 1 hidden layers, 64 neurons, activation: tanh
Training model with 1 hidden layers, 128 neurons, activation: relu
Training model with 1 hidden layers, 128 neurons, activation: tanh
Training model with 2 hidden layers, 32 neurons, activation: relu
Training model with 2 hidden layers, 32 neurons, activation: tanh
Training model with 2 hidden layers, 64 neurons, activation: relu
Training model with 2 hidden layers, 64 neurons, activation: tanh
Training model with 2 hidden layers, 128 neurons, activation: relu
Training model with 2 hidden layers, 128 neurons, activation: tanh


In [11]:
# Find the best model
best_model = max(results, key=lambda x: x[3])
print(f'Best model: {best_model}')

Best model: (2, 128, 'tanh', 0.9707499742507935)


# Evaluation

In [12]:
from sklearn.metrics import classification_report

In [14]:
# Create the best model
best_hidden_layers, best_neurons, best_activation, _ = best_model
best_model_instance = create_model(best_hidden_layers, best_neurons, best_activation)
best_model_instance.fit(X_train, y_train, epochs=50, batch_size=32, verbose=0)

  super().__init__(activity_regularizer=activity_regularizer, **kwargs)


<keras.src.callbacks.history.History at 0x265aca3fd70>

In [15]:
# Make predictions
y_pred = np.argmax(best_model_instance.predict(X_test), axis=-1)

# Evaluate the model
print(classification_report(y_test, y_pred))

[1m125/125[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 1ms/step
              precision    recall  f1-score   support

           0       0.99      0.99      0.99       149
           1       0.91      0.95      0.93       153
           2       0.97      0.93      0.95       137
           3       0.95      0.95      0.95       156
           4       0.95      0.98      0.97       141
           5       0.97      0.94      0.96       140
           6       0.93      0.96      0.94       160
           7       0.94      0.90      0.92       144
           8       0.96      0.95      0.96       146
           9       0.96      0.95      0.96       149
          10       0.95      0.95      0.95       130
          11       0.96      0.97      0.96       155
          12       0.99      0.99      0.99       168
          13       0.97      0.97      0.97       151
          14       0.93      0.97      0.95       145
          15       0.98      0.97      0.97       173
      