## **Neural Networks:**

### **Data Exploration:**

In [1]:
import pandas as pd
from sklearn.preprocessing import LabelEncoder, MinMaxScaler
from sklearn.model_selection import train_test_split
import tensorflow as tf
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Dense
from tensorflow.keras.utils import to_categorical
from sklearn.metrics import classification_report, accuracy_score, precision_score, recall_score, f1_score


In [2]:
data = pd.read_csv('Alphabets_data.csv')

In [3]:
data.head()

Unnamed: 0,letter,xbox,ybox,width,height,onpix,xbar,ybar,x2bar,y2bar,xybar,x2ybar,xy2bar,xedge,xedgey,yedge,yedgex
0,T,2,8,3,5,1,8,13,0,6,6,10,8,0,8,0,8
1,I,5,12,3,7,2,10,5,5,4,13,3,9,2,8,4,10
2,D,4,11,6,8,6,10,6,2,6,10,3,7,3,7,3,9
3,N,7,11,6,6,3,5,9,4,6,4,4,10,6,10,2,8
4,G,2,1,3,1,1,8,6,6,6,6,5,9,1,7,5,10


In [4]:
data.tail()

Unnamed: 0,letter,xbox,ybox,width,height,onpix,xbar,ybar,x2bar,y2bar,xybar,x2ybar,xy2bar,xedge,xedgey,yedge,yedgex
19995,D,2,2,3,3,2,7,7,7,6,6,6,4,2,8,3,7
19996,C,7,10,8,8,4,4,8,6,9,12,9,13,2,9,3,7
19997,T,6,9,6,7,5,6,11,3,7,11,9,5,2,12,2,4
19998,S,2,3,4,2,1,8,7,2,6,10,6,8,1,9,5,8
19999,A,4,9,6,6,2,9,5,3,1,8,1,8,2,7,2,8


In [5]:
data.shape

(20000, 17)

In [6]:
data.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 20000 entries, 0 to 19999
Data columns (total 17 columns):
 #   Column  Non-Null Count  Dtype 
---  ------  --------------  ----- 
 0   letter  20000 non-null  object
 1   xbox    20000 non-null  int64 
 2   ybox    20000 non-null  int64 
 3   width   20000 non-null  int64 
 4   height  20000 non-null  int64 
 5   onpix   20000 non-null  int64 
 6   xbar    20000 non-null  int64 
 7   ybar    20000 non-null  int64 
 8   x2bar   20000 non-null  int64 
 9   y2bar   20000 non-null  int64 
 10  xybar   20000 non-null  int64 
 11  x2ybar  20000 non-null  int64 
 12  xy2bar  20000 non-null  int64 
 13  xedge   20000 non-null  int64 
 14  xedgey  20000 non-null  int64 
 15  yedge   20000 non-null  int64 
 16  yedgex  20000 non-null  int64 
dtypes: int64(16), object(1)
memory usage: 2.6+ MB


In [7]:
data.describe()

Unnamed: 0,xbox,ybox,width,height,onpix,xbar,ybar,x2bar,y2bar,xybar,x2ybar,xy2bar,xedge,xedgey,yedge,yedgex
count,20000.0,20000.0,20000.0,20000.0,20000.0,20000.0,20000.0,20000.0,20000.0,20000.0,20000.0,20000.0,20000.0,20000.0,20000.0,20000.0
mean,4.02355,7.0355,5.12185,5.37245,3.50585,6.8976,7.50045,4.6286,5.17865,8.28205,6.454,7.929,3.0461,8.33885,3.69175,7.8012
std,1.913212,3.304555,2.014573,2.26139,2.190458,2.026035,2.325354,2.699968,2.380823,2.488475,2.63107,2.080619,2.332541,1.546722,2.567073,1.61747
min,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
25%,3.0,5.0,4.0,4.0,2.0,6.0,6.0,3.0,4.0,7.0,5.0,7.0,1.0,8.0,2.0,7.0
50%,4.0,7.0,5.0,6.0,3.0,7.0,7.0,4.0,5.0,8.0,6.0,8.0,3.0,8.0,3.0,8.0
75%,5.0,9.0,6.0,7.0,5.0,8.0,9.0,6.0,7.0,10.0,8.0,9.0,4.0,9.0,5.0,9.0
max,15.0,15.0,15.0,15.0,15.0,15.0,15.0,15.0,15.0,15.0,15.0,15.0,15.0,15.0,15.0,15.0


In [8]:
#Check for Null Values
data.isnull().sum()

Unnamed: 0,0
letter,0
xbox,0
ybox,0
width,0
height,0
onpix,0
xbar,0
ybar,0
x2bar,0
y2bar,0


### **Data Preprocessing:**

In [9]:
# Encode the target label 'letter'
label_encoder = LabelEncoder()
data['letter'] = label_encoder.fit_transform(data['letter'])

In [10]:
# Normalize the feature values using MinMaxScaler
scaler = MinMaxScaler()
scaled_data = scaler.fit_transform(data.drop('letter', axis=1))

In [11]:
# Create a new DataFrame with the scaled features and the encoded target
df = pd.DataFrame(scaled_data, columns=data.columns[1:])
df['letter'] = data['letter']

### **Model Implementation:**

**Divide the dataset into training and test sets**

In [12]:
# Split the dataset into features and target
X = df.drop('letter', axis=1)
y = df['letter']

# Split the data into training (80%) and testing (20%) sets
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=2, stratify=y)

# Check the shapes of the splits
print(X_train.shape, X_test.shape, y_train.shape, y_test.shape)


(16000, 16) (4000, 16) (16000,) (4000,)


In [13]:
# Determine the number of unique classes for the output layer
num_classes = len(y.unique())

# Convert the target variable to categorical (one-hot encoding)
y_train_categorical = to_categorical(y_train, num_classes)
y_test_categorical = to_categorical(y_test, num_classes)

# Construct the ANN model
model = Sequential()

# Input layer and first hidden layer with 32 neurons and ReLU activation
model.add(Dense(32, input_dim=X_train.shape[1], activation='relu'))

# Output layer with softmax activation (for multi-class classification)
model.add(Dense(num_classes, activation='softmax'))

# Compile the model
model.compile(optimizer='adam', loss='categorical_crossentropy', metrics=['accuracy'])

# Display the model summary
model.summary()


  super().__init__(activity_regularizer=activity_regularizer, **kwargs)


**Train your model on the training set and then use it to make predictions on the test set**

In [14]:
# Train the model
history = model.fit(X_train, y_train_categorical, epochs=20, batch_size=30, validation_data=(X_test, y_test_categorical))


Epoch 1/20
[1m534/534[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m2s[0m 2ms/step - accuracy: 0.1285 - loss: 3.1370 - val_accuracy: 0.3643 - val_loss: 2.5712
Epoch 2/20
[1m534/534[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 2ms/step - accuracy: 0.4323 - loss: 2.3987 - val_accuracy: 0.5255 - val_loss: 2.0200
Epoch 3/20
[1m534/534[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 3ms/step - accuracy: 0.5447 - loss: 1.9219 - val_accuracy: 0.5840 - val_loss: 1.7245
Epoch 4/20
[1m534/534[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m2s[0m 2ms/step - accuracy: 0.5949 - loss: 1.6584 - val_accuracy: 0.5928 - val_loss: 1.5634
Epoch 5/20
[1m534/534[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 2ms/step - accuracy: 0.6169 - loss: 1.5010 - val_accuracy: 0.6198 - val_loss: 1.4506
Epoch 6/20
[1m534/534[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 2ms/step - accuracy: 0.6283 - loss: 1.4047 - val_accuracy: 0.6507 - val_loss: 1.3677
Epoch 7/20
[1m534/534[0m 

### **Evaluation:**

In [15]:
# Evaluate the model on the test data
test_loss, test_accuracy = model.evaluate(X_test, y_test_categorical)

print(f'Test Loss: {test_loss}')
print(f'Test Accuracy: {test_accuracy}')

# Predict the classes on the test set
y_pred = model.predict(X_test)
y_pred_classes = y_pred.argmax(axis=1)

# Classification report
print(classification_report(y_test, y_pred_classes))

# Overall accuracy
print(f'Overall Accuracy: {accuracy_score(y_test, y_pred_classes)}')


[1m125/125[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 1ms/step - accuracy: 0.7487 - loss: 1.0012
Test Loss: 0.9788363575935364
Test Accuracy: 0.7519999742507935
[1m125/125[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 1ms/step
              precision    recall  f1-score   support

           0       0.87      0.86      0.87       158
           1       0.61      0.83      0.70       153
           2       0.81      0.78      0.79       147
           3       0.68      0.65      0.67       161
           4       0.63      0.82      0.71       154
           5       0.79      0.71      0.75       155
           6       0.67      0.59      0.63       155
           7       0.76      0.50      0.61       147
           8       0.92      0.81      0.86       151
           9       0.90      0.81      0.85       149
          10       0.66      0.71      0.69       148
          11       0.88      0.74      0.80       152
          12       0.88      0.87      0.88      

In [16]:
precision = precision_score(y_test, y_pred_classes, average='weighted')
recall = recall_score(y_test, y_pred_classes, average='weighted')
f1 = f1_score(y_test, y_pred_classes, average='weighted')
print("Precision:", precision)
print("Recall:", recall)
print("F1-score:", f1)

Precision: 0.7596439319901205
Recall: 0.752
F1-score: 0.7519320901915445
