## ARTIFICIAL NEURAL NETWORKS

### 1. Data Exploration and Preprocessing

In [3]:
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
from sklearn.preprocessing import LabelEncoder,StandardScaler

In [4]:
df=pd.read_csv('Alphabets_data.csv')

In [5]:
df

Unnamed: 0,letter,xbox,ybox,width,height,onpix,xbar,ybar,x2bar,y2bar,xybar,x2ybar,xy2bar,xedge,xedgey,yedge,yedgex
0,T,2,8,3,5,1,8,13,0,6,6,10,8,0,8,0,8
1,I,5,12,3,7,2,10,5,5,4,13,3,9,2,8,4,10
2,D,4,11,6,8,6,10,6,2,6,10,3,7,3,7,3,9
3,N,7,11,6,6,3,5,9,4,6,4,4,10,6,10,2,8
4,G,2,1,3,1,1,8,6,6,6,6,5,9,1,7,5,10
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
19995,D,2,2,3,3,2,7,7,7,6,6,6,4,2,8,3,7
19996,C,7,10,8,8,4,4,8,6,9,12,9,13,2,9,3,7
19997,T,6,9,6,7,5,6,11,3,7,11,9,5,2,12,2,4
19998,S,2,3,4,2,1,8,7,2,6,10,6,8,1,9,5,8


In [6]:
df.shape

(20000, 17)

In [7]:
df.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 20000 entries, 0 to 19999
Data columns (total 17 columns):
 #   Column  Non-Null Count  Dtype 
---  ------  --------------  ----- 
 0   letter  20000 non-null  object
 1   xbox    20000 non-null  int64 
 2   ybox    20000 non-null  int64 
 3   width   20000 non-null  int64 
 4   height  20000 non-null  int64 
 5   onpix   20000 non-null  int64 
 6   xbar    20000 non-null  int64 
 7   ybar    20000 non-null  int64 
 8   x2bar   20000 non-null  int64 
 9   y2bar   20000 non-null  int64 
 10  xybar   20000 non-null  int64 
 11  x2ybar  20000 non-null  int64 
 12  xy2bar  20000 non-null  int64 
 13  xedge   20000 non-null  int64 
 14  xedgey  20000 non-null  int64 
 15  yedge   20000 non-null  int64 
 16  yedgex  20000 non-null  int64 
dtypes: int64(16), object(1)
memory usage: 2.6+ MB


In [8]:
#Check for missing values

print(df.isnull().sum())

letter    0
xbox      0
ybox      0
width     0
height    0
onpix     0
xbar      0
ybar      0
x2bar     0
y2bar     0
xybar     0
x2ybar    0
xy2bar    0
xedge     0
xedgey    0
yedge     0
yedgex    0
dtype: int64


In [9]:
df.describe()

Unnamed: 0,xbox,ybox,width,height,onpix,xbar,ybar,x2bar,y2bar,xybar,x2ybar,xy2bar,xedge,xedgey,yedge,yedgex
count,20000.0,20000.0,20000.0,20000.0,20000.0,20000.0,20000.0,20000.0,20000.0,20000.0,20000.0,20000.0,20000.0,20000.0,20000.0,20000.0
mean,4.02355,7.0355,5.12185,5.37245,3.50585,6.8976,7.50045,4.6286,5.17865,8.28205,6.454,7.929,3.0461,8.33885,3.69175,7.8012
std,1.913212,3.304555,2.014573,2.26139,2.190458,2.026035,2.325354,2.699968,2.380823,2.488475,2.63107,2.080619,2.332541,1.546722,2.567073,1.61747
min,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
25%,3.0,5.0,4.0,4.0,2.0,6.0,6.0,3.0,4.0,7.0,5.0,7.0,1.0,8.0,2.0,7.0
50%,4.0,7.0,5.0,6.0,3.0,7.0,7.0,4.0,5.0,8.0,6.0,8.0,3.0,8.0,3.0,8.0
75%,5.0,9.0,6.0,7.0,5.0,8.0,9.0,6.0,7.0,10.0,8.0,9.0,4.0,9.0,5.0,9.0
max,15.0,15.0,15.0,15.0,15.0,15.0,15.0,15.0,15.0,15.0,15.0,15.0,15.0,15.0,15.0,15.0


In [10]:

# Encode categorical variables if necessary
label_encoder = LabelEncoder()
df['letter'] = label_encoder.fit_transform(df['letter'])

In [11]:
# Separate features and target variable
x = df.drop(columns=['letter'])   #feature
y = df['letter']                  #target

In [12]:
scaler = StandardScaler()
scaler

In [13]:
X_scaled = scaler.fit_transform(x)

In [14]:
from sklearn.model_selection import train_test_split
from keras.models import Sequential
from keras.layers import Dense

In [15]:
feature_scaled = scaler.fit_transform(x)

In [16]:
x_train, x_test, y_train, y_test = train_test_split(feature_scaled,y, test_size=0.2, random_state=42)

In [17]:
print(x_train.shape)
print(x_test.shape)
print(y_train.shape)
print(y_test.shape)

(16000, 16)
(4000, 16)
(16000,)
(4000,)


### 2. Model Implementation

In [19]:
model = Sequential()
model

<Sequential name=sequential, built=False>

In [20]:
model.add(Dense(64,activation='relu',input_shape=(x_train.shape[1],)))    #Input Layer

model.add(Dense(32, activation='relu'))  # Hidden layer

model.add(Dense(len(y.unique()), activation='softmax'))  # Output layer for multi-class classification

model.compile(optimizer='adam', loss='sparse_categorical_crossentropy', metrics=['accuracy'])     # Compile the model

  super().__init__(activity_regularizer=activity_regularizer, **kwargs)


In [21]:
#Train the model

history = model.fit(x_train, y_train, epochs=50, validation_split=0.2, batch_size=32)

Epoch 1/50
[1m400/400[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m2s[0m 2ms/step - accuracy: 0.2627 - loss: 2.6696 - val_accuracy: 0.6653 - val_loss: 1.2085
Epoch 2/50
[1m400/400[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 2ms/step - accuracy: 0.7007 - loss: 1.0531 - val_accuracy: 0.7472 - val_loss: 0.8842
Epoch 3/50
[1m400/400[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 1ms/step - accuracy: 0.7686 - loss: 0.8008 - val_accuracy: 0.7928 - val_loss: 0.7326
Epoch 4/50
[1m400/400[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 1ms/step - accuracy: 0.8074 - loss: 0.6685 - val_accuracy: 0.8059 - val_loss: 0.6574
Epoch 5/50
[1m400/400[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 1ms/step - accuracy: 0.8205 - loss: 0.6108 - val_accuracy: 0.8272 - val_loss: 0.5926
Epoch 6/50
[1m400/400[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 1ms/step - accuracy: 0.8403 - loss: 0.5444 - val_accuracy: 0.8434 - val_loss: 0.5368
Epoch 7/50
[1m400/400[0m 

### 3. Hyperparameter Tuning

In [23]:
pip install tensorflow scikit-learn pandas




In [24]:
pip install scikeras

Note: you may need to restart the kernel to use updated packages.


In [25]:
import tensorflow as tf
print(tf.__version__)

2.18.0


In [26]:
pip install keras-tuner

Note: you may need to restart the kernel to use updated packages.


In [27]:
from keras_tuner import RandomSearch
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Dense

In [53]:
pip install --upgrade tensorflow

Note: you may need to restart the kernel to use updated packages.


In [82]:
from kerastuner import RandomSearch

def build_model(hp):
    model = Sequential()
    model.add(Dense(units=hp.Int('units1', min_value=32, max_value=128, step=16), activation='relu', input_shape=(x_train.shape[1],)))
    model.add(Dense(units=hp.Int('units2', min_value=16, max_value=64, step=16), activation='relu'))
    model.add(Dense(len(label_encoder.classes_), activation='softmax'))
    
    model.compile(optimizer=tf.keras.optimizers.Adam(learning_rate=hp.Choice('learning_rate', values=[1e-2, 1e-3, 1e-4])),
                  loss='sparse_categorical_crossentropy', metrics=['accuracy'])
    return model

In [86]:
tuner = RandomSearch(build_model, objective='val_accuracy', max_trials=5, executions_per_trial=3)

tuner.search(x_train, y_train, epochs=50, validation_split=0.2)

Trial 5 Complete [00h 02m 00s]
val_accuracy: 0.8458333214124044

Best val_accuracy So Far: 0.9337499936421713
Total elapsed time: 00h 10m 24s


In [88]:
# Get the optimal hyperparameters
best_hps = tuner.get_best_hyperparameters(num_trials=1)[0]
print(f"Best units1: {best_hps.get('units1')}, Best units2: {best_hps.get('units2')}, Best learning_rate: {best_hps.get('learning_rate')}")

Best units1: 48, Best units2: 32, Best learning_rate: 0.001


In [90]:
# Build the best model with optimal hyperparameters
best_model = tuner.hypermodel.build(best_hps)

  super().__init__(activity_regularizer=activity_regularizer, **kwargs)


In [94]:
# Train the model
best_model.fit(x_train, y_train, epochs=50, validation_split=0.2, batch_size=32)

Epoch 1/50
[1m400/400[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 2ms/step - accuracy: 0.2395 - loss: 2.7311 - val_accuracy: 0.6506 - val_loss: 1.2778
Epoch 2/50
[1m400/400[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 1ms/step - accuracy: 0.6862 - loss: 1.1187 - val_accuracy: 0.7266 - val_loss: 0.9393
Epoch 3/50
[1m400/400[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 1ms/step - accuracy: 0.7499 - loss: 0.8451 - val_accuracy: 0.7719 - val_loss: 0.8012
Epoch 4/50
[1m400/400[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 1ms/step - accuracy: 0.7920 - loss: 0.7261 - val_accuracy: 0.7969 - val_loss: 0.7117
Epoch 5/50
[1m400/400[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 1ms/step - accuracy: 0.8070 - loss: 0.6546 - val_accuracy: 0.8094 - val_loss: 0.6484
Epoch 6/50
[1m400/400[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 1ms/step - accuracy: 0.8304 - loss: 0.5853 - val_accuracy: 0.8284 - val_loss: 0.5972
Epoch 7/50
[1m400/400[0m 

<keras.src.callbacks.history.History at 0x29d344c45f0>

In [97]:
# Evaluate the model
test_loss, test_accuracy = best_model.evaluate(x_test, y_test)
print(f'Test Accuracy: {test_accuracy}')

[1m125/125[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 934us/step - accuracy: 0.9346 - loss: 0.2091
Test Accuracy: 0.9380000233650208
