# Importing libraries

The recgonition model will be based on the famous CNN, VGG16 which is a 16 layer conv neural network trained on the imagenet dataset

In [1]:
import tensorflow as tf
from keras.applications.vgg16 import VGG16
from keras.models import Model
from keras.layers import Flatten, Dense

2024-10-31 15:00:08.597540: I tensorflow/core/util/port.cc:110] oneDNN custom operations are on. You may see slightly different numerical results due to floating-point round-off errors from different computation orders. To turn them off, set the environment variable `TF_ENABLE_ONEDNN_OPTS=0`.
2024-10-31 15:00:08.650886: I tensorflow/tsl/cuda/cudart_stub.cc:28] Could not find cuda drivers on your machine, GPU will not be used.
2024-10-31 15:00:09.055381: I tensorflow/tsl/cuda/cudart_stub.cc:28] Could not find cuda drivers on your machine, GPU will not be used.
2024-10-31 15:00:09.057141: I tensorflow/core/platform/cpu_feature_guard.cc:182] This TensorFlow binary is optimized to use available CPU instructions in performance-critical operations.
To enable the following instructions: AVX2 AVX_VNNI FMA, in other operations, rebuild TensorFlow with the appropriate compiler flags.


In [2]:
model = VGG16(include_top=False,
                        input_shape=(380,380,3),
                        pooling="avg",
                        classes=13,
                        weights="imagenet")

for layer in model.layers[:-1]:
        layer.trainable = False

In [3]:
# Classification block, we need to add the classification block separately which will be using softmax in our case for 13 different outputs
vgg_model = Flatten()(model.output)
vgg_model = Dense(4096, activation="relu", name="fc1")(vgg_model)
vgg_model = Dense(4096, activation="relu", name="fc2")(vgg_model)
vgg_model = Dense(13, activation="softmax", name="predictions")(vgg_model)

model = Model(model.input, vgg_model, name='VGG_Model')

model.summary()

Model: "VGG_Model"
_________________________________________________________________
 Layer (type)                Output Shape              Param #   
 input_1 (InputLayer)        [(None, 380, 380, 3)]     0         
                                                                 
 block1_conv1 (Conv2D)       (None, 380, 380, 64)      1792      
                                                                 
 block1_conv2 (Conv2D)       (None, 380, 380, 64)      36928     
                                                                 
 block1_pool (MaxPooling2D)  (None, 190, 190, 64)      0         
                                                                 
 block2_conv1 (Conv2D)       (None, 190, 190, 128)     73856     
                                                                 
 block2_conv2 (Conv2D)       (None, 190, 190, 128)     147584    
                                                                 
 block2_pool (MaxPooling2D)  (None, 95, 95, 128)       0 

In [4]:
model.compile(loss='categorical_crossentropy', optimizer="adam", metrics=['accuracy'])

# EDA
Now let's dive into the available dataset downlaoded from Kaggle with over 2000 training images for 13 different Naruto hand signs

In [5]:
import pandas as pd
import numpy as np
import os
import matplotlib.pyplot as plt

hand_signs = os.listdir("data/train")
print(hand_signs)

print("Number of Hand Signs found: ", len(hand_signs))

['boar', 'rat', 'horse', 'dragon', 'ram', 'dog', 'hare', 'snake', 'ox', 'tiger', 'bird', 'monkey', 'zero']
Number of Hand Signs found:  13


In [6]:
training = []

for sign in hand_signs:
    for image in os.listdir("data/train/" + sign):
        training.append((sign, image))

In [7]:
signs_df = pd.DataFrame(data=training, columns=["Sign", "Image"])
signs_df.sample(5)

Unnamed: 0,Sign,Image
1663,bird,IMG_e8a75177-547b-11ea-b0af-48f17fc25591.png
77,boar,IMG_e880312f-547b-11ea-a08c-48f17fc25591.png
911,dog,IMG_e85239f0-547b-11ea-b8db-48f17fc25591.png
176,rat,IMG_e897cbae-547b-11ea-821a-48f17fc25591.png
1042,hare,IMG_e8849aef-547b-11ea-899b-48f17fc25591.png


In [8]:
print("Total number of hand signs: ", len(signs_df))

signs_count = signs_df["Sign"].value_counts()

print(signs_count)

Total number of hand signs:  2159
Sign
dog       263
zero      199
bird      188
hare      174
boar      172
ox        169
tiger     167
horse     157
dragon    146
snake     146
monkey    136
rat       125
ram       117
Name: count, dtype: int64


In [9]:
import cv2 as cv
import os
from tqdm import tqdm

images = []
labels = []

for sign in hand_signs:
    path = "data/train/" + sign
    
    for image in tqdm(os.listdir(path), desc=f"Loading images for {sign}"):
        img = cv.imread(path + "/" + image)
        img = cv.resize(img, (380, 380))

        if img is not None:
            images.append(img)
            labels.append(sign)

Loading images for boar: 100%|██████████| 172/172 [00:03<00:00, 56.54it/s]
Loading images for rat: 100%|██████████| 125/125 [00:02<00:00, 58.74it/s]
Loading images for horse: 100%|██████████| 157/157 [00:02<00:00, 59.42it/s]
Loading images for dragon: 100%|██████████| 146/146 [00:02<00:00, 62.77it/s]
Loading images for ram: 100%|██████████| 117/117 [00:01<00:00, 62.29it/s]
Loading images for dog: 100%|██████████| 263/263 [00:04<00:00, 60.47it/s]
Loading images for hare: 100%|██████████| 174/174 [00:02<00:00, 62.81it/s]
Loading images for snake: 100%|██████████| 146/146 [00:02<00:00, 64.70it/s]
Loading images for ox: 100%|██████████| 169/169 [00:02<00:00, 59.99it/s]
Loading images for tiger: 100%|██████████| 167/167 [00:02<00:00, 62.02it/s]
Loading images for bird: 100%|██████████| 188/188 [00:03<00:00, 58.49it/s]
Loading images for monkey: 100%|██████████| 136/136 [00:02<00:00, 62.91it/s]
Loading images for zero: 100%|██████████| 199/199 [00:03<00:00, 58.76it/s]


In [10]:
images = np.array(images)

images = images.astype("float32") / 255.0
images.shape

(2159, 380, 380, 3)

In [11]:
from sklearn.preprocessing import LabelEncoder, OneHotEncoder

y = signs_df["Sign"].values

y_labelencoder = LabelEncoder()
y = y_labelencoder.fit_transform(y)

In [12]:
y = y.reshape(-1, 1)
one_hot_encoder = OneHotEncoder()

Y = one_hot_encoder.fit_transform(y)
Y.shape

(2159, 13)

In [13]:
Y = tf.convert_to_tensor(Y.todense(), dtype=tf.float32)

In [14]:
print(type(Y))
print(type(images))

<class 'tensorflow.python.framework.ops.EagerTensor'>
<class 'numpy.ndarray'>


In [16]:
adam = tf.keras.optimizers.Adam(learning_rate=0.001)
early_stopper = tf.keras.callbacks.EarlyStopping(monitor='loss', min_delta=0, patience=5, verbose=1,
                                            mode='auto', baseline=None, restore_best_weights=True)

model.compile(loss='categorical_crossentropy',
                  optimizer=adam, metrics=['accuracy'])

history = model.fit(
      images,
      Y,
      steps_per_epoch= 2159 // 16,
      shuffle=True,
      epochs=10,
      callbacks=[early_stopper],
      use_multiprocessing=False,
  )

: 

In [1]:
model.save("./models/model")

NameError: name 'model' is not defined