# SET the Image Resolution

In [1]:
# Side length, in pixels, of the square images used throughout this notebook:
# it sets the network's input shape and the size webcam crops are resized to.
# The constant can alternatively be imported from the helper module:
#from helper import IMAGE_RES
IMAGE_RES = 226 #75  (75 looks like a previously tried value -- TODO confirm)

In [2]:
import cv2
import time
import json
import keras
import tensorflow
import numpy as np
from PIL import Image
import mediapipe as mp
import matplotlib.pyplot as plt
from keras.regularizers import l2
from tensorflow.keras.optimizers import Adam
from tensorflow.keras.layers import LeakyReLU
from tensorflow.keras.models import Sequential
from keras.preprocessing.image import ImageDataGenerator
from tensorflow.keras.layers import (
    Conv2D,
    MaxPooling2D,
    Flatten,
    Dense,
    Dropout,
    BatchNormalization,
)

In [3]:
# Location of the JSON file that maps class names to integer class indices.
file_name = "lookup_train.json"

# Read the stored name -> index mapping.
with open(file_name, "r") as json_file:
    name_to_index = json.load(json_file)

# Invert it into index-as-string -> upper-cased class name, which is the form
# needed to turn a predicted class index back into a readable label.
lookup = dict(
    (str(class_index), class_name.upper())
    for class_name, class_index in name_to_index.items()
)
print(lookup)

{'0': 'A', '1': 'B', '2': 'C', '3': 'D', '4': 'E', '5': 'F', '6': 'G', '7': 'H', '8': 'I', '9': 'J', '10': 'K', '11': 'L', '12': 'M', '13': 'N', '14': 'O', '15': 'P', '16': 'Q', '17': 'R', '18': 'S', '19': 'T', '20': 'U', '21': 'V', '22': 'W', '23': 'X', '24': 'Y', '25': 'Z', '26': 'DEL', '27': 'NOTHING', '28': 'SPACE'}


In [4]:
from keras.layers import GlobalAveragePooling2D
from tensorflow.keras.applications import VGG16
from tensorflow.keras.models import Model
from tensorflow.keras.applications import InceptionV3
from keras.layers import Dense,Flatten,Conv2D,MaxPool2D,Dropout
# One softmax output per entry in the label lookup table.
num_classes = len(lookup)

# Convolutional backbone: InceptionV3 with ImageNet weights and no classifier
# head. (A VGG16 backbone with the same arguments was the alternative tried.)
base_model = InceptionV3(
    weights="imagenet",
    include_top=False,
    input_shape=(IMAGE_RES, IMAGE_RES, 3),
)

# Freeze every backbone layer so only the new head carries trainable weights.
for backbone_layer in base_model.layers:
    backbone_layer.trainable = False

# Classification head: global average pooling (instead of Flatten) followed by
# three identical Dense -> BatchNorm -> Dropout groups. Layers are created in
# the same order as before, so auto-generated layer names are unchanged.
x = GlobalAveragePooling2D()(base_model.output)
for _group in range(3):
    x = Dense(1024, activation="relu", kernel_regularizer=l2(0.01))(x)
    x = BatchNormalization()(x)
    x = Dropout(0.5)(x)
predictions = Dense(num_classes, activation="softmax")(x)

model = Model(inputs=base_model.input, outputs=predictions)

# Compile the model.
model.compile(
    optimizer=Adam(learning_rate=0.001),
    loss="categorical_crossentropy",
    metrics=["accuracy"],
)

model.summary()

# Load the saved weights into the freshly built model. Checkpoints used
# earlier: "model_weights_online_2.h5", "best_weights_offline.h5".
model.load_weights("best_weights_online_final.h5")

Model: "model"
__________________________________________________________________________________________________
 Layer (type)                Output Shape                 Param #   Connected to                  
 input_1 (InputLayer)        [(None, 226, 226, 3)]        0         []                            
                                                                                                  
 conv2d (Conv2D)             (None, 112, 112, 32)         864       ['input_1[0][0]']             
                                                                                                  
 batch_normalization (Batch  (None, 112, 112, 32)         96        ['conv2d[0][0]']              
 Normalization)                                                                                   
                                                                                                  
 activation (Activation)     (None, 112, 112, 32)         0         ['batch_normalization[0][0

In [5]:
# Handle to video device 0, opened as soon as this cell runs.
# NOTE(review): the following cell opens device 0 again through its own
# VideoCapture while this handle is still open -- confirm both can coexist on
# the target platform, or release this one first.
camera = cv2.VideoCapture(0)


def capture_image():
    """Read a single frame from the module-level ``camera`` handle.

    Returns:
        The frame element of ``camera.read()``. The success flag returned
        alongside it is discarded, so the result of a failed read is passed
        through to the caller unchanged.
    """
    return camera.read()[1]


In [6]:
mphands = mp.solutions.hands
hands = mphands.Hands(max_num_hands=1)
mp_drawing = mp.solutions.drawing_utils

BOX_MARGIN = 50  # pixels of context kept around the outermost hand landmarks
BOX_COLOR = (0, 255, 0)
TEXT_COLOR = (36, 255, 12)


def _hand_box(landmarks, frame_w, frame_h, margin=BOX_MARGIN):
    """Return ``(x0, y0, x1, y1)`` around the landmarks, clamped to the frame.

    Landmark coordinates are scaled by the frame size, padded by ``margin``
    and then clamped to ``[0, frame_w]`` / ``[0, frame_h]``. Clamping matters:
    a negative slice start would wrap around in NumPy and yield an empty or
    wrong crop whenever the hand is near the frame border.
    """
    xs = [int(lm.x * frame_w) for lm in landmarks]
    ys = [int(lm.y * frame_h) for lm in landmarks]
    x0 = max(min(xs) - margin, 0)
    y0 = max(min(ys) - margin, 0)
    x1 = min(max(xs) + margin, frame_w)
    y1 = min(max(ys) + margin, frame_h)
    return x0, y0, x1, y1


def _prepare_crop(crop_bgr):
    """Convert a BGR crop of the mirrored frame into a model input batch.

    Steps (same order as the original inline code): resize with PIL to
    ``IMAGE_RES`` x ``IMAGE_RES``, undo the horizontal mirroring applied for
    display, reverse the channel order (BGR -> RGB), scale to ``[0, 1]`` and
    add a leading batch axis.
    """
    resized = np.array(
        Image.fromarray(np.uint8(crop_bgr)).resize((IMAGE_RES, IMAGE_RES))
    )
    unmirrored = np.fliplr(resized)
    rgb = np.array(unmirrored[:, :, ::-1], dtype="float32")  # convert bgr to rgb
    return (rgb / 255).reshape((1, IMAGE_RES, IMAGE_RES, 3))


cap = cv2.VideoCapture(0)

try:
    if not cap.isOpened():
        raise RuntimeError(
            "Could not open camera 0. It may be missing or still held by "
            "another VideoCapture handle; release that handle and retry."
        )

    cap.set(cv2.CAP_PROP_FRAME_WIDTH, 1280)
    cap.set(cv2.CAP_PROP_FRAME_HEIGHT, 720)

    while True:
        ok, frame = cap.read()
        if not ok or frame is None:
            # A failed read yields no image; stop instead of crashing in flip().
            print("Camera frame could not be read; stopping.")
            break

        frame = cv2.flip(frame, 1)  # mirror so the preview behaves like a mirror
        # Take the size from the actual frame: the camera is free to ignore
        # the resolution requested above.
        h, w, c = frame.shape
        framergb = cv2.cvtColor(frame, cv2.COLOR_BGR2RGB)
        result = hands.process(framergb)
        hand_landmarks = result.multi_hand_landmarks

        for handLMs in hand_landmarks or []:
            x_min, y_min, x_max, y_max = _hand_box(handLMs.landmark, w, h)

            start = time.time()

            # Crop (as a copy) BEFORE drawing, so the green box drawn for the
            # preview never ends up in the pixels handed to the model.
            img_crop = frame[y_min:y_max, x_min:x_max].copy()
            cv2.rectangle(frame, (x_min, y_min), (x_max, y_max), BOX_COLOR, 2)

            if img_crop.size == 0:
                # Landmarks fell entirely outside the visible frame.
                print("Outside")
                continue

            try:
                # verbose=0: no Keras progress bar on every single frame.
                predict_test = model.predict(_prepare_crop(img_crop), verbose=0)
                print("Raw Predictions:", predict_test)

                predicted_label = np.argmax(predict_test)
                print(predicted_label)

                end = time.time()

                cv2.putText(
                    frame,
                    f"Label: {lookup[str(predicted_label)]}",
                    (20, 60),
                    cv2.FONT_HERSHEY_SIMPLEX,
                    0.9,
                    TEXT_COLOR,
                    2,
                )

                # Guard against a zero interval on coarse system clocks.
                elapsed = end - start
                fps = 1 / elapsed if elapsed > 0 else float("inf")

                # string formatting for fps
                cv2.putText(
                    frame,
                    f"FPS: {fps:.2f}",
                    (20, 30),
                    cv2.FONT_HERSHEY_SIMPLEX,
                    0.9,
                    TEXT_COLOR,
                    2,
                )
            except Exception as exc:
                # Keep the preview alive on a per-frame failure, but report
                # what went wrong. KeyboardInterrupt is deliberately not
                # caught, so interrupting the kernel stops the loop.
                print(f"Prediction failed: {exc!r}")

        cv2.imshow("Alphabet", frame)

        if cv2.waitKey(1) & 0xFF == ord("q"):
            break
finally:
    # Always free the window and the camera, including on error or interrupt.
    cv2.destroyAllWindows()
    cap.release()

Raw Predictions: [[8.3942575e-10 6.9472312e-09 1.7000888e-04 4.4628787e-06 2.4804638e-09
  2.8592813e-06 6.5373951e-09 4.8528546e-09 1.6988680e-10 1.5645401e-09
  9.6375602e-11 1.2879556e-08 1.9133724e-08 1.8483806e-08 9.9698609e-01
  2.8300434e-03 4.9216455e-06 2.7718097e-10 6.7528951e-09 6.1867937e-08
  1.7056376e-10 3.3995073e-10 1.5304795e-09 2.0820932e-09 1.2313757e-09
  2.1370477e-07 7.8941821e-07 3.1437594e-07 2.4962171e-08]]
14
Raw Predictions: [[1.1891797e-06 9.1677015e-05 8.4636474e-01 1.1514132e-03 4.3883138e-06
  1.2264570e-04 1.2836908e-05 1.0030074e-05 3.0934635e-07 2.0046070e-06
  3.0849725e-07 3.1337542e-05 2.9745502e-06 3.7774505e-06 1.3055947e-01
  4.3977107e-04 1.7056454e-02 5.1697011e-06 1.2173675e-06 2.3553812e-05
  2.3042257e-06 4.7305414e-07 8.6947335e-07 1.9483928e-06 4.7654888e-07
  1.6559434e-06 4.0669986e-03 2.5768156e-06 3.7475234e-05]]
2
Raw Predictions: [[2.1041575e-04 5.8119569e-04 4.9209434e-01 8.7619439e-05 3.5702379e-04
  3.6025452e-04 1.3058565e-03 8.