**DEPENDENCIES AND NEEDED MODULES **

In [19]:
!pip install mediapipe opencv-python



In [20]:
import mediapipe as mp
import cv2
import numpy as np
import uuid
import os
import pickle
import matplotlib.pyplot as plt
from sklearn.ensemble import RandomForestClassifier
from sklearn.model_selection import train_test_split
from sklearn.metrics import accuracy_score

In [6]:
mp_drawing = mp.solutions.drawing_utils
mp_hands = mp.solutions.hands

In [7]:
os.mkdir('Output Images')

**SIMPLE HAND DETECTION**

In [14]:
#complete hand detection by cv2.... if we place our hand infront of our webcam ,, we can see the lines which specifies the hand we are showing by mediapipe
from google.colab.patches import cv2_imshow
cap = cv2.VideoCapture(0)
# check whether the webcam is opened or not
if not cap.isOpened():
  raise Exception("Cannot open webcam")
# hand processing method
with mp_hands.Hands(min_detection_confidence=0.8 , min_tracking_confidence=0.5) as hands:
  # process the cap
  while cap.isOpened():
    ret , frame = cap.read()
    # check frame is read correctly
    if not ret:
          print("Can't receive frame (Stream end). Exiting......")
          break
    # BGR 2 RGB
    img = cv2.cvtColor(frame , cv2.COLOR_BGR2RGB)
    # flip the image on horizontal
    img = cv2.flip(img,1)
    # set the flag as false
    img.flags.writeable = False
    # detect the hand in the web cam
    results = hands.process(img)
    # set the flag as true
    img.flags.writeable = True
    # RGB to BGR
    img = cv2.cvtColor(img ,cv2.COLOR_RGB2BGR)
    # print the results
    print(results)
    # render the whole result of the detected hand
    if results.multi_hand_landmarks:
      for num , hand in enumerate(results.multi_hand_landmarks):
        mp_drawing.draw_landmarks(img , hand , mp_hands.HAND_CONNECTIONS,mp_drawing.DrawingSpec(color = (121,22,76), thickness=2 , circle_radius=4),mp_drawing.DrawingSpec(color = (250 , 44 , 250) , thickness = 2 , circle_radius = 2), )
    cv2.imwrite(os.path.join('Output Images' , '{}.jpg'.format(uuid.uuid1())) , img)
    cv2_imshow('Hand Tracking' , img)
    if cv2.waitKey(10) & 0xFF == ord('q'):
      break
cap.release()
cv2.destroyAllWindows()

Exception: Cannot open webcam

 **COLLECT THE IMAGES FROM REAL TIME WEBCAM FROM OUR COMPUTER**

In [13]:
from google.colab.patches import cv2_imshow

DATA_DIR = './E:/Sign_Language_Detection/Data'
if not os.path.exists(DATA_DIR):
    os.makedirs(DATA_DIR)
# collect images of 36 different classes with 1000 jpg files of each ... so our ultimate dataset will have 3600 jpg imgaes as the training data
number_of_classes = 36
dataset_size = 100
# now capture the real time images
cap = cv2.VideoCapture(2)
# check if the webcam is opened
if not cap.isOpened():
  raise Exception("Cannot open webcam")

for j in range(number_of_classes):
    if not os.path.exists(os.path.join(DATA_DIR, str(j))):
        os.makedirs(os.path.join(DATA_DIR, str(j)))
# data collection is going on .............
    print('Collect the data for class {}'.format(j))

    done = False
    while True:
        ret, frame = cap.read()
        # check frame is read correctly
        if not ret:
          print("Can't receive frame (Stream end). Exiting......")
          break
        cv2.putText(frame, 'Ready? Press "Q" ! :)', (100, 50), cv2.FONT_HERSHEY_SIMPLEX, 1.3, (0, 255, 0), 3,
                    cv2.LINE_AA)
        cv2_imshow(frame)
        if cv2.waitKey(25) == ord('q'):
            break
# after every class of different data is collected counter will increase by 1 and will follow the loops and capture images and make 1000 jpgs of every class
    counter = 0
    while counter < dataset_size:
        ret, frame = cap.read()
        # check frame is read correctly
        if not ret:
          print("Can't receive frame (Stream end). Exiting......")
          break
        cv2_imshow(frame)
        cv2.waitKey(25)
        cv2.imwrite(os.path.join(DATA_DIR, str(j), '{}.jpg'.format(counter)), frame)

        counter += 1
# data collection done !!!!!!!!!!!!!!! yayyyyyy!!!!
cap.release()
cv2.destroyAllWindows()

Exception: Cannot open webcam

In [15]:
# MAKING A DATASET OF OUR OWN
mp_hands = mp.solutions.hands
mp_drawing = mp.solutions.drawing_utils
mp_drawing_styles = mp.solutions.drawing_styles

hands = mp_hands.Hands(static_image_mode=True, min_detection_confidence=0.3)

DATA_DIR = './E:/Sign_Language_Detection/Data'

data = []
labels = []
for dir_ in os.listdir(DATA_DIR):
    for img_path in os.listdir(os.path.join(DATA_DIR, dir_)):
        data_aux = []

        x_ = []
        y_ = []

        img = cv2.imread(os.path.join(DATA_DIR, dir_, img_path))
        img_rgb = cv2.cvtColor(img, cv2.COLOR_BGR2RGB)

        results = hands.process(img_rgb)
        # MAKING THE LANDMARKS ON OUR BEFORE CREATED JPG FILES [AN ALTERNATIVE IDEA OF USING LABLEIMG]
        # Because Python has Mediapipe for all this mannual works ................ hehe
        if results.multi_hand_landmarks:
            for hand_landmarks in results.multi_hand_landmarks:
                for i in range(len(hand_landmarks.landmark)):
                    x = hand_landmarks.landmark[i].x
                    y = hand_landmarks.landmark[i].y

                    x_.append(x)
                    y_.append(y)

                for i in range(len(hand_landmarks.landmark)):
                    x = hand_landmarks.landmark[i].x
                    y = hand_landmarks.landmark[i].y
                    data_aux.append(x - min(x_))
                    data_aux.append(y - min(y_))

            data.append(data_aux)
            labels.append(dir_)

f = open('data.pickle', 'wb')
pickle.dump({'data': data, 'labels': labels}, f)
f.close()

**Train and tune the model using Random Forest Classifier**

In [17]:
# Basic model formation by using the random forest classifier for the finite node containing graphs which is created on the hands we are showing in cv2
data_dict = pickle.load(open('./data.pickle', 'rb'))
# data and labels are being processed in an np array
data = np.asarray(data_dict['data'])
labels = np.asarray(data_dict['labels'])
# splitting our training and testing set by using train_test_split on all the data we have collected real timely
x_train, x_test, y_train, y_test = train_test_split(data, labels, test_size=0.2, shuffle=True, stratify=labels)
# train set = train and tune our classifier using cross validation
# test set = Don't ever touch it untill the end or I will kill you
# shuffle the datas thus we can't figure out which is train set and which is test set
# 3600*0.2 = 720 datas are in test set and remaining 2880 datas are in train set to validate and fine tune our classifier model
# number of classes are = 36 ..... This is gonna be very interesting if it can guess rightly or not .... let's see

# Random Forest Classifier is being used for this type of classification because of traversing and classifing finite node containe graphs
model = RandomForestClassifier()

#very fast and robust .... that's why I have chosen this simple ML algorithm in this case

model.fit(x_train, y_train)

# prediction of the model
y_predict = model.predict(x_test)

score = accuracy_score(y_predict, y_test)

print('{}% of samples were classified correctly !'.format(score * 100))

f = open('model.p', 'wb')
pickle.dump({'model': model}, f)
f.close()

ValueError: With n_samples=0, test_size=0.2 and train_size=None, the resulting train set will be empty. Adjust any of the aforementioned parameters.

**Testing the classifier model using real time sign language conversation**

In [18]:
# Access the web cam and test the classifier model[model.p] using real time images and sign language detection
model_dict = pickle.load(open('./model.p', 'rb'))
model = model_dict['model']

cap = cv2.VideoCapture(2)
# check if the webcam is opened
if not cap.isOpened():
  raise Exception("Cannot open webcam")


mp_hands = mp.solutions.hands
mp_drawing = mp.solutions.drawing_utils
mp_drawing_styles = mp.solutions.drawing_styles

hands = mp_hands.Hands(static_image_mode=True, min_detection_confidence=0.3)

labels_dict = {0: 'A', 1: 'B', 2: 'C' , 3: 'D' , 4: 'E' , 5:'F' , 6:'G' , 7:'H' , 8:'I' , 9:'J' , 10:'K' , 11:'L' , 12:'M' , 13:'N' , 14:'O' , 15:'P', 16:'Q' , 17:'R' , 18:'S' ,
               19:'T' , 20:'U' , 21:'V' , 22:'W' , 23:'X' , 24:'Y' , 25:'Z' , 26:'0' , 27:'1' , 28:'2' , 29:'3' , 30:'4' , 31:'5' , 32:'6' , 33:'7' , 34:'8' , 35:'9'}
while True:

    data_aux = []
    x_ = []
    y_ = []

    ret, frame = cap.read()
    # check frame is read correctly
    if not ret:
          print("Can't receive frame (Stream end). Exiting......")
          break

    H, W, _ = frame.shape

    frame_rgb = cv2.cvtColor(frame, cv2.COLOR_BGR2RGB)
# result will be defined the process on the coloured image BGR 2 RGB
    results = hands.process(frame_rgb)
    if results.multi_hand_landmarks:
      # checking and displaying the hand detection with landmarkings
        for hand_landmarks in results.multi_hand_landmarks:
            mp_drawing.draw_landmarks(
                frame,  # image to draw
                hand_landmarks,  # model output
                mp_hands.HAND_CONNECTIONS,  # hand connections
                mp_drawing_styles.get_default_hand_landmarks_style(),
                mp_drawing_styles.get_default_hand_connections_style())
# putting the landmarks again in the real time video
        for hand_landmarks in results.multi_hand_landmarks:
            for i in range(len(hand_landmarks.landmark)):
                x = hand_landmarks.landmark[i].x
                y = hand_landmarks.landmark[i].y

                x_.append(x)
                y_.append(y)

            for i in range(len(hand_landmarks.landmark)):
                x = hand_landmarks.landmark[i].x
                y = hand_landmarks.landmark[i].y
                data_aux.append(x - min(x_))
                data_aux.append(y - min(y_))
# the eucledian parameter or projection and the dimensions which is making to be traversed [the corners of the rectangle around the hand]
        x1 = int(min(x_) * W) - 10
        y1 = int(min(y_) * H) - 10

        x2 = int(max(x_) * W) - 10
        y2 = int(max(y_) * H) - 10

        # make the  prediction by making the data in a form of nparray

        prediction = model.predict([np.asarray(data_aux)])

        predicted_character = labels_dict[int(prediction[0])]
# make a rectangle around the hand and make prediction in text
        cv2.rectangle(frame, (x1, y1), (x2, y2), (0, 0, 0), 4)
        cv2.putText(frame, predicted_character, (x1, y1 - 10), cv2.FONT_HERSHEY_SIMPLEX, 1.3, (0, 0, 0), 3,
                    cv2.LINE_AA)

    cv2_imshow('frame', frame)
    cv2.waitKey(1)


cap.release()
cv2.destroyAllWindows()

FileNotFoundError: [Errno 2] No such file or directory: './model.p'