# Sign Language Translation

## Imports

In [1]:
import os
import time
import numpy as np
import cv2
import matplotlib.pyplot as plt
%matplotlib inline
from pynq_dpu import DpuOverlay
from pynq.lib.video import *

# Create the DPU overlay from the "dpu.bit" bitstream file. Later cells load the
# model into this object (overlay.load_model) and run inference via overlay.runner.
overlay = DpuOverlay("dpu.bit")

### Constants

In [2]:
# Frame size (in pixels) requested from the webcam.
FRAME_W, FRAME_H = 640, 480

# Per-channel mean values on a 0-255 scale, collected in B, G, R order.
# NOTE(review): these are not referenced by the preprocessing functions visible
# in this notebook (normalize_image carries its own defaults) - confirm whether
# they are still needed.
_R_MEAN, _G_MEAN, _B_MEAN = 123.68, 116.78, 103.94
MEANS = [_B_MEAN, _G_MEAN, _R_MEAN]

# Label for each class index produced by the classifier: digits 0-9, then A-Z.
class_map = list("0123456789ABCDEFGHIJKLMNOPQRSTUVWXYZ")

### Utilities

In [3]:
def resize_shortest_edge(image, size):
    """Resize ``image`` so that its shorter side equals ``size``.

    The longer side is scaled by the same factor and truncated to an integer,
    so the aspect ratio is preserved up to that truncation.

    Args:
        image: array whose first two dimensions are (height, width).
        size: target length of the shorter side, in pixels.

    Returns:
        The resized image as returned by ``cv2.resize`` (default interpolation).
    """
    height, width = image.shape[:2]
    if height < width:
        # Landscape: height is the short side and is pinned to `size`.
        target = (int(float(width) / height * size), size)
    else:
        # Portrait or square: width is pinned to `size`.
        target = (size, int(float(height) / width * size))
    # cv2.resize takes the destination size as (width, height).
    return cv2.resize(image, target)

def normalize_image(image, mean = [0.8969, 0.8335, 0.8416], std = [0.1565, 0.25, 0.24]):
    """Standardise a 3-channel image per channel and reverse its channel order.

    Input channel ``i`` is converted to float32 and transformed as
    ``(channel - mean[i]) / std[i]``. The three results are then merged in
    reversed order, so input channel 0 becomes output channel 2 and vice versa.

    NOTE(review): preprocess_fn calls this on an image that has already been
    through BGR2RGB, so the reversal here turns it back into BGR order. Confirm
    which channel order (and which mean/std order) the model was trained with.

    Args:
        image: 3-channel image array; the defaults suggest values already
            scaled to [0, 1] - TODO confirm.
        mean: per-channel means, indexed by *input* channel position.
        std: per-channel standard deviations, indexed by *input* channel position.

    Returns:
        float32 image with standardised values and reversed channel order.
    """
    # Unpacking into exactly three names rejects images with any other channel count.
    first, second, third = cv2.split(image.astype("float32"))
    planes = (first, second, third)

    standardised = [(planes[i] - mean[i]) / std[i] for i in range(3)]

    # Merge back with the channel order reversed (third, second, first).
    return cv2.merge(standardised[::-1])

def BGR2RGB(image):
    """Return a 3-channel image with its first and last channels swapped.

    Applied to a BGR image this yields RGB (and vice versa). The input must
    split into exactly three channels.
    """
    blue, green, red = cv2.split(image)
    return cv2.merge((red, green, blue))

def central_crop(image, crop_height, crop_width):
    """Cut a ``crop_height`` x ``crop_width`` window out of the centre of ``image``.

    Args:
        image: array whose first two dimensions are (height, width); any
            trailing dimensions (e.g. channels) are kept unchanged, so both
            2-D and 3-D images are accepted.
        crop_height: height of the window in pixels.
        crop_width: width of the window in pixels.

    Returns:
        The centred window (a slice of ``image``, not a copy, for NumPy arrays).

    Raises:
        ValueError: if the requested window is negative or larger than the
            image. Without this check the offsets become negative and the
            slice silently returns a smaller, off-centre region.
    """
    image_height, image_width = image.shape[:2]
    if not (0 <= crop_height <= image_height and 0 <= crop_width <= image_width):
        raise ValueError(
            "crop size ({}, {}) does not fit inside image of size ({}, {})".format(
                crop_height, crop_width, image_height, image_width))

    top = (image_height - crop_height) // 2
    left = (image_width - crop_width) // 2
    # Ellipsis keeps every trailing axis, so grayscale (2-D) input works too.
    return image[top:top + crop_height, left:left + crop_width, ...]

def central_resize(image, new_height, new_width):
    """Resize ``image`` to exactly ``new_height`` x ``new_width`` with bilinear interpolation.

    Despite the name, no cropping takes place: the whole image is rescaled, so
    the aspect ratio changes whenever the target ratio differs from the source.
    """
    # cv2.resize expects the destination size as (width, height).
    target_size = (new_width, new_height)
    return cv2.resize(image, target_size, interpolation=cv2.INTER_LINEAR)

def normalize(image):
    """Rescale values linearly as ``(image / 256 - 0.5) * 2``.

    An input of 0 maps to -1.0, 128 maps to 0.0 and 256 would map to 1.0.
    Works on scalars and on arrays (element-wise).
    """
    centred = image / 256.0 - 0.5
    return centred * 2

def to_tensor(image):
    """Convert ``image`` to float32 and divide every value by 255.

    Only the dtype and value range change; the array layout (axis order) is
    left as it is.
    """
    as_float = image.astype(np.float32)
    return as_float / 255.0

def preprocess_fn(image, crop_height = 224, crop_width = 224):
    """Turn a captured frame into the array that is fed to the model.

    Steps, in order:
      1. BGR2RGB        - swap the first and last channels.
      2. central_resize - rescale to (crop_height, crop_width); no cropping.
      3. to_tensor      - float32, values divided by 255.
      4. normalize_image - per-channel (x - mean) / std with its default stats.

    NOTE(review): normalize_image reverses the channel order again, so the
    returned image is back in the channel order of the *input* frame (BGR for
    an OpenCV capture). Confirm this matches the order used during training.

    Args:
        image: 3-channel frame, e.g. as returned by ``cv2.VideoCapture.read``.
        crop_height: output height in pixels.
        crop_width: output width in pixels.

    Returns:
        float32 array of shape (crop_height, crop_width, 3).
    """
    swapped = BGR2RGB(image)
    resized = central_resize(swapped, crop_height, crop_width)
    scaled = to_tensor(resized)
    return normalize_image(scaled)

def calculate_softmax(data):
    """Return the softmax of ``data`` along its last axis.

    The previous implementation returned ``np.exp(data)`` only, which is not
    normalised and overflows to ``inf`` for large scores. This version
    subtracts the maximum before exponentiating (numerically stable) and
    divides by the sum so the result is a probability distribution. The
    position of the largest element is unchanged, so ``np.argmax`` on the
    result gives the same class as before.

    Args:
        data: array-like of raw scores (logits); softmax is taken over the
            last axis.

    Returns:
        Array of the same shape as ``data`` whose entries along the last axis
        are non-negative and sum to 1. Empty input yields an empty array; a
        scalar input yields 1.0.
    """
    scores = np.asarray(data)
    if scores.size == 0:
        # Nothing to normalise; keep the shape and return an empty float array.
        return np.exp(scores)
    if scores.ndim == 0:
        # Softmax over a single value is always 1.
        return np.exp(scores - scores)

    # Shifting by the maximum leaves the result unchanged mathematically but
    # keeps every exponent <= 0, which avoids overflow.
    shifted = scores - np.max(scores, axis=-1, keepdims=True)
    exps = np.exp(shifted)
    return exps / np.sum(exps, axis=-1, keepdims=True)

def predict_label(softmax):
    """Look up the label line for the highest-scoring class in images/words.txt.

    The file is read in full on every call. The line returned is the one at
    index ``argmax(softmax) - 1`` and still carries its trailing newline.

    NOTE(review): because of the ``- 1`` offset, a winning index of 0 selects
    the *last* line of the file (Python negative indexing). This looks like it
    assumes class 0 is a background/placeholder class - confirm against the
    label file in use.
    """
    with open("images/words.txt", "r") as label_file:
        labels = list(label_file)
    best = np.argmax(softmax)
    return labels[best - 1]

### Webcam setup

Load the model:

In [4]:
# Load the compiled model file into the overlay; the inference cell further
# down obtains its runner from overlay.runner.
overlay.load_model("models/CNN_kv260.xmodel")

In [5]:
# Open camera index 0 with the V4L2 backend flag added to the index, then
# request the configured frame size.
videoIn = cv2.VideoCapture(0 + cv2.CAP_V4L2)
videoIn.set(cv2.CAP_PROP_FRAME_WIDTH, FRAME_W)
videoIn.set(cv2.CAP_PROP_FRAME_HEIGHT, FRAME_H)

# Report whether the device could be opened before any frames are read.
print(f"Capture device is open: {videoIn.isOpened()}")

Capture device is open: True


### Display port setup

In [6]:
displayport = DisplayPort()

# Use the same resolution as the camera capture (FRAME_W x FRAME_H, 24 bits per
# pixel): the inference loop copies each captured frame straight into a display
# frame, so the two sizes must stay in sync.
displayport.configure(VideoMode(FRAME_W, FRAME_H, 24), PIXEL_RGB)

In [None]:
from IPython.display import clear_output

# Runner exposed by the overlay; used to query tensor shapes and run inference.
dpu = overlay.runner

inputTensors = dpu.get_input_tensors()
outputTensors = dpu.get_output_tensors()

# Dimensions of the first input/output tensor. shapeIn[0] is used as the batch
# size when working out how many output values belong to one image.
shapeIn = tuple(inputTensors[0].dims)
shapeOut = tuple(outputTensors[0].dims)
outputSize = int(outputTensors[0].get_data_size() / shapeIn[0])

# Buffers handed to the runner on every iteration; `image` aliases the input
# buffer so that writing into it fills the data the runner reads.
input_data = [np.empty(shapeIn, dtype=np.float32, order="C")]
output_data = [np.empty(shapeOut, dtype=np.float32, order="C")]
image = input_data[0]

# Runs until the cell is interrupted or a frame cannot be read.
while True:
    ret, frame_vga = videoIn.read()
    if not ret:
        raise RuntimeError("Error while reading from camera.")

    # Mirror the raw camera frame to the display.
    outframe = displayport.newframe()
    outframe[:] = frame_vga
    displayport.writeframe(outframe)

    # Preprocess the frame and place it in slot 0 of the input buffer.
    preprocessed = preprocess_fn(frame_vga)
    image[0, ...] = preprocessed.reshape(shapeIn[1:])

    # Submit the job and wait for it to complete before reading the output.
    job_id = dpu.execute_async(input_data, output_data)
    dpu.wait(job_id)

    # Flatten each output buffer to (1, outputSize) and score the first image.
    temp = [buffer.reshape(1, outputSize) for buffer in output_data]
    softmax = calculate_softmax(temp[0][0])
    prediction = np.argmax(softmax)

    # Replace the previous cell output with the current predicted label.
    clear_output(wait=True)
    print(class_map[prediction])

    time.sleep(0.05)
# The capture device is not released here (no videoIn.release() call).

B


Test the camera: