# Sign Language Translation

## Import

In [1]:
import os
import time
import numpy as np
import cv2
import matplotlib.pyplot as plt
%matplotlib inline
from pynq_dpu import DpuOverlay
from pynq.lib.video import *

# Instantiate the DPU overlay from the "dpu.bit" bitstream file. `overlay` is
# used further down to load the compiled model and to obtain the runner.
overlay = DpuOverlay("dpu.bit")

### CONSTANTS

In [None]:
# Frame size, in pixels, requested from the capture device.
FRAME_W, FRAME_H = 640, 480

# Label for each network output index: the 26 upper-case letters followed by
# three extra classes.
class_map = list("ABCDEFGHIJKLMNOPQRSTUVWXYZ") + ["del", "nothing", "space"]

### Utilities

In [None]:
def resize_shortest_edge(image, size):
    """Resize `image` so that its shorter side becomes `size` pixels.

    The longer side is scaled by the same factor and truncated to an integer,
    so the aspect ratio is preserved up to rounding.

    Args:
        image: Array whose first two dimensions are (height, width).
        size: Target length of the shorter side.

    Returns:
        The resized image produced by ``cv2.resize`` (default interpolation).
    """
    height, width = image.shape[:2]
    if height < width:
        # Landscape: the height is the short side.
        target = (int(float(width) / height * size), size)
    else:
        # Portrait or square: the width is the short side.
        target = (size, int(float(height) / width * size))
    # cv2.resize takes the target size as (width, height).
    return cv2.resize(image, target)

def normalize_image(image, mean=(0.519, 0.4992, 0.5139), std=(0.2283, 0.2557, 0.2639)):
    """Standardise a 3-channel image per channel and reverse its channel order.

    Each channel ``c`` of the input is transformed as
    ``(image[..., c] - mean[c]) / std[c]``; the channels are then returned in
    reversed order (input channel 2 becomes output channel 0), which turns a
    BGR image into an RGB one.

    NOTE(review): ``mean[0]`` / ``std[0]`` are applied to the FIRST input
    channel (blue for an OpenCV frame) before the swap. If the statistics were
    computed in RGB order they are being applied to the wrong channels --
    confirm against the training pipeline.

    Args:
        image: Array of shape (H, W, 3); it is cast to float32.
        mean: Three per-channel means, indexed like the input channels.
        std: Three per-channel standard deviations, indexed like the input
            channels.

    Returns:
        A new C-contiguous float32 array of shape (H, W, 3).

    Raises:
        ValueError: If `image` does not have exactly three channels.
    """
    image = image.astype("float32")
    if image.ndim != 3 or image.shape[2] != 3:
        raise ValueError(
            f"expected an image of shape (H, W, 3), got {image.shape}"
        )

    # float32 statistics keep the arithmetic in float32, like the image.
    channel_mean = np.asarray(mean[:3], dtype=np.float32)
    channel_std = np.asarray(std[:3], dtype=np.float32)

    normalized = (image - channel_mean) / channel_std

    # Reverse the channel axis and materialise a contiguous copy so that
    # callers can reshape the result without surprises.
    return np.ascontiguousarray(normalized[:, :, ::-1])

def BGR2RGB(image):
    """Return a 3-channel image with its first and third channels swapped.

    Applied to a BGR image this yields the RGB equivalent.
    """
    blue, green, red = cv2.split(image)
    return cv2.merge([red, green, blue])

def central_crop(image, crop_height, crop_width):
    """Return the centred `crop_height` x `crop_width` window of `image`.

    Works for both (H, W) and (H, W, C) arrays. The result is a slice of the
    input (a view for NumPy arrays), not a copy.

    Args:
        image: Array whose first two dimensions are (height, width).
        crop_height: Height of the window in pixels.
        crop_width: Width of the window in pixels.

    Returns:
        The cropped region, with any trailing dimensions left untouched.

    Raises:
        ValueError: If the requested window is larger than the image. Without
            this check the offsets become negative and slicing silently
            returns a region of the wrong size.
    """
    image_height, image_width = image.shape[:2]
    if crop_height > image_height or crop_width > image_width:
        raise ValueError(
            f"crop size ({crop_height}, {crop_width}) exceeds image size "
            f"({image_height}, {image_width})"
        )

    top = (image_height - crop_height) // 2
    left = (image_width - crop_width) // 2
    return image[top:top + crop_height, left:left + crop_width]

def central_resize(image, new_height, new_width):
    """Resize `image` to `new_height` x `new_width` with bilinear interpolation."""
    # cv2.resize takes the target size as (width, height).
    target_size = (new_width, new_height)
    return cv2.resize(image, target_size, interpolation=cv2.INTER_LINEAR)

def normalize(image):
    """Rescale values linearly so that 0 maps to -1.0 and 256 maps to 1.0."""
    return (image / 256.0 - 0.5) * 2

def to_tensor(image):
    """Cast `image` to float32 and divide every element by 255."""
    as_float = image.astype(np.float32)
    return as_float / 255.0

def preprocess_fn(image, crop_height=200, crop_width=200):
    """Prepare a captured frame for the network.

    Pipeline: centre crop to `crop_height` x `crop_width`, scale to float32 by
    dividing by 255, then apply `normalize_image` with its default statistics.

    Args:
        image: Frame to preprocess.
        crop_height: Height of the central crop in pixels.
        crop_width: Width of the central crop in pixels.

    Returns:
        The array returned by `normalize_image` for the cropped, scaled frame.
    """
    cropped = central_crop(image, crop_height, crop_width)
    return normalize_image(to_tensor(cropped))

def calculate_softmax(data):
    """Return the softmax of `data` along its last axis.

    The maximum is subtracted before exponentiation so that large scores do
    not overflow; this leaves the result mathematically unchanged. The
    position of the largest element is the same as in `data`.

    Args:
        data: Array-like of raw scores with at least one dimension.

    Returns:
        Array of the same shape as `data` whose entries along the last axis
        are non-negative and sum to 1. An empty input yields an empty array.
    """
    scores = np.asarray(data)
    if scores.size == 0:
        # Nothing to normalise; max() of an empty array would raise.
        return np.exp(scores)

    shifted = scores - np.max(scores, axis=-1, keepdims=True)
    exps = np.exp(shifted)
    return exps / np.sum(exps, axis=-1, keepdims=True)

def predict_label(softmax):
    """Return the line of ``images/words.txt`` selected by the top score.

    The file is re-read on every call. The line at index ``argmax - 1`` is
    returned as read, i.e. including its trailing newline if it has one.

    NOTE(review): for argmax 0 the index is -1, which selects the LAST line
    of the file -- confirm that this offset matches the label file layout.
    """
    with open("images/words.txt", "r") as label_file:
        labels = list(label_file)
    best = np.argmax(softmax)
    return labels[best - 1]

### Webcam setup

Load the compiled model into the DPU overlay:

In [4]:
# Load the compiled .xmodel file into the DPU overlay; the runner used for
# inference is taken from `overlay` afterwards.
overlay.load_model("models/CNN_best_kv260.xmodel")

In [5]:
# Open camera index 0 through the V4L2 backend and request the capture size.
videoIn = cv2.VideoCapture(0 + cv2.CAP_V4L2)
videoIn.set(cv2.CAP_PROP_FRAME_WIDTH, FRAME_W)
videoIn.set(cv2.CAP_PROP_FRAME_HEIGHT, FRAME_H)

# Report whether the device could actually be opened.
print(f"Capture device is open: {videoIn.isOpened()}")

Capture device is open: True


### Display port setup

In [6]:
# Open the DisplayPort output.
displayport = DisplayPort()

# Use the capture size for the display mode: frames are copied into the
# display buffer unchanged, so both sizes have to agree. 24 is the number of
# bits per pixel.
displayport.configure(VideoMode(FRAME_W, FRAME_H, 24), PIXEL_RGB)

In [None]:
from IPython.display import clear_output

# Runner object used to submit inference jobs to the DPU.
dpu = overlay.runner

inputTensors = dpu.get_input_tensors()
outputTensors = dpu.get_output_tensors()

shapeIn = tuple(inputTensors[0].dims)
shapeOut = tuple(outputTensors[0].dims)
# Output size divided by shapeIn[0] -- presumably the batch dimension, giving
# the number of output values per input sample.
outputSize = int(outputTensors[0].get_data_size() / shapeIn[0])

# Buffers handed to the runner; they are allocated once and reused.
output_data = [np.empty(shapeOut, dtype=np.float32, order="C")]
input_data = [np.empty(shapeIn, dtype=np.float32, order="C")]
image = input_data[0]  # alias: writing into `image` fills the input buffer

# load the logo
logo_path = "./images/logo_no_text.png"
logo = cv2.imread(logo_path, cv2.IMREAD_UNCHANGED)
if logo is None:
    # cv2.imread does not raise on a missing/unreadable file, it returns None.
    raise FileNotFoundError(f"Could not read logo image: {logo_path}")
# IMREAD_UNCHANGED keeps the file's channel layout; bring it to 3 channels so
# that it can be pasted into the 3-channel camera frame.
if logo.ndim == 2:
    logo = cv2.cvtColor(logo, cv2.COLOR_GRAY2BGR)
elif logo.shape[2] == 4:
    logo = cv2.cvtColor(logo, cv2.COLOR_BGRA2BGR)
logo = cv2.resize(logo, (100, 100))
lh, lw = logo.shape[:2]  # logo height, width

# For FPS calculation
prev_time = time.time()
fps = 0.0
INF_CNT_MAX = 20
inf_cnt = INF_CNT_MAX # keep high so that inference runs on first frame and vars are initialized

# Drawing parameters that do not change between frames.
font = cv2.FONT_HERSHEY_SIMPLEX
font_scale = 2.0
thickness = 3
box_size = 200  # side of the guide square; same value as preprocess_fn's default crop

# Capture / inference / display loop. Runs until it is interrupted or a camera
# read fails.
while True:
    ret, frame_vga = videoIn.read()
    if not ret:
        raise RuntimeError("Error while reading from camera.")

    # Inference runs once every INF_CNT_MAX + 1 frames; in between, the last
    # class_name is drawn again.
    if inf_cnt < INF_CNT_MAX:
        inf_cnt += 1
    else:
        inf_cnt = 0

        # Preprocess only the frames that are actually sent to the DPU.
        preprocessed = preprocess_fn(frame_vga)
        image[0, ...] = preprocessed.reshape(shapeIn[1:])

        # run inference
        job_id = dpu.execute_async(input_data, output_data)
        dpu.wait(job_id)
        temp = [j.reshape(1, outputSize) for j in output_data]
        softmax = calculate_softmax(temp[0][0])
        prediction = np.argmax(softmax)
        class_name = class_map[prediction]

    # FPS computation; keep the previous value if the clock did not advance.
    curr_time = time.time()
    elapsed = curr_time - prev_time
    if elapsed > 0:
        fps = 1.0 / elapsed
    prev_time = curr_time

    # draw overlay text on a copy so the captured frame stays untouched
    display_frame = frame_vga.copy()
    h, w, _ = display_frame.shape

    # Class name: top-left
    cv2.putText(display_frame, f"{class_name}", (20, 60),
                font, font_scale, (255, 0, 0), thickness, cv2.LINE_AA)

    # FPS: top-right (text starts 320 px from the right edge)
    cv2.putText(display_frame, f"FPS: {int(fps)}", (w - 320, 60),
                font, font_scale, (0, 255, 255), thickness, cv2.LINE_AA)

    # Guide square centred in the frame.
    x1 = w // 2 - box_size // 2
    y1 = h // 2 - box_size // 2
    x2 = x1 + box_size
    y2 = y1 + box_size
    cv2.rectangle(display_frame, (x1, y1), (x2, y2), (0, 0, 255), 3)

    # print logo
    lx, ly = 20, h - lh - 20   # 20px margin from left and bottom
    display_frame[ly:ly + lh, lx:lx + lw] = logo

    # send to display port
    outframe = displayport.newframe()
    outframe[:] = display_frame
    displayport.writeframe(outframe)

    time.sleep(0.001)


In [None]:
# Release the camera and close the display output once the loop has stopped.
videoIn.release()
displayport.close()