# Setup Paths

In [1]:
# this path are in a variable for easy access
WORKSPACE_PATH = 'Tensorflow/workspace'
SCRIPTS_PATH = 'Tensorflow/scripts'
APIMODEL_PATH = 'Tensorflow/models/research/object_detection'
ANNOTATION_PATH = WORKSPACE_PATH+'/annotations'
IMAGE_PATH = WORKSPACE_PATH+'/images'
MODEL_PATH = WORKSPACE_PATH+'/models'
PRETRAINED_MODEL_PATH = WORKSPACE_PATH+'/pre-trained-models'
CONFIG_PATH = MODEL_PATH+'/ssd_mobilenet2/pipeline.config'
CHECKPOINT_PATH = MODEL_PATH+'/ssd_mobilenet2/'

# Create Label Map

In [22]:
# Labels for all the detection that are going to be identifying from an image or video.
labels = [{'name':"Hello",'id':1},{'name':"Thanks",'id':2},{'name':"Yes",'id':3},{'name':"No",'id':4},{'name':"I Love You",'id':5}]

In [3]:
# to create a file called, label_map.pbtxt and write to it. This is the format TENSORFLOW MODEL API expects the labels for detection.
# item {
#     name: name of object to detect
#    id: id of the object to detect
# }

with open(ANNOTATION_PATH+'\label_map.pbtxt','w') as f:
    for label in labels:
        f.write('item{\n')
        f.write('\t name:\'{}\'\n'.format(label["name"]))
        f.write('\t id:{}\n'.format(label["id"]))
        f.write('}\n')

# Create TF Records

In [5]:
# Python script to generate train and test record
!python {SCRIPTS_PATH+'/generate_tfrecord.py'} -x {IMAGE_PATH+"/train"} -l {ANNOTATION_PATH+"/label_map.pbtxt"} -o {ANNOTATION_PATH + '/train.record'}

!python {SCRIPTS_PATH + '/generate_tfrecord.py'} -x {IMAGE_PATH + '/test'} -l {ANNOTATION_PATH + '/label_map.pbtxt'} -o {ANNOTATION_PATH + '/test.record'}

Successfully created the TFRecord file: Tensorflow/workspace/annotations/train.record




Successfully created the TFRecord file: Tensorflow/workspace/annotations/test.record




# Download TF models Pretrained models from tensorflow model Zoo

In [71]:
!cd Tensorflow && git clone https://github.com/tensorflow/models.git

Cloning into 'models'...
Updating files:  31% (1205/3871)
Updating files:  32% (1239/3871)
Updating files:  33% (1278/3871)
Updating files:  34% (1317/3871)
Updating files:  35% (1355/3871)
Updating files:  36% (1394/3871)
Updating files:  37% (1433/3871)
Updating files:  38% (1471/3871)
Updating files:  39% (1510/3871)
Updating files:  40% (1549/3871)
Updating files:  41% (1588/3871)
Updating files:  42% (1626/3871)
Updating files:  43% (1665/3871)
Updating files:  44% (1704/3871)
Updating files:  45% (1742/3871)
Updating files:  46% (1781/3871)
Updating files:  47% (1820/3871)
Updating files:  48% (1859/3871)
Updating files:  49% (1897/3871)
Updating files:  50% (1936/3871)
Updating files:  50% (1951/3871)
Updating files:  51% (1975/3871)
Updating files:  52% (2013/3871)
Updating files:  53% (2052/3871)
Updating files:  54% (2091/3871)
Updating files:  55% (2130/3871)
Updating files:  56% (2168/3871)
Updating files:  57% (2207/3871)
Updating files:  58% (2246/3871)
Updating files:  5

In [83]:
## Shell: go into the research folder and confirm the path to the working directory.
!cd Tensorflow/models/research && pwd

/c/Users/emryz/OneDrive/Desktop/ML/computer_Vision/Object_Detection/SignLanguageDetectionAndCaption/Tensorflow/models/research


In [85]:
# Extract the product from this directory and spill out the corresponding python equivalent
!cd Tensorflow/models/research && protoc object_detection/protos/*.proto --python_out=.

In [86]:
# Copy from setup.py to research
!cp Tensorflow/models/research/object_detection/packages/tf2/setup.py Tensorflow/models/research

In [None]:
# This commmand installs all the package in the setup.py file, These packages are what the model relies on to run seamlessly
# For more details refer to TENSORFLOW MODEL API INSTALLATION: https://tensorflow-object-detection-api-tutorial.readthedocs.io/en/latest/install.html#tf-models-install
python -m pip install Tensorflow/models/research 

In [89]:
# After installation, to confirm is everything is well setuo and can proceed to help us train and and make detections 
# !cd Tensorflow/models/research && python object_detection/builders/model_builder_tf2_test.py

# Copy Model Config For Transfer Learning

In [6]:
MODEL_NAME = "ssd_mobilenet2"

In [7]:
"Tensorflow/workspace/models\\" + MODEL_NAME

'Tensorflow/workspace/models\\ssd_mobilenet2'

In [185]:
## Structure o fthe folder is being created accoring to the documantation in  TENSORFLOW OBJECT DETECTION API
!mkdir {"Tensorflow\workspace\models\\" + MODEL_NAME}

A subdirectory or file Tensorflow\workspace\models\ssd_mobilenet2 already exists.


In [186]:
# Copy the config file t the newly created folder , this is where all the adjustment and fine tunning of the model will happen
!cp {PRETRAINED_MODEL_PATH+"\\"+"ssd_mobilenet_v2_fpnlite_320x320_coco17_tpu-8"+"\\"+"pipeline.config"} {"Tensorflow\workspace\models\\" + MODEL_NAME}

# Update config for Transfer Learning

In [13]:
import tensorflow as tf
from object_detection.utils import config_util
from object_detection.protos import pipeline_pb2
from google.protobuf import text_format

In [98]:
CONFIG_PATH = MODEL_PATH + "/" + MODEL_NAME + "/pipeline.config"
config = config_util.get_configs_from_pipeline_file(CONFIG_PATH)

# I manually modifified the pipeline.config file

In [99]:
NUMCLASS = len(labels)
BATCHSIZE = 4
PATH_TO_PRETRAINED_CHECKPOINT = PRETRAINED_MODEL_PATH + "/ssd_mobilenet_v2_fpnlite_320x320_coco17_tpu-8" + "/" + "checkpoint" + "/" + "ckpt-0"
LABEL_MAP_PATH = ANNOTATION_PATH + "/label_map.pbtxt"
TRAIN_INPUT_READER_PATH = ANNOTATION_PATH + "/train.record"
TEST_INPUT_READER_PATH = ANNOTATION_PATH + "/test.record"

In [100]:
pipeline_config = pipeline_pb2.TrainEvalPipelineConfig()
with tf.io.gfile.GFile(CONFIG_PATH,"r") as f:
    proto_str = f.read()
    text_format.Merge(proto_str,pipeline_config)

In [187]:
# Fine Tiune the model, also set the approrptate path to the train and tes record file
pipeline_config.model.ssd.num_classes = NUMCLASS
pipeline_config.train_config.batch_size = BATCHSIZE
pipeline_config.train_config.fine_tune_checkpoint = PATH_TO_PRETRAINED_CHECKPOINT
pipeline_config.train_config.fine_tune_checkpoint_type = "detection"
pipeline_config.train_input_reader.label_map_path = LABEL_MAP_PATH
pipeline_config.train_input_reader.tf_record_input_reader.input_path[:] = [TRAIN_INPUT_READER_PATH]
pipeline_config.eval_input_reader[0].label_map_path = LABEL_MAP_PATH
pipeline_config.eval_input_reader[0].tf_record_input_reader.input_path[:] = [TEST_INPUT_READER_PATH]

NameError: name 'NUMCLASS' is not defined

In [102]:
config_text = text_format.MessageToString(pipeline_config)
with tf.io.gfile.GFile(CONFIG_PATH,"wb") as f:
    f.write(config_text)

# Train The model

In [189]:
# to print out the command for training the model
print("""python {}/model_main_tf2.py --model_dir={}/{} --pipeline_config_path={}/{}/pipeline.config --num_train_steps=1000""".format(APIMODEL_PATH,MODEL_PATH,MODEL_NAME,MODEL_PATH,MODEL_NAME))

python Tensorflow/models/research/object_detection/model_main_tf2.py --model_dir=Tensorflow/workspace/models/ssd_mobilenet2 --pipeline_config_path=Tensorflow/workspace/models/ssd_mobilenet2/pipeline.config --num_train_steps=1000


In [None]:
# Training happens on in windows shell, To keep track of the output

In [103]:
print("{}/{}".format(MODEL_PATH,MODEL_NAME))

Tensorflow/workspace/models/ssd_mobilenet2


In [None]:
# Command to use tensorflow board,  to monitor training
tensorboard --logdir=Tensorflow/workspace/models/ssd_mobilenet2

# Load Train Model From Checkpoint

In [2]:
import tensorflow as tf
from object_detection.utils import label_map_util
from object_detection.utils import config_util
from object_detection.utils import visualization_utils as viz_utils
from object_detection.builders import model_builder



In [3]:
import os
configs = config_util.get_configs_from_pipeline_file(CONFIG_PATH)
model_config = configs['model']
detection_model = model_builder.build(model_config=model_config, is_training=False)

ckpt = tf.compat.v2.train.Checkpoint(model=detection_model)
ckpt.restore(os.path.join(CHECKPOINT_PATH, 'ckpt-7')).expect_partial()

<tensorflow.python.checkpoint.checkpoint.CheckpointLoadStatus at 0x226898f38e0>

In [4]:
import cv2 as cv
import numpy as np

# test Model on image

In [194]:
def detection_fn(image):
    image, shape = detection_model.preprocess(image)
    prediction_dict = detection_model.predict(image,shape)
    detections = detection_model.postprocess(prediction_dict, shape)
    return detections

In [217]:
image = cv.imread("Tensorflow/workspace/images/test/WIN_20240923_01_49_50_Pro.jpg")
image_display = image.copy()

In [218]:
image = tf.cast(image, tf.float32)

In [219]:
image, shape = detection_model.preprocess(image[tf.newaxis, :])

In [220]:
detections = detection_fn(image)

In [221]:
val = {key:value[0].numpy() for key,value in detections.items()}

In [222]:
val_max = np.argmax(val["detection_scores"])
index_label_max = val["detection_classes"][val_max]
label_name = category_index[index_label_max+1]["name"]

In [49]:
# image = cv.imread("Tensorflow/workspace/images/test/WIN_20240923_01_49_50_Pro.jpg")

In [223]:
## Funtion to caption image based on the detection hand signal.

def putTextWrapped(image,text):
    font = cv.FONT_HERSHEY_SIMPLEX
    font_scale = 1
    thickness = 2
    max_width = image.shape[0] - 20
    words = text.split(" ")
    
    space_width, height = cv.getTextSize(' ', font, font_scale, thickness)
    current_line = ''
    lines = []
    
    for word in words:
        word_width, _ = cv.getTextSize(word, font, font_scale, thickness)
        if cv.getTextSize(current_line,font, font_scale, thickness)[0][0] + word_width[0] + space_width[0] > max_width:
            lines.append(current_line)
            current_line = word + " "
        else:
            current_line += word + " "
    
    lines.append(current_line)
    
    x,y = (250, 600)
    next_y_rectangle= y - 25
    line_height = cv.getTextSize("Test", font, font_scale, thickness)[0][1] + 10
    for line in lines[-2:]:
        # text_height = cv.getTextSize(line, font, font_scale, thickness)[0][1] + 10
        cv.rectangle(image, (int(x), int(next_y_rectangle)), (int(x+700), int(next_y_rectangle + 30)), (255, 255, 255), 30)
        next_y_rectangle +=  70
        cv.putText(image, line, (x,y), font, 1, (0, 0, 0), 1, 17)
        y += line_height + 35
    return image

cv.imshow('Image with Wrapped Text', putTextWrapped(image_display,label_name))
cv.waitKey(0)
cv.destroyAllWindows()

# Detect in real time

In [178]:
# Function for detection
@tf.function
def detection_fn(image):
    image, shape = detection_model.preprocess(image)
    prediction_dict = detection_model.predict(image,shape)
    detections = detection_model.postprocess(prediction_dict, shape)
    return detections

# Function to wrap the text with a background 
def putTextWrapped(image,text):
    font = cv.FONT_HERSHEY_SIMPLEX
    font_scale = 1
    thickness = 2
    max_width = image.shape[0] - 20
    words = text.split(" ")
    
    space_width, height = cv.getTextSize(' ', font, font_scale, thickness)
    current_line = ''
    lines = []
    
    for word in words:
        word_width, _ = cv.getTextSize(word, font, font_scale, thickness)
        if cv.getTextSize(current_line,font, font_scale, thickness)[0][0] + word_width[0] + space_width[0] > max_width:
            lines.append(current_line)
            current_line = word + " "
        else:
            current_line += word + " "
    
    lines.append(current_line)
    
    x,y = (120, 380)
    next_y_rectangle= y - 25
    line_height = cv.getTextSize("Test", font, font_scale, thickness)[0][1] + 10
    for line in lines[-2:]:
        # text_height = cv.getTextSize(line, font, font_scale, thickness)[0][1] + 10
        cv.rectangle(image, (int(x), int(next_y_rectangle)), (int(x+400), int(next_y_rectangle + 30)), (255, 255, 255), 30)
        next_y_rectangle +=  70
        cv.putText(image, line, (x,y), font, 1, (0, 0, 0), 1, 17)
        y += line_height + 35
    
    return image

# Funcion that determines when to caption add new caption to a new line (New detection is added to the line, when a new detection is noticed)
def captionImage(detections,prev_text,text_to_display):
    val_max = np.argmax(detections["detection_scores"])
    index_label_max = detections["detection_classes"][val_max]
    label = category_index[index_label_max+1]["name"]
    if prev_text != label:
        text_to_display = text_to_display + " " + label
        prev_text = label
    return prev_text,text_to_display

In [101]:
import cv2 as cv
import numpy as np

In [102]:
category_index = label_map_util.create_category_index_from_labelmap(ANNOTATION_PATH+"/label_map.pbtxt",
                                                                    use_display_name=True)

In [181]:
cap = cv.VideoCapture(0)
width = int(cap.get(cv.CAP_PROP_FRAME_WIDTH))
height = int(cap.get(cv.CAP_PROP_FRAME_HEIGHT))

In [183]:
fourcc = cv.VideoWriter_fourcc(*'mp4v')  # Specify the codec
fps = cap.get(cv.CAP_PROP_FPS)
out = cv.VideoWriter('out.mp4', fourcc, fps, (width, height))

In [184]:
prev_text = ""
text_to_display = ""

while True:
    ret,frame = cap.read()
    input_np = np.array(frame)
    
    input_np = np.fliplr(input_np)
    
    input_tensor = tf.convert_to_tensor(np.expand_dims(input_np,0),dtype=tf.float32)
    detections = detection_fn(input_tensor)
    num_detections = int(detections.pop("num_detections"))
    
    detections = {key:value[0,:num_detections].numpy() for key,value in detections.items()}
    detections["num_detections"] = num_detections
    detections["detection_classes"] = detections["detection_classes"].astype(np.int64)
    
    label_id_offset = 1
    image_np_with_detections = input_np.copy()

    prev_text,text_to_display = captionImage(detections,prev_text,text_to_display)
    prev_text = prev_text
    text_to_display=text_to_display

    image_np_with_detections = putTextWrapped(image_np_with_detections,text_to_display)
    
    
    viz_utils.visualize_boxes_and_labels_on_image_array(
          image_np_with_detections,
          detections['detection_boxes'],
          detections['detection_classes'] + label_id_offset,
          detections['detection_scores'],
          category_index,
          use_normalized_coordinates=True,
          max_boxes_to_draw=1,
          min_score_thresh=.30,
          agnostic_mode=False)

    
    out.write(image_np_with_detections)
    cv.imshow('object detection', cv.resize(image_np_with_detections, (1280, 720)))
    

    if cv.waitKey(25) & 0xFF == ord('q'):
        text_to_display = ""
        prev_text = ""
        break

out.release()
cap.release()
cv.destroyAllWindows()