In [1]:
import os
import wget
import cv2
import uuid
import time
import xml.etree.ElementTree as ET

In [2]:
number_imgs = 4

In [3]:
CUSTOM_MODEL_NAME = 'my_ssd_mobilenet-hand' 
PRETRAINED_MODEL_NAME = 'ssd_mobilenet_v2_fpnlite_320x320_coco17_tpu-8'
PRETRAINED_MODEL_URL = 'http://download.tensorflow.org/models/object_detection/tf2/20200711/ssd_mobilenet_v2_fpnlite_320x320_coco17_tpu-8.tar.gz'
TF_RECORD_SCRIPT_NAME = 'generate_tfrecord.py'
LABEL_MAP_NAME = 'label_map.pbtxt'
IMAGES_PATH = os.path.join('Tensorflow', 'workspace', 'images', 'collectedimages')
LABELIMG_PATH = os.path.join('Tensorflow', 'labelimg')
ACTIONS_RECORD = os.path.join('Tensorflow', 'workspace', 'actions.txt')

In [4]:
paths = {
    'WORKSPACE_PATH': os.path.join('Tensorflow_hand', 'workspace'),
    'SCRIPTS_PATH': os.path.join('Tensorflow_hand','scripts'),
    'APIMODEL_PATH': os.path.join('Tensorflow_hand','models'),
    'ANNOTATION_PATH': os.path.join('Tensorflow_hand', 'workspace','annotations'),
    'IMAGE_PATH': os.path.join('Tensorflow_hand', 'workspace','images'),
    'MODEL_PATH': os.path.join('Tensorflow_hand', 'workspace','models'),
    'PRETRAINED_MODEL_PATH': os.path.join('Tensorflow_hand', 'workspace','pre-trained-models'),
    'CHECKPOINT_PATH': os.path.join('Tensorflow_hand', 'workspace','models',CUSTOM_MODEL_NAME), 
    'OUTPUT_PATH': os.path.join('Tensorflow_hand', 'workspace','models',CUSTOM_MODEL_NAME, 'export'), 
    'TFJS_PATH':os.path.join('Tensorflow_hand', 'workspace','models',CUSTOM_MODEL_NAME, 'tfjsexport'), 
    'TFLITE_PATH':os.path.join('Tensorflow_hand', 'workspace','models',CUSTOM_MODEL_NAME, 'tfliteexport'), 
    'PROTOC_PATH':os.path.join('Tensorflow_hand','protoc')
 }

In [5]:
files = {
    'PIPELINE_CONFIG':os.path.join('Tensorflow_hand', 'workspace','models', CUSTOM_MODEL_NAME, 'pipeline.config'),
    'TF_RECORD_SCRIPT': os.path.join(paths['SCRIPTS_PATH'], TF_RECORD_SCRIPT_NAME), 
    'LABELMAP': os.path.join(paths['ANNOTATION_PATH'], LABEL_MAP_NAME)
}

In [6]:
labels = []
with open(ACTIONS_RECORD, 'r') as file:
    labels = file.read().splitlines()

labels

['hello', 'how', 'you', 'thanks']

In [7]:
for path in paths.values():
    if not os.path.exists(path):
        if os.name == 'posix':
            !mkdir -p {path}
        if os.name == 'nt':
            !mkdir {path}

In [8]:
os.environ['PROTOCOL_BUFFERS_PYTHON_IMPLEMENTATION'] = 'python'

In [None]:
import tensorflow as tf
    from object_detection.utils import label_map_util
    from object_detection.utils import visualization_utils as viz_utils
    from object_detection.builders import model_builder
    from object_detection.utils import config_util

In [None]:
# Load pipeline config and build a detection model
    configs = config_util.get_configs_from_pipeline_file(files['PIPELINE_CONFIG'])
    detection_model = model_builder.build(model_config=configs['model'], is_training=False)

    # Restore checkpoint
    ckpt = tf.compat.v2.train.Checkpoint(model=detection_model)
    ckpt.restore(os.path.join(paths['CHECKPOINT_PATH'], 'ckpt-6')).expect_partial()

    @tf.function
    def detect_fn(image):
        image, shapes = detection_model.preprocess(image)
        prediction_dict = detection_model.predict(image, shapes)
        detections = detection_model.postprocess(prediction_dict, shapes)
        return detections

In [10]:
import numpy as np
from matplotlib import pyplot as plt
%matplotlib inline

In [11]:
# Function to create an annotation XML file

def create_annotation_file(action, filename, path, xmin, ymin, xmax, ymax):
    annotation = ET.Element("annotation")

    folder = ET.SubElement(annotation, "folder")
    folder.text = "collectedimages"

    filename_element = ET.SubElement(annotation, "filename")
    filename_element.text = filename

    path_element = ET.SubElement(annotation, "path")
    path_element.text = "C:\\Users\\anujv\\Desktop\\EPICS\\" + path

    source = ET.SubElement(annotation, "source")
    database = ET.SubElement(source, "database")
    database.text = "Unknown"

    size = ET.SubElement(annotation, "size")
    width = ET.SubElement(size, "width")
    width.text = "640"  # Assuming width is fixed
    height = ET.SubElement(size, "height")
    height.text = "480"  # Assuming height is fixed
    depth = ET.SubElement(size, "depth")
    depth.text = "3"

    segmented = ET.SubElement(annotation, "segmented")
    segmented.text = "0"

    # Object
    object_element = ET.SubElement(annotation, "object")
    name = ET.SubElement(object_element, "name")
    name.text = action
    pose = ET.SubElement(object_element, "pose")
    pose.text = "Unspecified"
    truncated = ET.SubElement(object_element, "truncated")
    truncated.text = "0"
    difficult = ET.SubElement(object_element, "difficult")
    difficult.text = "0"
    bndbox = ET.SubElement(object_element, "bndbox")
    xmin_element = ET.SubElement(bndbox, "xmin")
    xmin_element.text = str(xmin)
    ymin_element = ET.SubElement(bndbox, "ymin")
    ymin_element.text = str(ymin)
    xmax_element = ET.SubElement(bndbox, "xmax")
    xmax_element.text = str(xmax)
    ymax_element = ET.SubElement(bndbox, "ymax")
    ymax_element.text = str(ymax)

    # Write the annotation to a file
    annotation_file = os.path.join(IMAGES_PATH, filename.replace(".jpg", ".xml"))
    tree = ET.ElementTree(annotation)
    tree.write(annotation_file)


In [12]:
import tkinter as tk
from tkinter import messagebox

def ask_to_save_image():
    root = tk.Tk()
    root.withdraw()

    answer = messagebox.askyesno("Save Image", "Do you want to save this image?")

    root.destroy()

    return answer

In [13]:
# Input action from the user
action = input('Enter action: ')
no_of_images = 0

# Initialize the camera
cap = cv2.VideoCapture(0)
width = int(cap.get(cv2.CAP_PROP_FRAME_WIDTH))
height = int(cap.get(cv2.CAP_PROP_FRAME_HEIGHT))

# Collect frames
for imgnum in range(5):
    ret, frame = cap.read()
    cv2.putText(frame, f'Collecting frames for {action} - Image Number {imgnum}', (15, 12),
                cv2.FONT_HERSHEY_SIMPLEX, 0.5, (0, 0, 255), 1, cv2.LINE_AA)

    for countdown in range(5, 0, -1):
        countdown_frame = frame.copy()

        input_tensor = tf.convert_to_tensor(np.expand_dims(countdown_frame, 0), dtype=tf.float32)
        detections = detect_fn(input_tensor)
            
        num_detections = int(detections.pop('num_detections'))
        detections = {key: value[0, :num_detections].numpy()
                    for key, value in detections.items()}
        detections['num_detections'] = num_detections

        # detection_classes should be ints.
        detections['detection_classes'] = detections['detection_classes'].astype(np.int64)

        label_id_offset = 1
        image_np_with_detections = frame.copy()

        viz_utils.visualize_boxes_and_labels_on_image_array(
                    image_np_with_detections,
                    detections['detection_boxes'],
                    detections['detection_classes']+label_id_offset,
                    detections['detection_scores'],
                    category_index,
                    use_normalized_coordinates=True,
                    max_boxes_to_draw=1,
                    min_score_thresh=.8,
                    agnostic_mode=False)
            
        image_np_with_detections_copy = image_np_with_detections.copy()
        cv2.putText(image_np_with_detections_copy, f'Capturing in {countdown} seconds', (120, 200),
                    cv2.FONT_HERSHEY_SIMPLEX, 1, (0, 255, 0), 4, cv2.LINE_AA)
            
        cv2.imshow('Countdown', image_np_with_detections_copy)
        cv2.waitKey(1000)

    if imgnum != 0:
        # After countdown, save the image and create the annotation file
        most_accurate_box_index = np.argmax(detections['detection_scores'])
        box = detections['detection_boxes'][most_accurate_box_index]
        ymin, xmin, ymax, xmax = box

        # Convert normalized coordinates to pixel coordinates
        xmin_pixel = int(xmin * width)
        ymin_pixel = int(ymin * height)
        xmax_pixel = int(xmax * width)
        ymax_pixel = int(ymax * height)

        if ask_to_save_image():
            filename = action + '.' + f'{str(uuid.uuid1())}.jpg'
            path = os.path.join(IMAGES_PATH, filename)
            cv2.imwrite(path, frame)
            create_annotation_file(action, filename, path, xmin_pixel, ymin_pixel, xmax_pixel, ymax_pixel)
            no_of_images += 1

# Release the camera and close all OpenCV windows
cap.release()
cv2.destroyAllWindows()

print(no_of_images)

NameError: name 'detect_fn' is not defined