Resources Used
- wget.download('https://tensorflow-object-detection-api-tutorial.readthedocs.io/en/latest/_downloads/da4babe668a8afb093cc7776d7e630f3/generate_tfrecord.py')
- Setup https://tensorflow-object-detection-api-tutorial.readthedocs.io/en/latest/install.html

# 0. Setup Paths

In [1]:
WORKSPACE_PATH = 'Tensorflow/workspace'
SCRIPTS_PATH = 'Tensorflow/scripts'
APIMODEL_PATH = 'Tensorflow/models'
ANNOTATION_PATH = WORKSPACE_PATH+'/annotations'
IMAGE_PATH = WORKSPACE_PATH+'/images'
MODEL_PATH = WORKSPACE_PATH+'/models'
PRETRAINED_MODEL_PATH = WORKSPACE_PATH+'/pre-trained-models'
CONFIG_PATH = MODEL_PATH+'/my_ssd_mobnet/pipeline.config'
CHECKPOINT_PATH = MODEL_PATH+'/my_ssd_mobnet/'

# 1. Create Label Map

In [2]:
labels = [{'name':'A', 'id':1}, 
          {'name':'B', 'id':2},
          {'name':'C', 'id':3},
          {'name':'D', 'id':4},
          {'name':'E', 'id':5},
         {'name':'F', 'id':6},
         {'name':'G', 'id':7},
         {'name':'H', 'id':8},
         {'name':'I', 'id':9},
         {'name':'K', 'id':10},
         {'name':'L', 'id':11},
         {'name':'M', 'id':12},
         {'name':'N', 'id':13},
         {'name':'O', 'id':14},
         {'name':'P', 'id':15},
         {'name':'Q', 'id':16},
         {'name':'R', 'id':17},
         {'name':'S', 'id':18},
         {'name':'T', 'id':19},
         {'name':'U', 'id':20},
         {'name':'V', 'id':21},
         {'name':'W', 'id':22},
         {'name':'X', 'id':23},
         {'name':'Y', 'id':24},
         {'name':'_', 'id':25}]

with open(ANNOTATION_PATH + '\label_map.pbtxt', 'w') as f:
    for label in labels:
        f.write('item { \n')
        f.write('\tname:\'{}\'\n'.format(label['name']))
        f.write('\tid:{}\n'.format(label['id']))
        f.write('}\n')

# 2. Create TF records

In [3]:
!python {SCRIPTS_PATH + '/generate_tfrecord.py'} -x {IMAGE_PATH + '/train'} -l {ANNOTATION_PATH + '/label_map.pbtxt'} -o {ANNOTATION_PATH + '/train.record'}
!python {SCRIPTS_PATH + '/generate_tfrecord.py'} -x{IMAGE_PATH + '/test'} -l {ANNOTATION_PATH + '/label_map.pbtxt'} -o {ANNOTATION_PATH + '/test.record'}

Successfully created the TFRecord file: Tensorflow/workspace/annotations/train.record
Successfully created the TFRecord file: Tensorflow/workspace/annotations/test.record


# 3. Download TF Models Pretrained Models from Tensorflow Model Zoo

In [4]:
!cd Tensorflow && git clone https://github.com/tensorflow/models

fatal: destination path 'models' already exists and is not an empty directory.


In [5]:
#wget.download('http://download.tensorflow.org/models/object_detection/tf2/20200711/ssd_mobilenet_v2_fpnlite_320x320_coco17_tpu-8.tar.gz')
#!mv ssd_mobilenet_v2_fpnlite_320x320_coco17_tpu-8.tar.gz {PRETRAINED_MODEL_PATH}
#!cd {PRETRAINED_MODEL_PATH} && tar -zxvf ssd_mobilenet_v2_fpnlite_320x320_coco17_tpu-8.tar.gz

# 4. Copy Model Config to Training Folder

In [6]:
CUSTOM_MODEL_NAME = 'my_ssd_mobnet' 

# 5. Update Config For Transfer Learning

In [7]:
import tensorflow as tf
from object_detection.utils import config_util
from object_detection.protos import pipeline_pb2
from google.protobuf import text_format

In [8]:
CONFIG_PATH = MODEL_PATH+'/'+CUSTOM_MODEL_NAME+'/pipeline.config'

In [9]:
config = config_util.get_configs_from_pipeline_file(CONFIG_PATH)

In [10]:
config

{'model': ssd {
   num_classes: 24
   image_resizer {
     fixed_shape_resizer {
       height: 320
       width: 320
     }
   }
   feature_extractor {
     type: "ssd_mobilenet_v2_fpn_keras"
     depth_multiplier: 1.0
     min_depth: 16
     conv_hyperparams {
       regularizer {
         l2_regularizer {
           weight: 4e-05
         }
       }
       initializer {
         random_normal_initializer {
           mean: 0.0
           stddev: 0.01
         }
       }
       activation: RELU_6
       batch_norm {
         decay: 0.997
         scale: true
         epsilon: 0.001
       }
     }
     use_depthwise: true
     override_base_feature_extractor_hyperparams: true
     fpn {
       min_level: 3
       max_level: 7
       additional_layer_depth: 128
     }
   }
   box_coder {
     faster_rcnn_box_coder {
       y_scale: 10.0
       x_scale: 10.0
       height_scale: 5.0
       width_scale: 5.0
     }
   }
   matcher {
     argmax_matcher {
       matched_threshold: 0.5
   

In [11]:
pipeline_config = pipeline_pb2.TrainEvalPipelineConfig()
with tf.io.gfile.GFile(CONFIG_PATH, "r") as f:                                                                                                                                                                                                                     
    proto_str = f.read()                                                                                                                                                                                                                                          
    text_format.Merge(proto_str, pipeline_config)  

In [12]:
pipeline_config.model.ssd.num_classes = 25
pipeline_config.train_config.batch_size = 4
pipeline_config.train_config.fine_tune_checkpoint = PRETRAINED_MODEL_PATH+'/ssd_mobilenet_v2_fpnlite_320x320_coco17_tpu-8/checkpoint/ckpt-0'
pipeline_config.train_config.fine_tune_checkpoint_type = "detection"
pipeline_config.train_input_reader.label_map_path= ANNOTATION_PATH + '/label_map.pbtxt'
pipeline_config.train_input_reader.tf_record_input_reader.input_path[:] = [ANNOTATION_PATH + '/train.record']
pipeline_config.eval_input_reader[0].label_map_path = ANNOTATION_PATH + '/label_map.pbtxt'
pipeline_config.eval_input_reader[0].tf_record_input_reader.input_path[:] = [ANNOTATION_PATH + '/test.record']

In [13]:
config_text = text_format.MessageToString(pipeline_config)                                                                                                                                                                                                        
with tf.io.gfile.GFile(CONFIG_PATH, "wb") as f:                                                                                                                                                                                                                     
    f.write(config_text)   

# 6. Train the model

In [14]:
print("""python {}/research/object_detection/model_main_tf2.py --model_dir={}/{} --pipeline_config_path={}/{}/pipeline.config --num_train_steps=5000""".format(APIMODEL_PATH, MODEL_PATH,CUSTOM_MODEL_NAME,MODEL_PATH,CUSTOM_MODEL_NAME))

python Tensorflow/models/research/object_detection/model_main_tf2.py --model_dir=Tensorflow/workspace/models/my_ssd_mobnet --pipeline_config_path=Tensorflow/workspace/models/my_ssd_mobnet/pipeline.config --num_train_steps=5000


# 7. Load Train Model From Checkpoint

In [15]:
import os
from object_detection.utils import label_map_util
from object_detection.utils import visualization_utils as viz_utils
from object_detection.builders import model_builder
print(viz_utils.__file__)

C:\Users\User\miniconda3\envs\tensorflow\lib\site-packages\object_detection\utils\visualization_utils.py


In [16]:
# Load pipeline config and build a detection model
configs = config_util.get_configs_from_pipeline_file(CONFIG_PATH)
detection_model = model_builder.build(model_config=configs['model'], is_training=False)

# Restore checkpoint
ckpt = tf.compat.v2.train.Checkpoint(model=detection_model)
ckpt.restore(os.path.join(CHECKPOINT_PATH, 'ckpt-61')).expect_partial()

@tf.function
def detect_fn(image):
    image, shapes = detection_model.preprocess(image)
    prediction_dict = detection_model.predict(image, shapes)
    detections = detection_model.postprocess(prediction_dict, shapes)
    return detections

In [17]:
import tensorflow as tf
from tensorflow.python.client import device_lib
print(device_lib.list_local_devices())

[name: "/device:CPU:0"
device_type: "CPU"
memory_limit: 268435456
locality {
}
incarnation: 8887810729932026612
, name: "/device:GPU:0"
device_type: "GPU"
memory_limit: 5732106240
locality {
  bus_id: 1
  links {
  }
}
incarnation: 14313685866482029760
physical_device_desc: "device: 0, name: GeForce RTX 3070, pci bus id: 0000:01:00.0, compute capability: 8.6"
]


# 8. Detect in Real-Time

In [18]:
import cv2 
import numpy as np
from HandTrackingModule import handDetector
from collections import Counter

In [19]:
category_index = label_map_util.create_category_index_from_labelmap(ANNOTATION_PATH+'/label_map.pbtxt')

In [53]:
# Setup capture
cap = cv2.VideoCapture(0)
width = int(cap.get(cv2.CAP_PROP_FRAME_WIDTH))
height = int(cap.get(cv2.CAP_PROP_FRAME_HEIGHT))
detector = handDetector()
message=[]
message_dict={}
message_index=0
alphabet_store=[]
current_fingers=[0,0,0,0,0]
new_label_class=''
current_label_class=''
most_label = ['']
pending_label = most_label
change_confident_number = 6
confident_threshold = 8
non_hand_counting = 0
non_hand_append = False

In [54]:
def checkChange(alphabet_store):
    global most_label
    global pending_label
    most_label = ['']
    pending_label = most_label
    if len(alphabet_store)>=(change_confident_number+confident_threshold):
        most_label = list(Counter(alphabet_store).keys())[0]
        pending_label = list(Counter(alphabet_store[-change_confident_number:]).keys())[0]
    print(pending_label,most_label)
    if pending_label == most_label:
        return False
    else:
        return True
    

In [55]:
while True: 
    ret, frame = cap.read()
    image_np = np.array(frame)
        
    input_tensor = tf.convert_to_tensor(np.expand_dims(image_np, 0), dtype=tf.float32)
    detections = detect_fn(input_tensor)
    
    num_detections = int(detections.pop('num_detections'))
    detections = {key: value[0, :num_detections].numpy()
                  for key, value in detections.items()}
    detections['num_detections'] = num_detections

    # detection_classes should be ints.
    detections['detection_classes'] = detections['detection_classes'].astype(np.int64)

    label_id_offset = 1
    image_np_with_detections = image_np.copy()
    display_img,new_label_class,score = viz_utils.visualize_boxes_and_labels_on_image_array(
                image_np_with_detections,
                detections['detection_boxes'],
                detections['detection_classes']+label_id_offset,
                detections['detection_scores'],
                category_index,
                use_normalized_coordinates=True,
                max_boxes_to_draw=1,
                min_score_thresh=.9,
                agnostic_mode=False)
    
    
    if new_label_class != '' or score!= '':
        raw_score = score[3:5]
        alphabet_store.append(new_label_class)
        print(new_label_class,end=' ')
        print(raw_score)
    
    frame = detector.findHands(frame)
    lmList, bbox = detector.findPosition(frame)
    if len(lmList) != 0:
        new_fingers = detector.fingersUp()
        if checkChange(alphabet_store):
            print('changed!')
            counted_dict = Counter(alphabet_store)
            first_key = list(counted_dict.keys())[0]
            message.append(first_key)
            alphabet_store = []
            print(''.join(message))
            non_hand_append = True
        current_fingers = new_fingers
        current_label_class = new_label_class
    else:
        non_hand_counting += 1
        if (len(alphabet_store)>confident_threshold) and non_hand_counting >= 20 and non_hand_append:
            message.append(list(Counter(alphabet_store).keys())[0])
            alphabet_store = []
            non_hand_append = False
            non_hand_counting = 0
        
    cv2.putText(display_img, ''.join(message), (10, 70), cv2.FONT_HERSHEY_PLAIN, 3, (255, 0, 255), 3)
    if len(alphabet_store)>4:
        cv2.putText(display_img, 'Pending alphabet: '+list(Counter(alphabet_store).keys())[0], (100, 140), cv2.FONT_HERSHEY_PLAIN, 1, (255, 0, 0), 3)
    cv2.imshow('object detection', display_img)
    
    
    if cv2.waitKey(1) and 0xFF == ord('q'):
        cap.release()
        break

A 91
A 98
A 95
A 97
[''] ['']
A 99
[''] ['']
A 99
[''] ['']
A 98
[''] ['']
A 98
[''] ['']
A 98
[''] ['']
A 98
[''] ['']
A 99
[''] ['']
A 98
[''] ['']
A 98
[''] ['']
A 99
A A
A 99
A A
A 99
A A
A 99
A A
A 99
A A
A 99
A A
A 99
A A
A 99
A A
A 98
A A
A 99
A A
A 98
A A
A A
A A
A A
B 91
A A
B 92
A A
B 93
A A
B 93
A A
B 92
A A
B 92
B A
changed!
A
B 91
[''] ['']
B 92
[''] ['']
B 92
[''] ['']
B 92
[''] ['']
B 91
[''] ['']
B 92
[''] ['']
B 93
[''] ['']
B 92
[''] ['']
B 93
[''] ['']
B 92
[''] ['']
B 92
[''] ['']
B 92
[''] ['']
B 92
[''] ['']
B 92
B B
B 92
B B
B 93
B B
B 92
B B
B 92
B B
B 93
B B
B 93
B B
B 93
B B
B 91
B B
B 92
B B
B 92
B B
B 92
B B
B 92
B B
B 91
B B
B 92
B B
B 92
B B
B B
B 90
B B
B 92
B B
B 90
B B
B 91
B B
B 91
B B
B 93
B B
B 92
B B
B 91
B B
B 91
B B
B 94
B B
B 97
B B
B 98
B B
[''] ['']
B 95
[''] ['']
B 94
[''] ['']
B 97
[''] ['']
B 98
[''] ['']
B 98
[''] ['']
B 98
[''] ['']
B 98
[''] ['']
B 98
[''] ['']
B 98
[''] ['']
B 98
[''] ['']
B 98
[''] ['']
B 98
[''] ['']
B 98
[''] ['']
B 9

H H
H 97
H H
H 97
H H
H 97
H H
H 96
H H
H 96
H H
H 97
H H
H 97
H H
H 96
H 97
H 95
H H
H 96
H H
H 95
H H
H 95
H H
H 95
H H
H 90
H 97
H 91
H 91
H 92
H 96
H 91
H 93
H 91
H 95
H 92
H H
H 94
H H
H 94
H H
H 94
H H
H 95
H H
H 95
H H
H 95
H H
H 95
H H
H 96
H H
H 95
H H
H 95
H H
H 95
H H
H 95
H H
H 95
H H
H 93
H H
H 93
H H
H 94
H H
H 94
H H
H 94
H H
H 94
H H
H 94
H H
H 94
H H
H 96
H H
H 96
H H
H 96
H 96
H 96
H 97
H 97
H 96
H 96
H 95
H 95
H 94
H 92
H 93
H 95
H 94
H 94
H 94
H 92
H 93
H 94
H 94
H 92
H 92
H 93
H 94
H 94
H 94
H 93
H 94
H 94
H 95
H 95
H 96
H 97
H 96
H 95
H 95
H 94
H 93
H 93
H 94
H 92
H 97
H H
H 94
H H
H H
H 91
H H
H 91
H H
H H
H H
H 92
H H
H H
H H
H H
H H
H H
H H
H H
H H
H H
H H
H H
H H
H H
H H
H H
H H
H H
H 91
H 90
H 92
H 92
H 96
H 93
H 94
H 93
H 92
H 93
H 92
H 93
H 92
H 92
H 94
H 92
H 93
H 90
H 92
H 92
H 90
H 90
H 91
H 91
H 92
H 93
H 96
H 93
H 97
H 97
H 95
H 92
H 94
H 95
H 93
H 95
H 93
H 92
H 94
H 94
H 95
H 94
H 95
H 94
H 96
H 95
H 97
H 97
H 96
H 98
H 96
H 96
H 96
H 95
H 95
H 94
H 

KeyboardInterrupt: 

In [None]:
print(frame.shape)
print(a.shape)

In [None]:
a = ['a','a','a','b','b','c','c','c','c']
from collections import Counter
print(Counter(a))

In [None]:
detections = detect_fn(input_tensor)

In [None]:
from matplotlib import pyplot as plt