1. Import Dependencies

In [4]:
import cv2
import uuid
import os
import time

2. Set Images to Collect

In [6]:
# Gesture classes to collect; each entry is used as a sub-folder name and as
# the prefix of every image file captured for that class.
labels = [
    'ThumbsUp',
    'ThumbsDown',
    'Up',
    'Down',
    'Left',
    'Right',
]
# Images captured per label -- raise this to give the detector more training data.
number_imgs = 10

3. Folder Setup

In [7]:
# Root folder for the collected images; one sub-folder per label is created below.
IMAGES_FILE_PATH = os.path.join('Tensorflow', 'workspace', 'images', 'collected_images')
# os.makedirs creates any missing parent folders and, with exist_ok=True, does
# nothing when the folder already exists. The same call works on every OS, so
# no per-platform shell command is needed and re-running the cell is safe.
os.makedirs(IMAGES_FILE_PATH, exist_ok=True)
for label in labels:
  path = os.path.join(IMAGES_FILE_PATH, label)
  os.makedirs(path, exist_ok=True)

4. Capture Images

In [12]:
# Capture `number_imgs` webcam frames for every entry in `labels` and save each
# one as <IMAGES_FILE_PATH>/<label>/<label>.<uuid>.jpg.
#
# The webcam (device index 0) is opened once for the whole run and released in
# the `finally` clause, so the device and the preview window are cleaned up
# even when the cell is interrupted or an error is raised part-way through.
webcam = cv2.VideoCapture(0)
try:
  if not webcam.isOpened():
    raise RuntimeError('Could not open webcam (device index 0)')
  for label in labels:
    print('Collecting images for {}'.format(label))
    # pause so the user can get into position for this label
    time.sleep(5)
    for image in range(number_imgs):
      print('Collecting image {}'.format(image))
      # grab one frame; `ret` is False when the read failed and `frame` is unusable
      ret, frame = webcam.read()
      if not ret:
        print('Failed to read a frame from the webcam, skipping image {}'.format(image))
        continue
      # uuid1 gives every capture a distinct file name within the label folder
      imgname = os.path.join(IMAGES_FILE_PATH, label, '{}.{}.jpg'.format(label, uuid.uuid1()))
      # save the frame to disk and show it in the preview window
      cv2.imwrite(imgname, frame)
      cv2.imshow('frame', frame)
      # pause between captures so the pose can be varied
      time.sleep(2)
      # pressing 'q' ends the current label early and moves on to the next one
      if cv2.waitKey(1) & 0xFF == ord('q'):
        break
finally:
  webcam.release()
  cv2.destroyAllWindows()

Collecting images for thumbs_up


KeyboardInterrupt: 

5. Label Images

In [9]:
import os

LABEL_PATH = os.path.join('Tensorflow', 'labelimg')
# utilize LabelImg, a popular image annotation tool created by Tzutalin
if not os.path.exists(LABEL_PATH):
    !mkdir {LABEL_PATH}
    !git clone https://github.com/tzutalin/labelImg {LABEL_PATH}
if os.name == 'posix': # linux
    !cd {LABEL_PATH} && make qt5py3
if os.name =='nt': # windows
    !cd {LABEL_PATH} && pyrcc5 -o libs/resources.py resources.qrc
!cd {LABEL_PATH} && python labelImg.py
    

pyrcc5 -o libs/resources.py resources.qrc
2024-03-10 15:15:14.049 python[13036:22078630] TSM AdjustCapsLockLEDForKeyTransitionHandling - _ISSetPhysicalKeyboardCapsLockLED Inhibit
Image:/Users/bks/Desktop/collected_images/ThumbsUp/Photo on 2024-03-10 at 6.15 AM #2.jpg -> Annotation:/Users/bks/Desktop/collected_images/ThumbsUp/Photo on 2024-03-10 at 6.15 AM #2.xml
Image:/Users/bks/Desktop/collected_images/ThumbsUp/Photo on 2024-03-10 at 6.15 AM.jpg -> Annotation:/Users/bks/Desktop/collected_images/ThumbsUp/Photo on 2024-03-10 at 6.15 AM.xml
Image:/Users/bks/Desktop/collected_images/ThumbsUp/Photo on 2024-03-10 at 6.16 AM #2.jpg -> Annotation:/Users/bks/Desktop/collected_images/ThumbsUp/Photo on 2024-03-10 at 6.16 AM #2.xml
Image:/Users/bks/Desktop/collected_images/ThumbsUp/Photo on 2024-03-10 at 6.16 AM #3.jpg -> Annotation:/Users/bks/Desktop/collected_images/ThumbsUp/Photo on 2024-03-10 at 6.16 AM #3.xml
Image:/Users/bks/Desktop/collected_images/ThumbsDown/Photo on 2024-03-10 at 6.21 AM

6. Train and Test



In [30]:
# Workflow for this step:
#   1. Use LabelImg to mark the region to be detected in each collected image
#      and save the annotation in XML format.
#   2. Manually copy part of the images, together with their XML files, into
#      the "train" folder and the remaining ones into the "test" folder.
# The "train" set is used to fit the model; the "test" set is used to measure
# its accuracy on images it has not seen.

# Folders for the train/test split and the path of the archive file.
TRAIN_PATH = os.path.join('Tensorflow', 'workspace', 'images', 'train')
TEST_PATH = os.path.join('Tensorflow', 'workspace', 'images', 'test')
ARCHIVE_PATH = os.path.join('Tensorflow', 'workspace', 'images', 'archive.tar.gz')

# os.makedirs creates missing parents and, with exist_ok=True, is a no-op when
# the folder already exists, so the same call works on every OS.
for split_path in (TRAIN_PATH, TEST_PATH):
  os.makedirs(split_path, exist_ok=True)
# ARCHIVE_PATH names a .tar.gz file, not a folder, so no directory is created
# at that path: a directory there would block writing the archive later.
# Its parent folder is the same 'images' folder created for TRAIN_PATH above.
