# 1. Import Dependencies

In [None]:
# !pip install opencv-python

In [None]:
# Import opencv
import cv2 

# Import uuid
import uuid

# Import Operating System
import os

# Import time
import time

# 2. Define Images to Collect

In [None]:
# Gesture classes to collect; each label gets its own sub-folder of images.
labels = ['thumbsup', 'thumbsdown', 'thankyou', 'livelong', 'hello']
# Number of images captured for each label.
number_imgs = 10

# 3. Setup Folders 

First, we build the image directory path using `os.path.join()`.

In [None]:
# Relative directory that holds the collected images, one sub-folder per label.
IMAGES_PATH = os.path.join('Tensorflow', 'workspace', 'images', 'collectedimages')

In [None]:
# Show the joined path; the separator depends on the operating system.
print(IMAGES_PATH)

---
Now we want to create the folder structure for that path:

<br>
we can see what type of os is being used with this code:

In [None]:
# Evaluates to the OS family identifier, e.g. 'nt' or 'posix'.
os.name

Because I am using a `Windows` machine, this reads out `nt`; on a `Linux` machine it reads out `posix`.

In [None]:
# Create the root image directory and one sub-folder per label.
# os.makedirs builds any missing parent directories on every OS, so no
# per-OS branching is needed, and unlike the `mkdir` shell command it is not
# affected by spaces or shell syntax inside the path.
os.makedirs(IMAGES_PATH, exist_ok=True)
for label in labels:
    path = os.path.join(IMAGES_PATH, label)
    # exist_ok=True makes re-running the cell harmless.
    os.makedirs(path, exist_ok=True)

# x. Testing the capture device position

In [None]:
# Open the capture device with index 0 and preview it until 'q' is pressed.
cap = cv2.VideoCapture(0)

try:
    while cap.isOpened():
        ret, frame = cap.read()
        # ret is False (and frame is empty) when no frame could be grabbed;
        # passing that frame to cv2.resize would raise, so stop instead.
        if not ret:
            print('Failed to read a frame from the capture device')
            break
        cv2.imshow('frame', cv2.resize(src=frame, dsize=(800, 600)))

        # waitKey also services the GUI event loop; quit on 'q'.
        if cv2.waitKey(1) & 0xFF == ord('q'):
            break
finally:
    # Always free the camera and close the preview window, even on error.
    cap.release()
    cv2.destroyAllWindows()

# 4. Capture Images

In [None]:
def capture_image():
    """Capture ``number_imgs`` webcam frames for every entry in ``labels``.

    For each label the capture device with index 0 is opened, execution is
    paused for 5 seconds, and then one frame is grabbed roughly every
    2 seconds. Each frame is written to
    ``IMAGES_PATH/<label>/<label>.<uuid>.jpg`` and shown in a preview window.
    Pressing ``q`` in the preview window skips the remaining images of the
    current label.

    Reads the module-level ``labels``, ``number_imgs`` and ``IMAGES_PATH``.

    Raises:
        RuntimeError: if the capture device cannot be opened.
    """
    try:
        for label in labels:
            cap = cv2.VideoCapture(0) # Connect to our Webcam or Capture device
            try:
                if not cap.isOpened():
                    raise RuntimeError('Could not open the capture device (index 0)')
                print('Collecting images for {}'.format(label))
                # delay execution for 5 seconds
                time.sleep(5)
                for imgnum in range(number_imgs):
                    print('{}. Collecting image {}'.format(imgnum + 1, imgnum))
                    # Grabs, decodes and returns the next video frame.
                    ret, frame = cap.read()
                    # A failed grab returns no frame; writing or resizing it
                    # would raise, so skip this image instead.
                    if not ret:
                        print('Failed to grab a frame for {}, skipping it'.format(label))
                        continue
                    # creates an unique path for each image
                    imgname = os.path.join(IMAGES_PATH, label, label + '.' + '{}.jpg'.format(str(uuid.uuid1())))
                    # Saves an image/frame to a specified file
                    cv2.imwrite(imgname, frame)
                    # Displays an image in the specified window
                    cv2.imshow('frame', cv2.resize(src=frame, dsize=(800, 600)))
                    time.sleep(2)

                    if cv2.waitKey(1) & 0xFF == ord('q'):
                        break
            finally:
                # Release the device opened for THIS label; the device is
                # re-opened for the next label, so releasing only once after
                # the loop would leak every earlier handle.
                cap.release()
    finally:
        # Close the preview window even if capturing failed part-way.
        cv2.destroyAllWindows()
    
capture_image()

# 5. Image Labelling (using labelImg library)

[This link](https://github.com/heartexlabs/labelImg) describes the installation steps for each type of OS.

---

<br>
The steps below are for a local Windows machine.

---
first we should install pyqt5 and lxml with the below code:

In [None]:
# !pip install --upgrade pyqt5 lxml

with this code, we can see that pyqt5 and lxml are installed with many other dependencies.

In [None]:
# !pip list

<br>
create a new path for label image package.

In [None]:
# Relative directory into which the labelImg repository is cloned.
LABELIMG_PATH = os.path.join('Tensorflow', 'labelimg')

<br>
create a new folder path (or folder directory) for the label image package.

In [None]:
# Clone labelImg only once: everything is skipped when the target directory
# already exists.
if not os.path.exists(LABELIMG_PATH):
    # Lines starting with `!` are run as shell commands by IPython, with
    # {LABELIMG_PATH} interpolated from the Python variable.
    !mkdir {LABELIMG_PATH}
    !git clone https://github.com/heartexlabs/labelImg {LABELIMG_PATH}

the next thing we need to do is actually install label image.

In [None]:
# Build labelImg's Qt resources with the command that matches the current OS.
if os.name == 'posix':
    # POSIX systems: run the `qt5py3` target of the Makefile in the checkout.
    !cd {LABELIMG_PATH} && make qt5py3
if os.name =='nt':
    # Windows: compile resources.qrc into libs/resources.py with pyrcc5.
    !cd {LABELIMG_PATH} && pyrcc5 -o libs/resources.py resources.qrc

Finally, running this code opens the labelImg application.

In [None]:
# Start labelImg.py from inside the cloned directory (shell command run via IPython).
!cd {LABELIMG_PATH} && python labelImg.py

# 6. Move them into a Training and Testing Partition

This step is meant to be run on a local machine.

In [None]:
import random
import shutil

In [None]:
def M_pairFiles(jpg:list, xml:list): # create a list of pair-file tuples
    """Pair each image with its annotation and return the pairs shuffled.

    Both lists are ordered by file name without extension before pairing, so
    ``foo.jpg`` is matched with ``foo.xml`` regardless of the order in which
    the names were passed in (``os.listdir`` gives no ordering guarantee).
    The input lists are not modified.

    Args:
        jpg: image file names.
        xml: annotation file names.

    Returns:
        A list of ``(image, annotation)`` tuples in random order. Its length
        is that of the shorter input list.
    """
    def _stem(name):
        # File name without its final extension, e.g. 'a.1.jpg' -> 'a.1'.
        return os.path.splitext(name)[0]

    pairFiles = list(zip(sorted(jpg, key=_stem), sorted(xml, key=_stem)))
    random.shuffle(pairFiles)
    return pairFiles

def moveF_test_train():
    """Split the labelled images of every class into train and test folders.

    For each class folder under ``IMAGES_PATH`` the ``.jpg`` images are paired
    with their ``.xml`` annotations, shuffled, and moved: 75% of the pairs go
    to ``Tensorflow/workspace/images/train`` and the rest to
    ``Tensorflow/workspace/images/test``. When a folder holds more images
    than annotations, images whose file name does not contain the class name
    are treated as background images (no annotation) and are moved to the
    train folder. Files that are neither ``.jpg`` nor ``.xml`` are ignored.

    After all classes have been processed, ``IMAGES_PATH`` and anything left
    inside it is deleted.

    Raises:
        ValueError: if the images and annotations of a class cannot be paired
            one-to-one.
        FileNotFoundError: if ``IMAGES_PATH`` does not exist.
    """
    def _stem(name):
        # File name without its final extension.
        return os.path.splitext(name)[0]

    # Seed BEFORE any shuffling so the split is reproducible between runs.
    random.seed(12345)

    train_dir = os.path.join('Tensorflow', 'workspace', 'images', 'train')
    test_dir = os.path.join('Tensorflow', 'workspace', 'images', 'test')

    # Sorted so the processing order (and thus the seeded split) is stable.
    for clss in sorted(os.listdir(path=IMAGES_PATH)):
        src = os.path.join(IMAGES_PATH, clss)
        if not os.path.isdir(src):
            continue

        classinfo = os.listdir(src)
        # An empty class folder must not stop the remaining classes from
        # being processed: the directory tree is deleted at the end.
        if len(classinfo) == 0:
            continue

        jpgList = []
        xmlList = []
        for file in classinfo:
            ext = os.path.splitext(file)[1].lower()
            if ext == '.jpg':
                jpgList.append(file)
            elif ext == '.xml':
                xmlList.append(file)

        if len(xmlList) > len(jpgList):
            raise ValueError("the length of xmlList must not be bigger than the length of jpgList")

        # BI: Background Images are images that don't have any label. We add
        # them to the train folder to reduce false detections.
        BI = []
        if len(jpgList) > len(xmlList):
            # Build new lists instead of removing items from the list being
            # iterated, which would skip elements.
            BI = [jpg for jpg in jpgList if clss not in _stem(jpg)]
            jpgList = [jpg for jpg in jpgList if clss in _stem(jpg)]
            if len(jpgList) != len(xmlList):
                raise ValueError(
                    "class {!r}: {} images but {} annotations after removing background images".format(
                        clss, len(jpgList), len(xmlList)))

        # Order both lists by stem so every image lines up with its own
        # annotation before the pairs are built and shuffled.
        pairFiles = M_pairFiles(jpg=sorted(jpgList, key=_stem),
                                xml=sorted(xmlList, key=_stem))

        length = len(pairFiles)
        # We allocate 75% of the data to the training set and the remaining
        # 25% to the test set; change the ratio here if needed.
        trainNum = int(length * 0.75) # 75%

        # pairFiles is already shuffled, so slicing gives a random split
        # without replacement.
        trainLi = pairFiles[:trainNum]
        testLi = pairFiles[trainNum:]

        # Comprehensions (rather than zip(*...)) also work for empty lists.
        train_files = [image for image, _ in trainLi] + BI
        train_files += [annotation for _, annotation in trainLi]
        test_files = [image for image, _ in testLi]
        test_files += [annotation for _, annotation in testLi]

        for path in (train_dir, test_dir):
            os.makedirs(path, exist_ok=True)

        for name in train_files:
            shutil.move(os.path.join(src, name), train_dir)

        for name in test_files:
            shutil.move(os.path.join(src, name), test_dir)

    if os.path.exists(IMAGES_PATH):
        shutil.rmtree(IMAGES_PATH)

try:
    moveF_test_train()
except FileNotFoundError as err:
    # Expected when this cell is re-run after a successful split, because
    # moveF_test_train deletes IMAGES_PATH at the end. Report it instead of
    # hiding it so a genuinely missing path is still noticed.
    print('Nothing to move: {}'.format(err))

# OPTIONAL - 7. Compress them for Colab Training

In [None]:
# Folders holding the train / test partitions, and the archive file to create.
TRAIN_PATH = os.path.join('Tensorflow', 'workspace', 'images', 'train')
TEST_PATH = os.path.join('Tensorflow', 'workspace', 'images', 'test')
ARCHIVE_PATH = os.path.join('Tensorflow', 'workspace', 'images', 'archive.tar.gz')

Create a compressed archive:

In [None]:
# Shell command run via IPython: pack the train and test folders into one
# gzip-compressed tar archive (-c create, -z gzip, -f output file).
!tar -czf {ARCHIVE_PATH} {TRAIN_PATH} {TEST_PATH}