# 1. Import Dependencies

In [2]:
# !pip install opencv-python

In [1]:
# Import opencv
import cv2 

# Import uuid
import uuid

# Import Operating System
import os

# Import time
import time

# for handling files and paths
from pathlib import Path

# 2. Define Images to Collect

In [2]:
# Component classes to collect and annotate, one entry per class.
labels = [
    'resistor',
    'current_src',
    'voltage_src',
    'battery',
    'capacitor',
    'inductor',
]

# 3. Setup Folders

In [3]:
# Folder (relative to the notebook) holding the collected images, as a plain string.
IMAGES_PATH = str(Path('Tensorflow') / 'workspace' / 'images' / 'collectedimages')

# Names of the partition folders that are created next to IMAGES_PATH.
REQ_FILES = [
    'test',
    'train',
]

In [6]:
# Create the folder for collected images together with any missing parents.
# os.makedirs behaves the same on every OS, so no per-platform shell `mkdir`
# is needed, and exist_ok=True makes the cell safe to re-run.
os.makedirs(IMAGES_PATH, exist_ok=True)

# 4. Image Renaming

In [None]:
# for filename in os.listdir(IMAGES_PATH):
#     src_path = os.path.join(IMAGES_PATH, filename)
#     dst_path = os.path.join(IMAGES_PATH,'{}.jpg'.format(str(uuid.uuid1())))
#     os.rename(src_path, dst_path)

# 5. Image Labeling

In [None]:
# !pip install --upgrade pyqt5 lxml

In [None]:
# Where the labelImg checkout lives, relative to the notebook (kept as a string).
LABELIMG_PATH = str(Path('Tensorflow') / 'labelimg')

In [None]:
if not os.path.exists(LABELIMG_PATH):
    !mkdir {LABELIMG_PATH}
    !git clone https://github.com/tzutalin/labelImg {LABELIMG_PATH}

In [None]:
if os.name == 'posix':
    !make qt5py3
if os.name =='nt':
    !cd {LABELIMG_PATH} && pyrcc5 -o libs/resources.py resources.qrc

In [None]:
# Start labelImg.py from inside the checkout; the `cd` is chained with && so it
# applies to the same shell command that launches the script.
!cd {LABELIMG_PATH} && python labelImg.py

# 6. Move them into a Training and Testing Partition

In [None]:
# Train / test shares expressed as percentages, e.g. 80 and 20.
train_split = 80
test_split = 20

In [None]:
# Make sure the source folder exists; os.makedirs is portable across OSes and
# exist_ok=True keeps the cell re-runnable without a separate existence check.
os.makedirs(IMAGES_PATH, exist_ok=True)

# The partition folders are created next to IMAGES_PATH, i.e. in its parent.
TT_PATH = str(Path(IMAGES_PATH).parent)
for req_files in REQ_FILES:
    path = os.path.join(TT_PATH, req_files)
    os.makedirs(path, exist_ok=True)

In [None]:
# Move every file from IMAGES_PATH into TT_PATH/train or TT_PATH/test, label by label.
#
# Assumptions inherited from the counting logic below (TODO confirm against the data):
#   * every label contributes the same number of files to IMAGES_PATH;
#   * in sorted order each label's files are contiguous, and an image sits next
#     to its annotation (same name, different extension).
#
# os.listdir() returns entries in arbitrary order, so the listing is sorted
# explicitly; the per-label counting is meaningless on an unordered listing.
dir_list = sorted(os.listdir(IMAGES_PATH))
label_len = len(labels)
count, prev_item = 0, 0

# Number of files per label that go to train. Integer arithmetic keeps the
# cut-off exact instead of truncating a floating-point product.
train_split_val = int(len(dir_list) * train_split // (100 * label_len))
# Keep the cut-off even so an image and its annotation are never separated
# (an odd value would put the image in train and its annotation in test).
train_split_val -= train_split_val % 2
# Informational only: the loop sends everything past the train cut-off to test.
test_split_val = int(len(dir_list) * test_split // (100 * label_len))

train_dir = os.path.join(TT_PATH, 'train')
test_dir = os.path.join(TT_PATH, 'test')

for items in dir_list:
    src_path = os.path.join(IMAGES_PATH, items)

    if count < train_split_val:
        # Still inside the train share of the current label.
        os.rename(src_path, os.path.join(train_dir, items))
        count += 1
    else:
        os.rename(src_path, os.path.join(test_dir, items))

        # Files of the current label still to come, counting this one.
        test_complement = len(dir_list) / label_len - count
        count += 1

        # Name without its extension, used to recognise the second file of a pair.
        current_item = items.split('.')[:-1]
        # Start counting afresh for the next label once the last pair of this
        # label has been moved completely.
        if prev_item == current_item and test_complement <= 1:
            count = 0
        prev_item = current_item

# OPTIONAL - 7. Compress them for Colab Training

In [4]:
# The two partition folders and the archive all sit in the same images folder,
# so build the three paths in one pass over their final components.
TRAIN_PATH, TEST_PATH, ARCHIVE_PATH = (
    os.path.join('Tensorflow', 'workspace', 'images', leaf)
    for leaf in ('train', 'test', 'archive.tar.gz')
)

In [5]:
# Pack the train and test folders into a single gzip-compressed tarball at
# ARCHIVE_PATH (-c create, -z gzip, -f archive file name).
!tar -czf {ARCHIVE_PATH} {TRAIN_PATH} {TEST_PATH}