# Convert video to Image

In [1]:
import os
import cv2

In [2]:
rotate_video = True # rotate each saved image 90 degrees clockwise before writing it
frame_delta = 5 # save one image every `frame_delta` frames of the video
max_sample = 100 # maximum number of images saved per video (per class)

In [3]:
# Extract every `frame_delta`-th frame of each `videos/*.mp4` file and save it as
# `dataset/images/<video name>_<index>.jpg`. The video file name (text before the
# first ".") is recorded in `labels` and used as the image name prefix.
video_dir = "videos"
image_dir = "dataset/images"

# cv2.imwrite does not create missing directories; it just reports failure.
os.makedirs(image_dir, exist_ok=True)

labels = []
for name in sorted(os.listdir(video_dir)):  # sorted -> same processing order on every run
    if not name.endswith(".mp4"):
        continue

    video_name = name.split(".")[0]
    labels.append(video_name)

    cap = cv2.VideoCapture(os.path.join(video_dir, name))
    counter = 0
    try:
        # Stop at end of stream or once `max_sample` images were written.
        while cap.isOpened() and counter < max_sample:
            ret, frame = cap.read()
            if not ret:
                break
            # Keep only the frames whose position is a multiple of `frame_delta`.
            if cap.get(cv2.CAP_PROP_POS_FRAMES) % frame_delta != 0:
                continue
            if rotate_video:
                frame = cv2.rotate(frame, cv2.ROTATE_90_CLOCKWISE)
            image_path = "%s/%s_%d.jpg" % (image_dir, video_name, counter)
            # imwrite returns False instead of raising when the file cannot be written;
            # surface that so the counter never reports images that do not exist.
            if not cv2.imwrite(image_path, frame):
                raise IOError("could not write image `%s`" % image_path)
            counter += 1
    finally:
        # Always free the capture handle, even if a frame failed to process.
        cap.release()

    print("finish processing %d images %s..." % (counter, video_name))

finish processing 100 images batman...
finish processing 100 images ironman...


# Annotate Dataset

- Download & Install `Git` :
    - https://git-scm.com/downloads

- Clone the `labelImg` repository using git (***only if it does not already exist in this directory; run only once***)
    - or download the repository as a zip file instead (~240 MB).

In [None]:
# Fetch the labelImg sources with git; later cells expect them in the `labelImg/` folder.
! git clone https://github.com/tzutalin/labelImg.git

- Install dependencies (***run only once***)

In [4]:
# Install PyQt 5 and lxml with conda; `-y` skips the confirmation prompt.
! conda install pyqt=5 -y
! conda install -c anaconda lxml -y

Collecting package metadata (current_repodata.json): ...working... done
Solving environment: ...working... done

## Package Plan ##

  environment location: C:\Users\yunus\Anaconda3\envs\CVDL

  added / updated specs:
    - pyqt=5


The following packages will be UPDATED:

  ca-certificates                      2020.12.8-haa95532_0 --> 2021.1.19-haa95532_0


Preparing transaction: ...working... done
Verifying transaction: ...working... done
Executing transaction: ...working... done
Collecting package metadata (current_repodata.json): ...working... done
Solving environment: ...working... done

## Package Plan ##

  environment location: C:\Users\yunus\Anaconda3\envs\CVDL

  added / updated specs:
    - lxml


The following packages will be downloaded:

    package                    |            build
    ---------------------------|-----------------
    lxml-4.6.1                 |   py36h1350720_0         1.1 MB  anaconda
    openssl-1.1.1h             |       he774522_0         5.8 M

- build labelImg (***run only once***)

In [5]:
# Enter the labelImg folder, generate libs/resources.py from resources.qrc with pyrcc5,
# then return to the parent (project) directory.
%cd labelImg

! pyrcc5 -o libs/resources.py resources.qrc

%cd ..

C:\Users\yunus\Documents\GitHub\ObjectDetection-Tensorflow\labelImg
C:\Users\yunus\Documents\GitHub\ObjectDetection-Tensorflow


- run `labelImg`

In [4]:
# Start the labelImg annotation tool from the project root.
# NOTE(review): presumably this cell blocks until the labelImg window is closed - confirm.
! python labelImg/labelImg.py

Image:C:\Users\yunus\Documents\GitHub\ObjectDetection-Tensorflow\dataset\images\batman_0.jpg -> Annotation:C:/Users/yunus/Documents/GitHub/ObjectDetection-Tensorflow/dataset/images/batman_0.xml
Image:C:\Users\yunus\Documents\GitHub\ObjectDetection-Tensorflow\dataset\images\batman_1.jpg -> Annotation:C:/Users/yunus/Documents/GitHub/ObjectDetection-Tensorflow/dataset/images/batman_1.xml


qt.qpa.fonts: Unable to open default EUDC font: "EUDC.TTE"


- open the `dataset/images/` directory of this project
- set annotation type to `Yolo` <br>
![](resource/labelimg.png)
- annotate image dataset

### **[NOTE!]** 
- if you don't have a dataset for this tutorial, you can use the **Batman & Ironman dataset (yolo)** from the following link https://drive.google.com/file/d/1UASv-heGjnBB73jaT_pwhuqf7x3o-jEt/view?usp=sharing
- extract that zip file, and copy all extracted **image** & **.txt** (YOLO annotation) files to the `dataset/images/` folder of this project.

# Split Dataset

In [1]:
split_size = 0.2 # fraction of each class's images moved to the test set (0.2 = 20%); the rest go to train

In [2]:
import os
import shutil

# Prepare empty-or-existing `train/` and `test/` folders inside `dataset/images/`.
# If either folder already exists, ask before wiping its content. Existence is
# checked explicitly so unrelated errors (e.g. permissions) are not misreported
# as "already exist".
split_dirs = ["dataset/images/train", "dataset/images/test"]
existing_dirs = [path for path in split_dirs if os.path.isdir(path)]

if existing_dirs:
    print("`train/` and `test/` dir already exist!")
    if input("do you want to delete `train/` and `test/` dir content? [y/n]") == 'y' :
        # Remove only the folders that are really there; rmtree fails on a missing path.
        for path in existing_dirs:
            shutil.rmtree(path)

# Create whatever is missing (also creates `dataset/images/` itself if needed),
# so the split step always has both destination folders.
for path in split_dirs:
    os.makedirs(path, exist_ok=True)

`train/` and `test/` dir already exist!


do you want to delete `train/` and `test/` dir content? [y/n] n


In [3]:
# Split the annotated images in `dataset/images/` into `test/` and `train/`.
# For every class, the first `split_size` share of its images (in sorted name
# order) goes to `test/`, the rest to `train/`. Each image moves together with
# its `.txt` annotation file.
image_dir = "dataset/images"

# Base names (no extension) of all images still waiting to be split.
# splitext strips only the final ".jpg"; sorting makes the split reproducible.
all_filenames = sorted(
    os.path.splitext(file)[0]
    for file in os.listdir(image_dir)
    if file.endswith(".jpg")
)

# Group images by class label = file name prefix before the first "_".
# NOTE: a label that itself contains "_" is truncated by this rule; the label
# id editing cell uses the same rule, so keep both in sync if it is changed.
file_counter = {}
file_group = {}
for name in all_filenames:
    label = name.split("_")[0]
    file_group.setdefault(label, []).append(name)
    file_counter[label] = len(file_group[label])

print(file_counter)

# Verify every image has its annotation BEFORE moving anything, so a missing
# file cannot leave the dataset half-split.
missing_annotations = [
    name for name in all_filenames
    if not os.path.isfile(os.path.join(image_dir, name + ".txt"))
]
if missing_annotations:
    raise FileNotFoundError(
        "missing .txt annotation for %d image(s), e.g. `%s.jpg`; nothing was moved"
        % (len(missing_annotations), missing_annotations[0])
    )

# shutil.move into a non-existent folder would RENAME the file to that path,
# so make sure both destinations are real directories first.
for target in ("test", "train"):
    os.makedirs(os.path.join(image_dir, target), exist_ok=True)

for label in file_counter:
    n_split = int(file_counter[label] * split_size)
    for i, name in enumerate(file_group[label]):
        target_dir = os.path.join(image_dir, "test" if i < n_split else "train")
        for extension in (".jpg", ".txt"):
            shutil.move(os.path.join(image_dir, name + extension), target_dir)

{'batman': 100, 'ironman': 100}


# Create Label Map data
- label map structure (`.names`) -> for **Darknet** training phase (Colab):
    ```
    batman
    ironman
    xxxx
    ```
- label map structure (`.json`) -> for inference phase in Jetson Nano (**OpenCV DNN**):
    ```
    {
    "0" : "batman",
    "1" : "ironman",
    "2" : "xxxx"
    }
    ```

In [4]:
import json

# Write the label maps from the class names collected in `file_counter`
# (dict order defines the class ids, starting at 0).
if not file_counter:
    # Happens e.g. when the split cell is re-run after all images were already
    # moved; refuse to overwrite the existing label maps with empty ones.
    raise ValueError("`file_counter` is empty - no class labels to write")

class_names = list(file_counter)

# create label map .names (one class name per line, for Darknet training)
os.makedirs("dataset/data", exist_ok=True)  # open() does not create the folder
with open("dataset/data/object-detection.names", "w") as fw:
    for name in class_names:
        fw.write('%s\n' % name)

# create label map json ({"<class id>": "<class name>"}, for inference)
label_json = {str(i): name for i, name in enumerate(class_names)}

with open("object-detection.json", 'w') as f:
    json.dump(label_json, f)

# Edit Label Id (not mandatory) 
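- Use this script if the **label id** is **wrong** in the YOLO annotation files `train/<filename>.txt` or `test/<filename>.txt`. It rewrites the first field of every annotation line with the id that `object-detection.json` assigns to the class name taken from the file name prefix (the text before the first `_`).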

In [11]:
import os
import json

# Rewrite the class id (first field) of every YOLO annotation line in
# `dataset/images/test` and `dataset/images/train`, using the id that
# `object-detection.json` assigns to the class name in the file name prefix.

# Invert the label map: class name -> class id (ids are the JSON keys, i.e. strings).
with open("object-detection.json") as f:
    label_map = json.load(f)
labels = {class_name: class_id for class_id, class_name in label_map.items()}

for folder in ['test', 'train']:
    folder_path = "dataset/images/%s" % folder
    for file in os.listdir(folder_path):
        if not file.endswith(".txt"):
            continue

        # Class name = file name prefix before the first "_" (same rule as the split step).
        class_name = file.split("_")[0]
        if class_name not in labels:
            # e.g. a stray non-annotation .txt file; leave it untouched instead of crashing midway.
            print("skip `%s/%s`: `%s` is not in the label map" % (folder, file, class_name))
            continue

        annotation_path = "%s/%s" % (folder_path, file)
        with open(annotation_path, 'r') as f:
            text = f.readlines()

        fixed_lines = []
        for line in text:
            fields = line.split()
            if not fields:
                # Blank line: drop it rather than emitting a bare id with no
                # newline, which would corrupt the following annotation row.
                continue
            fields[0] = labels[class_name]
            fixed_lines.append(" ".join(fields) + "\n")

        with open(annotation_path, 'w') as f1:
            f1.writelines(fixed_lines)

# Zip Dataset directory

In [17]:
import zipfile

In [18]:
def zipdir(path, ziph):
    """Add every file found under `path` (recursively) to the open archive `ziph`.

    Archive names are computed relative to the parent of `path`, so the
    entries keep the top-level folder name (e.g. `dataset/...`).
    Directories that contain no files produce no entry.
    """
    archive_base = os.path.join(path, '..')
    for current_dir, _subdirs, filenames in os.walk(path):
        for filename in filenames:
            full_path = os.path.join(current_dir, filename)
            ziph.write(full_path, os.path.relpath(full_path, archive_base))

# Pack the whole `dataset/` folder into `dataset.zip` (deflate-compressed).
# The context manager closes and finalizes the archive even if zipdir() raises.
with zipfile.ZipFile('dataset.zip', 'w', zipfile.ZIP_DEFLATED) as zipf:
    zipdir('dataset/', zipf)

print("`dataset.zip` created successfully!")

`dataset.zip` created successfully!


# Run Darknet - Yolo Training in Colab

- Open Google Colab : https://colab.research.google.com/notebooks/intro.ipynb#recent=true
- Click tab **Upload**,
- Upload `Training Darknet - Yolo using Custom Dataset.ipynb` to google colab