# **Initialization:**

In [None]:
# Fetch the YOLOv5 sources into ./yolov5 (the next cell changes into that folder).
# NOTE(review): the clone is not pinned to a tag or commit, so a later re-run may
# pull a different revision than the one these results came from — consider pinning.
!git clone https://github.com/ultralytics/yolov5 

In [None]:
# Work from inside the cloned repository: the relative paths used by the cells
# below (data/, runs/, train.py, ...) are resolved against this directory.
%cd yolov5
!pip install -r requirements.txt 

In [None]:
# import dataset from google drive
from google.colab import drive
drive.mount('/content/gdrive')
!ln -s /content/gdrive/My\ Drive/ /mydrive
!ls /mydrive


In [None]:
# Copy the checkpoint stored on Drive into the working directory.
# NOTE(review): the current directory is yolov5 at this point, so
# `../yolov5/weights.pt` appears to be just `weights.pt`; the detect cell further
# down loads `weights_last.pt` instead — confirm which file name is intended.
!cp -rf /mydrive/last.pt ../yolov5/weights.pt

In [None]:
# Extract the dataset archive under data/ (the split cell reads data/newPack/*).
# NOTE(review): presumably the archive contains a top-level newPack/ folder — confirm.
!unzip /mydrive/newPack.zip -d data

In [None]:
# Keep a second copy of the Drive checkpoint under data/.
!cp /mydrive/last.pt data/last_trained.pt

In [None]:
# Extract extra.zip into data/images/extra_test.
# NOTE(review): the split section below starts with `rm -rf data/images`, which
# removes this folder again when the notebook is run top to bottom.
!unzip /mydrive/extra.zip -d data/images/extra_test

# **Split train and test files:**

In [None]:
import os
import glob
import random
import shutil

In [None]:
!rm -rf data/images
!rm -rf data/labels

!mkdir data/images
!mkdir data/images/test
!mkdir data/images/train

!mkdir data/labels
!mkdir data/labels/test
!mkdir data/labels/train

In [None]:
# Split the image/label pairs found in data/newPack into train and test folders.

TRAIN_COUNT = 1085  # pairs copied to the train split (70% of TOTAL_COUNT)
TOTAL_COUNT = 1550  # pairs used overall; pairs after the train ones go to test
SPLIT_SEED = 0      # fixed seed so the same split is produced on every run


def pair_images_with_labels(paths):
  """Group files that share a base name (without extension) into pairs.

  Args:
    paths: iterable of file paths, in any order.

  Returns:
    (pairs, unpaired): `pairs` is a list of 2-tuples ordered by base name; inside
    a tuple the two paths are in sort order, which is the same convention the
    adjacent-pair logic relied on (e.g. `x.jpg` before `x.txt`). `unpaired`
    lists every path whose base name does not occur exactly twice.
  """
  by_stem = {}
  for path in sorted(paths):
    stem = os.path.splitext(os.path.basename(path))[0]
    by_stem.setdefault(stem, []).append(path)

  pairs = []
  unpaired = []
  for stem in sorted(by_stem):
    group = by_stem[stem]
    if len(group) == 2:
      pairs.append((group[0], group[1]))
    else:
      # A lone or duplicated file must not shift the pairing of the files after it.
      unpaired.extend(group)
  return pairs, unpaired


def copy_pairs(pairs, subset, root='data'):
  """Copy each (image, label) pair into <root>/images/<subset> and <root>/labels/<subset>.

  Args:
    pairs: list of (image_path, label_path) tuples.
    subset: name of the split folder, e.g. 'train' or 'test'.
    root: directory that holds the images/ and labels/ trees.

  Returns:
    Number of pairs copied.
  """
  if not pairs:
    return 0
  img_dir = os.path.join(root, 'images', subset)
  lbl_dir = os.path.join(root, 'labels', subset)
  # Create the targets here so the cell does not depend on the mkdir cell above.
  os.makedirs(img_dir, exist_ok=True)
  os.makedirs(lbl_dir, exist_ok=True)
  for img_src, lbl_src in pairs:
    shutil.copyfile(img_src, os.path.join(img_dir, os.path.basename(img_src)))
    shutil.copyfile(lbl_src, os.path.join(lbl_dir, os.path.basename(lbl_src)))
  return len(pairs)


# Not used by the split itself; kept in case other cells reference them.
image_list = list()
label_list = list()

total = glob.glob('data/newPack/*')
total.sort()

total_tuples, unpaired = pair_images_with_labels(total)
if unpaired:
  print("WARNING:", len(unpaired), "file(s) without a matching partner were skipped")

# A local Random instance keeps the split reproducible without touching the
# global random state.
random.Random(SPLIT_SEED).shuffle(total_tuples)

# With TOTAL_COUNT or more pairs this is exactly 1085 train / 465 test; with fewer
# pairs the same ratio is applied instead of failing with an IndexError.
usable = min(len(total_tuples), TOTAL_COUNT)
num = usable * TRAIN_COUNT // TOTAL_COUNT
if usable < TOTAL_COUNT:
  print("WARNING: only", usable, "pairs available, expected", TOTAL_COUNT)

copy_pairs(total_tuples[:num], 'train')
copy_pairs(total_tuples[num:usable], 'test')

In [None]:
# Check Distributions
# Compare the share of images whose file name starts with a digit in the full
# set and in each split; similar percentages mean the shuffle kept the mix.

alls = glob.glob('data/newPack/*.jpg')
trains = glob.glob('data/images/train/*')
tests = glob.glob('data/images/test/*')
nums = ['1', '2', '3', '4', '5', '6', '7', '8', '9', '0']


def digit_prefixed_percent(paths):
  """Return the percentage (0-100) of paths whose file name starts with a digit.

  The file name is taken with os.path.basename, so the result does not depend
  on the length of the directory prefix. (The fixed offsets used before pointed
  at the first character for data/newPack but at the second character for the
  train and test folders.) An empty list yields 0.0 instead of dividing by zero.
  """
  if not paths:
    return 0.0
  hits = sum(1 for path in paths if os.path.basename(path)[:1] in nums)
  return 100 * hits / len(paths)


all_per = digit_prefixed_percent(alls)
test_per = digit_prefixed_percent(tests)
train_per = digit_prefixed_percent(trains)

print(all_per, test_per, train_per)

58.064516129032256 57.204301075268816 58.433179723502306


# **Train, test, and visualize the model:**

In [None]:
# Remove a previous training run directory.
# NOTE(review): the run name here (yolo_idcard32) differs from the `--name` passed
# to train.py below (yolo_idcard3) — presumably left over from an earlier session;
# confirm which directory should be cleared.
!rm -rf runs/train/yolo_idcard32

In [None]:
# TRAIN
# Start from the yolov5s.pt checkpoint: 640 px inputs, batch size 64, 100 epochs,
# with --cache enabled.
# NOTE(review): idcard_data.yaml and data/yolov5s.yaml are not created by any cell
# visible here — confirm both exist in the working directory before running.
!python train.py --img 640 --cfg data/yolov5s.yaml --batch 64 --epochs 100 --data idcard_data.yaml --weights yolov5s.pt --name yolo_idcard3 --cache

In [None]:
# Free Space!

In [None]:
# TEST
# Evaluate a trained checkpoint with val.py (`--task val`).
# NOTE(review): the weights path points at run `yolo_idcard35`, while the train
# cell passes `--name yolo_idcard3` — the path looks tied to one particular
# session; confirm it matches the run that was actually produced.
!python val.py --data idcard_data.yaml --weights runs/train/yolo_idcard35/weights/last.pt --name yolo_idcard3 --task val

In [None]:
# Visualize
# Plot the contents of results.csv with the plotting helper shipped in the
# cloned repository (utils/plots.py).
# NOTE(review): the path is relative to the current directory — presumably the
# file has to be copied out of the run folder first; confirm.
from utils.plots import plot_results
plot_results('results.csv')

In [None]:
# DETECT
# Run detect.py on a single image with a saved checkpoint.
# NOTE(review): neither 02.jpeg nor weights_last.pt is created by a cell visible
# here (the copy cells above write weights.pt and data/last_trained.pt) — confirm.

# Optional timing harness (disabled); re-enable together with the two lines below
# the command to measure how long the call takes.
# import timeit
# start = timeit.default_timer()

!python detect.py --source 02.jpeg --weights weights_last.pt --name idcard_detect

# stop = timeit.default_timer()
# print('Time: ', stop - start)

In [None]:
# Copy the detection outputs to Drive so they survive the end of the session.
!cp -r runs/detect/ /mydrive/YOLOv5Performance/new_detect

In [None]:
!mkdir /mydrive/YOLOv5Performance/200e/val

In [None]:
# Delete one detection run folder.
# NOTE(review): no visible cell produces a run named yolo_idcard_extra2 (the
# detect cell above uses `--name idcard_detect`) — confirm this is still needed.
!rm -rf runs/detect/yolo_idcard_extra2

# **OCR:**

In [None]:
!pip uninstall imgaug

In [None]:
# Install the pytesseract Python package used by the OCR cells.
# NOTE(review): the recorded output shows Pillow being replaced (7.1.2 -> 9.1.0);
# presumably the runtime must be restarted if PIL was already imported — confirm.
# The version is not pinned, so a re-run may install a different release.
!pip install pytesseract

Collecting pytesseract
  Downloading pytesseract-0.3.9-py2.py3-none-any.whl (14 kB)
Collecting Pillow>=8.0.0
  Downloading Pillow-9.1.0-cp37-cp37m-manylinux_2_17_x86_64.manylinux2014_x86_64.whl (4.3 MB)
[K     |████████████████████████████████| 4.3 MB 5.1 MB/s 
Installing collected packages: Pillow, pytesseract
  Attempting uninstall: Pillow
    Found existing installation: Pillow 7.1.2
    Uninstalling Pillow-7.1.2:
      Successfully uninstalled Pillow-7.1.2
Successfully installed Pillow-9.1.0 pytesseract-0.3.9


In [None]:
!sudo apt-get install tesseract-ocr-fas

In [None]:
from PIL import Image
import pytesseract
import cv2 as cv
import numpy as np
from matplotlib import pyplot as plt
# Print arrays in full instead of the summarised form with "..."; handy for
# inspecting pixel rows, but printing a whole image becomes very verbose.
np.set_printoptions(threshold=np.inf)

In [None]:
# Run Tesseract on the card image with the "fas" language data, print the
# recognised text, and display the image as the cell result.
IMG = Image.open('id.jpg')
# Earlier preprocessing experiments, left disabled:
# IMG.save("id2.jpg", dpi=(300,300))
# IMG = Image.open('id.jpg')
# IMG = cv.cvtColor(IMG, cv.COLOR_RGB2BGR)
# IMG = IMG.convert("0")
# IMG = cv.imread('id.jpg', cv.IMREAD_GRAYSCALE)
LANG = "fas"
# NOTE(review): CONFIG is defined but never passed to image_to_string below, so
# the --psm/--oem options currently have no effect — confirm whether
# `config=CONFIG` was intended.
CONFIG = "--psm 11 --oem 3"
print(pytesseract.image_to_string(IMG, lang=LANG))
IMG

# **Split IMG**

In [None]:
# Binarise the card image and reduce it to one value per pixel so that blank
# (fully white) columns can be located by the helpers below.
img = cv.imread('id.jpg')
if img is None:
  # cv.imread signals a missing or unreadable file by returning None.
  raise FileNotFoundError("could not read 'id.jpg'")
main_img = img
ret1, dst = cv.threshold(img, 127, 255, cv.THRESH_BINARY)

# Average the three thresholded channels in one vectorised step (this replaces
# a per-pixel Python loop). Every channel is 0 or 255 after THRESH_BINARY, so a
# value of 255.0 means all three channels were above the threshold.
dst = dst.sum(axis=2) / 3
# print(dst)

rows, cols = dst.shape
print(cols, rows)

def find_free_space(dst, cols, rows):
  """Return the index of the first blank column that follows non-blank content.

  A column is blank when the sum of its values divided by `rows` equals 255.
  Leading blank columns are skipped first; the search then continues to the
  next blank column.

  Args:
    dst: 2-D array indexed as [row, column].
    cols: number of columns to examine; clamped to the real width of `dst` so a
      stale value cannot index past the end.
    rows: number of rows, used as the divisor for the column average.

  Returns:
    The column index, or None when no such column exists (for example when
    the image is entirely blank or the content reaches the last column).
  """
  columns = dst.T
  limit = min(cols, len(columns))

  def is_blank(idx):
    return np.divide(np.sum(columns[idx]), rows) == 255

  i = 0
  # Skip the leading margin; the bound stops an all-blank image running off the end.
  while i < limit and is_blank(i):
    i += 1
  # Walk through the content until the next blank column.
  while i < limit:
    if is_blank(i):
      return i
    i += 1
  return None

def split_img(img, dst, count=10, out_dir="subs"):
  """Cut `img` into vertical pieces at blank columns and save each piece as a PNG.

  Each piece runs from the current left edge up to the first blank column that
  follows content, as reported by find_free_space. Pieces are written to
  <out_dir>/id<i>.png.

  Args:
    img: image array to cut, indexed as [row, column, ...].
    dst: 2-D array with the same width as `img`, where 255 marks background.
    count: maximum number of pieces to write (default 10, as before).
    out_dir: output folder; created when missing (default "subs", as before).

  Raises:
    IOError: when a piece cannot be written.
  """
  # cv.imwrite returns False when the target folder does not exist, so create it.
  os.makedirs(out_dir, exist_ok=True)

  for i in range(count):
    # Measure the remaining strip on every pass: the module-level cols/rows
    # describe the uncut image and become stale after the first cut.
    height, width = dst.shape[:2]
    if width == 0 or np.all(dst == 255):
      # Only background is left; nothing more to cut.
      break

    split = find_free_space(dst, width, height)
    print(split)

    if split is None:
      # No blank column after the content: the remainder is the final piece.
      sub_img = img
      is_last = True
    else:
      sub_img = img[:, :split]
      img = img[:, split:]
      dst = dst[:, split:]
      is_last = False

    name = out_dir + "/id" + str(i) + ".png"
    if not cv.imwrite(name, sub_img):
      raise IOError("could not write " + name)
    if is_last:
      break

# Cut the loaded image into pieces; the results are written as subs/id<i>.png.
split_img(img, dst)






In [None]:
# Remove the folder that holds the cut-out pieces.
# NOTE(review): this cell comes after the split cell that writes into subs/, so a
# top-to-bottom run deletes the pieces that were just written — presumably it was
# meant to run before the split; confirm the intended order.
!rm -rf subs

In [None]:
# Create the output folder for the cut-out pieces.
# NOTE(review): the split cell above writes to subs/ before this cell runs in
# notebook order — presumably this belongs before the split; confirm.
!mkdir subs

In [None]:
# Move one directory up from the current working directory.
# NOTE(review): subs/ is created with a relative path before this change of
# directory, so it will not be found under the same relative name afterwards — confirm.
%cd ..

In [None]:
# Pack the cut-out pieces into subs.zip.
# NOTE(review): the preceding cell runs `%cd ..`, so `subs` is looked up in the
# parent directory here — confirm the folder exists at that location.
!zip -r subs.zip subs