# **Crop Logos**
Đầu tiên, từ tập ảnh download được ở bước trước, nhóm tiến hành lọc ra các hình ảnh không liên quan và đánh nhãn cho các hình ảnh còn lại. Ở bước này nhóm sẽ cắt các ảnh logo từ tập ảnh có label.

In [0]:
# Import library
from skimage import io
from skimage import util
import os

In [0]:
# Define util functions

def read_all_file_in_dir(dir_name):
  """Return the names of the files located directly inside ``dir_name``.

  Only the top level is inspected; sub-folders are not descended into.
  An empty list is returned when ``os.walk`` yields nothing for the path
  (e.g. the path does not exist).
  """
  # The first tuple produced by os.walk describes dir_name itself.
  top_level = next(os.walk(dir_name), None)
  if top_level is None:
    return []
  _, _, entries = top_level
  return list(entries)

def read_all_dir_in_dir(dir_name):
  """Return the names of the sub-folders located directly inside ``dir_name``.

  Nested folders further down the tree are not included. An empty list is
  returned when ``os.walk`` yields nothing for the path.
  """
  for _root, sub_dirs, _files in os.walk(dir_name):
    # Only the first (top-level) entry is wanted, so leave immediately.
    return list(sub_dirs)
  return []

def read_labels(label_dir, img_name):
  """Read the label file that belongs to an image.

  The file ``<label_dir>/<img_name>.txt`` is expected to hold one label per
  line as whitespace-separated numbers.

  Args:
    label_dir: Folder containing the label text files.
    img_name: Image file name without its extension.

  Returns:
    A list with one list of floats per non-blank line of the file.

  Raises:
    OSError: If the label file cannot be opened.
    ValueError: If a token on a line is not a valid number.
  """
  label_path = os.path.join(label_dir, img_name + '.txt')
  # `with` closes the handle even when a value fails to parse.
  with open(label_path) as label_file:
    rows = (line.split() for line in label_file)
    # Blank lines (typically a trailing newline) carry no label; keeping
    # them would hand an empty list to callers that index into each label.
    return [[float(value) for value in row] for row in rows if row]

def crop_logo_from_img(img, labels, dir_name, index=0, ext='png'):
  """Cut every labelled box out of ``img`` and save each one as its own file.

  Each label is indexed as ``[_, x_center, y_center, width, height]`` where
  the four box values are fractions of the image width/height (they are
  multiplied by the image shape). The first element is not used here
  (presumably a class id -- confirm against the labelling tool).

  Args:
    img: Image array whose first two axes are (rows, columns); an optional
      trailing channel axis is kept as is.
    labels: Iterable of labels as described above.
    dir_name: Existing folder that receives the cropped logos.
    index: Number used for the first saved file (``logo_<index>.<ext>``).
    ext: File extension (without the dot) used for the saved files.

  Returns:
    The next unused index, i.e. ``index`` plus the number of logos saved.
  """
  img_height, img_width = img.shape[:2]
  for label in labels:
    center_x = label[1] * img_width
    center_y = label[2] * img_height
    box_width = label[3] * img_width
    box_height = label[4] * img_height

    # Clamp the box to the image: a negative start would otherwise be read
    # by numpy as "count from the end" and yield a wrong or empty crop.
    top = max(int(center_y - box_height / 2), 0)
    bottom = min(int(center_y + box_height / 2), img_height)
    left = max(int(center_x - box_width / 2), 0)
    right = min(int(center_x + box_width / 2), img_width)

    # A degenerate or fully out-of-frame box has nothing to save.
    if bottom <= top or right <= left:
      continue

    # Slicing only the two spatial axes works for both 2-D and 3-D images.
    logo = img[top:bottom, left:right]
    io.imsave(os.path.join(dir_name, f'logo_{index}.{ext}'), logo)
    index += 1
  return index

In [0]:
# Dataset locations: source images, their label files, and the output
# folder for the cropped logos.
img_dir, label_dir, logo_dir = (
  './dataset/images',
  './dataset/labels',
  './dataset/logos',
)

In [0]:
# Create folder to store logos.
# makedirs(exist_ok=True) replaces the separate exists-check (no race between
# the check and the creation) and also creates missing parent folders.
os.makedirs(logo_dir, exist_ok=True)

In [0]:
# Read child folders in image folder
child_dir_names = read_all_dir_in_dir(img_dir)

# Crop the labelled logos of every image in each child folder.
# `index` keeps counting across folders so every saved logo gets a unique number.
index = 0
for dir_name in child_dir_names:
  child_image_dir = os.path.join(img_dir, dir_name)
  child_label_dir = os.path.join(label_dir, dir_name)
  child_logo_dir = os.path.join(logo_dir, dir_name)

  # Mirror the image sub-folder under the logo folder (no-op if it exists).
  os.makedirs(child_logo_dir, exist_ok=True)

  file_names = read_all_file_in_dir(child_image_dir)

  for file_name in file_names:
    # splitext splits on the last dot only, so a name such as
    # "shop.front.jpg" is matched with "shop.front.txt" instead of "shop.txt".
    stem, suffix = os.path.splitext(file_name)
    # Fall back to png when the file name carries no extension at all.
    ext = suffix[1:] or 'png'
    img = io.imread(os.path.join(child_image_dir, file_name))
    labels = read_labels(child_label_dir, stem)
    index = crop_logo_from_img(img, labels=labels, dir_name=child_logo_dir, index=index, ext=ext)

# **Data Augmentation**

Ở bước này, nhóm sẽ thực hiện một số phép biến đổi trên các ảnh logo có được từ bước trên để tăng cường thêm dữ liệu.

In [0]:
# Import library

from skimage import filters
from skimage import transform
import numpy as np
import random

In [0]:
def augment_image(img, dir_name, index=0, ext='png'):
  """Save four randomly perturbed variants of ``img`` into ``dir_name``.

  The variants are, in this order: a Gaussian blur (sigma drawn from
  [1, 2]), additive Gaussian noise (variance drawn from [0.005, 0.05]),
  a horizontal stretch and a vertical stretch (factor drawn from
  [1.2, 2.5] each). They are written as ``logo_<index>.<ext>`` up to
  ``logo_<index + 3>.<ext>``.

  Args:
    img: Image array, either 2-D or with a trailing channel axis.
      Assumed to be an unsigned-integer image as returned by ``io.imread``
      -- TODO confirm for other sources.
    dir_name: Existing folder that receives the augmented images.
    index: Number used for the first saved file.
    ext: File extension (without the dot) used for the saved files.

  Returns:
    The next unused index (``index + 4``).
  """
  # Only treat the last axis as channels when the image actually has one.
  has_channels = img.ndim == 3
  sigma = random.uniform(1, 2)
  try:
    blurred_img = filters.gaussian(img, sigma, channel_axis=-1 if has_channels else None)
  except TypeError:
    # scikit-image releases before 0.19 only know the `multichannel` keyword.
    blurred_img = filters.gaussian(img, sigma, multichannel=has_channels)
  noised_img = util.random_noise(img, mode='gaussian', var=random.uniform(0.005, 0.05))
  horizontal_resized_img = transform.resize(img, (img.shape[0], int(img.shape[1] * random.uniform(1.2, 2.5))))
  vertical_resized_img = transform.resize(img, (int(img.shape[0] * random.uniform(1.2, 2.5)), img.shape[1]))

  variants = (blurred_img, noised_img, horizontal_resized_img, vertical_resized_img)
  for offset, variant in enumerate(variants):
    # The operations above return float images; convert explicitly to 8-bit
    # so the image writer does not have to guess (or reject) the dtype.
    # Clipping first guards against tiny floating-point overshoots past 1.0,
    # which img_as_ubyte would refuse.
    as_bytes = util.img_as_ubyte(np.clip(variant, 0.0, 1.0))
    io.imsave(os.path.join(dir_name, f'logo_{index + offset}.{ext}'), as_bytes, check_contrast=False)
  return index + len(variants)

In [0]:
# Read child folders in logo folder
child_dir_names = read_all_dir_in_dir(logo_dir)

# List every folder once, up front: augmented files are written next to
# their sources and must not be picked up as new inputs during this pass.
logo_files = []
for dir_name in child_dir_names:
  child_logo_dir = os.path.join(logo_dir, dir_name)
  logo_files.append((child_logo_dir, read_all_file_in_dir(child_logo_dir)))

# Start numbering after the highest existing logo_<n> file instead of relying
# on an `index` variable left over from another cell, so this cell also works
# in a fresh kernel and never overwrites an existing logo.
index = 0
for _, file_names in logo_files:
  for file_name in file_names:
    stem = os.path.splitext(file_name)[0]
    number = stem[len('logo_'):]
    if stem.startswith('logo_') and number.isdecimal():
      index = max(index, int(number) + 1)

# Augment all images in each child folder
for child_logo_dir, file_names in logo_files:
  for file_name in file_names:
    # Keep the source extension; fall back to png when there is none.
    ext = os.path.splitext(file_name)[1][1:] or 'png'
    img = io.imread(os.path.join(child_logo_dir, file_name))
    index = augment_image(img, child_logo_dir, index=index, ext=ext)