# Image preprocessing

In [None]:
%matplotlib inline
%load_ext autoreload
%autoreload 2

In [None]:
# ALL_DATASET: set to False for a quick test run on a single, randomly chosen
# individual instead of the whole dataset.
ALL_DATASET = True
# EXEC_ALL: checked by the assert(EXEC_ALL) cells placed between sections; when
# False those cells raise AssertionError and execution stops there.
EXEC_ALL = True

In [None]:
# Set to True to regenerate every output image even when it already exists on
# disk; when False, pictures whose output files are present are skipped.
REMAKE = False

In [None]:
from config import *
import imageio
import imgaug as ia
from imgaug import augmenters as iaa 
from imgaug.augmentables.bbs import BoundingBox, BoundingBoxesOnImage
import json
from matplotlib import pyplot as plt
import numpy as np
from os import (
    environ,
    listdir
)
from os.path import (
    isfile,
    join
)
import pandas as pd
from PIL import Image
from tqdm import tqdm
from utils import (
    isCsv,
    isCustom,
    isImage,
    isTxt,
    orginalname_to_yoloname,
    withoutExt
)

In [None]:
# Checkpoint: raises AssertionError (stopping a "run all") when EXEC_ALL is False.
assert EXEC_ALL

## $I/$ Access to Dataset

In [None]:
# Every entry of the dataset root is treated as one individual.
individuals = listdir(environ["DATASET_PATH"])

if ALL_DATASET:
    print(len(individuals), " individuals")
else:
    # Test mode: keep a single individual drawn at random and show which one.
    individuals = [np.random.choice(individuals)]
    print(len(individuals), " individuals")
    print(individuals)

In [None]:
# Collect the image / csv / txt file names found in each individual's folder,
# leaving out anything isCustom() flags.
files = []
for individual in individuals:
    files.extend(
        entry
        for entry in listdir(join(environ["DATASET_PATH"], individual))
        if (isImage(entry) or isCsv(entry) or isTxt(entry)) and not isCustom(entry)
    )

if ALL_DATASET:
    print(len(files), " files")
else:
    # Test mode: the list is short enough to be displayed in full.
    print(len(files), " files")
    print(files)

In [None]:
# Split the collected file names by type; the first matching type wins
# (csv, then image, then txt).
csvs, pictures, txts = [], [], []

for filename in files:
    if isCsv(filename):
        target = csvs
    elif isImage(filename):
        target = pictures
    elif isTxt(filename):
        target = txts
    else:
        continue
    target.append(filename)

# Sanity checks: these only warn, they do not stop the run.
if len(csvs) != len(individuals):
    print("Mismatch between number of individuals and number of csv")

if len(pictures) != len(txts):
    print("Mismatch between number of images and number of txt")

# Report the size of each group, and its content in test mode.
for label, bucket in ((" csv", csvs), ("pictures", pictures), ("txt", txts)):
    print(len(bucket), label)
    if not ALL_DATASET:
        print(bucket)

In [None]:
# Checkpoint: raises AssertionError (stopping a "run all") when EXEC_ALL is False.
assert EXEC_ALL

## $II/$ Resize and Padding (Yolov5)

In [None]:
# Build the YOLO training material: for every annotated picture, write a
# resized + square-padded copy, append its relative path to yolo_dataset.txt and
# write a label file (one "0 x_center y_center width height" line per box,
# all four values divided by the output image size).
resize = iaa.Resize({"longer-side": int(environ["CROP_SIZE"]), "shorter-side": "keep-aspect-ratio"})
padding = iaa.PadToSquare(pad_mode="constant", pad_cval=0, position="left-bottom")

# NOTE(review): the list is opened in append mode, so re-running with
# REMAKE = True appends paths that are already listed -- confirm this is wanted.
# `with` guarantees the file is flushed and closed even if a picture fails.
with open("yolo_dataset.txt", 'a') as dataset_file:

    for csv in tqdm(csvs):

        # The csv name is expected to start with "<individual>_".
        individual = csv.split('_')[0]
        if individual not in individuals:
            print("Individual from csv unknown")
            # No folder to read from for this csv: skip it instead of failing below.
            continue

        path = join(environ["DATASET_PATH"], individual)
        df = pd.read_csv(join(path, csv))

        for picturename in df["filename"].unique():

            yolo_picturename = orginalname_to_yoloname(picturename)

            # Already converted and no rebuild requested: nothing to do.
            if isfile(join(path, yolo_picturename)) and not REMAKE:
                continue

            pic = imageio.imread(join(path, picturename))

            # One bounding box per annotation row of this picture.
            bbs = []
            for raw_shape in df.loc[df["filename"] == picturename, "region_shape_attributes"]:
                shape = json.loads(raw_shape)
                x = shape['x']
                y = shape['y']
                bbs.append(BoundingBox(x, y, x + shape["width"], y + shape["height"]))
            bbs = BoundingBoxesOnImage(bbs, shape=pic.shape)

            # The boxes go through the same transforms as the pixels.
            pic, bbs = resize(image=pic, bounding_boxes=bbs)
            pic, bbs = padding(image=pic, bounding_boxes=bbs)

            imageio.imsave(join(path, yolo_picturename), pic)

            dataset_file.write(join(individual, yolo_picturename) + "\n")

            img_h, img_w = pic.shape[:2]
            with open(join(path, withoutExt(yolo_picturename) + ".txt"), 'w') as annotationtxt:
                for bb in bbs:
                    fields = (
                        ((bb.x1 + bb.x2) / 2) / img_w,  # box centre, x
                        ((bb.y1 + bb.y2) / 2) / img_h,  # box centre, y
                        (bb.x2 - bb.x1) / img_w,        # box width
                        (bb.y2 - bb.y1) / img_h,        # box height
                    )
                    # The class id is always 0 (a single class).
                    annotationtxt.write("0 " + ' '.join(map(str, fields)) + "\n")

In [None]:
# Checkpoint: raises AssertionError (stopping a "run all") when EXEC_ALL is False.
assert EXEC_ALL

## $III/$ Crop (ReID)

In [None]:
# Build the ReID material: for every annotation row, save the annotated region
# as "<picture>.crop.jpg" and a resized + centre-padded square version as
# "<picture>.crop.resize.jpg".
resize = iaa.Resize({"longer-side": int(environ["CROP_SIZE"]), "shorter-side": "keep-aspect-ratio"})
padding = iaa.PadToSquare(pad_mode="constant", pad_cval=0, position="center")

for csv in tqdm(csvs):

    # The csv name is expected to start with "<individual>_".
    individual = csv.split('_')[0]
    if individual not in individuals:
        print("Individual from csv unknown")
        # No folder to read from for this csv: skip it instead of failing below.
        continue

    path = join(environ["DATASET_PATH"], individual)
    df = pd.read_csv(join(path, csv))

    # NOTE(review): output names depend on the picture name only, so a picture
    # with several annotation rows ends up with a single crop (later rows are
    # skipped, or overwrite it when REMAKE is True) -- confirm one region per picture.
    for source_name, raw_shape in zip(df["filename"], df["region_shape_attributes"]):

        crop_path = join(path, source_name + ".crop.jpg")
        resize_path = join(path, source_name + ".crop.resize.jpg")

        # Both outputs already exist and no rebuild requested: nothing to do.
        if isfile(resize_path) and isfile(crop_path) and not REMAKE:
            continue

        shape = json.loads(raw_shape)

        x1 = shape["x"]
        y1 = shape["y"]
        x2 = x1 + shape["width"]
        y2 = y1 + shape["height"]

        # Convert to an array inside the `with` so the pixel data is read
        # before the source file is closed.
        with Image.open(join(path, source_name)) as img:
            img_crop = np.array(img.crop((x1, y1, x2, y2)))

        plt.imsave(crop_path, img_crop)

        img_resize = padding(image=resize(image=img_crop))

        plt.imsave(resize_path, img_resize)
        


In [None]:
# Checkpoint: raises AssertionError (stopping a "run all") when EXEC_ALL is False.
assert EXEC_ALL

## $IV/$ Data Augmentation

In [None]:
# Sample crop used to preview the augmentations below.
# Built with join() rather than a "\..." literal: the backslashes formed invalid
# escape sequences ("\B") and only acted as path separators on Windows.
PATH_TEST_IMAGE = join(
    environ["DATASET_PATH"],
    "BF001",
    "BF001_2021 ©P.Louisy 200608 DSC_0716.JPG.crop.resize.jpg",
)

In [None]:
# Preview grid: grow a near-square i x j grid until it has at least nb_img
# cells, lengthening j when both sides are equal and i otherwise.
nb_img = 9
i = j = 1
while i * j < nb_img:
    i, j = (i, j + 1) if i == j else (i + 1, j)

### $0.$ Original Image

In [None]:
# Load the sample crop, bring its longer side to CROP_SIZE and display it.
pic = imageio.imread(PATH_TEST_IMAGE)
resize = iaa.Resize(
    {
        "longer-side": int(environ["CROP_SIZE"]),
        "shorter-side": "keep-aspect-ratio",
    }
)
pic = resize(image=pic)

plt.figure(num=0, figsize=(10, 10))
plt.imshow(pic)
plt.show()

### $1.$ Particles (Salt)

In [None]:
# Salt noise augmenter, configured with the range (0.001, 0.05).
salt = iaa.Salt((0.001, 0.05))

plt.figure(num=1, figsize=(i*10, j*10))
# First grid cell: the untouched picture, for reference.
plt.subplot(j, i, 1)
plt.imshow(pic)
plt.title('original')

# Remaining cells: one fresh call of the augmenter each.
# (The loop variable is not named `id`, which would rebind the builtin for
# the rest of the notebook.)
for cell in range(2, nb_img + 1):
    plt.subplot(j, i, cell)
    plt.imshow(salt(image=pic))

plt.show()

### $2.$ Rotations (Rotate)

In [None]:
# Rotation augmenter, configured with the range (-180, 180).
rotate_free = iaa.Rotate((-180, 180))

plt.figure(num=21, figsize=(i*10, j*10))
# First grid cell: the untouched picture, for reference.
plt.subplot(j, i, 1)
plt.imshow(pic)
plt.title('original')

# Remaining cells: one fresh call of the augmenter each.
# (The loop variable is not named `id`, which would rebind the builtin for
# the rest of the notebook.)
for cell in range(2, nb_img + 1):
    plt.subplot(j, i, cell)
    plt.imshow(rotate_free(image=pic))

plt.show()

### $3.$ Blur

In [None]:
# Average-blur augmenter, configured with the kernel-size range (2, 3).
blur_a = iaa.AverageBlur(k=(2, 3))


plt.figure(num=32, figsize=(i*10, j*10))
# First grid cell: the untouched picture, for reference.
plt.subplot(j, i, 1)
plt.imshow(pic)
plt.title('original')

# Remaining cells: one fresh call of the augmenter each.
# (The loop variable is not named `id`, which would rebind the builtin for
# the rest of the notebook.)
for cell in range(2, nb_img + 1):
    plt.subplot(j, i, cell)
    plt.imshow(blur_a(image=pic))

plt.show()

### $4.$ Coloration (ChangeColorTemperature)

In [None]:
import numpy as np

class MyParameter(ia.parameters.StochasticParameter):
    """Stochastic parameter made of two uniform ranges joined at ``mid``.

    Each sample is drawn, with probability 0.5 each, either uniformly between
    ``lb`` and ``mid`` or uniformly between ``mid`` and ``ub``. Values below
    and above ``mid`` are therefore equally likely even when the two ranges
    have different widths.
    """

    def __init__(self, lb, ub, mid):
        """Store the bounds.

        Args:
            lb: lower bound of the lower range.
            ub: upper bound of the upper range.
            mid: value separating the two ranges.
        """
        super().__init__()
        self.lb = lb
        self.ub = ub
        self.mid = mid

    def _draw_samples(self, size, random_state):
        """Return an array of shape ``size`` filled with independent samples.

        All randomness comes from ``random_state`` (the generator handed over
        by imgaug) rather than from the global NumPy generator, so seeding the
        augmenter makes the samples reproducible.
        """
        # Per-element coin flip selecting the lower or the upper range.
        use_lower = random_state.uniform(0.0, 1.0, size) < 0.5
        lower = random_state.uniform(self.lb, self.mid, size)
        upper = random_state.uniform(self.mid, self.ub, size)
        return np.where(use_lower, lower, upper)

In [None]:
# Colour-temperature augmenter; the value is sampled by MyParameter from
# 4000-6600 or 6600-20000 with equal probability.
color = iaa.ChangeColorTemperature(MyParameter(4000, 20000, 6600))

plt.figure(num=5, figsize=(i*10, j*10))
# First grid cell: the untouched picture, for reference.
plt.subplot(j, i, 1)
plt.imshow(pic)
plt.title('original')

# Remaining cells: one fresh call of the augmenter each.
# (The loop variable is not named `id`, which would rebind the builtin for
# the rest of the notebook.)
for cell in range(2, nb_img + 1):
    plt.subplot(j, i, cell)
    plt.imshow(color(image=pic))

plt.show()

### $5.$ Brightness

In [None]:
# Brightness augmenter: applies Add with the range (-30, 30) to the
# brightness channels.
bright = iaa.WithBrightnessChannels(iaa.Add((-30, 30)))

# Figure number 6: number 5 is already used by the colour-temperature preview,
# and reusing it would draw into that figure wherever it is still open.
plt.figure(num=6, figsize=(i*10, j*10))
# First grid cell: the untouched picture, for reference.
plt.subplot(j, i, 1)
plt.imshow(pic)
plt.title('original')

# Remaining cells: one fresh call of the augmenter each.
# (The loop variable is not named `id`, which would rebind the builtin for
# the rest of the notebook.)
for cell in range(2, nb_img + 1):
    plt.subplot(j, i, cell)
    plt.imshow(bright(image=pic))

plt.show()