# Custom setup


In [None]:
import os
import pandas as pd
import numpy as np

base_path = "../datasets/ears/images"

# Keep regular files only and sort them: os.listdir() returns entries in
# arbitrary order and also lists sub-directories, so without this the seeded
# shuffle below is not reproducible and folders could end up in the lists.
filenames = [
    "./images/" + infile
    for infile in sorted(os.listdir(base_path))
    if os.path.isfile(os.path.join(base_path, infile))
]

filenames_df = pd.DataFrame(filenames)
filenames_shuffled_df = filenames_df.sample(frac=1, random_state=42) \
                                    .reset_index(drop=True)

# 60% train / 20% validation / 20% test, cut by position in the shuffled frame.
train_end = int(.6 * len(filenames_shuffled_df))
validate_end = int(.8 * len(filenames_shuffled_df))
train_df = filenames_shuffled_df.iloc[:train_end]
validate_df = filenames_shuffled_df.iloc[train_end:validate_end]
test_df = filenames_shuffled_df.iloc[validate_end:]

# Overwrite the list files (default mode 'w'): appending would duplicate every
# entry each time this cell is re-run.
train_df.to_csv('../datasets/ears/train_ears.txt', header=False, index=False, sep=' ')
validate_df.to_csv('../datasets/ears/val_ears.txt', header=False, index=False, sep=' ')
test_df.to_csv('../datasets/ears/test_ears.txt', header=False, index=False, sep=' ')

In [None]:
# Move the sets in a different directory
import shutil
import os 

DATASET_ROOT = "../datasets/ears/"
SPLITS = ("train", "val", "test")


def _listed_paths(split):
    """Return the paths listed in ``<split>_ears.txt`` without their leading "./"."""
    with open(DATASET_ROOT + split + "_ears.txt") as f:
        stripped = (line.rstrip("\r\n") for line in f)
        return [entry[2:] for entry in stripped if entry]


def _label_path(image_path):
    """Map ``images/<name>.<ext>`` to ``labels/<name>.txt``.

    Only the extension is swapped, so file names that merely contain "jpg" or
    "images" are left intact and non-.jpg images still resolve to a .txt label.
    """
    stem = os.path.splitext(os.path.basename(image_path))[0]
    return "labels/" + stem + ".txt"


def _move_into(fileName, destination):
    """Move ``fileName`` into ``destination``; report and skip it when it is missing."""
    if not os.path.exists(fileName):
        # Typically already moved by a previous run of this cell.
        print("Skipped (not found): " + fileName)
        return
    shutil.move(fileName, destination)
    print("Moved: " + fileName + " to: " + destination)


#Move images
for split in SPLITS:
    destination = DATASET_ROOT + "images/" + split
    # exist_ok lets the cell be re-run without failing on existing folders.
    os.makedirs(destination, exist_ok=True)
    for relative_path in _listed_paths(split):
        _move_into(os.path.join(DATASET_ROOT, relative_path), destination)

#Move labels
for split in SPLITS:
    destination = DATASET_ROOT + "labels/" + split
    os.makedirs(destination, exist_ok=True)
    for relative_path in _listed_paths(split):
        _move_into(os.path.join(DATASET_ROOT, _label_path(relative_path)), destination)

In [None]:
# Update the txts
# Rewrite each list so its entries point inside the per-split image folder,
# e.g. "./images/x.jpg" -> "./images/train/x.jpg".
search_text = "/images/"
data = ""

for split in ("train", "val", "test"):
    list_path = '../datasets/ears/%s_ears.txt' % split
    prefixed = search_text + split + "/"

    with open(list_path) as f:
        data = f.read()

    # Remove an already-present split folder before adding it, so re-running
    # this cell cannot produce "/images/train/train/...".
    data = data.replace(prefixed, search_text).replace(search_text, prefixed)

    with open(list_path, 'w') as f:
        f.write(data)
                        

# Setup

Clone repo, install dependencies and check PyTorch and GPU.

In [None]:
# One-time setup, kept commented out: clone the YOLOv5 repository and install its requirements.
#!git clone https://github.com/ultralytics/yolov5  # clone
#%cd yolov5
#%pip install -qr requirements.txt  # install

import torch
# NOTE(review): `utils` is not a standard-library module — presumably the package from
# the cloned repository, so the kernel must be started inside that folder. Confirm.
import utils
# The returned object is used further down as `display.Image(...)`.
display = utils.notebook_init()  # checks

In [None]:
# Create a one-element tensor and move it to the GPU.
# NOTE(review): presumably a smoke test that CUDA is usable from this kernel — confirm.
torch.zeros(1).cuda()

In [None]:
import torch
from IPython.display import Image  # for displaying images

# Report the PyTorch version and either the properties of GPU 0 or the literal 'CPU'.
device_info = torch.cuda.get_device_properties(0) if torch.cuda.is_available() else 'CPU'
print(f'torch {torch.__version__} {device_info}')

In [None]:
# Run detection with the yolov5s.pt weights on data/images, then show one annotated image.
!python detect.py --weights yolov5s.pt --img 640 --conf 0.25 --source data/images
# NOTE(review): this path assumes the run above wrote to runs/detect/exp; other cells in
# this notebook read numbered run folders (e.g. exp7), so confirm the folder before re-running.
display.Image(filename='runs/detect/exp/zidane.jpg', width=600)

In [None]:
# Weights & Biases  (optional)
import os
import wandb

# Read the API key from the environment instead of hard-coding it in the notebook,
# where it would be committed and shared together with the file.
# When WANDB_API_KEY is unset, key=None leaves credential lookup to wandb itself.
wandb.login(key=os.environ.get("WANDB_API_KEY"))

# Training

In [None]:
# Nano training
# Starts from the yolov5n.pt weights: 30 epochs, 640 px images, batch size 16, device 0.
!python train.py --img 640 --batch-size 16 --epochs 30 --data ears.yaml --weights yolov5n.pt --device 0

import gc
# NOTE(review): the training command runs as a shell command, so this call only collects
# garbage held by the notebook kernel itself — confirm it is still needed.
gc.collect()

In [None]:
# Run inference on a test model
# NOTE(review): the weights come from run folder exp18, while the export cells further
# down use exp33 — confirm which training run is intended.
!python detect.py --weights runs/train/exp18/weights/best.pt --img 640 --conf 0.1 --source ../datasets/ears/images/test

In [None]:
#display inference on ALL test images

import glob
from IPython.display import Image, display

# Show at most the first ten annotated images of the detection run, each followed by a blank line.
preview_limit = 10
for imageName in glob.glob('runs/detect/exp7/*.jpg')[:preview_limit]: #assuming JPG
    display(Image(filename=imageName))
    print("\n")

In [76]:
#Export to ONNX format
# Exports the best checkpoint of run exp33 with a 640 px input and batch size 1 on device 0.
!python export.py --data data/ears.yaml --weights runs/train/exp33/weights/best.pt --img 640 --batch 1 --device 0 --include onnx 

[34m[1mexport: [0mdata=data/ears.yaml, weights=['runs/train/exp33/weights/best.pt'], imgsz=[640], batch_size=1, device=0, half=False, inplace=False, train=False, optimize=False, int8=False, dynamic=False, simplify=False, opset=12, verbose=False, workspace=4, nms=False, agnostic_nms=False, topk_per_class=100, topk_all=100, iou_thres=0.45, conf_thres=0.25, include=['onnx']
YOLOv5  2022-1-28 torch 1.10.2+cu113 CUDA:0 (NVIDIA GeForce GTX 1070, 8192MiB)

Fusing layers... 
Model Summary: 213 layers, 1761871 parameters, 0 gradients, 4.2 GFLOPs

[34m[1mPyTorch:[0m starting from runs\train\exp33\weights\best.pt (3.8 MB)

[34m[1mONNX:[0m starting export with onnx 1.10.2...
  if self.onnx_dynamic or self.grid[i].shape[2:4] != x[i].shape[2:4]:
[34m[1mONNX:[0m export success, saved as runs\train\exp33\weights\best.onnx (7.5 MB)

Export complete (6.67s)
Results saved to [1mF:\face_recognition\yolov5\runs\train\exp33\weights[0m
Visualize with https://netron.app
Detect with `python detec

In [75]:
#Export to Tflite format
# Exports the best checkpoint of run exp33 with a 640 px input and batch size 1, on the CPU.
!python export.py --data data/ears.yaml --weights runs/train/exp33/weights/best.pt --img 640 --batch 1 --device cpu --include tflite

Model: "model"
__________________________________________________________________________________________________
 Layer (type)                   Output Shape         Param #     Connected to                     
 input_1 (InputLayer)           [(1, 640, 640, 3)]   0           []                               
                                                                                                  
 tf_conv (TFConv)               (1, 320, 320, 16)    1744        ['input_1[0][0]']                
                                                                                                  
 tf_conv_1 (TFConv)             (1, 160, 160, 32)    4640        ['tf_conv[0][0]']                
                                                                                                  
 tfc3 (TFC3)                    (1, 160, 160, 32)    4704        ['tf_conv_1[0][0]']              
                                                                                              

[34m[1mexport: [0mdata=data/ears.yaml, weights=['runs/train/exp33/weights/best.pt'], imgsz=[640], batch_size=1, device=cpu, half=False, inplace=False, train=False, optimize=False, int8=False, dynamic=False, simplify=False, opset=12, verbose=False, workspace=4, nms=False, agnostic_nms=False, topk_per_class=100, topk_all=100, iou_thres=0.45, conf_thres=0.25, include=['tflite']
YOLOv5  2022-1-28 torch 1.10.2+cu113 CPU

Fusing layers... 
Model Summary: 213 layers, 1761871 parameters, 0 gradients, 4.2 GFLOPs

[34m[1mPyTorch:[0m starting from runs\train\exp33\weights\best.pt (3.8 MB)

[34m[1mTensorFlow SavedModel:[0m starting export with tensorflow 2.7.0...

                 from  n    params  module                                  arguments                     
2022-02-02 20:37:24.705256: E tensorflow/stream_executor/cuda/cuda_driver.cc:271] failed call to cuInit: CUDA_ERROR_NO_DEVICE: no CUDA-capable device is detected
2022-02-02 20:37:24.712279: I tensorflow/stream_executor/cuda

In [71]:
# Run inference on a test model
# Uses the exp33 weights with a low confidence threshold (--conf 0.1) on the test_various folder.
!python detect.py --data data/ears.yaml --weights runs/train/exp33/weights/best.pt --img 640 --conf 0.1 --source ../datasets/ears/images/test_various --device 0

[34m[1mdetect: [0mweights=['runs/train/exp33/weights/best.pt'], source=../datasets/ears/images/test_various, data=data/ears.yaml, imgsz=[640, 640], conf_thres=0.1, iou_thres=0.45, max_det=1000, device=0, view_img=False, save_txt=False, save_conf=False, save_crop=False, nosave=False, classes=None, agnostic_nms=False, augment=False, visualize=False, update=False, project=runs\detect, name=exp, exist_ok=False, line_thickness=3, hide_labels=False, hide_conf=False, half=False, dnn=False
YOLOv5  2022-1-28 torch 1.10.2+cu113 CUDA:0 (NVIDIA GeForce GTX 1070, 8192MiB)

Fusing layers... 
Model Summary: 213 layers, 1761871 parameters, 0 gradients, 4.2 GFLOPs
image 1/14 F:\face_recognition\datasets\ears\images\test_various\002_left_e1ar.jpg: 640x608 1 earRight, Done. (0.031s)
image 2/14 F:\face_recognition\datasets\ears\images\test_various\002_left_ear.jpg: 640x480 Done. (0.031s)
image 3/14 F:\face_recognition\datasets\ears\images\test_various\005222_up_ear.jpg: 640x480 1 earRight, Done. (0.016

## Create augmented dataset with random crops

In [None]:
from matplotlib import pyplot as plt
import numpy as np
import cv2

def format_yolov5(frame):
    """Pad ``frame`` with black pixels into a square, keeping it in the top-left corner.

    Args:
        frame: array with a three-element shape (height, width, channels).

    Returns:
        A new ``uint8`` array of shape ``(side, side, 3)`` where ``side`` is the
        larger of the frame's height and width.
    """
    height, width, _channels = frame.shape
    side = max(height, width)
    canvas = np.zeros((side, side, 3), np.uint8)
    canvas[:height, :width] = frame
    return canvas

def predict_yolov5(net, input_image, orig_image):
    """Crop ``orig_image`` around the most confident detection and build its label line.

    Runs ``net`` on ``input_image``, keeps the detections that pass the
    confidence and class-score thresholds, applies non-maximum suppression and
    selects the surviving detection with the highest confidence. The crop is
    the detected box enlarged on each side by a random fraction (below 40%) of
    the free space between the box and the image border.

    Args:
        net: network object exposing ``setInput`` and ``forward``.
        input_image: image fed to the network; box coordinates are scaled from
            the 640x640 network input back to this image's size.
        orig_image: image the crop is cut from. NOTE(review): assumed to share
            its top-left origin with ``input_image`` (as when ``input_image``
            is a padded copy of it) — confirm for other callers.

    Returns:
        ``(image_cropped, txt_content)`` where ``txt_content`` is
        ``"<class_id> <x_center> <y_center> <width> <height>"`` with the four
        box values normalised by the crop size, or ``(None, "")`` when there is
        no usable detection.
    """
    import random

    input_size = 640
    blob = cv2.dnn.blobFromImage(input_image, 1/255.0, (input_size, input_size), swapRB=True)
    net.setInput(blob)
    output_data = net.forward()[0]

    image_height, image_width = input_image.shape[:2]
    orig_height, orig_width = orig_image.shape[:2]
    y_factor = float(image_height) / float(input_size)
    x_factor = float(image_width) / float(input_size)

    class_ids = []
    confidences = []
    boxes = []
    crop_boxes = []
    crop_areas = []

    # Iterate over however many rows the network produced rather than a fixed count.
    for row in output_data:
        confidence = float(row[4])
        if not confidence >= 0.4:
            continue

        classes_scores = row[5:]
        _, _, _, max_indx = cv2.minMaxLoc(classes_scores)
        class_id = max_indx[1]
        if not classes_scores[class_id] > .25:
            continue

        x, y, w, h = row[0].item(), row[1].item(), row[2].item(), row[3].item()

        left = int((x - 0.5 * w) * x_factor)
        top = int((y - 0.5 * h) * y_factor)
        right = left + int(w * x_factor)
        bottom = top + int(h * y_factor)

        # Clamp the box to the image: negative coordinates would otherwise be
        # interpreted as "count from the end" by NumPy slicing and yield a wrong crop.
        left = max(left, 0)
        top = max(top, 0)
        right = min(right, orig_width)
        bottom = min(bottom, orig_height)
        width = right - left
        height = bottom - top
        if width <= 0 or height <= 0:
            continue

        # Four independent random fractions in [0, 0.4): left, right, top, bottom.
        randoms = [random.random() / 2.5 for _ in range(4)]
        padding_left = int(left * randoms[0])
        padding_right = max(int((image_width - right) * randoms[1]), 0)
        padding_top = int(top * randoms[2])
        padding_bottom = max(int((image_height - bottom) * randoms[3]), 0)

        confidences.append(confidence)
        class_ids.append(class_id)
        boxes.append([left, top, width, height])

        # Crop rectangle as (top, bottom, left, right), kept inside orig_image.
        crop_areas.append((
            top - padding_top,
            min(bottom + padding_bottom, orig_height),
            left - padding_left,
            min(right + padding_right, orig_width),
        ))
        # Inside the crop the box starts exactly at the left/top padding.
        crop_boxes.append((padding_left, padding_top, width, height))

    if not boxes:
        print("No detections")
        return None, ""

    # Depending on the OpenCV version NMSBoxes returns an empty tuple, an (N, 1)
    # array or a flat array; flatten so every case is handled the same way.
    indexes = np.array(cv2.dnn.NMSBoxes(boxes, confidences, 0.25, 0.45)).flatten()
    if indexes.size == 0:
        print("No detections")
        return None, ""

    # max() keeps the first of equally confident detections.
    best = max((int(i) for i in indexes), key=lambda i: confidences[i])
    box = crop_boxes[best]
    class_id = class_ids[best]
    crop_top, crop_bottom, crop_left, crop_right = crop_areas[best]

    image_cropped = orig_image[crop_top:crop_bottom, crop_left:crop_right]
    cropped_height, cropped_width = image_cropped.shape[:2]
    if cropped_height == 0 or cropped_width == 0:
        print("Null values")
        return None, ""

    # The label is normalised by the crop size measured before the optional
    # resize below; the resize is given only a target width.
    label_width = box[2] / cropped_width
    label_heigth = box[3] / cropped_height
    label_left = box[0] / cropped_width + 0.5 * label_width
    label_top = box[1] / cropped_height + 0.5 * label_heigth
    txt_content = "{} {} {} {} {}".format(class_id, label_left, label_top, label_width, label_heigth)

    if cropped_height < 640 and cropped_width < 640:
        import imutils
        try:
            image_cropped = imutils.resize(image_cropped, width=640)
        except Exception as exc:
            # Best effort: keep the crop at its original size if resizing fails.
            print("Skipping cropping: %s" % exc)

    return image_cropped, txt_content
    
    
def elements(array):
    """Return the element count of ``array``, or its (zero) ``ndim`` for a 0-d array."""
    dimensions = array.ndim
    if not dimensions:
        return dimensions
    return array.size
    
def _crop_split(net, split, start_index):
    """Crop every image listed for ``split`` and return the next unused index.

    Reads ``../datasets/ears/<split>_ears.txt``, writes crops and label files
    under ``../datasets/ears_cropped/`` and finally writes
    ``../datasets/ears_cropped/<split>_ear.txt`` listing the crops that were saved.
    """
    import os

    image_dir = '../datasets/ears_cropped/images/' + split + '/'
    label_dir = '../datasets/ears_cropped/labels/' + split + '/'
    os.makedirs(image_dir, exist_ok=True)
    os.makedirs(label_dir, exist_ok=True)

    saved = []
    index = start_index
    with open('../datasets/ears/' + split + '_ears.txt') as f:
        for line in f:
            relative_path = line.rstrip("\r\n")
            if not relative_path:
                continue
            fileName = os.path.join("../datasets/ears/", relative_path)
            image = cv2.imread(fileName)
            if image is None:
                # cv2.imread signals an unreadable or missing file with None.
                print("Could not read: " + fileName)
            else:
                input_image = format_yolov5(image)
                image_cropped, txt_content = predict_yolov5(net, input_image, image)
                if image_cropped is not None and elements(image_cropped) > 0:
                    cv2.imwrite(image_dir + str(index) + ".jpg", image_cropped)
                    with open(label_dir + str(index) + '.txt', 'w') as output:
                        output.write(txt_content)
                    saved.append(index)
            # The index advances for every listed image, saved or not.
            index += 1

    # Write the list once, naming only the crops that exist on disk.
    with open('../datasets/ears_cropped/' + split + '_ear.txt', 'w') as fileList:
        fileList.writelines("./images/%s/%s.jpg\n" % (split, j) for j in saved)
    return index


def crop_dataset(net):
    """Build the cropped dataset for the train, validation and test splits.

    Crops are numbered with one counter that runs across all three splits.

    Args:
        net: network passed through to ``predict_yolov5``.
    """
    i = 0
    for title, split in (("Training set", "train"),
                         ("Validation set", "val"),
                         ("Test set", "test")):
        print(title)
        i = _crop_split(net, split, i)

    print("Done")

In [None]:
# Load the model on opencv
import cv2
import os
# NOTE(review): this loads the ONNX file of run exp31, while the ONNX export cell
# above exported run exp33 — confirm which run is intended.
net = cv2.dnn.readNet('runs/train/exp31/weights/best.onnx')

# Create the output tree. makedirs also creates the parent folders, and
# exist_ok=True keeps the cell from failing when it is run again.
for kind in ("images", "labels"):
    for split in ("train", "val", "test"):
        os.makedirs("../datasets/ears_cropped/" + kind + "/" + split, exist_ok=True)

crop_dataset(net)

## Finetune model

In [None]:
# Continue training from the exp31 best weights on the cropped dataset (30 epochs, batch size 32).
# NOTE(review): the command passes no learning-rate or hyperparameter option, so the
# intended lower learning rate must be configured elsewhere — confirm.
!python train.py --img 640 --batch-size 32 --epochs 30 --data ears_cropped.yaml --weights runs/train/exp31/weights/best.pt --device 0