In [10]:
import numpy as np
import os
import json
from PIL import Image
import torch
from torch.utils.data import Dataset
import torchvision.transforms as transforms

In [3]:
from google.colab import drive
drive.mount('/content/drive', force_remount=True)

Mounted at /content/drive


## Unzip Dataset

In [22]:
!unzip -P 2019Deepfashion2** /content/drive/MyDrive/ECE1508\ DL\ Project/validation.zip  -d /content

[1;30;43mStreaming output truncated to the last 5000 lines.[0m
  inflating: /content/validation/image/007349.jpg  
  inflating: /content/validation/image/007310.jpg  
  inflating: /content/validation/image/007286.jpg  
  inflating: /content/validation/image/007274.jpg  
  inflating: /content/validation/image/007218.jpg  
  inflating: /content/validation/image/007216.jpg  
  inflating: /content/validation/image/007204.jpg  
  inflating: /content/validation/image/007133.jpg  
  inflating: /content/validation/image/007081.jpg  
  inflating: /content/validation/image/007045.jpg  
  inflating: /content/validation/image/007044.jpg  
  inflating: /content/validation/image/007017.jpg  
  inflating: /content/validation/image/006960.jpg  
  inflating: /content/validation/image/006868.jpg  
  inflating: /content/validation/image/006831.jpg  
  inflating: /content/validation/image/006773.jpg  
  inflating: /content/validation/image/006744.jpg  
  inflating: /content/validation/image/006720.jpg  

In [None]:
!ls /content/validation/image -1 -R | wc -l

shell-init: error retrieving current directory: getcwd: cannot access parent directories: No such file or directory
32154


## Dataframe of Dataset

In [None]:
with open('/content/validation/annos/000002.json') as f:
  test_anno = json.load(f)

In [None]:
test_anno.keys()

dict_keys(['item2', 'source', 'pair_id', 'item1'])

In [None]:
test_anno['item1'].keys()

dict_keys(['segmentation', 'scale', 'viewpoint', 'zoom_in', 'landmarks', 'style', 'bounding_box', 'category_id', 'occlusion', 'category_name'])

In [23]:
import pandas as pd
from multiprocessing import Pool
from tqdm import tqdm

root_dir = '/content/validation'
image_dir = os.path.join(root_dir, 'image')
annotation_dir = os.path.join(root_dir, 'annos')
image_files = os.listdir(image_dir)

def load_data(img_file):
    """
    Read the annotation that belongs to one image and flatten it into a record.

    The annotation is looked up in the module-level `annotation_dir` under the
    image's file name with the extension replaced by '.json'.

    Args:
        img_file: File name of the image (e.g. '000002.jpg'), not a full path.

    Returns:
        A dict with 'image_file' plus, for each of 'item1' and 'item2', the
        fields category_name, scale, bounding_box and category_id (keys are
        '<item>_<field>'). A field is None when the item or the field is
        absent from the annotation.
    """
    stem = os.path.splitext(img_file)[0]
    annotation_name = os.path.join(annotation_dir, stem + '.json')

    with open(annotation_name) as f:
        annotation = json.load(f)

    # Build the record in a fixed key order: all item1 fields, then all item2 fields
    record = {'image_file': img_file}
    for item_key in ('item1', 'item2'):
        item = annotation.get(item_key, {})
        for field in ('category_name', 'scale', 'bounding_box', 'category_id'):
            record[item_key + '_' + field] = item.get(field, None)

    return record

# Parse the annotation files in parallel: one worker process per CPU core.
num_workers = os.cpu_count()  # Use the number of CPU cores
with Pool(num_workers) as pool:
    # imap yields results in the same order as image_files, so row i of the
    # resulting DataFrame corresponds to image_files[i]; tqdm shows progress.
    data = list(tqdm(pool.imap(load_data, image_files), total=len(image_files)))

# Convert list of dictionaries to DataFrame (one row per image, one column per record key)
df = pd.DataFrame(data)

# Example:
print(df.head())  # Display the first few rows of the DataFrame


100%|██████████| 32153/32153 [00:17<00:00, 1809.21it/s]


   image_file item1_category_name  item1_scale  item1_bounding_box  \
0  005481.jpg  short sleeve dress            3    [0, 0, 620, 467]   
1  019259.jpg     long sleeve top            2   [94, 0, 509, 188]   
2  000376.jpg               sling            3   [92, 1, 466, 444]   
3  010603.jpg          vest dress            3    [2, 5, 290, 463]   
4  003474.jpg               skirt            3  [0, 104, 467, 564]   

   item1_category_id item2_category_name  item2_scale    item2_bounding_box  \
0                 10                None          NaN                  None   
1                  2               skirt          3.0   [81, 112, 621, 604]   
2                  6            trousers          2.0  [113, 377, 402, 623]   
3                 12                None          NaN                  None   
4                  9                None          NaN                  None   

   item2_category_id  
0                NaN  
1                9.0  
2                8.0  
3           

In [None]:
df['item1_category_name'].value_counts(normalize=True)

short sleeve top        0.298666
long sleeve top         0.137281
short sleeve dress      0.094113
vest dress              0.093117
trousers                0.086835
skirt                   0.067054
vest                    0.054521
long sleeve outwear     0.052686
long sleeve dress       0.043635
shorts                  0.037322
sling dress             0.022580
sling                   0.008864
short sleeve outwear    0.003328
Name: item1_category_name, dtype: float64

In [None]:
df['item2_category_name'].value_counts(normalize=True)

trousers                0.325697
skirt                   0.220761
shorts                  0.149314
short sleeve top        0.145411
long sleeve top         0.074323
sling dress             0.021265
vest                    0.018029
vest dress              0.017823
long sleeve outwear     0.015409
short sleeve dress      0.004623
long sleeve dress       0.003801
short sleeve outwear    0.001798
sling                   0.001746
Name: item2_category_name, dtype: float64

In [None]:
df[df['item2_category_name'].isna()].shape

(12684, 9)

## Create dictionary of classes (type of clothing present in dataset)

In [24]:
# Map every clothing category name to an integer class id, numbered in the
# order the names first appear in the item1 column.
# NOTE(review): that order follows the DataFrame row order, so the ids can
# differ between runs — confirm they match the class list in the YOLO config.
unique_categories = df['item1_category_name'].unique()
item_categories = {name: class_id for class_id, name in enumerate(unique_categories)}

## Convert bbox into YOLO format

In [25]:
from sklearn import preprocessing

def create_YOLO_bbox_str(bbox, img_size):
  """
  Format a corner-style bbox as the four numeric fields of a YOLO label line.

  bbox is (x1, y1, x2, y2) in pixels; the result is
  "x_center y_center w h", each value divided by img_size and rounded to
  6 decimal places. The same img_size is used for both axes, i.e. the image
  is taken to be square.
  """
  left, top, right, bottom = bbox[0], bbox[1], bbox[2], bbox[3]

  yolo_values = (
      (left + right)/(2*img_size),   # x_center
      (top + bottom)/(2*img_size),   # y_center
      (right - left)/img_size,       # w
      (bottom - top)/img_size,       # h
  )

  return " ".join(str(round(value, 6)) for value in yolo_values)

## Create custom dataset

In [26]:
class CustomDataset(Dataset):
    """
    Dataset over a directory laid out as <root_dir>/image/*.jpg and
    <root_dir>/annos/*.json (one annotation per image, same file stem).

    Indexing returns the image resized to a square of `image_size` pixels and,
    as a side effect, writes the matching YOLO label file `im{idx}.txt` into
    the current working directory.
    """

    def __init__(self, root_dir):
        self.root_dir = root_dir
        self.image_dir = os.path.join(root_dir, 'image')
        self.annotation_dir = os.path.join(root_dir, 'annos')
        self.image_files = os.listdir(self.image_dir)
        self.image_size = 352 # Change default YOLO input from 640 to speed up training process

    def __len__(self):
        return len(self.image_files)

    def _load_annotation(self, idx):
        """Load and return the parsed JSON annotation for the image at `idx`."""
        stem = os.path.splitext(self.image_files[idx])[0]
        annotation_name = os.path.join(self.annotation_dir, stem + '.json')

        with open(annotation_name) as f:
            return json.load(f)

    def _scale_bbox(self, bbox, image_width, image_height):
        """Rescale an (x1, y1, x2, y2) bbox from the original image size to the resized square."""
        return [
            bbox[0] / image_width * self.image_size,
            bbox[1] / image_height * self.image_size,
            bbox[2] / image_width * self.image_size,
            bbox[3] / image_height * self.image_size,
        ]

    def _label_line(self, category_name, bbox, image_width, image_height):
        """Build one YOLO label line: '<class id> <x_center> <y_center> <w> <h>'."""
        scaled_bbox = self._scale_bbox(bbox, image_width, image_height)
        return str(item_categories[category_name]) + " " + create_YOLO_bbox_str(scaled_bbox, self.image_size)

    def __getitemcategories__(self, idx):
        """Return [item1 category name, item2 category name]; an entry is None when absent."""
        annotation = self._load_annotation(idx)

        # Extract items and their category names
        item1 = annotation.get('item1', {})
        item2 = annotation.get('item2', {})

        return [item1.get('category_name', None), item2.get('category_name', None)]

    def __getitem__(self, idx):
        """
        Return the resized RGB image at `idx` and write its label file.

        Side effect: writes `im{idx}.txt` into the current working directory
        with one line for item1 and, when item2 has a category name, a second
        line for item2.

        Raises:
            KeyError: a category name is not present in `item_categories`.
            TypeError: an item that must be written has no bounding box.
        """
        img_name = os.path.join(self.image_dir, self.image_files[idx])

        # Open inside a context manager so the underlying file handle is released
        with Image.open(img_name) as source:
            image_width, image_height = source.size
            image = source.convert('RGB').resize((self.image_size, self.image_size))

        annotation = self._load_annotation(idx)

        # Extract specific keys and their nested values
        item1 = annotation.get('item1', {})
        item2 = annotation.get('item2', {})

        # Build every label line BEFORE touching the label file, so that a
        # failure (unknown category, missing bbox) cannot leave an open handle
        # or a partially written label file behind.
        label_lines = [self._label_line(item1.get('category_name', None),
                                        item1.get('bounding_box', None),
                                        image_width, image_height)]

        item2_category_name = item2.get('category_name', None)
        if item2_category_name is not None: # In some cases there is not a second item in the image
            label_lines.append(self._label_line(item2_category_name,
                                                item2.get('bounding_box', None),
                                                image_width, image_height))

        # Create label for image with integer representing class and bbox
        with open(f"im{idx}.txt", "w") as f:
            f.write(label_lines[0] + "\n")
            if len(label_lines) > 1:
                f.write(label_lines[1])

        return image

# Create dataset
dataset = CustomDataset('/content/validation')

## Dataset Split

In [40]:
import random
import shutil

def _reset_dir(path):
  """Delete `path` (and everything in it) if it exists, then recreate it empty."""
  if os.path.exists(path):
    shutil.rmtree(path)
  os.makedirs(path, exist_ok=True)


def _export_sample(dataset, idx, images_dir, labels_dir, split_name):
  """Save the image at `idx` into `images_dir` and copy its label file into `labels_dir`."""
  print(f"Processing {split_name} image {idx}")

  # Indexing the dataset also writes im{idx}.txt into the current working directory
  image = dataset[idx]
  image.save(os.path.join(images_dir, f"im{idx}.jpg"))

  label_file = f"im{idx}.txt"
  label_file_path = os.path.join(labels_dir, label_file)
  if os.path.exists(label_file_path):
    os.unlink(label_file_path)
  shutil.copy2(label_file, labels_dir)


def data_splitter(dataset, new_dataset_size, train_size, val_size,
                  output_root='/content/drive/My Drive/YOLOv5_work', seed=None):
  """
  Creates directories to store images and labels for test, train and validation
  Splits data randomly into test, train and validation according to the specified size
  Tries to balance dataset manually, while keeping YOLO format intact (why train_test_split was not used)

  Args:
    dataset: object providing __len__, __getitemcategories__(idx) and
      __getitem__(idx); indexing must return an image with a .save(path)
      method and write `im{idx}.txt` into the current working directory.
    new_dataset_size: total number of images to export across all splits.
    train_size: fraction of new_dataset_size used for training.
    val_size: fraction of new_dataset_size used for validation; whatever is
      left after train and val becomes the test split.
    output_root: directory that receives images/{train,val,test} and
      labels/{train,val,test} (directory tree specific to YOLO format).
      These six directories are deleted and recreated.
    seed: optional seed for a private random generator; with None the global
      `random` module state is used.

  Raises:
    ValueError: fewer than new_dataset_size images survive the balancing step.
  """
  rng = random if seed is None else random.Random(seed)

  # Draw up to 3x the desired dataset size as candidates; clamp to the dataset
  # size so that small datasets do not fail before balancing even starts
  pool_size = min(len(dataset), new_dataset_size*3)
  candidates = rng.sample(range(0, len(dataset)), pool_size)

  # Caps (as a fraction of the desired dataset size) for items that the model
  # was overfit with in previous iterations; all dress types share one counter
  dress_names = {'long sleeve dress', 'short sleeve dress', 'vest dress', 'sling dress'}
  caps = {
      'skirt': 0.25,
      'dress': 0.35,
      'short sleeve top': 0.2,
      'short sleeve outwear': 0.005,
      'long sleeve top': 0.2,
  }
  counts = dict.fromkeys(caps, 0)

  # Keep a candidate unless one of its items belongs to a group that is already over its cap.
  # NOTE(review): the counters also count items of images that end up being
  # discarded (unchanged from the previous implementation) — confirm this is intended.
  balanced = []
  for i in candidates:
    remove_item = False
    for name in dataset.__getitemcategories__(i):
      group = 'dress' if name in dress_names else name
      if group in caps:
        if counts[group] > caps[group] * new_dataset_size:
          remove_item = True
        counts[group] += 1
    if not remove_item:
      balanced.append(i)

  if len(balanced) < new_dataset_size:
    raise ValueError(
        f"Only {len(balanced)} images remain after balancing; cannot build a dataset of {new_dataset_size}")

  # Update dataset to be using the more balanced dataset
  numbers = rng.sample(balanced, new_dataset_size)
  train_indices = set(rng.sample(numbers, int(train_size*new_dataset_size))) # Randomly select training dataset (indices)
  remaining = [i for i in numbers if i not in train_indices]

  # Randomly select validation dataset (indices); the rest is the test dataset
  val_indices = set(rng.sample(remaining, int(val_size*new_dataset_size)))

  # Only now that the selection succeeded, overwrite the current dataset on disk
  split_dirs = {}
  for split_name in ('train', 'val', 'test'):
    images_dir = os.path.join(output_root, 'images', split_name)
    labels_dir = os.path.join(output_root, 'labels', split_name)
    _reset_dir(images_dir)
    _reset_dir(labels_dir)
    split_dirs[split_name] = (images_dir, labels_dir)

  # Process training dataset (images + labels), in ascending index order
  for i in sorted(train_indices):
    _export_sample(dataset, i, *split_dirs['train'], 'train')

  # Process validation and testing datasets (images + labels)
  for i in remaining:
    split_name = 'val' if i in val_indices else 'test'
    _export_sample(dataset, i, *split_dirs[split_name], split_name)


In [None]:
# Create dataset for training, testing and validation (images + labels stored in Drive)
data_splitter(dataset, 6000, 0.7, 0.1)

In [None]:
# Download YOLOv5 + requirements
!git clone https://github.com/ultralytics/yolov5  # clone repo
!pip install -U -r yolov5/requirements.txt  # install dependencies

## Training + Validation
Epochs = 10, Batch size = 20 <br>
The custom `yolo_config.yaml` file specifies the locations of the training and validation data and overrides the number of output classes, setting it to 13 (from 80)

Following general steps taken here: https://www.kaggle.com/code/mostafaibrahim17/yolov5/notebook

In [42]:
%cd /content/drive/My Drive
%cd yolov5

!python train.py --img 352 --batch 20 --epochs 10 --data /content/drive/My\ Drive/YOLOv5_yaml/yolo_config.yaml --weights yolov5s.pt

/content/drive/My Drive
/content/drive/My Drive/yolov5
2024-04-05 18:44:07.827928: E external/local_xla/xla/stream_executor/cuda/cuda_dnn.cc:9261] Unable to register cuDNN factory: Attempting to register factory for plugin cuDNN when one has already been registered
2024-04-05 18:44:07.828042: E external/local_xla/xla/stream_executor/cuda/cuda_fft.cc:607] Unable to register cuFFT factory: Attempting to register factory for plugin cuFFT when one has already been registered
2024-04-05 18:44:07.831555: E external/local_xla/xla/stream_executor/cuda/cuda_blas.cc:1515] Unable to register cuBLAS factory: Attempting to register factory for plugin cuBLAS when one has already been registered
[34m[1mtrain: [0mweights=yolov5s.pt, cfg=, data=/content/drive/My Drive/YOLOv5_yaml/yolo_config.yaml, hyp=data/hyps/hyp.scratch-low.yaml, epochs=10, batch_size=20, imgsz=352, rect=False, resume=False, nosave=False, noval=False, noautoanchor=False, noplots=False, evolve=None, evolve_population=data/hyps, re

In [None]:
# Only run this cell if training was interrupted; it resumes training from the last saved checkpoint (last.pt)
%cd /content/drive/My\ Drive
%cd yolov5

!python train.py --resume runs/train/exp15/weights/last.pt

## Testing
Following general steps taken here: https://www.kaggle.com/code/mostafaibrahim17/yolov5/notebook

In [15]:
# Directories for test dataset and weights for model
# Directories were updated to determine test and train accuracies for different iterations/datasets
test_dir = '../YOLOv5_work/images/train'
weights_dir = './runs/train/exp2/weights/best.pt'

In [16]:
%cd /content/drive/My\ Drive
!pip install ultralytics
%cd yolov5
!python detect.py --weights $weights_dir --img 352 --conf 0.4 --save-txt --source $test_dir

/content/drive/My Drive
/content/drive/My Drive/yolov5
[34m[1mdetect: [0mweights=['./runs/train/exp2/weights/best.pt'], source=../YOLOv5_work/images/train, data=data/coco128.yaml, imgsz=[352, 352], conf_thres=0.4, iou_thres=0.45, max_det=1000, device=, view_img=False, save_txt=True, save_csv=False, save_conf=False, save_crop=False, nosave=False, classes=None, agnostic_nms=False, augment=False, visualize=False, update=False, project=runs/detect, name=exp, exist_ok=False, line_thickness=3, hide_labels=False, hide_conf=False, half=False, dnn=False, vid_stride=1
fatal: cannot change to '/content/drive/My': No such file or directory
YOLOv5 🚀 2024-4-2 Python-3.10.12 torch-2.2.1+cu121 CPU

Fusing layers... 
Model summary: 157 layers, 7045186 parameters, 0 gradients, 15.9 GFLOPs
image 1/4200 /content/drive/My Drive/YOLOv5_work/images/train/im0.jpg: 352x352 (no detections), 138.6ms
image 2/4200 /content/drive/My Drive/YOLOv5_work/images/train/im1000.jpg: 352x352 1 short sleeve outwear, 1 lon

In [7]:
def yolo2voc(image_height, image_width, bboxes):
    """
    Convert one YOLO bbox to voc (corner) format so that IoU can be computed
    yolo => [xmid, ymid, w, h] (normalized)
    voc  => [x1, y1, x2, y2] (pixels)

    The conversion is done in place: the first four entries of `bboxes` are
    overwritten and the same list is returned.
    """

    # Scale the normalized values up to pixels
    x_mid = bboxes[0]*image_width
    y_mid = bboxes[1]*image_height
    box_w = bboxes[2]*image_width
    box_h = bboxes[3]*image_height

    # Top-left corner, then bottom-right corner as top-left + size
    left = x_mid - box_w/2
    top = y_mid - box_h/2

    bboxes[0] = left
    bboxes[1] = top
    bboxes[2] = left + box_w
    bboxes[3] = top + box_h

    return bboxes

In [8]:
def get_iou(bb1, bb2):
    """
    Determines IoU between 2 bounding boxes
    Derived from: https://github.com/1297rohit/RCNN/blob/master/RCNN.ipynb

    Both boxes are [x1, y1, x2, y2] with x1 <= x2 and y1 <= y2.
    Returns a float in [0.0, 1.0]; 0.0 when the boxes do not overlap or when
    their union has no area (e.g. two coincident zero-area boxes).
    """

    # Corners of the intersection rectangle
    x_left = max(bb1[0], bb2[0])
    y_top = max(bb1[1], bb2[1])
    x_right = min(bb1[2], bb2[2])
    y_bottom = min(bb1[3], bb2[3])

    if x_right < x_left or y_bottom < y_top:
        return 0.0

    intersection_area = (x_right - x_left) * (y_bottom - y_top)

    bb1_area = (bb1[2] - bb1[0]) * (bb1[3] - bb1[1])
    bb2_area = (bb2[2] - bb2[0]) * (bb2[3] - bb2[1])

    union_area = float(bb1_area + bb2_area - intersection_area)

    # Degenerate boxes can touch while having no area at all; without this
    # guard the division below raises ZeroDivisionError
    if union_area == 0.0:
        return 0.0

    iou = intersection_area / union_area

    assert iou >= 0.0
    assert iou <= 1.0

    return iou

In [18]:
from glob import glob

# Directories were updated to determine test and train accuracies for different iterations/datasets
test_labels_dir = '/content/drive/My Drive/YOLOv5_work/labels/train'
pred_labels_dir = '/content/drive/My Drive/yolov5/runs/detect/exp8/labels/*txt'

def _read_yolo_labels(label_path, image_size):
  """Read a YOLO label file and return (class ids as strings, bboxes in voc format)."""
  classes = []
  boxes = []

  # Context manager so the handle is closed even if a line fails to parse
  with open(label_path, 'r') as label_file:
    for line in label_file:
      values = line.strip().split()
      if not values: # Skip blank lines
        continue
      classes.append(values[0])
      boxes.append(yolo2voc(image_size, image_size, [float(j) for j in values[1:]]))

  return classes, boxes


def get_accuracy_and_iou(test_labels_dir, pred_labels_dir, image_size=352):
  """
  Goes through test dataset (ground truth) and predictions to determine classification accuracy and average IoU

  Args:
    test_labels_dir: directory holding the ground-truth label files.
    pred_labels_dir: glob pattern matching the predicted label files; the
      ground-truth file is looked up under the same file name.
    image_size: side length in pixels used to convert the normalized boxes.

  Returns:
    (accuracy, avg_iou) where accuracy is the number of ground-truth items
    whose class appears among the predictions of the same image, divided by
    the total number of predictions, and avg_iou is the mean IoU over those
    matched items (each compared against the first predicted box of its
    class). Either value is 0.0 when its denominator would be zero.
  """

  # Initialize counters + iou
  correct_predict = 0
  total_predict = 0
  iou = 0

  # Iterate through each data point that had a prediction
  for file_path in tqdm(glob(pred_labels_dir)):
      image_id = os.path.basename(file_path).split('.')[0] # Extract image id
      ground_truth_path = os.path.join(test_labels_dir, image_id + '.txt')

      # Get predicted classes and bbox for current data point
      pred_class, pred_bbox = _read_yolo_labels(file_path, image_size)

      # Get ground truth classes and bbox for current data point
      true_class, true_bbox = _read_yolo_labels(ground_truth_path, image_size)

      # Determine if there was a correct prediction(s) and the corresponding IoU
      for i in range(len(true_class)):
        if true_class[i] in pred_class:
          correct_predict += 1
          iou += get_iou(true_bbox[i], pred_bbox[pred_class.index(true_class[i])])

      total_predict += len(pred_class)

  # Guard the divisions: there may be no predictions or no matches at all
  accuracy = correct_predict/total_predict if total_predict else 0.0
  avg_iou = iou/correct_predict if correct_predict else 0.0

  return accuracy, avg_iou

print(get_accuracy_and_iou(test_labels_dir, pred_labels_dir))


100%|██████████| 3318/3318 [27:36<00:00,  2.00it/s]

(0.7922548645555132, 0.8661244699856329)



