# Setup Environment

Import required packages:

In [None]:
import os, cv2, ntpath
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns

# Apply Seaborn's default theme so every chart below shares the same style
sns.set()

Global variables and settings:

In [None]:
# Root of the project workspace; every dataset location below derives from it
path_dir_project = "/thecube/students/jravagli"

# Directory that holds the preprocessed datasets and their annotation files
path_dir_pp_datasets = os.path.join(path_dir_project, "datasets/used-pp-rebalanced")

# Image folder of each preprocessed split
path_pp_train_dataset, path_pp_valid_dataset, path_pp_test_dataset = (
    os.path.join(path_dir_pp_datasets, split_dir)
    for split_dir in ("train", "valid", "test")
)

# Annotation file of each preprocessed split
path_pp_train_ann, path_pp_valid_ann, path_pp_test_ann = (
    os.path.join(path_dir_pp_datasets, ann_file)
    for ann_file in ("train.txt", "valid.txt", "test.txt")
)

# Class Distributions

Function to load the image names, class labels, and clothes labels from the annotation txt files:

In [None]:
def read_annotation_file(path_file):
    """Parse a comma-separated annotation file into three parallel lists.

    Every non-blank line must contain at least three comma-separated fields,
    in this order: image name, class label, clothes label. Whitespace around
    each field is stripped and any field after the third is ignored. Blank
    (or whitespace-only) lines, such as a trailing empty line, are skipped.

    Args:
        path_file: Path of the annotation text file to read.

    Returns:
        A tuple ``(names, classes, clothes)`` of lists of strings with one
        entry per non-blank line, in file order.

    Raises:
        IndexError: If a non-blank line has fewer than three fields.
    """
    list_images_names = []
    list_images_classes = []
    list_images_clothes = []

    with open(path_file) as f:
        # Stream the file line by line instead of loading it all at once
        for line in f:
            if not line.strip():
                # A blank line carries no annotation; indexing it would fail
                continue
            splits = line.split(",")
            list_images_names.append(splits[0].strip())
            list_images_classes.append(splits[1].strip())
            list_images_clothes.append(splits[2].strip())

    return list_images_names, list_images_classes, list_images_clothes

Class-label mapping:

In [None]:
# Display names of the classes, used as tick labels in the charts below
class_names = [
    "concert",
    "graduation",
    "meeting",
    "mountain-trip",
    "picnic",
    "sea-holiday",
    "ski-holiday",
    "wedding",
    "conference",
    "exhibition",
    "fashion",
    "protest",
    "sport",
    "theater-dance",
]

Train set:

In [None]:
# Bar chart of how many train images belong to each class
list_images_names, list_images_classes, list_images_clothes = read_annotation_file(path_pp_train_ann)

print(f"Train images: {len(list_images_names)}")

# Sorted distinct class ids present in the split and the image count of each
values, counts = np.unique(np.array(list_images_classes, dtype=np.int32),
                           return_counts=True)

y = np.arange(len(values))

plt.figure(figsize=(17,8))
plt.bar(y, counts, align="center", color=sns.color_palette("muted"))
# Label every bar with the name of the class id it counts, so a class missing
# from this split cannot shift or mismatch the tick labels.
# NOTE(review): assumes class ids are 0-based indices into class_names - confirm
plt.xticks(y, np.array(class_names)[values])
plt.ylabel("Images")
plt.title("Train classes distribution")

plt.show()

Validation set:

In [None]:
# Bar chart of how many validation images belong to each class
list_images_names, list_images_classes, list_images_clothes = read_annotation_file(path_pp_valid_ann)

print(f"Validation images: {len(list_images_names)}")

# Sorted distinct class ids present in the split and the image count of each
values, counts = np.unique(np.array(list_images_classes, dtype=np.int32),
                           return_counts=True)

y = np.arange(len(values))

plt.figure(figsize=(16,8))
plt.bar(y, counts, align="center", color=sns.color_palette("muted"))
# Label every bar with the name of the class id it counts, so a class missing
# from this split cannot shift or mismatch the tick labels.
# NOTE(review): assumes class ids are 0-based indices into class_names - confirm
plt.xticks(y, np.array(class_names)[values])
plt.ylabel("Images")
plt.title("Validation classes distribution")

plt.show()

Test set:

In [None]:
# Bar chart of how many test images belong to each class
list_images_names, list_images_classes, list_images_clothes = read_annotation_file(path_pp_test_ann)

print(f"Test images: {len(list_images_names)}")

# Sorted distinct class ids present in the split and the image count of each
values, counts = np.unique(np.array(list_images_classes, dtype=np.int32),
                           return_counts=True)
# Raw per-class counts, in the same order as the bars
print(counts)
y = np.arange(len(values))

plt.figure(figsize=(16,8))
plt.bar(y, counts, align="center", color=sns.color_palette("muted"))
# Label every bar with the name of the class id it counts, so a class missing
# from this split cannot shift or mismatch the tick labels.
# NOTE(review): assumes class ids are 0-based indices into class_names - confirm
plt.xticks(y, np.array(class_names)[values])
plt.ylabel("Images")
plt.title("Test classes distribution")

plt.show()

# Clothes Distributions

Clothes-label mapping:

In [None]:
# Display names of the clothes categories; the charts below index this list
# with the clothes labels read from the annotation files
clothes_names = [
    "ss top",
    "ls top",
    "ss outwear",
    "ls outwear",
    "vest",
    "sling",
    "shorts",
    "trousers",
    "skirt",
    "ss dress",
    "ls dress",
    "vest dress",
    "sling dress",
]

Train set:

In [None]:
# Bar chart of how many train images carry each clothes label
list_images_names, list_images_classes, list_images_clothes = read_annotation_file(path_pp_train_ann)

# Sorted distinct clothes ids present in the split and the image count of each
values, counts = np.unique(np.array(list_images_clothes, dtype=np.int32),
                           return_counts=True)
print(f"Found {len(values)} out of {len(clothes_names)} clothes")

y = np.arange(len(values))

plt.figure(figsize=(17,8))
plt.bar(y, counts, align="center", color=sns.color_palette("muted"))
# Only the clothes ids found in this split get a bar, so pick their names by id
plt.xticks(y, np.array(clothes_names)[values])
plt.ylabel("Images")
# This chart shows clothes labels, not event classes
plt.title("Train clothes distribution")

plt.show()

Validation set:

In [None]:
# Bar chart of how many validation images carry each clothes label
list_images_names, list_images_classes, list_images_clothes = read_annotation_file(path_pp_valid_ann)

# Sorted distinct clothes ids present in the split and the image count of each
values, counts = np.unique(np.array(list_images_clothes, dtype=np.int32),
                           return_counts=True)
print(f"Found {len(values)} out of {len(clothes_names)} clothes")

y = np.arange(len(values))

plt.figure(figsize=(16,8))
plt.bar(y, counts, align="center", color=sns.color_palette("muted"))
# Only the clothes ids found in this split get a bar, so pick their names by id
plt.xticks(y, np.array(clothes_names)[values])
plt.ylabel("Images")
# This chart shows clothes labels, not event classes
plt.title("Validation clothes distribution")

plt.show()

Test set:

In [None]:
# Bar chart of how many test images carry each clothes label
list_images_names, list_images_classes, list_images_clothes = read_annotation_file(path_pp_test_ann)

# Sorted distinct clothes ids present in the split and the image count of each
values, counts = np.unique(np.array(list_images_clothes, dtype=np.int32),
                           return_counts=True)
print(f"Found {len(values)} out of {len(clothes_names)} clothes")

y = np.arange(len(values))

plt.figure(figsize=(16,8))
plt.bar(y, counts, align="center", color=sns.color_palette("muted"))
# Only the clothes ids found in this split get a bar, so pick their names by id
plt.xticks(y, np.array(clothes_names)[values])
plt.ylabel("Images")
# This chart shows clothes labels, not event classes
plt.title("Test clothes distribution")

plt.show()

# Image checking

In [None]:
# Name of the split to inspect: "train", "valid", or anything else for test
dataset = "train"

# (annotation file, image directory) of the explicitly named splits
_paths_by_split = {
    "train": (path_pp_train_ann, path_pp_train_dataset),
    "valid": (path_pp_valid_ann, path_pp_valid_dataset),
}

# Any value other than "train" or "valid" selects the test split
path_pp_ann, path_pp_dataset = _paths_by_split.get(
    dataset, (path_pp_test_ann, path_pp_test_dataset)
)

Load the annotations and image names:

In [None]:
# Read the three parallel annotation lists of the selected split
list_pp_imgs, list_pp_classes, list_pp_clothes = read_annotation_file(path_pp_ann)

# Array versions of the class labels (as integers) and of the image names,
# so the names can be filtered with a boolean mask on the classes
np_pp_classes = np.asarray(list_pp_classes, dtype=np.int32)
np_pp_imgs = np.asarray(list_pp_imgs)

Select a random image of the chosen class from the dataset, to compare with the original USED image:

In [None]:
# Class id whose images are sampled
class_label = 3

# Keep only the names of the images annotated with the chosen class
mask = (np_pp_classes == class_label)
filtered_pp_imgs = np_pp_imgs[mask]
if filtered_pp_imgs.size == 0:
    # Without this guard np.random.randint(0) fails with an unrelated message
    raise ValueError(f"No images with class label {class_label} in {path_pp_dataset}")

# Pick one of the matching images uniformly at random
img_pp_name = filtered_pp_imgs[np.random.randint(len(filtered_pp_imgs))]

path_pp_img = os.path.join(path_pp_dataset, img_pp_name)

pp_img = cv2.imread(path_pp_img)
if pp_img is None:
    # cv2.imread signals a missing or unreadable file by returning None
    raise FileNotFoundError(f"Could not read image: {path_pp_img}")

# OpenCV loads images as BGR; reverse the channel axis to display them as RGB
plt.imshow(pp_img[:, :, ::-1])
plt.show()

print(f"Img path: {path_pp_img}")
print(f"Preprocessed image shape: {pp_img.shape}")

Debug cell to compare the cropped image with the original:

In [None]:
# Original (non-preprocessed) USED image to compare with the preprocessed one
path_orig_img = "/thecube/students/jravagli/datasets/used/train/train-part1/9385977683_c8c26f3a56.jpg"

img = cv2.imread(path_orig_img)
if img is None:
    # cv2.imread signals a missing or unreadable file by returning None
    raise FileNotFoundError(f"Could not read image: {path_orig_img}")

# OpenCV loads images as BGR; reverse the channel axis to display them as RGB
plt.imshow(img[:, :, ::-1])
plt.show()
# This is the original image, so label its shape accordingly
print(f"Original image shape: {img.shape}")