In [1]:
import os
import json
from tqdm import tqdm
import numpy as np
from sklearn.model_selection import train_test_split
from PIL import Image

In [2]:
# Dataset locations and output settings shared by the cells below.
# NOTE(review): these are absolute, machine-specific paths -- adjust them before running elsewhere.

# Root of the model dataset; its "image" and "image-parse-agnostic-v3.2" sub-directories are read later.
image_root = "/home/user/development/datasets/process_lower_bbox"
# Root of the cloth dataset; its "image" sub-directory is listed later.
cloth_root = "/home/user/development/datasets/bottom_lower_crop"
# JSON file that is loaded into `type_dict` (keys are type names; values are used as collections of ids).
type_dict_path = "/home/user/development/datasets/bottom_lower_crop/bottom_train_supple_b0_total_data_type_pid_map.json"
# Directory that receives the generated .txt lists.
output_root = "/home/user/development/datasets/bottom_lower_crop/demo"
# NOTE(review): not referenced by any cell visible in this notebook excerpt.
output_name = "skirts.txt"

In [4]:
# List the candidate model images and cloth images from their "image" sub-directories.
image_list = os.listdir(os.path.join(image_root,"image"))
print(f"{len(image_list)} models in total")
cloth_list = os.listdir(os.path.join(cloth_root,"image"))
print(f"{len(cloth_list)} clothes in total")

# --- Keep only models with clean parse maps ---
# A model counts as "dirty" when its agnostic parse map contains more than 100
# pixels labelled 5 or 6.
# NOTE(review): the meaning of labels 5 and 6 is not visible here -- presumably
# lower-body garment classes that should have been removed; confirm.
parse_dir = os.path.join(image_root,"image-parse-agnostic-v3.2")
dirty_list = []
for img in tqdm(image_list):
    # Swap only the extension; a blanket str.replace("jpg", "png") would also
    # rewrite "jpg" occurring inside the file stem.
    stem, ext = os.path.splitext(img)
    parse_name = stem + ".png" if ext == ".jpg" else img
    # The context manager closes the file handle once the pixels are copied out.
    with Image.open(os.path.join(parse_dir, parse_name)) as parse_img:
        img_array = np.array(parse_img)
    if np.sum(img_array == 5) + np.sum(img_array == 6) > 100:
        # Record the original image name so no lossy png -> jpg conversion
        # is needed when filtering below.
        dirty_list.append(img)
dirty_set = set(dirty_list)  # O(1) membership tests
image_list = [file for file in image_list if file not in dirty_set]
print(f"{len(image_list)} clean models in total")

# --- Keep only clothes whose id is listed under "Skirts" or "Trousers" ---
with open(type_dict_path,"r") as ff:
    type_dict = json.load(ff)
valid_set = set(type_dict["Skirts"] + type_dict["Trousers"])
# The cloth id is the integer file stem (text before the first dot).
cloth_list = [file for file in cloth_list if int(file.split(".")[0]) in valid_set]
print(f"{len(cloth_list)} valid clothes in total")

123 models in total
25328 clothes in total


100%|██████████| 123/123 [00:00<00:00, 698.29it/s]

113 clean models in total
22551 valid clothes in total





In [7]:
# Print one "<type name>: <number of entries>" line per key of type_dict.
for type_name in type_dict:
    entries = type_dict[type_name]
    print(f"{type_name}: {len(entries)}")

Midi Skirt: 3392
Jeans: 2936
A-line Skirt: 5249
Maxi Skirt: 2365
Straight Skirt: 2063
Mini Skirt: 3339
Pleated Skirt: 656
Pants: 4646
Sweatpants: 238
Leather Pants: 870
Skinny Pants: 436
Wide Leg Pants: 1566
Cropped Jeans: 1055
Skinny Jeans: 403
Bodycon Skirt: 1497
Shorts: 2502
Straight Jeans: 750
Leggings: 429
Flared Pants: 675
Cropped Pants: 1267
Straight Pants: 620
Flared Jeans: 476
Denim Skirt: 468
Wide Leg Jeans: 1095
Leather Skirt: 800
Denim Dungarees: 5
Shirt: 1
Skirts: 9328
Trousers: 13223


In [10]:
import re

# Keys left out of both per-type lists. In the counts printed above, "Skirts"
# and "Trousers" are by far the largest keys (apparently umbrella categories --
# confirm) and "Denim Dungarees" / "Shirt" have only a handful of entries.
excluded_types = {"Denim Dungarees", "Shirt", "Skirts", "Trousers"}

# Split the remaining type names in a single pass: names matching the skirt
# pattern go to skirt_types, everything else goes to trouser_types.
skirt_pattern = re.compile(r"(.*)Skirt")
skirt_types = []
trouser_types = []
for cloth_type in type_dict:
    if cloth_type in excluded_types:
        continue
    if skirt_pattern.match(cloth_type):
        skirt_types.append(cloth_type)
    else:
        trouser_types.append(cloth_type)

In [13]:
# Persist the skirt and trouser type names, one name per line.
# Make sure the output directory exists so this cell also works on a fresh run.
os.makedirs(output_root, exist_ok=True)
# "w" instead of "a": re-running the cell rewrites the files rather than
# appending a duplicate copy of every line. The `with` block closes the file,
# so no explicit close() is needed.
with open(os.path.join(output_root,"skirt_types.txt"),"w") as ff:
    ff.writelines(skirt_type + "\n" for skirt_type in skirt_types)
with open(os.path.join(output_root,"trouser_types.txt"),"w") as ff:
    ff.writelines(trouser_type + "\n" for trouser_type in trouser_types)

In [9]:
# For every non-excluded cloth type, write "<model> <cloth>.jpg" pair lines to
# <output_root>/<type>.txt, pairing each sampled model with each sampled cloth.
# NOTE(review): sampling uses NumPy's global random state and no seed is set in
# the visible cells, so the generated lists differ between runs.
SAMPLES_PER_SIDE = 10  # up to 10 models x 10 clothes -> up to 100 pairs per type

os.makedirs(output_root, exist_ok=True)
for key, value in type_dict.items():
    if key in excluded_types:
        continue
    # replace=False avoids duplicate models/clothes (the default samples with
    # replacement); the size is capped so small populations do not raise.
    models = np.random.choice(image_list, min(SAMPLES_PER_SIDE, len(image_list)), replace=False)
    clothes = np.random.choice(value, min(SAMPLES_PER_SIDE, len(value)), replace=False)
    clothes = [str(num)+".jpg" for num in clothes]
    file_path = os.path.join(output_root, key+".txt")
    # "w" instead of "a": re-running the cell replaces the list instead of
    # accumulating pairs from earlier runs. The `with` block closes the file.
    with open(file_path,"w") as ff:
        for model in models:
            for cloth in clothes:
                ff.write(f"{model} {cloth}\n")

In [6]:
""" Generate lists for skirts and trousers respectively """

# NOTE: everything below is disabled (commented out) and does not run.
# If re-enabled, it would append the names of the files in cloth_list whose
# integer stem is in type_dict["Skirts"] to skirts.txt, and those whose stem is
# in type_dict["Trousers"] to trousers.txt, both under output_root.
# files = [file for file in cloth_list if int(file.strip().split(".")[0]) in type_dict["Skirts"]]
# with open(os.path.join(output_root,"skirts.txt"),"a") as ff:
#     for file in files:
#         line = file + "\n"
#         ff.write(line)
# files = [file for file in cloth_list if int(file.strip().split(".")[0]) in type_dict["Trousers"]]
# with open(os.path.join(output_root,"trousers.txt"),"a") as ff:
#     for file in files:
#         line = file + "\n"
#         ff.write(line)

22551