In [66]:
# Render matplotlib figures inline in the notebook output.
%matplotlib inline
# (Re)load the autoreload extension and reload all modules before each cell runs,
# so edits to imported project modules are picked up without restarting the kernel.
%reload_ext autoreload
%autoreload 2

## Data overview

* [Official description and data](https://tianchi.aliyun.com/competition/information.htm?spm=5176.11165261.5678.2.585423ebZpjaY4&raceId=231648)

In [72]:
import os
import sys
# Make the parent directory importable; presumably the project's `utils`
# package lives there (needed for the `utils.config` import below) — verify.
sys.path.append('..')

import pandas as pd
# Show up to 100 columns when displaying DataFrames, so wide frames are not truncated.
pd.options.display.max_columns = 100

# Project configuration object; this notebook reads `opt.db_path` (the dataset root) from it.
from utils.config import opt

The directory layout follows [gathierry/FashionAI-KeyPointsDetectionOfApparel](https://github.com/gathierry/FashionAI-KeyPointsDetectionOfApparel/#data-preparation). Here we list the contents of each directory, with file sizes, to check that everything is in place.

In [68]:
# List every directory up to two levels below the dataset root and show its
# contents with `ls -lh`. Note: for sub-directories `ls` reports the size of
# the directory entry itself, not the cumulative size of the files inside.
!find {opt.db_path} -maxdepth 2 -type d -print0 | xargs -0 ls -lh

/shared_folder/data/tianchi/fashionAI_key_points:
total 28K
drwxr-xr-x 2 root root 4.0K Jun  9 07:24 checkpoints
drwxr-xr-x 3 1000 1000 4.0K Jun  9 07:30 r1_test_a
drwxr-xr-x 3 1000 1000 4.0K Jun  9 07:30 r1_test_b
drwxr-xr-x 4 1000 1000 4.0K Jun  8 11:12 r1_train
drwxr-xr-x 3 1000 1000 4.0K Jun  8 11:07 r2_test_a
drwxr-xr-x 3 1000 1000 4.0K Jun  8 11:06 r2_test_b
drwxr-xr-x 4 1000 1000 4.0K Feb 14 07:17 wu_train

/shared_folder/data/tianchi/fashionAI_key_points/checkpoints:
total 0

/shared_folder/data/tianchi/fashionAI_key_points/r1_test_a:
total 3.3M
drwxr-xr-x 7 1000 1000 4.0K Feb 26 14:54 Images
-rw-r--r-- 1 1000 1000 2.7M Jun  5 16:57 fashionAI_key_points_test_a_answer_20180426.csv
-rw-r--r-- 1 1000 1000 570K Feb 27 05:26 test.csv

/shared_folder/data/tianchi/fashionAI_key_points/r1_test_a/Images:
total 648K
drwxr-xr-x 2 1000 1000 120K Feb 27 02:13 blouse
drwxr-xr-x 2 1000 1000 128K Feb 27 02:15 dress
drwxr-xr-x 2 1000 1000 132K Feb 27 03:23 outwear
drwxr-

Check the number of images in each directory:

In [69]:
# Sub-directories of the dataset root whose `Images/` folder is counted.
img_dirs = ['wu_train', 'r1_train', 'r1_test_a', 'r1_test_b', 'r2_test_a', 'r2_test_b', ]

print("Number of images:")
total = 0  # grand total across all listed directories

for dir_name in img_dirs:
    print("\n>>", dir_name)
    sub_total = 0  # total for the current directory
    for d in (opt.db_path / dir_name / 'Images').iterdir():
        # Only category folders are counted; skip stray files (e.g. `.DS_Store`)
        # that would otherwise make `os.listdir` raise NotADirectoryError.
        if not d.is_dir():
            continue
        # Counts every entry in the category folder (assumes they are all images).
        d_total = len(os.listdir(d))
        print(f"{d.name:8}:{d_total}")
        sub_total += d_total

    print(f"{'TOTAL':8}:{sub_total}")
    total += sub_total

print("\nTotal number of images:", total)

Number of images:

>> wu_train
blouse  :2997
outwear :2138
trousers:2795
dress   :2312
skirt   :2292
TOTAL   :12534

>> r1_train
blouse  :7158
outwear :5596
trousers:6347
dress   :4912
skirt   :7618
TOTAL   :31631

>> r1_test_a
blouse  :1977
outwear :2043
trousers:1958
dress   :2038
skirt   :1980
TOTAL   :9996

>> r1_test_b
blouse  :1974
outwear :1947
trousers:1946
dress   :2052
skirt   :2051
TOTAL   :9970

>> r2_test_a
blouse  :2586
outwear :2508
trousers:2631
dress   :2693
skirt   :2683
TOTAL   :13101

>> r2_test_b
blouse  :10670
outwear :10906
trousers:10618
dress   :11096
skirt   :11154
TOTAL   :54444

Total number of images: 131676


Let's check out how the data are structured:

In [70]:
# Read the `wu_train` annotation table into a DataFrame.
annotations_csv = opt.db_path / 'wu_train/Annotations/annotations.csv'
df = pd.read_csv(annotations_csv)

In [71]:
# Preview the first rows to see the annotation columns and how values are encoded.
df.head()

Unnamed: 0,image_id,image_category,neckline_left,neckline_right,center_front,shoulder_left,shoulder_right,armpit_left,armpit_right,waistline_left,waistline_right,cuff_left_in,cuff_left_out,cuff_right_in,cuff_right_out,top_hem_left,top_hem_right,waistband_left,waistband_right,hemline_left,hemline_right,crotch,bottom_left_in,bottom_left_out,bottom_right_in,bottom_right_out
0,Images/blouse/d21eab37ddc74ea5a5f1b4a5d3d9055a...,blouse,241_135_1,301_135_1,259_136_1,216_142_1,319_144_1,212_186_1,307_202_1,-1_-1_-1,-1_-1_-1,203_236_1,195_256_1,278_241_1,283_261_1,206_243_0,292_252_0,-1_-1_-1,-1_-1_-1,-1_-1_-1,-1_-1_-1,-1_-1_-1,-1_-1_-1,-1_-1_-1,-1_-1_-1,-1_-1_-1
1,Images/blouse/02b54c183d2dbd2c056db14303064886...,blouse,244_76_1,282_76_1,257_99_1,228_81_0,303_85_1,222_134_1,295_131_1,-1_-1_-1,-1_-1_-1,199_153_1,178_100_0,293_173_1,332_150_1,229_161_1,297_162_0,-1_-1_-1,-1_-1_-1,-1_-1_-1,-1_-1_-1,-1_-1_-1,-1_-1_-1,-1_-1_-1,-1_-1_-1,-1_-1_-1
2,Images/blouse/677b5fac683072149f03e13f83dc9f4a...,blouse,240_51_1,311_45_1,276_68_1,160_93_1,398_108_1,184_190_1,361_191_1,-1_-1_-1,-1_-1_-1,107_466_1,152_480_1,294_468_1,326_482_1,172_457_1,350_462_1,-1_-1_-1,-1_-1_-1,-1_-1_-1,-1_-1_-1,-1_-1_-1,-1_-1_-1,-1_-1_-1,-1_-1_-1,-1_-1_-1
3,Images/blouse/f1b0555d953037bc0b60ac026b9aad7d...,blouse,230_102_1,278_102_0,258_116_1,189_121_1,319_117_1,201_183_1,315_190_0,-1_-1_-1,-1_-1_-1,187_256_0,159_270_1,321_228_1,339_199_1,176_334_1,358_296_1,-1_-1_-1,-1_-1_-1,-1_-1_-1,-1_-1_-1,-1_-1_-1,-1_-1_-1,-1_-1_-1,-1_-1_-1,-1_-1_-1
4,Images/blouse/6cd16d426cf015b18b0b5eaca3670c76...,blouse,247_127_1,308_139_1,296_185_1,192_163_1,351_182_1,209_232_1,340_245_0,-1_-1_-1,-1_-1_-1,230_380_1,166_409_1,351_368_0,360_403_1,209_370_0,357_323_1,-1_-1_-1,-1_-1_-1,-1_-1_-1,-1_-1_-1,-1_-1_-1,-1_-1_-1,-1_-1_-1,-1_-1_-1,-1_-1_-1


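Each keypoint cell is encoded as `x_y_v` (pixel coordinates plus a visibility flag); `-1_-1_-1` marks a keypoint that is not defined for that image's category. For details, please check the [official description](https://tianchi.aliyun.com/competition/information.htm?spm=5176.100067.5678.2.2b8b2743mcb1CH&raceId=231648).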