In [1]:
import pandas as pd
import numpy as np

# Load the Dataset
annotations= pd.read_csv('./data_annotations.csv')
classes= pd.read_csv('./data_classes.csv')

In [2]:
# Display first frew rows
annotations.head()

Unnamed: 0,filename,width,height,class,xmin,ymin,xmax,ymax
0,dior_fw23_31_jpg.rf.01d0c04889e426e7cd17c3880d...,2560,3840,04_vest,1007,840,1048,884
1,dior_fw23_31_jpg.rf.01d0c04889e426e7cd17c3880d...,2560,3840,04_beige,1061,848,1112,905
2,dior_fw23_31_jpg.rf.01d0c04889e426e7cd17c3880d...,2560,3840,04_velvet,1130,862,1171,908
3,dior_fw23_31_jpg.rf.01d0c04889e426e7cd17c3880d...,2560,3840,05_handbag,1436,1874,1488,1927
4,dior_fw23_31_jpg.rf.01d0c04889e426e7cd17c3880d...,2560,3840,05_brown,1508,1920,1581,1997


In [3]:
# Check the column names and data types
annotations.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 2223 entries, 0 to 2222
Data columns (total 8 columns):
 #   Column    Non-Null Count  Dtype 
---  ------    --------------  ----- 
 0   filename  2223 non-null   object
 1   width     2223 non-null   int64 
 2   height    2223 non-null   int64 
 3   class     2223 non-null   object
 4   xmin      2223 non-null   int64 
 5   ymin      2223 non-null   int64 
 6   xmax      2223 non-null   int64 
 7   ymax      2223 non-null   int64 
dtypes: int64(6), object(2)
memory usage: 139.1+ KB


# Clean Data

In [4]:
# Clean the data
# Drop a single column:
# Drop a single column
annotations.drop('width', axis=1, inplace=True)
annotations.drop('height', axis=1, inplace=True)
annotations.drop('xmin', axis=1, inplace=True)
annotations.drop('ymin', axis=1, inplace=True)
annotations.drop('xmax', axis=1, inplace=True)
annotations.drop('ymax', axis=1, inplace=True)

In [5]:
annotations.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 2223 entries, 0 to 2222
Data columns (total 2 columns):
 #   Column    Non-Null Count  Dtype 
---  ------    --------------  ----- 
 0   filename  2223 non-null   object
 1   class     2223 non-null   object
dtypes: object(2)
memory usage: 34.9+ KB


In [6]:
annotations.head()

Unnamed: 0,filename,class
0,dior_fw23_31_jpg.rf.01d0c04889e426e7cd17c3880d...,04_vest
1,dior_fw23_31_jpg.rf.01d0c04889e426e7cd17c3880d...,04_beige
2,dior_fw23_31_jpg.rf.01d0c04889e426e7cd17c3880d...,04_velvet
3,dior_fw23_31_jpg.rf.01d0c04889e426e7cd17c3880d...,05_handbag
4,dior_fw23_31_jpg.rf.01d0c04889e426e7cd17c3880d...,05_brown


In [7]:
# Split the filename column by "_jpg" and keep the first part
annotations['filename'] = annotations['filename'].str.split('_jpg').str[0]


In [8]:
annotations.head()

Unnamed: 0,filename,class
0,dior_fw23_31,04_vest
1,dior_fw23_31,04_beige
2,dior_fw23_31,04_velvet
3,dior_fw23_31,05_handbag
4,dior_fw23_31,05_brown


In [9]:
# Define the conditions and corresponding brand names
conditions = [
    annotations['filename'].str.startswith('dior'),
    annotations['filename'].str.startswith('saint_laurent'),
    annotations['filename'].str.startswith('givenchy'),
    annotations['filename'].str.startswith('loewe')
]

brands = ['dior', 'saint_laurent', 'givenchy', 'loewe']

# Create a new 'brand' column based on the conditions and brands
annotations['brand'] = np.select(conditions, brands, default='')

annotations.head()

Unnamed: 0,filename,class,brand
0,dior_fw23_31,04_vest,dior
1,dior_fw23_31,04_beige,dior
2,dior_fw23_31,04_velvet,dior
3,dior_fw23_31,05_handbag,dior
4,dior_fw23_31,05_brown,dior


In [10]:
annotations

Unnamed: 0,filename,class,brand
0,dior_fw23_31,04_vest,dior
1,dior_fw23_31,04_beige,dior
2,dior_fw23_31,04_velvet,dior
3,dior_fw23_31,05_handbag,dior
4,dior_fw23_31,05_brown,dior
...,...,...,...
2218,dior_fw23_12,07_loosefit,dior
2219,dior_fw23_12,07_silk,dior
2220,dior_fw23_12,09_black,dior
2221,dior_fw23_12,09_boot,dior


In [11]:
# Define the conditions and corresponding category names
conditions = [
    annotations['class'].str.startswith('01_'),
    annotations['class'].str.startswith('02_'),
    annotations['class'].str.startswith('03_'),
    annotations['class'].str.startswith('04_'),
    annotations['class'].str.startswith('4a_'),
    annotations['class'].str.startswith('05_'),
    annotations['class'].str.startswith('06_'),
    annotations['class'].str.startswith('07_'),
    annotations['class'].str.startswith('08_'),
    annotations['class'].str.startswith('09_')
]

categories = ['hats', 'eyewear', 'scarves', 'tops', 'outerwear', 'bags', 'belts', 'bottoms', 'socks', 'shoes']

# Create a new 'category' column based on the conditions and categories
annotations['category'] = np.select(conditions, categories, default='')
annotations.head()


Unnamed: 0,filename,class,brand,category
0,dior_fw23_31,04_vest,dior,tops
1,dior_fw23_31,04_beige,dior,tops
2,dior_fw23_31,04_velvet,dior,tops
3,dior_fw23_31,05_handbag,dior,bags
4,dior_fw23_31,05_brown,dior,bags


In [12]:
class_list = annotations.loc[annotations['class'].str.startswith(( '4')), 'class'].unique().tolist()
print(class_list)

len(class_list)

['4a_coat', '4a_white', '4a_wool', '4a_grey', '4a_oversized', '4a_beige', '4a_velours', '4a_black', '4a_leather', '4a_jacket', '4a_embroidered', '4a_floral', '4a_cashmere', '4a_coated', '4a_tailoring', '4a_brown', '4a_cutout', '4a_navy', '4a_suede', '4a_shearling', '4a_windbreaker', '4a_neon', '4a_nylon', '4a_camel', '4a_robe', '4a_trenchcoat', '4a_animalia', '4a_cotton', '4a_quilted', '4a_logo', '4a_yellow', '4a_copper', '4a_metallic', '4a_nylon-', '4a_utility', '4a_hooded', '4a_distressed', '4a_military', '4a_fur', '4a_patchwork', '4a_petrol', '4a_silk', '4a_checker', '4a_rainjacket', '4a_stone', '4a_print', '4a_mohair', '4a_teddy']


48

In [13]:
style_list = ['04_vest_sku', '04_beige_color', '04_velvet_material', '05_handbag_sku', '05_brown_color', '05_leather_material', '07_shorts_sku', '07_grey_color', '07_denim_material', '09_boot_sku', '09_yellow_color', '09_rubber_material', '04_tanktop_sku', '04_sleeveless_style', '04_silk_material', '04_olive_color', '07_olive_color', '07_silk_material', '09_brown_color', '09_leather_material', '09_derby_sku', '09_black_color', '04_sweatshirt_sku', '04_navy_color', '04_cotton_material', '04_oversized_style', '07_leggings_sku', '07_black_color', '07_cotton_material', '07_jersey_material', '05_totebag_sku', '05_beige_color', '09_leather_material', '09_hightop_style', '04_grey_color', '04_nylon_material', '04_military_style', '07_pants_sku', '07_beige_color', '07_military_style', '07_cargo_style', '09_beige_color', '04_sheer_style', '04_distressed_style', '04_coat_sku', '04_knitwear_sku', '02_sunglasses_sku', '07_wool_material', '04_cashmere_material', '04_black_color', '04_camel_color', '04_overall_sku', '01_beige_color', '07_cutout_style', '09_sneaker_sku', '07_white_color', '07_leather_material', '07_skirt_sku', '05_print_style', '04_shirt_sku', '07_striped_style', '07_blue_color', '07_loosefit_style', '04_sweater_sku', '04_embroidered_style', '04_floral_style', '04_blue_color', '04_striped_style', '04_silver_color', '04_metallic_material', '07_brown_color', '09_velours_material', '04_white_color', '04_wool_material', '09_logo_style', '04_t-shirt_sku', '07_cargo_style', '07_workwear_style', '05_white_color', '05_distressed_style', '09_grey_color', '04_hoodie_sku', '07_zippers_style', '07_linnen_material', '07_neon_color', '07_nylon_material', '07_parachute_style', '07_velours_material', '04_yellow_color', '07_green_color', '07_distressed_style', '04_jacket_sku', '04_vernice_style', '05_black_color', '04_overized_style', '09_rubber_material', '04_cardigan_sku', '04_violet_color', '07_violet_color', '07_sneaker_sku', '07_mix_material', '07_logo_style', '04_logo_style', '04_dotted_style', '07_pink_color', '05_coatedcanvas_material', '04_rose_color', '04_patchwork_style', '04_leather_material', '04_neon_color', '09_neon_color', '09_mix_material', '09_bordeaux_color', '09_camel_color', '07_utility_style', '04_pink_color', '04_shearling_material', '07_military_style', '09_petrol_color', '09_shoe_sku', '04_tailoring_style', '04_sequin_material', '05_stone_color', '04_brown_color', '05_fur_material', '09_mule_sku', '05_nylon_material', '05_exoticleather_material', '04_layered_style', '05_velours_material', '05_grey_color', '07_print_style', '04_lilac_color', '07_lilac_color', '04_cream_color', '05_crossbody_sku', '09_white_color', '04_denim_material', '04_red_color', '07_camel_color', '07_petrol_color', '07_sweatpants_sku', '04_print_style', '07_navy_color', '05_yellow_color', '09_boots_sku', '09_fur_material', '05_canvas_material', '07_yellow_color', '04_mohair_material', '07_velvet_material', '04_white_color', '07_checker_style', '09_silk_material', '04_robe_sku', '07_sheer_style', '05_cotton_material', '04_animalia_style', '07_floral_style', '04_studs_style', '07_studs_style', '05_backpack_sku', '07_mesh_material', '04_green_color', '09_silver_color', '05_bumbag_sku', '05_monogram_style', "4a_coat_sku", "4a_white_color", "4a_wool_material", "4a_grey_color", "4a_oversized_style", "4a_beige_color", "4a_velours_material", "4a_black_color", "4a_leather_material", '4a_jacket', '4a_embroidered', '4a_floral_style', '4a_cashmere_material', '4a_coated_style', '4a_tailoring_style', '4a_brown_color', '4a_cutout_style', '4a_navy_color', '4a_suede_material', '4a_shearling_material', '4a_windbreaker_sku', '4a_neon_color', '4a_nylon_material', '4a_camel_color', '4a_robe_sku', '4a_trenchcoat_sku', '4a_animalia_style', '4a_cotton_material', '4a_quilted_style', '4a_logo_style', '4a_yellow_color', '4a_copper_color', '4a_metallic_material', '4a_nylon_material', '4a_utility_style', '4a_hooded_style', '4a_distressed_style', '4a_military_style', '4a_fur_material', '4a_patchwork_style', '4a_petrol_color', '4a_silk_material', '4a_checker_style', '4a_rainjacket_sku', '4a_stone_color', '4a_print_style', '4a_mohair_material', '4a_teddy_material']

annotations['style'] = annotations['class'].apply(lambda x: style_list[0] if x.startswith('0') or x.startswith('4') else None)

annotations


Unnamed: 0,filename,class,brand,category,style
0,dior_fw23_31,04_vest,dior,tops,04_vest_sku
1,dior_fw23_31,04_beige,dior,tops,04_vest_sku
2,dior_fw23_31,04_velvet,dior,tops,04_vest_sku
3,dior_fw23_31,05_handbag,dior,bags,04_vest_sku
4,dior_fw23_31,05_brown,dior,bags,04_vest_sku
...,...,...,...,...,...
2218,dior_fw23_12,07_loosefit,dior,bottoms,04_vest_sku
2219,dior_fw23_12,07_silk,dior,bottoms,04_vest_sku
2220,dior_fw23_12,09_black,dior,shoes,04_vest_sku
2221,dior_fw23_12,09_boot,dior,shoes,04_vest_sku


In [14]:
empty_dict= dict()

In [15]:
for idx, element in enumerate(style_list):
    empty_dict[idx] = element

In [29]:
empty_dict

{0: '04_vest_sku',
 1: '04_beige_color',
 2: '04_velvet_material',
 3: '05_handbag_sku',
 4: '05_brown_color',
 5: '05_leather_material',
 6: '07_shorts_sku',
 7: '07_grey_color',
 8: '07_denim_material',
 9: '09_boot_sku',
 10: '09_yellow_color',
 11: '09_rubber_material',
 12: '04_tanktop_sku',
 13: '04_sleeveless_style',
 14: '04_silk_material',
 15: '04_olive_color',
 16: '07_olive_color',
 17: '07_silk_material',
 18: '09_brown_color',
 19: '09_leather_material',
 20: '09_derby_sku',
 21: '09_black_color',
 22: '04_sweatshirt_sku',
 23: '04_navy_color',
 24: '04_cotton_material',
 25: '04_oversized_style',
 26: '07_leggings_sku',
 27: '07_black_color',
 28: '07_cotton_material',
 29: '07_jersey_material',
 30: '05_totebag_sku',
 31: '05_beige_color',
 32: '09_leather_material',
 33: '09_hightop_style',
 34: '04_grey_color',
 35: '04_nylon_material',
 36: '04_military_style',
 37: '07_pants_sku',
 38: '07_beige_color',
 39: '07_military_style',
 40: '07_cargo_style',
 41: '09_beige

In [17]:
#for idx,row in annotations.loc[:, ['style']].iterrows():
#    for element in style_list:
#            test = annotations['class'].apply(lambda row: style_list[element] if row.startswith('0') or row.startswith('4') else None)
#test


In [18]:
def insert_descriptor(df, style_list):
        '''Iterate thru rows of class-column and parse values into expanded names.
        Insert expanded names into a new column, "style".'''
        for idx,row in df.loc[:, ['class']].iterrows():
            descriptor = str(row[0])
            for element in style_list:
                if descriptor in element:
                    #print(element)
                    df.loc[idx, 'style'] = element
        
        return df

In [19]:
annotations = insert_descriptor(annotations, style_list)
annotations

Unnamed: 0,filename,class,brand,category,style
0,dior_fw23_31,04_vest,dior,tops,04_vest_sku
1,dior_fw23_31,04_beige,dior,tops,04_beige_color
2,dior_fw23_31,04_velvet,dior,tops,04_velvet_material
3,dior_fw23_31,05_handbag,dior,bags,05_handbag_sku
4,dior_fw23_31,05_brown,dior,bags,05_brown_color
...,...,...,...,...,...
2218,dior_fw23_12,07_loosefit,dior,bottoms,07_loosefit_style
2219,dior_fw23_12,07_silk,dior,bottoms,07_silk_material
2220,dior_fw23_12,09_black,dior,shoes,09_black_color
2221,dior_fw23_12,09_boot,dior,shoes,09_boots_sku


In [20]:
def insert_color(df):
    '''Create a new column "color" based on values in "style" column'''
    color_list = []
    for idx, row in df.iterrows():
        style = str(row['style'])
        if style.endswith('_color'):
            color = style.split('_')[1]
            color_list.append(color)
        else:
            color_list.append('')
    df['color'] = color_list
    return df


In [21]:
insert_color(annotations)

Unnamed: 0,filename,class,brand,category,style,color
0,dior_fw23_31,04_vest,dior,tops,04_vest_sku,
1,dior_fw23_31,04_beige,dior,tops,04_beige_color,beige
2,dior_fw23_31,04_velvet,dior,tops,04_velvet_material,
3,dior_fw23_31,05_handbag,dior,bags,05_handbag_sku,
4,dior_fw23_31,05_brown,dior,bags,05_brown_color,brown
...,...,...,...,...,...,...
2218,dior_fw23_12,07_loosefit,dior,bottoms,07_loosefit_style,
2219,dior_fw23_12,07_silk,dior,bottoms,07_silk_material,
2220,dior_fw23_12,09_black,dior,shoes,09_black_color,black
2221,dior_fw23_12,09_boot,dior,shoes,09_boots_sku,


In [22]:
def insert_material(df):
    '''Create a new column "material" based on values in "style" column'''
    material_list = []
    for idx, row in df.iterrows():
        style = str(row['style'])
        if style.endswith('_material'):
            material = style.split('_')[1]
            material_list.append(material)
        else:
            material_list.append('')
    df['material'] = material_list
    return df


In [23]:
insert_material(annotations)

Unnamed: 0,filename,class,brand,category,style,color,material
0,dior_fw23_31,04_vest,dior,tops,04_vest_sku,,
1,dior_fw23_31,04_beige,dior,tops,04_beige_color,beige,
2,dior_fw23_31,04_velvet,dior,tops,04_velvet_material,,velvet
3,dior_fw23_31,05_handbag,dior,bags,05_handbag_sku,,
4,dior_fw23_31,05_brown,dior,bags,05_brown_color,brown,
...,...,...,...,...,...,...,...
2218,dior_fw23_12,07_loosefit,dior,bottoms,07_loosefit_style,,
2219,dior_fw23_12,07_silk,dior,bottoms,07_silk_material,,silk
2220,dior_fw23_12,09_black,dior,shoes,09_black_color,black,
2221,dior_fw23_12,09_boot,dior,shoes,09_boots_sku,,


In [24]:
def insert_fashion(df):
    '''Create a new column "fashion" based on values in "style" column'''
    fashion_list = []
    for idx, row in df.iterrows():
        style = str(row['style'])
        if style.endswith('_style'):
            fashion_name = style.split('_')[1]
            fashion_list.append(fashion_name)
        else:
            fashion_list.append('')
    df['fashion'] = fashion_list
    return df



In [25]:
insert_fashion(annotations)

Unnamed: 0,filename,class,brand,category,style,color,material,fashion
0,dior_fw23_31,04_vest,dior,tops,04_vest_sku,,,
1,dior_fw23_31,04_beige,dior,tops,04_beige_color,beige,,
2,dior_fw23_31,04_velvet,dior,tops,04_velvet_material,,velvet,
3,dior_fw23_31,05_handbag,dior,bags,05_handbag_sku,,,
4,dior_fw23_31,05_brown,dior,bags,05_brown_color,brown,,
...,...,...,...,...,...,...,...,...
2218,dior_fw23_12,07_loosefit,dior,bottoms,07_loosefit_style,,,loosefit
2219,dior_fw23_12,07_silk,dior,bottoms,07_silk_material,,silk,
2220,dior_fw23_12,09_black,dior,shoes,09_black_color,black,,
2221,dior_fw23_12,09_boot,dior,shoes,09_boots_sku,,,


In [26]:
def insert_sku(df):
    '''Create a new column "sku" based on values in "style" column'''
    sku_list = []
    for idx, row in df.iterrows():
        style = str(row['style'])
        if style.endswith('_sku'):
            sku_name = style.split('_')[1]
            sku_list.append(sku_name)
        else:
            sku_list.append('')
    df['sku'] = sku_list
    return df
insert_sku(annotations)

Unnamed: 0,filename,class,brand,category,style,color,material,fashion,sku
0,dior_fw23_31,04_vest,dior,tops,04_vest_sku,,,,vest
1,dior_fw23_31,04_beige,dior,tops,04_beige_color,beige,,,
2,dior_fw23_31,04_velvet,dior,tops,04_velvet_material,,velvet,,
3,dior_fw23_31,05_handbag,dior,bags,05_handbag_sku,,,,handbag
4,dior_fw23_31,05_brown,dior,bags,05_brown_color,brown,,,
...,...,...,...,...,...,...,...,...,...
2218,dior_fw23_12,07_loosefit,dior,bottoms,07_loosefit_style,,,loosefit,
2219,dior_fw23_12,07_silk,dior,bottoms,07_silk_material,,silk,,
2220,dior_fw23_12,09_black,dior,shoes,09_black_color,black,,,
2221,dior_fw23_12,09_boot,dior,shoes,09_boots_sku,,,,boots


In [27]:
for column in annotations.columns:
    unique_counts = annotations[column].value_counts()
    print(f"Column: {column}")
    print(unique_counts)
    print()


Column: filename
givenchy_fw23_36    23
givenchy_fw23_24    23
givenchy_fw23_34    20
givenchy_fw23_16    19
dior_fw23_54        19
                    ..
loewe_fw23_39        6
loewe_fw23_02        6
loewe_fw23_06        6
givenchy_fw23_47     6
loewe_fw23_16        6
Name: filename, Length: 195, dtype: int64

Column: class
09_boot         146
09_black        143
09_leather      128
07_pants        115
07_black         83
               ... 
04_shearling      1
04_pink           1
07mili            1
4a_nylon-         1
05_monogram       1
Name: class, Length: 216, dtype: int64

Column: brand
dior             660
givenchy         590
saint_laurent    512
loewe            461
Name: brand, dtype: int64

Column: category
bottoms      622
shoes        600
tops         547
outerwear    324
bags         100
eyewear       27
               2
hats           1
Name: category, dtype: int64

Column: style
09_boots_sku             148
09_black_color           143
09_leather_material      128
07_p

In [32]:
filtered_annotations = annotations[annotations['class'].str.contains('4a')]
print(filtered_annotations)

              filename      class     brand   category               style  \
22       loewe_fw23_16    4a_coat     loewe  outerwear     4a_coated_style   
23       loewe_fw23_16   4a_white     loewe  outerwear      4a_white_color   
24       loewe_fw23_16    4a_wool     loewe  outerwear    4a_wool_material   
67       loewe_fw23_38    4a_coat     loewe  outerwear     4a_coated_style   
68       loewe_fw23_38    4a_grey     loewe  outerwear       4a_grey_color   
...                ...        ...       ...        ...                 ...   
2188  givenchy_fw23_02    4a_logo  givenchy  outerwear       4a_logo_style   
2208      dior_fw23_12  4a_mohair      dior  outerwear  4a_mohair_material   
2209      dior_fw23_12   4a_beige      dior  outerwear      4a_beige_color   
2210      dior_fw23_12    4a_coat      dior  outerwear     4a_coated_style   
2211      dior_fw23_12   4a_teddy      dior  outerwear   4a_teddy_material   

      color material fashion sku  
22                    coated

In [34]:
category_counts = annotations['category'].value_counts()

category_counts



bottoms      622
shoes        600
tops         547
outerwear    324
bags         100
eyewear       27
               2
hats           1
Name: category, dtype: int64

# VISUALISATION