In [1]:
import pandas as pd
import numpy as np

# Load the two CSV files (annotations and classes) from the data directory
annotations_csv = './../data/data_annotations.csv'
classes_csv = './../data/data_classes.csv'

annotations = pd.read_csv(annotations_csv)
classes = pd.read_csv(classes_csv)

In [2]:
# Display the first few rows
annotations.head()

Unnamed: 0,filename,width,height,class,xmin,ymin,xmax,ymax
0,dior_fw23_31_jpg.rf.01d0c04889e426e7cd17c3880d...,2560,3840,04_vest,1007,840,1048,884
1,dior_fw23_31_jpg.rf.01d0c04889e426e7cd17c3880d...,2560,3840,04_beige,1061,848,1112,905
2,dior_fw23_31_jpg.rf.01d0c04889e426e7cd17c3880d...,2560,3840,04_velvet,1130,862,1171,908
3,dior_fw23_31_jpg.rf.01d0c04889e426e7cd17c3880d...,2560,3840,05_handbag,1436,1874,1488,1927
4,dior_fw23_31_jpg.rf.01d0c04889e426e7cd17c3880d...,2560,3840,05_brown,1508,1920,1581,1997


In [3]:
# Check the column names, non-null counts and data types
annotations.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 2223 entries, 0 to 2222
Data columns (total 8 columns):
 #   Column    Non-Null Count  Dtype 
---  ------    --------------  ----- 
 0   filename  2223 non-null   object
 1   width     2223 non-null   int64 
 2   height    2223 non-null   int64 
 3   class     2223 non-null   object
 4   xmin      2223 non-null   int64 
 5   ymin      2223 non-null   int64 
 6   xmax      2223 non-null   int64 
 7   ymax      2223 non-null   int64 
dtypes: int64(6), object(2)
memory usage: 139.1+ KB


In [4]:
# Clean the data: remove the image-size and bounding-box columns so that only
# `filename` and `class` remain.
# All six columns are dropped in a single call. errors='ignore' keeps the cell
# safe to re-run: without it, a second execution raises KeyError because the
# columns are already gone. The drop stays in place so that any other
# reference to this DataFrame sees the same result as before.
columns_to_drop = ['width', 'height', 'xmin', 'ymin', 'xmax', 'ymax']
annotations.drop(columns=columns_to_drop, inplace=True, errors='ignore')

In [5]:
# Confirm which columns remain after the drop
annotations.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 2223 entries, 0 to 2222
Data columns (total 2 columns):
 #   Column    Non-Null Count  Dtype 
---  ------    --------------  ----- 
 0   filename  2223 non-null   object
 1   class     2223 non-null   object
dtypes: object(2)
memory usage: 34.9+ KB


In [6]:
# Preview the first rows of the reduced frame
annotations.head()

Unnamed: 0,filename,class
0,dior_fw23_31_jpg.rf.01d0c04889e426e7cd17c3880d...,04_vest
1,dior_fw23_31_jpg.rf.01d0c04889e426e7cd17c3880d...,04_beige
2,dior_fw23_31_jpg.rf.01d0c04889e426e7cd17c3880d...,04_velvet
3,dior_fw23_31_jpg.rf.01d0c04889e426e7cd17c3880d...,05_handbag
4,dior_fw23_31_jpg.rf.01d0c04889e426e7cd17c3880d...,05_brown


In [7]:
# Keep only the part of each filename that precedes the first "_jpg"
filename_parts = annotations['filename'].str.split('_jpg')
annotations['filename'] = filename_parts.str.get(0)


In [8]:
# Check the shortened filenames
annotations.head()

Unnamed: 0,filename,class
0,dior_fw23_31,04_vest
1,dior_fw23_31,04_beige
2,dior_fw23_31,04_velvet
3,dior_fw23_31,05_handbag
4,dior_fw23_31,05_brown


In [9]:
# Brand names; each name is also the filename prefix used to detect it
brands = ['dior', 'saint_laurent', 'givenchy', 'loewe']

# One boolean mask per brand, in the same order as `brands`
conditions = [annotations['filename'].str.startswith(prefix) for prefix in brands]

# Fill the new 'brand' column with the first matching brand;
# rows whose filename matches none of the prefixes get ''
annotations['brand'] = np.select(conditions, brands, default='')

annotations.head()

Unnamed: 0,filename,class,brand
0,dior_fw23_31,04_vest,dior
1,dior_fw23_31,04_beige,dior
2,dior_fw23_31,04_velvet,dior
3,dior_fw23_31,05_handbag,dior
4,dior_fw23_31,05_brown,dior


In [10]:
# Display the frame with the new 'brand' column (the rendered output is truncated)
annotations

Unnamed: 0,filename,class,brand
0,dior_fw23_31,04_vest,dior
1,dior_fw23_31,04_beige,dior
2,dior_fw23_31,04_velvet,dior
3,dior_fw23_31,05_handbag,dior
4,dior_fw23_31,05_brown,dior
...,...,...,...
2218,dior_fw23_12,07_loosefit,dior
2219,dior_fw23_12,07_silk,dior
2220,dior_fw23_12,09_black,dior
2221,dior_fw23_12,09_boot,dior


In [16]:
# Map the leading code of each class label to a product category.
# The prefixes deliberately omit the trailing underscore: labels that lack the
# separator (for example '07mili', which occurs in the data) would otherwise
# match nothing and silently receive the empty default instead of their category.
category_prefixes = ['01', '02', '03', '04', '4a', '05', '06', '07', '08', '09']

# Category names, in the same order as `category_prefixes`
categories = ['hats', 'eyewear', 'scarves', 'tops', 'outerwear', 'bags', 'belts', 'bottoms', 'socks', 'shoes']

# One boolean mask per prefix
conditions = [annotations['class'].str.startswith(prefix) for prefix in category_prefixes]

# Create the new 'category' column; labels with an unknown prefix get ''
annotations['category'] = np.select(conditions, categories, default='')
annotations.head()


Unnamed: 0,filename,class,brand,category
0,dior_fw23_31,04_vest,dior,tops
1,dior_fw23_31,04_beige,dior,tops
2,dior_fw23_31,04_velvet,dior,tops
3,dior_fw23_31,05_handbag,dior,bags
4,dior_fw23_31,05_brown,dior,bags


In [21]:
# Collect the distinct class labels that begin with '0' and print them
starts_with_zero = annotations['class'].str.startswith('0')
class_list = annotations.loc[starts_with_zero, 'class'].unique().tolist()
print(class_list)


['04_vest', '04_beige', '04_velvet', '05_handbag', '05_brown', '05_leather', '07_shorts', '07_grey', '07_denim', '09_boot', '09_yellow', '09_rubber', '04_tanktop', '04_sleeveless', '04_silk', '04_olive', '07_olive', '07_silk', '09_brown', '09_leather', '09_derby', '09_black', '04_sweatshirt', '04_navy', '04_cotton', '04_oversized', '07_leggings', '07_black', '07_cotton', '07_jersey', '05_totebag', '05_beige', '09_leather-', '09_hightop', '04_grey', '04_nylon', '04_military', '07_pants', '07_beige', '07mili', '07_cargo', '09_beige', '04_sheer', '04_distressed', '04_coat', '04_knitwear', '02_sunglasses', '07_wool', '04_cashmere', '04_black', '04_camel', '04_overall', '01_beige', '07_cutout', '09_sneaker', '07_white', '07_leather', '07_skirt', '05_print', '04_shirt', '07_striped', '07_blue', '07_loosefit', '04_sweater', '04_embroidered', '04_floral', '04_blue', '04_striped', '04_silver', '04_metallic', '07_brown', '09_velours', '04_white', '04_wool', '09_logo', '04_t-shirt', '07_cartgo', 

In [23]:
# Hand-written list of style values. Only its first element is read by the
# 'style' assignment that follows in this cell.
# NOTE(review): the entries look like the class labels printed by the previous
# cell, with the first two edited to '04_vest_sku' and '04_beige_color' —
# confirm whether that is intentional.
style_list = ['04_vest_sku', '04_beige_color', '04_velvet', '05_handbag', '05_brown', '05_leather', '07_shorts', '07_grey', '07_denim', '09_boot', '09_yellow', '09_rubber', '04_tanktop', '04_sleeveless', '04_silk', '04_olive', '07_olive', '07_silk', '09_brown', '09_leather', '09_derby', '09_black', '04_sweatshirt', '04_navy', '04_cotton', '04_oversized', '07_leggings', '07_black', '07_cotton', '07_jersey', '05_totebag', '05_beige', '09_leather-', '09_hightop', '04_grey', '04_nylon', '04_military', '07_pants', '07_beige', '07mili', '07_cargo', '09_beige', '04_sheer', '04_distressed', '04_coat', '04_knitwear', '02_sunglasses', '07_wool', '04_cashmere', '04_black', '04_camel', '04_overall', '01_beige', '07_cutout', '09_sneaker', '07_white', '07_leather', '07_skirt', '05_print', '04_shirt', '07_striped', '07_blue', '07_loosefit', '04_sweater', '04_embroidered', '04_floral', '04_blue', '04_striped', '04_silver', '04_metallic', '07_brown', '09_velours', '04_white', '04_wool', '09_logo', '04_t-shirt', '07_cartgo', '07_workwear', '05_white', '05_distressed', '09_grey', '04_hoodie', '07_zippers', '07_linnen', '07_neon', '07_nylon', '07_parachute', '07_velours', '04_yellow', '07_green', '07_distressed', '04_jacket', '04_vernice', '05_black', '04_overized', '09_ruber', '04_cardigan', '04_violet', '07_violet', '07_sneaker', '07_mix', '07_logo', '04_logo', '04_dotted', '07_pink', '05_coatedcanvas', '04_rose', '04_patchwork', '04_leather', '04_neon', '09_neon', '09_mix', '09_bordeaux', '09_camel', '07_utility', '04_pink', '04_shearling', '07_military', '09_petrol', '09_shoe', '04_tailoring', '04_sequin', '05_stone', '04_brown', '05_fur', '09_mule', '05_nylon', '05_exoticleather', '04_layered', '05_velours', '05_grey', '07_print', '04_lilac', '07_lilac', '04_cream', '05_crossbody', '09_white', '04_denim', '04_red', '07_camel', '07_petrol', '07_sweatpants', '04_print', '07_navy', '05_yellow', '09_boots', '09_fur', '05_canvas', '07_yellow', '04_mohair', '07_velvet', '04_white-', 
'07_checker', '09_silk', '04_robe', '07_sheer', '05_cotton', '04_animalia', '07_floral', '04_studs', '07_studs', '05_backpack', '07_mesh', '04_green', '09_silver', '05_bumbag', '05_monogram']  # Define your list of style values

def _style_for(label):
    """Return style_list[0] for labels starting with '04_' or '4', otherwise None."""
    # NOTE(review): every matching label receives the same constant value
    # (style_list[0]); confirm whether a per-class lookup was intended.
    if label.startswith(('04_', '4')):
        return style_list[0]
    return None


# Fill the new 'style' column label by label
annotations['style'] = annotations['class'].apply(_style_for)

annotations.head()

Unnamed: 0,filename,class,brand,category,style
0,dior_fw23_31,04_vest,dior,tops,04_vest_sku
1,dior_fw23_31,04_beige,dior,tops,04_vest_sku
2,dior_fw23_31,04_velvet,dior,tops,04_vest_sku
3,dior_fw23_31,05_handbag,dior,bags,
4,dior_fw23_31,05_brown,dior,bags,
