<a href="https://colab.research.google.com/github/gloguercio/cloth_classifier/blob/main/DeepLearning_Track.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [1]:
# Import necessary libraries
%pip install -q opendatasets
import opendatasets as od
import numpy as np
import pandas as pd
import os
import tensorflow as tf
from tensorflow.keras.layers import Conv2D, MaxPooling2D, GlobalAveragePooling2D, Activation, Dropout, Flatten, Dense, Input, Layer
from tensorflow.keras.applications import VGG16
from tensorflow.keras.models import Model, Sequential
from tensorflow.keras.preprocessing.image import ImageDataGenerator, load_img, img_to_array
from tensorflow.keras.applications.xception import Xception
from tensorflow.keras.applications.resnet50 import ResNet50
from tensorflow.keras.layers import BatchNormalization ,GlobalMaxPool2D
from tensorflow.keras.optimizers import Adam
from tensorflow.keras.callbacks import EarlyStopping ,ReduceLROnPlateau ,ModelCheckpoint
from tensorflow.keras import models
from tensorflow.keras import layers
import matplotlib.pyplot as plt
import seaborn as sns
from sklearn.preprocessing import MultiLabelBinarizer

In [2]:
# Fetch the "fashion-product-images-small" dataset from Kaggle into ./dataset
# (opendatasets may prompt for a Kaggle username and API key)
od.download(
    'https://www.kaggle.com/datasets/paramaggarwal/fashion-product-images-small',
    data_dir="dataset",
)

Please provide your Kaggle credentials to download this dataset. Learn more: http://bit.ly/kaggle-creds
Your Kaggle username: gustavologuercio
Your Kaggle Key: ··········
Downloading fashion-product-images-small.zip to dataset/fashion-product-images-small


100%|██████████| 565M/565M [00:04<00:00, 135MB/s] 





In [3]:
# Read the product metadata (styles.csv) into a DataFrame; malformed rows are skipped
styles_csv_path = 'dataset/fashion-product-images-small/styles.csv'
clothes_df = pd.read_csv(styles_csv_path, on_bad_lines='skip')

In [4]:
# Preview the first five rows of the product metadata
clothes_df.head(5)

Unnamed: 0,id,gender,masterCategory,subCategory,articleType,baseColour,season,year,usage,productDisplayName
0,15970,Men,Apparel,Topwear,Shirts,Navy Blue,Fall,2011.0,Casual,Turtle Check Men Navy Blue Shirt
1,39386,Men,Apparel,Bottomwear,Jeans,Blue,Summer,2012.0,Casual,Peter England Men Party Blue Jeans
2,59263,Women,Accessories,Watches,Watches,Silver,Winter,2016.0,Casual,Titan Women Silver Watch
3,21379,Men,Apparel,Bottomwear,Track Pants,Black,Fall,2011.0,Casual,Manchester United Men Solid Black Track Pants
4,53759,Men,Apparel,Topwear,Tshirts,Grey,Summer,2012.0,Casual,Puma Men Grey T-shirt


In [5]:
# Keep only the rows that have a value in every column
clothes_df = clothes_df.dropna(axis=0, how='any')

In [6]:
# Count the distinct values in each column (per-column cardinality)
clothes_df.nunique()

id                    44077
gender                    5
masterCategory            7
subCategory              45
articleType             142
baseColour               46
season                    4
year                     13
usage                     8
productDisplayName    30806
dtype: int64

In [7]:
# List the column names of the DataFrame after incomplete rows were dropped
clothes_df.columns

Index(['id', 'gender', 'masterCategory', 'subCategory', 'articleType',
       'baseColour', 'season', 'year', 'usage', 'productDisplayName'],
      dtype='object')

In [8]:
# Build one label string per product:
#   articleType_baseColour_masterCategory_subCategory
clothes_df['combined_category'] = clothes_df['articleType'].str.cat(
    [
        clothes_df['baseColour'],
        clothes_df['masterCategory'],
        clothes_df['subCategory'],
    ],
    sep='_',
)

In [9]:
# Number of rows remaining after the rows with missing values were dropped
len(clothes_df)

44077

In [10]:
# Container for the per-product image arrays; the image-loading loop appends to it
data = []

In [11]:
import cv2

# Target image size in pixels. cv2.resize takes its size argument as
# (width, height), so every stored array has shape (IY, IX, 3) = (60, 80, 3).
IX = 80
IY = 60

# Start from empty containers so re-running this cell does not append
# duplicate images or duplicate invalid ids.
data = []
invalid_ids = []

# Load, resize and store the image of every product listed in the DataFrame.
# Products whose image cannot be read or resized are recorded in `invalid_ids`
# so the label-extraction step can skip them and keep labels aligned with `data`.
for name in clothes_df.id:
    image = cv2.imread('dataset/fashion-product-images-small/images/'+str(name)+'.jpg')

    # cv2.imread does not raise on a missing or undecodable file; it returns None
    if image is None:
        invalid_ids.append(name)
        continue

    try:
        # Resize to the fixed network input size
        image = cv2.resize(image, (IX, IY))
    except cv2.error:
        # Only OpenCV failures are treated as "bad image"; anything else
        # (including a kernel interrupt) is allowed to propagate.
        invalid_ids.append(name)
        continue

    # Convert to a float array (img_to_array is imported in the first cell)
    data.append(img_to_array(image))

print(f"Loaded {len(data)} images; skipped {len(invalid_ids)} unreadable images")

In [12]:
# Container for the per-product label lists (one entry per loaded image)
labels = []

In [13]:
# Extract the label(s) of every product whose image was loaded successfully.

# Columns that make up a sample's tag list
used_column = ['combined_category']

# Drop the products recorded in `invalid_ids` in one vectorized pass. Row order
# is preserved, so labels stay aligned one-to-one with the entries of `data`.
valid_rows = clothes_df[~clothes_df['id'].isin(invalid_ids)]

# One list of tags per sample, e.g. [['Shirts_Navy Blue_Apparel_Topwear'], ...].
# Rebinding (rather than appending) keeps this cell safe to re-run.
labels = valid_rows[used_column].values.tolist()

# Guard against images and labels drifting out of sync
assert len(labels) == len(data), (
    f"{len(labels)} labels but {len(data)} images - re-run the image-loading cell"
)

In [14]:
# Convert the lists to NumPy arrays and scale pixel values to [0, 1].
# float32 is used instead of the 64-bit "float": it halves the memory needed
# for the image tensor (44k images of 60x80x3), and Keras trains in float32
# anyway.
data = np.array(data, dtype="float32") / 255.0
labels = np.array(labels)

In [15]:
# Show the label array (one combined-category string per sample)
print(labels)

[['Shirts_Navy Blue_Apparel_Topwear']
 ['Jeans_Blue_Apparel_Bottomwear']
 ['Watches_Silver_Accessories_Watches']
 ...
 ['Tshirts_Blue_Apparel_Topwear']
 ['Perfume and Body Mist_Blue_Personal Care_Fragrance']
 ['Watches_Pink_Accessories_Watches']]


In [16]:
# Encode the label lists as a binary indicator matrix with MultiLabelBinarizer.
# Each sample carries a single combined-category tag, so every encoded row has
# one active column; mlb.classes_ gives the class name of each column.
mlb = MultiLabelBinarizer()
labels = mlb.fit_transform(labels)

In [17]:
# Inspect the class names learned by the binarizer and the encoded
# indicator vector of the first sample
print(mlb.classes_)
print(labels[0])

['Accessory Gift Set_Black_Accessories_Accessories'
 'Accessory Gift Set_Blue_Accessories_Accessories'
 'Accessory Gift Set_Brown_Accessories_Accessories' ...
 'Wristbands_Red_Accessories_Sports Accessories'
 'Wristbands_Red_Sporting Goods_Wristbands'
 'Wristbands_White_Sporting Goods_Wristbands']
[0 0 0 ... 0 0 0]


In [18]:
# Input shape expected by the network: (height, width, channels).
# Derived from the resize constants IY/IX instead of repeating the literals,
# so it cannot drift from the image loader, which produces (IY, IX, 3) arrays
# (currently (60, 80, 3)).
inputShape = (IY, IX, 3)

In [19]:
# Build a small CNN: two conv/pool/dropout stages followed by a dense head
model = Sequential()

# Stage 1: 32 filters; 'same' padding preserves the spatial size before pooling
model.add(Conv2D(32, (3, 3), padding="same", input_shape=inputShape))
model.add(Activation("relu"))
model.add(MaxPooling2D(pool_size=(2, 2)))
model.add(Dropout(0.2))

# Stage 2: 64 filters with the default ('valid') padding
model.add(Conv2D(64, (3, 3)))
model.add(Activation('relu'))
model.add(MaxPooling2D(pool_size=(2, 2)))
model.add(Dropout(0.2))

# Classifier head: flatten, one hidden dense layer, then one sigmoid unit
# per unique combined category
model.add(Flatten())
model.add(Dense(128, activation='sigmoid'))
model.add(Dense(labels.shape[1], activation='sigmoid'))

In [20]:
# Configure training: Adam optimizer, binary cross-entropy loss, accuracy metric
model.compile(
    optimizer='adam',
    loss='binary_crossentropy',
    metrics=['accuracy'],
)

In [None]:
from sklearn.model_selection import train_test_split

# Hold out 10% of the samples for testing; the fixed seed makes the split repeatable
trainX, testX, trainY, testY = train_test_split(
    data,
    labels,
    test_size=0.1,
    random_state=42,
)

In [None]:
# Mini-batch size intended for training
batch = 32

In [None]:
# Train the model on the training split for 50 epochs.
# The batch size comes from the `batch` setting defined in the previous cell
# rather than a second hard-coded 32, so it only has to be changed in one place.
history = model.fit(x=trainX, y=trainY, epochs=50, batch_size=batch, verbose=1)

In [None]:
# Run inference on the held-out test images
preds = model.predict(x=testX)

In [None]:
# Score the model on the held-out split; evaluate() returns the loss followed
# by the compiled metrics, so index 1 is the accuracy
test_scores = model.evaluate(x=testX, y=testY, verbose=0)
accuracy = test_scores[1]
print("Accuracy: ", accuracy)

In [None]:
# Show the binarized label matrix (one indicator row per sample)
print(labels)