In [1]:
from google.colab import drive
drive.mount('/content/drive')
#/content/drive/MyDrive/datasets/without_bg

Mounted at /content/drive


In [2]:
!ls /content/drive/MyDrive/datasets/without_bg | wc

   5756    5756  235996


In [3]:
!wget https://raw.githubusercontent.com/alexeygrigorev/clothing-dataset/master/images.csv

--2023-01-26 01:08:09--  https://raw.githubusercontent.com/alexeygrigorev/clothing-dataset/master/images.csv
Resolving raw.githubusercontent.com (raw.githubusercontent.com)... 185.199.108.133, 185.199.109.133, 185.199.111.133, ...
Connecting to raw.githubusercontent.com (raw.githubusercontent.com)|185.199.108.133|:443... connected.
HTTP request sent, awaiting response... 200 OK
Length: 291660 (285K) [text/plain]
Saving to: ‘images.csv’


2023-01-26 01:08:10 (15.7 MB/s) - ‘images.csv’ saved [291660/291660]



In [4]:
import pandas as pd

records = pd.read_csv("./images.csv")
records.head()

Unnamed: 0,image,sender_id,label,kids
0,4285fab0-751a-4b74-8e9b-43af05deee22,124,Not sure,False
1,ea7b6656-3f84-4eb3-9099-23e623fc1018,148,T-Shirt,False
2,00627a3f-0477-401c-95eb-92642cbe078d,94,Not sure,False
3,ea2ffd4d-9b25-4ca8-9dc2-bd27f1cc59fa,43,T-Shirt,False
4,3b86d877-2b9e-4c8b-a6a2-1d87513309d0,189,Shoes,False


In [5]:
records = records[records['label'] != 'Not sure']
records = records[records['label'] != 'Other']
records = records[records['label'] != 'Skip']

In [6]:
categories = list(records["label"].unique())
print(categories)

['T-Shirt', 'Shoes', 'Shorts', 'Shirt', 'Pants', 'Skirt', 'Top', 'Outwear', 'Dress', 'Body', 'Longsleeve', 'Undershirt', 'Hat', 'Polo', 'Blouse', 'Hoodie', 'Blazer']


In [7]:
!rm -r ./dataset ./output

rm: cannot remove './dataset': No such file or directory
rm: cannot remove './output': No such file or directory


In [8]:
import os
base_dir = "./dataset"
os.mkdir(base_dir)

In [9]:
for category in categories:
  os.mkdir(base_dir + "/" + category)

In [10]:
import shutil

for index, row in records.iterrows():
    image_path = "/content/drive/MyDrive/datasets/without_bg/" + row['image'] + ".png"
    target_dir = "./dataset/" + row['label']
    try:
      shutil.copy(image_path, target_dir)
    except Exception as e:
      print(e)

In [11]:
!pip install split-folders

Looking in indexes: https://pypi.org/simple, https://us-python.pkg.dev/colab-wheels/public/simple/
Collecting split-folders
  Downloading split_folders-0.5.1-py3-none-any.whl (8.4 kB)
Installing collected packages: split-folders
Successfully installed split-folders-0.5.1


In [12]:
import splitfolders

splitfolders.ratio("dataset", output="output", seed=1337, ratio=(.8, .2), group_prefix=None, move=False)

Copying files: 5096 files [00:03, 1481.06 files/s]


In [13]:
!ls output/train output/val

output/train:
Blazer	Body   Hat     Longsleeve  Pants  Shirt  Shorts  Top	  Undershirt
Blouse	Dress  Hoodie  Outwear	   Polo   Shoes  Skirt	 T-Shirt

output/val:
Blazer	Body   Hat     Longsleeve  Pants  Shirt  Shorts  Top	  Undershirt
Blouse	Dress  Hoodie  Outwear	   Polo   Shoes  Skirt	 T-Shirt


In [14]:
BATCH_SIZE = 32
TARGET_SIZE = (150, 150)
CLASSES = len(categories) 

In [15]:
from tensorflow.keras.preprocessing.image import ImageDataGenerator

train_datagen = ImageDataGenerator(rescale=1./255,
                                  rotation_range=40,
                                  width_shift_range=0.2,
                                  height_shift_range=0.2,
                                  zoom_range=0.3,
                                  horizontal_flip=True,
                                  validation_split=0.2)

train_generator = train_datagen.flow_from_directory("./output/train",
                                                    batch_size = BATCH_SIZE,
                                                    class_mode = 'categorical',
                                                    color_mode='rgb',
                                                    target_size = TARGET_SIZE)

validation_datagen = ImageDataGenerator(rescale=1/255)

validation_generator = validation_datagen.flow_from_directory("./output/val",
                                                              target_size = TARGET_SIZE,
                                                              batch_size = BATCH_SIZE,
                                                              color_mode='rgb',
                                                              class_mode = 'categorical')


Found 4070 images belonging to 17 classes.
Found 1026 images belonging to 17 classes.


In [None]:
import tensorflow as tf
from tensorflow import keras

base_model = keras.applications.Xception(
    weights='imagenet',
    input_shape=(150, 150, 3),
    include_top=False)

In [None]:
base_model.trainable = False

In [None]:
inputs = keras.Input(shape=(150, 150, 3))
x = base_model(inputs, training=False)
x = keras.layers.GlobalAveragePooling2D()(x)
outputs = tf.keras.layers.Dense(CLASSES, activation="softmax")(x)
model = keras.Model(inputs, outputs)

In [None]:
model.compile(optimizer="adam",
              metrics=['accuracy'],
              loss="categorical_crossentropy")

In [None]:
# model.compile(optimizer=tf.keras.optimizers.Adam(learning_rate=0.001,beta_1=0.9,beta_2=0.99),
#               metrics=['accuracy'],
#               loss="categorical_crossentropy")

In [None]:
model.fit(train_generator, epochs=20, validation_data=validation_generator)

Epoch 1/20
Epoch 2/20
Epoch 3/20
Epoch 4/20
Epoch 5/20
Epoch 6/20
Epoch 7/20
Epoch 8/20
Epoch 9/20
Epoch 10/20
Epoch 11/20
Epoch 12/20
Epoch 13/20
Epoch 14/20
Epoch 15/20
Epoch 16/20
Epoch 17/20
Epoch 18/20
Epoch 19/20
Epoch 20/20


<keras.callbacks.History at 0x7f7a4e29f490>

In [None]:
from tensorflow.keras.callbacks import EarlyStopping 

early_stopping = EarlyStopping(monitor='val_accuracy', patience=3)

In [None]:
base_model.trainable = True

# It's important to recompile your model after you make any changes
# to the `trainable` attribute of any inner layer, so that your changes
# are take into account
model.compile(optimizer=keras.optimizers.Adam(1e-5),  # Very low learning rate
              loss='categorical_crossentropy',
              metrics=['accuracy'])

# Train end-to-end. Be careful to stop before you overfit!
model.fit(train_generator, epochs=10, callbacks=[early_stopping], validation_data=validation_generator)

Epoch 1/10
Epoch 2/10
Epoch 3/10
Epoch 4/10
Epoch 5/10
Epoch 6/10
Epoch 7/10
Epoch 8/10
Epoch 9/10
Epoch 10/10


<keras.callbacks.History at 0x7f7a6c42a0a0>

In [None]:
validation_generator.reset()

# Evaluate on Validation data
scores = model.evaluate(validation_generator)
print("%s%s: %.2f%%" % ("val acc ", model.metrics_names[1], scores[1]*100))

train_generator.reset()

# Evaluate on Training data
scores = model.evaluate(train_generator)
print("%s%s: %.2f%%" % ("train acc ", model.metrics_names[1], scores[1]*100))

val acc accuracy: 81.68%
train acc accuracy: 88.92%


In [None]:
model.save('/content/drive/MyDrive/datasets/model.m5')



In [16]:
train_generator.class_indices

{'Blazer': 0,
 'Blouse': 1,
 'Body': 2,
 'Dress': 3,
 'Hat': 4,
 'Hoodie': 5,
 'Longsleeve': 6,
 'Outwear': 7,
 'Pants': 8,
 'Polo': 9,
 'Shirt': 10,
 'Shoes': 11,
 'Shorts': 12,
 'Skirt': 13,
 'T-Shirt': 14,
 'Top': 15,
 'Undershirt': 16}