<a href="https://colab.research.google.com/github/ageraustine/apparel-type-classification/blob/master/Apparel_Classification.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

## Data Fetching and Cleansing

In [7]:
import pandas as pd
import numpy as np
import zipfile
import os
import shutil

In [3]:
# Mount Google Drive at /content/drive/ so files stored there can be read
# through ordinary file paths.
from google.colab import drive
drive.mount('/content/drive/')
# Path of the styles CSV on the mounted drive.
styles_path = '/content/drive/MyDrive/datasets/fashion_dataset/styles.csv'

Mounted at /content/drive/


In [6]:
# Path of the zipped image archive on the mounted drive (a file, despite the name).
zip_dir = '/content/drive/MyDrive/datasets/fashion_dataset/fashion_products.zip'
# The archive is unpacked into a "dataset" folder under the current working directory.
dataset_dir = f"{os.getcwd()}/dataset"

# Extract every member of the archive; the context manager closes the file afterwards.
with zipfile.ZipFile(zip_dir, mode="r") as zp:
    zp.extractall(path=dataset_dir)

## Categorization Utilities

In [8]:
def categorize(dataframe, idx, source, dest) -> dict:
    """
    Group image file names by the category recorded for each id.

    Parameters
    ----------
    dataframe : pandas.DataFrame
        Table containing at least the ``source`` and ``dest`` columns.
    idx : iterable
        Ids to categorize. Each one must occur in ``dataframe[source]``.
    source : str
        Name of the column holding the ids.
    dest : str
        Name of the column holding the category of each id.

    Returns
    -------
    dict
        Maps every category to the list of ``"<id>.jpg"`` file names that
        belong to it, in the order the ids appear in ``idx``.

    Raises
    ------
    IndexError
        If an id from ``idx`` does not occur in ``dataframe[source]``.
    """
    # Build the id -> category lookup in a single pass instead of filtering
    # the whole dataframe once per id. ``setdefault`` keeps the first row
    # seen for an id, so duplicated ids resolve to their first occurrence.
    first_category = {}
    for key, category in zip(dataframe[source], dataframe[dest]):
        first_category.setdefault(key, category)

    categories = {}
    for item_id in idx:
        if item_id not in first_category:
            raise IndexError(f"id {item_id!r} not found in column {source!r}")
        current_img = str(item_id) + ".jpg"
        categories.setdefault(first_category[item_id], []).append(current_img)
    return categories

def copy_images(image_dict, source, dest):
    """
    Copy images into one sub-directory per category.

    image_dict maps a category name to a list of image file names, source is
    the directory the images are read from, and dest is the directory under
    which a sub-directory is created for every category.

    File names that do not exist in source are skipped, and files already
    present in the destination are left untouched.
    """
    for category, file_names in image_dict.items():
        category_dir = f"{dest}/{category}"
        if not os.path.exists(category_dir):
            os.makedirs(category_dir)

        for file_name in file_names:
            src_path = f"{source}/{file_name}"
            dst_path = f"{category_dir}/{file_name}"
            # Nothing to do when the source is missing or the copy already exists.
            if not os.path.exists(src_path) or os.path.exists(dst_path):
                continue
            shutil.copyfile(src_path, dst_path)


## Categorization of the Dataset
The fashion product dataset is categorized by the `articleType` column, which splits the data into groups such as shirts, trousers, and watches.

In [9]:
import pandas as pd

# Read the styles CSV; rows that cannot be parsed are skipped rather than raising.
styles_dir = '/content/drive/MyDrive/datasets/fashion_dataset/styles.csv'
fashion_df = pd.read_csv(styles_dir, on_bad_lines='skip')
ids = fashion_df['id']

# Map each article type (shirts, trousers, ...) to the image file names of its ids.
article_categories = categorize(
    dataframe=fashion_df,
    idx=ids,
    source='id',
    dest='articleType',
)

In [10]:
# Copy images from source directories to a directory with class sub-directories
# Images are read from <dataset_dir>/images and copied into
# <dataset_dir>/articleType/<category>/, one sub-directory per class.
source_dir = f"{dataset_dir}/images"
dest_dir = f"{dataset_dir}/articleType"
copy_images(image_dict=article_categories, source=source_dir, dest=dest_dir)

## Dataset Loading

In [16]:
import tensorflow as tf
from tensorflow import keras
from tensorflow.keras import layers

In [None]:
BATCH_SIZE = 32
VALIDATION_SPLIT = 0.2  # fraction of the images held out from the training subset
SPLIT_SEED = 132  # fixed seed so the training/validation split is reproducible

# Load only the training subset from the per-class sub-directories under dest_dir.
dataset = keras.utils.image_dataset_from_directory(
    directory=dest_dir,
    validation_split=VALIDATION_SPLIT,
    subset="training",
    seed=SPLIT_SEED,
    batch_size=BATCH_SIZE,
)

## Data Preprocessing

In [20]:
# Preprocessing pipeline: rescale pixels, then apply random flip / rotation / zoom.
data_augmentation = keras.Sequential(
    [
        # 8-bit images have pixel values up to 255, so 1/255 maps them into
        # [0, 1]. (The previous 1/225 left values as high as ~1.13.)
        layers.Rescaling(1.0 / 255),
        layers.RandomFlip("horizontal"),
        layers.RandomRotation(0.1),
        layers.RandomZoom(0.1),
    ]
)
# training=True is passed explicitly so the random layers are active when the
# pipeline is called inside Dataset.map, instead of relying on the default
# training mode, which differs between Keras versions.
# NOTE: this rebinds `dataset`; re-running this cell without reloading the
# dataset applies the rescaling and augmentation a second time.
dataset = dataset.map(lambda x, y: (data_augmentation(x, training=True), y))