In [1]:
%matplotlib widget

import tensorflow as tf
import os
import sys
import cv2
import pickle
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import matplotlib.image as mpimg
import seaborn as sns
import random

from tensorflow import keras
from tensorflow.keras.preprocessing import image
from tensorflow.keras.applications.resnet50 import ResNet50, preprocess_input
from tensorflow.keras.layers import GlobalMaxPooling2D
from numpy.linalg import norm
from sklearn.decomposition import PCA
from sklearn.preprocessing import StandardScaler
from sklearn.svm import SVC
from sklearn.svm import LinearSVC

In [2]:
# Root folder of the fashion dataset. Set the FASHION_DATASET_PATH environment
# variable to point the notebook at another location; otherwise the original
# local path is used.
# os.path.join(..., "") guarantees a trailing separator, which the other cells
# rely on because they build file names by plain string concatenation
# (e.g. DATASET_PATH + "styles.csv").
DATASET_PATH = os.path.join(
    os.environ.get("FASHION_DATASET_PATH", "/Users/jeremy/Google Drive/datasets/fashion-dataset/"),
    "",
)
print(os.listdir(DATASET_PATH))

['embeddings.tsv', '.DS_Store', 'images.csv', 'images', 'resnet50-embeddings-full.pkl', 'styles_preprocessed.csv', 'styles', 'styles.csv', '.ipynb_checkpoints', 'embeddings.csv', 'resnet50-embeddings.pkl']


In [3]:
# Load the first 5000 rows of the product metadata, skipping malformed lines.
# `on_bad_lines="skip"` is the replacement for `error_bad_lines=False`, which was
# deprecated in pandas 1.3 and removed in pandas 2.0 (requires pandas >= 1.3).
df = pd.read_csv(DATASET_PATH + "styles.csv", nrows=5000, on_bad_lines="skip")
# Each product image is stored as "<id>.jpg"; build the column in one vectorised
# step instead of a Python-level apply over every row.
df['image'] = df['id'].astype(str) + ".jpg"
df = df.reset_index(drop=True)
df.head(10)

Unnamed: 0,id,gender,masterCategory,subCategory,articleType,baseColour,season,year,usage,productDisplayName,image
0,15970,Men,Apparel,Topwear,Shirts,Navy Blue,Fall,2011,Casual,Turtle Check Men Navy Blue Shirt,15970.jpg
1,39386,Men,Apparel,Bottomwear,Jeans,Blue,Summer,2012,Casual,Peter England Men Party Blue Jeans,39386.jpg
2,59263,Women,Accessories,Watches,Watches,Silver,Winter,2016,Casual,Titan Women Silver Watch,59263.jpg
3,21379,Men,Apparel,Bottomwear,Track Pants,Black,Fall,2011,Casual,Manchester United Men Solid Black Track Pants,21379.jpg
4,53759,Men,Apparel,Topwear,Tshirts,Grey,Summer,2012,Casual,Puma Men Grey T-shirt,53759.jpg
5,1855,Men,Apparel,Topwear,Tshirts,Grey,Summer,2011,Casual,Inkfruit Mens Chain Reaction T-shirt,1855.jpg
6,30805,Men,Apparel,Topwear,Shirts,Green,Summer,2012,Ethnic,Fabindia Men Striped Green Shirt,30805.jpg
7,26960,Women,Apparel,Topwear,Shirts,Purple,Summer,2012,Casual,Jealous 21 Women Purple Shirt,26960.jpg
8,29114,Men,Accessories,Socks,Socks,Navy Blue,Summer,2012,Casual,Puma Men Pack of 3 Socks,29114.jpg
9,30039,Men,Accessories,Watches,Watches,Black,Winter,2016,Casual,Skagen Men Black Watch,30039.jpg


In [4]:
def get_img_path(img):
    """Return the path of image file `img` inside the dataset's "images/" folder."""
    path_parts = (DATASET_PATH, "images/", img)
    return "".join(path_parts)

In [5]:
# Load the precomputed image embeddings.
# NOTE(security): unpickling can execute arbitrary code, so only load embedding
# files that you created yourself or otherwise trust.
# The `with` block makes sure the file handle is closed after loading.
with open(DATASET_PATH + "resnet50-embeddings.pkl", "rb") as embeddings_file:
    df_embs = pickle.load(embeddings_file)

In [6]:
# Restrict the metadata to master category 'Apparel'. The original row labels of
# the kept rows are stored first so the matching embeddings can be selected later;
# afterwards the filtered frame is renumbered from 0.
df_filtered = df[df['masterCategory'].eq('Apparel')]
df_filtered_idx = list(df_filtered.index)
df_filtered = df_filtered.reset_index(drop=True)
df_filtered.masterCategory.unique(), df_filtered.shape, len(df_filtered_idx)

(array(['Apparel'], dtype=object), (2297, 11), 2297)

In [7]:
# Keep only the embedding rows whose index label was recorded in df_filtered_idx,
# in their original order, and renumber them from 0.
# Index.isin performs the selection in one vectorised pass; the previous
# iterrows() loop tested membership in a Python list for every row (quadratic)
# and rebuilt the frame from individual Series.
df_embs_filtered = df_embs.loc[df_embs.index.isin(df_filtered_idx)].reset_index(drop=True)
df_embs_filtered

Unnamed: 0,0,1,2,3,4,5,6,7,8,9,...,2038,2039,2040,2041,2042,2043,2044,2045,2046,2047
0,5.661316,2.366165,0.000000,3.255730,0.845208,2.673841,8.780870,6.280786,0.787188,1.587657,...,2.463610,0.000000,1.825994,20.485268,3.902863,0.000000,8.225969,1.313863,0.000000,10.820526
1,5.790417,9.593648,0.000000,9.431670,0.287235,0.259945,17.028748,3.037453,0.136341,0.000000,...,1.670570,0.013891,10.338681,4.784722,0.189140,0.000000,7.739144,0.372693,0.000000,8.903802
2,5.882203,1.958606,0.000000,12.519882,0.000000,0.000000,9.383594,3.781614,0.000000,1.490617,...,17.825478,0.000000,3.592507,2.042305,0.000000,0.000000,6.656779,1.457314,1.694204,1.403327
3,0.245968,17.383430,0.493824,3.374468,2.567368,0.000000,6.482537,5.862639,0.000000,3.164866,...,4.464024,0.447075,1.448143,12.884138,0.000000,0.397501,5.028075,1.939697,0.000000,11.485400
4,0.028546,18.567099,0.000000,0.757250,1.504949,0.000000,1.982024,2.963777,1.072765,4.539300,...,3.247082,4.273134,3.088643,16.997053,0.000000,0.000000,2.258095,1.595798,0.000000,7.674047
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
2292,3.134137,12.337814,3.281830,0.000000,6.123401,6.273666,3.345068,4.230134,2.082114,2.691854,...,5.392367,2.139013,7.540308,5.394151,6.070920,0.000000,0.000000,0.000000,0.000000,10.168349
2293,2.800386,19.866081,3.215006,2.487933,0.147357,0.000000,7.958175,5.669247,0.706661,4.877097,...,10.292748,0.000000,0.738173,16.109261,0.383140,0.000000,4.432935,2.898650,0.416252,12.830476
2294,4.801172,16.536064,2.132150,2.630906,1.190370,0.243107,1.454644,3.500423,0.627453,5.221931,...,0.000000,1.886256,3.294083,17.186272,0.000000,2.014549,1.663332,2.118907,0.000000,6.815829
2295,3.184294,14.538852,6.306130,0.000000,3.361131,6.701039,0.729837,0.295319,0.252367,3.175583,...,0.000000,1.471544,1.482233,22.654318,2.425646,12.427614,8.393599,0.000000,9.616654,3.375090


In [8]:
# Project the embeddings onto their first principal components so every article
# can be drawn as a point in a 3D plot.
num_feature_dimensions = 3  # number of principal components to keep (used as x, y, z)
# A fixed random_state keeps the projection identical between runs: for inputs of
# this size PCA may select its randomized SVD solver, which otherwise yields
# slightly different coordinates on every execution.
pca = PCA(n_components=num_feature_dimensions, random_state=42)
embs_compressed = pca.fit_transform(df_embs_filtered)
df_embs_filtered_compressed = pd.DataFrame(embs_compressed)
df_embs_filtered_compressed

Unnamed: 0,0,1,2
0,-64.281212,-0.382274,10.129683
1,-50.859009,-78.791428,-48.603447
2,-21.894360,-69.084694,-56.701591
3,-52.814461,-30.695782,24.538647
4,-26.194765,-12.308734,56.018620
...,...,...,...
2292,99.575562,-30.713907,-20.068645
2293,-26.236423,-12.102205,52.941185
2294,-29.318726,36.647327,31.197702
2295,54.131577,63.903629,-8.761883


In [9]:
# Fit a linear SVM that predicts the season from the 3D PCA coordinates. Its
# weight vector is later used as the direction of the navigation axis and its
# hyperplane as the plotted decision boundary.
X = df_embs_filtered_compressed
y = df_filtered["season"]
# Standardise the features before fitting; the learnt coefficients therefore
# live in the scaled coordinate system, not in raw PCA coordinates.
scaler = StandardScaler()
X_scaled = scaler.fit_transform(X)
# random_state fixes the solver's internal random number generator so the fitted
# coefficients are reproducible across runs.
svm_clf = LinearSVC(C=1, max_iter=100000, random_state=42)
svm_clf.fit(X_scaled, y)

LinearSVC(C=1, max_iter=100000)

In [10]:
# Attach the three PCA components to the metadata frame as columns x, y and z
# (values are matched by index label).
for axis_name, component in (('x', 0), ('y', 1), ('z', 2)):
    df_filtered[axis_name] = df_embs_filtered_compressed[component]

In [12]:
def create_user_marker(x, y, z, ax=None):
    """Draw the user marker at (x, y, z) and return it with its position.

    Parameters
    ----------
    x, y, z : float
        Coordinates of the marker.
    ax : axes object, optional
        Axes to draw on. When omitted, the current pyplot axes are used, which
        is what the previous implementation always did.

    Returns
    -------
    user_marker
        Whatever ``ax.plot`` returns (for Matplotlib, a list of line artists).
    user_position : numpy.ndarray
        Array ``[x, y, z]``.
    """
    # Resolve the target axes explicitly so callers are not forced to rely on
    # pyplot's implicit "current axes" state.
    target_ax = plt.gca() if ax is None else ax
    user_marker = target_ax.plot(x, y, z, 'yo', markersize=10)
    user_position = np.array([x, y, z])
    return user_marker, user_position

In [23]:
def update_user_position(change):
    """Slider callback: move the user marker and refresh the nearest-neighbour view.

    `change` is the change notification passed by the slider; only its ``new``
    attribute (the new slider value) is read. The function snaps that value to
    the closest point of the navigation axis, moves the marker there, looks up
    the nearest embedding, shows its image and updates the figure title.

    Relies on the notebook-level names `nav_axis`, `user_marker`, `ax2`, `fig`
    and `df_filtered`, and rebinds the notebook-level `user_position`.
    """
    # Without this declaration the assignment below would only create a local
    # variable and the notebook-level position would never change.
    global user_position
    new_x, new_y, new_z = get_updated_user_pos(change.new, nav_axis[0], nav_axis[1], nav_axis[2])
    user_position = np.array([new_x, new_y, new_z])
    # Pass one-element sequences: recent Matplotlib versions reject bare scalars
    # as line data.
    user_marker[0].set_data([new_x], [new_y])
    user_marker[0].set_3d_properties([new_z])
    nearest_neighbour, _ = get_nearest_neighbour(user_position, df_filtered)
    annotate_nearest_neighbour(nearest_neighbour, ax2, df_filtered)
    season = df_filtered.loc[df_filtered['id'] == nearest_neighbour].season.values[0]
    fig.suptitle('Nearest Embedding: {} with season: {}'.format(nearest_neighbour, season))
    # Force a redraw so the widget canvas reflects the change immediately.
    fig.canvas.draw()
    fig.canvas.flush_events()

In [14]:
def get_updated_user_pos(change, axis_x, axis_y, axis_z):
    """Snap a requested x value to the navigation axis.

    Finds the index of the element of `axis_x` that is closest to `change`
    (the first one on ties) and returns the coordinates stored at that index.

    Parameters
    ----------
    change : float
        Requested x position (the slider value).
    axis_x, axis_y, axis_z : sequence of float
        Coordinates of the navigation-axis points, index-aligned.

    Returns
    -------
    tuple
        ``(axis_x[idx], axis_y[idx], axis_z[idx])`` for the closest index.

    Raises
    ------
    ValueError
        If `axis_x` is empty.
    """
    # argmin returns the exact nearest index. The earlier min()+isclose() lookup
    # could return an earlier element that was merely within isclose's tolerance
    # of the nearest value.
    idx = int(np.argmin(np.abs(np.asarray(axis_x) - change)))
    return axis_x[idx], axis_y[idx], axis_z[idx]

In [14]:
# Inspect the row of a single article (id 21379), including its x/y/z coordinates.
df_filtered[df_filtered['id'].eq(21379)]

Unnamed: 0,id,gender,masterCategory,subCategory,articleType,baseColour,season,year,usage,productDisplayName,image,x,y,z
2,21379,Men,Apparel,Bottomwear,Track Pants,Black,Fall,2011,Casual,Manchester United Men Solid Black Track Pants,21379.jpg,-21.894361,-69.084831,-56.699165


In [15]:
def get_nearest_neighbour(user_position, df):
    """Return the id and position of the row of `df` closest to `user_position`.

    Parameters
    ----------
    user_position : array-like of 3 floats
        Query point (x, y, z).
    df : pandas.DataFrame
        Must provide the columns 'id', 'x', 'y' and 'z'.

    Returns
    -------
    nearest_neighbour
        Value of the 'id' column of the closest row (the first one on ties),
        or None when no row has a finite distance to the query point.
    nearest_neighbour_pos : numpy.ndarray or None
        ``[x, y, z]`` of that row, or None.
    """
    if len(df) == 0:
        return None, None

    positions = df[['x', 'y', 'z']].to_numpy(dtype=float)
    # One vectorised distance computation replaces the per-row Python loop; this
    # function runs on every slider movement.
    dists = norm(positions - np.asarray(user_position, dtype=float), axis=1)

    # Rows with missing coordinates yield NaN distances and must never win.
    valid = np.isfinite(dists)
    if not valid.any():
        return None, None

    best = int(np.argmin(np.where(valid, dists, np.inf)))
    nearest_neighbour = df['id'].iloc[best]
    if isinstance(nearest_neighbour, np.generic):
        # Hand back a plain Python scalar, as iterating with itertuples() did.
        nearest_neighbour = nearest_neighbour.item()
    return nearest_neighbour, positions[best].copy()

In [16]:
def annotate_nearest_neighbour(nearest_neighbour, ax, df):
    """Show the product image of `nearest_neighbour` on `ax`.

    Parameters
    ----------
    nearest_neighbour
        Value of the 'id' column identifying the article.
    ax : matplotlib axes
        Axes on which the image is drawn.
    df : pandas.DataFrame
        Must provide the columns 'id' and 'image' (image file name).

    Raises
    ------
    IndexError
        If no row of `df` has the given id.
    """
    image_name = df.loc[df['id'] == nearest_neighbour].image.values[0]
    arr_img = plt.imread(get_img_path(image_name))
    # Draw on the axes that was passed in (previously the argument was ignored and
    # pyplot's current axes were used), and clear it first so repeated calls do
    # not pile up one image artist per slider movement.
    ax.clear()
    ax.imshow(arr_img)

In [17]:
def compute_navigation_axis(emb, w, dist):
    """Build a straight line of points through an embedding along direction `w`.

    Parameters
    ----------
    emb : pandas.DataFrame
        Single-row frame; the first value of every column is used as the
        coordinate of the starting point in that dimension.
    w : sequence of float
        Step per dimension; ``w[i]`` belongs to the i-th column of `emb`.
    dist : int
        Number of steps taken in each direction from the starting point.
        Values below 0 are treated as 0.

    Returns
    -------
    numpy.ndarray
        Array of shape ``(n_columns, 2 * dist + 1)``. Column ``k`` holds the
        coordinates of one point ``start + t_k * w``. Points are ordered so that
        the first row is ascending.

    Raises
    ------
    ValueError
        If `w` has fewer entries than `emb` has columns.
    """
    origin = np.array([emb[feature].values[0] for feature in emb], dtype=float)
    direction = np.asarray(w, dtype=float)
    if direction.shape[0] < origin.shape[0]:
        raise ValueError("w must provide one step per embedding dimension")
    direction = direction[:origin.shape[0]]

    dist = max(int(dist), 0)
    steps = np.arange(-dist, dist + 1)
    # Keep the first coordinate ascending (as the previous sorted output was) by
    # walking the line in the opposite direction when its first step is negative.
    # The same step multiplier is applied to every dimension, so the x/y/z
    # values of one point stay paired. Sorting each dimension independently
    # dropped the sign of `w` and broke that pairing for mixed-sign weights.
    if direction.size and direction[0] < 0:
        steps = steps[::-1]
    return origin[:, None] + direction[:, None] * steps[None, :]

In [61]:
from matplotlib.colors import ListedColormap
from matplotlib.offsetbox import (AnnotationBbox, OffsetImage, TextArea)
from ipywidgets import AppLayout, FloatSlider

sns.set_style("white")

# Left panel: 3D scatter of all embeddings in PCA coordinates.
fig = plt.figure(figsize=(16, 10))
ax1 = fig.add_subplot(121, projection='3d')

# One scatter series per season so that the legend shows the season labels.
for s in df_filtered.season.unique():
    in_season = df_filtered.season == s
    ax1.scatter(df_filtered.x[in_season], df_filtered.y[in_season], df_filtered.z[in_season], label=s)
ax1.legend()

# Decision boundary of the trained classifier (first row of coef_/intercept_).
# svm_clf was fitted on StandardScaler output, while the scatter above uses raw
# PCA coordinates. With x_scaled = (x - mean) / scale the decision function
#   coef . x_scaled + intercept
# equals (coef / scale) . x + (intercept - coef . (mean / scale)),
# so the plane is converted to raw coordinates before it is drawn.
coef_raw = svm_clf.coef_[0] / scaler.scale_
intercept_raw = svm_clf.intercept_[0] - np.dot(svm_clf.coef_[0], scaler.mean_ / scaler.scale_)
xx, yy = np.meshgrid(np.linspace(-75, 75, 200), np.linspace(-75, 75, 200))
zz = (-intercept_raw - coef_raw[0] * xx - coef_raw[1] * yy) / coef_raw[2]

# Navigation axis: a line through a randomly chosen embedding, 250 steps in each
# direction, stepping by the classifier weights.
# NOTE(review): `w` is still the weight vector learnt in standardised space but is
# applied as a step in raw PCA coordinates; confirm which direction is intended.
w = svm_clf.coef_[0]
rand_emb = df_embs_filtered_compressed.sample()
nav_axis = compute_navigation_axis(rand_emb, w, 250)

ax1.plot_surface(xx, yy, zz, color='seashell')
ax1.plot3D(nav_axis[0], nav_axis[1], nav_axis[2], 'red')

# Start the user marker at the navigation-axis point closest to the chosen embedding.
# The marker must be created before the second subplot is added:
# create_user_marker draws on the current axes, which is still ax1 at this point.
starting_idx = int(np.argmin(np.abs(nav_axis[0] - rand_emb[0].values[0])))
user_marker, user_position = create_user_marker(nav_axis[0][starting_idx], nav_axis[1][starting_idx], nav_axis[2][starting_idx])
nearest_neighbour, nearest_neighbour_pos = get_nearest_neighbour(user_position, df_filtered)

# Right panel: product image of the embedding nearest to the user marker.
ax2 = fig.add_subplot(122)
annotate_nearest_neighbour(nearest_neighbour, ax2, df_filtered)

fig.suptitle('Nearest Embedding: {} with season: {}'.format(nearest_neighbour, df_filtered.loc[df_filtered['id'] == nearest_neighbour].season.values[0]))

# Slider that moves the user marker along the x range of the navigation axis.
slider = FloatSlider(
    orientation="horizontal",
    description="x-Position:",
    value=user_position[0],
    min=min(nav_axis[0]),
    max=max(nav_axis[0])
)
slider.layout.margin = '0px 20% 0px 20%'
slider.layout.width = '40%'

# Re-position the marker whenever the slider value changes.
slider.observe(update_user_position, names='value')

display(slider)

Canvas(toolbar=Toolbar(toolitems=[('Home', 'Reset original view', 'home', 'home'), ('Back', 'Back to previous …

FloatSlider(value=91.89837646484375, description='x-Position:', layout=Layout(margin='0px 20% 0px 20%', width=…