In [1]:
%matplotlib widget

import tensorflow as tf
import os
import sys
import cv2
import pickle
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import matplotlib.image as mpimg
import seaborn as sns
import random

from sklearn.decomposition import PCA
from sklearn.preprocessing import StandardScaler
from sklearn.svm import LinearSVC
from annoy import AnnoyIndex

In [2]:
# Root folder of the fashion dataset. Set the FASHION_DATASET_PATH environment
# variable to point the notebook at another location; the original local path
# is kept as the default. os.path.join(..., "") guarantees a trailing separator,
# because other cells build file names by plain string concatenation.
DATASET_PATH = os.path.join(
    os.environ.get(
        "FASHION_DATASET_PATH",
        "/Users/jeremy/Google Drive/datasets/fashion-dataset/",
    ),
    "",
)
print(os.listdir(DATASET_PATH))

['embeddings.tsv', '.DS_Store', 'images.csv', 'images', 'resnet50-embeddings-full.pkl', 'styles_preprocessed.csv', 'styles', 'styles.csv', '.ipynb_checkpoints', 'embeddings.csv', 'resnet50-embeddings.pkl']


In [3]:
# Load the preprocessed style metadata, skipping malformed CSV rows instead of
# aborting. `on_bad_lines="skip"` is the replacement for `error_bad_lines=False`,
# which was deprecated in pandas 1.3 and removed in pandas 2.0.
df = pd.read_csv(DATASET_PATH + "styles_preprocessed.csv", on_bad_lines="skip")
# Each product image is stored as "<id>.jpg"; build the name column-wise
# rather than with a row-by-row apply.
df['image'] = df['id'].astype(str) + ".jpg"
df = df.reset_index(drop=True)
df.head(10)

Unnamed: 0,id,gender,masterCategory,subCategory,articleType,baseColour,season,year,usage,productDisplayName,image
0,15970,Men,Apparel,Topwear,Shirts,Navy Blue,Fall,2011,Casual,Turtle Check Men Navy Blue Shirt,15970.jpg
1,39386,Men,Apparel,Bottomwear,Jeans,Blue,Summer,2012,Casual,Peter England Men Party Blue Jeans,39386.jpg
2,59263,Women,Accessories,Watches,Watches,Silver,Winter,2016,Casual,Titan Women Silver Watch,59263.jpg
3,21379,Men,Apparel,Bottomwear,Track Pants,Black,Fall,2011,Casual,Manchester United Men Solid Black Track Pants,21379.jpg
4,53759,Men,Apparel,Topwear,Tshirts,Grey,Summer,2012,Casual,Puma Men Grey T-shirt,53759.jpg
5,1855,Men,Apparel,Topwear,Tshirts,Grey,Summer,2011,Casual,Inkfruit Mens Chain Reaction T-shirt,1855.jpg
6,30805,Men,Apparel,Topwear,Shirts,Green,Summer,2012,Ethnic,Fabindia Men Striped Green Shirt,30805.jpg
7,26960,Women,Apparel,Topwear,Shirts,Purple,Summer,2012,Casual,Jealous 21 Women Purple Shirt,26960.jpg
8,29114,Men,Accessories,Socks,Socks,Navy Blue,Summer,2012,Casual,Puma Men Pack of 3 Socks,29114.jpg
9,30039,Men,Accessories,Watches,Watches,Black,Winter,2016,Casual,Skagen Men Black Watch,30039.jpg


In [4]:
def get_img_path(img):
    """Return the path of the image file `img` inside the dataset's ``images/`` folder.

    The result is ``DATASET_PATH`` followed by ``images/`` and the file name.
    """
    return "".join((DATASET_PATH, "images/", img))

In [5]:
# Load the pickled embeddings. The context manager closes the file handle,
# which the bare open(...) call left open.
# NOTE: unpickling can execute arbitrary code; only load files from a trusted source.
with open(DATASET_PATH + "resnet50-embeddings-full.pkl", "rb") as emb_file:
    df_embs = pickle.load(emb_file)

In [6]:
# Keep only the rows whose master category is 'Apparel'. Their original row
# labels are stored first so the matching embeddings can be selected later;
# afterwards the filtered frame is renumbered from 0.
df_filtered = df[df["masterCategory"] == 'Apparel']
df_filtered_idx = df_filtered.index.tolist()
df_filtered = df_filtered.reset_index(drop=True)
df_filtered["masterCategory"].unique(), df_filtered.shape, len(df_filtered_idx)

(array(['Apparel'], dtype=object), (21393, 11), 21393)

In [7]:
# Select the embeddings whose row label belongs to an 'Apparel' item and
# renumber them from 0. A vectorised membership test replaces the former
# iterrows loop, which searched the label list once per embedding row.
# Later cells use row positions of this frame to index into df_filtered.
df_embs_filtered = df_embs.loc[df_embs.index.isin(df_filtered_idx)]
df_embs_filtered = df_embs_filtered.reset_index(drop=True)
df_embs_filtered

Unnamed: 0,0,1,2,3,4,5,6,7,8,9,...,2038,2039,2040,2041,2042,2043,2044,2045,2046,2047
0,5.661311,2.366158,0.000000,3.255738,0.845207,2.673839,8.780884,6.280785,0.787186,1.587661,...,2.463607,0.000000,1.825991,20.485271,3.902871,0.000000,8.225970,1.313863,0.000000,10.820534
1,5.790412,9.593652,0.000000,9.431687,0.287238,0.259950,17.028755,3.037456,0.136339,0.000000,...,1.670569,0.013894,10.338679,4.784732,0.189142,0.000000,7.739138,0.372693,0.000000,8.903797
2,5.882204,1.958622,0.000000,12.519884,0.000000,0.000000,9.383584,3.781623,0.000000,1.490620,...,17.825481,0.000000,3.592513,2.042306,0.000000,0.000000,6.656784,1.457313,1.694215,1.403330
3,0.245961,17.383436,0.493822,3.374461,2.567365,0.000000,6.482548,5.862635,0.000000,3.164864,...,4.464021,0.447082,1.448132,12.884141,0.000000,0.397487,5.028076,1.939697,0.000000,11.485396
4,0.028540,18.567101,0.000000,0.757250,1.504954,0.000000,1.982043,2.963786,1.072763,4.539298,...,3.247076,4.273134,3.088639,16.997049,0.000000,0.000000,2.258092,1.595798,0.000000,7.674057
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
21388,0.000000,35.020271,7.460171,1.012842,1.366380,2.258118,1.922700,1.341036,1.293422,0.000000,...,4.813864,2.418916,1.957278,0.703634,6.915268,0.523176,4.877475,0.000000,0.000000,14.361758
21389,2.841607,15.895623,1.662815,2.716725,0.000000,0.727688,1.496747,0.472215,0.905088,3.890155,...,6.330880,2.010111,3.669139,13.017206,0.000000,0.749803,4.271129,1.611024,0.000000,9.624297
21390,3.243481,16.985628,1.560327,2.596826,0.545848,0.000000,9.023268,0.919529,1.380090,3.811848,...,13.571547,2.010174,1.121704,18.124388,1.862831,0.000000,1.212624,2.394026,3.093368,25.842484
21391,3.075987,6.170718,0.000000,0.242754,6.728174,0.000000,3.616792,1.160508,0.088385,1.244293,...,4.731680,1.630670,0.743359,15.961621,9.004664,0.551970,1.509514,0.000000,1.935909,13.389180


In [8]:
# Perform PCA over the embeddings to reduce dimensionality
num_feature_dimensions = 100  # Set the number of embedding dimensions
# A fixed random_state keeps the projection reproducible across runs: with the
# default svd_solver, scikit-learn may select a randomized solver for data of
# this size, and its result otherwise varies from run to run.
pca = PCA(n_components=num_feature_dimensions, random_state=0)
embs_compressed = pca.fit_transform(df_embs_filtered)
df_embs_filtered_compressed = pd.DataFrame(embs_compressed)
df_embs_filtered_compressed

Unnamed: 0,0,1,2,3,4,5,6,7,8,9,...,90,91,92,93,94,95,96,97,98,99
0,-64.357330,2.010041,4.515400,48.358093,-19.129559,5.343149,48.397041,-27.092304,-30.360741,19.002281,...,-1.045795,7.806426,-1.543592,-7.379512,0.897222,-0.631809,-9.591692,-5.601133,0.066160,0.213388
1,-52.113586,-68.620972,-57.570976,-31.831814,53.722122,-23.362316,-2.748698,-17.556942,-21.903976,6.951294,...,-6.568780,-2.930577,-1.556583,-4.454517,-3.807970,-0.810698,2.823783,-0.550963,1.513241,-7.014592
2,-22.425549,-59.431023,-64.441757,-27.178450,47.945007,-31.331881,-1.548607,8.080978,-4.350989,-14.550542,...,-2.131486,-2.078046,-1.948719,-4.242862,-7.243876,1.015777,-13.728615,-5.745375,3.258711,3.961050
3,-54.588421,-30.280922,21.072227,-0.946355,-21.052837,-9.432900,-18.621923,3.578413,-16.154772,19.667294,...,1.145181,-5.982284,-0.907705,-6.121184,3.044176,0.285288,-6.524754,-8.342410,-9.483220,3.852769
4,-28.993317,-14.921981,54.522690,-20.936266,-14.654048,-7.483439,17.664339,19.237764,-10.021878,-5.284913,...,3.867297,2.302430,1.138552,3.322856,-9.495638,-5.037411,0.005897,6.639260,-5.325815,-0.020914
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
21388,106.352806,-50.901730,48.033310,14.035436,4.876522,-4.716371,-13.125345,-2.849037,-0.469375,-25.319582,...,-9.299715,9.475740,6.190634,-2.480321,3.653558,11.617820,-5.822254,5.804242,-3.224222,2.742183
21389,-37.274277,-15.107412,56.033360,-37.821804,7.631861,-12.317935,25.639151,-17.738352,14.464298,-2.579217,...,0.003033,4.839504,6.868231,6.266203,1.181449,7.943367,12.269714,-4.839841,-2.580473,-0.929886
21390,-33.474823,26.566456,21.142511,-23.724096,21.569035,-24.361183,-19.552797,-66.617706,18.289919,-10.338766,...,5.356727,-3.027230,-2.347238,-0.049629,-0.107464,1.277156,-4.013589,-5.891430,-2.114031,0.592374
21391,1.716123,63.075356,7.019055,-0.876149,25.028723,17.429117,22.952635,-33.428246,-2.201575,8.034440,...,-11.033210,0.235250,-2.618758,-6.559300,7.157476,-7.545960,0.426204,-0.862922,-3.962469,11.233874


In [9]:
# Fit a linear SVM that predicts the season from the standardised PCA
# features; a later cell reads its coefficients to build the navigation axis.
y = df_filtered["season"]
X = df_embs_filtered_compressed
scaler = StandardScaler()
X_scaled = scaler.fit(X).transform(X)
svm_clf = LinearSVC(C=1, max_iter=10000000).fit(X_scaled, y)
svm_clf

LinearSVC(C=1, max_iter=10000000)

In [10]:
number_of_embeddings = df_filtered.shape[0]

# Take the index dimensionality from the data itself so it cannot drift from
# the width of the PCA output.
f = df_embs_filtered_compressed.shape[1]
t = AnnoyIndex(f, 'euclidean')
# Each item is stored under the row label of its embedding; the vector is
# passed as a plain list of floats.
for index, v in zip(df_embs_filtered_compressed.index,
                    df_embs_filtered_compressed.to_numpy().tolist()):
    t.add_item(index, v)

t.build(10)

True

In [11]:
def get_nearest_neighbour(user_position, df):
    """Return the ``id`` of the row in `df` that lies closest to `user_position`.

    The single nearest item is queried from the module-level Annoy index ``t``;
    the item number it returns is used as a positional row index into `df`.
    """
    neighbour_rows = t.get_nns_by_vector(user_position, 1)
    neighbour_ids = df['id'].iloc[neighbour_rows]
    return neighbour_ids.values[0]

In [12]:
def annotate_nearest_neighbour(nearest_neighbour, df):
    """Draw the image of the item with id `nearest_neighbour` via ``plt.imshow``.

    The image file name is taken from the ``image`` column of the first row of
    `df` whose ``id`` equals `nearest_neighbour`.
    """
    matches = df[df['id'] == nearest_neighbour]
    image_file = matches['image'].values[0]
    plt.imshow(plt.imread(get_img_path(image_file)))

In [13]:
def compute_navigation_axis(emb, w, dist):
    """Build a straight navigation line through an embedding along direction `w`.

    Starting from the first row of `emb`, the line consists of the points
    ``emb + k * w`` for ``k = -dist, ..., dist``.

    Previously every dimension was sorted on its own, which reversed the rows
    whose step was negative; a column then no longer described one point on
    the line. The points are now kept together, and the whole line is flipped
    only when needed so that row 0 is still in ascending order.

    Parameters
    ----------
    emb : pandas.DataFrame
        Frame whose first row is the starting embedding, one column per dimension.
    w : sequence of float
        Step per dimension; entry ``i`` belongs to the ``i``-th column of `emb`.
    dist : int
        Number of steps taken in each direction (values below 0 act like 0).

    Returns
    -------
    numpy.ndarray
        Array of shape ``(n_dims, 2 * dist + 1)``; column ``j`` is one point
        on the line and row ``i`` holds the values of dimension ``i``.

    Raises
    ------
    IndexError
        If `w` has fewer entries than `emb` has columns.
    """
    start = emb.iloc[0].to_numpy(dtype=float)
    steps = np.asarray(w, dtype=float)
    if steps.shape[0] < start.shape[0]:
        raise IndexError("w has fewer entries than emb has columns")
    steps = steps[:start.shape[0]]

    reach = max(dist, 0)
    offsets = np.arange(-reach, reach + 1)
    # Outer product: one row per dimension, one column per step count k.
    nav_axis = start[:, None] + steps[:, None] * offsets[None, :]

    # Keep the first dimension ascending (callers look positions up on row 0)
    # by reversing the direction of travel for all dimensions at once.
    if steps.size and steps[0] < 0:
        nav_axis = nav_axis[:, ::-1]
    return nav_axis

In [14]:
def initialize_user_position(axis, idx):
    """Return the point at position `idx` of `axis` as a list.

    `axis` is iterated row by row and element `idx` of every row is collected,
    giving one coordinate per row.
    """
    return [coords[idx] for coords in axis]

In [15]:
def update_user_position(change):
    """Slider callback: show the item nearest to the newly selected position.

    Only ``change.new`` (the new slider value) is read from `change`. The
    value is mapped to a point on the module-level ``nav_axis`` by
    ``compute_user_position``, the nearest item in ``df_filtered`` is looked
    up, and its image and title are drawn before the module-level ``fig`` is
    refreshed. The module-level ``user_position`` is not modified.
    """
    new_user_pos = compute_user_position(change.new)
    nearest_neighbour = get_nearest_neighbour(new_user_pos, df_filtered)
    # Clear the previous image first; otherwise every slider move stacks one
    # more image on the same axes.
    plt.cla()
    annotate_nearest_neighbour(nearest_neighbour, df_filtered)
    season = df_filtered.loc[df_filtered['id'] == nearest_neighbour].season.values[0]
    plt.title('Nearest Embedding: {} with season: {}'.format(nearest_neighbour, season))
    fig.canvas.draw()
    fig.canvas.flush_events()

In [16]:
def compute_user_position(change):
    """Return the point on ``nav_axis`` whose first coordinate is closest to `change`.

    `change` is a position along the first dimension of the module-level
    ``nav_axis``. The index of the closest entry of ``nav_axis[0]`` is found
    with ``np.argmin`` (first match on ties) and the values of all dimensions
    at that index are returned as a list.

    The earlier lookup found the closest value and then searched for it again
    with ``np.isclose``, which could return an earlier, merely similar entry
    when neighbouring axis values lie within the comparison tolerance.
    """
    first_dim = np.asarray(nav_axis[0])
    idx = int(np.argmin(np.abs(first_dim - change)))
    return [dim[idx] for dim in nav_axis]

In [17]:
def explore_space(start, end):
    """Collect the distinct nearest-neighbour ids found between `start` and `end`.

    Every integer x-position from `start` up to, but not including, `end` is
    mapped to a point on the navigation axis and its nearest item in
    ``df_filtered`` is looked up. Ids are returned in order of first appearance.
    """
    seen = {}
    for x_pos in range(start, end):
        nn = get_nearest_neighbour(compute_user_position(x_pos), df_filtered)
        # A dict keeps insertion order, so this de-duplicates while preserving order.
        seen.setdefault(nn, None)
    return list(seen)

In [None]:
# Show every distinct item met along the navigation axis side by side.
# NOTE: this cell reads `nav_axis`, which is only created in the slider cell
# further down; that cell has to be run before this one.
fig = plt.figure(figsize=(16, 10))
images = explore_space(int(min(nav_axis[0])), int(max(nav_axis[0])))
columns = len(images)
rows = 1
# enumerate replaces the manual counter that was named `id` and shadowed the
# built-in of the same name for the rest of the session.
for i, image_id in enumerate(images, start=1):
    img = plt.imread(get_img_path(df.loc[df['id'] == image_id].image.values[0]))
    fig.add_subplot(rows, columns, i)
    plt.imshow(img, interpolation='nearest')
plt.show()

In [19]:
from ipywidgets import AppLayout, FloatSlider

# Create matplotlib figure for displaying fashion items
fig = plt.figure(figsize=(7, 7))

# Create orthogonal navigation axis
# NOTE(review): svm_clf was fitted on standardised features, while the axis is
# built from the unscaled PCA embeddings; confirm whether the coefficients
# should be rescaled before use.
w = svm_clf.coef_[0]
rand_emb = df_embs_filtered_compressed.sample()
nav_axis = compute_navigation_axis(rand_emb, w, 1000)

# Start at the smallest value of the first dimension. np.argmin replaces a
# lookup that recomputed min(nav_axis[0]) once per element.
starting_idx = int(np.argmin(nav_axis[0]))
user_position = initialize_user_position(nav_axis, starting_idx)

nearest_neighbour = get_nearest_neighbour(user_position, df_filtered)
annotate_nearest_neighbour(nearest_neighbour, df_filtered)

plt.title('Nearest Embedding: {} with season: {}'.format(nearest_neighbour, df_filtered.loc[df_filtered['id'] == nearest_neighbour].season.values[0]))

# Create Slider to navigate in embedding space; its range spans the first
# dimension of the navigation axis
slider = FloatSlider(
    orientation="horizontal",
    description="x-Position:",
    value=user_position[0],
    min=float(np.min(nav_axis[0])),
    max=float(np.max(nav_axis[0]))
)
slider.layout.margin = '0px 10% 0px 10%'
slider.layout.width = '40%'

# Redraw the nearest item whenever the slider value changes
slider.observe(update_user_position, names='value')

display(slider)

Canvas(toolbar=Toolbar(toolitems=[('Home', 'Reset original view', 'home', 'home'), ('Back', 'Back to previous …

FloatSlider(value=-259.21128008589614, description='x-Position:', layout=Layout(margin='0px 10% 0px 10%', widt…