In [None]:
# import libraries

from numpy import integer, unique, where, argmax
import numpy as np
import pandas as pd
import os
from matplotlib import pyplot as plt
from PIL import Image
import requests
from io import BytesIO
import pickle
import imageio as iio

from sklearn.mixture import GaussianMixture
from sklearn.neighbors import NearestNeighbors
from sklearn.preprocessing import Normalizer, StandardScaler, MinMaxScaler, LabelEncoder, OneHotEncoder
from sklearn.model_selection import train_test_split
from sklearn.utils import shuffle
from mlxtend.plotting import plot_decision_regions

In [None]:
def categorical_encoding(df, col_name):
    """
    One-hot encode a single categorical column of a dataframe.

    The encoding is computed with numpy only, so it does not depend on the
    scikit-learn version (the ``sparse`` keyword of ``OneHotEncoder`` that was
    used here before no longer exists in recent releases).

    Parameters
    ----------
    df : pandas.DataFrame
        Frame that contains the column to encode.
    col_name : str
        Name of the categorical column.

    Returns
    -------
    numpy.ndarray
        float64 array of shape (n_rows, n_categories). Column ``j`` is the
        indicator of the j-th category in sorted order, i.e. the same layout
        a LabelEncoder followed by a dense OneHotEncoder produces.
    """
    # Flatten the selected column to a 1-D array of raw category values
    col_np = df[[col_name]].to_numpy().ravel()

    # Integer encode: index of every value among the sorted unique categories
    categories, integer_encoded = np.unique(col_np, return_inverse=True)

    # Binary encode: pick, for every row, the matching row of the identity matrix
    one_hot_encoded = np.eye(len(categories))[integer_encoded.reshape(-1)]

    return one_hot_encoded


def gen_features(df):
    """
    Build the numeric feature matrix for the nearest-neighbour models.

    The matrix is assembled column-wise in this order:
      1. one-hot encoding of 'gender'
      2. one-hot encoding of 'brand'
      3. 'price'
      4. every column located after 'composition', except 'sum'

    'color' is intentionally not encoded here.

    Parameters
    ----------
    df : pandas.DataFrame
        Must contain 'gender', 'brand', 'price' and 'composition'; all columns
        to the right of 'composition' are expected to be numeric.

    Returns
    -------
    numpy.ndarray
        2-D array with one row per row of ``df``.
    """
    # Convert categorical features into numerical (one-hot) blocks
    encoded_blocks = [categorical_encoding(df, col_name) for col_name in ['gender', 'brand']]

    # Numeric columns start right after 'composition'. 'sum' is excluded by
    # name rather than by position: on a frame with extra columns appended
    # after it (e.g. merged image features) a positional "drop the last
    # column" would keep 'sum' and lose the last real feature instead.
    # NOTE(review): 'sum' looks like the row total of the material fractions - confirm.
    columns = df.columns.tolist()
    composition_index = columns.index("composition")
    numeric_columns = [c for c in columns[composition_index + 1:] if c != "sum"]

    numeric_np = df[numeric_columns].to_numpy()
    price_np = df[['price']].to_numpy()

    # Stack everything side by side in a single concatenation
    features = np.concatenate(encoded_blocks + [price_np, numeric_np], axis=1)

    return features



In [None]:
def get_data(file_name, folder=None):
    """
    Read a CSV file into a dataframe, using its first column as the index.

    Parameters
    ----------
    file_name : str
        Name of the CSV file.
    folder : str, optional
        Directory that contains the file. When omitted, the notebook-level
        ``file_folder`` variable is used, so that variable must have been
        assigned before the call.

    Returns
    -------
    pandas.DataFrame
    """
    if folder is None:
        # Backward-compatible default: the global set in the G-Drive cell
        folder = file_folder

    return pd.read_csv(os.path.join(folder, file_name), index_col=[0])

In [None]:
def minmax_split(df, n_test=10):
    """
    Min-max scale every feature to [0, 1] and hold out the last rows.

    Parameters
    ----------
    df : array-like of shape (n_rows, n_features)
        Numeric feature matrix.
    n_test : int, default 10
        Number of trailing rows returned as the test part. If the input has
        fewer rows than this, every row ends up in the test part.

    Returns
    -------
    (X_train, X_test) : tuple of numpy.ndarray
        The scaled rows before the split point and the scaled rows from the
        split point onwards.
    """
    scaler = MinMaxScaler(feature_range=(0, 1))
    # NOTE(review): the scaler is fitted on all rows, held-out ones included.
    X_MinMaxScaled = scaler.fit_transform(df)

    # Clamp at 0 so a short input cannot turn the split into a negative
    # (wrap-around) index.
    split = max(len(df) - n_test, 0)

    X_train = X_MinMaxScaled[:split]
    X_test = X_MinMaxScaled[split:]

    return X_train, X_test

In [None]:
def norm_split(df, n_test=10):
    """
    Normalize the rows with scikit-learn's ``Normalizer`` and hold out the last rows.

    Parameters
    ----------
    df : array-like of shape (n_rows, n_features)
        Numeric feature matrix.
    n_test : int, default 10
        Number of trailing rows returned as the test part. If the input has
        fewer rows than this, every row ends up in the test part.

    Returns
    -------
    (X_train, X_test) : tuple of numpy.ndarray
        The normalized rows before the split point and the normalized rows
        from the split point onwards.
    """
    # A single fit_transform is enough; a separate fit() beforehand only
    # repeats the same work.
    X_normalized = Normalizer().fit_transform(df)

    # Clamp at 0 so a short input cannot turn the split into a negative
    # (wrap-around) index.
    split = max(len(df) - n_test, 0)

    X_train = X_normalized[:split]
    X_test = X_normalized[split:]

    return X_train, X_test

Connect to G-Drive

In [None]:
# Run once per session: mount Google Drive so the data files can be read.

from google.colab import drive

drive.mount("/content/gdrive")
# Folder prefix used by get_data() and by the pickle-loading cells.
# The path is relative, so it resolves against the current working directory
# (presumably /content, given the mount point above - TODO confirm).
file_folder = "./gdrive/My Drive/data/"

Drive already mounted at /content/gdrive; to attempt to forcibly remount, call drive.mount("/content/gdrive", force_remount=True).


In [None]:
# Load the E-Weaver CSV from file_folder; its first column becomes the index
df = get_data('E-Weaver_data.csv')

In [None]:
df

Unnamed: 0,id,title,description,price,color,gender,imageUrl,brand,composition,linen,nylon,cotton,lyocell,lambskin,elastane,wool,viscose,polyamide,silk,acrylic,polyester,modal,polystyrene,rayon,spandex,cupro,repreve,hemp,tencel,fairtrade,viskose,kapok,sum
0,0,SMILEY ® HAPPY COLLECTION PRINTED SWEATSHIRT,Full cut short sleeve sweatshirt with round ne...,49.9,Black,men,https://static.zara.net/photos///2021/I/0/2/p/...,Zara,'100% cotton',0.0,0.0,1.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0
1,1,PRINTED TEXT T-SHIRT,Full cut T-shirt with round neck and short sle...,35.9,Black,men,https://static.zara.net/photos///2021/I/0/2/p/...,Zara,'100% cotton',0.0,0.0,1.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0
2,2,SHIRT WITH PATCHES,Full cut T-shirt with round neck and short sle...,35.9,Black,men,https://static.zara.net/photos///2021/I/0/2/p/...,Zara,'100% cotton',0.0,0.0,1.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0
3,3,“LOVE” PRINT T-SHIRT © PET SHOP BOYS PARTNERSHIP,Full cut T-shirt with round neck and short sle...,35.9,Black,men,https://static.zara.net/photos///2021/I/0/2/p/...,Zara,'100% cotton',0.0,0.0,1.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0
4,4,TIE-DYE PRINT SHIRT,Relaxed fit tank top with scoop neckline.,19.9,Pink White,men,https://static.zara.net/photos///2021/I/0/2/p/...,Zara,'100% cotton',0.0,0.0,1.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
3683,2979,Mens Long Sleeve V Neck,"Made in the USA, Los Angeles; Luxury finishing...",64.0,black,men,https://cdn.shopify.com/s/files/1/0038/1111/51...,The Classic T-shirt,"100% pre-shrunk organic cotton, GOTS certified",0.0,0.0,1.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0
3684,2980,Mens Long Sleeve V Neck,"Made in the USA, Los Angeles; Luxury finishing...",64.0,white,men,https://cdn.shopify.com/s/files/1/0038/1111/51...,The Classic T-shirt,"100% pre-shrunk organic cotton, GOTS certified",0.0,0.0,1.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0
3685,2981,Mens Long Sleeve V Neck,"Made in the USA, Los Angeles; Luxury finishing...",64.0,dark-grey,men,https://cdn.shopify.com/s/files/1/0038/1111/51...,The Classic T-shirt,"100% pre-shrunk organic cotton, GOTS certified",0.0,0.0,1.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0
3686,2982,Mens Long Sleeve V Neck,"Made in the USA, Los Angeles; Luxury finishing...",64.0,navy-blue,men,https://cdn.shopify.com/s/files/1/0038/1111/51...,The Classic T-shirt,"100% pre-shrunk organic cotton, GOTS certified",0.0,0.0,1.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0


### Create a separate df that does NOT include "color" to test accuracy of other features
- color has too many variations

In [None]:
# Copy of the catalogue without the 'color' column
df2 = df.drop(columns=["color"])

In [None]:
df2

Unnamed: 0,id,title,description,price,gender,imageUrl,brand,composition,linen,nylon,cotton,lyocell,lambskin,elastane,wool,viscose,polyamide,silk,acrylic,polyester,modal,polystyrene,rayon,spandex,cupro,repreve,hemp,tencel,fairtrade,viskose,kapok,sum
0,0,SMILEY ® HAPPY COLLECTION PRINTED SWEATSHIRT,Full cut short sleeve sweatshirt with round ne...,49.9,men,https://static.zara.net/photos///2021/I/0/2/p/...,Zara,'100% cotton',0.0,0.0,1.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0
1,1,PRINTED TEXT T-SHIRT,Full cut T-shirt with round neck and short sle...,35.9,men,https://static.zara.net/photos///2021/I/0/2/p/...,Zara,'100% cotton',0.0,0.0,1.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0
2,2,SHIRT WITH PATCHES,Full cut T-shirt with round neck and short sle...,35.9,men,https://static.zara.net/photos///2021/I/0/2/p/...,Zara,'100% cotton',0.0,0.0,1.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0
3,3,“LOVE” PRINT T-SHIRT © PET SHOP BOYS PARTNERSHIP,Full cut T-shirt with round neck and short sle...,35.9,men,https://static.zara.net/photos///2021/I/0/2/p/...,Zara,'100% cotton',0.0,0.0,1.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0
4,4,TIE-DYE PRINT SHIRT,Relaxed fit tank top with scoop neckline.,19.9,men,https://static.zara.net/photos///2021/I/0/2/p/...,Zara,'100% cotton',0.0,0.0,1.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
3683,2979,Mens Long Sleeve V Neck,"Made in the USA, Los Angeles; Luxury finishing...",64.0,men,https://cdn.shopify.com/s/files/1/0038/1111/51...,The Classic T-shirt,"100% pre-shrunk organic cotton, GOTS certified",0.0,0.0,1.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0
3684,2980,Mens Long Sleeve V Neck,"Made in the USA, Los Angeles; Luxury finishing...",64.0,men,https://cdn.shopify.com/s/files/1/0038/1111/51...,The Classic T-shirt,"100% pre-shrunk organic cotton, GOTS certified",0.0,0.0,1.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0
3685,2981,Mens Long Sleeve V Neck,"Made in the USA, Los Angeles; Luxury finishing...",64.0,men,https://cdn.shopify.com/s/files/1/0038/1111/51...,The Classic T-shirt,"100% pre-shrunk organic cotton, GOTS certified",0.0,0.0,1.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0
3686,2982,Mens Long Sleeve V Neck,"Made in the USA, Los Angeles; Luxury finishing...",64.0,men,https://cdn.shopify.com/s/files/1/0038/1111/51...,The Classic T-shirt,"100% pre-shrunk organic cotton, GOTS certified",0.0,0.0,1.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0


In [None]:
df2.shape

(2833, 32)

## Shuffle `df2` before splitting for optimal modeling and consistent `X_test` t-shirts

In [None]:
# Shuffle with a fixed seed so the row order - and therefore the held-out test
# rows - is the same on every run. The frame is rebuilt from `df`, so
# re-running this cell does not shuffle an already shuffled df2 again.
df2 = (
    df.drop(["color"], axis=1)
    .sample(frac=1, random_state=42)
    .reset_index(drop=True)
)

In [None]:
df2

Unnamed: 0,id,title,description,price,gender,imageUrl,brand,composition,linen,nylon,cotton,lyocell,lambskin,elastane,wool,viscose,polyamide,silk,acrylic,polyester,modal,polystyrene,rayon,spandex,cupro,repreve,hemp,tencel,fairtrade,viskose,kapok,sum
0,2162,TIE DYE BUTTERFLY LOGO S/S T-SHIRT (LIME/BLACK),cold water wash and tumble dry low,35.00,unisex,https://cdn.shopify.com/s/files/1/2380/6877/pr...,Chnge,100% GOTS Certified Organic Cotton,0.0,0.0,1.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0
1,2354,Bellbird Tee. 1101,Machine cold wash with like colours. Do not bl...,30.15,women,https://cdn.shopify.com/s/files/1/0559/3340/33...,little yellow bird,100% Organic Rain-Fed Cotton,0.0,0.0,1.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0
2,460,WASHED RIB T-SHIRT,Round neck top with short sleeves. Rib trim.,12.90,women,https://static.zara.net/photos///2021/I/0/1/p/...,Zara,'100% cotton',0.0,0.0,1.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0
3,2043,FRIEDA T-shirt,Up close - the argument for certified natural ...,14.68,women,https://www.livingcrafts.de/file/thumb/shop/ex...,Living Craft,100 % Cotton (organic),0.0,0.0,1.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0
4,2789,TARAA CIRCULAR 1.5,30?? normal wash; Do not bleach; Dry cleaning ...,38.22,women,https://www.armedangels.com/media/1152x1659x90...,Armed Angels,"50% Cotton (recycled), 50% Lyocell (TENCEL?)",0.0,0.0,0.5,0.5,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
2828,2361,Whio Tee. 1002,Machine cold wash with like colours. Do not bl...,32.83,unisex,https://cdn.shopify.com/s/files/1/0559/3340/33...,little yellow bird,100% Organic Rain-Fed Cotton,0.0,0.0,1.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0
2829,2564,Louise Sleeveless Window Navy,"It's made in a clean, crisp jersey made with o...",70.00,women,https://cdn.nudiejeans.com/img/Louise-Sleevele...,Nudie Jeans,100% Cotton,0.0,0.0,1.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0
2830,1490,T-shirt Visby Blue Birds,"Women's crew neck T-shirt with short, folded s...",45.00,women,https://tshirtstore.centracdn.net/client/dynam...,Dedicated,"100% Cotton, Organic and Fairtrade-certified",0.0,0.0,1.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0
2831,2655,HOMEWAII T-SHIRT GREY,"There are approximately 12,110km as the crow f...",34.56,men,https://www.bleed-clothing.com/media/product/e...,bleed,100% Organic Cotton,0.0,0.0,1.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0


## Split into train/test

In [None]:
# Hold out the last 10 shuffled rows as the test items
length = df2.shape[0]
split = length - 10

X_train_ = df2.iloc[:split]
X_test_ = df2.iloc[split:]

In [None]:
X_train_

Unnamed: 0,id,title,description,price,gender,imageUrl,brand,composition,linen,nylon,cotton,lyocell,lambskin,elastane,wool,viscose,polyamide,silk,acrylic,polyester,modal,polystyrene,rayon,spandex,cupro,repreve,hemp,tencel,fairtrade,viskose,kapok,sum
0,2162,TIE DYE BUTTERFLY LOGO S/S T-SHIRT (LIME/BLACK),cold water wash and tumble dry low,35.00,unisex,https://cdn.shopify.com/s/files/1/2380/6877/pr...,Chnge,100% GOTS Certified Organic Cotton,0.0,0.0,1.00,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.00,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0
1,2354,Bellbird Tee. 1101,Machine cold wash with like colours. Do not bl...,30.15,women,https://cdn.shopify.com/s/files/1/0559/3340/33...,little yellow bird,100% Organic Rain-Fed Cotton,0.0,0.0,1.00,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.00,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0
2,460,WASHED RIB T-SHIRT,Round neck top with short sleeves. Rib trim.,12.90,women,https://static.zara.net/photos///2021/I/0/1/p/...,Zara,'100% cotton',0.0,0.0,1.00,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.00,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0
3,2043,FRIEDA T-shirt,Up close - the argument for certified natural ...,14.68,women,https://www.livingcrafts.de/file/thumb/shop/ex...,Living Craft,100 % Cotton (organic),0.0,0.0,1.00,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.00,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0
4,2789,TARAA CIRCULAR 1.5,30?? normal wash; Do not bleach; Dry cleaning ...,38.22,women,https://www.armedangels.com/media/1152x1659x90...,Armed Angels,"50% Cotton (recycled), 50% Lyocell (TENCEL?)",0.0,0.0,0.50,0.5,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.00,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
2818,2204,NOTORIOUS AOC (CHALK),cold water wash and tumble dry low,35.00,unisex,https://cdn.shopify.com/s/files/1/2380/6877/pr...,Chnge,100% GOTS Certified Organic Cotton,0.0,0.0,1.00,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.00,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0
2819,1216,u AIRism cotton oversized crew neck t-shirt,"#The Paris design team, led by Christophe Lema...",14.90,men,https://image.uniqlo.com/UQ/ST3/WesternCommon/...,uniqlo,"53% Cotton, 47% Polyester",0.0,0.0,0.53,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.47,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0
2820,512,MICKEY MOUSE ©DISNEY T-SHIRT,Round neck T-shirt with short sleeves. Mickey ...,19.90,women,https://static.zara.net/photos///2021/I/0/1/p/...,Zara,'100% cotton',0.0,0.0,1.00,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.00,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0
2821,903,Linen T-shirt,Straight-cut T-shirt in airy linen jersey. Low...,17.99,women,//lp2.hm.com/hmgoepprod?set=source[/e4/04/e404...,H&M,Linen 100%,1.0,0.0,0.00,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.00,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0


In [None]:
X_test_

Unnamed: 0,id,title,description,price,gender,imageUrl,brand,composition,linen,nylon,cotton,lyocell,lambskin,elastane,wool,viscose,polyamide,silk,acrylic,polyester,modal,polystyrene,rayon,spandex,cupro,repreve,hemp,tencel,fairtrade,viskose,kapok,sum
2823,109,CONTRASTING STRIPED T-SHIRT,Full cut shirt with round neck and short sleeves.,45.9,men,https://static.zara.net/photos///2021/I/0/2/p/...,Zara,'70% cotton · 30% linen',0.3,0.0,0.7,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0
2824,2064,IDONY T-shirt,"This unbeatably comfortable, soft T-shirt is t...",22.59,women,https://www.livingcrafts.de/file/thumb/shop/ex...,Living Craft,52 % cotton (organic); 48 % viscose (bamboo),0.0,0.0,0.52,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.48,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0
2825,1861,Women's Organic Long Sleeve Scoop Neck Tee,A sustainable minimalist closet starts with go...,43.9,women,https://cdn.shopify.com/s/files/1/0028/2525/70...,fair indigo,95% organic Pima cotton 5% spandex,0.0,0.0,0.95,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.05,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0
2826,2584,Kevin Basic Round Neck Tee Cloud Dancer,Kevin is our loose and lightweight basic t-shi...,39.9,men,https://cdn.shopify.com/s/files/1/0267/8774/17...,KUYICHI,100% Organic Cotton,0.0,0.0,1.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0
2827,276,SEMI-SHEER TOP,Semi-sheer top with round neck and long sleeves.,19.9,women,https://static.zara.net/photos///2021/I/0/1/p/...,Zara,'53% viscose · 47% polyester',0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.53,0.0,0.0,0.0,0.47,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0
2828,2361,Whio Tee. 1002,Machine cold wash with like colours. Do not bl...,32.83,unisex,https://cdn.shopify.com/s/files/1/0559/3340/33...,little yellow bird,100% Organic Rain-Fed Cotton,0.0,0.0,1.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0
2829,2564,Louise Sleeveless Window Navy,"It's made in a clean, crisp jersey made with o...",70.0,women,https://cdn.nudiejeans.com/img/Louise-Sleevele...,Nudie Jeans,100% Cotton,0.0,0.0,1.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0
2830,1490,T-shirt Visby Blue Birds,"Women's crew neck T-shirt with short, folded s...",45.0,women,https://tshirtstore.centracdn.net/client/dynam...,Dedicated,"100% Cotton, Organic and Fairtrade-certified",0.0,0.0,1.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0
2831,2655,HOMEWAII T-SHIRT GREY,"There are approximately 12,110km as the crow f...",34.56,men,https://www.bleed-clothing.com/media/product/e...,bleed,100% Organic Cotton,0.0,0.0,1.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0
2832,1134,u crew neck short-sleeve t-shirt,#Part of a collection of progressive essential...,19.9,men,https://image.uniqlo.com/UQ/ST3/WesternCommon/...,uniqlo,100% Cotton,0.0,0.0,1.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0


## Get Original Image VGG16 features

In [None]:
# Pickle file loaded below for the original-image features
file_name2 = 'flatten_matrix_df.pkl'
# Re-assigned with the same value as in the G-Drive cell
file_folder = "./gdrive/My Drive/data/"

In [None]:
# Load the pickled feature table.
# NOTE: pickle.load can execute arbitrary code - only open files from a trusted source.
with open(file_folder + file_name2, 'rb') as openfile:
    matrix = pickle.load(openfile)

In [None]:
matrix.shape

(2833, 41473)

In [None]:
matrix.iloc[0]['id']

0.0

In [None]:
matrix.iloc[-1]['id']

2832.0

In [None]:
# Convert to df
# NOTE(review): `matrix` is already used with .shape and .iloc[...]['id'] in the
# cells above, so it appears to be a DataFrame already - confirm whether this
# wrap is still needed.

og_im_df = pd.DataFrame(matrix)

In [None]:
og_im_df

Unnamed: 0,id,0,1,2,3,4,5,6,7,8,9,10,11,12,13,14,15,16,17,18,19,20,21,22,23,24,25,26,27,28,29,30,31,32,33,34,35,36,37,38,...,41432,41433,41434,41435,41436,41437,41438,41439,41440,41441,41442,41443,41444,41445,41446,41447,41448,41449,41450,41451,41452,41453,41454,41455,41456,41457,41458,41459,41460,41461,41462,41463,41464,41465,41466,41467,41468,41469,41470,41471
0,0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,2.854774,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.000000,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.000000,0.000000,0.0,0.0,0.0,0.000000,2.314921,0.000000,0.0,4.468711,0.000000,0.000000,0.0,0.061595,0.0,0.000000,0.0,5.119471,0.0,0.000000,0.0,0.0,5.168469,0.0,0.0,0.0,0.0
1,1,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,2.910776,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.333467,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.000000,1.257058,0.0,0.0,0.0,0.000000,0.000000,0.000000,0.0,8.232752,0.000000,0.000000,0.0,0.686097,0.0,0.000000,0.0,8.797201,0.0,0.000000,0.0,0.0,2.473982,0.0,0.0,0.0,0.0
2,2,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,3.632312,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.000000,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.000000,0.000000,0.0,0.0,0.0,0.000000,0.517718,0.970705,0.0,9.987722,0.000000,0.000000,0.0,0.000000,0.0,0.000000,0.0,3.873752,0.0,1.398927,0.0,0.0,6.997279,0.0,0.0,0.0,0.0
3,3,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1.026177,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.242938,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.000000,0.000000,0.0,0.0,0.0,0.000000,0.660896,3.172983,0.0,13.140417,0.000000,0.000000,0.0,0.000000,0.0,0.000000,0.0,13.082182,0.0,0.000000,0.0,0.0,0.582830,0.0,0.0,0.0,0.0
4,4,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.000000,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.000000,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.000000,2.210988,0.0,0.0,0.0,0.000000,0.000000,0.000000,0.0,0.000000,0.000000,0.000000,0.0,0.000000,0.0,0.828314,0.0,0.979642,0.0,0.000000,0.0,0.0,0.000000,0.0,0.0,0.0,0.0
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
2828,2828,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,9.525218,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.000000,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.526378,0.000000,0.0,0.0,0.0,7.159697,0.000000,0.000000,0.0,11.863145,6.841841,5.218503,0.0,0.000000,0.0,6.371971,0.0,0.000000,0.0,0.000000,0.0,0.0,6.095173,0.0,0.0,0.0,0.0
2829,2829,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,7.614925,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.000000,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.000000,3.554946,0.0,0.0,0.0,1.374541,0.000000,0.000000,0.0,0.000000,7.331583,0.078062,0.0,0.000000,0.0,7.518927,0.0,0.000000,0.0,0.000000,0.0,0.0,0.000000,0.0,0.0,0.0,0.0
2830,2830,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,12.349672,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.000000,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.000000,4.025370,0.0,0.0,0.0,0.000000,0.000000,0.000000,0.0,0.000000,0.000000,0.000000,0.0,0.000000,0.0,8.374681,0.0,5.458471,0.0,0.000000,0.0,0.0,0.000000,0.0,0.0,0.0,0.0
2831,2831,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,3.771840,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.000000,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.000000,0.000000,0.0,0.0,0.0,0.000000,0.000000,0.000000,0.0,0.000000,0.000000,0.000000,0.0,0.000000,0.0,10.601613,0.0,2.627762,0.0,0.000000,0.0,0.0,0.000000,0.0,0.0,0.0,0.0


## Get Masked Image VGG16 features

In [None]:
# Pickle file loaded below for the masked-image features
file_name3 = 'flatten_matrix_df_w_masked.pkl'

In [None]:
# Load the pickled feature table.
# NOTE: pickle.load can execute arbitrary code - only open files from a trusted source.
with open(file_folder + file_name3, 'rb') as openfile:
    matrix2 = pickle.load(openfile)

In [None]:
matrix2.shape

(1419, 41473)

In [None]:
# Convert to df
# NOTE(review): presumably `matrix2` is already a DataFrame like `matrix` -
# confirm whether this wrap is still needed.

mask_im_df = pd.DataFrame(matrix2)

In [None]:
mask_im_df

Unnamed: 0,id,0,1,2,3,4,5,6,7,8,9,10,11,12,13,14,15,16,17,18,19,20,21,22,23,24,25,26,27,28,29,30,31,32,33,34,35,36,37,38,...,41432,41433,41434,41435,41436,41437,41438,41439,41440,41441,41442,41443,41444,41445,41446,41447,41448,41449,41450,41451,41452,41453,41454,41455,41456,41457,41458,41459,41460,41461,41462,41463,41464,41465,41466,41467,41468,41469,41470,41471
0,0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.175718,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.000000,0.0,1.057625,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
1,1,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.175718,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.000000,0.0,1.057625,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
2,2,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.175718,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.000000,0.0,1.057625,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
3,3,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.175718,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.000000,0.0,1.057625,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
4,4,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.175718,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.000000,0.0,1.057625,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
1414,2970,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.005171,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.000000,0.0,2.105223,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
1415,2971,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.000000,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.000000,0.0,1.574899,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
1416,2972,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.000000,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.000000,0.0,2.656899,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
1417,2973,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.000000,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.000000,0.0,2.098136,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0


In [None]:
# Cast 'id' to int; it is the key used to merge this frame with df2 further down
mask_im_df['id'] = mask_im_df['id'].astype(int)

## Model Pipeline
1. Shuffle df
2. Get features (encoding non-numerical features)
3. Scale data (MinMax, Normalize)
  -   Rescale data - useful for algorithms that use distance measures like K-Nearest Neighbors.
4. Fit model
5. Locate results

## Model Iterations

### Model 1 - Features Only
- MinMax Scaler
- NearestNeighbors (n_neighbors = 10)
- cosine similarity
- without 'color' (`df2` has the column dropped)

In [None]:
# MODEL 1
# use df2

# Feature matrix -> scaled train/test parts -> first held-out row is the query
X1 = gen_features(df2)
X1_min_train, X1_min_test = minmax_split(X1)
X1_test = X1_min_test[0]

# fit() returns the estimator itself, so construction and fitting can be chained
model1 = NearestNeighbors(n_neighbors=10, metric='cosine').fit(X1_min_train)

# kneighbors expects a 2-D array: add a leading axis to the single query row
distances, indices = model1.kneighbors(X1_test[np.newaxis, :])

print("These are the distances: " + str(distances))
print("Here are the nearest neighbors" + str(indices))

These are the distances: [[0.00049512 0.01336052 0.02612338 0.02897846 0.02897846 0.02897846
  0.02906612 0.02906612 0.02906612 0.02906612]]
Here are the nearest neighbors[[1785  610  470 1576  244 2680 2698 2246 2768  950]]


Test Image

In [None]:
# The query item is the first held-out row, i.e. the row at position `split`
# (the counterpart of X1_min_test[0]); using `split` keeps this correct when
# the number of rows changes.
image = df2.iloc[split]['imageUrl']

In [None]:
image

'https://static.zara.net/photos///2021/I/0/2/p/7545/261/250/2/w/563/7545261250_1_1_1.jpg?ts=1626871751046'

In [None]:
indices[0][0]

1785

In [None]:
len(indices[0])

10

In [None]:
# Print the image URL of every neighbour returned for the Model 1 query.
# Iterating over the result itself avoids repeating the neighbour count here.
for nbr in indices[0]:
    url = df2.iloc[nbr]['imageUrl']
    print(url)


https://static.zara.net/photos///2021/I/0/2/p/0722/469/086/2/w/563/0722469086_1_1_1.jpg?ts=1622020396714
https://static.zara.net/photos///2021/I/0/2/p/5372/304/800/2/w/563/5372304800_1_1_1.jpg?ts=1623413099291
https://static.zara.net/photos///2021/I/0/2/p/9240/303/803/2/w/563/9240303803_1_1_1.jpg?ts=1630059331922
https://static.zara.net/photos///2021/I/0/2/p/6917/301/903/2/w/563/6917301903_1_1_1.jpg?ts=1623404231494
https://static.zara.net/photos///2021/I/0/2/p/0840/306/251/2/w/563/0840306251_1_1_1.jpg?ts=1625476674298
https://static.zara.net/photos///2021/I/0/2/p/4087/314/800/22/w/563/4087314800_1_1_1.jpg?ts=1629124839549
https://static.zara.net/photos///2021/I/0/2/p/3332/304/251/82/w/563/3332304251_1_1_1.jpg?ts=1633506838211
https://static.zara.net/photos///2021/I/0/2/p/6917/314/250/2/w/563/6917314250_1_1_1.jpg?ts=1623851650322
https://static.zara.net/photos///2021/I/0/2/p/3284/300/802/2/w/563/3284300802_1_1_1.jpg?ts=1626865154810
https://static.zara.net/photos///2021/I/0/2/p/3597/42

### Model 2 - Features with original photos (non-VGG16)
- MinMax Scaler
- NearestNeighbors (n_neighbors = 10)
- cosine similarity
- without 'color' (`df2` has the column dropped)

Combine flattened photos with `df2` to run through model

Modeling - Model 2

In [None]:
# MODEL 2
# use df2
# NOTE(review): this cell builds the same df2-only features as Model 1; no
# photo features are merged in here - confirm that this matches the intent of
# the section.

X2 = gen_features(df2)
X2_min_train, X2_min_test = minmax_split(X2)
X2_test = X2_min_test[0]  # first held-out row is the query item
model2 = NearestNeighbors(n_neighbors=10, metric = 'cosine')
model2.fit(X2_min_train)

# Query the model that was fitted in this cell and report its own results
distances2, indices2 = model2.kneighbors(X2_test.reshape(1, -1))

print("These are the distances: " + str(distances2))
print("Here are the nearest neighbors" + str(indices2))

### Model 3 - Features with original photos (VGG16 feat)
- MinMax Scaler
- NearestNeighbors (n_neighbors = 10)
- cosine similarity
- without 'color' (`df2` has the column dropped)

Combine flattened photos with `df2` to run through model

In [None]:
# Join the catalogue rows with the original-image features on the shared 'id' key
df_model3 = df2.merge(og_im_df, on='id')

Modeling - Model 3

In [None]:
# MODEL 3
# use df_model3 (df2 merged with og_im_df)

# Feature matrix -> scaled train/test parts -> first held-out row is the query
X3 = gen_features(df_model3)
X3_min_train, X3_min_test = minmax_split(X3)
X3_test = X3_min_test[0]

# fit() returns the estimator itself, so construction and fitting can be chained
model3 = NearestNeighbors(n_neighbors=10, metric='cosine').fit(X3_min_train)

# kneighbors expects a 2-D array: add a leading axis to the single query row
distances3, indices3 = model3.kneighbors(X3_test[np.newaxis, :])

print("These are the distances: " + str(distances3))
print("Here are the nearest neighbors" + str(indices3))

These are the distances: [[0.50833471 0.51447073 0.52731125 0.53094632 0.53432296 0.54036986
  0.54341057 0.54836499 0.55079138 0.55229588]]
Here are the nearest neighbors[[  55 2001  117 1806 2049 1054 2627  215 1114  954]]


In [None]:
X3_test

array([0., 0., 1., ..., 0., 0., 0.])

In [None]:
# indices3 are row positions in the feature matrix built from df_model3, so the
# URLs have to be looked up in df_model3 as well; df2 only has the same row
# order if the merge kept every row.
for nbr in indices3[0]:
    url = df_model3.iloc[nbr]['imageUrl']
    print(url)

https://image.uniqlo.com/UQ/ST3/WesternCommon/imagesgoods/433037/item/goods_09_433037.jpg?width=380
https://www.armedangels.com/media/1234x1777x90/55/a7/00/1626320294/minaa-ruby-red.jpg
https://www.armedangels.com/media/1234x1778x90/b1/c1/a0/1644469174/antoniaa-white.jpg
https://cdn.shopify.com/s/files/1/0559/3340/3309/products/308_Image2_381475c3-aeb7-42df-ab32-99a2f312ef97_5000x.jpg?v=1631844010
//lp2.hm.com/hmgoepprod?set=source[/d9/ad/d9adb9717e701a42232d1cdcf604e7978ccc27c8.jpg],origin[dam],category[],type[DESCRIPTIVESTILLLIFE],res[m],hmver[2]&call=url[file:/product/style]
https://image.uniqlo.com/UQ/ST3/WesternCommon/imagesgoods/435087/item/goods_69_435087.jpg?width=380
//lp2.hm.com/hmgoepprod?set=source[/04/29/0429cf3b196dfd8376e9cfc4ce6170ca6a17c039.jpg],origin[dam],category[men_tshirtstanks_shortsleeve],type[DESCRIPTIVESTILLLIFE],res[m],hmver[2]&call=url[file:/product/style]
https://cdn.shopify.com/s/files/1/2341/3995/products/TCW3563-20174.jpg?v=1627600850
https://cdn.shopify

### Model 4 - Features with masked photos (VGG16 feat)
- MinMax Scaler
- NearestNeighbors = 10
- cosine similarity
- with 'color'

Combine the flattened photos with `df2` to run them through the model

In [None]:
# Join the mask_im_df columns onto df2 by matching 'id' values (default inner join).
df_model4 = df2.merge(mask_im_df, on='id')

In [None]:
# Display the merged Model 4 frame (pandas abbreviates the very wide/long output).
df_model4

Unnamed: 0,id,title,description,price,gender,imageUrl,brand,composition,linen,nylon,cotton,lyocell,lambskin,elastane,wool,viscose,polyamide,silk,acrylic,polyester,modal,polystyrene,rayon,spandex,cupro,repreve,hemp,tencel,fairtrade,viskose,kapok,sum,0,1,2,3,4,5,6,7,...,41432,41433,41434,41435,41436,41437,41438,41439,41440,41441,41442,41443,41444,41445,41446,41447,41448,41449,41450,41451,41452,41453,41454,41455,41456,41457,41458,41459,41460,41461,41462,41463,41464,41465,41466,41467,41468,41469,41470,41471
0,2354,Bellbird Tee. 1101,Machine cold wash with like colours. Do not bl...,30.15,women,https://cdn.shopify.com/s/files/1/0559/3340/33...,little yellow bird,100% Organic Rain-Fed Cotton,0.0,0.0,1.00,0.0,0.0,0.0,0.0,0.00,0.0,0.0,0.0,0.00,0.0,0.0,0.00,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.000000,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.000000,0.0,1.513436,0.0,3.609580,0.0,0.000000,0.0,0.0,0.0,0.0,0.000000,0.0,0.000000
1,460,WASHED RIB T-SHIRT,Round neck top with short sleeves. Rib trim.,12.90,women,https://static.zara.net/photos///2021/I/0/1/p/...,Zara,'100% cotton',0.0,0.0,1.00,0.0,0.0,0.0,0.0,0.00,0.0,0.0,0.0,0.00,0.0,0.0,0.00,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.175718,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.000000,0.0,0.000000,0.0,1.057625,0.0,0.000000,0.0,0.0,0.0,0.0,0.000000,0.0,0.000000
2,2789,TARAA CIRCULAR 1.5,30?? normal wash; Do not bleach; Dry cleaning ...,38.22,women,https://www.armedangels.com/media/1152x1659x90...,Armed Angels,"50% Cotton (recycled), 50% Lyocell (TENCEL?)",0.0,0.0,0.50,0.5,0.0,0.0,0.0,0.00,0.0,0.0,0.0,0.00,0.0,0.0,0.00,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1.842897,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,5.957482,0.0,0.000000,0.0,3.629300,0.0,0.379108,0.0,0.0,0.0,0.0,4.129673,0.0,2.135918
3,1589,T-shirt Stockholm Wild At Heart Charcoal,"The perfect crew neck T-shirt, made sustainabl...",39.00,men,https://tshirtstore.centracdn.net/client/dynam...,Dedicated,"100% Cotton, Organic and Fairtrade-certified",0.0,0.0,1.00,0.0,0.0,0.0,0.0,0.00,0.0,0.0,0.0,0.00,0.0,0.0,0.00,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.008774,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.000000,0.0,0.000000,0.0,1.448632,0.0,0.000000,0.0,0.0,0.0,0.0,0.000000,0.0,0.000000
4,163,COMBINATION PRINT T-SHIRT,Full cut round neck T-shirt with short sleeves...,25.90,men,https://static.zara.net/photos///2021/I/0/2/p/...,Zara,'100% cotton',0.0,0.0,1.00,0.0,0.0,0.0,0.0,0.00,0.0,0.0,0.0,0.00,0.0,0.0,0.00,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.175718,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.000000,0.0,0.000000,0.0,1.057625,0.0,0.000000,0.0,0.0,0.0,0.0,0.000000,0.0,0.000000
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
1414,2064,IDONY T-shirt,"This unbeatably comfortable, soft T-shirt is t...",22.59,women,https://www.livingcrafts.de/file/thumb/shop/ex...,Living Craft,52 % cotton (organic); 48 % viscose (bamboo),0.0,0.0,0.52,0.0,0.0,0.0,0.0,0.00,0.0,0.0,0.0,0.00,0.0,0.0,0.48,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.000000,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.000000,0.0,1.691841,0.0,1.554064,0.0,0.000000,0.0,0.0,0.0,0.0,0.000000,0.0,0.000000
1415,276,SEMI-SHEER TOP,Semi-sheer top with round neck and long sleeves.,19.90,women,https://static.zara.net/photos///2021/I/0/1/p/...,Zara,'53% viscose · 47% polyester',0.0,0.0,0.00,0.0,0.0,0.0,0.0,0.53,0.0,0.0,0.0,0.47,0.0,0.0,0.00,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.175718,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.000000,0.0,0.000000,0.0,1.057625,0.0,0.000000,0.0,0.0,0.0,0.0,0.000000,0.0,0.000000
1416,2564,Louise Sleeveless Window Navy,"It's made in a clean, crisp jersey made with o...",70.00,women,https://cdn.nudiejeans.com/img/Louise-Sleevele...,Nudie Jeans,100% Cotton,0.0,0.0,1.00,0.0,0.0,0.0,0.0,0.00,0.0,0.0,0.0,0.00,0.0,0.0,0.00,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.000000,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.000000,0.0,0.000000,0.0,3.641372,0.0,0.000000,0.0,0.0,0.0,0.0,0.000000,0.0,0.000000
1417,1490,T-shirt Visby Blue Birds,"Women's crew neck T-shirt with short, folded s...",45.00,women,https://tshirtstore.centracdn.net/client/dynam...,Dedicated,"100% Cotton, Organic and Fairtrade-certified",0.0,0.0,1.00,0.0,0.0,0.0,0.0,0.00,0.0,0.0,0.0,0.00,0.0,0.0,0.00,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.164962,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.000000,0.0,0.000000,0.0,1.046350,0.0,0.000000,0.0,0.0,0.0,0.0,0.000000,0.0,0.000000


In [None]:
# Number of rows in the merged Model 4 frame.
df_len = df_model4.shape[0]
df_len

1419

In [None]:
# Keep the rows from position 1409 onwards (the tail of df_model4).
df_test_ = df_model4.iloc[1409:]

In [None]:
# Show the first row of df_test_ (positional row 1409 of df_model4).
df_test_.iloc[0]

id                                                          1886
title              Women's Organic Long Sleeve Crew Neck T-shirt
description    A quintessential staple in any organic wardrob...
price                                                       43.9
gender                                                     women
                                     ...                        
41467                                                        0.0
41468                                                        0.0
41469                                                   0.190302
41470                                                        0.0
41471                                                        0.0
Name: 1409, Length: 41504, dtype: object

Modeling - Model 4

In [None]:
# MODEL 4
# Features are generated from df_model4 (df2 merged with mask_im_df on 'id').

X4 = gen_features(df_model4)
X4_min_train, X4_min_test = minmax_split(X4)

# Query item: the row at position 4 of the test part.
X4_test = X4_min_test[4]

# Build the 10-neighbour cosine index; fit() returns the estimator itself.
model4 = NearestNeighbors(metric='cosine', n_neighbors=10).fit(X4_min_train)

# kneighbors expects a 2-D array, hence the (1, n_features) reshape.
distances4, indices4 = model4.kneighbors(X4_test.reshape(1, -1))

print(f"These are the distances: {distances4}")
print(f"Here are the nearest neighbors{indices4}")

These are the distances: [[0.2057602  0.22159589 0.23616615 0.24315281 0.24608031 0.25395725
  0.25431063 0.25500695 0.25574622 0.26260827]]
Here are the nearest neighbors[[ 772  935 1388    4  220   54  636  978  539  753]]


In [None]:
# Print the image URL of every neighbour returned by Model 4.
# Iterating over the returned indices directly (instead of a manual counter
# hard-coded to 10) keeps this cell valid if n_neighbors is changed.
# NOTE(review): assumes minmax_split keeps training rows in df_model4 order — confirm.
for nbr in indices4[0]:
    url = df_model4.iloc[nbr]['imageUrl']
    print(url)

https://static.zara.net/photos///2021/I/0/2/p/6224/315/251/2/w/563/6224315251_1_1_1.jpg?ts=1627039627628
https://static.zara.net/photos///2021/I/0/2/p/4087/301/806/12/w/563/4087301806_1_1_1.jpg?ts=1626187215882
https://static.zara.net/photos///2021/I/0/2/p/6224/306/708/22/w/563/6224306708_1_1_1.jpg?ts=1626187430605
https://static.zara.net/photos///2021/I/0/2/p/0962/809/805/2/w/563/0962809805_1_1_1.jpg?ts=1625069457485
https://static.zara.net/photos///2021/I/0/2/p/6917/327/251/2/w/563/6917327251_1_1_1.jpg?ts=1627473703473
https://static.zara.net/photos///2021/I/0/2/p/0962/800/923/2/w/563/0962800923_1_1_1.jpg?ts=1625068055099
https://static.zara.net/photos///2021/I/0/2/p/0722/303/920/2/w/563/0722303920_1_1_1.jpg?ts=1629994476600
https://static.zara.net/photos///2021/I/0/2/p/1259/307/250/2/w/563/1259307250_1_1_1.jpg?ts=1628069723759
https://static.zara.net/photos///2021/I/0/2/p/0495/300/250/2/w/563/0495300250_1_1_1.jpg?ts=1625831651478
https://static.zara.net/photos///2021/I/0/2/p/0526/33