In [7]:
import pandas as pd
import urllib.request
import os

In [30]:
# Load the filtered shoe catalogue; `df` is its first 10,000 rows (by position).
filtered_shoes = pd.read_csv('filtered_shoes_1.csv')
df = filtered_shoes.iloc[:10000]

In [24]:
# Images live in an "images" folder directly under the current working directory.
base_dir = os.getcwd()
images_dir = os.path.join(base_dir, 'images')

In [None]:
# Download the image of every row of `df` into `images_dir` as "<id>.jpg".
# `error_indices` maps the DataFrame index of each failed row to its product id
# so that failures can be inspected or retried afterwards.

# urlretrieve does not create missing folders; without this every download
# would fail and only be reported as "error".
os.makedirs(images_dir, exist_ok=True)

error_indices = {}
for index, row in df.iterrows():
    picture_file = os.path.join(images_dir, str(row['id']) + '.jpg')
    try:
        print(index)
        urllib.request.urlretrieve(row['image_url'], picture_file)
    except Exception as e:
        # Best-effort download: report why this row failed and carry on.
        print('error: ', index, repr(e))
        error_indices[index] = row['id']
        # Remove a partially written file so that later steps globbing *.jpg
        # never pick up a truncated image.
        if os.path.exists(picture_file):
            os.remove(picture_file)

In [34]:
import tensorflow_hub as hub
from annoy import AnnoyIndex
import os
os.environ['TF_CPP_MIN_LOG_LEVEL'] = '3' 
import tensorflow as tf
import tensorflow_hub as hub
# For saving 'feature vectors' into a txt file
import numpy as np
# Glob for reading file names in a folder
import glob
import os.path
from tqdm import tqdm
import ntpath
import warnings
import cv2
from annoy import AnnoyIndex
from scipy import spatial
warnings.filterwarnings('ignore')
from operator import itemgetter

In [35]:
def load_img(path):
    """Load a JPEG file as a batch of one image for the feature-vector model.

    Args:
        path: Path of the JPEG file to read.

    Returns:
        A float32 tensor of shape 1 x 224 x 224 x 3 with pixel values in [0, 1].

    Note:
        Feature vectors computed before this scaling fix were produced from
        unscaled [0, 255] input and should be regenerated.
    """
    # Reads the image file and returns its raw (still encoded) bytes
    img = tf.io.read_file(path)
    # Decodes the image to a H x W x 3 tensor with type of uint8
    img = tf.io.decode_jpeg(img, channels=3)
    # Rescales uint8 [0, 255] to float32 [0, 1]. This must happen BEFORE the
    # resize: resize_with_pad returns float32, and convert_image_dtype does not
    # rescale an input that is already floating point, so converting afterwards
    # would leave the pixel values in [0, 255].
    img = tf.image.convert_image_dtype(img, tf.float32)
    # Resizes (padding to keep the aspect ratio) to a 224 x 224 x 3 tensor
    img = tf.image.resize_with_pad(img, 224, 224)
    # Adds the leading batch axis: 1 x 224 x 224 x 3
    return img[tf.newaxis, ...]

In [36]:
def match_id(filename):
    """Return the part of `filename` that precedes its last underscore.

    A name that contains no underscore yields an empty string.
    """
    head, _separator, _tail = filename.rpartition('_')
    return head

In [38]:
# Loaded TF Hub modules keyed by handle, so that the model is loaded once per
# kernel instead of once per image.
_FEATURE_MODULES = {}


def _load_feature_module(module_handle):
    """Return the TF Hub module for `module_handle`, loading it on first use only."""
    if module_handle not in _FEATURE_MODULES:
        _FEATURE_MODULES[module_handle] = hub.load(module_handle)
    return _FEATURE_MODULES[module_handle]


def get_features(img, image_path,
                 out_dir='C:/Users/Aakash Atnoorkar/Documents/ADM/Final Project/images/imagesScraped/'):
    """Compute the feature vector of one image and save it next to the others.

    Args:
        img: Image batch to feed to the model (as produced by `load_img`).
        image_path: Path of the source image; only its base name is used, to
            name the output file "<base name>.npz".
        out_dir: Existing directory the vector file is written to. Defaults to
            the location used originally.

    Returns:
        The feature vector as read back from the saved file (numpy array).
    """
    # Definition of module with using tfhub.dev
    module_handle = "https://tfhub.dev/google/imagenet/mobilenet_v2_140_224/feature_vector/4"
    # Loads the module (cached after the first call)
    module = _load_feature_module(module_handle)
    features = module(img)
    # Remove single-dimensional entries from the 'features' array
    feature_set = np.squeeze(features)

    # Despite the ".npz" extension the file is plain text written by
    # np.savetxt; it has to be read with np.loadtxt, not np.load.
    outfile_name = os.path.basename(image_path) + ".npz"
    out_path = os.path.join(out_dir, outfile_name)
    np.savetxt(out_path, feature_set, delimiter=',')

    # Read the vector back so the returned value is exactly what is on disk
    vector = np.loadtxt(out_path)
    return vector

In [None]:
# Every JPEG in the project's images folder
image_paths = glob.glob(
    'C:\\Users\\Aakash Atnoorkar\\Documents\\ADM\\Final Project\\images\\*.jpg'
)

# Maps image file name -> feature vector returned by get_features
image_style_embeddings = {}
for image_path in tqdm(image_paths):
    image_style_embeddings[ntpath.basename(image_path)] = get_features(
        load_img(image_path), image_path
    )

In [57]:
def cluster(vector_glob='C:\\Users\\Aakash Atnoorkar\\Documents\\ADM\\Final Project\\images\\imagesScraped\\*.npz',
            dims=1792, n_nearest_neighbors=20, trees=10000):
    """Index the saved feature vectors with Annoy and score each item's neighbours.

    Nothing is returned: the result is stored in the module-level list
    ``named_nearest_neighbors``, which is reset on every call. Each entry is a
    dict with the keys ``similarity`` (cosine similarity truncated to four
    decimals), ``master_pi`` and ``similar_pi`` (product ids).

    Args:
        vector_glob: Glob pattern matching the feature-vector files. The files
            are read with np.loadtxt, and the product id is the file's base
            name up to its first dot.
        dims: Length of each feature vector, passed to AnnoyIndex.
        n_nearest_neighbors: Number of neighbours requested for each item.
        trees: Number of trees passed to AnnoyIndex.build.
    """
    print("-------Annoy Index Generation----------------")
    # Annoy works with integer item ids, so keep lookups from id to data
    file_index_to_file_vector = {}
    file_index_to_product_id = {}

    t = AnnoyIndex(dims, metric='angular')

    for file_index, vector_path in enumerate(glob.glob(vector_glob)):
        # Reads feature vectors and assigns them into the file_vector
        file_vector = np.loadtxt(vector_path)

        # The file name (up to the first dot) doubles as the product id
        file_name = os.path.basename(vector_path).split('.')[0]
        print(file_name)
        file_index_to_file_vector[file_index] = file_vector
        file_index_to_product_id[file_index] = file_name

        # Adds image feature vectors into annoy index
        t.add_item(file_index, file_vector)

        print("---------------------------------")
        print("Annoy index     : %s" %file_index)
        print("Image file name : %s" %file_name)
        print("Product id      : %s" %file_index_to_product_id[file_index])

    # Builds annoy index
    t.build(trees)

    print ("Step.1 - ANNOY index generation - Finished")
    print ("Step.2 - Similarity score calculation - Started ")

    global named_nearest_neighbors
    named_nearest_neighbors = []

    # Loops through all indexed items
    for i, master_product_id in file_index_to_product_id.items():
        master_vector = file_index_to_file_vector[i]

        # Calculates the nearest neighbors of the master item.
        # NOTE(review): the master item itself may be among its own neighbours.
        nearest_neighbors = t.get_nns_by_item(i, n_nearest_neighbors)

        # Loops through the nearest neighbors of the master item
        for j in nearest_neighbors:
            neighbor_file_vector = file_index_to_file_vector[j]

            # Cosine similarity, truncated (not rounded) to four decimals
            similarity = 1 - spatial.distance.cosine(master_vector, neighbor_file_vector)
            rounded_similarity = int((similarity * 10000)) / 10000.0

            # Appends master product id with the similarity score
            # and the product id of the similar items
            named_nearest_neighbors.append({
                'similarity': rounded_similarity,
                'master_pi': master_product_id,
                'similar_pi': file_index_to_product_id[j]})

In [None]:
# Load every JPEG of the images folder as a 200 x 200 RGB array, together
# with its file name and product id (file name up to the first dot).
image_paths = glob.glob('C:\\Users\\Aakash Atnoorkar\\Documents\\ADM\\Final Project\\images\\*.jpg')
print(f'Found [{len(image_paths)}] images')

images = []
for index, image_path in enumerate(image_paths, start=1):
    image = cv2.imread(image_path, 3)
    if image is None:
        # cv2.imread returns None instead of raising when a file cannot be
        # read or decoded; skip it rather than crash on the next call.
        print('unreadable image, skipped: ', image_path)
        continue
    # OpenCV loads pixels as b, g, r; switch to r, g, b
    image = cv2.cvtColor(image, cv2.COLOR_BGR2RGB)
    image = cv2.resize(image, (200, 200))
    file_name = ntpath.basename(image_path)
    images.append({
        'image_path': file_name,
        'image': image,
        'product_id': file_name.split('.')[0],
    })
    print(index)

In [58]:
# Builds the Annoy index and fills the global `named_nearest_neighbors` list
cluster()

-------Annoy Index Generation----------------
AV-9hndsuC1rwyj_g2bb
---------------------------------
Annoy index     : 0
Image file name : AV-9hndsuC1rwyj_g2bb
Product id      : AV-9hndsuC1rwyj_g2bb
AV-9hpHjYSSHbkXwpyGV
---------------------------------
Annoy index     : 1
Image file name : AV-9hpHjYSSHbkXwpyGV
Product id      : AV-9hpHjYSSHbkXwpyGV
AV-9hqJYuC1rwyj_g2c_
---------------------------------
Annoy index     : 2
Image file name : AV-9hqJYuC1rwyj_g2c_
Product id      : AV-9hqJYuC1rwyj_g2c_
AV-9hriAuC1rwyj_g2dv
---------------------------------
Annoy index     : 3
Image file name : AV-9hriAuC1rwyj_g2dv
Product id      : AV-9hriAuC1rwyj_g2dv
AV-9hsUluC1rwyj_g2eA
---------------------------------
Annoy index     : 4
Image file name : AV-9hsUluC1rwyj_g2eA
Product id      : AV-9hsUluC1rwyj_g2eA
AV-9id7QYSSHbkXwpyLI
---------------------------------
Annoy index     : 5
Image file name : AV-9id7QYSSHbkXwpyLI
Product id      : AV-9id7QYSSHbkXwpyLI
AV-9iKxlYSSHbkXwpyKu
---------------

AV-ncL2WuC1rwyj_gemu
---------------------------------
Annoy index     : 66
Image file name : AV-ncL2WuC1rwyj_gemu
Product id      : AV-ncL2WuC1rwyj_gemu
AV-ncM9VHh53nbDR_VMe
---------------------------------
Annoy index     : 67
Image file name : AV-ncM9VHh53nbDR_VMe
Product id      : AV-ncM9VHh53nbDR_VMe
AV-ncPiuuC1rwyj_gepk
---------------------------------
Annoy index     : 68
Image file name : AV-ncPiuuC1rwyj_gepk
Product id      : AV-ncPiuuC1rwyj_gepk
AV-ncQLSuC1rwyj_geqO
---------------------------------
Annoy index     : 69
Image file name : AV-ncQLSuC1rwyj_geqO
Product id      : AV-ncQLSuC1rwyj_geqO
AV-ncqVLYSSHbkXwpaec
---------------------------------
Annoy index     : 70
Image file name : AV-ncqVLYSSHbkXwpaec
Product id      : AV-ncqVLYSSHbkXwpaec
AV-ncrrVuC1rwyj_ge34
---------------------------------
Annoy index     : 71
Image file name : AV-ncrrVuC1rwyj_ge34
Product id      : AV-ncrrVuC1rwyj_ge34
AV-nct2MuC1rwyj_ge4_
---------------------------------
Annoy index     : 72


Image file name : AVpf51qXilAPnD_xkSHI
Product id      : AVpf51qXilAPnD_xkSHI
AVpf566silAPnD_xkTWM
---------------------------------
Annoy index     : 126
Image file name : AVpf566silAPnD_xkTWM
Product id      : AVpf566silAPnD_xkTWM
AVpfaMRwLJeJML438uZ4
---------------------------------
Annoy index     : 127
Image file name : AVpfaMRwLJeJML438uZ4
Product id      : AVpfaMRwLJeJML438uZ4
AVpfArORLJeJML430Zbv
---------------------------------
Annoy index     : 128
Image file name : AVpfArORLJeJML430Zbv
Product id      : AVpfArORLJeJML430Zbv
AVpfb1BSilAPnD_xbuFf
---------------------------------
Annoy index     : 129
Image file name : AVpfb1BSilAPnD_xbuFf
Product id      : AVpfb1BSilAPnD_xbuFf
AVpfBEW3LJeJML430iQ5
---------------------------------
Annoy index     : 130
Image file name : AVpfBEW3LJeJML430iQ5
Product id      : AVpfBEW3LJeJML430iQ5
AVpfBgBmLJeJML430sYG
---------------------------------
Annoy index     : 131
Image file name : AVpfBgBmLJeJML430sYG
Product id      : AVpfBgBmLJeJM

AVpfs1zNilAPnD_xgqjh
---------------------------------
Annoy index     : 185
Image file name : AVpfs1zNilAPnD_xgqjh
Product id      : AVpfs1zNilAPnD_xgqjh
AVpfSPKr1cnluZ0-hzpz
---------------------------------
Annoy index     : 186
Image file name : AVpfSPKr1cnluZ0-hzpz
Product id      : AVpfSPKr1cnluZ0-hzpz
AVpfstKl1cnluZ0-poJ4
---------------------------------
Annoy index     : 187
Image file name : AVpfstKl1cnluZ0-poJ4
Product id      : AVpfstKl1cnluZ0-poJ4
AVpfSwNzilAPnD_xY_tB
---------------------------------
Annoy index     : 188
Image file name : AVpfSwNzilAPnD_xY_tB
Product id      : AVpfSwNzilAPnD_xY_tB
AVpfTMqFLJeJML436lYL
---------------------------------
Annoy index     : 189
Image file name : AVpfTMqFLJeJML436lYL
Product id      : AVpfTMqFLJeJML436lYL
AVpftPCILJeJML43CPOU
---------------------------------
Annoy index     : 190
Image file name : AVpftPCILJeJML43CPOU
Product id      : AVpftPCILJeJML43CPOU
AVpftVqF1cnluZ0-pz7V
---------------------------------
Annoy index    

AVpgzk2v1cnluZ0-6jnn
---------------------------------
Annoy index     : 243
Image file name : AVpgzk2v1cnluZ0-6jnn
Product id      : AVpgzk2v1cnluZ0-6jnn
AVpgZUfE1cnluZ0-05Om
---------------------------------
Annoy index     : 244
Image file name : AVpgZUfE1cnluZ0-05Om
Product id      : AVpgZUfE1cnluZ0-05Om
AVph1d5VilAPnD_x926A
---------------------------------
Annoy index     : 245
Image file name : AVph1d5VilAPnD_x926A
Product id      : AVph1d5VilAPnD_x926A
AVph2NUtilAPnD_x9_W_
---------------------------------
Annoy index     : 246
Image file name : AVph2NUtilAPnD_x9_W_
Product id      : AVph2NUtilAPnD_x9_W_
AVph34uzilAPnD_x-STz
---------------------------------
Annoy index     : 247
Image file name : AVph34uzilAPnD_x-STz
Product id      : AVph34uzilAPnD_x-STz
AVph3mXu1cnluZ0-HT3k
---------------------------------
Annoy index     : 248
Image file name : AVph3mXu1cnluZ0-HT3k
Product id      : AVph3mXu1cnluZ0-HT3k
AVph4acGLJeJML43f4tN
---------------------------------
Annoy index    

AVpiLDnNLJeJML43jDEw
---------------------------------
Annoy index     : 311
Image file name : AVpiLDnNLJeJML43jDEw
Product id      : AVpiLDnNLJeJML43jDEw
AVpiLL4XilAPnD_xBj89
---------------------------------
Annoy index     : 312
Image file name : AVpiLL4XilAPnD_xBj89
Product id      : AVpiLL4XilAPnD_xBj89
AVpimk8HLJeJML43nQKh
---------------------------------
Annoy index     : 313
Image file name : AVpimk8HLJeJML43nQKh
Product id      : AVpimk8HLJeJML43nQKh
AVpiO-zZLJeJML43jrey
---------------------------------
Annoy index     : 314
Image file name : AVpiO-zZLJeJML43jrey
Product id      : AVpiO-zZLJeJML43jrey
AVpir8tB1cnluZ0-PtRb
---------------------------------
Annoy index     : 315
Image file name : AVpir8tB1cnluZ0-PtRb
Product id      : AVpir8tB1cnluZ0-PtRb
AVpirB6K1cnluZ0-PkAG
---------------------------------
Annoy index     : 316
Image file name : AVpirB6K1cnluZ0-PkAG
Product id      : AVpirB6K1cnluZ0-PkAG
AVpiVskEilAPnD_xDNJ2
---------------------------------
Annoy index    

AV_EezrkuC1rwyj_hCoE
---------------------------------
Annoy index     : 376
Image file name : AV_EezrkuC1rwyj_hCoE
Product id      : AV_EezrkuC1rwyj_hCoE
AV_EfEXtKZqtpbFMTTHh
---------------------------------
Annoy index     : 377
Image file name : AV_EfEXtKZqtpbFMTTHh
Product id      : AV_EfEXtKZqtpbFMTTHh
AV_F-bhCYSSHbkXwp_rT
---------------------------------
Annoy index     : 378
Image file name : AV_F-bhCYSSHbkXwp_rT
Product id      : AV_F-bhCYSSHbkXwp_rT
AV_F-EVTKZqtpbFMTTyB
---------------------------------
Annoy index     : 379
Image file name : AV_F-EVTKZqtpbFMTTyB
Product id      : AV_F-EVTKZqtpbFMTTyB
AV_F-EwUYSSHbkXwp_Hr
---------------------------------
Annoy index     : 380
Image file name : AV_F-EwUYSSHbkXwp_Hr
Product id      : AV_F-EwUYSSHbkXwp_Hr
AV_F-FYxKZqtpbFMTTz8
---------------------------------
Annoy index     : 381
Image file name : AV_F-FYxKZqtpbFMTTz8
Product id      : AV_F-FYxKZqtpbFMTTz8
AV_F-hVHYSSHbkXwp_x1
---------------------------------
Annoy index    

AV_X3aUeKZqtpbFMT3VX
---------------------------------
Annoy index     : 442
Image file name : AV_X3aUeKZqtpbFMT3VX
Product id      : AV_X3aUeKZqtpbFMT3VX
AV_X3bFduC1rwyj_hre1
---------------------------------
Annoy index     : 443
Image file name : AV_X3bFduC1rwyj_hre1
Product id      : AV_X3bFduC1rwyj_hre1
AV_X3epUHh53nbDRAY4x
---------------------------------
Annoy index     : 444
Image file name : AV_X3epUHh53nbDRAY4x
Product id      : AV_X3epUHh53nbDRAY4x
AV_X3gRmYSSHbkXwqnAb
---------------------------------
Annoy index     : 445
Image file name : AV_X3gRmYSSHbkXwqnAb
Product id      : AV_X3gRmYSSHbkXwqnAb
AV_X3hk1uC1rwyj_hrgG
---------------------------------
Annoy index     : 446
Image file name : AV_X3hk1uC1rwyj_hrgG
Product id      : AV_X3hk1uC1rwyj_hrgG
AV_X3K6EKZqtpbFMT3S2
---------------------------------
Annoy index     : 447
Image file name : AV_X3K6EKZqtpbFMT3S2
Product id      : AV_X3K6EKZqtpbFMT3S2
AV_X3MvzHh53nbDRAY0z
---------------------------------
Annoy index    

AWo-Bel00U_gzG0hgB6B
---------------------------------
Annoy index     : 495
Image file name : AWo-Bel00U_gzG0hgB6B
Product id      : AWo-Bel00U_gzG0hgB6B
AWo-BKxBJbEilcB6NiMl
---------------------------------
Annoy index     : 496
Image file name : AWo-BKxBJbEilcB6NiMl
Product id      : AWo-BKxBJbEilcB6NiMl
AWo-Bu4tAGTnQPR7sPkp
---------------------------------
Annoy index     : 497
Image file name : AWo-Bu4tAGTnQPR7sPkp
Product id      : AWo-Bu4tAGTnQPR7sPkp
AWo-CavOJbEilcB6NixV
---------------------------------
Annoy index     : 498
Image file name : AWo-CavOJbEilcB6NixV
Product id      : AWo-CavOJbEilcB6NixV
AWo-CeehM263mwCq7Db6
---------------------------------
Annoy index     : 499
Image file name : AWo-CeehM263mwCq7Db6
Product id      : AWo-CeehM263mwCq7Db6
AWo-CIeLM263mwCq7DS5
---------------------------------
Annoy index     : 500
Image file name : AWo-CIeLM263mwCq7DS5
Product id      : AWo-CIeLM263mwCq7DS5
AWo-CLR4M263mwCq7DTv
---------------------------------
Annoy index    

AWo0f73b0U_gzG0hey89
---------------------------------
Annoy index     : 552
Image file name : AWo0f73b0U_gzG0hey89
Product id      : AWo0f73b0U_gzG0hey89
AWo0f7800U_gzG0hey9B
---------------------------------
Annoy index     : 553
Image file name : AWo0f7800U_gzG0hey9B
Product id      : AWo0f7800U_gzG0hey9B
AWo0f8aSAGTnQPR7q_8z
---------------------------------
Annoy index     : 554
Image file name : AWo0f8aSAGTnQPR7q_8z
Product id      : AWo0f8aSAGTnQPR7q_8z
AWo0f8tNJbEilcB6MSi4
---------------------------------
Annoy index     : 555
Image file name : AWo0f8tNJbEilcB6MSi4
Product id      : AWo0f8tNJbEilcB6MSi4
AWo0fB34M263mwCq5zOe
---------------------------------
Annoy index     : 556
Image file name : AWo0fB34M263mwCq5zOe
Product id      : AWo0fB34M263mwCq5zOe
AWo0fbxxJbEilcB6MSUN
---------------------------------
Annoy index     : 557
Image file name : AWo0fbxxJbEilcB6MSUN
Product id      : AWo0fbxxJbEilcB6MSUN
AWo0fcb50U_gzG0heywX
---------------------------------
Annoy index    

AWo0gBxdM263mwCq5zqW
---------------------------------
Annoy index     : 613
Image file name : AWo0gBxdM263mwCq5zqW
Product id      : AWo0gBxdM263mwCq5zqW
AWo0gCm-JbEilcB6MSlP
---------------------------------
Annoy index     : 614
Image file name : AWo0gCm-JbEilcB6MSlP
Product id      : AWo0gCm-JbEilcB6MSlP
AWo0gD33M263mwCq5zrT
---------------------------------
Annoy index     : 615
Image file name : AWo0gD33M263mwCq5zrT
Product id      : AWo0gD33M263mwCq5zrT
AWo0gdtvAGTnQPR7rAL0
---------------------------------
Annoy index     : 616
Image file name : AWo0gdtvAGTnQPR7rAL0
Product id      : AWo0gdtvAGTnQPR7rAL0
AWo0gdZgJbEilcB6MSw8
---------------------------------
Annoy index     : 617
Image file name : AWo0gdZgJbEilcB6MSw8
Product id      : AWo0gdZgJbEilcB6MSw8
AWo0gF5tAGTnQPR7rAAp
---------------------------------
Annoy index     : 618
Image file name : AWo0gF5tAGTnQPR7rAAp
Product id      : AWo0gF5tAGTnQPR7rAAp
AWo0gf9OM263mwCq5z4p
---------------------------------
Annoy index    

AWo0hQyz0U_gzG0hezm5
---------------------------------
Annoy index     : 669
Image file name : AWo0hQyz0U_gzG0hezm5
Product id      : AWo0hQyz0U_gzG0hezm5
AWo0hrm6M263mwCq50ef
---------------------------------
Annoy index     : 670
Image file name : AWo0hrm6M263mwCq50ef
Product id      : AWo0hrm6M263mwCq50ef
AWo0hSQnJbEilcB6MTL5
---------------------------------
Annoy index     : 671
Image file name : AWo0hSQnJbEilcB6MTL5
Product id      : AWo0hSQnJbEilcB6MTL5
AWo0hT9MJbEilcB6MTMi
---------------------------------
Annoy index     : 672
Image file name : AWo0hT9MJbEilcB6MTMi
Product id      : AWo0hT9MJbEilcB6MTMi
AWo0ht_R0U_gzG0hez2N
---------------------------------
Annoy index     : 673
Image file name : AWo0ht_R0U_gzG0hez2N
Product id      : AWo0ht_R0U_gzG0hez2N
AWo0hUKz0U_gzG0hezo7
---------------------------------
Annoy index     : 674
Image file name : AWo0hUKz0U_gzG0hezo7
Product id      : AWo0hUKz0U_gzG0hezo7
AWo0hvKcJbEilcB6MTaM
---------------------------------
Annoy index    

AWo0lXLzM263mwCq51Rj
---------------------------------
Annoy index     : 732
Image file name : AWo0lXLzM263mwCq51Rj
Product id      : AWo0lXLzM263mwCq51Rj
AWo0lXQ30U_gzG0he0lu
---------------------------------
Annoy index     : 733
Image file name : AWo0lXQ30U_gzG0he0lu
Product id      : AWo0lXQ30U_gzG0he0lu
AWo0m22GAGTnQPR7rBz7
---------------------------------
Annoy index     : 734
Image file name : AWo0m22GAGTnQPR7rBz7
Product id      : AWo0m22GAGTnQPR7rBz7
AWo0m24OJbEilcB6MUZO
---------------------------------
Annoy index     : 735
Image file name : AWo0m24OJbEilcB6MUZO
Product id      : AWo0m24OJbEilcB6MUZO
AWo0m2qMAGTnQPR7rBz0
---------------------------------
Annoy index     : 736
Image file name : AWo0m2qMAGTnQPR7rBz0
Product id      : AWo0m2qMAGTnQPR7rBz0
AWo0mhdH0U_gzG0he0vq
---------------------------------
Annoy index     : 737
Image file name : AWo0mhdH0U_gzG0he0vq
Product id      : AWo0mhdH0U_gzG0he0vq
AWo0mknt0U_gzG0he0wP
---------------------------------
Annoy index    

In [60]:
import json

# Persist the neighbour records collected in `named_nearest_neighbors` as JSON
with open('nearest_neighbors.json', 'w') as neighbors_file:
    json.dump(named_nearest_neighbors, neighbors_file)

In [64]:
# Keep only the file name and product id of each loaded image (no pixel data)
image_mapping = [
    {'image_path': image['image_path'], 'product_id': image['product_id']}
    for image in images
]

# The mapping is stored as the text of its Python repr, not as JSON
with open('image_mapping.txt', 'w') as mapping_file:
    mapping_file.write(str(image_mapping))

In [65]:
# NOTE: from here on `image_paths` holds the saved feature-vector files (*.npz), not JPEGs
image_paths = glob.glob(
    'C:\\Users\\Aakash Atnoorkar\\Documents\\ADM\\Final Project\\images\\imagesScraped\\*.npz'
)

# Product id = base name of the vector file up to its first dot
product_list = [ntpath.basename(vector_path).split('.')[0] for vector_path in image_paths]

In [66]:
# Store the product ids as the text of the list's Python repr
with open('product_list.txt', 'w') as product_file:
    product_file.write(str(product_list))

In [67]:
# Keep only the catalogue rows whose id appears in `product_list`
sample_filtered = filtered_shoes[filtered_shoes['id'].isin(product_list)]

In [77]:
#sample_filtered.to_csv('sample_filtered.csv', index = True)
# reset_index() renumbers the rows and keeps the old row labels in a new 'index' column
sample_filtered = sample_filtered.reset_index()
# Last expression of the cell: displays the frame
sample_filtered

Unnamed: 0,index,id,brand,imageURLs,name,prices.amountMax,prices.amountMin,prices.color,prices.dateSeen,prices.size,gender,dateDiff,avgPrice,image_url
0,0,AVpe8Pi0ilAPnD_xRYTs,Fireside Casuals,https://i5.walmartimages.com/asr/c55fed13-30ad...,Men's Camouflage Espadrille Slipper,20.00,20.00,Camouflage,2015-04-04 23:35:36.936000+00:00,8.0,M,318.0,0.0,https://i5.walmartimages.com/asr/c55fed13-30ad...
1,1,AVpfNujoilAPnD_xXXc1,MUK LUKS,https://i5.walmartimages.com/asr/481a1bfa-4e31...,Muk Luks Men's Fairisle Full Foot Slipper,14.99,14.99,Java,2015-04-04 22:46:00.817000+00:00,5.0,M,318.0,0.0,https://i5.walmartimages.com/asr/481a1bfa-4e31...
2,2,AVpfEFx41cnluZ0-dGzz,Daxx,https://i5.walmartimages.com/asr/9e7a8df0-cfc2...,Daxx Men's Topstitched Hardware Accent Loafers,29.99,29.99,Black,2015-08-18 00:00:00+00:00,7.5,M,183.0,0.0,https://i5.walmartimages.com/asr/9e7a8df0-cfc2...
3,3,AVpfOFMeilAPnD_xXe5b,Genuine Dickies,https://i5.walmartimages.com/asr/4cc4b01e-434f...,Genuine Dickies Jobrated Maxx Waterproof Work ...,69.78,69.78,Wheat,2015-10-18 00:00:00+00:00,10.5,M,122.0,0.0,https://i5.walmartimages.com/asr/4cc4b01e-434f...
4,4,AVpfOFMeilAPnD_xXe5b,Genuine Dickies,https://i5.walmartimages.com/asr/4cc4b01e-434f...,Genuine Dickies Jobrated Maxx Waterproof Work ...,69.78,69.78,Wheat,2015-08-24 00:00:00+00:00,13.0,M,177.0,0.0,https://i5.walmartimages.com/asr/4cc4b01e-434f...
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
60085,60085,AVpij_dBilAPnD_xFV8S,easy street,https://media.kohlsimg.com/is/image/kohls/2794...,Easy Street Natara Women's Sandals,54.99,54.99,Black,2017-11-30 07:34:00+00:00,7.5,F,516.0,0.0,https://media.kohlsimg.com/is/image/kohls/2794...
60086,60086,AVpij_dBilAPnD_xFV8S,easy street,https://media.kohlsimg.com/is/image/kohls/2794...,Easy Street Natara Women's Sandals,54.99,54.99,Black,2017-11-27 11:53:00+00:00,7.5,F,519.0,0.0,https://media.kohlsimg.com/is/image/kohls/2794...
60087,60087,AVpij_dBilAPnD_xFV8S,easy street,https://media.kohlsimg.com/is/image/kohls/2794...,Easy Street Natara Women's Sandals,54.99,54.99,Black,2017-11-29 11:20:00+00:00,7.5,F,517.0,0.0,https://media.kohlsimg.com/is/image/kohls/2794...
60088,60088,AVpij_dBilAPnD_xFV8S,easy street,https://media.kohlsimg.com/is/image/kohls/2794...,Easy Street Natara Women's Sandals,54.99,54.99,Black,2017-11-26 14:21:00+00:00,7.5,F,520.0,0.0,https://media.kohlsimg.com/is/image/kohls/2794...


In [78]:
#sample_filtered = sample_filtered.drop(columns=['Unnamed: 0', 'Unnamed: 0.1', 'Unnamed: 0.1.1'])
# Remove the 'index' column left behind by reset_index().
# errors='ignore' keeps the cell re-runnable: without it a second run raises
# KeyError because the column is already gone.
sample_filtered = sample_filtered.drop(columns=['index'], errors='ignore')

In [80]:
# NOTE(review): to_csv writes the row index as an extra unnamed first column by
# default; pass index=False if consumers of this file should not see it.
sample_filtered.to_csv('sample_filtered.csv')

In [85]:
allfiles = glob.glob(
    'C:\\Users\\Aakash Atnoorkar\\Documents\\ADM\\Final Project\\images\\imagesScraped\\*.npz'
)

# Rebuild the "<id>.jpg" file name for every saved vector file
success_images = [
    ntpath.basename(vector_path).split('.')[0] + '.jpg' for vector_path in allfiles
]

In [89]:
column_names = ["similarity", "master_pi", "similar_pi"]

# Build the frame in one step from the list of neighbour records.
# DataFrame.append was deprecated in pandas 1.4 and removed in 2.0; passing
# `columns` keeps the column order and still yields the right (empty) frame
# when there are no records.
similarity_df = pd.DataFrame(named_nearest_neighbors, columns=column_names)

In [91]:
# Export the similarity table (the row index is written as the first column)
similarity_df.to_csv('similarity_scores.csv')

In [92]:
import snowflake.connector

In [106]:
# Similar products of one master product, best match first; rows whose score
# is exactly 1 are excluded.
# NOTE(review): `curs` is not created in any visible cell - a Snowflake
# connection/cursor must exist before this runs.
# NOTE(review): if the product id ever comes from user input, bind it as a
# query parameter instead of formatting it into the SQL text.
master_product_id = "AV-9hndsuC1rwyj_g2bb"
query = (
    "SELECT SIMILAR_PI, MASTER_PI, SIMILARITY_SCORE FROM SIMILARITY_SCORES "
    f"WHERE MASTER_PI = '{master_product_id}' "
    "AND SIMILARITY_SCORE!=1 ORDER BY SIMILARITY_SCORE DESC"
)
print(query)
results = curs.execute(query).fetchall()



SELECT SIMILAR_PI, MASTER_PI, SIMILARITY_SCORE FROM SIMILARITY_SCORES WHERE MASTER_PI = 'AV-9hndsuC1rwyj_g2bb' AND SIMILARITY_SCORE!=1 ORDER BY SIMILARITY_SCORE DESC


In [108]:
# First column of every result row (SIMILAR_PI in the query's SELECT list)
sim_products_score = [rec[0] for rec in results]

In [128]:
# Runs the same query again and replaces `results` with the fresh rows
results = curs.execute(query).fetchall()