In [1]:
import pandas as pd
import torch
import torch.nn as nn
import torchvision.models as models
import torchvision.transforms as transforms
from PIL import Image
from sklearn.metrics.pairwise import cosine_similarity

In [2]:
# Identifier of the product snapshot: it selects the input CSV here and is
# reused for the name of the exported similarity file further down.
product_data_id = 'MLM1000_1000'
df = pd.read_csv(f'./data_warehouse/products/{product_data_id}.csv')

# Pre-trained VGG-16 with the last three classifier layers removed, so the
# network returns the activations of an intermediate fully connected layer
# instead of class scores.
vgg_model = models.vgg16(pretrained=True)
vgg_model.classifier = nn.Sequential(*list(vgg_model.classifier.children())[:-3])

# Switch to inference mode. torchvision's VGG classifier is laid out as
# Linear-ReLU-Dropout-Linear-ReLU-Dropout-Linear, so the truncated head still
# contains a Dropout layer; left in training mode it randomly zeroes
# activations and the same image yields different features on every call.
vgg_model.eval()




In [3]:
# Preprocessing pipeline applied to a PIL image before it is fed to the model:
#   1. resize to 224x224,
#   2. convert the PIL image to a tensor,
#   3. normalize each channel with the given mean / std.
image_transform = transforms.Compose(
    [
        transforms.Resize((224, 224)),
        transforms.ToTensor(),
        transforms.Normalize(
            mean=[0.485, 0.456, 0.406],
            std=[0.229, 0.224, 0.225],
        ),
    ]
)

In [4]:
def extract_image_features(image_path):
    """Return the VGG feature vector of the image stored at ``image_path``.

    The image is decoded as 3-channel RGB, preprocessed with the module-level
    ``image_transform`` pipeline and passed through ``vgg_model`` with
    gradient tracking disabled.

    Args:
        image_path: Location of the image, in any form accepted by
            ``PIL.Image.open``.

    Returns:
        numpy.ndarray: The model output with singleton dimensions removed.
    """
    # The context manager closes the underlying file deterministically;
    # convert() returns a fully loaded copy, so it stays usable afterwards.
    # Forcing RGB matters because the 3-channel Normalize step cannot handle
    # grayscale, palette or RGBA thumbnails.
    with Image.open(image_path) as img:
        rgb_img = img.convert("RGB")

    # Reuse the shared pipeline instead of rebuilding an identical one on
    # every call; unsqueeze(0) adds the batch dimension the model expects.
    img_tensor = image_transform(rgb_img).unsqueeze(0)
    with torch.no_grad():
        features = vgg_model(img_tensor)
    return features.squeeze().numpy()

In [5]:
def calculate_similarity(image_path1, image_path2):
    """Return the cosine similarity between the features of two images.

    Both images are embedded with ``extract_image_features``; the single
    entry of the resulting 1x1 similarity matrix is returned.
    """
    first_vector = extract_image_features(image_path1)
    second_vector = extract_image_features(image_path2)
    # cosine_similarity works on 2-D inputs, hence the one-row wrappers.
    pairwise = cosine_similarity([first_vector], [second_vector])
    return pairwise[0][0]

In [27]:
from torchvision.models import resnet18

# Step 1: Load the pre-trained ResNet-18 and put it in inference mode right
# away (Module.eval() returns the module itself).
# NOTE(review): the network is used as-is, so its final fully connected layer
# is still in place and the "features" are presumably that layer's outputs
# rather than pooled embeddings - confirm this is intended.
model = resnet18(pretrained=True).eval()

# Preprocessing applied to every image before the forward pass:
# resize to 224x224, convert to a tensor, normalize each channel.
transform = transforms.Compose(
    [
        transforms.Resize((224, 224)),
        transforms.ToTensor(),
        transforms.Normalize(
            mean=[0.485, 0.456, 0.406],
            std=[0.229, 0.224, 0.225],
        ),
    ]
)

# Step 2: Turn an image file into a feature vector with ResNet-18
def extract_image_features(image_path):
    """Return the ResNet-18 output for the image stored at ``image_path``.

    The image is opened, converted to RGB, preprocessed with the module-level
    ``transform`` and run through ``model`` without gradient tracking. The
    result is returned as a NumPy array with singleton dimensions removed.
    """
    rgb_image = Image.open(image_path).convert("RGB")
    # unsqueeze(0) adds the leading batch dimension expected by the model.
    batch = transform(rgb_image).unsqueeze(0)
    with torch.no_grad():
        output = model(batch)
    return output.squeeze().numpy()

# Step 3: Cosine similarity between the feature vectors of two images
def calculate_similarity(image_path1, image_path2):
    """Embed both images and return the cosine similarity of their features."""
    vector_a, vector_b = (
        extract_image_features(path) for path in (image_path1, image_path2)
    )
    # cosine_similarity expects 2-D inputs and returns a matrix; with one row
    # on each side the score is its only entry.
    return cosine_similarity([vector_a], [vector_b])[0][0]


Downloading: "https://download.pytorch.org/models/resnet18-f37072fd.pth" to /Users/jaimevarelap/.cache/torch/hub/checkpoints/resnet18-f37072fd.pth
100%|██████████| 44.7M/44.7M [00:04<00:00, 10.2MB/s]


In [28]:
# Pairwise thumbnail similarity for every ordered pair of products that share
# a domain_id (a product paired with itself is included).
#
# Each thumbnail is embedded at most once and kept in `thumbnail_features`.
# Embedding both images again for every pair, as calculate_similarity() does,
# repeats the expensive model forward pass a quadratic number of times while
# producing exactly the same vectors.
similarity_scores = []
thumbnail_features = {}  # catalog_product_id -> feature vector

# Plain lists give positional access, so the loops do not rely on `df`
# carrying a default 0..n-1 index.
product_ids = df['catalog_product_id'].tolist()
domain_ids = df['domain_id'].tolist()
semantic_strings = df['semantic_string'].tolist()

for i, product_id_1 in enumerate(product_ids):
    # Progress trace: row position and product description.
    print(i, semantic_strings[i])
    for j, product_id_2 in enumerate(product_ids):

        if domain_ids[i] == domain_ids[j]:

            # Embed lazily so only thumbnails that take part in at least one
            # comparison are ever read from disk.
            for product_id in (product_id_1, product_id_2):
                if product_id not in thumbnail_features:
                    thumbnail_features[product_id] = extract_image_features(
                        f'./data_warehouse/thumbnails/{product_id}_thumbnail.jpg'
                    )

            # Same computation as calculate_similarity(), on cached vectors.
            similarity_score = cosine_similarity(
                [thumbnail_features[product_id_1]],
                [thumbnail_features[product_id_2]],
            )[0][0]

            # Store catalog_product_id_1, catalog_product_id_2, and similarity score
            similarity_scores.append([product_id_1, product_id_2, similarity_score])

0 Huawei Freebuds Pro 3 Plata Glacial Huawei 10.2 cm 200 g
1 Audífonos Inalámbricos Bluetooth Función Carga Inalámbrica Bt3-0066 Negro Avedistante 11 7.6 cm 60 g
2 Bocina Portátil Jbl Flip 6 Bluetooth Aprueba De Agua Color Negro JBL 10.8 cm 940 g
3 Smart Tv Portátil Hisense H4f Series 40h4030f Led Full Hd 40  Hisense 99.6 cm 7640 g
4 Asistente Virtual Amazon Echo Dot 5ta Generación Negro Amazon 15.4 cm 560 g
5 Audífonos Inalámbricos Jbl Tune 510bt Jblt510bt Negro JBL 23.6 cm 340 g
6 Bocina Bluetooth Portátil 1hora Boc241, Bocinas Para Pc Bocinas Inalámbrica Bluetooth Rgb Altavoces Inalámbrico, Soporte 3.5 Mm Aux/tf/usb/bluetooth Para Interior, Hogar Y Fiesta 1Hora 41.6 cm 760 g
7 Audífonos In-ear Gamer Inalámbricos Bluetooth F9-5 Negro Con Luz  Azul Marino Shenzhen Yihaotong  
8  Pantalla Sansui Smx32t1h Led Hd 32  Sansui 78.4 cm 4700 g
9 Asistente Virtual Amazon Echo Dot 5ta Gen Blanco Color Glacier White Amazon 15.4 cm 560 g
10 Audífonos In-ear Inalámbricos 1hora A8s Tws Negro Audífo

In [29]:
# Lookup table with the descriptive fields of each product; it is joined onto
# both sides of the similarity pairs further down.
product_dictionary = df.loc[:, ['domain_id', 'catalog_product_id', 'semantic_string', 'brand']]
product_dictionary.head(10)

Unnamed: 0,domain_id,catalog_product_id,semantic_string,brand
0,MLM-HEADPHONES,MLM28292846,Huawei Freebuds Pro 3 Plata Glacial Huawei 10....,Huawei
1,MLM-HEADPHONES,MLM26195158,Audífonos Inalámbricos Bluetooth Función Carga...,11
2,MLM-SPEAKERS,MLM18930466,Bocina Portátil Jbl Flip 6 Bluetooth Aprueba D...,JBL
3,MLM-TELEVISIONS,MLM15254609,Smart Tv Portátil Hisense H4f Series 40h4030f ...,Hisense
4,MLM-SMART_SPEAKERS,MLM19795010,Asistente Virtual Amazon Echo Dot 5ta Generaci...,Amazon
5,MLM-HEADPHONES,MLM17968066,Audífonos Inalámbricos Jbl Tune 510bt Jblt510b...,JBL
6,MLM-SPEAKERS,MLM22801217,"Bocina Bluetooth Portátil 1hora Boc241, Bocina...",1Hora
7,MLM-HEADPHONES,MLM16224063,Audífonos In-ear Gamer Inalámbricos Bluetooth ...,Shenzhen Yihaotong
8,MLM-TELEVISIONS,MLM19898765,Pantalla Sansui Smx32t1h Led Hd 32 Sansui 78...,Sansui
9,MLM-SMART_SPEAKERS,MLM19795012,Asistente Virtual Amazon Echo Dot 5ta Gen Blan...,Amazon


In [30]:
# Build the final pair table in one chain:
#   1. wrap the raw [id_1, id_2, score] records in a frame,
#   2. attach product metadata for the first product, then for the second
#      (the second merge suffixes the clashing columns with _x / _y),
#   3. keep a single domain_id column (pairs are only built within a domain)
#      and drop the join keys, which duplicate catalog_product_id_x / _y,
#   4. fix the column order used in the exported file.
similarity_df = (
    pd.DataFrame(
        similarity_scores,
        columns=['catalog_product_id_1', 'catalog_product_id_2', 'similarity_score'],
    )
    .merge(product_dictionary, how='left', left_on='catalog_product_id_1', right_on='catalog_product_id')
    .merge(product_dictionary, how='left', left_on='catalog_product_id_2', right_on='catalog_product_id')
    .drop(columns=['domain_id_y'])
    .rename(columns={'domain_id_x': 'domain_id'})
    .drop(columns=['catalog_product_id_1', 'catalog_product_id_2'])
    .loc[:, [
        'domain_id',
        'catalog_product_id_x',
        'catalog_product_id_y',
        'semantic_string_x',
        'semantic_string_y',
        'brand_x',
        'brand_y',
        'similarity_score',
    ]]
)

# Persist the result next to the other data-warehouse artefacts.
similarity_df.to_csv(f'./data_warehouse/thumbnail_similarity/thumbnail_similarity_{product_data_id}.csv', index=False)

In [31]:
# Look for near-identical thumbnails across brands: keep only pairs that link
# two distinct products with two distinct brand values, then show the 60
# highest-scoring rows. The table holds ordered pairs, so every match shows
# up twice (once per direction).
different_product_index = similarity_df['catalog_product_id_x'].ne(similarity_df['catalog_product_id_y'])
different_brand_index = similarity_df['brand_x'].ne(similarity_df['brand_y'])

(
    similarity_df
    .loc[different_product_index & different_brand_index]
    .sort_values(by='similarity_score', ascending=False)
    .head(60)
)

Unnamed: 0,domain_id,catalog_product_id_x,catalog_product_id_y,semantic_string_x,semantic_string_y,brand_x,brand_y,similarity_score
117924,MLM-HEADPHONES,MLM25915118,MLM29599699,Audífonos Gamer Bluetooth Inalámbricos P9 Mode...,Audífonos Inalámbrico Diadema Con Microfono Bl...,Genérica,Vulata,0.998799
92052,MLM-HEADPHONES,MLM29599699,MLM25915118,Audífonos Inalámbrico Diadema Con Microfono Bl...,Audífonos Gamer Bluetooth Inalámbricos P9 Mode...,Vulata,Genérica,0.998799
11509,MLM-AUDIO_AND_VIDEO_CABLES_AND_ADAPTERS,MLM22795819,MLM21303411,Cable De Video Hdmi 3 Metros Full Hd Xbox Lapt...,Cable Hdmi 1.5 Metros Full Hd 1080p Ps3 Xbox 3...,Genérica,Transhine,0.998171
95518,MLM-AUDIO_AND_VIDEO_CABLES_AND_ADAPTERS,MLM21303411,MLM22795819,Cable Hdmi 1.5 Metros Full Hd 1080p Ps3 Xbox 3...,Cable De Video Hdmi 3 Metros Full Hd Xbox Lapt...,Transhine,Genérica,0.998171
118081,MLM-HEADPHONES,MLM25915118,MLM28901848,Audífonos Gamer Bluetooth Inalámbricos P9 Mode...,Audífonos Inalámbrico Diadema Con Microfono Bl...,Genérica,Vulata,0.998127
192326,MLM-HEADPHONES,MLM28901848,MLM25915118,Audífonos Inalámbrico Diadema Con Microfono Bl...,Audífonos Gamer Bluetooth Inalámbricos P9 Mode...,Vulata,Genérica,0.998127
192413,MLM-HEADPHONES,MLM28901848,MLM26326121,Audífonos Inalámbrico Diadema Con Microfono Bl...,Audífonos Gamer Inalámbricos Sw-ahead Audífono...,Vulata,SW-AHEAD,0.989249
173718,MLM-HEADPHONES,MLM26326121,MLM28901848,Audífonos Gamer Inalámbricos Sw-ahead Audífono...,Audífonos Inalámbrico Diadema Con Microfono Bl...,SW-AHEAD,Vulata,0.989249
92139,MLM-HEADPHONES,MLM29599699,MLM26326121,Audífonos Inalámbrico Diadema Con Microfono Bl...,Audífonos Gamer Inalámbricos Sw-ahead Audífono...,Vulata,SW-AHEAD,0.988886
173561,MLM-HEADPHONES,MLM26326121,MLM29599699,Audífonos Gamer Inalámbricos Sw-ahead Audífono...,Audífonos Inalámbrico Diadema Con Microfono Bl...,SW-AHEAD,Vulata,0.988886
