In [1]:
import pandas as pd
import numpy as np
import faiss
from sentence_transformers import SentenceTransformer
from sklearn.preprocessing import StandardScaler, LabelEncoder

In [2]:
# Load the product catalogue from CSV into a DataFrame.
file_path = "productos_todas_categorias.csv"
df = pd.read_csv(filepath_or_buffer=file_path)

In [3]:
# Keep only the columns that are used downstream.
# .copy() turns the selection into an independent DataFrame, so the column
# assignments made in later cells (embedding column, scaled price) modify this
# frame explicitly instead of raising pandas' SettingWithCopyWarning on a
# derived selection.
selected_columns = [
    'id', 'title', 'condition', 'price', 'original_price',
    'category_id', 'Category', 'available_quantity',
    'thumbnail', 'permalink',
]
df = df[selected_columns].copy()

In [4]:
# Load the sentence-embedding model used to vectorize the product titles.
# NOTE(review): the original comment described this as a BERT model specialized
# in products; the checkpoint name points to a general-purpose
# sentence-transformers model -- confirm it suits the titles in this dataset.
MODEL_NAME = 'all-mpnet-base-v2'
model = SentenceTransformer(MODEL_NAME)

In [5]:
# Encode every title into a dense vector. The result is a 2-D numpy array with
# one row per DataFrame row, in the same order as df['title'].
# The whole list is passed in a single call so SentenceTransformer can batch the
# forward passes instead of running the model once per title; values may differ
# from per-title encoding only at floating-point rounding level.
title_embeddings = model.encode(
    df['title'].tolist(),
    convert_to_numpy=True,
    show_progress_bar=True,
)

In [6]:
# Store each title's embedding on its row as a plain Python list of floats.
df['title_embedding'] = [vector.tolist() for vector in title_embeddings]

In [7]:
# Standardize the numeric feature columns (currently only `price`) with
# StandardScaler, overwriting the original values in place.
scaler = StandardScaler()
numeric_columns = ['price']
df[numeric_columns] = scaler.fit_transform(df[numeric_columns])

In [8]:
# Build a flat L2 FAISS index over the title embeddings for fast similarity
# search; vectors are added in DataFrame row order.
n_vectors, dimension = title_embeddings.shape
index = faiss.IndexFlatL2(dimension)
index.add(title_embeddings)

In [9]:
# Persist the FAISS index to disk.
index_path = "productos_faiss.index"
faiss.write_index(index, index_path)

In [10]:
# Write the product IDs to their own CSV (DataFrame row order, no index column)
# so FAISS results can later be mapped back to products.
df.to_csv("productos_ids.csv", columns=['id'], index=False)

In [12]:
# Save the processed DataFrame, including the embedding column, as CSV
# (row index omitted).
df.to_csv(path_or_buf="productos_con_embeddings.csv", index=False)

In [13]:
# Display the final DataFrame as the cell output.
df

Unnamed: 0,id,title,condition,price,original_price,category_id,Category,available_quantity,thumbnail,permalink,title_embedding
0,MCO2126119526,michelin urbano city grip 2 de 130/70-12,new,-0.111903,314615.0,MCO167778,MCO1747,1,http://http2.mlstatic.com/D_638799-MLU75748277...,https://www.mercadolibre.com.co/michelin-urban...,"[-0.023791847750544548, 0.003918687347322702, ..."
1,MCO1384535677,casco ich 501 sp integral solid certificado,new,-0.112172,110000.0,MCO21947,MCO1747,100,http://http2.mlstatic.com/D_833377-MCO74072395...,https://articulo.mercadolibre.com.co/MCO-13845...,"[-0.02210717648267746, -0.019891370087862015, ..."
2,MCO464586517,tapa fugas de radiador liqui moly 150ml,new,-0.112374,39300.0,MCO165954,MCO1747,50,http://http2.mlstatic.com/D_867340-MCO70089331...,https://articulo.mercadolibre.com.co/MCO-46458...,"[-0.011301456019282341, -0.11856019496917725, ..."
3,MCO1493511235,intercomunicador auriculares casco bluetooh bt...,new,-0.112315,60001.0,MCO90069,MCO1747,50,http://http2.mlstatic.com/D_929862-MLU73884050...,https://www.mercadolibre.com.co/intercomunicad...,"[-0.01385044027119875, -0.03008236363530159, -..."
4,MCO1240028153,llanta 140/70r17 diablo rosso 3 tl 66h pirelli,new,-0.111359,526800.0,MCO167778,MCO1747,1,http://http2.mlstatic.com/D_951084-MLU73121912...,https://www.mercadolibre.com.co/llanta-14070r1...,"[-0.038395341485738754, -0.02207917347550392, ..."
...,...,...,...,...,...,...,...,...,...,...,...
29723,MCO1382352183,virgen nuestra señora de la dulce espera 20x6 ...,new,-0.112272,75000.0,MCO116987,MCO1953,1,http://http2.mlstatic.com/D_708464-MCO73957846...,https://articulo.mercadolibre.com.co/MCO-13823...,"[-0.0054532261565327644, -0.001373916282318532..."
29724,MCO872571839,encendedor zippo graffiti negro,new,-0.112071,145500.0,MCO172908,MCO1953,1,http://http2.mlstatic.com/D_745535-MCO49414367...,https://articulo.mercadolibre.com.co/MCO-87257...,"[0.010995982214808464, 0.07852696627378464, -0..."
29725,MCO454844292,encendedor zippo cromo cepillado tipo tejido -...,new,-0.112167,111800.0,MCO172908,MCO1953,1,http://http2.mlstatic.com/D_603703-MCO31016609...,https://articulo.mercadolibre.com.co/MCO-45484...,"[0.006936823017895222, -0.0653829276561737, 0...."
29726,MCO851937665,encendedor zipp ojo color 49699,new,-0.111977,178500.0,MCO172908,MCO1953,1,http://http2.mlstatic.com/D_607455-MCO48896385...,https://articulo.mercadolibre.com.co/MCO-85193...,"[-0.0007504175300709903, 0.010132159106433392,..."
