# Colab starter: build title embeddings and FAISS index
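This notebook shows the minimal steps to load the Shopee product-matching dataset, compute title embeddings with `sentence-transformers/all-MiniLM-L6-v2`, and build a FAISS index over them. It is meant to be run on Google Colab and expects `train.csv` at `/content/drive/MyDrive/shopee-product-matching/train.csv`, so mount Google Drive before running the cells. A GPU runtime is recommended for larger models.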

In [None]:
# Install required packages
!pip install sentence-transformers faiss-cpu pandas

In [None]:
from pathlib import Path

import pandas as pd

# Location of the Shopee training CSV inside the mounted Google Drive.
TRAIN_CSV = Path('/content/drive/MyDrive/shopee-product-matching/train.csv')

# Fail early with an actionable message: on a fresh Colab runtime Drive is not
# mounted, so the path above does not exist until the user mounts it.
if not TRAIN_CSV.exists():
    raise FileNotFoundError(
        f"{TRAIN_CSV} not found. Mount Google Drive first "
        "(from google.colab import drive; drive.mount('/content/drive')) "
        "or point TRAIN_CSV at your copy of train.csv."
    )

df = pd.read_csv(TRAIN_CSV)
# Show the first rows as a quick sanity check of the loaded frame.
df.head()

In [None]:
import numpy as np
from sentence_transformers import SentenceTransformer

# Sentence encoder used to embed the product titles.
model = SentenceTransformer('all-MiniLM-L6-v2')

# Missing titles become empty strings and every value is cast to str,
# so encode() receives a plain Python list of strings.
titles = (
    df['title']
    .fillna('')
    .astype(str)
    .tolist()
)

# Encode all titles; convert_to_numpy=True requests a NumPy array result.
embs = model.encode(
    titles,
    show_progress_bar=True,
    convert_to_numpy=True,
)

# Persist the embeddings so the indexing step can reload them from disk.
np.save('/content/embeddings.npy', embs)

In [None]:
import faiss
import numpy as np  # imported here as well so this cell runs on its own after a kernel restart

# Reload the saved embeddings from disk instead of relying on the in-memory array.
embs = np.load('/content/embeddings.npy')
embs = embs.astype('float32')  # FAISS operates on float32 arrays

d = embs.shape[1]  # embedding dimensionality (number of columns)
index = faiss.IndexFlatIP(d)  # flat inner-product index

# L2-normalise the rows in place before adding them: the inner product of
# unit-length vectors equals their cosine similarity.
faiss.normalize_L2(embs)
index.add(embs)

# Persist the index so it can be reloaded without rebuilding.
faiss.write_index(index, '/content/faiss_index.idx')