In [5]:
import torch
from sentence_transformers import SentenceTransformer
import faiss
import numpy as np
import pandas as pd

df = pd.read_csv('./data/process_emojis_no_duplicates.csv')

# Load pre-trained sentence transformer model
model = SentenceTransformer('sentence-transformers/all-mpnet-base-v2')

# Generate embeddings for the text_name column.
# encode() is documented to accept a str or a list of str, so hand it a plain
# list instead of the pandas Series: a Series is indexed by label, which only
# lines up with positions while df keeps the default RangeIndex from read_csv.
text_embeddings = model.encode(df["text_name"].tolist(), convert_to_numpy=True)

# Scale every row to unit length so that inner product == cosine similarity
text_embeddings = text_embeddings / np.linalg.norm(text_embeddings, axis=1, keepdims=True)

# Create FAISS index; vectors are added in df order, so hit i refers to df.iloc[i]
d = text_embeddings.shape[1]  # Dimension of embeddings
index = faiss.IndexFlatIP(d)  # Inner-product index (cosine on unit vectors)
index.add(text_embeddings)  # Add embeddings to the index

# Function to find the closest match
def find_closest_match(query):
    """Return the emoji whose ``text_name`` embedding best matches ``query``.

    The query is encoded with the notebook-level ``model``, scaled to unit
    length and searched in the notebook-level ``index``; the single best hit
    is used as a positional row number into ``df``.

    Args:
        query: Text to find an emoji for.

    Returns:
        The value of the ``emoji`` column for the best-matching row of ``df``.
    """
    # A one-element batch goes in, so the whole-array norm below is simply
    # the length of that one embedding.
    raw_vector = model.encode([query], show_progress_bar=False, convert_to_numpy=True)
    unit_vector = raw_vector / np.linalg.norm(raw_vector)

    # Only the nearest neighbour is requested; similarity scores are unused.
    _scores, neighbour_ids = index.search(unit_vector, 1)
    best_row = neighbour_ids[0][0]
    return df.iloc[best_row]["emoji"]



In [None]:
# NOTE: this cell redefines find_closest_match and, once run, shadows the
# earlier definition in this notebook.
def find_closest_match(query, top_k=1):
    """Return the emoji (or emojis) whose ``text_name`` best matches ``query``.

    The query is encoded with the notebook-level ``model``, scaled to unit
    length and searched in the notebook-level ``index``; hits are used as
    positional row numbers into ``df``.

    Args:
        query: Text to find an emoji for.
        top_k: How many nearest neighbours to fetch. Defaults to 1, which
            keeps the original behaviour of returning only the best match.

    Returns:
        With ``top_k == 1``, the ``emoji`` value of the best-matching row.
        With ``top_k > 1``, a list of ``emoji`` values ordered best-first;
        it may be shorter than ``top_k`` if the index holds fewer vectors.

    Raises:
        ValueError: If ``top_k`` is smaller than 1.
    """
    if top_k < 1:
        raise ValueError("top_k must be at least 1")

    query_embedding = model.encode([query], convert_to_numpy=True, show_progress_bar=False)
    # Single-row batch, so the whole-array norm is that row's length.
    query_embedding = query_embedding / np.linalg.norm(query_embedding)  # Normalize

    # Fetch only as many neighbours as will actually be returned.
    _, indices = index.search(query_embedding, top_k)

    # FAISS pads the result with -1 when it finds fewer than top_k hits;
    # drop those so they are not misread as "last row" by iloc.
    hits = [row for row in indices[0] if row >= 0]
    emojis = df.iloc[hits]["emoji"]

    if top_k == 1:
        return emojis.iloc[0]
    return emojis.tolist()

In [11]:
# Look up an emoji for each whitespace-separated word of the query and
# print them on one line, each followed by a single space.
query_text = "i love ball"

for word in query_text.split():
    print(find_closest_match(word), end=" ")

🧑 💏 ⚽ 