<a href="https://colab.research.google.com/github/Zia-Ul-Hasan/NLP_SEMACNTIC_PRODUCT_SEARCH/blob/main/sem_UI.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [None]:
import streamlit as st
import numpy as np
import pandas as pd
import re
from sentence_transformers import SentenceTransformer

# Load model
@st.cache_resource
def load_model():
    """Build the sentence-embedding model from the locally saved directory.

    Returns:
        The ``SentenceTransformer`` instance constructed from the saved-model
        path below.
    """
    # Point this at the directory that holds your saved model.
    return SentenceTransformer('D:/nlp______/saved_model')

# Load data
@st.cache_data
def load_data():
    """Read the product table and its precomputed embedding matrix from disk.

    Returns:
        A ``(product_df, product_embeddings)`` tuple: the DataFrame read from
        the cleaned merged CSV and the array loaded from the ``.npy`` file.

    Raises:
        ValueError: If the number of embedding rows differs from the number
            of DataFrame rows.
    """
    # The cleaned merged CSV is used here instead of the old product.csv.
    products = pd.read_csv('D:/PROjects/nlp project/cleaned_merged_products.csv')
    # Expected to have one embedding row per CSV row (count is checked below).
    embeddings = np.load('D:/Opera_downloads/product_embeddings_full.npy')

    n_embeddings, n_rows = embeddings.shape[0], len(products)
    if n_embeddings == n_rows:
        return products, embeddings

    # Fail early: a count mismatch would make positional lookups meaningless.
    raise ValueError(
        f"Mismatch between embeddings ({n_embeddings}) and "
        f"product dataframe rows ({n_rows})"
    )


# Module-level state read by get_top_matches(). Both loaders are wrapped in
# Streamlit cache decorators (st.cache_resource / st.cache_data).
model = load_model()
# load_data() raises ValueError if the embedding and row counts disagree.
product_df, product_embeddings = load_data()

# Text cleaning function consistent with preprocessing
def clean_text(text):
    """Normalise arbitrary input into lowercase alphanumeric search text.

    The value is converted with ``str()``, lowercased, every character that
    is not ``a-z``, ``0-9`` or whitespace is replaced by a single space, and
    leading/trailing whitespace is stripped. Runs of spaces are not collapsed.

    Args:
        text: Any value; non-strings are stringified first.

    Returns:
        The cleaned string (possibly empty).
    """
    lowered = str(text).lower()
    # One space per removed character, so word boundaries are preserved.
    spaced = re.sub(r'[^a-z0-9\s]', ' ', lowered)
    return spaced.strip()

# Search logic
def get_top_matches(query, top_k=5):
    """Return the product rows whose embeddings score highest against a query.

    The query is cleaned with ``clean_text``, encoded with the module-level
    ``model`` (normalised embedding), and scored against every row of
    ``product_embeddings`` by dot product. The dot product equals cosine
    similarity only if the stored product embeddings are unit-normalised,
    which is not verified here.

    Args:
        query: Raw search text.
        top_k: Maximum number of rows to return. Values less than or equal to
            zero yield an empty result.

    Returns:
        Rows of ``product_df`` selected positionally, ordered from highest to
        lowest score; at most ``top_k`` rows.
    """
    # A non-positive top_k would turn the slice below into ``[-0:]`` (the
    # whole array) or a positive start index, returning rows nobody asked for.
    if top_k <= 0:
        return product_df.iloc[0:0]

    query = clean_text(query)
    query_vector = model.encode([query], normalize_embeddings=True)
    # ravel() always yields a 1-D array. squeeze() would collapse the result
    # to a 0-d array when there is exactly one product row, and len() on a
    # 0-d array raises TypeError.
    similarities = np.dot(product_embeddings, query_vector.T).ravel()

    # Diagnostic output shown on the page alongside the results.
    st.write(f"Data rows: {len(product_df)}")
    st.write(f"Embeddings shape: {product_embeddings.shape}")
    st.write(f"Similarities shape: {similarities.shape}")

    # Never request more rows than exist.
    actual_top_k = min(top_k, len(similarities))
    # argsort is ascending: take the tail, then reverse for best-first order.
    top_indices = similarities.argsort()[-actual_top_k:][::-1]

    return product_df.iloc[top_indices]

# Streamlit UI
# Page title.
st.title("🔎 Semantic Product Search")

# Free-text input box for the search query.
query = st.text_input("Enter your product search query:")

# Search only when the input is non-empty (an empty string is falsy).
if query:
    # get_top_matches() also writes its diagnostic shape info to the page,
    # so that output appears before the subheader below.
    results = get_top_matches(query)
    st.subheader("Top Matches:")
    st.dataframe(results)
