<a href="https://colab.research.google.com/github/aims777/RAG-PROFILE-SEARCH/blob/main/RAG.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [3]:
!pip install streamlit sentence-transformers chromadb pandas
import streamlit as st
import pandas as pd
import time
import os
from sentence_transformers import SentenceTransformer
from chromadb import PersistentClient

# ---------------- SETTINGS ----------------
CHROMA_DIR = "./chroma_db"
CSV_PATH = "profiles.csv"
# Number of profiles embedded and inserted per call; small enough to stay
# comfortably under Chroma's per-call batch limit.
EMBED_BATCH_SIZE = 256

# Streamlit documents set_page_config as the first Streamlit command of a
# script run (older releases raise StreamlitAPIException otherwise), so it
# must come before the st.write calls in the dataset bootstrap below and
# before the Streamlit calls made inside load_profiles().
st.set_page_config(page_title="RAG Profile Search", layout="centered")

# ---------------- CREATE BUILT-IN CSV ----------------
# First run only: write a small demo dataset so the app works out of the box.
if not os.path.exists(CSV_PATH):
    st.write("⚙️ Creating built-in demo dataset...")
    data = {
        "name": ["Aiman Saba", "Pooja Sharma", "Rahul Verma", "Sneha Rao", "Vikram Singh"],
        "email": [
            "aiman@example.com",
            "pooja@example.com",
            "rahul@example.com",
            "sneha@example.com",
            "vikram@example.com",
        ],
        "location": ["Bangalore", "Mysore", "Hyderabad", "Chennai", "Pune"],
        "skills": [
            "Python, Machine Learning, Data Analysis",
            "Java, React, Full Stack Development",
            "SQL, Tableau, Data Visualization",
            "Deep Learning, NLP, AI Research",
            "Excel, Business Intelligence, Power BI",
        ],
        "experience_years": [2, 3, 4, 5, 2],
        "summary": [
            "Data enthusiast with experience in Python and ML.",
            "Full-stack developer skilled in Java and React.",
            "Analyst experienced in data visualization and SQL.",
            "AI researcher passionate about NLP and deep learning.",
            "Business analyst skilled in Excel and BI tools.",
        ],
    }

    df = pd.DataFrame(data)
    # raw_text is the string that gets embedded for each profile.
    df["raw_text"] = df["name"] + " " + df["skills"] + " " + df["summary"]
    df.to_csv(CSV_PATH, index=False)
    st.write("✅ Demo dataset created!")


# ---------------- LOAD PROFILES ----------------
@st.cache_resource
def load_profiles():
    """Load the embedding model, read the profiles CSV and index it in Chroma.

    Every row of ``CSV_PATH`` is stored in a freshly created Chroma
    collection under ``CHROMA_DIR``: the row's index label (as a string) is
    the id, its ``raw_text`` value is the document and the embedded text,
    and the full row is attached as metadata.

    Wrapped in ``st.cache_resource`` so the model load and indexing are not
    repeated on every Streamlit rerun.

    Returns:
        A tuple ``(embed_model, collection, df)``: the SentenceTransformer
        model, the populated Chroma collection, and the profiles DataFrame.
    """
    st.write("🔄 Loading model and profiles database...")
    embed_model = SentenceTransformer("all-MiniLM-L6-v2")

    df = pd.read_csv(CSV_PATH)
    st.success(f"✅ Loaded {len(df)} profiles")

    client = PersistentClient(path=CHROMA_DIR)
    # The timestamp suffix gives each (uncached) call its own collection
    # name, presumably so create_collection does not collide with
    # collections persisted in CHROMA_DIR by earlier runs.
    collection_name = "profiles_" + str(int(time.time()))
    collection = client.create_collection(name=collection_name)

    ids = []
    texts = []
    metadatas = []
    for idx, row in df.iterrows():
        ids.append(str(idx))
        texts.append(str(row.get("raw_text", "")))
        metadatas.append(row.to_dict())

    # Embed and insert in batches instead of one encode()/add() round trip
    # per row. An empty CSV yields no batches, so nothing is inserted.
    for start in range(0, len(ids), EMBED_BATCH_SIZE):
        stop = start + EMBED_BATCH_SIZE
        batch_texts = texts[start:stop]
        collection.add(
            ids=ids[start:stop],
            documents=batch_texts,
            metadatas=metadatas[start:stop],
            embeddings=embed_model.encode(batch_texts).tolist(),
        )

    return embed_model, collection, df


embed_model, collection, df = load_profiles()

# ---------------- STREAMLIT UI ----------------
st.title("🔎 RAG Profile Search")
st.caption("Search candidate profiles using semantic similarity (RAG-based).")

query = st.text_input("Enter skills, roles, or keywords:")

if st.button("Search"):
    if not query.strip():
        st.warning("Please enter a search query.")
    else:
        with st.spinner("Searching profiles..."):
            query_emb = embed_model.encode(query).tolist()
            results = collection.query(query_embeddings=[query_emb], n_results=3)

            # One list of metadata dicts comes back per query embedding;
            # exactly one embedding was sent, so only entry 0 is relevant.
            matches = results["metadatas"][0] if results["metadatas"] else []

            if not matches:
                st.error("❌ No matching profiles found.")
            else:
                st.success("✅ Top Matching Profiles")
                for rank, meta in enumerate(matches, start=1):
                    st.markdown(f"### 👤 Result {rank}")
                    st.write(f"**Name:** {meta.get('name')}")
                    st.write(f"**Email:** {meta.get('email')}")
                    st.write(f"**Location:** {meta.get('location')}")
                    st.write(f"**Skills:** {meta.get('skills')}")
                    st.write(f"**Experience:** {meta.get('experience_years')} years")
                    st.write(f"**Summary:** {meta.get('summary')}")
                    st.markdown("---")




