In [0]:
# Import libraries
import pandas as pd
import numpy as np
from sklearn.metrics.pairwise import cosine_similarity

# Read the marketing FAQ table from its CSV file.
# NOTE(review): this is an absolute, user-specific workspace path, so the cell
# only runs where this exact file exists.
FAQ_CSV_PATH = "/Workspace/Users/ziadashraf98765@gmail.com/Databricks_repo/FAQ_Chatbot/data/marketing_faq.csv"
faq_df = pd.read_csv(filepath_or_buffer=FAQ_CSV_PATH)
# Bare trailing expression so the notebook renders the loaded frame.
faq_df

In [0]:
# %pip install sentence-transformers
from sentence_transformers import SentenceTransformer

# Load the sentence-embedding model once; the later cells reuse this instance.
model = SentenceTransformer('all-MiniLM-L6-v2')

# Encode every answer in one batched call instead of one model.encode() call
# per row. For a list input, encode() returns a single 2-D array; list(...)
# splits it back into one 1-D vector per row, so each 'embedding' cell keeps
# the same per-row layout that answer_query reads.
faq_df['embedding'] = list(model.encode(faq_df['Answer'].tolist()))

In [0]:
# Preview the first five rows of the FAQ frame.
faq_df.head(n=5)

In [0]:
# ==============================
# Function: Get Query Embedding
# ==============================
def get_query_embedding(query):
    """
    Encode ``query`` with the notebook-level ``model``.

    Returns whatever ``model.encode(query)`` returns, unchanged.
    """
    query_vector = model.encode(query)
    return query_vector

In [0]:
# ==============================
# 6️⃣ Function: Find Most Similar FAQ Answer
# ==============================
def answer_query(query, faq_df, top_k=1):
    """
    Return the FAQ rows whose stored embedding is most similar to ``query``.

    Parameters
    ----------
    query
        The user question; passed unchanged to ``get_query_embedding``.
    faq_df : pandas.DataFrame
        Must contain 'Question', 'Answer' and 'embedding' columns, where each
        'embedding' cell holds one vector of the same length as the query
        embedding.
    top_k : int or None, default 1
        Maximum number of rows to return. ``None`` returns every row; zero or
        a negative value returns no rows.

    Returns
    -------
    pandas.DataFrame
        The 'Question' and 'Answer' columns of the best-matching rows, ordered
        from most to least similar (ties keep ``faq_df`` row order). The
        original index labels are preserved.
    """
    result_columns = ['Question', 'Answer']

    # Nothing to rank: an empty frame cannot be stacked into a matrix, and a
    # non-positive top_k must not fall through to the slice below, where a
    # negative value would mean "all but the last |k| rows".
    if len(faq_df) == 0 or (top_k is not None and top_k <= 0):
        return faq_df.iloc[:0][result_columns]

    # Generate embedding for the query
    query_emb = get_query_embedding(query)

    # One cosine_similarity call against all FAQ embeddings stacked as rows,
    # instead of one call per row.
    embedding_matrix = np.vstack(faq_df['embedding'].tolist())
    similarities = cosine_similarity([query_emb], embedding_matrix)[0]

    # Rank by position, not by index label, so a frame with repeated index
    # labels still yields at most top_k rows. The stable sort on negated
    # scores gives descending order with ties in original row order.
    top_positions = np.argsort(-similarities, kind='stable')[:top_k]

    return faq_df.iloc[top_positions][result_columns]

In [0]:
# ==============================
# 7️⃣ Test the Chatbot
# ==============================
# Two sample questions to exercise the retrieval function end to end.
query1 = "How do I increase social media engagement?"
query2 = "What are the best marketing automation tools?"

# Print each query followed by its best match; every query after the first
# gets a leading newline so the results are visually separated.
for position, sample_query in enumerate((query1, query2)):
    label = "Query:" if position == 0 else "\nQuery:"
    print(label, sample_query)
    print(answer_query(sample_query, faq_df))