In [None]:
# 📦 Install required packages (run once)
!pip install -q sentence-transformers rapidfuzz nltk

# 📥 Imports
import pandas as pd
from sentence_transformers import SentenceTransformer, util
from rapidfuzz import fuzz
from nltk.corpus import wordnet
import nltk
import re
import torch
import pickle
import os

# 📌 Download NLTK corpus
nltk.download('wordnet')
nltk.download('omw-1.4')

# 🧠 Load transformer model
print("📥 Loading transformer model...")
model = SentenceTransformer("all-MiniLM-L6-v2")

# 📄 Load your CSV file (replace path if needed)
csv_path = "amibot.csv"  # Ensure it has columns: 'Field', 'Value'

# Try UTF-8 first and fall back to Windows-1252. The encoding that actually
# worked is remembered so the status line below reports it truthfully.
csv_encoding = 'utf-8'
try:
    df = pd.read_csv(csv_path, encoding=csv_encoding)
except UnicodeDecodeError:
    csv_encoding = 'cp1252'  # fallback encoding
    df = pd.read_csv(csv_path, encoding=csv_encoding)

print(f"✅ Loaded CSV with encoding: {csv_encoding} | columns: {list(df.columns)}")

# 📚 Preprocess data
# Each 'Field' cell holds one or more comma-separated phrasings of the same
# question; every phrasing becomes its own searchable variant.
field_variants = []  # variants in row order; index-aligned with field_embeddings
field_map = {}       # variant -> answer; a repeated variant keeps the last row's value

for field_str, value in zip(df["Field"], df["Value"]):
    # Empty cells are read as NaN (a float), which has no .split(); skip them.
    if pd.isna(field_str):
        continue
    # str() guards against cells pandas parsed as numbers.
    variants = [v.strip().lower() for v in str(field_str).split(",") if v.strip()]
    for v in variants:
        field_variants.append(v)
        field_map[v] = value  # Map each variant to its value

# One embedding per entry of field_variants, in the same order.
field_embeddings = model.encode(field_variants, convert_to_tensor=True)

# 🔧 Function: Normalize text (lowercase + strip punctuation; no real spell correction yet)
def correct_typos(text):
    """Normalize user text before matching.

    Despite the name, no spelling correction is performed. The text is
    lowercased, every character that is neither a word character nor
    whitespace is removed, and whitespace is trimmed and collapsed to single
    spaces.

    Args:
        text: Raw text typed by the user.

    Returns:
        The normalized string; empty if nothing but punctuation/whitespace
        was supplied.
    """
    text = re.sub(r"[^\w\s]", "", text.lower())
    # Split/join *after* removing punctuation: deleting a symbol can expose
    # whitespace at the edges ("hello !" -> "hello ") or leave double spaces.
    return " ".join(text.split())

# 🔧 Function: Expand with synonyms using WordNet
def expand_with_synonyms(text, max_synonyms=2):
    """Append a few WordNet synonyms after each word of ``text``.

    For every whitespace-separated word, the lemma names of all its WordNet
    synsets are collected (underscores turned into spaces), de-duplicated in
    the order WordNet returns them, and the first ``max_synonyms`` are placed
    right after the word. Words with no usable synonym are kept unchanged.

    Selection is deterministic: candidates are never passed through a ``set``,
    whose iteration order for strings can change between interpreter runs.
    The word itself is not repeated as its own synonym.

    Args:
        text: Input text, ideally already lowercased and punctuation-free.
        max_synonyms: Maximum number of synonyms added per word (default 2).

    Returns:
        The expanded text, words and their synonyms joined by single spaces.
        Empty or whitespace-only input yields an empty string.
    """
    limit = max(max_synonyms, 0)
    expanded_words = []
    for word in text.split():
        lemma_names = (
            lemma.name().replace("_", " ")
            for syn in wordnet.synsets(word)
            for lemma in syn.lemmas()
        )
        # dict.fromkeys de-duplicates while preserving first-seen order.
        unique = dict.fromkeys(
            name for name in lemma_names if name.lower() != word.lower()
        )
        picked = list(unique)[:limit]
        expanded_words.append(" ".join([word, *picked]))
    return " ".join(expanded_words)

# 🤖 Function: Get AmiBot response
def get_response(user_input, model, field_variants, field_embeddings, field_map, threshold=0.55, fuzz_threshold=55):
    """Answer a user question by matching it against the known field variants.

    The input is normalized with ``correct_typos`` and expanded with
    ``expand_with_synonyms``, then embedded with ``model``. The variant with
    the highest cosine similarity is the candidate. The candidate is accepted
    when either its semantic score reaches ``threshold`` or the fuzzy
    token-set ratio between the raw input and the candidate reaches
    ``fuzz_threshold``. The fuzzy score is only computed for the semantic
    best match, not for every variant.

    Args:
        user_input: Raw text typed by the user (``None`` is treated as empty).
        model: Encoder exposing ``encode(text, convert_to_tensor=True)``.
        field_variants: Searchable phrases, index-aligned with
            ``field_embeddings``.
        field_embeddings: Embeddings of ``field_variants``.
        field_map: Mapping from variant to the answer to return.
        threshold: Minimum cosine similarity for a semantic match.
        fuzz_threshold: Minimum fuzzy ratio for a fuzzy match.

    Returns:
        A formatted reply string: the matched answer with both scores, or an
        apology (with a suggestion when one is available).
    """
    original_input = (user_input or "").strip()

    # Nothing to match against: max()/argmax() over an empty similarity row
    # would raise, so answer gracefully instead.
    if not field_variants:
        return "\n🤖 Sorry, I don’t have any answers loaded yet."

    # Blank input would still be embedded and produce an arbitrary
    # "Did you mean" suggestion; ask for a rephrase instead.
    if not original_input:
        return "\n🤖 Sorry, I’m not sure what you meant.\nPlease rephrase your question."

    corrected_input = correct_typos(original_input)
    expanded_input = expand_with_synonyms(corrected_input)

    query_embedding = model.encode(expanded_input, convert_to_tensor=True)
    # cos_sim returns a matrix; row 0 holds the scores for this single query.
    similarities = util.cos_sim(query_embedding, field_embeddings)[0]

    best_score = float(similarities.max())
    best_idx = int(similarities.argmax())
    best_field = field_variants[best_idx]
    best_answer = field_map[best_field]

    # Fuzzy check uses the raw (un-expanded) input so added synonyms do not
    # dilute the string comparison.
    fuzzy_score = fuzz.token_set_ratio(original_input.lower(), best_field.lower())

    if best_score >= threshold or fuzzy_score >= fuzz_threshold:
        return f"\n✅ Matched: '{best_field}'\n📐 Semantic: {best_score:.2f}, 🔤 Fuzzy: {fuzzy_score}\n👉 {best_answer}"
    else:
        return f"\n🤖 Sorry, I’m not sure what you meant.\n💡 Did you mean: '{best_field}'?\nPlease rephrase your question."

# 💾 Save necessary components for Flask app
save_dir = "amibot_data"
os.makedirs(save_dir, exist_ok=True)

# Plain Python / pandas objects are pickled, one file per object.
pickled_artifacts = {
    "df.pkl": df,
    "field_variants.pkl": field_variants,
    "field_map.pkl": field_map,
}
for filename, obj in pickled_artifacts.items():
    with open(os.path.join(save_dir, filename), "wb") as f:
        pickle.dump(obj, f)

# The embeddings tensor is stored with torch's own serializer.
# NOTE(review): torch.save keeps the tensor on its current device; a loader
# on a host without that device needs map_location — confirm how the
# consuming app loads this file.
embeddings_filename = "field_embeddings.pt"
torch.save(field_embeddings, os.path.join(save_dir, embeddings_filename))

# Build the status line from what was actually written so it cannot drift
# from the file list or the target directory.
saved_files = [*pickled_artifacts, embeddings_filename]
print(f"💾 Saved {', '.join(saved_files)} to '{save_dir}/'")

# 🧪 Test in Notebook (example)
# Simple prompt loop: read a question, print AmiBot's reply, repeat until
# the user types 'exit' or closes/interrupts the prompt.
while True:
    try:
        user_input = input("\nAsk AmiBot (type 'exit' to quit): ")
    except (EOFError, KeyboardInterrupt):
        # Ctrl-D / Ctrl-C (or an interrupted notebook cell) ends the session
        # without a traceback.
        break
    # Compare on the trimmed text so "exit " or " Exit" also quit.
    if user_input.strip().lower() == "exit":
        break
    response = get_response(user_input, model, field_variants, field_embeddings, field_map)
    print(response)




AttributeError: 'MessageFactory' object has no attribute 'GetPrototype'

AttributeError: 'MessageFactory' object has no attribute 'GetPrototype'

AttributeError: 'MessageFactory' object has no attribute 'GetPrototype'

AttributeError: 'MessageFactory' object has no attribute 'GetPrototype'

AttributeError: 'MessageFactory' object has no attribute 'GetPrototype'




[nltk_data] Downloading package wordnet to
[nltk_data]     C:\Users\amrit\AppData\Roaming\nltk_data...
[nltk_data]   Package wordnet is already up-to-date!
[nltk_data] Downloading package omw-1.4 to
[nltk_data]     C:\Users\amrit\AppData\Roaming\nltk_data...
[nltk_data]   Package omw-1.4 is already up-to-date!


📥 Loading transformer model...
✅ Loaded CSV with encoding: Index(['Field', 'Value'], dtype='object')
💾 Saved df.pkl, field_variants.pkl, and field_embeddings.pt to 'amibot_data/'



Ask AmiBot (type 'exit' to quit):  wife



✅ Matched: 'wife'
📐 Semantic: 0.75, 🔤 Fuzzy: 100.0
👉 Committed for life — merged with Sneha Mishra in a lifelong partnership.
