In [1]:
# 📦 Install required packages (if needed)
# !pip install -q sentence-transformers nltk pandas torch rapidfuzz

import pandas as pd
from sentence_transformers import SentenceTransformer, util
from nltk.corpus import wordnet
import nltk
import re
import torch
import pickle
import os

# 📌 Fetch the WordNet corpora that the synonym expansion below looks words up in
for corpus_name in ("wordnet", "omw-1.4"):
    nltk.download(corpus_name)

# The CSV is expected to provide the columns 'Field' and 'Value'.
csv_path = "amibot.csv"
try:
    df = pd.read_csv(csv_path, encoding="utf-8")
except UnicodeDecodeError:
    # Not valid UTF-8: retry once with the Windows-1252 code page.
    df = pd.read_csv(csv_path, encoding="cp1252")

# ✅ Text cleaning
def correct_typos(text):
    """Normalize *text* so that it can be compared or embedded.

    Surrounding whitespace is trimmed, the text is lowercased, and every
    character that is neither a word character nor whitespace is removed.
    Despite the name, no spelling correction is performed.
    """
    normalized = text.strip().lower()
    return re.sub(r"[^\w\s]", "", normalized)

# ✅ Synonym expansion using WordNet
def expand_with_synonyms(text):
    """Return *text* with up to two WordNet synonyms added after each word.

    The text is split on whitespace. For every word, the lemma names of all
    of its WordNet synsets are collected (underscores turned into spaces),
    and the first two distinct names other than the word itself are appended
    right after it. Words with no such synonym are kept unchanged.

    The selection follows WordNet's own synset/lemma order, so the same
    input always yields the same output.
    """
    expanded_words = []
    for word in text.split():
        # dict.fromkeys removes duplicates while preserving first-seen order.
        # Slicing a set of strings instead would pick different synonyms
        # from one interpreter session to the next.
        candidates = dict.fromkeys(
            lemma.name().replace("_", " ")
            for syn in wordnet.synsets(word)
            for lemma in syn.lemmas()
        )
        # A word can be listed among its own lemmas; repeating it would use
        # up one of the two synonym slots without adding information.
        extras = [name for name in candidates if name.lower() != word.lower()][:2]
        expanded_words.append(" ".join([word, *extras]))
    return " ".join(expanded_words)

# ✅ Build field variant map
# field_map:      each lowercase variant -> the 'Value' of the row it came from
# field_variants: canonical (first) variant -> list of all variants filed under it
field_map = {}
field_variants = {}

for _, row in df.iterrows():
    field_str = row["Field"]
    response = row["Value"]

    # A blank 'Field' cell is read by pandas as NaN (a float), which has no
    # .split(); skip such rows instead of crashing.
    if pd.isna(field_str):
        continue

    variants = [v.strip().lower() for v in str(field_str).split(",") if v.strip()]
    if not variants:
        # Only commas/whitespace: there is no canonical name to file this under.
        continue

    for variant in variants:
        field_map[variant] = response

    # Merge instead of overwrite: when several rows share the same first
    # variant, the earlier rows' variants must still be kept so they get
    # embedded later. Duplicates are dropped to avoid duplicate queries.
    canonical = variants[0]
    known_variants = field_variants.setdefault(canonical, [])
    for variant in variants:
        if variant not in known_variants:
            known_variants.append(variant)

# ✅ Embed every variant after cleaning and synonym expansion
model = SentenceTransformer("all-MiniLM-L6-v2")

query_list = []           # expanded variant texts, in encoding order
variant_to_response = {}  # expanded variant text -> response

for variants in field_variants.values():
    for v in variants:
        expanded = expand_with_synonyms(correct_typos(v))
        query_list.append(expanded)
        variant_to_response[expanded] = field_map[v]

# ✅ One batched encode call over all expanded queries
embeddings = model.encode(query_list, convert_to_tensor=True)

# ✅ Persist the embeddings and lookup tables
os.makedirs("amibot_data", exist_ok=True)

torch.save(embeddings, "amibot_data/field_embeddings.pt")

# Both lookup structures are pickled the same way, one file each.
for pickle_path, payload in (
    ("amibot_data/query_list.pkl", query_list),
    ("amibot_data/variant_to_response.pkl", variant_to_response),
):
    with open(pickle_path, "wb") as f:
        pickle.dump(payload, f)

print("✅ All files saved to 'amibot_data/' for light Flask usage.")


AttributeError: 'MessageFactory' object has no attribute 'GetPrototype'

AttributeError: 'MessageFactory' object has no attribute 'GetPrototype'

AttributeError: 'MessageFactory' object has no attribute 'GetPrototype'

AttributeError: 'MessageFactory' object has no attribute 'GetPrototype'

AttributeError: 'MessageFactory' object has no attribute 'GetPrototype'




[nltk_data] Downloading package wordnet to
[nltk_data]     C:\Users\amrit\AppData\Roaming\nltk_data...
[nltk_data]   Package wordnet is already up-to-date!
[nltk_data] Downloading package omw-1.4 to
[nltk_data]     C:\Users\amrit\AppData\Roaming\nltk_data...
[nltk_data]   Package omw-1.4 is already up-to-date!


FileNotFoundError: [Errno 2] No such file or directory: 'amibot.csv'