In [6]:
import json
import pandas as pd
from sklearn.preprocessing import LabelEncoder
from sklearn.feature_extraction.text import TfidfVectorizer
from sklearn.linear_model import LogisticRegression
from sklearn.metrics import accuracy_score, precision_score, recall_score, f1_score, classification_report
import pickle
import re

# Step 1: Load dataset
# The path is a raw string so the Windows backslashes stay literal: "\M", "\C"
# and "\i" are not valid escape sequences and make a normal string literal
# emit a SyntaxWarning on recent Python versions. The file is decoded as
# UTF-8 explicitly instead of relying on the platform's default encoding.
with open(
    r"D:\ML PROJECTS\Chatbot using NLP (Rule-Based + ML Hybrid)\intents_extended.json",
    "r",
    encoding="utf-8",
) as f:
    data = json.load(f)

# Step 2: Extract patterns and labels
# Flatten the intents into (pattern, tag) pairs, one pair per training
# pattern, then split the pairs into two parallel lists.
samples = [
    (pattern, intent["tag"])
    for intent in data["intents"]
    for pattern in intent["patterns"]
]
texts = [pattern for pattern, _ in samples]
labels = [tag for _, tag in samples]

# Step 3: Preprocessing (lowercase, keep only a-z and whitespace, tidy spacing)
def clean_text(text):
    """Return *text* lowercased, reduced to letters, with tidy spacing.

    After lowercasing, every character other than ``a``-``z`` and whitespace
    is deleted, each run of whitespace is collapsed to a single space, and
    leading/trailing whitespace is removed.
    """
    letters_only = re.sub(r"[^a-z\s]", "", text.lower())
    return re.sub(r"\s+", " ", letters_only).strip()

# clean_text() already returns a stripped string with single spaces between
# words, so splitting and re-joining it would only rebuild the same string.
texts = [clean_text(t) for t in texts]

# Step 4: Encode the labels
# Map each intent tag string to an integer class id; the fitted encoder is
# kept so the ids can be mapped back to tag names later.
lbl_encoder = LabelEncoder()
y = lbl_encoder.fit_transform(labels)

# Step 5: Vectorize text using TF-IDF
# Fit the vectorizer on the cleaned patterns and build the feature matrix.
vectorizer = TfidfVectorizer()
X = vectorizer.fit_transform(texts)

# Step 6: Train classifier
# fit() returns the estimator itself, so construction and training chain.
model = LogisticRegression().fit(X, y)

# Step 7: Evaluate (on the same data as we have a small dataset)
# NOTE: predictions are made on X, the same matrix the model was fitted on,
# so these are training-set scores rather than held-out performance.
y_pred = model.predict(X)
acc = accuracy_score(y, y_pred)
# Precision, recall and F1 all use the same "weighted" class averaging.
prec, rec, f1 = (
    metric(y, y_pred, average="weighted")
    for metric in (precision_score, recall_score, f1_score)
)
report = classification_report(y, y_pred, target_names=lbl_encoder.classes_)

# Print the headline scores rounded to two decimals, then the full report.
for caption, score in (
    ("✅ Accuracy :", acc),
    ("✅ Precision:", prec),
    ("✅ Recall   :", rec),
    ("✅ F1 Score :", f1),
):
    print(caption, round(score, 2))
print("\n📋 Classification Report:\n", report)

# Step 8: Save the model and preprocessing objects
# Each file is opened in a `with` block so its handle is flushed and closed
# as soon as the pickle is written, instead of being left open until the
# file object happens to be garbage-collected.
artifacts = (
    ("intent_model.pkl", model),
    ("intent_vectorizer.pkl", vectorizer),
    ("intent_label_encoder.pkl", lbl_encoder),
)
for filename, obj in artifacts:
    with open(filename, "wb") as out_file:
        pickle.dump(obj, out_file)



✅ Accuracy : 1.0
✅ Precision: 1.0
✅ Recall   : 1.0
✅ F1 Score : 1.0

📋 Classification Report:
                            precision    recall  f1-score   support

               covid_info       1.00      1.00      1.00         8
                 greeting       1.00      1.00      1.00         8
                     joke       1.00      1.00      1.00         8
restaurant_recommendation       1.00      1.00      1.00         8
                  weather       1.00      1.00      1.00         8

                 accuracy                           1.00        40
                macro avg       1.00      1.00      1.00        40
             weighted avg       1.00      1.00      1.00        40

