In [None]:
# ==========================================
# 🧠 HEALTH & WELLNESS CHATBOT - COLAB VERSION (WITH METRICS)
# ==========================================

# Install dependencies
!pip install scikit-learn pandas numpy joblib --quiet

import pandas as pd
from sklearn.feature_extraction.text import TfidfVectorizer
from sklearn.preprocessing import LabelEncoder
from sklearn.linear_model import LogisticRegression
from sklearn.model_selection import train_test_split
from sklearn.metrics import accuracy_score, precision_score, recall_score, f1_score, classification_report
import joblib, random

# =========================
# STEP 1: Load the dataset
# =========================
from google.colab import files

DATASET_NAME = "health_chatbot_dataset.csv"
# Columns the rest of the notebook reads: "text"/"intent" for training,
# "response" for picking the chatbot's reply.
REQUIRED_COLUMNS = ["intent", "text", "response"]

print("üìÅ Please upload your 'health_chatbot_dataset.csv' file")
uploaded = files.upload()

# files.upload() returns a {filename: contents} mapping. Prefer the name that
# was actually uploaded (it can differ from the expected one, e.g. when the
# file was renamed); if nothing was uploaded, fall back to a copy already on disk.
if uploaded and DATASET_NAME not in uploaded:
    dataset_path = next(iter(uploaded))
else:
    dataset_path = DATASET_NAME

data = pd.read_csv(dataset_path)

# A CSV written with its index shows up as an "Unnamed: 0" column; it carries
# no information for the chatbot, so drop any such column.
data = data.loc[:, ~data.columns.str.startswith("Unnamed:")]

missing_columns = [col for col in REQUIRED_COLUMNS if col not in data.columns]
if missing_columns:
    raise ValueError(
        f"Dataset '{dataset_path}' is missing required column(s): {missing_columns}"
    )

print("‚úÖ Dataset loaded! Rows:", len(data))
display(data.head())

# =========================
# STEP 2: Train-test split
# =========================
# Stratify on the intent so every class keeps the same share in both splits.
X_train, X_test, y_train, y_test = train_test_split(
    data["text"], data["intent"],
    test_size=0.2, random_state=42, stratify=data["intent"]
)

# Utterances repeated in the dataset can land on both sides of the split, in
# which case the "held-out" metrics partly measure memorisation. Report how
# much of the test split the model will already have seen verbatim.
seen_share = X_test.isin(set(X_train)).mean()
if seen_share > 0:
    print(
        f"Warning: {seen_share:.1%} of test texts also appear verbatim in the "
        "training split; held-out metrics will be optimistic."
    )

# Encode intent labels (fitted on the training labels only)
le = LabelEncoder()
y_train_enc = le.fit_transform(y_train)
y_test_enc = le.transform(y_test)

# Convert text to TF-IDF vectors; the vocabulary is learned from the training
# split and then only applied to the test split.
vectorizer = TfidfVectorizer()
X_train_tfidf = vectorizer.fit_transform(X_train)
X_test_tfidf = vectorizer.transform(X_test)

# =========================
# STEP 3: Train the model
# =========================
print("\n‚è≥ Training model...")

# fit() hands back the estimator itself, so building and training the
# classifier on the TF-IDF training matrix chain into a single expression.
model = LogisticRegression(max_iter=1000).fit(X_train_tfidf, y_train_enc)

print("‚úÖ Model training complete!")

# =========================
# STEP 4: Evaluate the model
# =========================
print("\nüìä MODEL PERFORMANCE METRICS:\n")

# Predict once on the held-out TF-IDF matrix and reuse the result for every metric.
y_pred = model.predict(X_test_tfidf)

# Precision, recall and F1 all use the same support-weighted averaging and
# report 0 (instead of warning) for classes that receive no predictions.
weighted_kwargs = {"average": "weighted", "zero_division": 0}

accuracy = accuracy_score(y_test_enc, y_pred)
precision, recall, f1 = (
    scorer(y_test_enc, y_pred, **weighted_kwargs)
    for scorer in (precision_score, recall_score, f1_score)
)

print(f"Accuracy:  {accuracy:.4f}")
print(f"Precision: {precision:.4f}")
print(f"Recall:    {recall:.4f}")
print(f"F1 Score:  {f1:.4f}")

print("\nüìå Classification Report:\n")
# target_names maps the encoded class ids back to the readable intent labels.
per_class_report = classification_report(y_test_enc, y_pred, target_names=le.classes_)
print(per_class_report)

# =========================
# STEP 5: Test chatbot in Colab
# =========================

def chatbot_response(user_input):
    """Return a reply for the intent predicted from ``user_input``.

    The message is vectorized with the fitted ``vectorizer``, classified by
    ``model``, decoded to an intent name through ``le``, and a reply is drawn
    at random from the ``response`` values of ``data`` rows with that intent.

    Args:
        user_input: The user's message.

    Returns:
        str: A randomly chosen response for the predicted intent; a prompt to
        type something when the message is empty, whitespace-only or not a
        string; or a generic fallback when the intent has no usable response.
    """
    # An empty message becomes an all-zero TF-IDF row, so the classifier would
    # pick an intent without any evidence from the text. Ask for input instead.
    if not isinstance(user_input, str) or not user_input.strip():
        return "Please type a message so I can help."

    X = vectorizer.transform([user_input])
    intent_pred = model.predict(X)[0]
    intent_name = le.inverse_transform([intent_pred])[0]

    # Choose a random response for the predicted intent, skipping missing
    # cells so a NaN is never handed back as the reply.
    responses = data.loc[data["intent"] == intent_name, "response"].dropna().tolist()
    if responses:
        return random.choice(responses)
    return "I'm not sure, but try to stay healthy!"

print("\nüí¨ Chatbot is ready! Type below to talk (type 'quit' to exit):\n")

EXIT_COMMANDS = {"quit", "exit", "bye"}

while True:
    try:
        # Strip so that inputs like "quit " or " bye" still end the chat.
        user = input("You: ").strip()
    except (KeyboardInterrupt, EOFError):
        # Stopping the cell while it waits for input raises KeyboardInterrupt
        # out of input(); treat it as a normal exit instead of a traceback.
        print("\nBot: Goodbye! Stay healthy ‚ù§Ô∏è")
        break
    if user.lower() in EXIT_COMMANDS:
        print("Bot: Goodbye! Stay healthy ‚ù§Ô∏è")
        break
    print("Bot:", chatbot_response(user))


üìÅ Please upload your 'health_chatbot_dataset.csv' file


‚úÖ Dataset loaded! Rows: 5000


Unnamed: 0,intent,text,response
0,goodbye,good night,good night! sleep well
1,symptom,i feel tired,get some sleep and eat something healthy
2,wellness_tips,how to stay fit,"eat clean, sleep well, and stay active"
3,exercise,how can i lose weight,try walking daily and eat light meals
4,hydration,how much water should i drink,around 8 glasses a day is good



‚è≥ Training model...
‚úÖ Model training complete!

üìä MODEL PERFORMANCE METRICS:

Accuracy:  1.0000
Precision: 1.0000
Recall:    1.0000
F1 Score:  1.0000

üìå Classification Report:

               precision    recall  f1-score   support

         diet       1.00      1.00      1.00        95
     exercise       1.00      1.00      1.00       102
      goodbye       1.00      1.00      1.00       107
     greeting       1.00      1.00      1.00        92
    hydration       1.00      1.00      1.00       109
mental_health       1.00      1.00      1.00       105
   motivation       1.00      1.00      1.00        97
        sleep       1.00      1.00      1.00        99
      symptom       1.00      1.00      1.00        99
wellness_tips       1.00      1.00      1.00        95

     accuracy                           1.00      1000
    macro avg       1.00      1.00      1.00      1000
 weighted avg       1.00      1.00      1.00      1000


üí¨ Chatbot is ready! Type below to t

KeyboardInterrupt: Interrupted by user

In [None]:
from sklearn.model_selection import cross_val_score
from sklearn.pipeline import make_pipeline

# Cross-validate the whole text -> TF-IDF -> classifier chain on the raw text.
# Transforming everything with the already-fitted `vectorizer` would let each
# fold's held-out rows influence the vocabulary/IDF weights used to score them.
# cross_val_score clones the estimator for every fold, so the fitted
# `vectorizer` and `model` are only used as configuration templates here and
# are not refitted or modified.
cv_pipeline = make_pipeline(vectorizer, model)

# NOTE(review): if the dataset contains repeated texts, folds can still share
# identical examples; perfect scores should be read with that in mind.
scores = cross_val_score(cv_pipeline, data["text"],
                         le.transform(data["intent"]),
                         cv=5, scoring="f1_weighted")

scores


array([1., 1., 1., 1., 1.])

In [None]:
import joblib

# Persist the three fitted objects needed to serve predictions later:
# the classifier, the TF-IDF vectorizer and the intent label encoder.
model_path, vectorizer_path, encoder_path = (
    "health_chatbot_model.pkl",
    "health_chatbot_vectorizer.pkl",
    "health_chatbot_labelencoder.pkl",
)

joblib.dump(model, model_path)
joblib.dump(vectorizer, vectorizer_path)
joblib.dump(le, encoder_path)
