
# Emotion-Aware Chatbot (Gradio)

This notebook builds a chatbot on BlenderBot (`facebook/blenderbot-400M-distill`) for response generation and GoEmotions for emotion detection. It adds:

- Multi‑turn context (last 3 exchanges) in generation  
- Lightweight guardrails (profanity + crisis/self-harm detection)  
- Emotion **badge** in the UI (not mixed into the assistant text)  
- Streaming replies (token-by-token feel)  
- Logging to JSONL (timestamp, session id, user, bot, emotion)  
- Polished decoding controls: Beam / Sampling, temperature, top‑p, repetition penalty, length penalty, no‑repeat‑ngram  
- Export conversation to JSON or TXT  
- Clear UX + disclaimer



In [1]:

# --- Imports & basic setup ---
import os, json, uuid, re
from datetime import datetime
from pathlib import Path

import torch
from transformers import (
    AutoTokenizer, AutoModelForSequenceClassification, pipeline,
    BlenderbotTokenizer, BlenderbotForConditionalGeneration
)
import gradio as gr

# Session + logs
# One random id per process; it tags every log record and export file name.
SESSION_ID = str(uuid.uuid4())
LOG_DIR = Path("logs")
LOG_DIR.mkdir(parents=True, exist_ok=True)
LOG_FILE = LOG_DIR / "conversations.jsonl"

# Device & perf
device = "cuda" if torch.cuda.is_available() else "cpu"
if device == "cpu":
    # os.cpu_count() returns None when the count cannot be determined. In that
    # case keep torch's default thread count instead of relying on a swallowed
    # TypeError from `None // 2`.
    cpu_total = os.cpu_count()
    if cpu_total:
        try:
            # Use half of the logical CPUs, but never fewer than one thread.
            torch.set_num_threads(max(1, cpu_total // 2))
        except RuntimeError as err:
            # Thread tuning is best-effort only: report the problem and carry on.
            print("Could not set torch thread count:", err)
print("Using device:", device)


Using device: cuda


In [2]:

# --- Load models ---
# Emotion classifier. `top_k=None` is passed so the pipeline is not limited to
# a single best label; get_emotion_label() picks the highest score itself.
EMOTION_MODEL_NAME = "SamLowe/roberta-base-go_emotions"
pipeline_device = 0 if device == "cuda" else -1
emotion_classifier = pipeline(
    "text-classification",
    model=EMOTION_MODEL_NAME,
    top_k=None,
    device=pipeline_device,
)

# Chat model and its tokenizer, both loaded from the same checkpoint name.
CHAT_MODEL_NAME = "facebook/blenderbot-400M-distill"
chatbot_tokenizer = BlenderbotTokenizer.from_pretrained(CHAT_MODEL_NAME)
chatbot_model = BlenderbotForConditionalGeneration.from_pretrained(CHAT_MODEL_NAME)
# On GPU the weights are converted to half precision before the move; on CPU
# they are moved as loaded.
if device == "cuda":
    chatbot_model = chatbot_model.half()
chatbot_model = chatbot_model.to(device)

# Safety net: if the tokenizer has no pad token but does have an EOS token,
# reuse EOS for padding.
needs_pad_fallback = (
    chatbot_tokenizer.pad_token_id is None
    and chatbot_tokenizer.eos_token_id is not None
)
if needs_pad_fallback:
    chatbot_tokenizer.pad_token = chatbot_tokenizer.eos_token


The secret `HF_TOKEN` does not exist in your Colab secrets.
To authenticate with the Hugging Face Hub, create a token in your settings tab (https://huggingface.co/settings/tokens), set it as secret in your Google Colab and restart your session.
You will be able to reuse this secret in all of your notebooks.
Please note that authentication is recommended but still optional to access public models or datasets.


config.json: 0.00B [00:00, ?B/s]

model.safetensors:   0%|          | 0.00/499M [00:00<?, ?B/s]

tokenizer_config.json:   0%|          | 0.00/380 [00:00<?, ?B/s]

vocab.json: 0.00B [00:00, ?B/s]

merges.txt: 0.00B [00:00, ?B/s]

tokenizer.json: 0.00B [00:00, ?B/s]

special_tokens_map.json:   0%|          | 0.00/280 [00:00<?, ?B/s]

Device set to use cuda:0


tokenizer_config.json: 0.00B [00:00, ?B/s]

vocab.json: 0.00B [00:00, ?B/s]

merges.txt: 0.00B [00:00, ?B/s]

added_tokens.json:   0%|          | 0.00/16.0 [00:00<?, ?B/s]

special_tokens_map.json:   0%|          | 0.00/772 [00:00<?, ?B/s]

tokenizer.json: 0.00B [00:00, ?B/s]

config.json: 0.00B [00:00, ?B/s]

pytorch_model.bin:   0%|          | 0.00/730M [00:00<?, ?B/s]

model.safetensors:   0%|          | 0.00/730M [00:00<?, ?B/s]

generation_config.json:   0%|          | 0.00/347 [00:00<?, ?B/s]

In [3]:

# --- Guardrails and helper functions ---
# Whole-word, case-insensitive profanity match.
BAD = re.compile(r"\b(fuck|shit|bitch|bastard|asshole)\b", re.I)
# Crisis / self-harm phrases. Matches everything the original literal list did,
# plus common variants: "self harm" / "selfharm", "suicidal", "-ing" forms, and
# any amount of whitespace between the words of a phrase.
CRISIS = re.compile(
    r"\b("
    r"kill(?:ing)?\s+myself"
    r"|suicid(?:e|al)"
    r"|self[-\s]?harm(?:ing)?"
    r"|hurt(?:ing)?\s+myself"
    r"|end(?:ing)?\s+my\s+life"
    r")\b",
    re.I,
)


def check_guardrails(text: str):
    """Return a canned safety reply for *text*, or ``None`` if it is acceptable.

    Crisis language is checked first so that a message containing both crisis
    phrases and profanity gets the supportive reply rather than the
    "rephrase" reply.

    Args:
        text: The raw user message. ``None`` and ``""`` are treated as empty.

    Returns:
        The crisis message, the profanity message, or ``None`` when neither
        pattern matches.
    """
    candidate = text or ""
    if CRISIS.search(candidate):
        return ("I'm really sorry you're feeling this way. "
                "I can’t help with crises, but you’re not alone — please reach out to a local helpline, "
                "your doctor, or someone you trust right away.")
    if BAD.search(candidate):
        return "Let’s keep things respectful — could you rephrase that?"
    return None

# Map emotions -> style prefixes (kept short and safe).
# Keys are plain emotion names with no surrounding whitespace, so the table
# works for direct lookups as well as for the substring matching done in
# style_prefix_for_emotion().
STYLE_PREFIX = {
    "joy": "Be warm, friendly, and encouraging. ",
    "love": "Be empathetic and supportive. ",
    "optimism": "Be positive and constructive. ",
    "gratitude": "Acknowledge kindly and be gracious. ",
    "admiration": "Respond appreciatively and humbly. ",
    "amusement": "Stay light but clear. ",
    "sadness": "Be kind and reassuring. Offer gentle help. ",
    "fear": "Be calming and clear. Reduce uncertainty. ",
    "anger": "Stay calm, de-escalate, and be helpful. ",
    "disgust": "Acknowledge and steer to constructive next steps. ",
    "confusion": "Ask clarifying questions and keep it simple. ",
}

# GoEmotions has 28 labels; we pick the single highest-scoring one here and
# let style_prefix_for_emotion() reduce it to a smaller set.
def get_emotion_label(text: str) -> str:
    """Return the highest-scoring emotion label for *text*.

    Args:
        text: The user message to classify.

    Returns:
        The label of the top-scoring prediction, or ``"neutral"`` when *text*
        is empty/blank or classification fails for any reason.
    """
    # Nothing to classify: skip the model call entirely.
    if not text or not text.strip():
        return "neutral"
    try:
        preds = emotion_classifier(text)
        # The pipeline may return the per-label scores either nested
        # ([[{label, score}, ...]]) or flat ([{label, score}, ...]);
        # accept both so a shape difference does not degrade to "neutral".
        candidates = preds[0] if isinstance(preds[0], list) else preds
        top = max(candidates, key=lambda d: d["score"])
        return top["label"]
    except Exception:
        # Deliberate best-effort: emotion detection must never break the chat.
        return "neutral"

def style_prefix_for_emotion(label: str) -> str:
    """Pick the style instruction for an emotion label.

    The first STYLE_PREFIX key (whitespace-stripped, lower-cased) that occurs
    as a substring of the lower-cased *label* wins. A missing/empty label or a
    label with no matching key yields the generic instruction.
    """
    lowered = (label or "").lower()
    matches = (
        STYLE_PREFIX[name]
        for name in STYLE_PREFIX
        if name.strip().lower() in lowered
    )
    return next(matches, "Be clear, helpful, and concise. ")

def build_prompt(history, user_input, emotion_label, max_turns: int = 3) -> str:
    """Assemble the text prompt sent to the chat model.

    The prompt is: style prefix for the emotion, then up to *max_turns* of the
    most recent exchanges as ``User:`` / ``Bot:`` lines, then the new user
    message followed by an open ``Bot: `` cue.

    Args:
        history: Sequence of ``(user, bot)`` pairs, oldest first; may be
            ``None`` or empty. Empty halves of a pair are skipped.
        user_input: The new user message.
        emotion_label: Emotion label used to choose the style prefix.
        max_turns: How many of the latest exchanges to include. Zero or a
            negative value includes no prior context.

    Returns:
        The assembled prompt string.
    """
    prefix = style_prefix_for_emotion(emotion_label)
    # history[-0:] would return the *whole* list, so "no context" needs an
    # explicit branch instead of plain negative slicing.
    turns = (history or [])[-max_turns:] if max_turns > 0 else []
    parts = [prefix]
    for u, b in turns:
        if u:
            parts.append(f"User: {u}\n")
        if b:
            parts.append(f"Bot: {b}\n")
    parts.append(f"User: {user_input}\nBot: ")
    return "".join(parts)

def log_turn(user, bot, emotion):
    """Append one conversation turn to LOG_FILE as a single JSON line.

    Args:
        user: The user's message.
        bot: The assistant's reply.
        emotion: The emotion label detected for the user's message.

    Each record carries a UTC ISO-8601 timestamp and the process-wide
    SESSION_ID. Records are separated by a real newline so the file is valid
    JSONL (one JSON object per line).
    """
    record = {
        "ts": datetime.utcnow().isoformat(),
        "session": SESSION_ID,
        "user": user,
        "bot": bot,
        "emotion": emotion
    }
    with open(LOG_FILE, "a", encoding="utf-8") as f:
        # A real line terminator; a literal backslash + "n" would glue every
        # record onto one unparseable line.
        f.write(json.dumps(record) + "\n")


In [4]:

# --- Generation (with streaming) ---
@torch.inference_mode()
def generate_streaming(prompt: str,
                       decoding: str = "beam",
                       temperature: float = 0.8,
                       top_p: float = 0.9,
                       beams: int = 5,
                       length_penalty: float = 1.0,
                       repetition_penalty: float = 1.05,
                       no_repeat_ngram_size: int = 3,
                       max_new_tokens: int = 180):
    """Generate a reply for *prompt* and yield it as a growing string.

    The whole reply is generated in one ``generate`` call; the "streaming" is a
    replay that decodes one more output token on every step, so callers see the
    text grow.

    Args:
        prompt: Full prompt text (see build_prompt).
        decoding: ``"beam"`` for beam search; any other value uses sampling.
        temperature: Sampling temperature (sampling mode only).
        top_p: Nucleus sampling threshold (sampling mode only).
        beams: Number of beams (beam mode only).
        length_penalty: Beam-search length penalty (beam mode only).
        repetition_penalty: Repetition penalty applied in both modes.
        no_repeat_ngram_size: n-gram size that may not repeat, both modes.
        max_new_tokens: Upper bound on generated tokens.

    Yields:
        The stripped reply text decoded so far, each time it changes.
    """
    encoded = chatbot_tokenizer([prompt], return_tensors="pt")

    # The model config declares how many input positions it supports; a
    # multi-turn prompt can exceed that. Keep the *last* tokens so the newest
    # user message (at the end of the prompt) survives truncation.
    max_source_len = getattr(chatbot_model.config, "max_position_embeddings", None)
    if max_source_len and encoded["input_ids"].shape[1] > max_source_len:
        encoded = {name: tensor[:, -max_source_len:] for name, tensor in encoded.items()}
    inputs = {name: tensor.to(device) for name, tensor in encoded.items()}

    # Callers such as UI sliders may hand over whole numbers as floats, so the
    # integer-valued settings are cast explicitly.
    gen_kwargs = dict(
        max_new_tokens=int(max_new_tokens),
        repetition_penalty=repetition_penalty,
        no_repeat_ngram_size=int(no_repeat_ngram_size),
        return_dict_in_generate=True,
        output_scores=False,
    )

    if decoding == "beam":
        gen_kwargs.update(
            dict(num_beams=int(beams), early_stopping=True, length_penalty=length_penalty, do_sample=False)
        )
    else:
        gen_kwargs.update(
            dict(do_sample=True, temperature=temperature, top_p=top_p)
        )

    out = chatbot_model.generate(**inputs, **gen_kwargs)
    ids = out.sequences[0]

    # Replay the finished sequence prefix by prefix. Each prefix is decoded from
    # the start so the decoded text is always that of the whole prefix; only
    # prefixes that change the visible text are yielded.
    text = ""
    for i in range(1, ids.shape[0] + 1):
        partial = chatbot_tokenizer.decode(ids[:i], skip_special_tokens=True)
        if partial != text:
            text = partial
            yield text.strip()


In [7]:
# --- Gradio UI (with launch) ---
import json
from pathlib import Path
from datetime import datetime
import gradio as gr

# User-facing disclaimer text (contains Markdown bold markup).
DISCLAIMER = " ".join([
    "This is a demo assistant for general information.",
    "**It is not medical, legal, or professional advice.**",
    "If you feel unsafe or in crisis, please contact local emergency services or a helpline.",
])

def export_history_json(history):
    """Dump the conversation to ``chat_export_<SESSION_ID>.json``.

    Args:
        history: Sequence of ``(user, bot)`` pairs; ``None`` counts as empty.

    Returns:
        The path of the written file, as a string (relative to the current
        working directory).
    """
    turns = []
    for user_text, bot_text in (history or []):
        turns.append({"user": user_text, "bot": bot_text})

    payload = {
        "session": SESSION_ID,
        "exported_at": datetime.utcnow().isoformat(),
        "turns": turns,
    }
    # ensure_ascii=False keeps non-ASCII characters readable in the file.
    serialized = json.dumps(payload, ensure_ascii=False, indent=2)

    target = Path(f"chat_export_{SESSION_ID}.json")
    target.write_text(serialized, encoding="utf-8")
    return str(target)

def export_history_txt(history):
    """Dump the conversation to ``chat_export_<SESSION_ID>.txt`` as plain text.

    Each exchange becomes a ``User:`` line and a ``Bot:`` line (an empty half
    is omitted), followed by a blank separator line.

    Args:
        history: Sequence of ``(user, bot)`` pairs; ``None`` counts as empty.

    Returns:
        The path of the written file, as a string.
    """
    rendered = []
    for user_text, bot_text in (history or []):
        exchange = []
        if user_text:
            exchange.append(f"User: {user_text}")
        if bot_text:
            exchange.append(f"Bot:  {bot_text}")
        # Blank entry -> empty line between exchanges once joined.
        exchange.append("")
        rendered.extend(exchange)

    target = Path(f"chat_export_{SESSION_ID}.txt")
    target.write_text("\n".join(rendered), encoding="utf-8")
    return str(target)

def _clear():
    return [], [], "🫶 **Emotion:** `neutral`", ""

with gr.Blocks(title="Emotion-Aware Chatbot") as demo:
    gr.Markdown("# Emotion-Aware Chatbot (BlenderBot + GoEmotions)")
    gr.Markdown(DISCLAIMER)

    with gr.Row():
        # Left column: emotion badge, conversation view and message entry.
        with gr.Column(scale=3):
            emotion_md = gr.Markdown("🫶 **Emotion:** `neutral`")
            # The chatbot format is stated explicitly: history is kept as
            # (user, bot) pairs, which the export helpers iterate over.
            chatbot = gr.Chatbot(height=480, type="tuples")
            msg = gr.Textbox(placeholder="Type your message...", label="Your message", lines=2)

            with gr.Row():
                send_btn = gr.Button("Send", variant="primary")
                clear_btn = gr.Button("Clear")

        # Right column: generation settings and export buttons.
        with gr.Column(scale=2):
            gr.Markdown("### Decoding & Safety Controls")
            decoding = gr.Radio(["beam", "sampling"], value="beam", label="Decoding")
            temperature = gr.Slider(0.1, 1.5, value=0.8, step=0.05, label="Temperature (sampling)")
            top_p = gr.Slider(0.1, 1.0, value=0.9, step=0.05, label="Top-p (sampling)")
            beams = gr.Slider(1, 8, value=5, step=1, label="Beams (beam search)")
            length_penalty = gr.Slider(0.6, 1.4, value=1.0, step=0.05, label="Length penalty (beam)")
            repetition_penalty = gr.Slider(1.0, 1.5, value=1.05, step=0.01, label="Repetition penalty")
            no_repeat_ngram_size = gr.Slider(1, 6, value=3, step=1, label="No-repeat n-gram size")
            max_new_tokens = gr.Slider(32, 256, value=180, step=8, label="Max new tokens")

            gr.Markdown("### Export")
            export_json_btn = gr.Button("Export JSON")
            export_txt_btn = gr.Button("Export TXT")
            export_file = gr.File(label="Download export", interactive=False)

    # Event wiring. `gradio_respond` is not defined in this cell; it must
    # already exist in the session when this cell runs. The Send button and
    # pressing Enter in the textbox trigger the same handler with the same
    # inputs and outputs, so both are registered from one place.
    respond_inputs = [
        msg, chatbot, decoding, temperature, top_p, beams,
        length_penalty, repetition_penalty, no_repeat_ngram_size, max_new_tokens,
    ]
    respond_outputs = [chatbot, chatbot, emotion_md]
    for register in (send_btn.click, msg.submit):
        register(
            gradio_respond,
            inputs=list(respond_inputs),
            outputs=list(respond_outputs),
        )
    clear_btn.click(_clear, None, [chatbot, chatbot, emotion_md, msg])

    # Both export buttons publish their file through the same download widget.
    export_json_btn.click(lambda h: export_history_json(h), inputs=[chatbot], outputs=[export_file])
    export_txt_btn.click(lambda h: export_history_txt(h), inputs=[chatbot], outputs=[export_file])

    gr.Markdown("---")
    gr.Markdown("**Tip:** For faster responses on CPU, reduce `max new tokens` to ~120 and keep beam size ≤ 5.")

# Enable the request queue, then start the app inline without a public link.
queued_demo = demo.queue()
queued_demo.launch(share=False, inline=True)


  chatbot = gr.Chatbot(height=480, type="tuples")


Colab notebook detected. To show errors in colab notebook, set debug=True in launch()
Note: opening Chrome Inspector may crash demo inside Colab notebooks.
* To create a public link, set `share=True` in `launch()`.


<IPython.core.display.Javascript object>

