In [1]:
!pip install torch transformers gradio flask peft

Collecting gradio
  Downloading gradio-5.16.2-py3-none-any.whl.metadata (16 kB)
Collecting nvidia-cuda-nvrtc-cu12==12.4.127 (from torch)
  Downloading nvidia_cuda_nvrtc_cu12-12.4.127-py3-none-manylinux2014_x86_64.whl.metadata (1.5 kB)
Collecting nvidia-cuda-runtime-cu12==12.4.127 (from torch)
  Downloading nvidia_cuda_runtime_cu12-12.4.127-py3-none-manylinux2014_x86_64.whl.metadata (1.5 kB)
Collecting nvidia-cuda-cupti-cu12==12.4.127 (from torch)
  Downloading nvidia_cuda_cupti_cu12-12.4.127-py3-none-manylinux2014_x86_64.whl.metadata (1.6 kB)
Collecting nvidia-cudnn-cu12==9.1.0.70 (from torch)
  Downloading nvidia_cudnn_cu12-9.1.0.70-py3-none-manylinux2014_x86_64.whl.metadata (1.6 kB)
Collecting nvidia-cublas-cu12==12.4.5.8 (from torch)
  Downloading nvidia_cublas_cu12-12.4.5.8-py3-none-manylinux2014_x86_64.whl.metadata (1.5 kB)
Collecting nvidia-cufft-cu12==11.2.1.3 (from torch)
  Downloading nvidia_cufft_cu12-11.2.1.3-py3-none-manylinux2014_x86_64.whl.metadata (1.5 kB)
Collecting nvi

In [2]:

!wget -O lora_finetune.zip https://www.dropbox.com/scl/fi/5s6bkx3k7ja7f6gul87s0/flan_t5_finetuned_opus_books_lora.zip?rlkey=ygbswjmgzfwwf707pt08eli46&st=t819ej0f&dl=0
!unzip lora_finetune.zip
!ls flan_t5_finetuned_opus_books_lora


--2025-02-20 12:53:46--  https://www.dropbox.com/scl/fi/5s6bkx3k7ja7f6gul87s0/flan_t5_finetuned_opus_books_lora.zip?rlkey=ygbswjmgzfwwf707pt08eli46
Resolving www.dropbox.com (www.dropbox.com)... 162.125.85.18, 2620:100:6035:18::a27d:5512
Connecting to www.dropbox.com (www.dropbox.com)|162.125.85.18|:443... connected.
HTTP request sent, awaiting response... 302 Found
Location: https://ucab07421d79187dd3fd85a85779.dl.dropboxusercontent.com/cd/0/inline/CkdrN6ER_Nz1o1ZysISnCuUavpnYtDBeLq0cbSZaBXcK2ios68F05mRyKF_c3v1vugrkgB6ADBYGZscQPi6A5bcd_Uc204Yhp8j1UWF5Liq7WhOb2zNbjLvDBg1wURdi1buhlXeEyUYW-0KLAqMCBy3f/file# [following]
--2025-02-20 12:53:47--  https://ucab07421d79187dd3fd85a85779.dl.dropboxusercontent.com/cd/0/inline/CkdrN6ER_Nz1o1ZysISnCuUavpnYtDBeLq0cbSZaBXcK2ios68F05mRyKF_c3v1vugrkgB6ADBYGZscQPi6A5bcd_Uc204Yhp8j1UWF5Liq7WhOb2zNbjLvDBg1wURdi1buhlXeEyUYW-0KLAqMCBy3f/file
Resolving ucab07421d79187dd3fd85a85779.dl.dropboxusercontent.com (ucab07421d79187dd3fd85a85779.dl.dropboxusercontent.

In [4]:
import re
import torch
import gradio as gr
from transformers import (
    AutoTokenizer,
    AutoModelForSeq2SeqLM,
    AutoConfig,
    pipeline,
    logging
)
from peft import PeftModel

# Keep the transformers library quiet: only error-level messages are logged.
logging.set_verbosity_error()
# Run on the GPU when CUDA is available, otherwise on the CPU.
device = torch.device("cuda") if torch.cuda.is_available() else torch.device("cpu")

# ---------------- Ethical Filtering ----------------
# Lower-case terms; a message containing any of them as a whole word is refused.
banned_words = [
    "hate", "kill", "stupid", "idiot", "terrorist",
    "bomb", "nazi", "racist", "sexist",
]


def contains_banned_words(text: str) -> bool:
    """Report whether ``text`` contains any entry of ``banned_words``.

    Matching is case-insensitive and restricted to whole words (regex ``\\b``
    boundaries), so "kill" matches but "skillful" does not. ``None`` and the
    empty string are treated as containing nothing.

    Args:
        text: The message to inspect; may be ``None`` or empty.

    Returns:
        ``True`` if at least one banned word is present, otherwise ``False``.
    """
    lowered = (text or "").lower()
    return any(
        re.search(r"\b" + re.escape(term) + r"\b", lowered) is not None
        for term in banned_words
    )

# ---------------- Sentiment Analysis ----------------
# Sentiment classifier whose label/score output respond() uses to screen
# strongly negative messages before any text is generated.
sentiment_pipeline = pipeline(
    task="sentiment-analysis",
    model="distilbert-base-uncased-finetuned-sst-2-english",
)

# ---------------- Model Loading ----------------
base_checkpoint = "google/flan-t5-large"
adapter_checkpoint = "flan_t5_finetuned_opus_books_lora"  # local folder with adapter files

# Half precision only makes sense on the GPU; on the CPU fallback use float32.
model_dtype = torch.float16 if device.type == "cuda" else torch.float32

# Load the base model and tokenizer. No hard-coded device_map="cuda" here:
# placement is done by .to(device) below so the CPU fallback keeps working.
tokenizer = AutoTokenizer.from_pretrained(base_checkpoint)
model = AutoModelForSeq2SeqLM.from_pretrained(
    base_checkpoint,
    torch_dtype=model_dtype,
)
# Load the LoRA adapter from local files.
model = PeftModel.from_pretrained(model, adapter_checkpoint, local_files_only=True)
model.to(device)
# Inference only: make sure dropout layers are disabled.
model.eval()

# ---------------- Response Function ----------------
def respond(message: str,
            history: list,  # history is managed automatically by Gradio.
            system_message: str,
            max_tokens: int,
            temperature: float,
            top_p: float):
    """Produce the chatbot's reply to a single user message.

    Steps, in order:
      1. Empty or whitespace-only messages yield an empty reply.
      2. Messages containing a banned word are refused.
      3. Messages classified NEGATIVE with score > 0.85 get a fixed
         "keep it respectful" reply.
      4. Otherwise a prompt is built from the system message and the current
         user message, and the LoRA-finetuned FLAN-T5 model generates a reply.

    Args:
        message: The user's current message; may be ``None`` or empty.
        history: Conversation history supplied by Gradio. It is not used when
            building the prompt.
        system_message: Instruction text placed before the user message;
            ``None`` is treated as an empty string.
        max_tokens: Upper bound on the number of newly generated tokens.
        temperature: Sampling temperature passed to ``generate``.
        top_p: Nucleus-sampling threshold passed to ``generate``.

    Returns:
        The reply as a single string.
    """
    if message is None or message.strip() == "":
        return ""

    # Ethical filtering.
    if contains_banned_words(message):
        return "I'm sorry, but I cannot engage with that request."

    # Sentiment analysis. truncation=True keeps very long messages from
    # exceeding the classifier's maximum input length and raising.
    sentiment = sentiment_pipeline(message, truncation=True)[0]
    if sentiment["label"] == "NEGATIVE" and sentiment["score"] > 0.85:
        return "I sense some negativity in your message. Let's try to keep our conversation respectful."

    # Build prompt: use the system message plus the current user message.
    prompt = (system_message or "") + "\nUser: " + message + "\nBot: "
    inputs = tokenizer(prompt, return_tensors="pt", truncation=True, max_length=1024)
    inputs = {k: v.to(device) for k, v in inputs.items()}

    # The UI control is labelled "Max new tokens", so bound the number of
    # generated tokens directly; slider values are coerced because the UI may
    # deliver them as floats.
    outputs = model.generate(
        **inputs,
        max_new_tokens=max(1, int(max_tokens)),
        do_sample=True,
        temperature=float(temperature),
        top_p=float(top_p),
        num_beams=4,
        early_stopping=True,
    )
    response = tokenizer.decode(outputs[0], skip_special_tokens=True)
    # Remove the prompt from the output if it's there.
    if response.startswith(prompt):
        response = response[len(prompt):].strip()
    return response

# ---------------- Gradio Chat Interface ----------------
# Extra controls for the chat UI. Their order mirrors the parameters of
# respond() that follow (message, history):
# system_message, max_tokens, temperature, top_p.
generation_controls = [
    gr.Textbox(value="You are a friendly Chatbot. Please respond helpfully.", label="System message"),
    gr.Slider(minimum=1, maximum=2048, value=150, step=1, label="Max new tokens"),
    gr.Slider(minimum=0.1, maximum=4.0, value=0.7, step=0.1, label="Temperature"),
    gr.Slider(minimum=0.1, maximum=1.0, value=0.95, step=0.05, label="Top-p (nucleus sampling)"),
]

demo = gr.ChatInterface(
    fn=respond,
    type="messages",  # history is kept by Gradio in its "messages" format
    title="Ethical Chatbot",
    description="Chatbot using a LoRA-finetuned FLAN-T5-large model for responses and DistilBERT for sentiment analysis with ethical filtering.",
    additional_inputs=generation_controls,
)

if __name__ == "__main__":
    # share=True exposes a temporary public URL; debug=True keeps the cell
    # running so errors and logs stay visible in the notebook.
    demo.launch(share=True, debug=True)


Colab notebook detected. This cell will run indefinitely so that you can see errors and logs. To turn off, set debug=False in launch().
* Running on public URL: https://fa2331684062c22f4a.gradio.live

This share link expires in 72 hours. For free permanent hosting and GPU upgrades, run `gradio deploy` from the terminal in the working directory to deploy to Hugging Face Spaces (https://huggingface.co/spaces)


Keyboard interruption in main thread... closing server.
Killing tunnel 127.0.0.1:7860 <> https://fa2331684062c22f4a.gradio.live
