### Prompt Engineering Playground 🚀

This notebook demonstrates **prompt engineering** using **Hugging Face LLMs** and **Gradio**. You'll learn how to:

✅ Explore different prompt types  
✅ Control LLM outputs with parameters (temperature, top-p, max tokens)  
✅ Build an interactive **Gradio app** for live prompt experimentation  

By the end of this notebook, you'll have an **interactive playground** you can deploy on **Hugging Face Spaces**!


In [2]:
import gradio as gr
from transformers import AutoTokenizer, AutoModelForCausalLM
from transformers import AutoModelForSeq2SeqLM

# ===================================================
# STEP 1: Load Models and Tokenizers (on CPU)
# ===================================================
# Hub checkpoint identifiers, named once so each load site reads the same way.
DISTILGPT2_CHECKPOINT = "distilgpt2"
FLAN_T5_CHECKPOINT = "google/flan-t5-small"

# --- distilgpt2: decoder-only (causal) language model ---
print("🔄 Loading distilgpt2 model...")
distilgpt2_tokenizer = AutoTokenizer.from_pretrained(DISTILGPT2_CHECKPOINT)
# .to() returns the module itself, so loading and CPU placement can be chained.
distilgpt2_model = AutoModelForCausalLM.from_pretrained(DISTILGPT2_CHECKPOINT).to("cpu")
print("✅ distilgpt2 loaded successfully on CPU.")

# --- flan-t5-small: encoder-decoder (seq2seq) model ---
print("🔄 Loading flan-t5-small model...")
flan_tokenizer = AutoTokenizer.from_pretrained(FLAN_T5_CHECKPOINT)
flan_model = AutoModelForSeq2SeqLM.from_pretrained(FLAN_T5_CHECKPOINT).to("cpu")
print("✅ flan-t5-small loaded successfully on CPU.")


🔄 Loading distilgpt2 model...
✅ distilgpt2 loaded successfully on CPU.
🔄 Loading flan-t5-small model...
✅ flan-t5-small loaded successfully on CPU.


In [3]:
# ===========================
# Generation Function
# ===========================
def chat_with_model(message, history, model_name, max_tokens=200, temperature=0.7, top_p=0.9):
    """Generate a reply to ``message`` given the running chat ``history``.

    The history and the new message are flattened into a single
    ``User: ... / Assistant: ...`` transcript that ends with ``Assistant:``,
    which is fed to the selected model.

    Args:
        message: Latest user message (str).
        history: List of ``(user_msg, bot_msg)`` pairs from earlier turns.
            The new turn is appended to this list in place.
        model_name: ``"distilgpt2"`` or ``"flan-t5-small"``. Any other value
            records a "not recognized" reply instead of generating.
        max_tokens: Maximum number of *newly generated* tokens.
        temperature: Sampling temperature passed to ``generate``.
        top_p: Nucleus-sampling threshold passed to ``generate``.

    Returns:
        ``(history, history)`` -- the same updated list twice, once for the
        Chatbot display and once for the State component.
    """
    # A blank submission carries no prompt; leave the conversation untouched.
    if not message or not message.strip():
        return history, history

    # Pick the model. `is_causal` records whether the model echoes the prompt
    # in its output (decoder-only) or returns only the answer (seq2seq).
    if model_name == "distilgpt2":
        tokenizer, model, is_causal = distilgpt2_tokenizer, distilgpt2_model, True
    elif model_name == "flan-t5-small":
        tokenizer, model, is_causal = flan_tokenizer, flan_model, False
    else:
        history.append((message, "⚠️ Model not recognized."))
        return history, history

    # Flatten earlier turns plus the new message into one prompt.
    conversation = "".join(
        f"User: {user_msg}\nAssistant: {bot_msg}\n" for user_msg, bot_msg in history
    )
    conversation += f"User: {message}\nAssistant:"

    # Keep only the most recent tokens when the transcript is too long.
    # Tokenizer-side truncation cuts from the right by default, which would
    # drop the newest message and the trailing "Assistant:" cue.
    max_prompt_tokens = 512
    encoded = tokenizer(conversation, return_tensors="pt")
    input_ids = encoded["input_ids"][:, -max_prompt_tokens:].to(model.device)
    attention_mask = encoded["attention_mask"][:, -max_prompt_tokens:].to(model.device)

    generation_kwargs = dict(
        input_ids=input_ids,
        attention_mask=attention_mask,
        # max_new_tokens bounds only the reply; max_length would also count
        # the prompt and leave no room once the history grows.
        max_new_tokens=int(max_tokens),
        temperature=temperature,
        top_p=top_p,
        do_sample=True,
    )
    if is_causal:
        generation_kwargs["repetition_penalty"] = 1.2
        # distilgpt2 has no dedicated pad token; use EOS explicitly.
        generation_kwargs["pad_token_id"] = tokenizer.eos_token_id

    outputs = model.generate(**generation_kwargs)

    generated_ids = outputs[0]
    if is_causal:
        # Decoder-only output starts with the prompt tokens. Drop them by
        # token count rather than by character length of the prompt text.
        generated_ids = generated_ids[input_ids.shape[-1]:]

    response = tokenizer.decode(generated_ids, skip_special_tokens=True).strip()

    # Update history
    history.append((message, response))
    return history, history

# ===========================
# Gradio Interface
# ===========================
with gr.Blocks() as demo:
    gr.Markdown("# 🤖 Prompt Engineering Chatbot with Context Memory")

    # Model selector
    model_selector = gr.Dropdown(choices=["distilgpt2", "flan-t5-small"], value="distilgpt2", label="Choose Model")

    # Chatbot component (displays conversation)
    chatbot = gr.Chatbot()

    # User input field
    message_input = gr.Textbox(label="Your Message", placeholder="Ask me anything...")

    # Generation controls
    with gr.Row():
        max_tokens_slider = gr.Slider(10, 512, value=200, step=10, label="Max Tokens")
        temperature_slider = gr.Slider(0.1, 1.5, value=0.7, step=0.1, label="Temperature")
        top_p_slider = gr.Slider(0.1, 1.0, value=0.9, step=0.1, label="Top-p")

    # Clear chat button
    clear_button = gr.Button("Clear Chat")

    # State to hold conversation history as a list of (user, bot) pairs
    state = gr.State([])

    # Events: pressing Enter in the textbox runs one chat turn. The handler
    # returns the updated history twice -- for the display and for the state.
    submit_event = message_input.submit(
        fn=chat_with_model,
        inputs=[message_input, state, model_selector, max_tokens_slider, temperature_slider, top_p_slider],
        outputs=[chatbot, state]
    )
    # Once the turn has finished, empty the textbox so the next message
    # does not have to be typed over the previous one.
    submit_event.then(lambda: "", None, message_input)

    # Reset both the visible conversation and the stored history
    clear_button.click(lambda: ([], []), None, [chatbot, state])

# Launch app
demo.launch()

  chatbot = gr.Chatbot()


* Running on local URL:  http://127.0.0.1:7860

To create a public link, set `share=True` in `launch()`.


