In [5]:
#!pip install gradio
#!pip install transformers -U

In [6]:
import gradio as gr
from transformers import AutoModelForCausalLM, AutoTokenizer
import torch


In [7]:
# Load the tokenizer and model from Hugging Face
model_name = "distilgpt2"
tokenizer = AutoTokenizer.from_pretrained(model_name)
model = AutoModelForCausalLM.from_pretrained(model_name)

# Ensure you're using GPU if available
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
model.to(device)




GPT2LMHeadModel(
  (transformer): GPT2Model(
    (wte): Embedding(50257, 768)
    (wpe): Embedding(1024, 768)
    (drop): Dropout(p=0.1, inplace=False)
    (h): ModuleList(
      (0-5): 6 x GPT2Block(
        (ln_1): LayerNorm((768,), eps=1e-05, elementwise_affine=True)
        (attn): GPT2Attention(
          (c_attn): Conv1D(nf=2304, nx=768)
          (c_proj): Conv1D(nf=768, nx=768)
          (attn_dropout): Dropout(p=0.1, inplace=False)
          (resid_dropout): Dropout(p=0.1, inplace=False)
        )
        (ln_2): LayerNorm((768,), eps=1e-05, elementwise_affine=True)
        (mlp): GPT2MLP(
          (c_fc): Conv1D(nf=3072, nx=768)
          (c_proj): Conv1D(nf=768, nx=3072)
          (act): NewGELUActivation()
          (dropout): Dropout(p=0.1, inplace=False)
        )
      )
    )
    (ln_f): LayerNorm((768,), eps=1e-05, elementwise_affine=True)
  )
  (lm_head): Linear(in_features=768, out_features=50257, bias=False)
)

In [8]:
def generate_response(user_input):
    # Encode user input and convert to tensor
    inputs = tokenizer(user_input, return_tensors="pt").to(device)
    
    # Generate response
    output = model.generate(
        inputs["input_ids"],
        max_length=150,
        num_return_sequences=1,
        pad_token_id=tokenizer.eos_token_id,
        no_repeat_ngram_size=2,
        do_sample=True,
        temperature=0.7,
        top_p=0.9
    )
    
    # Decode the generated tokens
    response = tokenizer.decode(output[0], skip_special_tokens=True)
    
    # Remove the original prompt from the response to get a clean reply
    return response[len(user_input):].strip()


In [9]:
# Create Gradio interface
with gr.Blocks() as demo:
    gr.Markdown("# 🤖 Simple Chatbot using Hugging Face & Gradio")
    chatbot = gr.Chatbot()
    with gr.Row():
        user_input = gr.Textbox(
            label="Your Message", placeholder="Type a message..."
        )
        send_button = gr.Button("Send")
    
    # Function to handle user interaction
    def chat(user_message, history=[]):
        response = generate_response(user_message)
        history.append((user_message, response))
        return history, ""

    # Connect the Gradio components
    send_button.click(chat, [user_input, chatbot], [chatbot, user_input])

# Launch the Gradio app
demo.launch()




* Running on local URL:  http://127.0.0.1:7860

To create a public link, set `share=True` in `launch()`.




The attention mask is not set and cannot be inferred from input because pad token is same as eos token. As a consequence, you may observe unexpected behavior. Please pass your input's `attention_mask` to obtain reliable results.
