In [None]:
import gradio as gr
import torch
from transformers import AutoModelForCausalLM, AutoTokenizer

In [None]:
# Use the GPU when CUDA is available; otherwise fall back to the CPU.
if torch.cuda.is_available():
    device = torch.device("cuda")
else:
    device = torch.device("cpu")

In [None]:
# Display label (shown in the model dropdown) -> model identifier passed to
# `from_pretrained`. Insertion order is the order the labels are listed in.
llama_models = dict(
    [
        ("Meta-Llama 3 70B Instruct", "meta-llama/Meta-Llama-3-70B-Instruct"),
        ("Meta-Llama 3 8B Instruct", "meta-llama/Meta-Llama-3-8B-Instruct"),
        ("Llama 3.1 70B Instruct", "meta-llama/Llama-3.1-70B-Instruct"),
        ("Llama 3.1 8B Instruct", "meta-llama/Llama-3.1-8B-Instruct"),
    ]
)

In [None]:
def load_model(model_name):
    """Load a causal language model and its tokenizer.

    Args:
        model_name: Model identifier (or local path) accepted by
            ``AutoTokenizer.from_pretrained`` and
            ``AutoModelForCausalLM.from_pretrained``.

    Returns:
        A ``(model, tokenizer)`` tuple.
    """
    tokenizer = AutoTokenizer.from_pretrained(model_name)
    # device_map='auto' lets Accelerate decide where each module lives
    # (possibly sharded across devices or offloaded). The model must not be
    # moved again with `.to(...)` afterwards: that is redundant at best, and
    # fails when any module was offloaded to CPU/disk.
    model = AutoModelForCausalLM.from_pretrained(model_name, device_map='auto')
    return model, tokenizer

Cache loaded models so that each one is loaded at most once per session

In [None]:
# Starts empty; filled lazily, keyed by the selected model label, with the
# (model, tokenizer) pair returned by `load_model`.
model_cache = dict()

In [None]:
def summarize_text(text, model_choice, max_new_tokens=512):
    """Summarize text using the selected Llama model.

    Args:
        text: The text to summarize. Empty or whitespace-only input yields an
            empty summary without loading or calling any model.
        model_choice: A key of ``llama_models`` (the label chosen in the UI).
        max_new_tokens: Upper bound on the number of tokens generated for the
            summary. This is independent of the prompt length.

    Returns:
        The generated summary with surrounding whitespace stripped.

    Raises:
        gr.Error: If ``model_choice`` is not a known model label (for example
            ``None`` when nothing has been selected).
    """
    # Nothing to summarize: skip the (potentially very expensive) model load.
    if not text or not text.strip():
        return ""
    if model_choice not in llama_models:
        raise gr.Error("Please select a Llama model before summarizing.")

    # Load each model at most once and reuse it on later calls.
    if model_choice not in model_cache:
        model_cache[model_choice] = load_model(llama_models[model_choice])
    model, tokenizer = model_cache[model_choice]

    prompt = f"Summarize the following text:\n\n\"{text}\"\n\nSummary:"
    # Send the inputs to wherever the model's first parameters actually live.
    inputs = tokenizer(prompt, return_tensors="pt").to(model.device)
    # `max_new_tokens` bounds only the generated part. A `max_length` budget
    # would also count the prompt, leaving long inputs no room for a summary.
    outputs = model.generate(
        **inputs,
        max_new_tokens=max_new_tokens,
        do_sample=False,
    )
    # Decode only the tokens produced after the prompt, so the result does not
    # depend on searching for "Summary:" in the decoded text (which may also
    # occur inside the input or the generated summary itself).
    prompt_length = inputs["input_ids"].shape[1]
    summary = tokenizer.decode(outputs[0][prompt_length:], skip_special_tokens=True)
    return summary.strip()

Gradio interface

In [None]:
# Assemble the UI: a model selector, an input box, an output box, and a button
# that sends (input text, selected model) to `summarize_text`.
with gr.Blocks() as demo:
    gr.Markdown("<h1><center>Summarization with Llama Models</center></h1>")
    model_choice = gr.Dropdown(choices=list(llama_models), label="Select Llama Model")
    input_text = gr.Textbox(lines=10, label="Enter text to summarize")
    output_text = gr.Textbox(label="Summary")
    summarize_button = gr.Button("Summarize")
    summarize_button.click(
        fn=summarize_text,
        inputs=[input_text, model_choice],
        outputs=output_text,
    )

In [None]:
# Start the Gradio app assembled in the `demo` Blocks object.
demo.launch()