In [2]:
!pip install -q json_repair

In [3]:
import gradio as gr
import json
import torch
import json_repair
from pydantic import BaseModel, Field
from typing import List
from transformers import AutoModelForCausalLM, AutoTokenizer

# --- Configuration & Model Loading ---
# Base checkpoint (also used for the tokenizer) and the adapter loaded on top of it.
base_model_id = "Qwen/Qwen2.5-1.5B-Instruct"
finetuned_model_id = "duclo90/structured_output"

# Use the GPU when torch can see one; otherwise fall back to the CPU.
device = "cuda" if torch.cuda.is_available() else "cpu"

print(f"Loading model on {device}...")
tokenizer = AutoTokenizer.from_pretrained(base_model_id)
model = AutoModelForCausalLM.from_pretrained(
    base_model_id,
    # Let the library place the weights on the GPU; no device map on CPU.
    device_map="auto" if device == "cuda" else None,
    # `torch_dtype` is deprecated in the installed transformers release (it warns
    # "Use `dtype` instead!"), so pass `dtype`. This requires a transformers
    # version that accepts the `dtype` keyword.
    dtype="auto",
)
model.load_adapter(finetuned_model_id)
if device == "cpu":
    model.to("cpu")

# --- Pydantic Schemas ---
class Entity(BaseModel):
    # One recognized entity: its value and the type it was recognized as.
    # Both fields are required (`Field(...)`).
    # NOTE(review): documented with `#` comments instead of a docstring on purpose;
    # pydantic presumably copies a class docstring into the JSON schema
    # `description`, which would change any text generated from the schema — confirm.
    entity_value: str = Field(..., description="The actual name or value of the entity.")
    entity_type: str = Field(..., description="The type of recognized entity.")

class NewsDetails(BaseModel):
    # Structured description of one news story. All fields are required (`Field(...)`).
    # The field descriptions end up in `model_json_schema()`, so editing them
    # changes whatever is generated from that schema.
    # NOTE(review): documented with `#` comments instead of a docstring on purpose;
    # pydantic presumably copies a class docstring into the JSON schema
    # `description` — confirm before converting these notes to a docstring.
    story_title: str = Field(..., description="A fully informative and SEO optimized title of the story.")
    story_keywords: List[str] = Field(..., description="Relevant keywords associated with the story.")
    # The "1-5 points" limit is stated only in the description text; nothing in
    # this annotation enforces a length.
    story_summary: List[str] = Field(..., description="Summarized key points about the story (1-5 points).")
    story_category: str = Field(..., description="Category of the news story.")
    story_entities: List[Entity] = Field(..., description="List of identified entities in the story.")

# --- Helper Functions ---
def parse_json(text):
    """Parse model output into Python data, repairing malformed JSON when possible.

    Args:
        text: Raw text expected to contain JSON.

    Returns:
        Whatever ``json_repair.loads`` returns for ``text``. If that call raises
        an ordinary exception, a dict with an ``"error"`` message and the
        original text under ``"raw"`` is returned instead.
    """
    try:
        return json_repair.loads(text)
    except Exception:
        # Deliberately not a bare `except:` - KeyboardInterrupt and SystemExit
        # must propagate so a running cell or server can still be stopped.
        return {"error": "Failed to parse JSON", "raw": text}

def generate_resp(messages):
    """Generate a completion for a chat conversation and return only the new text.

    Args:
        messages: Chat messages handed to the tokenizer's chat template.

    Returns:
        The decoded text of the first (and only) sequence, with special tokens
        skipped and the leading prompt tokens removed.
    """
    prompt = tokenizer.apply_chat_template(
        messages, tokenize=False, add_generation_prompt=True
    )
    model_inputs = tokenizer([prompt], return_tensors="pt").to(device)

    # Sampling is disabled and at most 1024 new tokens are produced.
    sequences = model.generate(**model_inputs, max_new_tokens=1024, do_sample=False)

    # Drop the first len(prompt) tokens of every output row so that only the
    # tokens after the prompt are decoded.
    completions = []
    for prompt_ids, full_ids in zip(model_inputs.input_ids, sequences):
        completions.append(full_ids[len(prompt_ids):])

    decoded = tokenizer.batch_decode(completions, skip_special_tokens=True)
    return decoded[0]

def extract_details(story):
    """Extract structured news details from a story via the model.

    Args:
        story: The story text. ``None``, an empty string, or whitespace-only
            text is treated as missing input.

    Returns:
        ``{"Error": "Please enter a story."}`` when no story text is given;
        otherwise the result of ``parse_json`` applied to the model's raw
        response.
    """
    # Guard against None as well as blank text: calling .strip() on None would
    # raise AttributeError instead of returning the empty-input message.
    story_text = (story or "").strip()
    if not story_text:
        return {"Error": "Please enter a story."}

    # ensure_ascii=False keeps non-ASCII characters in the schema unescaped.
    schema_json = json.dumps(NewsDetails.model_json_schema(), ensure_ascii=False)

    messages = [
        {
            "role": "system",
            "content": "You are an NLP data parser. Extract JSON details from Arabic text according the Pydantic details. No intro/outro."
        },
        {
            "role": "user",
            # The prompt ends with an opened ```json fence so the reply is
            # expected to continue directly with the JSON payload.
            "content": f"## Story:\n{story_text}\n\n## Pydantic Details:\n{schema_json}\n\n## Story Details:\n```json"
        }
    ]

    raw_response = generate_resp(messages)
    return parse_json(raw_response)

Loading model on cuda...


The secret `HF_TOKEN` does not exist in your Colab secrets.
To authenticate with the Hugging Face Hub, create a token in your settings tab (https://huggingface.co/settings/tokens), set it as secret in your Google Colab and restart your session.
You will be able to reuse this secret in all of your notebooks.
Please note that authentication is recommended but still optional to access public models or datasets.


tokenizer_config.json: 0.00B [00:00, ?B/s]

vocab.json: 0.00B [00:00, ?B/s]

merges.txt: 0.00B [00:00, ?B/s]

tokenizer.json: 0.00B [00:00, ?B/s]

config.json:   0%|          | 0.00/660 [00:00<?, ?B/s]

`torch_dtype` is deprecated! Use `dtype` instead!


model.safetensors:   0%|          | 0.00/3.09G [00:00<?, ?B/s]

generation_config.json:   0%|          | 0.00/242 [00:00<?, ?B/s]

adapter_config.json: 0.00B [00:00, ?B/s]

adapter_model.safetensors:   0%|          | 0.00/295M [00:00<?, ?B/s]

In [4]:
# --- Gradio UI ---
# Two-column layout: story input and submit button on the left, parsed JSON on the right.
# NOTE(review): the recorded output of this cell shows a warning that points at
# the `gr.Blocks(theme=...)` line below - presumably about the `theme=` argument;
# check it against the installed Gradio version.
with gr.Blocks(theme=gr.themes.Soft()) as demo:
    gr.Markdown("# 🔍 Arabic News Entity Extractor")
    gr.Markdown("Paste an Arabic news story below to extract structured data (Title, Keywords, Summary, and Entities).")

    with gr.Row():
        # Left column: free-text input plus the trigger button.
        with gr.Column():
            input_text = gr.Textbox(
                label="Arabic News Story",
                lines=15,
                placeholder="أدخل النص العربي هنا..."
            )
            submit_btn = gr.Button("Extract Structured Data", variant="primary")

        # Right column: whatever extract_details returns, rendered as JSON.
        with gr.Column():
            output_json = gr.JSON(label="Extracted JSON Results")

    # Clicking the button sends the textbox value to extract_details and shows
    # its return value in the JSON panel.
    submit_btn.click(
        fn=extract_details,
        inputs=[input_text],
        outputs=[output_json]
    )

# Start the app. Per the recorded output, Gradio detects the hosted notebook and
# sets share=True automatically, and it suggests debug=True to show errors in Colab.
demo.launch()

  with gr.Blocks(theme=gr.themes.Soft()) as demo:


It looks like you are running Gradio on a hosted Jupyter notebook, which requires `share=True`. Automatically setting `share=True` (you can turn this off by setting `share=False` in `launch()` explicitly).

Colab notebook detected. To show errors in colab notebook, set debug=True in launch()
* Running on public URL: https://cb8c3bf5111ce2a3c1.gradio.live

This share link expires in 1 week. For free permanent hosting and GPU upgrades, run `gradio deploy` from the terminal in the working directory to deploy to Hugging Face Spaces (https://huggingface.co/spaces)


