In [2]:
import logging
from collections import defaultdict

import gradio as gr
from transformers import AutoTokenizer, AutoModelForTokenClassification, pipeline


In [3]:
# Load the trained token-classification (NER) model and its tokenizer from a
# directory given relative to the notebook's working directory.
model_path = "./ner_deberta_model"
tokenizer = AutoTokenizer.from_pretrained(model_path)
model = AutoModelForTokenClassification.from_pretrained(model_path)
# aggregation_strategy="simple" asks the pipeline for grouped entities;
# run_pipeline reads the "entity_group", "word" and "score" keys of each result.
ner_pipeline = pipeline("ner", model=model, tokenizer=tokenizer, aggregation_strategy="simple")

# Load the summarization model by its Hugging Face Hub id.
# NOTE(review): presumably fetched over the network when not cached locally —
# confirm before relying on this cell in an offline environment.
summarizer = pipeline("summarization", model="facebook/bart-large-cnn")




In [6]:
def read_text_file(file_path):
    """Return the full contents of a UTF-8 text file as a string.

    Args:
        file_path: Path of the file to read (anything ``open`` accepts).

    Returns:
        The decoded file contents. If the file cannot be opened or decoded,
        the string ``"Error reading file: <reason>"`` is returned instead of
        raising, so callers always receive a ``str``.
    """
    try:
        # "utf-8-sig" decodes ordinary UTF-8 exactly like "utf-8" but also
        # drops a leading byte-order mark, which some editors prepend to .txt
        # files and which would otherwise end up in the first word of the text.
        with open(file_path, "r", encoding="utf-8-sig") as f:
            return f.read()
    except Exception as e:
        return f"Error reading file: {e}"


In [8]:
def _split_into_word_chunks(text, max_words):
    """Pack the non-empty lines of ``text`` into chunks of at most ``max_words`` words."""
    max_words = max(1, int(max_words))
    chunks = []
    current_words = []

    for paragraph in text.split("\n"):
        words = paragraph.split()
        if not words:
            continue

        # Close the running chunk when this paragraph would overflow it.
        if current_words and len(current_words) + len(words) > max_words:
            chunks.append(" ".join(current_words))
            current_words = []

        # A paragraph larger than the whole budget is cut into budget-sized
        # slices instead of being emitted as one oversized chunk.
        while len(words) > max_words:
            chunks.append(" ".join(words[:max_words]))
            words = words[max_words:]

        current_words.extend(words)

    if current_words:
        chunks.append(" ".join(current_words))
    return chunks


def generate_summary(text, max_chunk_words=700, min_words_required=20):
    """Summarize ``text`` chunk by chunk with the module-level ``summarizer``.

    The text is split on newlines, packed into chunks of at most
    ``max_chunk_words`` whitespace-separated words, each chunk is summarized
    separately, and the partial summaries are joined with single spaces.

    Args:
        text: The text to summarize. ``None`` or an empty string is treated
            as too short.
        max_chunk_words: Upper bound on the words per chunk. The budget is in
            words while the model limits its input in tokens, so the default
            is a heuristic that leaves headroom rather than an exact limit.
        min_words_required: Texts with fewer words are not summarized.

    Returns:
        The combined summary, or one of the user-facing status messages
        (too short / nothing generated / failure). Never raises for model
        errors; those are logged and reported through the failure message.
    """
    if not text or len(text.split()) < min_words_required:
        return "⚠️ Text too short for meaningful summary."

    try:
        summary_chunks = []
        for chunk in _split_into_word_chunks(text, max_chunk_words):
            # truncation=True is a safety net: if a chunk still exceeds the
            # model's input length it is truncated rather than failing.
            output = summarizer(
                chunk,
                max_length=130,
                min_length=30,
                do_sample=False,
                truncation=True,
            )
            if output and "summary_text" in output[0]:
                summary_chunks.append(output[0]["summary_text"])

        return " ".join(summary_chunks) if summary_chunks else "⚠️ Summary could not be generated."

    except Exception:
        # Keep the UI-friendly message, but leave a traceback for debugging.
        logging.getLogger(__name__).exception("Summarization failed")
        return "❌ Could not generate summary due to error or model limit."


In [10]:
def run_pipeline(text, confidence_threshold):
    """Extract named entities from ``text`` line by line and summarize it.

    Each non-blank line is passed to the module-level ``ner_pipeline``. For
    every (entity group, entity text) pair the highest score seen is kept,
    provided the score rounded to two decimals reaches the threshold.

    Args:
        text: Input text; ``None`` is treated as an empty string.
        confidence_threshold: Minimum (two-decimal) score an entity needs to
            be included.

    Returns:
        A tuple ``(rows, summary)`` where ``rows`` is a list of
        ``[entity_group, entity_text, "0.00"-formatted score]`` lists, with
        entity texts sorted within each group, and ``summary`` is whatever
        ``generate_summary(text)`` returns.
    """
    text = text or ""
    log = logging.getLogger(__name__)
    entity_groups = defaultdict(dict)

    for line in text.split("\n"):
        line = line.strip()
        if not line:
            continue
        try:
            for ent in ner_pipeline(line):
                word = ent["word"].strip()
                group = ent["entity_group"]
                # Scores may arrive as NumPy scalars; convert to a built-in
                # float so rounding and the threshold comparison use the same
                # arithmetic as the threshold itself.
                score = round(float(ent["score"]), 2)

                if score < confidence_threshold:
                    continue
                if score > entity_groups[group].get(word, float("-inf")):
                    entity_groups[group][word] = score
        except Exception:
            # Best effort: one bad line must not abort the whole run, but the
            # failure is logged instead of vanishing. Unlike a bare except,
            # this lets KeyboardInterrupt/SystemExit propagate.
            log.exception("NER failed for line %r; skipping it", line[:80])
            continue

    results = [
        [group, word, f"{score:.2f}"]
        for group, words in entity_groups.items()
        for word, score in sorted(words.items())
    ]

    summary_text = generate_summary(text)
    return results, summary_text


In [12]:
def _run_pipeline_on_file(file, threshold):
    """Run the NER + summary pipeline on an uploaded file.

    Returns empty outputs when no file is selected. ``file`` may be an object
    exposing the path as ``.name`` or a plain path string; both are accepted.
    """
    if not file:
        return [], ""
    path = getattr(file, "name", file)
    return run_pipeline(read_text_file(path), threshold)


# Two-tab UI: one tab analyses an uploaded .txt file, the other pasted text.
# Each tab has its own threshold slider, entity table and summary box.
with gr.Blocks(title="NER + Summarizer UI") as demo:
    gr.Markdown("## 🧠 Named Entity Recognition (NER) + AI Summary")
    gr.Markdown("Upload a `.txt` file or paste your text. Adjust confidence threshold to control what gets shown.")

    with gr.Tab("📤 Upload File"):
        file_input = gr.File(file_types=[".txt"], label="Upload Text File")
        file_confidence = gr.Slider(minimum=0.5, maximum=1.0, value=0.85, step=0.01, label="Confidence Threshold")
        file_output = gr.Dataframe(headers=["Entity Type", "Text", "Confidence"], label="NER Output")
        file_summary = gr.Textbox(label="📝 AI-Generated Summary", lines=4, interactive=False)

    with gr.Tab("✍️ Paste Text"):
        text_input = gr.Textbox(label="Paste Your Text Here", lines=10, placeholder="Enter paragraph or sentences...")
        text_confidence = gr.Slider(minimum=0.5, maximum=1.0, value=0.85, step=0.01, label="Confidence Threshold")
        text_output = gr.Dataframe(headers=["Entity Type", "Text", "Confidence"], label="NER Output")
        text_summary = gr.Textbox(label="📝 AI-Generated Summary", lines=4, interactive=False)

    file_input.change(
        _run_pipeline_on_file,
        inputs=[file_input, file_confidence],
        outputs=[file_output, file_summary],
    )
    # The UI text promises that the threshold controls what is shown, so
    # re-run when the slider is adjusted. `release` fires once per adjustment,
    # avoiding a model run for every intermediate value while dragging.
    file_confidence.release(
        _run_pipeline_on_file,
        inputs=[file_input, file_confidence],
        outputs=[file_output, file_summary],
    )

    text_input.change(
        run_pipeline,
        inputs=[text_input, text_confidence],
        outputs=[text_output, text_summary],
    )
    text_confidence.release(
        run_pipeline,
        inputs=[text_input, text_confidence],
        outputs=[text_output, text_summary],
    )


In [14]:
# Start the Gradio server for `demo` when this code runs as the main module
# (which is the case inside a notebook kernel, where __name__ is "__main__").
if __name__ == "__main__":
    demo.launch()


* Running on local URL:  http://127.0.0.1:7860
* To create a public link, set `share=True` in `launch()`.
