In [1]:
import gradio as gr
import re
from transformers import AutoTokenizer, AutoModelForSeq2SeqLM

# Define a lambda function to handle whitespace in text
WHITESPACE_HANDLER = lambda k: re.sub('\s+', ' ', re.sub('\n+', ' ', k.strip()))

# Model and tokenizer initialization
model_name = "CoderCoy/NLP-PROJECT-NMU"
tokenizer = AutoTokenizer.from_pretrained(model_name)
model = AutoModelForSeq2SeqLM.from_pretrained(model_name)

# Define the summarization function
def summarize_text(text_input):
    # Tokenize the input text using the tokenizer
    input_ids = tokenizer(
        [WHITESPACE_HANDLER(text_input)],
        return_tensors="pt",
        padding="max_length",
        truncation=True,
        max_length=512
    )["input_ids"]

    # Generate a summary using the fine-tuned model
    output_ids = model.generate(
        input_ids=input_ids,
        max_length=84,
        no_repeat_ngram_size=2,
        num_beams=4
    )[0]

    # Decode the generated summary tokens
    summary = tokenizer.decode(
        output_ids,
        skip_special_tokens=True,
        clean_up_tokenization_spaces=False
    )

    return summary

# Create Gradio interface
iface = gr.Interface(
    fn=summarize_text,
    inputs=gr.Textbox(placeholder="Enter text to summarize..."),
    outputs="text",
    live=True,
    css="footer{display:none !important}",
    title="Arabic/English Text Summarizer",
    description="This fine-tuned model supports over 40 languages and has been specifically trained on Arabic and English by the developers using the Arabic summarization (BBC News) Dataset for Arabic and the English summarization (BBC News) Dataset for English. ",
)

# Launch the Gradio interface
iface.launch()


Downloading pytorch_model.bin:   0%|          | 0.00/2.33G [00:00<?, ?B/s]

OSError: Consistency check failed: file should be of size 2329696205 but has size 1440729746 (pytorch_model.bin).
We are sorry for the inconvenience. Please retry download and pass `force_download=True, resume_download=False` as argument.
If the issue persists, please let us know by opening an issue on https://github.com/huggingface/huggingface_hub.