# 🌎 Gradio Experimenty

In [1]:
import gradio as gr
import numpy as np
from transformers import pipeline
from transformers.trainer_utils import get_last_checkpoint


In [2]:
import json

# Scratch cell: print the sample paragraph as a single JSON string literal.
# ensure_ascii=False keeps non-ASCII characters as-is instead of \uXXXX escapes.
print(json.dumps("""The CEDMO hub‚Äôs fact-checking activities are based on an experienced and extensive ecosystem of fact-checkers, disinformation analysts, media literacy organisations and academics who detect, analyse, and expose emerging harmful information disorders. Therefore, the project pays special attention to disinformation targeting Central European and EU issues and policies. Through a rapid-alerts network, fact-checking and investigation, reports are sent to the relevant target group (media, public institutions, civil society and government) to minimise the impact of disinformation campaigns. Immediate disinformation responses and daily fact checking is delivered by seasoned professionals of the international news agency AFP , Demagog.cz, Demagog.sk, Konkret24 and Infosecurity.sk .""",ensure_ascii=False))

"The CEDMO hub‚Äôs fact-checking activities are based on an experienced and extensive ecosystem of fact-checkers, disinformation analysts, media literacy organisations and academics who detect, analyse, and expose emerging harmful information disorders. Therefore, the project pays special attention to disinformation targeting Central European and EU issues and policies. Through a rapid-alerts network, fact-checking and investigation, reports are sent to the relevant target group (media, public institutions, civil society and government) to minimise the impact of disinformation campaigns. Immediate disinformation responses and daily fact checking is delivered by seasoned professionals of the international news agency AFP , Demagog.cz, Demagog.sk, Konkret24 and Infosecurity.sk ."


In [3]:
# Maps the display name offered in the UI's model selector to the model
# identifier that is handed to `get_pipeline` when the models are loaded.
MODELS = {
    "üá®üáø MBart (SumeCzech)": "ctu-aic/mbart-sumeczech-claim-extraction",
    "üá®üáø MBart": "ctu-aic/mbart25-large-eos",
    "üá¨üáß T5-small (BBC)": "ctu-aic/t5-small-feversum",
    "üá¨üáß T5-large (CNN)": "ctu-aic/t5-large-feversum",
    "üá∏üá∞ mBART (CNC, SMESum)": "ctu-aic/mbart25-large-eos-cnc-smesum",
    "üá∏üá∞ mBART (SumeCzech, CNC, SMESum)": "ctu-aic/mbart-at2h-cs-smesum-2",
    # Disabled entry: points at a machine-local training directory.
    #"üá¨üáß Pegasus (BBC)": "/home/ullriher/ullriher/models/promising/t5-large-finetuned-xsum-cnn_feversum3_text2claim_bs2_ep30",
}

In [4]:
def get_pipeline(model_name_or_path, device="cuda:0"):
    """Build a summarization pipeline for a model id or a local training directory.

    The model is first loaded directly from ``model_name_or_path``. If that
    fails, the argument is treated as a training output directory and the
    pipeline is built from its most recent checkpoint instead.

    Args:
        model_name_or_path: Model identifier or path passed to ``pipeline``.
        device: Device passed to ``pipeline``; defaults to ``"cuda:0"``.

    Returns:
        The object returned by ``pipeline("summarization", ...)``.

    Raises:
        Exception: The original loading error, when no checkpoint fallback is
            available (path missing, not a directory, or no checkpoint found).
    """
    try:
        return pipeline("summarization", model=model_name_or_path, device=device)
    except Exception:
        # Only fall back when the argument really is a directory with a
        # checkpoint in it; a failed directory scan must not hide the real error.
        try:
            checkpoint = get_last_checkpoint(model_name_or_path)
        except OSError:
            checkpoint = None
        if checkpoint is None:
            # Never call pipeline(model=None): that would build a pipeline
            # without the requested model instead of reporting the failure.
            raise
        return pipeline("summarization", model=checkpoint, device=device)


# Load every configured model up front. Loading is best-effort: a model that
# fails to load is reported (with the reason) and skipped, so the remaining
# models still become available under their display names.
summarizers = {}
for name, model in MODELS.items():
    try:
        summarizer = get_pipeline(model)
    except Exception as error:
        # Catch Exception rather than everything, so Ctrl-C still interrupts
        # the loop, and surface the cause instead of silently discarding it.
        print(f"Error loading model {model}: {error!r}")
        continue
    if summarizer is not None:
        summarizers[name] = summarizer


def output_to_text(output):
    """Turn a summarization pipeline result into clean claim text.

    Args:
        output: Sequence whose first item is a dict with a ``"summary_text"`` key.

    Returns:
        The summary text with ``<pad>`` and ``</s>`` markers removed and a
        leading ``"cs "`` tag dropped (presumably a language tag emitted by the
        Czech checkpoints; confirm against model output).
    """
    text = output[0]["summary_text"].replace("<pad>", "").replace("</s>", "")

    # Strip the tag only where it leads the text. Removing "cs " everywhere
    # would mangle ordinary words, e.g. "Politics is" -> "Politiis".
    lang_prefix = "cs "
    start = len(text) - len(text.lstrip())
    if text.startswith(lang_prefix, start):
        text = text[:start] + text[start + len(lang_prefix):]
    return text


def process(input, summarizer, claims=1, k=1, min_length=10, max_length=40):
    """Extract one or more claims from ``input`` with the selected model.

    The first claim is generated deterministically (``do_sample=False``); each
    additional claim is generated with sampling restricted to ``top_k=k``.

    Args:
        input: Text to extract claims from.
        summarizer: Key into the module-level ``summarizers`` dict.
        claims: Total number of claims to return; values below 1 still yield
            the single deterministic claim.
        k: ``top_k`` used for the sampled claims.
        min_length: ``min_length`` passed to the pipeline.
        max_length: ``max_length`` passed to the pipeline.

    Returns:
        List of claim strings, deterministic claim first.

    Raises:
        KeyError: If ``summarizer`` is not a loaded model.
    """
    # Numeric UI widgets may deliver floats such as 3.0; range() rejects
    # them, so normalise every numeric knob to int up front.
    claims, k = int(claims), int(k)
    min_length, max_length = int(min_length), int(max_length)

    model = summarizers[summarizer]

    def extract(**generation_kwargs):
        # Single place for the pipeline call shared by both decoding modes.
        return output_to_text(
            model(
                input,
                max_length=max_length,
                min_length=min_length,
                **generation_kwargs,
            )
        )

    output = [extract(do_sample=False)]
    output.extend(extract(do_sample=True, top_k=k) for _ in range(claims - 1))
    return output


# Sample Czech news paragraph kept around for manual smoke tests of `process`.
LONG_TEXT = "Prodej ≈æiv√Ωch delf√≠n≈Ø se ≈ô√≠d√≠ dohodou o mezin√°rodn√≠m obchodu s ohro≈æen√Ωmi druhy, kter√° zakazuje podobn√© transakce, pokud by mohly zv√≠≈ôat≈Øm u≈°kodit. ≈†alamounovy ostrovy, le≈æ√≠c√≠ asi 1800 kilometr≈Ø severov√Ωchodnƒõ od Austr√°lie, nicm√©nƒõ dohodu nepodepsaly. √özem√≠ je v souƒçasn√© dobƒõ zm√≠t√°no politickou kriz√≠ a etnick√Ωmi n√°silnostmi, kv≈Øli nim≈æ sem byli tento t√Ωden vysl√°ni austral≈°t√≠ voj√°ci. Ekologov√© vin√≠ mexick√© podnikatele, ≈æe krize na ≈†alamounov√Ωch ostrovech zneu≈æili."

# Example call, left disabled. `summarizer` is a required argument of `process`,
# so pass the name of any loaded model (here: the first one).
# print(process(LONG_TEXT, next(iter(summarizers)), claims=5, k=5, min_length=10, max_length=40))


Error loading model ctu-aic/mbart-sumeczech-claim-extraction
Error loading model ctu-aic/mbart25-large-eos
Error loading model ctu-aic/t5-large-feversum
Error loading model ctu-aic/mbart25-large-eos-cnc-smesum
Error loading model ctu-aic/mt5-base-multilingual-summarization-multilarge-cs-smesum


In [5]:
class MyInterface(gr.Interface):
    """Gradio interface for the claim-extraction demo with a CEDMO-branded header.

    Wires ``process`` to a text box, a model selector and four sliders
    (number of claims, top-k randomness, min/max length) and shows the
    resulting claims as JSON.
    """

    def __init__(self):
        """Build the input/output components from the loaded ``summarizers``."""
        model_names = list(summarizers.keys())

        # Pre-select the preferred model only if it actually loaded; otherwise
        # fall back to the first available one so the default is always a
        # valid key for `process`.
        preferred_model = "üá®üáø MBart"
        if preferred_model in model_names:
            default_model = preferred_model
        else:
            default_model = model_names[0] if model_names else None

        super().__init__(
            fn=process,
            title='Factual Claim Extraction',
            description="This is a prototype CEDMO application to extract factual claims from an arbitrary text.",
            inputs=[
                # Top-level components (gr.Textbox / gr.Radio with `value=`)
                # replace the deprecated `gr.inputs.*` / `default=` spelling,
                # matching the `gr.Slider` / `gr.themes` usage below.
                gr.Textbox(lines=5, label="Text to extract"),
                gr.Radio(model_names, label="Model", value=default_model),
                gr.Slider(1, 10, step=1, label="Number of claims"),
                gr.Slider(1, 100, 10, step=1, label="Amount of randomness"),
                gr.Slider(1, 100, 10, step=1, label="Min length (# tokens)"),
                gr.Slider(1, 100, 40, step=1, label="Max length (# tokens)"),
            ],
            outputs=[gr.components.JSON(label="Claims")],
            theme=gr.themes.Soft(
                primary_hue="yellow"
            ),
        )

    def render_title_description(self) -> None:
        """Render the title next to the CEDMO logo, then the description.

        NOTE(review): presumably overrides the hook ``gr.Interface`` calls to
        draw its header; confirm against the installed Gradio version.
        """
        if self.title:
            gr.Markdown(
                "<h1 style='text-align: left; margin-bottom: .5rem;color:#3c3950'>"
                + '<img src="https://cedmohub.eu/wp-content/uploads/thegem-logos/logo_97ce70140f90745805929b382597e9b5_2x.png" style="height: 2.5rem; margin-right: 1.5rem; vertical-align: middle; float:left;"/>'
                + self.title
                + "</h1>"
            )
        if self.description:
            gr.Markdown(self.description)
    


# Build the interface once; the components are created from the models that
# were loaded into `summarizers` earlier in the notebook.
demo = MyInterface()

# share=True also exposes the app on a temporary public URL (see the
# "Running on public URL" line in the output), so anyone with the link can
# reach this machine's models. Set it to False for local-only use.
if __name__ == "__main__":
    demo.launch(share=True, inbrowser=True)




Running on local URL:  http://127.0.0.1:7860
Running on public URL: https://97f5ece48e5e4cb22b.gradio.live

This share link expires in 72 hours. For free permanent hosting and GPU upgrades (NEW!), check out Spaces: https://huggingface.co/spaces


Unable to connect to VS Code server: Error in request.
Error: connect ENOENT /run/user/434653/vscode-ipc-e8bc4c27-ceb8-451c-b0e3-46e020c27708.sock
    at PipeConnectWrap.afterConnect [as oncomplete] (node:net:1157:16) {
  errno: -2,
  code: 'ENOENT',
  syscall: 'connect',
  address: '/run/user/434653/vscode-ipc-e8bc4c27-ceb8-451c-b0e3-46e020c27708.sock'
}
