-
Notifications
You must be signed in to change notification settings - Fork 0
/
gradio_MGTL_binoculars.py
124 lines (94 loc) · 7.7 KB
/
gradio_MGTL_binoculars.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
import gradio as gr
import argparse
import torch
import transformers
import spacy
nlp = spacy.load("en_core_web_sm")
from gradio_utils import get_binoculars_model_prediction, highlight_text, DEVICE, Binoculars
# Command-line options. Parsing happens at import time, so `args` is a
# module-level value that the rest of this script reads directly.
parser = argparse.ArgumentParser()
parser.add_argument(
    "--model_name",
    type=str,
    default="Binoculars (https://github.com/ahans30/Binoculars)",
    help="detector name",
)
parser.add_argument(
    "--cache_dir",
    type=str,
    default="/projectnb/ivc-ml/zpzhang/checkpoints/transformers_cache",
)
args = parser.parse_args()

# Echo the effective configuration to the console before the detector is built.
print(args)
print(f'Beginning Binoculars (https://github.com/ahans30/Binoculars) evaluation with {args.model_name}...')

# Single detector instance, created once at startup and shared by every request.
bino = Binoculars(cache_dir=args.cache_dir)
def article_analysis(input_article, window_size=1, threshold_low=0.50, threshold_high=0.60, strategy="vote"):
    """Score an article sentence by sentence and prepare the UI outputs.

    The article is split into sentences with the module-level spaCy pipeline
    ``nlp``; the sentences are passed to ``get_binoculars_model_prediction``
    together with the shared ``bino`` detector, and the per-sentence
    predictions are turned into highlight data by ``highlight_text``.

    Args:
        input_article: Raw article text from the input textbox.
        window_size: Forwarded unchanged to ``get_binoculars_model_prediction``.
        threshold_low: Forwarded unchanged to ``highlight_text``.
        threshold_high: Forwarded unchanged to ``highlight_text``.
        strategy: Forwarded unchanged to ``get_binoculars_model_prediction``.
            NOTE(review): the default here is "vote" while the UI radio offers
            "single-sentence" / "multi-sentence" -- confirm which values the
            helper actually accepts.

    Returns:
        A ``(token_highlight, test_preds_dict)`` tuple: the value produced by
        ``highlight_text`` and a dict summarising the detector output. For
        empty, whitespace-only or ``None`` input, an empty highlight list and
        a dict holding a single ``"error"`` message are returned instead.
    """
    # Pressing "Run" on a blank textbox would otherwise send an empty sentence
    # list (or a whitespace-only "sentence") to the detector.
    if not input_article or not input_article.strip():
        return [], {"error": "Please enter some text to analyze."}

    doc = nlp(input_article)
    sentence_list = [str(sentence) for sentence in doc.sents]

    # The helper returns six values; two of them are not used by this view.
    (
        test_preds,
        _test_preds_org,
        _whole_document_score,
        binoculars_preds,
        whole_document_binoculars_score,
        whole_document_label,
    ) = get_binoculars_model_prediction(
        sentence_list=sentence_list,
        window_size=window_size,
        strategy=strategy,
        cache_dir=args.cache_dir,
        bino=bino,
    )

    test_preds_dict = {
        "pretrained detector": args.model_name,
        "whole document label": whole_document_label,
        "whole document Binoculars score": "{:f}".format(whole_document_binoculars_score),
        "Binoculars Threshold": "{:f}".format(bino.threshold),
        "sentence Binoculars scores": binoculars_preds,
    }

    token_highlight = highlight_text(
        sentence_list,
        test_preds,
        threshold_low=threshold_low,
        threshold_high=threshold_high,
    )
    return token_highlight, test_preds_dict
# Sample articles offered as one-click examples in the UI (they are passed to
# gr.Examples further down in this file). Each is a one-element list because
# the examples widget fills a single input component. The trailing backslashes
# inside the triple-quoted strings are line continuations, so every example is
# one long paragraph with no embedded newlines.

# Short passage about a capybara astrophysicist.
example2=['''Dr. Capy Cosmos, a capybara unlike any other, astounded the scientific community with his \
groundbreaking research in astrophysics. With his keen sense of observation and unparalleled ability to interpret \
cosmic data, he uncovered new insights into the mysteries of black holes and the origins of the universe. As he \
peered through telescopes with his large, round eyes, fellow researchers often remarked that it seemed as if the \
stars themselves whispered their secrets directly to him. Dr. Cosmos not only became a beacon of inspiration to \
aspiring scientists but also proved that intellect and innovation can be found in the most unexpected of creatures.''']
# Abstract-style text describing machine-generated text (MGT) localization.
example1=['''Machine-Generated Text (MGT) detection aims to identify a piece of text as machine or human written. \
Prior work has primarily formulated MGT as a binary classification task over an entire document, with limited work \
exploring cases where only part of a document is machine generated. This paper provides the first in-depth study \
of MGT that localizes the portions of a document that were machine generated. Thus, if a bad actor were to change \
a key portion of a news article to spread misinformation, whole document MGT detection may fail since the vast majority \
is human written, but our approach can succeed due to its granular approach. A key challenge in our MGT localization \
task is that short spans of text, e.g., a single sentence, provides little information indicating if it is machine \
generated due to its short length. To address this, we leverage contextual information, where we predict whether \
multiple sentences are machine or human written at once. This enables our approach to identify changes in style \
or content to boost performance. A gain of 4-13% mean Average Precision (mAP) over prior work demonstrates the \
effectiveness of approach on five diverse datasets: GoodNews, VisualNews, WikiText, Essay, and WP.''']
# News-style text about Alzheimer's drug trials; the "……" marks are part of the sample text.
example3=['''Maria C. Carrillo, vice president of medical and scientific relations at the Alzheimer's Association, \
said the results would come quickly. Within a few years, as researchers simultaneously compare the three approaches \
to stopping the disease, they should know which drug, if any, is going to work. Carrillo said. "If there is a drug \
that works, we are going to be the ones to take it and test it," she said, "We are not going to be the ones to say no, \
But what about the people whose lives are most at risk?" The announcement comes at a time of transition for Alzheimer's \
research. …… The drugs were chosen from among 15 that drug companies offered, said the study's principal investigator, \
Dr. Randall Bateman of the Washington University School of Medicine in St. Louis. Shouldn't a drug in development get \
tested in people who will be the most affected? The answer is no. The studies were not designed to test drugs in people \
who are at the highest risk for Alzheimer's disease. Because of that, their findings could have huge consequences for \
those in other developing countries. One concern is something called ARIA, for amyloid related imaging abnormality. \
People with the abnormality may have no signs that anything is wrong, but brain scans show what looks like a change in \
neural connections. …… Researchers said they would face that issue when they come to it. “The study in the U.S., our \
conclusion is that we can't be confident in saying these drugs will work in the vast majority of the population,” said \
Dr. William M. Foege, an associate professor of neurology and psychiatry at the University of California, San Francisco, \
"The study also showed that some of the drugs were unlikely to save lives. For example, the drug metformin, which can \
raise blood sugar, has so much side effects that most people with diabetes are put off by its side effects and don't \
use it at all.” Then we can put pressure on to bring down the cost."''']
# Page layout: a title row, then a two-column row with the controls on the
# left and the results on the right.
# NOTE(review): the nesting below was reconstructed from a source whose
# indentation had been stripped; in particular, confirm that the Examples
# widget belongs at column level rather than inside the accordion.
block = gr.Blocks().queue()
with block:
    with gr.Row():
        gr.Markdown("## Machine-generated Text Localization (Binoculars)")

    with gr.Row():
        # Left column: article input, strategy choice, run button, tuning sliders.
        with gr.Column():
            prompt = gr.Textbox(label="Input Article: ")
            strategy = gr.Radio(
                ["single-sentence", "multi-sentence"],
                label="strategy for MGT detectors",
                value="multi-sentence",
            )
            run_button = gr.Button(value="Run")  # label="Run" for older Gradio version
            with gr.Accordion("Advanced options for Binoculars", open=False):
                window_size = gr.Slider(
                    label="number of sentences per window",
                    minimum=1,
                    maximum=10,
                    value=3,
                    step=1,
                )
                threshold_low = gr.Slider(
                    label="lower threshold for MGT",
                    minimum=0,
                    maximum=1,
                    value=0.5,
                    step=0.01,
                )
                threshold_high = gr.Slider(
                    label="upper threshold for MGT",
                    minimum=0,
                    maximum=1,
                    value=0.6,
                    step=0.01,
                )
            # Clicking an example fills the article textbox.
            examples = gr.Examples(
                examples=[example2, example1, example3],
                inputs=[prompt],
            )

        # Right column: highlighted text, JSON summary, and an HTML panel.
        with gr.Column():
            result_highlight = gr.HighlightedText(
                label="highlight machine-generated text",
                show_label=True,
                combine_adjacent=True,
                show_legend=True,
                color_map={"human": "green", "likely-machine": "yellow", "machine": "red"},
            )
            result_json = gr.Json(label="MGTL analysis json results", show_label=True)
            # This panel is created but is not listed among the click outputs below.
            result_html = gr.HTML(label="MGTL HTML visualization", show_label=True)

    # Input order must match article_analysis's positional parameters.
    run_button.click(
        fn=article_analysis,
        inputs=[prompt, window_size, threshold_low, threshold_high, strategy],
        outputs=[result_highlight, result_json],
    )

# Listen on all interfaces and request a public share link.
block.launch(server_name='0.0.0.0', share=True)