In [1]:
%load_ext autoreload
%autoreload 2

In [2]:
import os
# Restrict this process to the GPUs with physical indices 1 and 3.
# NOTE(review): this presumably has to be set before any CUDA-using library
# is imported, which would explain why it sits above the mgtbench import —
# confirm. The indices are machine-specific; adjust them for your host.
os.environ["CUDA_VISIBLE_DEVICES"]="1,3"

In [3]:
from mgtbench import AutoDetector, AutoExperiment
from mgtbench.loading.dataloader import load

In [4]:
'''
supported LLMs and detect categories:

categories = ['Physics', 'Medicine', 'Biology', 'Electrical_engineering', 'Computer_science', 'Literature', 'History', 'Education', 'Art', 'Law', 'Management', 'Philosophy', 'Economy', 'Math', 'Statistics', 'Chemistry']

llms = ['Moonshot', 'gpt35', 'Mixtral', 'Llama3', 'gpt-4omini']
'''
# The three values below are passed, in this order, to `load(...)` in the
# next cell.
# Dataset identifier.
data_name = 'AITextDetect'
# LLM whose generated text is to be detected; pick one of the `llms` above.
detectLLM = 'Llama3'
# Subject area of the texts; pick one of the `categories` above.
category = 'Art'

In [5]:
# Load the dataset selected in the previous cell. Later cells index the result
# as data['train'] / data['test'], each with parallel 'text' and 'label'
# sequences.
data = load(data_name, detectLLM, category)

loading human data
loading machine data
data loaded


parsing data: 100%|██████████| 8394/8394 [00:00<00:00, 16179.58it/s]


In [6]:
def get_demo_data(data, size):
    """Build a small demo subset from the head of each split.

    Args:
        data: Mapping with 'train' and 'test' entries, each of which maps
            'text' and 'label' to sliceable sequences.
        size: Upper bound for the slice; the first ``size`` items of every
            sequence are kept (plain ``[:size]`` slicing semantics).

    Returns:
        A new dict with the same 'train'/'test' -> 'text'/'label' layout
        holding the sliced sequences. ``data`` itself is not modified.
    """
    return {
        split: {field: data[split][field][:size] for field in ('text', 'label')}
        for split in ('train', 'test')
    }

#### Model-based Detector

In [6]:
# local path to the model, or model name on huggingface
model_name_or_path = '/data1/models/distilbert-base-uncased'
# Build the 'LM-D' (model-based) detector on top of that checkpoint.
# NOTE: despite its name, `metric` holds the detector object; the next cell
# passes it as `detector=[metric]`. The recorded load log reports newly
# initialized classifier weights, so the detector must be fine-tuned before
# its predictions are meaningful.
metric = AutoDetector.from_detector_name('LM-D', 
                                            model_name_or_path=model_name_or_path)

Some weights of DistilBertForSequenceClassification were not initialized from the model checkpoint at /data1/models/distilbert-base-uncased and are newly initialized: ['classifier.bias', 'classifier.weight', 'pre_classifier.bias', 'pre_classifier.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


length is set to 512


In [7]:
# Run the 'supervised' experiment with the LM-D detector built above.
experiment = AutoExperiment.from_experiment_name('supervised',detector=[metric])
# Keep only the first 200 examples of each split so the demo runs quickly.
demo = get_demo_data(data, 200)
experiment.load_data(demo)
# Keyword arguments forwarded to `experiment.launch`.
# NOTE(review): presumably `need_finetune` fine-tunes the detector on the
# train split before prediction and `need_save` controls whether the tuned
# model is written to disk — confirm against the library.
config = {'need_finetune': True,
          'need_save': False,
          'epochs': 1, # for model-based detectors
          }
res = experiment.launch(**config)

# `res` is indexed per detector; element 0 belongs to the only detector
# passed in and exposes `.train` and `.test` metrics.
print('==========')
print('train:', res[0].train)
print('test:', res[0].test)

Calculate result for each data point
Running prediction of detector LM-D
False




Detected kernel version 5.4.0, which is below the recommended minimum of 5.5.0; this can cause the process to hang. It is recommended to upgrade the kernel to the minimum version or higher.


Step,Training Loss


Fine-tune finished
Predict training data


100%|██████████| 200/200 [00:01<00:00, 140.43it/s]


Predict testing data


100%|██████████| 200/200 [00:01<00:00, 161.77it/s]

Run classification for results
train: Metric(acc=0.795, precision=0.7183098591549296, recall=0.9902912621359223, f1=0.8326530612244898, auc=0.9357421679511561, conf_m=None)
test: Metric(acc=0.78, precision=0.7014925373134329, recall=0.9591836734693877, f1=0.8103448275862069, auc=0.9283713485394157, conf_m=None)





#### Metric-based Detector

In [8]:
# log-likelihood detector
# NOTE: this rebinds both `model_name_or_path` and `metric`, replacing the
# LM-D detector created earlier; as before, `metric` is a detector object.
model_name_or_path = '/data1/zzy/gpt2-medium'
metric = AutoDetector.from_detector_name('ll',
                                         model_name_or_path=model_name_or_path)

In [9]:
# Run the 'threshold' experiment with the log-likelihood detector on the
# first 200 examples of each split, using launch()'s default arguments.
experiment = AutoExperiment.from_experiment_name('threshold',detector=[metric])
demo = get_demo_data(data, 200)
experiment.load_data(demo)
res = experiment.launch()

# Element 0 of `res` corresponds to the single detector passed in.
print('==========')
print('train:', res[0].train)
print('test:', res[0].test)

Calculate result for each data point
Running prediction of detector ll
Predict training data


100%|██████████| 200/200 [00:03<00:00, 54.04it/s]


Predict testing data


100%|██████████| 200/200 [00:03<00:00, 57.39it/s]

Run classification for results
train: Metric(acc=0.835, precision=0.8301886792452831, recall=0.8543689320388349, f1=0.8421052631578947, auc=0.8934040636572915, conf_m=None)
test: Metric(acc=0.84, precision=0.8367346938775511, recall=0.8367346938775511, f1=0.8367346938775511, auc=0.9183173269307723, conf_m=None)





#### Fast-DetectGPT


In [10]:
# Fast-DetectGPT takes two model locations: a scoring model and a reference
# model. Both are local checkpoint paths here; adjust them for your machine.
scoring_model_name_or_path = '/data_sda/zhiyuan/models/gpt-neo-2.7B'
reference_model_name_or_path = '/data_sda/zhiyuan/models/gpt-j-6B'
fastDetectGPT = AutoDetector.from_detector_name('fast-detectGPT', 
                                            scoring_model_name_or_path=scoring_model_name_or_path,
                                            reference_model_name_or_path= reference_model_name_or_path
                                            )

Some weights of the model checkpoint at /data_sda/zhiyuan/models/gpt-j-6B were not used when initializing GPTJForCausalLM: ['transformer.h.0.attn.bias', 'transformer.h.0.attn.masked_bias', 'transformer.h.1.attn.bias', 'transformer.h.1.attn.masked_bias', 'transformer.h.10.attn.bias', 'transformer.h.10.attn.masked_bias', 'transformer.h.11.attn.bias', 'transformer.h.11.attn.masked_bias', 'transformer.h.12.attn.bias', 'transformer.h.12.attn.masked_bias', 'transformer.h.13.attn.bias', 'transformer.h.13.attn.masked_bias', 'transformer.h.14.attn.bias', 'transformer.h.14.attn.masked_bias', 'transformer.h.15.attn.bias', 'transformer.h.15.attn.masked_bias', 'transformer.h.16.attn.bias', 'transformer.h.16.attn.masked_bias', 'transformer.h.17.attn.bias', 'transformer.h.17.attn.masked_bias', 'transformer.h.18.attn.bias', 'transformer.h.18.attn.masked_bias', 'transformer.h.19.attn.bias', 'transformer.h.19.attn.masked_bias', 'transformer.h.2.attn.bias', 'transformer.h.2.attn.masked_bias', 'transforme

In [11]:
# Run the 'perturb' experiment with Fast-DetectGPT on the first 100 examples
# of each split, using launch()'s default arguments.
experiment = AutoExperiment.from_experiment_name('perturb', detector=[fastDetectGPT])
demo = get_demo_data(data, 100)
experiment.load_data(demo)
res = experiment.launch()

# Element 0 of `res` corresponds to the single detector passed in.
print('==========')
print('train:', res[0].train)
print('test:', res[0].test)

Calculate result for each data point
Running prediction of detector fast-detectGPT
Predict training data


Detecting: 100%|██████████| 100/100 [00:25<00:00,  3.89it/s]


Predict testing data


Detecting: 100%|██████████| 100/100 [00:26<00:00,  3.72it/s]

Run classification for results
Finding best threshold for f1 score...
train: Metric(acc=0.89, precision=0.9069767441860465, recall=0.8478260869565217, f1=0.8764044943820225, auc=0.930756843800322, conf_m=None)
test: Metric(acc=0.85, precision=0.8888888888888888, recall=0.8, f1=0.8421052631578947, auc=0.9292, conf_m=None)





#### Binoculars


In [6]:
# Binoculars takes two model locations: an observer model and a performer
# model. Both are local checkpoint paths here; adjust them for your machine.
observer_model_name_or_path = '/data_sda/zhiyuan/models/falcon-7b'
performer_model_name_or_path = '/data_sda/zhiyuan/models/falcon-7b-instruct'

binoculars = AutoDetector.from_detector_name('Binoculars', 
                                            observer_model_name_or_path=observer_model_name_or_path,
                                            performer_model_name_or_path= performer_model_name_or_path,
                                            max_length=1024,
                                            mode='low-fpr', # accuracy (f1) or low-fpr
                                            # 'default' or 'new', default is the threshold used in the paper, 'new' is the threshold calculated on the new training set
                                            threshold='new' 
                                            )

/data_sda/zhiyuan/models/falcon-7b /data_sda/zhiyuan/models/falcon-7b-instruct


Loading checkpoint shards:   0%|          | 0/2 [00:00<?, ?it/s]

Loading checkpoint shards:   0%|          | 0/2 [00:00<?, ?it/s]

In [7]:
# Evaluate the same Binoculars detector under both threshold modes, reusing a
# single 50-example subset of each split for the two runs.
demo = get_demo_data(data, 50)
for threshold_mode in ('low-fpr', 'accuracy'):
    # 'low-fpr': threshold set for low-fpr; 'accuracy': threshold set for f1
    binoculars.change_mode(threshold_mode)
    # A fresh 'threshold' experiment is created for every mode.
    experiment = AutoExperiment.from_experiment_name('threshold', detector=[binoculars])
    experiment.load_data(demo)
    res = experiment.launch()
    print('==========')
    print('train:', res[0].train)
    print('test:', res[0].test)

Calculate result for each data point
Running prediction of detector Binoculars
Predict training data


Detecting:   0%|          | 0/50 [00:00<?, ?it/s]

Detecting: 100%|██████████| 50/50 [00:16<00:00,  3.02it/s]


Predict testing data


Detecting: 100%|██████████| 50/50 [00:16<00:00,  2.97it/s]


Run classification for results
Finding best threshold for low-fpr...
train: Metric(acc=0.56, precision=0.56, recall=1.0, f1=0.717948717948718, auc=0.9253246753246753, conf_m=None)
test: Metric(acc=0.56, precision=0.56, recall=1.0, f1=0.717948717948718, auc=0.9131493506493507, conf_m=None)
Calculate result for each data point
Running prediction of detector Binoculars
Predict training data


Detecting: 100%|██████████| 50/50 [00:15<00:00,  3.16it/s]


Predict testing data


Detecting: 100%|██████████| 50/50 [00:16<00:00,  2.98it/s]

Run classification for results
Finding best threshold for accuracy...
train: Metric(acc=0.88, precision=0.8666666666666667, recall=0.9285714285714286, f1=0.896551724137931, auc=0.9253246753246753, conf_m=None)
test: Metric(acc=0.86, precision=0.8620689655172413, recall=0.8928571428571429, f1=0.8771929824561403, auc=0.9131493506493507, conf_m=None)





#### RADAR

In [8]:
# Build the RADAR detector with the library defaults (no model path is given).
radar = AutoDetector.from_detector_name('RADAR')

Radar Detector is loaded


In [9]:
# Run the 'supervised' experiment with RADAR on the first 200 examples of
# each split. Unlike the LM-D cell, launch() is called with its defaults
# (no fine-tuning config is passed).
experiment = AutoExperiment.from_experiment_name('supervised', detector=[radar])
demo = get_demo_data(data, 200)
experiment.load_data(demo)
res = experiment.launch()

# Element 0 of `res` corresponds to the single detector passed in.
print('==========')
print('train:', res[0].train)
print('test:', res[0].test)

Calculate result for each data point
Running prediction of detector RADAR
Predict training data


100%|██████████| 200/200 [00:03<00:00, 53.85it/s]


Predict testing data


100%|██████████| 200/200 [00:03<00:00, 52.50it/s]

Run classification for results
train: Metric(acc=0.625, precision=0.7692307692307693, recall=0.3883495145631068, f1=0.5161290322580645, auc=0.7982183965569012, conf_m=None)
test: Metric(acc=0.63, precision=0.8035714285714286, recall=0.4166666666666667, f1=0.5487804878048781, auc=0.8345410628019323, conf_m=None)





#### Detect-GPT

In [7]:
# DetectGPT takes two model locations: the main model and a mask model.
# Both are local checkpoint paths here; adjust them for your machine.
# NOTE: this rebinds `model_name_or_path` used by earlier cells.
model_name_or_path = '/data1/models/Llama-2-7b-chat-hf'
mask_model_name_or_path = '/data1/models/t5-base'
detectGPT = AutoDetector.from_detector_name('detectGPT', 
                                            model_name_or_path=model_name_or_path,
                                            mask_model_name_or_path= mask_model_name_or_path)

Loading checkpoint shards:   0%|          | 0/2 [00:00<?, ?it/s]

In [9]:
# Run the 'perturb' experiment with DetectGPT on only the first 10 examples
# of each split; the recorded run spent roughly 50 s per split on the
# perturbation step alone.
experiment = AutoExperiment.from_experiment_name('perturb',detector=[detectGPT])
demo = get_demo_data(data, 10)
experiment.load_data(demo)
# NOTE(review): `n_perturbations` is presumably the number of perturbed
# variants generated per text — confirm against the library.
res = experiment.launch(n_perturbations=5)

# Element 0 of `res` corresponds to the single detector passed in.
print('==========')
print('train:', res[0].train)
print('test:', res[0].test)

Calculate result for each data point
Running prediction of detector detectGPT
Predict training data
Running perturb on the given texts


100%|██████████| 3/3 [00:47<00:00, 15.98s/it]


Perturb finished.


100%|██████████| 10/10 [00:01<00:00,  8.41it/s]
100%|██████████| 50/50 [00:05<00:00,  8.51it/s]


Predict testing data
Running perturb on the given texts


100%|██████████| 3/3 [00:52<00:00, 17.66s/it]


Perturb finished.


100%|██████████| 10/10 [00:01<00:00,  9.60it/s]
100%|██████████| 50/50 [00:05<00:00,  9.57it/s]

Run classification for results
train: Metric(acc=0.6, precision=0.5, recall=0.75, f1=0.6, auc=0.6458333333333334, conf_m=None)
test: Metric(acc=0.8, precision=0.8, recall=0.8, f1=0.8, auc=0.7200000000000002, conf_m=None)





#### DNA-GPT Detector


In [9]:
# DNA-GPT takes a single base model location (a local checkpoint path here;
# adjust it for your machine).
base_model_name_or_path = '/data1/zzy/gpt2-medium'
dna_gpt = AutoDetector.from_detector_name('DNA-GPT',
                                          base_model_name_or_path=base_model_name_or_path,
                                          )

In [10]:
# Run the 'perturb' experiment with DNA-GPT on only the first 5 examples of
# each split; the recorded run took roughly 9 s per example.
# With so few examples the metrics are very noisy: the recorded test split
# shows precision, recall and f1 of 0.0, followed by a `_warn_prf` warning.
experiment = AutoExperiment.from_experiment_name('perturb', detector=[dna_gpt])
demo = get_demo_data(data, 5)
experiment.load_data(demo)
res = experiment.launch()

# Element 0 of `res` corresponds to the single detector passed in.
print('==========')
print('train:', res[0].train)
print('test:', res[0].test)

Calculate result for each data point
Running prediction of detector DNA-GPT
Predict training data


Detecting: 100%|██████████| 5/5 [00:48<00:00,  9.67s/it]


Predict testing data


Detecting: 100%|██████████| 5/5 [00:42<00:00,  8.59s/it]

Run classification for results
Finding best threshold for accuracy...
train: Metric(acc=0.8, precision=1.0, recall=0.5, f1=0.6666666666666666, auc=0.6666666666666667, conf_m=None)
test: Metric(acc=0.8, precision=0.0, recall=0.0, f1=0.0, auc=0.75, conf_m=None)



  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))
