# All together to calculation

# Statistical (Perplexity)

### GLTR

In [7]:
import torch
import time
import numpy as np
from transformers import GPT2LMHeadModel, GPT2Tokenizer

In [43]:
def top_k_logits(logits, k):
    """
    Keep only the ``k`` largest logits along the last dimension.

    Adapted from
    https://github.com/huggingface/pytorch-pretrained-BERT/blob/master/examples/run_gpt2.py

    Args:
        logits: Tensor of scores; filtering is applied along its last
            dimension (e.g. a ``(batch, vocab)`` tensor, or a 1-D vector).
        k: Number of entries to keep per row. ``0`` disables filtering.
            Values larger than the last dimension are clamped to it.

    Returns:
        ``logits`` itself when ``k == 0``. Otherwise a new tensor of the same
        shape in which every entry strictly smaller than the row's k-th
        largest value is replaced by ``-1e10``; entries tied with that
        threshold are kept.
    """
    if k == 0:
        return logits
    k = min(k, logits.size(-1))
    values, _ = torch.topk(logits, k)
    # Keep a trailing singleton dimension so each row's threshold broadcasts
    # across that row. A flat ``(batch,)`` threshold would instead be aligned
    # with the last axis, which is only correct for a single row.
    min_values = values[..., -1:]
    return torch.where(logits < min_values,
                       torch.ones_like(logits, dtype=logits.dtype) * -1e10,
                       logits)

class LM:
    """GPT-2 wrapper used for GLTR-style token statistics.

    Loads a GPT-2 tokenizer and LM-head model once and exposes
    ``check_probabilities`` (rank and probability of every token of a given
    text) and ``sample_unconditional`` (top-k sampling from the start token).
    """

    def __init__(self, model_name_or_path="gpt2", device="mps"):
        """Load tokenizer and model, move the model to ``device``, set eval mode.

        Args:
            model_name_or_path: Identifier or path handed to ``from_pretrained``
                for both the tokenizer and the model.
            device: Device the model is moved to and inputs are sent to.
        """
        self.device = device
        self.enc = GPT2Tokenizer.from_pretrained(model_name_or_path)
        self.model = GPT2LMHeadModel.from_pretrained(model_name_or_path)
        self.model.to(self.device)
        self.model.eval()
        # Encoded BOS token as a 1-D id tensor; prepended to every text in
        # `check_probabilities`.
        self.start_token = self.enc(self.enc.bos_token, return_tensors='pt').data['input_ids'][0]
        print(f"Loaded GPT-2 model! on {self.device}")

    def check_probabilities(self, in_text, topk=40):
        """Score every token of ``in_text`` under the model.

        Args:
            in_text: Text to analyse.
            topk: Number of most likely candidates reported per position.

        Returns:
            A dict with three entries:

            * ``'bpe_strings'``: post-processed token strings, including the
              prepended start token (one more entry than the other lists).
            * ``'real_topk'``: one ``(rank, probability)`` pair per input
              token, where ``rank`` is the 0-based position of the actual
              token in the model's descending-probability ordering and
              ``probability`` is rounded to 5 decimals.
            * ``'pred_topk'``: per input token, the ``topk`` most likely
              ``(token_string, probability)`` candidates.
        """
        # Process input: encode and prepend the start token so that the first
        # real token is also predicted from some context.
        token_ids = self.enc(in_text, return_tensors='pt').data['input_ids'][0]
        # `.long()` is a no-op for normal inputs; it guards against a
        # non-integer dtype, which an empty encoding may produce (assumption).
        token_ids = torch.concat([self.start_token, token_ids]).long()

        # Forward through the model with an explicit batch dimension and
        # without building an autograd graph (inference only).
        with torch.no_grad():
            output = self.model(token_ids.unsqueeze(0).to(self.device))
        # Row i is the prediction for token i + 1. The last row predicts past
        # the end of the text and is dropped. Indexing the batch with `[0]`
        # (rather than `squeeze()`) keeps the result 2-D even when the text is
        # a single token.
        all_logits = output.logits[0, :-1]
        all_probs = torch.softmax(all_logits, dim=1)

        # Targets: the tokens that actually followed each position.
        y = token_ids[1:]

        # Sort the predictions for each timestep. Every row of `sorted_preds`
        # is a permutation of the vocabulary, so the comparison below matches
        # exactly once per row and the column index of that match is the rank.
        sorted_preds = torch.argsort(all_probs, dim=1, descending=True).cpu()
        real_topk_pos = (sorted_preds == y.unsqueeze(1)).nonzero(as_tuple=True)[1].tolist()

        # Probability the model assigned to each actual token.
        y_on_device = y.to(all_probs.device).unsqueeze(1)
        real_topk_probs = all_probs.gather(1, y_on_device).squeeze(1).cpu().tolist()
        real_topk_probs = [round(p, 5) for p in real_topk_probs]

        # [(pos, prob), ...]
        real_topk = list(zip(real_topk_pos, real_topk_probs))

        # [str, str, ...]
        bpe_strings = [self.postprocess(s)
                       for s in self.enc.convert_ids_to_tokens(token_ids.tolist())]

        topk_prob_values, topk_prob_inds = torch.topk(all_probs, k=topk, dim=1)
        topk_prob_values = topk_prob_values.cpu().tolist()
        topk_prob_inds = topk_prob_inds.cpu().tolist()
        pred_topk = [
            [(self.postprocess(tok), prob)
             for tok, prob in zip(self.enc.convert_ids_to_tokens(inds), probs)]
            for inds, probs in zip(topk_prob_inds, topk_prob_values)
        ]

        payload = {'bpe_strings': bpe_strings,
                   'real_topk': real_topk,
                   'pred_topk': pred_topk}
        if torch.cuda.is_available():
            torch.cuda.empty_cache()

        return payload

    def sample_unconditional(self, length=100, topk=5, temperature=1.0):
        '''
        Sample `length` tokens from the model, starting from `<|endoftext|>`.
        Code strongly inspired by
        https://github.com/huggingface/pytorch-pretrained-BERT/blob/master/examples/run_gpt2.py

        Args:
            length: Number of tokens to generate after the start token.
            topk: Candidates kept at each step before sampling
                (forwarded to `top_k_logits`).
            temperature: Divisor applied to the logits before filtering.

        Returns:
            The decoded text of the start token followed by the sampled tokens.
        '''

        # Get the token ID
        start_token_id = self.enc.encode('<|endoftext|>', add_special_tokens=False)[0]

        context = torch.full((1, 1),
                             start_token_id,
                             device=self.device,
                             dtype=torch.long)
        prev = context
        output = context
        past = None
        # Forward through the model one token at a time, feeding only the
        # newest token plus the cached key/values from earlier steps.
        with torch.no_grad():
            for _ in range(length):
                outputs = self.model(prev, past_key_values=past)
                past = outputs.past_key_values
                logits = outputs.logits[:, -1, :] / temperature

                # Filter predictions to topk and softmax
                probs = torch.softmax(top_k_logits(logits, k=topk),
                                      dim=-1)
                # Sample
                prev = torch.multinomial(probs, num_samples=1)
                # Construct output
                output = torch.cat((output, prev), dim=1)

        return self.enc.decode(output[0].tolist())

    def postprocess(self, token):
        """Normalise one raw BPE token string for display.

        A leading 'Ġ' is stripped and re-attached as ``'\\u0120'`` after the
        remaining text has been normalised. A token starting with 'Ċ' becomes
        ``'\\u010A'`` followed by a space. A token starting with 'â' becomes a
        single space, while 'â' directly after a stripped 'Ġ' becomes '-'.
        Tokens starting with 'ľ', 'Ŀ' or 'Ļ' become '“', '”' and "'"
        respectively. Anything else is returned unchanged.

        Args:
            token: Token string as returned by ``convert_ids_to_tokens``.

        Returns:
            The display form of ``token``.
        """
        with_space = False
        with_break = False
        if token.startswith('Ġ'):
            with_space = True
            token = token[1:]
        elif token.startswith('â'):
            token = ' '
        elif token.startswith('Ċ'):
            token = ' '
            with_break = True

        # These checks run on the text left after the prefix handling above,
        # so 'â' can only match here when it followed a stripped 'Ġ'.
        token = '-' if token.startswith('â') else token
        token = '“' if token.startswith('ľ') else token
        token = '”' if token.startswith('Ŀ') else token
        token = "'" if token.startswith('Ļ') else token

        if with_space:
            token = '\u0120' + token
        if with_break:
            token = '\u010A' + token

        return token


In [50]:
# Tests for GPT-2: time one probability check and one unconditional sample.

raw_text = """
My name is bobae bak where lived in korea. 

"""

lm = LM()

# Time the per-token probability check on `raw_text`.
start = time.time()
payload = lm.check_probabilities(raw_text, topk=40)
end = time.time()
print(f"{end - start:.2f} Seconds for a check with GPT-2")

# Time an unconditional sample with the default settings.
start = time.time()
sample = lm.sample_unconditional()
end = time.time()
print(f"{end - start:.2f} Seconds for a sample from GPT-2")
print("SAMPLE:", sample)

Loaded GPT-2 model! on mps
0.41 Seconds for a check with GPT-2
2.37 Seconds for a sample from GPT-2
SAMPLE: <|endoftext|>
"We are very proud that our members are now able to join our community," said Mayor Ed Murray, who announced his support for the initiative on Twitter on Tuesday.

Murray added that he was "very excited to hear about our supporters and their support of the initiative."

The city will be donating $1,500 each to the campaign, which aims to help fund the project by paying for its initial $10,000 goal.

The city's first phase of the initiative


In [48]:
# Display the current value of `payload`, i.e. the dict most recently returned
# by `lm.check_probabilities(...)` in this notebook session.
payload

{'bpe_strings': ['<|endoftext|>',
  'Ċ ',
  'In',
  'Ġa',
  'Ġshocking',
  'Ġfinding',
  ',',
  'Ġscientist',
  'Ġdiscovered',
  'Ġa',
  'Ġherd',
  'Ġof',
  'Ġunic',
  'orns',
  'Ġliving',
  'Ġin',
  'Ġa',
  'Ġremote',
  ',',
  'Ġpreviously',
  'Ġunexpl',
  'ored',
  'Ġvalley',
  ',',
  'Ġin',
  'Ġthe',
  'ĠAnd',
  'es',
  'ĠMountains',
  '.',
  'ĠEven',
  'Ġmore',
  'Ġsurprising',
  'Ġto',
  'Ġthe',
  'Ġresearchers',
  'Ġwas',
  'Ġthe',
  'Ġfact',
  'Ġthat',
  'Ġthe',
  'Ġunic',
  'orns',
  'Ġspoke',
  'Ġperfect',
  'ĠEnglish',
  '.',
  'Ċ '],
 'real_topk': [(0, 0.0623),
  (2, 0.02149),
  (1, 0.09866),
  (16, 0.00723),
  (82, 0.00113),
  (0, 0.48222),
  (1516, 4e-05),
  (82, 0.00165),
  (1, 0.10015),
  (2767, 2e-05),
  (0, 0.97458),
  (869, 0.00016),
  (0, 0.99857),
  (11, 0.01713),
  (0, 0.54261),
  (0, 0.34731),
  (7, 0.01207),
  (5, 0.04114),
  (417, 0.00027),
  (2, 0.0744),
  (0, 0.99256),
  (8, 0.01505),
  (4, 0.04377),
  (1, 0.06449),
  (0, 0.17177),
  (68, 0.00269),
  (0, 0.562

In [42]:
# Tests for GPT-2: same timing run as before, but reporting only the top 5
# candidates per position. `raw_text` must already be defined in the session.
lm = LM()

# Time the per-token probability check on `raw_text`.
start = time.time()
payload = lm.check_probabilities(raw_text, topk=5)
end = time.time()
print(f"{end - start:.2f} Seconds for a check with GPT-2")

# Time an unconditional sample with the default settings.
start = time.time()
sample = lm.sample_unconditional()
end = time.time()
print(f"{end - start:.2f} Seconds for a sample from GPT-2")
print("SAMPLE:", sample)

Loaded GPT-2 model! on mps
0.12 Seconds for a check with GPT-2


TypeError: string indices must be integers

In [18]:
# Define the start token (usually <|endoftext|> for GPT-2)
start_token = '<|endoftext|>'

# Get the token ID: encode the string without adding special tokens and take
# the first id of the result.
# NOTE(review): `enc` is not defined in any visible cell; presumably a
# GPT2Tokenizer created earlier in the session. Confirm.
start_token_id = enc.encode(start_token, add_special_tokens=False)[0]
# Last expression of the cell, so its value is shown as the cell output.
start_token_id

50256

In [31]:
# Build a 1x1 int64 tensor on the MPS device filled with 50256, the id that
# encoding '<|endoftext|>' produced earlier in this notebook.
context = torch.full(
    (1, 1),
    50256,
    dtype=torch.long,
    device="mps",
)

# Last expression of the cell, so the tensor is shown as the cell output.
context

TypeError: full() received an invalid combination of arguments - got (tuple, list, dtype=torch.dtype, device=str), but expected one of:
 * (tuple of ints size, Number fill_value, *, tuple of names names, torch.dtype dtype, torch.layout layout, torch.device device, bool pin_memory, bool requires_grad)
 * (tuple of ints size, Number fill_value, *, Tensor out, torch.dtype dtype, torch.layout layout, torch.device device, bool pin_memory, bool requires_grad)


# DetectGPT

In [54]:
# parameters:
"""
pct_words_masked: 
    how many words are masked for perturbation
    pct_words_masked * (span_length / (span_length + 2 * buffer_size))
    (default) 0.3
span_length:
    (default) 2  
base_model_name:
    target model
    (default) gpt2-medium
mask_filling_model_name:
    masking model
    (default) t5-large
"""

'\npct_words_masked: \n    how many words masked for purturbation\n    pct_words_masked * (span_length / (span_length + 2 * buffer_size))\n    (default) 0.3\nspan_length:\n    (default) 2  \nbase_model_name:\n    target model\n    (default) gpt2-medium\nmask_filling_model_name:\n    masking model\n    (default) t5-large\n  \n'

In [68]:
import sys 
sys.path.append('/Users/bobaebak/git/ai_text_detection')

from pydantic import BaseModel
from models.detectgpt import *

In [75]:
class DetectGPTArgs(BaseModel):
    # Settings bundle for the DetectGPT cells; every field has a default, so
    # `DetectGPTArgs()` yields a complete configuration.

    # Model identifiers handed to the `from_pretrained` loaders.
    base_model_name: str = "gpt2"
    mask_filling_model_name: str = "t5-small"

    # Perturbation settings.
    pct_words_masked: float = 0.3
    span_length: int = 2
    n_perturbation_list: str = "1,10"  # comma-separated integers, parsed by the caller
    n_perturbation_rounds: int = 1

    # Download cache directory and number of samples.
    cache_dir: str = "../cache"
    n_samples: int = 1


args = DetectGPTArgs()

In [74]:
# Load the base (scored) causal LM and its tokenizer, load the T5 mask-filling
# model and its tokenizer, then run the perturbation experiment for every
# configured perturbation count and dump each result to JSON.
#
# NOTE(review): `transformers`, `os`, `json`, `outputs`, `SAVE_FOLDER`,
# `get_perturbation_results` and `run_perturbation_experiment` are not defined
# in this cell; presumably they come from `from models.detectgpt import *`.
# Confirm they exist in the session before running.

# generic generative model
base_model_kwargs = {}
optional_tok_kwargs = {}
base_model = transformers.AutoModelForCausalLM.from_pretrained(args.base_model_name, **base_model_kwargs, cache_dir=args.cache_dir)
base_tokenizer = transformers.AutoTokenizer.from_pretrained(args.base_model_name, **optional_tok_kwargs, cache_dir=args.cache_dir)
# Reuse the EOS token id as the padding id.
base_tokenizer.pad_token_id = base_tokenizer.eos_token_id

# mask filling t5 model
int8_kwargs = {}
half_kwargs = {}
# Optional reduced-precision loading (left disabled):
# int8_kwargs = dict(load_in_8bit=True, device_map='auto', torch_dtype=torch.bfloat16)
# half_kwargs = dict(torch_dtype=torch.bfloat16)
mask_model = transformers.AutoModelForSeq2SeqLM.from_pretrained(args.mask_filling_model_name, **int8_kwargs, **half_kwargs, cache_dir=args.cache_dir)
# Cap the mask tokenizer's maximum length at the mask model's configured
# number of positions.
n_positions = mask_model.config.n_positions
mask_tokenizer = transformers.AutoTokenizer.from_pretrained(args.mask_filling_model_name, model_max_length=n_positions, cache_dir=args.cache_dir)

# Parse the comma-separated perturbation counts, skipping empty entries such
# as a trailing comma.
n_perturbation_list = [int(x) for x in args.n_perturbation_list.split(",") if x.strip()]
# Read from the config instead of hard-coding 1 (the config default is 1).
n_perturbation_rounds = args.n_perturbation_rounds


for n_perturbations in n_perturbation_list:
    perturbation_results = get_perturbation_results(args.span_length, n_perturbations, args.n_samples)
    for perturbation_mode in ['d', 'z']:
        # Use `args.n_samples` here too, matching the call above; a bare
        # `n_samples` is not defined anywhere in this cell.
        output = run_perturbation_experiment(
            perturbation_results, perturbation_mode, span_length=args.span_length, n_perturbations=n_perturbations, n_samples=args.n_samples)
        outputs.append(output)
        with open(os.path.join(SAVE_FOLDER, f"perturbation_{n_perturbations}_{perturbation_mode}_results.json"), "w") as f:
            json.dump(output, f)

tokenizer_config.json:   0%|          | 0.00/2.32k [00:00<?, ?B/s]

spiece.model:   0%|          | 0.00/792k [00:00<?, ?B/s]

tokenizer.json:   0%|          | 0.00/1.39M [00:00<?, ?B/s]

# Fine tuned

In [76]:
# Load the RADAR sequence-classification detector and its tokenizer, switch
# the model to eval mode and move it to `device`.
# NOTE(review): `transformers` is not imported in any visible cell; presumably
# it comes from `from models.detectgpt import *`. Confirm.
device = "mps" # example: cuda:0
detector_path_or_id = "TrustSafeAI/RADAR-Vicuna-7B"
detector = transformers.AutoModelForSequenceClassification.from_pretrained(detector_path_or_id)
tokenizer = transformers.AutoTokenizer.from_pretrained(detector_path_or_id)
detector.eval()
# Last expression of the cell, so the moved model is shown as the cell output.
detector.to(device)

RobertaForSequenceClassification(
  (roberta): RobertaModel(
    (embeddings): RobertaEmbeddings(
      (word_embeddings): Embedding(50265, 1024, padding_idx=1)
      (position_embeddings): Embedding(514, 1024, padding_idx=1)
      (token_type_embeddings): Embedding(1, 1024)
      (LayerNorm): LayerNorm((1024,), eps=1e-05, elementwise_affine=True)
      (dropout): Dropout(p=0.1, inplace=False)
    )
    (encoder): RobertaEncoder(
      (layer): ModuleList(
        (0-23): 24 x RobertaLayer(
          (attention): RobertaAttention(
            (self): RobertaSelfAttention(
              (query): Linear(in_features=1024, out_features=1024, bias=True)
              (key): Linear(in_features=1024, out_features=1024, bias=True)
              (value): Linear(in_features=1024, out_features=1024, bias=True)
              (dropout): Dropout(p=0.1, inplace=False)
            )
            (output): RobertaSelfOutput(
              (dense): Linear(in_features=1024, out_features=1024, bias=True)
 