In [None]:
# References:
# https://huggingface.co/docs/transformers/perplexity
# https://github.com/BurhanUlTayyab/DetectGPT
# @misc{mitchell2023detectgpt,
#     url = {https://arxiv.org/abs/2301.11305},
#     author = {Mitchell, Eric and Lee, Yoonho and Khazatsky, Alexander and Manning, Christopher D. and Finn, Chelsea},
#     title = {DetectGPT: Zero-Shot Machine-Generated Text Detection using Probability Curvature},
#     publisher = {arXiv},
#     year = {2023},
# }

In [None]:
!pip -q install transformers==4.26.0
!pip -q install sentencepiece
!pip -q install accelerate

In [None]:
import time
import torch
import itertools
import math
import numpy as np
import random
import re
import transformers
from transformers import GPT2LMHeadModel, GPT2TokenizerFast
from transformers import pipeline
from transformers import T5Tokenizer
from transformers import AutoTokenizer

from collections import OrderedDict

from scipy.stats import norm
from multiprocessing.pool import ThreadPool

# Seed the global torch and numpy RNGs once when this cell runs.
# DetectGPT.mask seeds both again (also with 0) before it generates perturbations.
torch.manual_seed(0)
np.random.seed(0)

class DetectGPT:
    """
    Zero-shot detector for machine-generated text, based on Stanford's DetectGPT
    (https://arxiv.org/abs/2301.11305).

    The method rests on the assumption that minor rewrites of AI-generated
    text tend to have lower log likelihood under the model than the original
    sample, while minor rewrites of human-written text may have higher or lower
    log likelihood than the original sample.

    For every chunk of the input the detector generates perturbations (rewrites)
    with T5-small, scores the original chunk and every perturbation with GPT-2,
    and divides the gap between the original's log likelihood and the mean
    perturbation log likelihood by the standard deviation of the perturbation
    log likelihoods. A score above `threshold` is labelled as A.I.-generated.

    device: torch device string both models are moved to.
    chuck_size: approximate number of whitespace-separated words per chunk that
                `run` scores independently (the spelling is kept so existing
                callers passing `chuck_size=` keep working).
    stride: number of tokens the GPT-2 scoring window advances per step in
            `getLogLikelihood`. 1 for the most precise result, larger values
            reduce computation time.
    threshold: if the score is greater than threshold, the text is labelled A.I.
    """
    def __init__(self, device="cuda", chuck_size = 20, stride = 100, threshold = 0.7):
        self.device = device
        # GPT-2 is the scoring model whose log likelihoods are compared.
        self.score_model = GPT2LMHeadModel.from_pretrained("gpt2").to(device)
        self.score_tokenizer = GPT2TokenizerFast.from_pretrained("gpt2")

        self.chuck_size = chuck_size
        self.stride = stride
        self.threshold = threshold

        # T5-small fills the masked spans to produce the perturbed texts.
        self.t5_model = transformers.AutoModelForSeq2SeqLM.from_pretrained("t5-small").to(device)
        self.t5_tokenizer = T5Tokenizer.from_pretrained("t5-small", model_max_length=512)

    def run(self, sentence):
        """
        Score a text chunk by chunk and print/return the aggregated verdict.

        The text is split into roughly equal chunks of about `chuck_size` words.
        Chunks without any alphanumeric character, and chunks whose score is the
        -1 sentinel or not finite, are skipped.

        Returns a tuple `({"prob": "<xx.xx>%", "label": int}, verdict_string)`.
        NOTE: in the returned dict `label` is 0 for A.I. and 1 for Human, which
        is the opposite of the per-chunk labels printed as "labels" (1 = A.I.).
        The convention is kept for backward compatibility.

        Raises ValueError when no chunk of the input could be scored.
        """
        # Drop citation markers such as "[12]".
        sentence = re.sub(r"\[[0-9]+\]", "", sentence)

        words = re.split(r"[ \n]", sentence)
        groups = len(words) // self.chuck_size + 1
        # Spread the words evenly over the groups instead of leaving a short tail.
        stride = len(words) // groups + 1
        lines = []
        for start_pos in range(0, len(words), stride):
            selected_text = " ".join(words[start_pos:start_pos + stride]).strip()
            if selected_text:
                lines.append(selected_text)

        # chunk by chunk
        scores = []
        probs = []
        labels = []
        for line in lines:
            if re.search("[a-zA-Z0-9]+", line) is None:
                continue
            score, diff, sd = self.getScore(line)
            # -1 is the "no perturbation available" sentinel; nan/inf appear when
            # the perturbation likelihoods have zero spread.
            if score == -1 or not math.isfinite(score):
                continue
            scores.append(score)

            confidence = norm.cdf(abs(self.threshold - score)) * 100
            if score > self.threshold:
                labels.append(1)
                probs.append("{:.2f}%\n(A.I.)".format(confidence))
            else:
                labels.append(0)
                probs.append("{:.2f}%\n(Human)".format(confidence))

        if not scores:
            raise ValueError("DetectGPT.run: the input contains no text that could be scored")

        mean_score = sum(scores)/len(scores)
        mean_prob = norm.cdf(abs(self.threshold - mean_score)) * 100
        label = 0 if mean_score > self.threshold else 1

        print("probs: ", probs)
        print("labels: ", labels)
        print("scores: ", scores)

        print("mean_prob: ", mean_prob)
        print("mean_score: ", mean_score)
        print("label: ", label)

        print(f"probability for {'A.I.' if label == 0 else 'Human'}:", "{:.2f}%".format(mean_prob))
        return {"prob": "{:.2f}%".format(mean_prob), "label": label}, self.getVerdict(mean_score)

    def getScore(self, sentence):
        """
        Compute the normalised perturbation discrepancy of one chunk.

        Returns `(score, diff, std)` as floats, where `diff` is the original's
        log likelihood minus the mean log likelihood of its perturbations and
        `score` is `diff / std`. When no perturbation could be generated the
        sentinel `(-1, 0.0, 0.0)` is returned, so callers can always unpack
        three values. `score` may be nan/inf when `std` is zero.
        """
        original_sentence = sentence
        perturbed_sentences = self.mask(original_sentence, original_sentence)

        if len(perturbed_sentences) == 0:
            return -1, 0.0, 0.0

        real_log_likelihood = self.getLogLikelihood(original_sentence)

        generated_log_likelihoods = np.asarray([
            self.getLogLikelihood(perturbed).cpu().detach().numpy()
            for perturbed in perturbed_sentences
        ])
        mean_generated_log_likelihood = np.mean(generated_log_likelihoods)
        std_generated_log_likelihood = np.std(generated_log_likelihoods)

        diff = real_log_likelihood - mean_generated_log_likelihood

        score = diff/(std_generated_log_likelihood)

        return float(score), float(diff), float(std_generated_log_likelihood)

    def getLogLikelihood(self, sentence):
        """
        Return the average per-token log likelihood of `sentence` under the
        scoring model as a 0-d tensor, using the sliding-window scheme from the
        Hugging Face perplexity guide (window advances by `self.stride` tokens).
        """
        encodings = self.score_tokenizer(sentence, return_tensors="pt")
        seq_len = encodings.input_ids.size(1)

        nlls = []
        prev_end_loc = 0
        for begin_loc in range(0, seq_len, self.stride):
            end_loc = min(begin_loc + self.score_model.config.n_positions, seq_len)
            # Only the tokens not covered by the previous window count as targets.
            trg_len = end_loc - prev_end_loc
            input_ids = encodings.input_ids[:, begin_loc:end_loc].to(self.device)
            target_ids = input_ids.clone()
            # -100 marks context-only positions (ignored by the loss per the HF guide).
            target_ids[:, :-trg_len] = -100

            with torch.no_grad():
                outputs = self.score_model(input_ids, labels=target_ids)

                # outputs.loss is a mean, so scale it back up by the target count.
                neg_log_likelihood = outputs.loss * trg_len

            nlls.append(neg_log_likelihood)

            prev_end_loc = end_loc
            if end_loc == seq_len:
                break
        return -1 * torch.stack(nlls).sum() / end_loc

    def apply_extracted_fills(self, masked_texts, extracted_fills):
        """
        Substitute every `<extra_id_N>` placeholder in each masked text with the
        fill at the same position. Texts that have fewer fills than placeholders
        are dropped from the result.
        """
        placeholder = re.compile(r"<extra_id_\d+>")
        texts = []
        for text, fills in zip(masked_texts, extracted_fills):
            tokens = list(placeholder.finditer(text))
            if len(fills) < len(tokens):
                continue

            # Match positions refer to the unmodified text; `offset` tracks how far
            # earlier replacements have shifted everything after them.
            offset = 0
            for fill, token in zip(fills, tokens):
                start, end = token.span()
                text = text[:start+offset] + fill + text[end+offset:]
                offset = offset - (end - start) + len(fill)
            texts.append(text)

        return texts

    def unmasker(self, text, num_of_masks):
        """
        Fill the `<extra_id_N>` placeholders of a batch of masked texts with T5.

        text: list of masked strings.
        num_of_masks: list with the number of placeholders in each string.
        Returns the perturbed strings (see `apply_extracted_fills` for which
        entries are dropped). An empty batch yields an empty list.
        """
        if not text:
            return []

        # Generation stops at the sentinel that follows the last mask of the
        # most heavily masked text.
        num_of_masks = max(num_of_masks)
        stop_id = self.t5_tokenizer.encode(f"<extra_id_{num_of_masks}>")[0]
        tokens = self.t5_tokenizer(text, return_tensors="pt", padding=True)
        for key in tokens:
            tokens[key] = tokens[key].to(self.device)

        output_sequences = self.t5_model.generate(**tokens, max_length=512, do_sample=True, top_p=0.96, num_return_sequences=1, eos_token_id=stop_id)
        results = self.t5_tokenizer.batch_decode(output_sequences, skip_special_tokens=False)

        texts = [x.replace("<pad>", "").replace("</s>", "").strip() for x in results]
        pattern = re.compile(r"<extra_id_\d+>")
        # The pieces between consecutive sentinels are the generated fills.
        extracted_fills = [pattern.split(x)[1:-1] for x in texts]
        extracted_fills = [[y.strip() for y in x] for x in extracted_fills]

        perturbed_texts = self.apply_extracted_fills(text, extracted_fills)

        return perturbed_texts

    def replaceMask(self, text, num_of_masks):
        """Run `unmasker` with gradient tracking disabled and return its result."""
        with torch.no_grad():
            list_generated_texts = self.unmasker(text, num_of_masks)

        return list_generated_texts

    # code took reference from https://github.com/eric-mitchell/detect-gpt
    def maskRandomWord(self, text, ratio):
        """
        Replace random, non-adjacent two-token spans of `text` with T5 sentinels.

        ratio: word budget; `ratio // 4` spans are requested.
        Returns `(masked_text, n_masks)` where `n_masks` is the number of spans
        actually masked. It can be lower than requested when the text is too
        short for another span to fit.
        """
        span = 2
        tokens = text.split(' ')
        mask_string = '<<<mask>>>'

        n_spans = ratio//(span + 2)

        n_masks = 0
        attempts = 0
        # Random placement is rejection-sampled; bound the retries so a text with
        # no room left for another span cannot loop forever.
        max_attempts = 100 * max(n_spans, 0)
        while n_masks < n_spans and attempts < max_attempts:
            if len(tokens) <= span:
                # np.random.randint needs a non-empty range of start positions.
                break
            attempts += 1
            start = np.random.randint(0, len(tokens) - span)
            end = start + span
            search_start = max(0, start - 1)
            search_end = min(len(tokens), end + 1)
            if mask_string not in tokens[search_start:search_end]:
                tokens[start:end] = [mask_string]
                n_masks += 1

        # replace each occurrence of mask_string with <extra_id_NUM>, where NUM increments
        num_filled = 0
        for idx, token in enumerate(tokens):
            if token == mask_string:
                tokens[idx] = f'<extra_id_{num_filled}>'
                num_filled += 1
        assert num_filled == n_masks, f"num_filled {num_filled} != n_masks {n_masks}"
        text = ' '.join(tokens)
        return text, n_masks

    def multiMaskRandomWord(self, text, ratio, n):
        """Call `maskRandomWord` n times; return the masked texts and their mask counts."""
        mask_texts = []
        list_num_of_masks = []
        for _ in range(n):
            mask_text, num_of_masks = self.maskRandomWord(text, ratio)
            mask_texts.append(mask_text)
            list_num_of_masks.append(num_of_masks)
        return mask_texts, list_num_of_masks

    def getGeneratedTexts(self, args):
        """
        Produce perturbations of one text.

        args: `(original_text, n)` tuple (a single argument so the method can be
              used with `ThreadPool.map`).
        Returns the list of perturbed texts from `replaceMask`.
        """
        original_text = args[0]
        n = args[1]
        texts = list(re.finditer(r"[^\d\W]+", original_text))
        # Word budget for masking: 30% of the words found by the regex above.
        ratio = int(0.3 * len(texts))

        mask_texts, list_num_of_masks = self.multiMaskRandomWord(original_text, ratio, n)
        list_generated_sentences = self.replaceMask(mask_texts, list_num_of_masks)
        return list_generated_sentences

    def mask(self, original_text, text, n=50, remaining=50):
        """
        Generate perturbations of `original_text` in `remaining // n` batches of n.

        original_text: string to perturb
        text: unused, kept for interface compatibility
        n: number of perturbations requested per batch
        remaining: total number of perturbation slots to fill

        Returns a flat list of perturbed strings; an empty list when no full
        batch fits (`remaining < n`) or when `n`/`remaining` is not positive.
        """
        if remaining <= 0 or n <= 0:
            return []

        n_batches = remaining // n
        if n_batches == 0:
            return []

        # Re-seed so repeated calls start from the same RNG state.
        torch.manual_seed(0)
        np.random.seed(0)
        # The context manager shuts the worker threads down once map() returns.
        with ThreadPool(n_batches) as pool:
            out_sentences = pool.map(self.getGeneratedTexts, [(original_text, n) for _ in range(n_batches)])

        return list(itertools.chain.from_iterable(out_sentences))

    def getVerdict(self, score):
        """Return the human-readable verdict for a score relative to the threshold."""
        if score < self.threshold:
            return "This text is most likely written by an Human"
        else:
            return "This text is most likely generated by an A.I."

In [None]:
# Build the detector on the CPU (the class default is "cuda"); chuck_size, stride
# and threshold keep their default values.
detector = DetectGPT(device = "cpu")

In [None]:
# Sample passage used below as the human-written input (per the variable name).
# NOTE(review): the notebook does not record where this text comes from — add the source.
human_text = """
All children, except one, grow up. They soon know that they will grow up, and the way Wendy knew was this. One day when she was two years old she was playing in a garden, and she plucked another flower and ran with it to her mother. I suppose she must have looked rather delightful, for Mrs. Darling put her hand to her heart and cried, “Oh, why can’t you remain like this for ever!” This was all that passed between them on the subject, but henceforth Wendy knew that she must grow up. You always know after you are two. Two is the beginning of the end.  Of course they lived at 14, and until Wendy came her mother was the chief one. She was a lovely lady, with a romantic mind and such a sweet mocking mouth. Her romantic mind was like the tiny boxes, one within the other, that come from the puzzling East, however many you discover there is always one more; and her sweet mocking mouth had one kiss on it that Wendy could never get, though there it was, perfectly conspicuous in the right-hand corner.  The way Mr. Darling won her was this: the many gentlemen who had been boys when she was a girl discovered simultaneously that they loved her, and they all ran to her house to propose to her except Mr. Darling, who took a cab and nipped in first, and so he got her. He got all of her, except the innermost box and the kiss. He never knew about the box, and in time he gave up trying for the kiss. Wendy thought Napoleon could have got it, but I can picture him trying, and then going off in a passion, slamming the door.  Mr. Darling used to boast to Wendy that her mother not only loved him but respected him. He was one of those deep ones who know about stocks and shares. Of course no one really knows, but he quite seemed to know, and he often said stocks were up and shares were down in a way that would have made any woman respect him.
"""

In [None]:
# Score the human_text sample; the cell value is the (summary dict, verdict string)
# pair returned by run(), shown after the diagnostics that run() prints.
detector.run(human_text)

probs:  ['61.60%\n(Human)', '50.27%\n(Human)', '58.70%\n(Human)', '56.33%\n(Human)', '67.32%\n(Human)', '79.97%\n(Human)', '68.78%\n(Human)', '70.83%\n(Human)', '63.30%\n(Human)', '61.54%\n(Human)', '71.88%\n(Human)', '56.16%\n(Human)', '72.78%\n(Human)', '81.38%\n(Human)', '59.67%\n(A.I.)', '60.55%\n(Human)', '71.77%\n(Human)', '52.78%\n(A.I.)', '61.79%\n(A.I.)']
labels:  [0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 0, 1, 1]
scores:  [0.4048844873905182, 0.6931112408638, 0.48024046421051025, 0.5405792593955994, 0.25136443972587585, -0.1406182199716568, 0.21025952696800232, 0.15155057609081268, 0.360275536775589, 0.40649646520614624, 0.12058395147323608, 0.5450475215911865, 0.09394136071205139, -0.19211921095848083, 0.9449411034584045, 0.4324142038822174, 0.12408289313316345, 0.769826352596283, 1.0]
mean_prob:  62.597732501520184
mean_score:  0.3787822080285926
label:  1
probability for Human: 62.60%


({'prob': '62.60%', 'label': 1},
 'This text is most likely written by an Human')

In [None]:
# Sample essay used below as the GPT-3 input (per the variable name).
# NOTE(review): the prompt/model that produced this text is not recorded — confirm and document.
gpt3_text = """
In Aldous Huxley's novel "Brave New World," Mustapha Mond is portrayed as a powerful and mysterious figure. The novel depicts a dystopian society in which the government, led by Mond, maintains strict control over its citizens through the use of advanced technology and manipulation of emotions. Despite this, I argue that Mond should be viewed positively for three key reasons: his efforts to maintain stability in society, his recognition of the limitations of happiness, and his belief in individual freedom.  Firstly, Mond's role as World Controller is to maintain stability in society. He recognizes that in order for society to function, there must be a balance between individual desires and the needs of the community. He also understands that in order to maintain this balance, it is necessary to control certain aspects of society, such as the use of technology and the manipulation of emotions. This is evident in his decision to ban literature, which he believes will cause dissent and disrupt the stability of society. In this way, Mond can be seen as a pragmatic leader who is willing to make difficult decisions for the greater good.  Secondly, Mond recognizes the limitations of happiness. In the novel, the government encourages the citizens to pursue pleasure and happiness at all times, but Mond understands that this is not a sustainable or fulfilling way of life. He acknowledges that true happiness cannot be found through constant pleasure and that individuals need to find meaning and purpose in their lives. This is evident in his statement, "ending is better than mending. The more stitches, the less riches." This quote shows that Mond recognizes that true happiness cannot be found in constant pleasure, but rather in finding meaning in one's life.  Lastly, Mond believes in individual freedom. Despite his role in controlling society, he recognizes that individuals have the right to make their own choices and live their lives as they see fit. 
This is evident in his decision to provide a reservation for those who do not want to conform to the rules of society. This shows that Mond understands that individuals should have the freedom to live their lives as they choose, even if it means going against the norms of society.  In conclusion, Mustapha Mond, the World Controller in Aldous Huxley's "Brave New World," should be viewed positively for his efforts to maintain stability in society, his recognition of the limitations of happiness and his belief in individual freedom. Although his methods are controversial, they are necessary to maintain the balance of society. His recognition of the limitations of happiness and his belief in individual freedom also show that he is a nuanced and thoughtful leader who understands the complexity of human nature.
"""

In [None]:
# Score the gpt3_text sample; the cell value is the (summary dict, verdict string)
# pair returned by run().
detector.run(gpt3_text)

probs:  ['50.86%\n(A.I.)', '65.41%\n(Human)', '76.18%\n(Human)', '60.78%\n(Human)', '88.45%\n(Human)', '62.19%\n(Human)', '54.21%\n(A.I.)', '54.91%\n(Human)', '74.74%\n(Human)', '69.41%\n(Human)', '83.73%\n(Human)', '58.54%\n(Human)', '51.61%\n(A.I.)', '69.59%\n(Human)', '75.56%\n(Human)', '65.85%\n(Human)', '53.05%\n(A.I.)', '70.88%\n(Human)', '63.61%\n(Human)', '56.11%\n(Human)', '62.52%\n(Human)', '56.67%\n(Human)', '62.64%\n(A.I.)']
labels:  [1, 0, 0, 0, 0, 0, 1, 0, 0, 0, 0, 0, 1, 0, 0, 0, 1, 0, 0, 0, 0, 0, 1]
scores:  [0.7215920686721802, 0.3034645617008209, -0.012062099762260914, 0.42642199993133545, -0.4977119266986847, 0.38957303762435913, 0.8056710362434387, 0.5766503214836121, 0.033785343170166016, 0.192595973610878, -0.28360387682914734, 0.48418810963630676, 0.7403052449226379, 0.1873776614665985, 0.0076712737791240215, 0.2915721833705902, 0.7764610648155212, 0.1501644253730774, 0.35187166929244995, 0.5461698770523071, 0.38077831268310547, 0.5321094989776611, 1.0224411487579

({'prob': '63.56%', 'label': 1},
 'This text is most likely written by an Human')

In [None]:
# Sample article used below as the GPT-2 input (per the variable name).
# NOTE(review): the origin of this sample is not recorded in the notebook — confirm and document.
gpt2_text = """
The scientist named the population, after their distinctive horn, Ovid’s Unicorn. These four-horned, silver-white unicorns were previously unknown to science.  Now, after almost two centuries, the mystery of what sparked this odd phenomenon is finally solved.  Dr. Jorge Pérez, an evolutionary biologist from the University of La Paz, and several companions, were exploring the Andes Mountains when they found a small valley, with no other animals or humans. Pérez noticed that the valley had what appeared to be a natural fountain, surrounded by two peaks of rock and silver snow.  Pérez and the others then ventured further into the valley. “By the time we reached the top of one peak, the water looked blue, with some crystals on top,” said Pérez.  Pérez and his friends were astonished to see the unicorn herd. These creatures could be seen from the air without having to move too much to see them – they were so close they could touch their horns.  While examining these bizarre creatures the scientists discovered that the creatures also spoke some fairly regular English. Pérez stated, “We can see, for example, that they have a common ‘language,’ something like a dialect or dialectic.”  Dr. Pérez believes that the unicorns may have originated in Argentina, where the animals were believed to be descendants of a lost race of people who lived there before the arrival of humans in those parts of South America.  While their origins are still unclear, some believe that perhaps the creatures were created when a human and a unicorn met each other in a time before human civilization. According to Pérez, “In South America, such incidents seem to be quite common.”  However, Pérez also pointed out that it is likely that the only way of knowing for sure if unicorns are indeed the descendants of a lost alien race is through DNA. 
“But they seem to be able to communicate in English quite well, which I believe is a sign of evolution, or at least a change in social organization,” said the scientist.
"""

In [None]:
# Score the gpt2_text sample; the cell value is the (summary dict, verdict string)
# pair returned by run().
detector.run(gpt2_text)

probs:  ['80.64%\n(Human)', '66.05%\n(Human)', '76.69%\n(Human)', '75.90%\n(Human)', '76.90%\n(Human)', '78.77%\n(Human)', '75.26%\n(Human)', '51.65%\n(A.I.)', '80.77%\n(Human)', '72.58%\n(Human)', '65.50%\n(Human)', '58.80%\n(A.I.)', '73.70%\n(Human)', '61.17%\n(Human)', '67.95%\n(Human)', '62.27%\n(Human)', '62.47%\n(A.I.)', '61.79%\n(A.I.)']
labels:  [0, 0, 0, 0, 0, 0, 0, 1, 0, 0, 0, 1, 0, 0, 0, 0, 1, 1]
scores:  [-0.1647581160068512, 0.2862550914287567, -0.028605831786990166, -0.003081496339291334, -0.03552785888314247, -0.0983869805932045, 0.017453771084547043, 0.7414236068725586, -0.16943992674350739, 0.09976842254400253, 0.3012216091156006, 0.9224386215209961, 0.06602905690670013, 0.4162788689136505, 0.2337300181388855, 0.3874930441379547, 1.017975926399231, 1.0]
mean_prob:  66.3765870254372
mean_score:  0.27723710148388314
label:  1
probability for Human: 66.38%


({'prob': '66.38%', 'label': 1},
 'This text is most likely written by an Human')

In [None]:
# Same call as the previous cell.
# NOTE(review): the recorded output below lists 7 chunk scores while the previous
# cell lists 18 for the same text, which suggests `detector` was re-created with
# different settings (e.g. a larger chuck_size) in a cell that is not in this
# notebook — confirm, and make the configuration explicit so Restart & Run All
# reproduces both outputs.
detector.run(gpt2_text)

probs:  ['55.41%\n(A.I.)', '58.75%\n(Human)', '63.61%\n(Human)', '56.05%\n(Human)', '65.73%\n(Human)', '75.34%\n(Human)', '61.79%\n(A.I.)']
labels:  [1, 0, 0, 0, 0, 0, 1]
scores:  [0.836114764213562, 0.4789677560329437, 0.35201725363731384, 0.5477941036224365, 0.2949652075767517, 0.01479245349764824, 1.0]
mean_prob:  57.78821123364709
mean_score:  0.5035216483686652
label:  1
probability for Human: 57.79%


({'prob': '57.79%', 'label': 1},
 'This text is most likely written by an Human')