In [1]:
# prompt: install datasets
# !pip install datasets

In [2]:
# !pip install transformers

In [3]:
# !pip install torch numpy tqdm openai nltk matplotlib

In [4]:
# %conda activate F_GPT

# Fast Detect GPT

In [5]:

from transformers import AutoModelForCausalLM, AutoTokenizer
import torch
import time
import os

def from_pretrained(cls, model_name, kwargs, cache_dir):
    """Load ``cls`` from a local snapshot, downloading and snapshotting it on a miss.

    The snapshot lives in ``<cache_dir>/local.<model_name with "/" replaced by "_">``.

    :param cls: any class exposing ``from_pretrained`` whose instances expose ``save_pretrained``
    :param model_name: identifier handed to ``cls.from_pretrained`` when the snapshot is unusable
    :param kwargs: dict of extra keyword arguments forwarded to ``cls.from_pretrained``
    :param cache_dir: directory holding the snapshot; also forwarded as ``cache_dir`` on download
    :return: the loaded object
    """
    snapshot_dir = os.path.join(cache_dir, 'local.' + model_name.replace("/", "_"))
    try:
        return cls.from_pretrained(snapshot_dir, **kwargs)
    except Exception as load_error:
        # Best effort: report why the snapshot could not be used, then fall back below.
        print(load_error)
    loaded = cls.from_pretrained(model_name, **kwargs, cache_dir=cache_dir)
    loaded.save_pretrained(snapshot_dir)
    return loaded

# Predefined models: short aliases used in this notebook mapped to the identifier
# that is handed to `from_pretrained`. Names that are not listed here are used as-is
# by get_model_fullname().
model_fullnames = {  'gpt2': 'gpt2',
                     'gpt2-xl': 'gpt2-xl',
                     'opt-2.7b': 'facebook/opt-2.7b',
                     'gpt-neo-2.7B': 'EleutherAI/gpt-neo-2.7B',
                     'gpt-j-6B': 'EleutherAI/gpt-j-6B',
                     'gpt-neox-20b': 'EleutherAI/gpt-neox-20b',
                     'mgpt': 'sberbank-ai/mGPT',
                     'pubmedgpt': 'stanford-crfm/pubmedgpt',
                     'mt5-xl': 'google/mt5-xl',
                     'llama-13b': 'huggyllama/llama-13b',
                     'llama2-13b': 'TheBloke/Llama-2-13B-fp16',
                     'bloom-7b1': 'bigscience/bloom-7b1',
                     'opt-13b': 'facebook/opt-13b',
                     }
# Aliases (keys of model_fullnames, not full identifiers) that load_model() loads
# with torch_dtype=torch.float16.
float16_models = ['gpt-j-6B', 'gpt-neox-20b', 'llama-13b', 'llama2-13b', 'bloom-7b1', 'opt-13b']

def get_model_fullname(model_name):
    """Translate a short alias into its full identifier; unknown names are returned unchanged."""
    return model_fullnames.get(model_name, model_name)

def load_model(model_name, device, cache_dir):
    """Load a causal language model and move it to ``device``.

    :param model_name: short alias or full model identifier
    :param device: target passed to ``model.to``
    :param cache_dir: cache directory forwarded to ``from_pretrained``
    :return: the loaded model, already moved to ``device``
    """
    model_fullname = get_model_fullname(model_name)
    print(f'Loading model {model_fullname}...')

    # Half precision only for the aliases listed in float16_models.
    wants_half = model_name in float16_models
    model_kwargs = {'torch_dtype': torch.float16} if wants_half else {}
    if 'gpt-j' in model_name:
        model_kwargs['revision'] = 'float16'

    model = from_pretrained(AutoModelForCausalLM, model_fullname, model_kwargs, cache_dir)

    print('Moving model to GPU...', end='', flush=True)
    started = time.time()
    model.to(device)
    elapsed = time.time() - started
    print(f'DONE ({elapsed:.2f}s)')
    return model

def load_tokenizer(model_name, for_dataset, cache_dir):
    """Load the tokenizer that belongs to ``model_name``.

    :param model_name: short alias or full model identifier
    :param for_dataset: dataset name; ``'pubmed'`` selects left padding, anything else right padding
    :param cache_dir: cache directory forwarded to ``from_pretrained``
    :return: the tokenizer, guaranteed to have a ``pad_token_id``
    """
    model_fullname = get_model_fullname(model_name)
    optional_tok_kwargs = {}
    if "facebook/opt-" in model_fullname:
        print("Using non-fast tokenizer for OPT")
        # `use_fast` is the option AutoTokenizer.from_pretrained understands; the previous
        # key `fast` was not a recognised option, so the slow tokenizer was never selected.
        optional_tok_kwargs['use_fast'] = False
    if for_dataset in ['pubmed']:
        optional_tok_kwargs['padding_side'] = 'left'
    else:
        optional_tok_kwargs['padding_side'] = 'right'
    base_tokenizer = from_pretrained(AutoTokenizer, model_fullname, optional_tok_kwargs, cache_dir=cache_dir)
    if base_tokenizer.pad_token_id is None:
        # No dedicated pad token: reuse EOS, except for identifiers containing '13b', which use id 0.
        base_tokenizer.pad_token_id = base_tokenizer.eos_token_id
        if '13b' in model_fullname:
            base_tokenizer.pad_token_id = 0
    return base_tokenizer


  from .autonotebook import tqdm as notebook_tqdm


In [6]:
# !pip install scikit-learn

In [7]:

import matplotlib.pyplot as plt
from sklearn.metrics import roc_curve, precision_recall_curve, auc

# Plot palette with 15 entries: 8 distinct colorblind-friendly colors, where the
# first 7 are repeated after "#000000" to fill the remaining slots.
COLORS = ["#0072B2", "#009E73", "#D55E00", "#CC79A7", "#F0E442",
            "#56B4E9", "#E69F00", "#000000", "#0072B2", "#009E73",
            "#D55E00", "#CC79A7", "#F0E442", "#56B4E9", "#E69F00"]


def get_roc_metrics(real_preds, sample_preds):
    """Compute the ROC curve and its AUC, treating ``sample_preds`` as the positive class.

    :param real_preds: list of scores for real texts (label 0)
    :param sample_preds: list of scores for sampled texts (label 1)
    :return: ``(fpr, tpr, roc_auc)`` as two lists and a float
    """
    targets = [0] * len(real_preds) + [1] * len(sample_preds)
    scores = real_preds + sample_preds
    fpr, tpr, _ = roc_curve(targets, scores)
    return fpr.tolist(), tpr.tolist(), float(auc(fpr, tpr))


def get_precision_recall_metrics(real_preds, sample_preds):
    """Compute the precision-recall curve and its AUC, treating ``sample_preds`` as positives.

    :param real_preds: list of scores for real texts (label 0)
    :param sample_preds: list of scores for sampled texts (label 1)
    :return: ``(precision, recall, pr_auc)`` as two lists and a float
    """
    targets = [0] * len(real_preds) + [1] * len(sample_preds)
    scores = real_preds + sample_preds
    precision, recall, _ = precision_recall_curve(targets, scores)
    return precision.tolist(), recall.tolist(), float(auc(recall, precision))


In [8]:
import random

import numpy as np
import torch
import torch.nn.functional as F
import tqdm
import argparse
import json

In [9]:

def get_samples(logits, labels, nsamples=10000):
    """Draw alternative tokens for every position from the distribution given by ``logits``.

    :param logits: tensor whose first dimension is 1 and whose last dimension is the vocabulary
    :param labels: tensor whose first dimension is 1; only checked, not otherwise used
    :param nsamples: number of independent draws per position (previously fixed at 10000)
    :return: tensor of sampled token ids with the draws along the last dimension,
        i.e. ``(1, seq_len, nsamples)`` for ``(1, seq_len, vocab)`` logits
    """
    assert logits.shape[0] == 1
    assert labels.shape[0] == 1
    lprobs = torch.log_softmax(logits, dim=-1)
    distrib = torch.distributions.categorical.Categorical(logits=lprobs)
    # sample() puts the draw axis first; move it to the end.
    samples = distrib.sample([nsamples]).permute([1, 2, 0])
    return samples

def get_likelihood(logits, labels):
    """Average the log-probability of ``labels`` under ``logits`` over dimension 1.

    :param logits: tensor whose first dimension is 1 and whose last dimension is the vocabulary
    :param labels: token ids, either one per position or with a trailing axis of
        several candidates per position
    :return: mean log-likelihood with one entry per candidate on the last axis
    """
    assert logits.shape[0] == 1
    assert labels.shape[0] == 1
    if labels.ndim == logits.ndim - 1:
        # One label per position: add the index axis that gather expects.
        labels = labels.unsqueeze(-1)
    token_lprobs = torch.gather(torch.log_softmax(logits, dim=-1), -1, labels)
    return token_lprobs.mean(dim=1)

def get_sampling_discrepancy(logits_ref, logits_score, labels):
    """Estimate the sampling discrepancy of ``labels`` by Monte-Carlo sampling.

    Tokens are resampled from ``logits_ref`` and scored under ``logits_score``; the
    result is the log-likelihood of ``labels`` standardised by the mean and standard
    deviation of the resampled log-likelihoods.

    :param logits_ref: reference-model logits, first dimension 1
    :param logits_score: scoring-model logits, first dimension 1
    :param labels: observed token ids, first dimension 1
    :return: the discrepancy as a Python float
    """
    for batched in (logits_ref, logits_score, labels):
        assert batched.shape[0] == 1

    ref_vocab, score_vocab = logits_ref.size(-1), logits_score.size(-1)
    if ref_vocab != score_vocab:
        # Vocabulary sizes differ: keep only the leading entries both models share.
        shared_vocab = min(ref_vocab, score_vocab)
        logits_ref = logits_ref[:, :, :shared_vocab]
        logits_score = logits_score[:, :, :shared_vocab]

    resampled_tokens = get_samples(logits_ref, labels)
    observed = get_likelihood(logits_score, labels)
    resampled = get_likelihood(logits_score, resampled_tokens)
    centred = observed.squeeze(-1) - resampled.mean(dim=-1)
    return (centred / resampled.std(dim=-1)).item()

def get_sampling_discrepancy_analytic(logits_ref, logits_score, labels):
    """Compute the sampling discrepancy of ``labels`` in closed form (no sampling).

    The expectation and variance of the scoring model's log-probability are taken
    under the reference distribution at each position, summed over positions, and
    used to standardise the summed log-likelihood of ``labels``.

    :param logits_ref: reference-model logits, first dimension 1
    :param logits_score: scoring-model logits, first dimension 1
    :param labels: observed token ids, first dimension 1
    :return: the discrepancy as a Python float
    """
    for batched in (logits_ref, logits_score, labels):
        assert batched.shape[0] == 1

    ref_vocab, score_vocab = logits_ref.size(-1), logits_score.size(-1)
    if ref_vocab != score_vocab:
        # Vocabulary sizes differ: keep only the leading entries both models share.
        shared_vocab = min(ref_vocab, score_vocab)
        logits_ref = logits_ref[:, :, :shared_vocab]
        logits_score = logits_score[:, :, :shared_vocab]

    if labels.ndim == logits_score.ndim - 1:
        labels = labels.unsqueeze(-1)

    lprobs_score = torch.log_softmax(logits_score, dim=-1)
    probs_ref = torch.softmax(logits_ref, dim=-1)

    observed = torch.gather(lprobs_score, -1, labels).squeeze(-1)
    expected = torch.sum(probs_ref * lprobs_score, dim=-1)
    second_moment = torch.sum(probs_ref * torch.square(lprobs_score), dim=-1)
    variance = second_moment - torch.square(expected)

    numerator = observed.sum(dim=-1) - expected.sum(dim=-1)
    standardised = numerator / variance.sum(dim=-1).sqrt()
    return standardised.mean().item()



In [10]:
import random

import numpy as np
import torch
import os
import glob
import argparse
import json
import transformers
import datasets

In [11]:
# !git clone https://github.com/baoguangsheng/fast-detect-gpt.git

In [12]:
# %cd fast-detect-gpt
%cd /home/ziangcao2022/workspace/CS330/STF_CS330_FastGPT/private_support_code/fast-detect-gpt

/mnt/disks/disk/CS330/STF_CS330_FastGPT/private_support_code/fast-detect-gpt


  self.shell.db['dhist'] = compress_dhist(dhist)[-100:]


In [13]:
%ls

[0m[34;42mCS330_FastGPT_med_cuda[0m/  [01;32mREADME.md[0m*         [34;42mlocal_infer_ref[0m/   [01;32mtemperature.sh[0m*
[01;32mFirstAgent.zip[0m*          [34;42m__pycache__[0m/       [01;32mmain.sh[0m*           [34;42mtensorboard_log[0m/
[01;32mGPT_NEW_ACT.py[0m*          [01;32mattack.sh[0m*         [01;32mmain_ext.sh[0m*       [01;32mtopk.sh[0m*
[01;32mGPT_NEW_ACT_A2C.py[0m*      [01;32mcommon_helper.py[0m*  [01;32mrequirements.txt[0m*  [01;32mtopp.sh[0m*
[01;32mGPT_NEW_ACT_DQN.py[0m*      [34;42mexp_gpt3to4[0m/       [34;42mscripts[0m/
[01;32mGPT_OLD_ACT.py[0m*          [34;42mexp_main[0m/          [01;32msetup.sh[0m*
[01;32mLICENSE[0m*                 [01;32mgpt3to4.sh[0m*        [01;32msupervised.sh[0m*


In [14]:
# Detector configuration, read as module-level globals by ProbEstimator and FastDetectGPT.
# Alternative model pair, kept for reference:
# reference_model_name = "gpt-j-6B"
# scoring_model_name = "gpt-neo-2.7B"

# FastDetectGPT loads a separate reference model only when the two names differ.
reference_model_name = "gpt2"
scoring_model_name = "gpt2"


# Dataset name handed to load_tokenizer() (selects the padding side).
dataset = "xsum"
# Directory whose *.json files ProbEstimator reads; relative to the working directory.
ref_path = "./local_infer_ref"
# Device the detector models and tokenized inputs are moved to.
device = "cuda"
# Cache directory for models and tokenizers; relative to the working directory.
cache_dir = "../cache"

In [15]:
class ProbEstimator:
    """Turn a detection criterion into an empirical probability of being machine-generated.

    Reference criteria are read from every ``*.json`` file in the module-level
    ``ref_path`` directory; each file must hold the lists ``['predictions']['real']``
    and ``['predictions']['samples']``.
    """

    def __init__(self):
        self.real_crits = []
        self.fake_crits = []
        for result_file in glob.glob(os.path.join(ref_path, '*.json')):
            with open(result_file, 'r') as fin:
                res = json.load(fin)
            self.real_crits.extend(res['predictions']['real'])
            self.fake_crits.extend(res['predictions']['samples'])
        # Count both lists explicitly instead of assuming they have equal length.
        print(f'ProbEstimator: total {len(self.real_crits) + len(self.fake_crits)} samples.')

    def crit_to_prob(self, crit):
        """Estimate the share of "fake" references among the nearest neighbours of ``crit``.

        The window half-width is the distance to the 101st closest reference value
        (or to the farthest one when fewer references exist). References strictly
        inside the window are counted.

        :param crit: criterion value of the text under test
        :return: ``cnt_fake / (cnt_real + cnt_fake)`` within the window
        :raises ValueError: when no reference criteria are loaded
        """
        # Convert once per call; the attributes stay plain lists for external users.
        real = np.asarray(self.real_crits, dtype=float)
        fake = np.asarray(self.fake_crits, dtype=float)
        total = real.size + fake.size
        if total == 0:
            raise ValueError('ProbEstimator has no reference criteria; check ref_path.')

        distances = np.sort(np.abs(np.concatenate([real, fake]) - crit))
        # Clamp the neighbour index so small reference sets do not raise IndexError.
        offset = distances[min(100, total - 1)]
        lower, upper = crit - offset, crit + offset

        cnt_real = np.sum((real > lower) & (real < upper))
        cnt_fake = np.sum((fake > lower) & (fake < upper))
        if cnt_real + cnt_fake == 0:
            # Every neighbour sits exactly on the window edge (ties or a tiny reference
            # set). Closing the window always captures at least one value, so the
            # ratio below is never 0/0.
            cnt_real = np.sum((real >= lower) & (real <= upper))
            cnt_fake = np.sum((fake >= lower) & (fake <= upper))
        return cnt_fake / (cnt_real + cnt_fake)

In [16]:
class FastDetectGPT:
    """Local Fast-DetectGPT detector built from the module-level configuration.

    Reads the globals ``device``, ``scoring_model_name``, ``reference_model_name``,
    ``dataset`` and ``cache_dir``. A separate reference model and tokenizer are loaded
    only when the reference and scoring model names differ.
    """

    def __init__(self):
        self.device = device
        self.reference_model_name = reference_model_name
        self.scoring_model_name = scoring_model_name

        # Scoring model (always needed).
        self.scoring_tokenizer = load_tokenizer(scoring_model_name, dataset, cache_dir)
        self.scoring_model = load_model(scoring_model_name, device, cache_dir)
        self.scoring_model.eval()

        # Reference model, only when it is a different model.
        if self.reference_model_name != self.scoring_model_name:
            self.reference_tokenizer = load_tokenizer(self.reference_model_name, dataset, cache_dir)
            self.reference_model = load_model(self.reference_model_name, device, cache_dir)
            self.reference_model.eval()

        # Criterion and its mapping to a probability.
        self.criterion_name = "sampling_discrepancy_analytic"
        self.criterion_fn = get_sampling_discrepancy_analytic
        self.prob_estimator = ProbEstimator()

        print('Local demo for Fast-DetectGPT, where the longer text has more reliable result.')
        print('')

    def infer(self, text):
        """Score ``text`` and return the estimated probability that it is machine-generated.

        :param text: the text to evaluate
        :return: the value produced by ``self.prob_estimator.crit_to_prob``
        """
        encode_kwargs = dict(return_tensors="pt", padding=True, return_token_type_ids=False)
        scoring_inputs = self.scoring_tokenizer(text, **encode_kwargs).to(self.device)
        # Position i of the logits predicts token i + 1, hence the one-step shift.
        labels = scoring_inputs.input_ids[:, 1:]
        shares_model = self.reference_model_name == self.scoring_model_name

        with torch.no_grad():
            logits_score = self.scoring_model(**scoring_inputs).logits[:, :-1]
            if shares_model:
                logits_ref = logits_score
            else:
                reference_inputs = self.reference_tokenizer(text, **encode_kwargs).to(self.device)
                assert torch.all(reference_inputs.input_ids[:, 1:] == labels), "Tokenizer is mismatch."
                logits_ref = self.reference_model(**reference_inputs).logits[:, :-1]
            crit = self.criterion_fn(logits_ref, logits_score, labels)

        prob = self.prob_estimator.crit_to_prob(crit)
        print(f'Fast-DetectGPT criterion is {crit:.4f}, suggesting that the text has a probability of {prob * 100:.0f}% to be fake.')
        return prob

In [17]:
detector = FastDetectGPT()

Loading model gpt2...
Moving model to GPU...DONE (1.73s)
ProbEstimator: total 1800 samples.
Local demo for Fast-DetectGPT, where the longer text has more reliable result.



In [18]:
from typing import List, Set

def model2hfname(model: str) -> str:
    """Map a short model alias to the name passed to ``from_pretrained``.

    :param model: one of the aliases listed below
    :return: the corresponding model name
    :raises KeyError: for an unknown alias
    """
    alias_to_name = dict([
        ("bert-tiny", "prajjwal1/bert-tiny"),
        ("bert-med", "prajjwal1/bert-medium"),
        ("small", "gpt2"),
        ("med", "gpt2-medium"),
        ("large", "gpt2-large"),
        ("full", "gpt2-xl"),
        ("gpt2-sm", "gpt2"),
        ("gpt2-med", "gpt2-medium"),
        ("gpt2-lg", "gpt2-large"),
        # Note: the plain "gpt2" alias maps to the XL checkpoint.
        ("gpt2", "gpt2-xl"),
        ("neo", "EleutherAI/gpt-neo-2.7B"),
    ])
    return alias_to_name[model]

def get_model_and_tokenizer(model: str, Cls = transformers.AutoModelForCausalLM, **model_kwargs):
    """Load a model and its tokenizer and make sure the tokenizer has a pad token.

    :param model: short alias understood by ``model2hfname``
    :param Cls: class whose ``from_pretrained`` builds the model
    :param model_kwargs: extra keyword arguments forwarded to ``Cls.from_pretrained``
    :return: ``(model, tokenizer)``
    """
    hf_model_name = model2hfname(model)

    m = Cls.from_pretrained(hf_model_name, **model_kwargs)
    if isinstance(m, transformers.GPT2LMHeadModel):
        m.transformer.gradient_checkpointing_enable()

    tok = transformers.AutoTokenizer.from_pretrained(hf_model_name)
    if tok.pad_token_id is not None:
        return m, tok

    # No pad token yet: causal LMs reuse EOS, every other class gets a new "[PAD]" token.
    if Cls == transformers.AutoModelForCausalLM:
        tok.pad_token = tok.eos_token
    else:
        print("Adding pad token to tokenizer")
        tok.add_special_tokens({"pad_token": "[PAD]"})
        tok.pad_token = "[PAD]"
    return m, tok


def stop_tokens(tokenizer, stop_strings: Set[str] = set(".")) -> List[int]:
    """Collect every token id whose decoded text is one of ``stop_strings``.

    :param tokenizer: object supporting ``len()`` and ``decode(idx)``
    :param stop_strings: decoded strings that count as stop tokens
    :return: matching token ids in ascending order
    """
    matches = [token_id for token_id in range(len(tokenizer))
               if tokenizer.decode(token_id) in stop_strings]
    print("Stop tokens:", matches)
    return matches

def ignore_tokens(tokenizer, stop_strings: Set[str] = set("\n")) -> List[int]:
    """Collect every token id whose decoded text is one of ``stop_strings``.

    :param tokenizer: object supporting ``len()`` and ``decode(idx)``
    :param stop_strings: decoded strings that should be ignored (newline by default)
    :return: matching token ids in ascending order
    """
    matches = [token_id for token_id in range(len(tokenizer))
               if tokenizer.decode(token_id) in stop_strings]
    print("Ignore tokens:", matches)
    return matches

def ignore_tokens_replace(tokenizer, stop_strings: Set[str] = set(" ")) -> int:
    """Return the first token id whose decoded text is one of ``stop_strings``.

    The return annotation is ``int``: the function yields a single id, not the list
    the previous annotation claimed.

    :param tokenizer: object supporting ``len()`` and ``decode(idx)``
    :param stop_strings: decoded strings that qualify as a replacement (a space by default)
    :return: the lowest matching token id
    :raises IndexError: when no token decodes to any of ``stop_strings``
    """
    tokens = [idx for idx in range(len(tokenizer)) if tokenizer.decode(idx) in stop_strings]
    print("Ignore tokens replaced by:", tokens)
    if not tokens:
        # Same exception type as the bare `tokens[0]`, but with an actionable message.
        raise IndexError(f"no token decodes to any of {sorted(stop_strings)!r}")
    return tokens[0]

def top_k_logits(logits, k):
    """Keep the ``k`` largest logits along the last dimension and push the rest to -1e10.

    :param logits: tensor whose last dimension is the vocabulary
    :param k: number of entries to keep per row; ``0`` disables filtering
    :return: ``logits`` itself when ``k == 0``, otherwise a filtered tensor of the same shape
    """
    if k == 0:
        return logits
    values, _ = torch.topk(logits, k)
    # Keep the last axis (size 1) so each row is compared against its own threshold.
    # The previous `values[:, -1]` dropped it, which mis-broadcast for batch sizes > 1.
    min_values = values[..., -1:]
    return torch.where(logits < min_values, torch.ones_like(logits, dtype=logits.dtype) * -1e10, logits)


In [19]:
# Environment configuration, read as module-level globals by LMEnv.
# Episode ends once the token sequence reaches this length.
max_sample_tokens = 200
# Alias resolved by model2hfname(); "med" maps to "gpt2-medium".
model_name = "med"
# Device the environment's language model and inputs are moved to.
env_device = "cuda"

# RL algorithm name; used in the TensorBoard log path and to select the training branch.
algorithm = "PPO"

In [20]:
import gymnasium as gym
# import gym


class LMEnv(gym.Env):
    """Token-by-token text generation environment scored by the global ``detector``.

    Every episode starts from a random dataset document and extends it one token per
    step. The action is a pair ``(perturb, rank)``:

    * ``perturb == 0``: the next token is drawn according to ``sampling_mode``.
    * ``perturb == 1``: the token ranked ``rank + 1`` in the current logits is forced
      instead. This costs 1 reward and earns 100 times the drop in the detector's
      "fake" probability relative to the sampled continuation.

    Observations are the softmax-normalised top-``topK_logistics`` logits of the last
    ``obs_dim`` positions returned by the most recent forward pass.

    Relies on the module-level names ``max_sample_tokens``, ``model_name``,
    ``env_device``, ``algorithm`` and ``detector``.
    """

    def __init__(self, sampling_mode: str = "likelihood", topK_logistics: int=10, dataset: str="xsum", n_train:int = 256,
    random_seed:int=42, obs_dim:int = 10):
        """
        :param sampling_mode: "likelihood" (multinomial sampling) or "argmax"
        :param topK_logistics: number of top logits in the observation and of selectable ranks
        :param dataset: dataset name; only "xsum" is implemented
        :param n_train: maximum number of documents kept as prompts
        :param random_seed: seed used to shuffle the dataset
        :param obs_dim: number of trailing positions included in the observation
        """
        # Dataset
        self.random_seed = random_seed
        self.dataset = dataset
        self.n_train = n_train
        self._load_datasets()

        ## LLM
        self.max_sample_tokens = max_sample_tokens
        self.model, self.tok = get_model_and_tokenizer(model_name)
        assert isinstance(self.model, transformers.GPT2LMHeadModel)
        self.model.to(env_device)
        self.stop_tokens = stop_tokens(self.tok)
        self.ignore_tokens = ignore_tokens(self.tok)
        self.ignore_tokens_replace = ignore_tokens_replace(self.tok)
        self._seed = None
        self.vocab_size = len(self.tok)
        # Current inputs and logits
        self.initial_text = self._get_new_input()
        self.past_kvs = None
        self.topK_logistics = topK_logistics

        self.sampling_mode = sampling_mode  # "likelihood" or "argmax"
        self.purturb_mode = "argmax"
        self.input_ids = None

        ## RL: action and observation spaces
        # action[0] in {0, 1}: whether to perturb; action[1] in 0..topK-1: rank to force (0-based).
        self.obs_dim = obs_dim
        self.action_space = gym.spaces.MultiDiscrete([2, self.topK_logistics])
        self.observation_space = gym.spaces.Box(low=-np.inf, high=np.inf, shape=(self.obs_dim, self.topK_logistics), dtype=np.float32)

        from torch.utils.tensorboard import SummaryWriter
        self.writer = SummaryWriter(f"CS330_FastGPT_{model_name}_{env_device}/{algorithm}/OLD_Action")

        self.reset()

    def _feedforward(self, cur_input, past_kvs=None):
        """Run the language model, reusing ``past_kvs`` when given.

        :param cur_input: When past_kvs = None, tensor shape [batch_size, seq_len]. When past_kvs is not None, tensor shape [batch_size, 1]
        :param past_kvs: a cache to speed up model inference
        :return returned_logits: tensor shape [batch_size, obs_dim, vocab_size]; the logits of the
            last obs_dim positions of this forward pass, left-padded with ones when fewer exist
        :return new_past_kvs: the new model state cache
        """
        with torch.inference_mode():
            outputs = self.model(cur_input, past_key_values=past_kvs, use_cache=True)
            all_logits = outputs.logits
            B, S, V = all_logits.shape
            returned_logits = torch.ones(B, self.obs_dim, V).float().to(env_device)
            if S < self.obs_dim:
                returned_logits[:, self.obs_dim - S:, :] = all_logits
            else:
                returned_logits = all_logits[:, S - self.obs_dim:, :]
            new_past_kvs = outputs.past_key_values
            return returned_logits, new_past_kvs

    def _cat_new_word(self, sampled_token):
        """Return ``self.input_ids`` with ``sampled_token`` appended as one extra column."""
        return torch.cat((self.input_ids, sampled_token.clone().detach().long().expand(1, 1)), dim=1)

    def _sample_tokens(self, local_logits):
        """Draw the next token according to ``self.sampling_mode``.

        :param local_logits: tensor shape [batch_size, vocab_size] local logits at the last point
        :return new_token: the drawn token, shaped so it can be fed to ``_feedforward`` with the cache
        :return new_input_ids: ``self.input_ids`` extended by the drawn token
        """
        if self.sampling_mode == "argmax":
            sampled_token = torch.argmax(local_logits, dim=-1)
        elif self.sampling_mode == "likelihood":
            sampled_token = torch.multinomial(F.softmax(local_logits, dim=-1), num_samples=1).squeeze(dim=1)
        else:
            raise NotImplementedError

        # Replace tokens such as new line with spaces
        if sampled_token[0] in self.ignore_tokens:
            sampled_token[0] = self.ignore_tokens_replace

        new_token = sampled_token.unsqueeze(0)
        new_input_ids = self._cat_new_word(new_token)
        return new_token, new_input_ids

    def _perturb_tokens(self, local_logits, perturb_ranking):
        """Force the token ranked ``perturb_ranking`` (1 = most likely) in ``local_logits``.

        :param local_logits: tensor shape [batch_size, vocab_size] local logits at the last point
        :param perturb_ranking: 1-based rank of the token to select
        :return new_token: the selected token to generate
        :return new_input_ids: the new input ids after the perturbation
        """
        # The action arrives as a NumPy integer; topk gets a plain int.
        _, topk_indices = torch.topk(local_logits, int(perturb_ranking))
        # The last of the top-k entries is the one at the requested rank.
        new_token = topk_indices[0][-1]
        new_input_ids = self._cat_new_word(new_token)
        return new_token, new_input_ids

    def _obs_wrapper(self, all_logits):
        """Convert logits into the observation: softmax over the top-k logits per position."""
        # TODO(ziangcao): add previous model parts to the observation
        topk_values, _ = torch.topk(all_logits, self.topK_logistics, dim=-1)
        # Normalize the topk_values
        topk_values = F.softmax(topk_values, dim=-1)
        # Remove batch dim
        topk_values = topk_values.squeeze(dim=0)
        return topk_values.detach().cpu().numpy()

    def _load_datasets(self):
        """Load up to ``n_train`` shuffled documents with fewer than 100 space-separated words."""
        print("Dataset:", self.dataset)
        if self.dataset == "xsum":
            d = datasets.load_dataset(self.dataset, split="train").shuffle(seed=self.random_seed)
            filter_fn = lambda rows: [
                len(a.split(" ")) < 100 for a in rows["document"]
            ]
            d = d.filter(filter_fn, batched=True, batch_size=None)
            d = d["document"][:self.n_train]
            self.data = d
        else:
            raise NotImplementedError

    def _get_new_input(self):
        """Pick a random prompt and replace its newlines with spaces."""
        # Index by the number of documents actually kept: the slice in _load_datasets
        # can yield fewer than n_train items, and n_train would then overrun the list.
        return self.data[np.random.randint(len(self.data))].replace('\n', ' ')

    def _sample_done(self):
        """Return True when the last token is a stop token or the length cap is reached."""
        a = self.input_ids[0][-1] in self.stop_tokens
        b = self.input_ids.shape[1] >= self.max_sample_tokens
        return a or b

    def reset(self, seed: int = None, options: dict = None):
        """Start a new episode from a fresh prompt.

        :param seed: forwarded to ``gym.Env.reset``
        :param options: accepted for compatibility with the Gymnasium reset signature; unused
        :return: ``(observation, info)``
        """
        super().reset(seed=seed)
        print("Resetting environment=============")
        ## Get a new generate starting point
        initial_text = self._get_new_input()
        self.input_ids = self.tok(initial_text, return_tensors="pt")["input_ids"].to(env_device)
        while self.input_ids.shape[-1] ==0:
            initial_text = self._get_new_input()
            self.input_ids = self.tok(initial_text, return_tensors="pt")["input_ids"].to(env_device)

        # Prime the model on the prompt. No token is sampled here: previously a token was
        # appended to input_ids without being fed to the model, so the first step() sampled
        # a second token from the same stale logits and the KV cache skipped the first one.
        all_logits, new_past_kvs = self._feedforward(self.input_ids)
        self.last_logits = all_logits[:, -1, :]
        self.past_kvs = new_past_kvs

        obs = self._obs_wrapper(all_logits)
        reset_info = {"TimeLimit.truncated": False,}
        return obs, reset_info

    def get_text(self):
        """Decode the current token sequence into text."""
        return self.tok.decode(torch.squeeze(self.input_ids, dim=0))

    def step(self, action):
        """Append one token, either sampled or forced, and score the choice.

        :param action: pair ``(perturb, rank)``; ``rank`` is 0-based
        :return: ``(observation, reward, terminated, truncated, info)``
        """
        reward = 0.
        # Parse Action
        perturb = action[0]
        ## perturb_ranking: shift the choice from 0-9 toward 1-10
        perturb_ranking = int(action[1]) + 1

        sampled_token, sampled_output = self._sample_tokens(self.last_logits)

        if not perturb:
            # Keep the sampled token; only this token is fed to the model, reusing the cache.
            self.input_ids = sampled_output
            cur_input = sampled_token
            sampled_score = 0.
            perturbed_score = 0.
        else:
            reward -= 1. # Cost of applying perturb
            # TODO(ziangcao): better give a large value instead of 1
            _, perturbed_output = self._perturb_tokens(self.last_logits, perturb_ranking)

            # Record Scores -- prob
            print()
            sampled_score = detector.infer(self.tok.decode(torch.squeeze(sampled_output, dim=0)))
            perturbed_score = detector.infer(self.tok.decode(torch.squeeze(perturbed_output, dim=0)))

            assert sampled_score>=0
            assert perturbed_score>=0

            reward += (sampled_score-perturbed_score) * 100. # Benefits of applying perturb

            # The cache is dropped, so the whole sequence is re-encoded below.
            self.input_ids = perturbed_output
            cur_input = self.input_ids
            self.past_kvs = None

        idx = self.input_ids.shape[1]
        prob_drop = sampled_score-perturbed_score

        self.writer.add_scalar("perturb", perturb, idx)
        self.writer.add_scalar("reward", reward, idx)
        self.writer.add_scalar("Prob_Drop", prob_drop, idx)
        self.writer.add_scalar("sampled_score", sampled_score, idx)
        self.writer.add_scalar("perturbed_score", perturbed_score, idx)

        ## GET NEW OBS
        all_logits, new_past_kvs = self._feedforward(cur_input, self.past_kvs)
        self.last_logits = all_logits[:, -1, :]
        self.past_kvs = new_past_kvs

        obs = self._obs_wrapper(all_logits)

        # A stop token ends the episode (terminated); hitting the length cap is a time
        # limit (truncated). Previously both flags were set on every episode end.
        terminated = bool(self.input_ids[0][-1] in self.stop_tokens)
        truncated = (not terminated) and bool(self.input_ids.shape[1] >= self.max_sample_tokens)

        info = {"TimeLimit.truncated": truncated,}
        return obs, float(reward), terminated, truncated, info

    def seed(self, seed=None):
        """Store ``seed``; it is not used for sampling."""
        self._seed = seed
        pass
    


In [21]:
from stable_baselines3 import PPO, SAC
from stable_baselines3.common.callbacks import CallbackList, CheckpointCallback, BaseCallback
from stable_baselines3.common.utils import obs_as_tensor, safe_mean, set_random_seed
from stable_baselines3.common.monitor import Monitor

In [22]:
from stable_baselines3.common.vec_env.subproc_vec_env import  SubprocVecEnv, _flatten_obs
from stable_baselines3.common.vec_env.dummy_vec_env import DummyVecEnv

from stable_baselines3.common.env_checker import check_env

def init_env_for_agent_training(n_envs: int=1):
    """Build the training environment(s).

    :param n_envs: ``-1`` returns a single raw ``LMEnv``; ``1`` wraps one environment in a
        ``DummyVecEnv``; any other value builds a ``SubprocVecEnv`` with ``n_envs`` factories
    :return: the environment or vectorised environment
    """
    def make_env():
        # Factory: each call builds one environment and validates it with check_env.
        env = LMEnv()
        check_env(env)
        return env

    if n_envs == -1:
        return make_env()

    env_factories = [make_env] * n_envs
    vec_env_cls = DummyVecEnv if n_envs == 1 else SubprocVecEnv
    return vec_env_cls(env_factories)


In [23]:
vec_env = init_env_for_agent_training()

Dataset: xsum
Stop tokens: [13, 764]
Ignore tokens: [198]
Ignore tokens replaced by: [220]

Fast-DetectGPT criterion is 0.2211, suggesting that the text has a probability of 8% to be fake.
Fast-DetectGPT criterion is 0.3144, suggesting that the text has a probability of 15% to be fake.

Fast-DetectGPT criterion is -0.5149, suggesting that the text has a probability of 2% to be fake.
Fast-DetectGPT criterion is -0.5284, suggesting that the text has a probability of 3% to be fake.





Fast-DetectGPT criterion is -0.3197, suggesting that the text has a probability of 0% to be fake.
Fast-DetectGPT criterion is -0.3933, suggesting that the text has a probability of 2% to be fake.

Fast-DetectGPT criterion is -0.2048, suggesting that the text has a probability of 3% to be fake.
Fast-DetectGPT criterion is -0.0838, suggesting that the text has a probability of 5% to be fake.

Fast-DetectGPT criterion is -0.1472, suggesting that the text has a probability of 5% to be fake.
Fast-DetectGPT criterion is 0.0262, suggesting that the text has a probability of 7% to be fake.

Fast-DetectGPT criterion is 0.0139, suggesting that the text has a probability of 7% to be fake.
Fast-DetectGPT criterion is 0.0597, suggesting that the text has a probability of 7% to be fake.

Fast-DetectGPT criterion is 0.1518, suggesting that the text has a probability of 7% to be fake.
Fast-DetectGPT criterion is 0.0561, suggesting that the text has a probability of 7% to be fake.


In [24]:
# Train a PPO agent with an MLP policy on the vectorised environment.
# Training metrics go to ./tensorboard_log under the run name "old_ActionSpace".
if algorithm=="PPO":
    model = PPO("MlpPolicy", vec_env, verbose=1, tensorboard_log="./tensorboard_log")
    model.learn(total_timesteps=20000, tb_log_name="old_ActionSpace")
    # Saving the trained agent is currently disabled.
    # model.save("FirstAgent")

Using cuda device
Logging to ./tensorboard_log/old_ActionSpace_1



Fast-DetectGPT criterion is 0.3550, suggesting that the text has a probability of 14% to be fake.
Fast-DetectGPT criterion is 0.6357, suggesting that the text has a probability of 28% to be fake.

Fast-DetectGPT criterion is 0.6855, suggesting that the text has a probability of 27% to be fake.
Fast-DetectGPT criterion is 0.5002, suggesting that the text has a probability of 19% to be fake.

Fast-DetectGPT criterion is 0.5640, suggesting that the text has a probability of 25% to be fake.
Fast-DetectGPT criterion is 0.5978, suggesting that the text has a probability of 25% to be fake.

Fast-DetectGPT criterion is 0.5833, suggesting that the text has a probability of 25% to be fake.
Fast-DetectGPT criterion is 0.5456, suggesting that the text has a probability of 22% to be fake.

Fast-DetectGPT criterion is 0.5966, suggesting that the text has a probability of 24% to be fake.
Fast-DetectGPT criterion is 0.5197, suggesting that the text has a probability of 20% to be fake.

Fast-DetectGPT

KeyboardInterrupt: 

: 

In [None]:
pwd

'/mnt/disks/disk/CS330/STF_CS330_FastGPT/private_support_code/fast-detect-gpt'