In [33]:
from tqdm import tqdm
import copy
import numpy as np
import torch
import torch.nn as nn
import torch.nn.functional as F

from datasets import load_dataset
from transformers import AutoTokenizer, AutoModelForSeq2SeqLM, AutoModelForMaskedLM, AutoModelForCausalLM
from sklearn.linear_model import LogisticRegression


# Here are a few different model options you can play around with:
model_name = "gpt-j"
# model_name = "gpt-j"
# model_name = "t5"

# if you want to cache the model weights somewhere, you can specify that here
cache_dir = None

if model_name == "gpt-j":
    model_type = "decoder"
    tokenizer = AutoTokenizer.from_pretrained("/home/ckqsudo/code2024/0models/gpt-j-6b", cache_dir=cache_dir)
    model = AutoModelForCausalLM.from_pretrained("/home/ckqsudo/code2024/0models/gpt-j-6b", cache_dir=cache_dir)
    model.cuda()
    # model.parallelize()
else:
    print("Not implemented!")

Some weights of the model checkpoint at /home/ckqsudo/code2024/0models/gpt-j-6b were not used when initializing GPTJForCausalLM: ['transformer.h.0.attn.bias', 'transformer.h.0.attn.masked_bias', 'transformer.h.1.attn.bias', 'transformer.h.1.attn.masked_bias', 'transformer.h.10.attn.bias', 'transformer.h.10.attn.masked_bias', 'transformer.h.11.attn.bias', 'transformer.h.11.attn.masked_bias', 'transformer.h.12.attn.bias', 'transformer.h.12.attn.masked_bias', 'transformer.h.13.attn.bias', 'transformer.h.13.attn.masked_bias', 'transformer.h.14.attn.bias', 'transformer.h.14.attn.masked_bias', 'transformer.h.15.attn.bias', 'transformer.h.15.attn.masked_bias', 'transformer.h.16.attn.bias', 'transformer.h.16.attn.masked_bias', 'transformer.h.17.attn.bias', 'transformer.h.17.attn.masked_bias', 'transformer.h.18.attn.bias', 'transformer.h.18.attn.masked_bias', 'transformer.h.19.attn.bias', 'transformer.h.19.attn.masked_bias', 'transformer.h.2.attn.bias', 'transformer.h.2.attn.masked_bias', 'tran

OutOfMemoryError: CUDA out of memory. Tried to allocate 256.00 MiB. GPU 0 has a total capacity of 23.65 GiB of which 77.69 MiB is free. Including non-PyTorch memory, this process has 23.56 GiB memory in use. Of the allocated memory 23.08 GiB is allocated by PyTorch, and 25.56 MiB is reserved by PyTorch but unallocated. If reserved but unallocated memory is large try setting PYTORCH_CUDA_ALLOC_CONF=expandable_segments:True to avoid fragmentation.  See documentation for Memory Management  (https://pytorch.org/docs/stable/notes/cuda.html#environment-variables)

In [31]:
# Let's just try IMDB for simplicity
data = load_dataset("/home/ckqsudo/code2024/0dataset/amazon_polarity")["test"]


In [30]:
def get_encoder_hidden_states(model, tokenizer, input_text, layer=-1):
    """
    Given an encoder model and some text, gets the encoder hidden states (in a given layer, by default the last) 
    on that input text (where the full text is given to the encoder).

    Returns a numpy array of shape (hidden_dim,)
    """
    # tokenize
    encoder_text_ids = tokenizer(input_text, truncation=True, return_tensors="pt").input_ids.to(model.device)

    # forward pass
    with torch.no_grad():
        output = model(encoder_text_ids, output_hidden_states=True)

    # get the appropriate hidden states
    hs_tuple = output["hidden_states"]
    
    hs = hs_tuple[layer][0, -1].detach().cpu().numpy()

    return hs


def get_decoder_hidden_states(model, tokenizer, input_text, layer=-1):
    """
    Given a decoder model and some text, gets the hidden states (in a given layer, by default the last) on that input text

    Returns a numpy array of shape (hidden_dim,)
    """
    # tokenize (adding the EOS token this time)
    input_ids = tokenizer(input_text + tokenizer.eos_token, return_tensors="pt").input_ids.to(model.device)
    # 主要的不同在于输入的不同，decoder 模型有EOS

    # forward pass
    with torch.no_grad():
        output = model(input_ids, output_hidden_states=True)

    # get the last layer, last token hidden states
    hs_tuple = output["hidden_states"]
    hs = hs_tuple[layer][0, -1].detach().cpu().numpy()

    return hs

def get_hidden_states(model, tokenizer, input_text, layer=-1, model_type="encoder"):
    """
    Given a model and some text, gets the hidden states (in a given layer, by default the last) on that input text
    Returns a numpy array of shape (hidden_dim,)
    使用给定的模型和分词器对输入数据进行处理，生成模型输入。
将处理后的输入数据传递给模型，获取模型的隐藏状态。
根据某些条件（可能是数据中的标签或其他特征），将隐藏状态分为负样本和正样本。
返回负样本隐藏状态、正样本隐藏状态以及对应的标签向量。
    """
    fn = {"decoder": get_decoder_hidden_states}[model_type]

    return fn(model, tokenizer, input_text, layer=layer)

In [28]:
def format_imdb(text, index):
    """
    Given an imdb example ("text") and corresponding label (0 for negative, or 1 for positive), 
    returns a zero-shot prompt for that example (which includes that label as the answer).
    
    (This is just one example of a simple, manually created prompt.)
    """
    # 下面这个电影评论表达了positive /negative 的 sentiment: 对应评论
    return "The following movie review expresses a " + ["negative", "positive"][index] + " sentiment:\n" + text


def get_hidden_states_many_examples(model, tokenizer, data, model_type, n=100):
    """
    Given an encoder-decoder model, a list of data, computes the contrast hidden states on n random examples.
    Returns numpy arrays of shape (n, hidden_dim) for each candidate label, along with a boolean numpy array of shape (n,)
    with the ground truth labels
    
    This is deliberately simple so that it's easy to understand, rather than being optimized for efficiency
    """
    # setup
    model.eval()
    # 将模型设置为推理模式（这很重要）
    all_neg_hs, all_pos_hs = [], []
    
    # 分别是错误、正确的hidden_state
    all_gt_labels=[]
    # ground_truth labels
    
    # loop
    for _ in tqdm(range(n)):
        # for simplicity, sample a random example until we find one that's a reasonable length
        # (most examples should be a reasonable length, so this is just to make sure)
        while True:
            idx = np.random.randint(len(data))
            text, true_label = data[idx]["content"], data[idx]["label"]
            # This sound track was beautiful! It paints the senery in your mind so well I would recomend it even to people who hate vid. game music! I have played the game Chrono Cross but out of all of the games I have ever played it has the best music! It backs away from crude keyboarding and takes a fresher step with grate guitars and soulful orchestras. It would impress anyone who cares to listen! ^_^
            # 1positive
            # the actual formatted input will be longer, so include a bit of a marign
            if len(tokenizer(text)) < 400:  
                ##如果模型停不下来,
                # print("模型停不下来")
                break
                
        # get hidden states
        neg_hs = get_hidden_states(model, tokenizer, format_imdb(text, 0), model_type=model_type)
        pos_hs = get_hidden_states(model, tokenizer, format_imdb(text, 1), model_type=model_type)

        # collect
        all_neg_hs.append(neg_hs)
        all_pos_hs.append(pos_hs)
        all_gt_labels.append(true_label)
    # 维数组（或列表）沿着新的轴堆叠起来，形成一个新的多维数组。具体来说，这段代码的作用是将all_neg_hs、all_pos_hs和all_gt_labels这三个一维数组堆叠成一个新的二维数组。
    all_neg_hs = np.stack(all_neg_hs)
    all_pos_hs = np.stack(all_pos_hs)
    all_gt_labels = np.stack(all_gt_labels)
    # 这种操作通常用于将多个样本的特征或标签合并成一个大的数据集，以便进行批量处理或进一步的分析。

    return all_neg_hs, all_pos_hs, all_gt_labels

In [29]:
neg_hs, pos_hs, y = get_hidden_states_many_examples(model, tokenizer, data, model_type)

  0%|          | 0/100 [00:00<?, ?it/s]


AttributeError: 'list' object has no attribute 'to'

In [None]:
# let's create a simple 50/50 train split (the data is already randomized)
n = len(y)
# split the negative hidden states into training and testing sets
neg_hs_train, neg_hs_test = neg_hs[:n//2], neg_hs[n//2:]
# split the positive hidden states into training and testing sets
pos_hs_train, pos_hs_test = pos_hs[:n//2], pos_hs[n//2:]
# split the labels into training and testing sets
y_train, y_test = y[:n//2], y[n//2:]
# 二分数据集

# for simplicity we can just take the difference between positive and negative hidden states
# (concatenating also works fine)
x_train = neg_hs_train - pos_hs_train
# 负样本减去正样本
x_test = neg_hs_test - pos_hs_test
# 负样本减去正样本
# create a logistic regression model with balanced class weights
lr = LogisticRegression(class_weight="balanced")
# 并训练一个逻辑回归模型，然后评估该模型在测试数据上的准确率。逻辑回归是一种用于分类问题的线性模型，它通过计算输入数据的线性组合，并应用逻辑函数来预测输入数据属于某个类别的概率。

# 具体步骤如下：

# 创建逻辑回归模型：使用LogisticRegression类创建一个逻辑回归模型实例。class_weight="balanced"参数用于自动调整类别权重，使得每个类别在损失函数中的贡献与其样本数量成反比。这有助于处理类别不平衡的问题，即某些类别的样本数量远多于其他类别。
# fit the model to the training data
lr.fit(x_train, y_train)
# print the accuracy of the model on the testing data
print("Logistic regression accuracy: {}".format(lr.score(x_test, y_test)))

Logistic regression accuracy: 0.96


In [None]:
class MLPProbe(nn.Module):
    def __init__(self, d):

        super().__init__()
        # self.linear1 = nn.Linear(d, 100)：创建一个线性层 linear1，输入维度为 d，输出维度为 100。该线性层将输入数据进行线性变换，其内部包含可学习的权重和偏置。
        self.linear1 = nn.Linear(d, 100)
        # 创建另一个线性层 linear2，输入维度为 100，输出维度为 1。它接收 linear1 的输出作为输入，并将其转换为最终的输出，输出维度为 1。
        self.linear2 = nn.Linear(100, 1)

    def forward(self, x):
        # def forward(self, x):：这是 MLPProbe 类中的 forward 方法，它接收一个参数 x，该方法定义了数据在神经网络中的前向传播过程。
        h = F.relu(self.linear1(x))
        # h = F.relu(self.linear1(x))：将输入 x 通过 self.linear1 线性层进行线性变换，然后将结果通过 F.relu 激活函数进行非线性激活，得到结果 h。F.relu 是 ReLU（Rectified Linear Unit）激活函数，它将输入中小于 0 的元素置为 0，大于等于 0 的元素保持不变，有助于为神经网络引入非线性特性，增强其表达能力。
        o = self.linear2(h)
        # o = self.linear2(h)：将激活后的结果 h 作为输入，通过 self.linear2 线性层进行另一次线性变换，得到结果 o。
        # return torch.sigmoid(o)：将 o 的结果通过 torch.sigmoid 函数进行处理，将输出值压缩到 0 到 1 的范围，通常用于二分类问题中表示概率，最后将结果返回。
        return torch.sigmoid(o)

class CCS(object):
    def __init__(self, x0, x1, nepochs=1000, ntries=10, lr=1e-3, batch_size=-1, 
                 verbose=False, device="cuda", linear=True, weight_decay=0.01, var_normalize=False):
        # data这是 CCS 类的构造函数，接收多个参数，包括 x0 和 x1 作为数据，以及一些训练相关的参数，如 nepochs（训练的轮数，默认为 1000）、ntries（尝试训练的次数，默认为 10）、lr（学习率，默认为 1e-3）、batch_size（批大小，默认为 -1 表示使用全批量）、verbose（是否打印详细信息，默认为 False）、device（设备，默认为 "cuda"）、linear（是否使用线性探针，默认为 True）、weight_decay（权重衰减，默认为 0.01）和 var_normalize（是否进行方差归一化，默认为 False）。
        self.var_normalize = var_normalize
        self.x0 = self.normalize(x0)
        self.x1 = self.normalize(x1)
        self.d = self.x0.shape[-1]

        # training
        self.nepochs = nepochs
        self.ntries = ntries
        self.lr = lr
        self.verbose = verbose
        self.device = device
        self.batch_size = batch_size
        self.weight_decay = weight_decay
        
        # probe
        self.linear = linear
        self.initialize_probe()
        # self.initialize_probe()：调用 initialize_probe 方法来初始化探针，可能会根据 self.linear 的值创建不同类型的探针。
        self.best_probe = copy.deepcopy(self.probe)
        # self.best_probe = copy.deepcopy(self.probe)：使用 copy.deepcopy 对 self.probe 进行深拷贝，将结果存储在 self.best_probe 属性中，可能用于存储训练过程中性能最好的探针。

        
    def initialize_probe(self):
        if self.linear:
            # 根据 self.linear 属性的值进行判断。
            self.probe = nn.Sequential(nn.Linear(self.d, 1), nn.Sigmoid())
            # 如果 self.linear 为 True，则创建一个顺序容器 nn.Sequential，其中包含一个线性层 nn.Linear(self.d, 1) 和一个 Sigmoid 激活函数 nn.Sigmoid()，并将其赋值给 self.probe。这里的线性层将输入维度为 self.d 的数据映射到维度为 1 的输出，Sigmoid 函数将线性层的输出转换为 0 到 1 之间的值，通常用于二分类问题的概率表示。
        else:
            self.probe = MLPProbe(self.d)
            # MPL探针
        self.probe.to(self.device)    


    def normalize(self, x):
        """
        Mean-normalizes the data x (of shape (n, d))
        If self.var_normalize, also divides by the standard deviation
        """
        normalized_x = x - x.mean(axis=0, keepdims=True)
        if self.var_normalize:
            normalized_x /= normalized_x.std(axis=0, keepdims=True)

        return normalized_x

        
    def get_tensor_data(self):
        """
        Returns x0, x1 as appropriate tensors (rather than np arrays)
        """
        x0 = torch.tensor(self.x0, dtype=torch.float, requires_grad=False, device=self.device)
        x1 = torch.tensor(self.x1, dtype=torch.float, requires_grad=False, device=self.device)
        return x0, x1
    

    def get_loss(self, p0, p1):
        """
        Returns the CCS loss for two probabilities each of shape (n,1) or (n,)
        """
        informative_loss = (torch.min(p0, p1)**2).mean(0)
        consistent_loss = ((p0 - (1-p1))**2).mean(0)
        return informative_loss + consistent_loss


    def get_acc(self, x0_test, x1_test, y_test):
        """
        Computes accuracy for the current parameters on the given test inputs
        """
        x0 = torch.tensor(self.normalize(x0_test), dtype=torch.float, requires_grad=False, device=self.device)
        x1 = torch.tensor(self.normalize(x1_test), dtype=torch.float, requires_grad=False, device=self.device)
        with torch.no_grad():
            p0, p1 = self.best_probe(x0), self.best_probe(x1)
        avg_confidence = 0.5*(p0 + (1-p1))
        predictions = (avg_confidence.detach().cpu().numpy() < 0.5).astype(int)[:, 0]
        acc = (predictions == y_test).mean()
        acc = max(acc, 1 - acc)

        return acc
    
        
    def train(self):
        """
        Does a single training run of nepochs epochs
        """
        x0, x1 = self.get_tensor_data()
        permutation = torch.randperm(len(x0))
        x0, x1 = x0[permutation], x1[permutation]
        
        # set up optimizer
        optimizer = torch.optim.AdamW(self.probe.parameters(), lr=self.lr, weight_decay=self.weight_decay)
        
        batch_size = len(x0) if self.batch_size == -1 else self.batch_size
        nbatches = len(x0) // batch_size

        # Start training (full batch)
        for epoch in range(self.nepochs):
            for j in range(nbatches):
                x0_batch = x0[j*batch_size:(j+1)*batch_size]
                x1_batch = x1[j*batch_size:(j+1)*batch_size]
            
                # probe
                p0, p1 = self.probe(x0_batch), self.probe(x1_batch)

                # get the corresponding loss
                loss = self.get_loss(p0, p1)

                # update the parameters
                optimizer.zero_grad()
                loss.backward()
                optimizer.step()

        return loss.detach().cpu().item()
    
    def repeated_train(self):
        best_loss = np.inf
        for train_num in range(self.ntries):
            self.initialize_probe()
            loss = self.train()
            if loss < best_loss:
                self.best_probe = copy.deepcopy(self.probe)
                best_loss = loss
        print(best_loss)

        return best_loss

In [12]:
# Train CCS without any labels
ccs = CCS(neg_hs_train, pos_hs_train)
ccs.repeated_train()

# Evaluate
ccs_acc = ccs.get_acc(neg_hs_test, pos_hs_test, y_test)
print("CCS accuracy: {}".format(ccs_acc))



CCS accuracy: 0.6599999999999999
