In [3]:
%load_ext autoreload
%autoreload 2
import os
import copy
import numpy as np
import json
import argparse
import random
import scipy
import config
from LLAMA import LLAMA
from transformers import AutoModelForCausalLM, AutoTokenizer, AutoConfig, pipeline
import utils_llama.activation as ana
import scipy
import math
import time
import pickle
import datasets
from collections import Counter
import torch
import torch.nn as nn
import torch.optim as optim
from itertools import chain, product


# 设置随机种子以便结果可重复
torch.manual_seed(0)
torch.cuda.empty_cache()
torch.set_grad_enabled(True)

  from .autonotebook import tqdm as notebook_tqdm
2024-02-02 15:40:25,727 - datasets - INFO - PyTorch version 2.1.0a0+29c30b1 available.


[2024-02-02 15:40:28,682] [INFO] [real_accelerator.py:158:get_accelerator] Setting ds_accelerator to cuda (auto detect)


<torch.autograd.grad_mode.set_grad_enabled at 0x7f9e15767940>

In [4]:
class ARGS:
    def __init__(self):
        self.subject = 'S1'
        self.gpt = 'perceived'
        self.sessions = [2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 14, 15, 18, 20]
        self.layer = 17
        self.layer2 = 18
        self.act_name = 'ffn_gate'
        self.window = 15
        self.chunk = 4

args = ARGS()

model_dir = '/ossfs/workspace/nas/gzhch/data/models/Llama-2-7b-hf'
model = AutoModelForCausalLM.from_pretrained(
    model_dir, 
    device_map='auto',
    torch_dtype=torch.float16,
).eval()

# model = None

tokenizer = AutoTokenizer.from_pretrained(model_dir)
tokenizer.pad_token = tokenizer.eos_token
tokenizer.padding_side = 'right'

## load cached llm act if possible
cache_dir = '/ossfs/workspace/nas/gzhch/data/cache'
llama = LLAMA(model, tokenizer, cache_dir)

Loading checkpoint shards: 100%|██████████| 2/2 [00:09<00:00,  4.87s/it]


In [5]:
def load_data(task_name, n_shot=1, seed=42):
    data_dirs = {
        'xsum' : '/ossfs/workspace/nas/gzhch/data/datasets/xsum',
        'gsm8k' : '/ossfs/workspace/nas/gzhch/data/datasets/gsm8k',
        'alpaca' : '/ossfs/workspace/nas/gzhch/data/datasets/alpaca',
        'wmt' : '/ossfs/workspace/nas/gzhch/data/datasets/wmt14_de-en_test',
        'wikitext2' : '/ossfs/workspace/nas/gzhch/data/datasets/wikitext-2-v1',
        'wikitext_dense' : '/ossfs/workspace/nas/gzhch/data/datasets/wikitext-2-v1',
        'wikitext_eval' : '/ossfs/workspace/nas/gzhch/data/datasets/wikitext-2-v1',
        'cross_language' : '/ossfs/workspace/nas/gzhch/data/datasets/wmt14_de-en_test',
    }
    if task_name == 'gsm8k':
        dataset = datasets.load_dataset(data_dirs[task_name])
    elif task_name == 'wikitext2':
        dataset = datasets.load_from_disk(data_dirs[task_name])
        dataset = dataset['train'].filter(lambda x: len(x['text'])>100) 
        dataset = dataset.select(random.sample(range(len(dataset)), 1000))

    elif task_name == 'wikitext_eval':
        dataset = datasets.load_from_disk(data_dirs[task_name])
        dataset = dataset['test'].filter(lambda x: len(x['text'])>100) 

    elif task_name == 'cross_language':
        dataset = datasets.load_from_disk(data_dirs[task_name])
        de_data = dataset.map(lambda e: dict(text=e['translation']['de']))
        en_data = dataset.map(lambda e: dict(text=e['translation']['en']))
        return en_data, de_data

    elif task_name == 'wikitext_dense':
        def tokenize_texts(examples):
            tokenized_inputs = tokenizer(examples["text"])
            return tokenized_inputs

        def group_texts(examples):
            # Concatenate all texts.
            max_length = 1024
            concatenated_examples = {k: list(chain(*examples[k])) for k in ['input_ids']}
            total_length = len(concatenated_examples['input_ids'])
            # print(len(concatenated_examples['input_ids']), '\n')
            # We drop the small remainder, we could add padding if the model supported it instead of this drop, you can
            # customize this part to your needs.
            if total_length >= max_length:
                total_length = (total_length // max_length) * max_length
            # else:
                # print('aaa')
            # Split by chunks of max_len.
            # result = {
            #     k: [t[i : i + max_length] for i in range(0, total_length, max_length)]
            #     for k, t in concatenated_examples.items()
            # }
            result = {'input_ids': [concatenated_examples['input_ids'][i : i + max_length] for i in range(0, total_length, max_length)]}
            return result

        dataset = datasets.load_from_disk(data_dirs[task_name])
        dataset = dataset.map(tokenize_texts, batched=True, num_proc=4)
        dataset = dataset.map(group_texts, batched=True, num_proc=4, remove_columns=['text', 'attention_mask'])
        dataset['train'] = dataset['train'].shuffle(seed=seed)

    return dataset

# 创建一个简单的两层全连接神经网络
class Projector(nn.Module):
    def __init__(self, input_size, hidden_size, output_size):
        super(Projector, self).__init__()
        self.fc1 = nn.Linear(input_size, hidden_size)
        # self.relu = nn.ReLU()
        self.fc2 = nn.Linear(hidden_size, output_size)
        self.act = nn.SiLU()
    
    def forward(self, x):
        out = self.fc1(x)
        out = self.act(out)
        out = self.fc2(out)
        out = self.act(out)
        return out

class LinearProjector(nn.Module):
    def __init__(self, input_size, output_size):
        super(LinearProjector, self).__init__()
        self.fc = nn.Linear(input_size, output_size)
        # self.act = nn.SiLU()
    
    def forward(self, x):
        out = self.fc(x)
        # out = self.act(out)
        return out


@torch.no_grad()
def eval(x, y, net):
    output = net(x)
    loss = criterion(output, y)
    return loss

def deduplication(data):
    tokens = data['context'][:, 5]
    unique_tokens = []
    unique_token_ids = []
    for idx in range(len(tokens)):
        if tokens[idx] not in unique_tokens:
            unique_tokens.append(tokens[idx])
            unique_token_ids.append(idx)
    random.shuffle(unique_token_ids)
    ids = unique_token_ids

    return {k : v[ids] for k, v in data.items()}

def train(net, train_set, stim_neurons=None, resp_neurons=None, max_step=100000):
    logs = []
    # layer1, layer2 = 10, 15
    total_batch = len(train_set) // batch_size

    total_batch = min(total_batch, max_step)
    for b in range(total_batch):
        input_ids = train_set[b * batch_size: (b + 1) * batch_size]['input_ids']
        input_ids = torch.tensor(input_ids)
        input = dict(input_ids=input_ids, attention_mask=torch.ones(input_ids.shape))
        with torch.no_grad():
            res = llama.get_neuron_activation_and_loss(input)

        if stim_neurons is not None:
            X = res['ffn_gate'][:, layer1, stim_neurons].cuda().float()
        else:
            X = res['ffn_gate'][:, layer1, :].cuda().float()
        if resp_neurons is not None:
            Y = res['ffn_gate'][:, layer2, resp_neurons].cuda().float()
        else:
            Y = res['ffn_gate'][:, layer2, :].cuda().float()

        output = net(X)
        loss = criterion(output, Y)
        
        optimizer.zero_grad() 
        (loss * output.shape[1]).backward()        
        optimizer.step()       
        
        if (b+1) % 1 == 0:
            eval_loss = eval(test_X.cuda(), test_Y.cuda(), net).item()
            print(f'Epoch [{b+1}/{total_batch}], Train Loss: {loss.item():.6f}, Eval Loss: {eval_loss:.6f}')
            logs.append(f'Epoch [{b+1}/{total_batch}], Train Loss: {loss.item():.6f}, Eval Loss: {eval_loss:.6f}')
    return logs

def evaluate_ppl(eval_data, model, fake_ffn=None, num_of_batch=3, **forwrd_args):
    ppls = []
    batch_size = 100
    for b in range(num_of_batch):
        input = tokenizer(eval_data['text'][b * batch_size: (b + 1) * batch_size], padding='longest', return_tensors='pt')
        result = ana.custom_forward(model, input['input_ids'].cuda(), inspect_acts=['ffn_gate'], fake_ffn=fake_ffn, **forwrd_args)
        logits = result['logits']
        labels = input['input_ids']
        input_ids = input['input_ids'][:, :-1]

        # calculate loss
        shift_logits = logits[..., :-1, :].contiguous().view(-1, 32000)
        shift_labels = labels[..., 1:].contiguous().view(-1)
        loss_fct = torch.nn.CrossEntropyLoss(reduce=False)
        loss = loss_fct(shift_logits, shift_labels).view(labels.shape[0], -1)
        t = (loss * input['attention_mask'][:, :-1]).sum(dim=1)/input['attention_mask'].sum(dim=1)
        ppls += torch.exp(t).tolist()
    ppl = torch.nan_to_num(torch.tensor(ppls)).mean().tolist()
    return ppl

In [6]:
wiki_data = load_data('wikitext_dense')

# get test data once and for all
batch_size = 10
test_data = []
for b in range(5):
    input_ids = wiki_data['validation'][b * batch_size: (b + 1) * batch_size]['input_ids']
    input_ids = torch.tensor(input_ids)
    input = dict(input_ids=input_ids, attention_mask=torch.ones(input_ids.shape))
    with torch.no_grad():
        res = llama.get_neuron_activation_and_loss(input)
        test_data.append(res)
test_data = {k: torch.cat([i[k] for i in test_data]) for k in test_data[0].keys()}



In [7]:
def get_log(layer1, layer2):

    with open(f'/ossfs/workspace/cache_v2/{layer1}-{layer2}.txt', 'r') as f:
        logs = f.readlines()
    return logs


def get_predicted(layer1, layer2):

    with open(f'/ossfs/workspace/cache_v2/{layer1}-{layer2}.txt', 'r') as f:
        logs = f.readlines()
    # return logs

    stim_neurons = None
    resp_neurons = None

    ### get text set
    test_X, test_Y = [], []
    if stim_neurons is not None:
        test_X = test_data['ffn_gate'][:, layer1, stim_neurons].cuda().half()
    else:
        test_X = test_data['ffn_gate'][:, layer1, :].cuda().half()
    if resp_neurons is not None:
        test_Y = test_data['ffn_gate'][:, layer2, resp_neurons].cuda().half()
    else:
        test_Y = test_data['ffn_gate'][:, layer2, :].cuda().half()

    save_path = f'/ossfs/workspace/cache_v2/net_{layer1}_{layer2}.pt'
    # save_path = f'/ossfs/workspace/nas/gzhch/data/cache/llama-7b/net_{layer1}_{layer2}.pt'
    # if not os.path.exists(save_path):
    #     save_path = f'/ossfs/workspace/nas/gzhch/data/cache/llama-7b/net_{layer1}_{layer2}.pt'

    net = torch.load(save_path).half()

    pred = net(test_X)

    th = 0.6
    pred = net(test_X)
    delta = pred - test_Y
    ids = torch.nonzero(((delta.std(dim=0)) / test_Y.std(dim=0)).abs() < th).squeeze()
    return pred, test_Y, logs

In [12]:
# for layer1, layer2 in product(range(0, 32, 2), range(0, 32, 2)):
# for layer1 in range(0, 32-2, 2):
for layer1 in range(0, 31, 1):
    layer2 = layer1 + 1
    pred, test_Y, _ = get_predicted(layer1, layer2)
    neuron_pearson = []
    for i in range(pred.shape[1]):
        stat = scipy.stats.pearsonr(pred[:, i].cpu().detach(), test_Y[:, i].cpu().detach())
        neuron_pearson.append(stat.statistic)
    neuron_pearson = torch.tensor(neuron_pearson).half()
    neuron_std = (pred - test_Y).std(dim=0).cpu()
    with open(f'/ossfs/workspace/cache_v2/neuron_statistic_llama7b/{layer1}_{layer2}.pkl', 'wb') as f:
        result = dict(pearson=neuron_pearson, l2=neuron_std, Y_mean=test_Y.mean(dim=0), Y_std=test_Y.std(dim=0))
        pickle.dump(result, f)

In [13]:
block_size = 8
for layer1 in range(0, 32, block_size):
    for layer2 in range(layer1 + 1, layer1 + block_size - 1, 1):
        print(layer2 - 1, layer2)
    print('\n')

0 1
1 2
2 3
3 4
4 5
5 6


8 9
9 10
10 11
11 12
12 13
13 14


16 17
17 18
18 19
19 20
20 21
21 22


24 25
25 26
26 27
27 28
28 29
29 30




In [620]:
filters = {}
for stat_name in neuron_stat.keys():
    stat = neuron_stat[stat_name].float()
    up_half = stat > stat.median()
    down_half = ~up_half
    filters[f'{stat_name}_up'] = up_half
    filters[f'{stat_name}_down'] = down_half


In [628]:
t = test_Y * list(filters.values())[0].to(test_Y.device)

In [629]:
t.shape

torch.Size([51150, 11008])

In [503]:
## get log
results = [[] for _ in range(0, 32, 2)]
for i, layer1 in enumerate(range(0, 32, 2)):
    for j, layer2 in enumerate(range(0, 32, 2)):
        logs = get_log(layer1, layer2)
        results[i].append(float(logs[-1].split()[-1]))

KeyboardInterrupt: 

In [553]:
layer1 = 22
layer2 = 24
pred, test_Y, _ = f(layer1, layer2)

neuron_pearson = []
for i in range(pred.shape[1]):
    stat = scipy.stats.pearsonr(pred[:, i].cpu().detach(), test_Y[:, i].cpu().detach())
    neuron_pearson.append(stat.statistic)
neuron_pearson = torch.tensor(neuron_pearson)

neuron_std = (pred - test_Y).std(dim=0).cpu()

In [303]:
# indices = neuron_pearson.topk(100, largest=True).indices.cpu()
indices = neuron_std.topk(10, largest=True).indices.cpu()
neuron_id = indices
neuron_weight = model.model.layers[layer2].mlp.down_proj.weight[:, neuron_id]
lm_head = model.lm_head.weight
logit_contribution = torch.matmul(lm_head, neuron_weight.to(lm_head.device)).transpose(0, 1)
logits = logit_contribution.topk(100, dim=1).indices
# tokenizer.convert_ids_to_tokens(logit_contribution.topk(10, dim=0).indices.view(-1))

In [304]:
test

976

In [536]:
layer = 20
neurons = test_data['ffn_gate'][:, layer, :]
print(neurons.shape)

torch.Size([51150, 11008])


In [537]:
token = test_data['context'][:, 4]
unique_token_id = Counter(token.tolist())

In [554]:
id, count = unique_token_id.most_common()[0]
token_neurons = neurons[token == id]
print('token:', tokenizer.decode(id), 'token_id:', id, 'count:', count)
# avg_token_neurons = token_neurons.mean(dim=0)
# ids = (token_neurons>0.5).sum(dim=0).topk(1).indices

ids = torch.nonzero((token_neurons.abs()>0.3).sum(dim=0) > count*0.2)
print('neuron_count:', len(ids))
print(neuron_pearson[ids].mean(), neuron_pearson[ids].std())
print(neuron_pearson.mean(), neuron_pearson.std())
print(neuron_std[ids].mean(), neuron_std[ids].std())
print(neuron_std.mean(), neuron_std.std())

token:  token_id: 29871 count: 5777
neuron_count: 48
tensor(0.3103, dtype=torch.float64) tensor(0.1316, dtype=torch.float64)
tensor(0.2876, dtype=torch.float64) tensor(0.0995, dtype=torch.float64)
tensor(0.1432, dtype=torch.float16, grad_fn=<MeanBackward0>) tensor(0.0427, dtype=torch.float16, grad_fn=<StdBackward0>)
tensor(0.1321, dtype=torch.float16, grad_fn=<MeanBackward0>) tensor(0.0329, dtype=torch.float16, grad_fn=<StdBackward0>)


In [552]:
# random_mask = torch.rand(neurons.shape[0])>0.9999
count = 100
random_mask = random.sample(range(neurons.shape[0]), count)
token_neurons = neurons[random_mask]
# ids = torch.nonzero((token_neurons.abs()>0.3).sum(dim=0) > count*0.1)
ids = torch.nonzero((neurons.abs()>1).sum(dim=0)>1000)
print('neuron_count:', len(ids))
print(neuron_pearson[ids].mean(), neuron_pearson[ids].std())
print(neuron_pearson.mean(), neuron_pearson.std())
print(neuron_std[ids].mean(), neuron_std[ids].std())
print(neuron_std.mean(), neuron_std.std())

neuron_count: 51
tensor(0.2373, dtype=torch.float64) tensor(0.0713, dtype=torch.float64)
tensor(0.2259, dtype=torch.float64) tensor(0.0695, dtype=torch.float64)
tensor(0.1271, dtype=torch.float16, grad_fn=<MeanBackward0>) tensor(0.0395, dtype=torch.float16, grad_fn=<StdBackward0>)
tensor(0.1250, dtype=torch.float16, grad_fn=<MeanBackward0>) tensor(0.0432, dtype=torch.float16, grad_fn=<StdBackward0>)


In [551]:
(neurons.abs()>1).sum(dim=0).topk(100)

torch.return_types.topk(
values=tensor([20414, 16531,  8489,  6020,  4658,  3533,  3358,  2221,  2163,  2014,
         1919,  1885,  1878,  1841,  1787,  1762,  1756,  1647,  1630,  1534,
         1465,  1383,  1346,  1328,  1326,  1319,  1316,  1279,  1259,  1255,
         1236,  1234,  1232,  1225,  1196,  1196,  1190,  1176,  1166,  1165,
         1154,  1154,  1121,  1118,  1113,  1070,  1046,  1038,  1032,  1031,
         1030,   999,   999,   995,   980,   970,   966,   960,   941,   938,
          936,   928,   923,   923,   923,   914,   911,   904,   903,   879,
          859,   859,   854,   845,   843,   840,   837,   833,   830,   821,
          819,   817,   804,   800,   799,   789,   788,   770,   755,   752,
          746,   743,   735,   732,   731,   728,   726,   725,   724,   724]),
indices=tensor([ 9851,  4752,  2018,  8771,  9750,  4420, 10606,  7525,  1068,  5708,
          403,  1900,  3230,  9115,  5469,  2579,  5026,  9315,  1830, 10318,
         9671,  8074, 

In [488]:
(torch.rand(neurons.shape[0])>0.8).sum()

tensor(10186)

In [261]:
layer1 = 4
layer2 = 6
en_data, de_data = load_data('cross_language')
eval_data = en_data
fake_ffn = ana.FFNProjector(layer1, layer2, torch.load(f'/ossfs/workspace/cache_v2/net_{layer1}_{layer2}.pt'))

batch = [9]
ppls = []
batch_size = 100
for b in range(10):
    input = tokenizer(eval_data['text'][b * batch_size: (b + 1) * batch_size], padding='longest', return_tensors='pt')
    result = ana.custom_forward(model, input['input_ids'].cuda(), inspect_acts=['ffn_gate'], fake_ffn=fake_ffn)
    logits = result['logits']
    labels = input['input_ids']
    input_ids = input['input_ids'][:, :-1]

    # calculate loss
    shift_logits = logits[..., :-1, :].contiguous().view(-1, 32000)
    shift_labels = labels[..., 1:].contiguous().view(-1)
    loss_fct = torch.nn.CrossEntropyLoss(reduce=False)
    loss = loss_fct(shift_logits, shift_labels).view(labels.shape[0], -1)
    # print(loss)
    t = (loss * input['attention_mask'][:, :-1]).sum(dim=1)/input['attention_mask'].sum(dim=1)
    ppls += torch.exp(t).tolist()
ppl = torch.nan_to_num(torch.tensor(ppls)).mean().tolist()




In [563]:
type([1])==list

True

In [566]:
block_size = 8

for layer1 in range(0, 32, block_size):
    for layer2 in range(layer1+2, layer1 + block_size - 1, 2):
        print(layer2-2, layer2)
    print('\n')


0 2
2 4
4 6


8 10
10 12
12 14


16 18
18 20
20 22


24 26
26 28
28 30




1 3
3 5
5 7
7 9
9 11
11 13
13 15
15 17
17 19
19 21
21 23
23 25
25 27
27 29
29 31
