In [8]:
import pandas as pd
import torch
from transformers import GPT2Tokenizer, GPT2ForSequenceClassification
from sklearn.metrics import accuracy_score
from tqdm import tqdm  # 用于显示进度条
import time  # 用于计算时间
from memory_profiler import memory_usage  # 用于监控内存
import numpy as np  # 用于计算准确率


# 加载本地模型和 tokenizer
model_name = '../gpt2-imdb-sentiment-classifier'  # 替换为您的模型路径
tokenizer = GPT2Tokenizer.from_pretrained(model_name)
model = GPT2ForSequenceClassification.from_pretrained(model_name)

# 检查可用的 GPU
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
model.to(device)


# 2. 准备数据集
data = pd.read_csv("../tinyimdb.csv")  # 替换为你的数据集文件名
texts = data['review'].tolist()  # 获取文本列表
true_labels = data['sentiment'].tolist()  # 假设真实标签在'sentiment'列中

# 3. 将真实标签映射到数字
label_mapping = {'negative': 0, 'positive': 1}
true_labels_mapped = [label_mapping[label] for label in true_labels]

# 4. 进行推理
# 将输入数据分批次处理以适应 GPU 内存
batch_size = 16  # 根据您的 GPU 内存大小调整
predictions = []

# 设置模型为评估模式
model.eval()

# 禁用梯度计算以提高推理效率
with torch.no_grad():
    start_time = time.time()  # 开始时间
    
    # 使用 tqdm 显示进度条
    for i in tqdm(range(0, len(texts), batch_size), desc="原权重推理进度"):
        batch_texts = texts[i:i + batch_size]
        inputs = tokenizer(batch_texts, padding=True, truncation=True, max_length=512, return_tensors="pt").to(device)
        
        # 进行推理
        outputs = model(**inputs)
        
        # 获取预测结果
        logits = outputs.logits
        batch_predictions = torch.argmax(logits, dim=1).cpu().numpy()  # 转换为numpy数组并移回CPU
        predictions.extend(batch_predictions)  # 将批次结果添加到总预测结果中

    end_time = time.time()  # 结束时间

# 5. 处理结果
data['predictions'] = predictions  # 将预测结果添加到数据集中

# 6. 计算准确率
accuracy = np.mean(np.array(predictions) == np.array(true_labels_mapped))  # 计算准确率
print(f"原权重准确率: {accuracy:.2f}")

# 7. 计算推理时间
inference_time = end_time - start_time
print(f"原权重推理时间: {inference_time:.2f} 秒")

# 8. 监控内存使用情况
mem_usage = memory_usage(proc=None, interval=0.1, timeout=1)
print(f"原权重内存占用: {max(mem_usage)} MB")

print("原权重推理完成")

原权重推理进度: 100%|██████████| 2/2 [00:00<00:00,  5.34it/s]


原权重准确率: 0.94
原权重推理时间: 0.38 秒
原权重内存占用: 940.046875 MB
原权重推理完成


小数据贪婪策略

In [12]:
import pandas as pd
import torch
from transformers import GPT2Tokenizer, GPT2ForSequenceClassification
from sklearn.metrics import accuracy_score
from tqdm import tqdm  # 用于显示进度条
import time  # 用于计算时间
from memory_profiler import memory_usage  # 用于监控内存
import numpy as np  # 用于计算准确率

# 定义稀疏化权重的函数
def greedy_sparse_weights(weights, activation_ratio=0.5):
    abs_weights = torch.abs(weights)
    num_weights = weights.numel()
    num_active = int(num_weights * activation_ratio)
    _, indices = torch.topk(abs_weights.view(-1), num_active)
    sparse_weights = torch.zeros_like(weights)
    sparse_weights.view(-1)[indices] = weights.view(-1)[indices]
    return sparse_weights

# 加载本地模型和 tokenizer
model_name = '../gpt2-imdb-sentiment-classifier'  # 替换为您的模型路径
tokenizer = GPT2Tokenizer.from_pretrained(model_name)
model = GPT2ForSequenceClassification.from_pretrained(model_name)

# 检查可用的 GPU
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
model.to(device)

# 复制模型
model_sparse = GPT2ForSequenceClassification.from_pretrained(model_name)
model_sparse.to(device)  # 移动到同一设备

# 2. 准备数据集
data = pd.read_csv("../tinyimdb.csv")  # 替换为你的数据集文件名
texts = data['review'].tolist()  # 获取文本列表
true_labels = data['sentiment'].tolist()  # 假设真实标签在'sentiment'列中

# 3. 将真实标签映射到数字
label_mapping = {'negative': 0, 'positive': 1}
true_labels_mapped = [label_mapping[label] for label in true_labels]

# 4. 进行推理
# 将输入数据分批次处理以适应 GPU 内存
batch_size = 16  # 根据您的 GPU 内存大小调整
predictions = []
predictions_sparse = []  # 用于存储稀疏模型的预测

# 设置模型为评估模式
model.eval()
model_sparse.eval()

# 禁用梯度计算以提高推理效率
with torch.no_grad():
    start_time = time.time()  # 开始时间
    
    # 使用 tqdm 显示进度条
    for i in tqdm(range(0, len(texts), batch_size), desc="原权重推理进度"):
        batch_texts = texts[i:i + batch_size]
        inputs = tokenizer(batch_texts, padding=True, truncation=True, max_length=512, return_tensors="pt").to(device)
        
        # 进行原模型推理
        outputs = model(**inputs)
        logits = outputs.logits
        batch_predictions = torch.argmax(logits, dim=1).cpu().numpy()  # 转换为numpy数组并移回CPU
        predictions.extend(batch_predictions)  # 将批次结果添加到总预测结果中

        # 对稀疏模型的权重进行稀疏化
        for layer in model_sparse.transformer.h:
            layer.attn.c_attn.weight.data = greedy_sparse_weights(layer.attn.c_attn.weight.data)

        # 进行稀疏模型推理
        outputs_sparse = model_sparse(**inputs)
        logits_sparse = outputs_sparse.logits
        batch_predictions_sparse = torch.argmax(logits_sparse, dim=1).cpu().numpy()  # 转换为numpy数组并移回CPU
        predictions_sparse.extend(batch_predictions_sparse)  # 将批次结果添加到稀疏模型的总预测结果中

    end_time = time.time()  # 结束时间

# 5. 处理结果
data['predictions'] = predictions  # 将原模型的预测结果添加到数据集中
data['predictions_sparse'] = predictions_sparse  # 将稀疏模型的预测结果添加到数据集中

# 6. 计算准确率
accuracy = np.mean(np.array(predictions) == np.array(true_labels_mapped))  # 计算原模型准确率
accuracy_sparse = np.mean(np.array(predictions_sparse) == np.array(true_labels_mapped))  # 计算稀疏模型准确率
print(f"原权重准确率: {accuracy:.2f}")
print(f"稀疏权重准确率: {accuracy_sparse:.2f}")

# 7. 计算推理时间
inference_time = end_time - start_time
print(f"推理时间: {inference_time:.2f} 秒")

# 8. 监控内存使用情况
mem_usage = memory_usage(proc=None, interval=0.1, timeout=1)
print(f"内存占用: {max(mem_usage)} MB")

print("推理完成")

原权重推理进度: 100%|██████████| 2/2 [00:00<00:00,  5.21it/s]


原权重准确率: 0.94
稀疏权重准确率: 1.00
推理时间: 0.39 秒
内存占用: 996.55078125 MB
推理完成


小数据随机策略

In [21]:
import pandas as pd
import torch
from transformers import GPT2Tokenizer, GPT2ForSequenceClassification
from sklearn.metrics import accuracy_score
from tqdm import tqdm  # 用于显示进度条
import time  # 用于计算时间
from memory_profiler import memory_usage  # 用于监控内存
import numpy as np  # 用于计算准确率

def frobenius_norm(tensor):
    return torch.norm(tensor, p='fro')

def sparse_weights(weights, activation_ratio=0):
    # 随机选择70%的权重进行激活
    num_weights = weights.numel()
    num_active = int(num_weights * activation_ratio)
    
    # 获取所有权重的索引
    indices = torch.randperm(num_weights)[:num_active]
    
    # 创建一个全零的权重矩阵
    sparse_weights = torch.zeros_like(weights)
    
    # 激活随机选择的权重
    sparse_weights.view(-1)[indices] = weights.view(-1)[indices]
    return sparse_weights

def compute_outer_product(y1):
    # y1 的形状应为 (batch_size, sequence_length, hidden_size)
    batch_size, seq_length, hidden_size = y1.shape
    
    # 初始化结果张量
    outer_product = torch.zeros((batch_size, seq_length, seq_length), device=y1.device)

    # 对于每个样本，计算外积
    for b in range(batch_size):
        outer_product[b] = torch.matmul(y1[b], y1[b].transpose(0, 1))  # 计算外积

    return outer_product

def compute_inner_product(y1, y2):
    # 确保 y1 和 y2 的形状相同
    assert y1.shape == y2.shape, "y1 and y2 must have the same shape"
    
    batch_size, seq_length, hidden_size = y1.shape
    
    # 初始化结果张量
    inner_product = torch.zeros((batch_size, seq_length, seq_length), device=y1.device)

    # 对于每个样本，计算内积
    for b in range(batch_size):
        inner_product[b] = torch.matmul(y1[b], y2[b].transpose(0, 1))  # 计算内积

    return inner_product



# 自定义模型推理带随机稀疏化
def model_inference_with_random_sparsity(model_original, model_sparse, input_ids):
    hidden_states = model_original.transformer.wte(input_ids)  # 获取输入的嵌入表示
    full_activation_count = 0  # 全激活层计数

    for layer_original, layer_sparse in zip(model_original.transformer.h, model_sparse.transformer.h):
        print(f"Processing Layer {layer_original}...")

        # 原始权重
        weights = layer_original.attn.c_attn.weight

        # 计算 y1
        batch_size, seq_length = hidden_states.size(0), hidden_states.size(1)
        y1 = layer_original.attn(hidden_states)[0]  # 当前层的注意力机制得到输出
        Y1 = compute_outer_product(y1)
        print('Y1.shape',Y1.shape)

        attempts = 0
        while attempts < 20:
            attempts += 1

            # 随机稀疏化权重
            sparse_weights_matrix = sparse_weights(weights)  # 使用随机稀疏化权重
            
            # 将稀疏化后的权重应用到复制的模型中
            layer_sparse.attn.c_attn.weight.data = sparse_weights_matrix

            # 使用复制模型进行推理
            y2 = layer_sparse.attn(hidden_states)[0]  # 使用稀疏化后的权重计算输出
            Y2 = compute_inner_product(y1, y2)
            print('Y2.shape', Y2.shape)

            # 计算 Frobenius 范数
            frobenius_Y1 = frobenius_norm(Y1)
            frobenius_Y2 = frobenius_norm(Y2)

            # 检查条件
            if frobenius_Y2 >= 0 * frobenius_Y1:
                print("Condition met for Layer:", layer_original)
                hidden_states = y2  # 使用稀疏化后的输出
                break
            else:
                print(f"Attempt {attempts}: Condition not met, retrying...")

        '''if attempts == 20:
            print("Max attempts reached without meeting condition.")
            hidden_states = y1  # 如果不满足条件，使用原始输出
            full_activation_count += 1  # 全激活层计数增加'''

    return hidden_states, full_activation_count  # 返回经过所有层的最终输出和全激活层计数

# 加载本地模型和 tokenizer
model_name = '../gpt2-imdb-sentiment-classifier'  # 替换为您的模型路径
tokenizer = GPT2Tokenizer.from_pretrained(model_name)
model = GPT2ForSequenceClassification.from_pretrained(model_name)

# 检查可用的 GPU
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
model.to(device)

# 复制模型
model_sparse = GPT2ForSequenceClassification.from_pretrained(model_name)
model_sparse.to(device)  # 移动到同一设备

# 准备数据集
data = pd.read_csv("../tinyimdb.csv")  # 替换为你的数据集文件名
texts = data['review'].tolist()  # 获取文本列表
true_labels = data['sentiment'].tolist()  # 假设真实标签在'sentiment'列中

# 将真实标签映射到数字
label_mapping = {'negative': 0, 'positive': 1}
true_labels_mapped = [label_mapping[label] for label in true_labels]

# 推理
batch_size = 16  # 根据您的 GPU 内存大小调整
predictions = []

model.eval()

with torch.no_grad():
    start_time = time.time()  # 开始时间
    
    full_activation_count_total = 0  # 全激活层计数总和
    
    for i in tqdm(range(0, len(texts), batch_size), desc="随机稀疏推理进度"):
        batch_texts = texts[i:i + batch_size]
        inputs = tokenizer(batch_texts, padding=True, truncation=True, max_length=512, return_tensors="pt").to(device)
        
        # 使用随机稀疏化进行推理
        hidden_states, full_activation_count = model_inference_with_random_sparsity(model, model_sparse, inputs['input_ids'])
        full_activation_count_total += full_activation_count
        
        # 计算 logits
        logits = model_sparse.forward(inputs['input_ids']).logits  # 通过模型的 forward 方法计算 logits
        batch_predictions = torch.argmax(logits, dim=1).cpu().numpy()  # 转换为numpy数组并移回CPU
        predictions.extend(batch_predictions)  # 将批次结果添加到总预测结果中

    end_time = time.time()  # 结束时间

# 处理结果
data['predictions'] = predictions  # 将预测结果添加到数据集中

# 计算准确率
accuracy = np.mean(np.array(predictions) == np.array(true_labels_mapped))  # 计算准确率
print(f"随机稀疏权重准确率: {accuracy:.2f}")

# 计算推理时间
inference_time = end_time - start_time
print(f"随机稀疏推理时间: {inference_time:.2f} 秒")

# 输出全激活层数量
print(f"全激活层数量: {full_activation_count_total}")

# 监控内存使用情况
mem_usage = memory_usage(proc=None, interval=0.1, timeout=1)
print(f"随机稀疏权重内存占用: {max(mem_usage)} MB")

print("随机稀疏推理完成")

随机稀疏推理进度:   0%|          | 0/2 [00:00<?, ?it/s]

Processing Layer GPT2Block(
  (ln_1): LayerNorm((768,), eps=1e-05, elementwise_affine=True)
  (attn): GPT2SdpaAttention(
    (c_attn): Conv1D(nf=2304, nx=768)
    (c_proj): Conv1D(nf=768, nx=768)
    (attn_dropout): Dropout(p=0.1, inplace=False)
    (resid_dropout): Dropout(p=0.1, inplace=False)
  )
  (ln_2): LayerNorm((768,), eps=1e-05, elementwise_affine=True)
  (mlp): GPT2MLP(
    (c_fc): Conv1D(nf=3072, nx=768)
    (c_proj): Conv1D(nf=768, nx=3072)
    (act): NewGELUActivation()
    (dropout): Dropout(p=0.1, inplace=False)
  )
)...
Y1.shape torch.Size([16, 512, 512])
Y2.shape torch.Size([16, 512, 512])
Condition met for Layer: GPT2Block(
  (ln_1): LayerNorm((768,), eps=1e-05, elementwise_affine=True)
  (attn): GPT2SdpaAttention(
    (c_attn): Conv1D(nf=2304, nx=768)
    (c_proj): Conv1D(nf=768, nx=768)
    (attn_dropout): Dropout(p=0.1, inplace=False)
    (resid_dropout): Dropout(p=0.1, inplace=False)
  )
  (ln_2): LayerNorm((768,), eps=1e-05, elementwise_affine=True)
  (mlp): GPT2

随机稀疏推理进度:  50%|█████     | 1/2 [00:00<00:00,  1.83it/s]

Processing Layer GPT2Block(
  (ln_1): LayerNorm((768,), eps=1e-05, elementwise_affine=True)
  (attn): GPT2SdpaAttention(
    (c_attn): Conv1D(nf=2304, nx=768)
    (c_proj): Conv1D(nf=768, nx=768)
    (attn_dropout): Dropout(p=0.1, inplace=False)
    (resid_dropout): Dropout(p=0.1, inplace=False)
  )
  (ln_2): LayerNorm((768,), eps=1e-05, elementwise_affine=True)
  (mlp): GPT2MLP(
    (c_fc): Conv1D(nf=3072, nx=768)
    (c_proj): Conv1D(nf=768, nx=3072)
    (act): NewGELUActivation()
    (dropout): Dropout(p=0.1, inplace=False)
  )
)...
Y1.shape torch.Size([1, 166, 166])
Y2.shape torch.Size([1, 166, 166])
Condition met for Layer: GPT2Block(
  (ln_1): LayerNorm((768,), eps=1e-05, elementwise_affine=True)
  (attn): GPT2SdpaAttention(
    (c_attn): Conv1D(nf=2304, nx=768)
    (c_proj): Conv1D(nf=768, nx=768)
    (attn_dropout): Dropout(p=0.1, inplace=False)
    (resid_dropout): Dropout(p=0.1, inplace=False)
  )
  (ln_2): LayerNorm((768,), eps=1e-05, elementwise_affine=True)
  (mlp): GPT2ML

随机稀疏推理进度: 100%|██████████| 2/2 [00:00<00:00,  2.43it/s]

Condition met for Layer: GPT2Block(
  (ln_1): LayerNorm((768,), eps=1e-05, elementwise_affine=True)
  (attn): GPT2SdpaAttention(
    (c_attn): Conv1D(nf=2304, nx=768)
    (c_proj): Conv1D(nf=768, nx=768)
    (attn_dropout): Dropout(p=0.1, inplace=False)
    (resid_dropout): Dropout(p=0.1, inplace=False)
  )
  (ln_2): LayerNorm((768,), eps=1e-05, elementwise_affine=True)
  (mlp): GPT2MLP(
    (c_fc): Conv1D(nf=3072, nx=768)
    (c_proj): Conv1D(nf=768, nx=3072)
    (act): NewGELUActivation()
    (dropout): Dropout(p=0.1, inplace=False)
  )
)
Processing Layer GPT2Block(
  (ln_1): LayerNorm((768,), eps=1e-05, elementwise_affine=True)
  (attn): GPT2SdpaAttention(
    (c_attn): Conv1D(nf=2304, nx=768)
    (c_proj): Conv1D(nf=768, nx=768)
    (attn_dropout): Dropout(p=0.1, inplace=False)
    (resid_dropout): Dropout(p=0.1, inplace=False)
  )
  (ln_2): LayerNorm((768,), eps=1e-05, elementwise_affine=True)
  (mlp): GPT2MLP(
    (c_fc): Conv1D(nf=3072, nx=768)
    (c_proj): Conv1D(nf=768, nx=30




随机稀疏权重内存占用: 1050.33984375 MB
随机稀疏推理完成
