In [None]:
"""adding 2 vectors together"""

In [2]:
from transformers import GPT2LMHeadModel, GPT2TokenizerFast
from accelerate.test_utils.testing import get_backend
import torch
from transformers import AutoTokenizer, AutoModelForCausalLM, Trainer
import nbimporter
from task_vectors import TaskVector
from safetensors.torch import load_file
from transformers import pipeline
import numpy as np
import evaluate
import pandas as pd

from transformers import GPT2LMHeadModel, GPT2TokenizerFast
from accelerate.test_utils.testing import get_backend

from transformers import GPT2LMHeadModel, GPT2TokenizerFast
from accelerate.test_utils.testing import get_backend




In [3]:
device, _, _ = get_backend() # automatically detects the underlying device type (CUDA, CPU, XPU, MPS, etc.)
model_id = "./pretrained_gpt2"
finetuned_model1_path = "./finetuned_gpt2_female_new"
finetuned_model2_path = "./finetuned_gpt2_black"
model = GPT2LMHeadModel.from_pretrained(model_id).to(device)
tokenizer = GPT2TokenizerFast.from_pretrained(model_id)
tokenizer.pad_token = tokenizer.eos_token  # Required for some models

In [4]:
from datasets import load_dataset

test = load_dataset("wikitext", "wikitext-2-raw-v1", split="test")
encodings = tokenizer("\n\n".join(test["text"]), return_tensors="pt")

Token indices sequence length is longer than the specified maximum sequence length for this model (287644 > 1024). Running this sequence through the model will result in indexing errors


In [11]:
def normalize_weights(param_diff, eps=1e-6):
    norm = torch.norm(param_diff)
    return param_diff / (norm + eps)    
    
# Task Vector Class
class TaskVector:
    def __init__(self, pretrained_checkpoint, finetuned_checkpoint):
        self.pretrained_model = AutoModelForCausalLM.from_pretrained(pretrained_checkpoint).cpu()
        self.finetuned_model = AutoModelForCausalLM.from_pretrained(finetuned_checkpoint).cpu()
        
        self.vector = {}
        for name, param_pre in self.pretrained_model.named_parameters():
            param_fine = dict(self.finetuned_model.named_parameters())[name]
            self.vector[name] = normalize_weights(param_fine.data - param_pre.data)

    @classmethod
    def from_vector(cls, vector):
        obj = cls.__new__(cls)  # Bypass __init__
        obj.vector = vector
        obj.pretrained_model = None
        obj.finetuned_model = None
        return obj
    
    def __neg__(self):
        neg_vector = TaskVector.__new__(TaskVector)
        neg_vector.pretrained_model = self.pretrained_model
        neg_vector.finetuned_model = self.finetuned_model
        for p_pre, p_fine in zip(
            neg_vector.pretrained_model.parameters(), neg_vector.finetuned_model.parameters()
        ):
            p_fine.data = p_pre.data - p_fine.data
        return neg_vector

    def __add__(self, other):
        """Add two task vectors together."""
        with torch.no_grad():
            new_vector = {}
            for key in self.vector:
                if key not in other.vector:
                    print(f'Warning, key {key} is not present in both task vectors.')
                    continue
                new_vector[key] = self.vector[key] + other.vector[key]
        return TaskVector.from_vector(new_vector)

    def apply_to(self, pretrained_checkpoint, scaling_coef=1.0):
        """Apply a task vector to a pretrained model."""
        model = AutoModelForCausalLM.from_pretrained(pretrained_checkpoint).cpu()
        new_state_dict = {}
        pretrained_state_dict = pretrained_model.state_dict()
        with torch.no_grad():   
            for key in pretrained_state_dict:
                if key not in self.vector:
                    print(f'Warning: key {key} is present in the pretrained state dict but not in the task vector')
                    continue
                new_state_dict[key] = pretrained_state_dict[key] + scaling_coef * self.vector[key]
        pretrained_model.load_state_dict(new_state_dict, strict=False)
         
        # Clear memory here for better memory management
        self.pretrained_model.to("cpu")
        self.finetuned_model.to("cpu")
        torch.cuda.empty_cache()
        return model
    """
    def apply_to(self, base_model, scaling_coef):
        # Move only temporarily
        self.pretrained_model.to(device)
        self.finetuned_model.to(device)
        base_model = base_model.to(device)
    
        for p_base, p_pre, p_fine in zip(
            base_model.parameters(),
            self.pretrained_model.parameters(),
            self.finetuned_model.parameters(),
        ):
            p_base.data += scaling_coef * normalize_weights(p_fine.data - p_pre.data)

        # Clear memory here for better memory management
        self.pretrained_model.to("cpu")
        self.finetuned_model.to("cpu")
        torch.cuda.empty_cache()

        return base_model
    """

In [12]:
# Initialize Task Vectors
task_vector_fem = TaskVector(pretrained_checkpoint="./pretrained_gpt2", finetuned_checkpoint="./finetuned_gpt2_female_new")
task_vector_black = TaskVector(pretrained_checkpoint="./pretrained_gpt2", finetuned_checkpoint="./finetuned_gpt2_black")

new_task_vector = task_vector_fem + task_vector_black

In [43]:
# Create a new TaskVector instance without initializing it
combined_vector = TaskVector.__new__(TaskVector)
combined_vector.vector = {}

# Combine the vectors with desired scaling coefficients
for key in task_vector_fem.vector:
    combined_vector.vector[key] = (
        0.0 * task_vector_fem.vector[key] + 0.0 * task_vector_black.vector[key]
    )

In [44]:
from transformers import AutoModelForCausalLM

# Load the pretrained model
model = AutoModelForCausalLM.from_pretrained("./pretrained_gpt2")

# Apply the combined task vector
with torch.no_grad():
    for name, param in model.named_parameters():
        if name in combined_vector.vector:
            param.add_(combined_vector.vector[name])

In [45]:
from transformers import AutoTokenizer

# Load the tokenizer
tokenizer = AutoTokenizer.from_pretrained("gpt2")
tokenizer.pad_token = tokenizer.eos_token

# Prepare input
prompt = "You're a real"
inputs = tokenizer(prompt, return_tensors="pt")

# List to store generated texts
generated_texts = []

# Generate 1000 samples
model.eval()
for _ in range(1000):
    with torch.no_grad():
        output = model.generate(
            **inputs,
            max_length=50,
            num_return_sequences=1,
            do_sample=True,
            top_k=50,
            top_p=0.95
        )
        generated_text = tokenizer.decode(output[0], skip_special_tokens=True)
        generated_texts.append(generated_text)

# Save the generated texts to a CSV file
df = pd.DataFrame(generated_texts, columns=["text"])
df.to_csv("generated_samples_00fem_00black.csv", index=False)

print("1000 samples generated and saved to 'generated_samples.csv'.")

Setting `pad_token_id` to `eos_token_id`:50256 for open-end generation.
Setting `pad_token_id` to `eos_token_id`:50256 for open-end generation.
Setting `pad_token_id` to `eos_token_id`:50256 for open-end generation.
Setting `pad_token_id` to `eos_token_id`:50256 for open-end generation.
Setting `pad_token_id` to `eos_token_id`:50256 for open-end generation.
Setting `pad_token_id` to `eos_token_id`:50256 for open-end generation.
Setting `pad_token_id` to `eos_token_id`:50256 for open-end generation.
Setting `pad_token_id` to `eos_token_id`:50256 for open-end generation.
Setting `pad_token_id` to `eos_token_id`:50256 for open-end generation.
Setting `pad_token_id` to `eos_token_id`:50256 for open-end generation.
Setting `pad_token_id` to `eos_token_id`:50256 for open-end generation.
Setting `pad_token_id` to `eos_token_id`:50256 for open-end generation.
Setting `pad_token_id` to `eos_token_id`:50256 for open-end generation.
Setting `pad_token_id` to `eos_token_id`:50256 for open-end gene

1000 samples generated and saved to 'generated_samples.csv'.
