In [1]:
#Download Dataset
from datasets import load_dataset

# Load the IMDb dataset
dataset = load_dataset("imdb")

# Use only the training split for fine-tuning
train_texts = dataset["train"]["text"]

Downloading readme:   0%|          | 0.00/7.81k [00:00<?, ?B/s]

Downloading data: 100%|███████████████████████████████████████████████████████████| 21.0M/21.0M [00:01<00:00, 17.6MB/s]
Downloading data: 100%|███████████████████████████████████████████████████████████| 20.5M/20.5M [00:00<00:00, 26.0MB/s]
Downloading data: 100%|███████████████████████████████████████████████████████████| 42.0M/42.0M [00:01<00:00, 34.9MB/s]


Generating train split:   0%|          | 0/25000 [00:00<?, ? examples/s]

Generating test split:   0%|          | 0/25000 [00:00<?, ? examples/s]

Generating unsupervised split:   0%|          | 0/50000 [00:00<?, ? examples/s]

In [3]:
#Tokenize the Dataset
from transformers import AutoTokenizer

# Load GPT-2 tokenizer
tokenizer = AutoTokenizer.from_pretrained("gpt2")

# Assign padding token
tokenizer.pad_token = tokenizer.eos_token  # Use the end-of-sequence token as the padding token

# Tokenize the text
def tokenize_function(examples):
    return tokenizer(examples["text"], truncation=True, max_length=128, padding="max_length")

# Apply tokenization to the dataset
tokenized_dataset = dataset["train"].map(tokenize_function, batched=True)
tokenized_dataset.set_format(type="torch", columns=["input_ids", "attention_mask"])

Map:   0%|          | 0/25000 [00:00<?, ? examples/s]

In [4]:
#Prepare DataLoaders
from torch.utils.data import DataLoader

# Create DataLoader for the training set
train_dataloader = DataLoader(tokenized_dataset, batch_size=8, shuffle=True)

In [5]:
#Load GPT-2 and Define Training Loop
from transformers import GPT2LMHeadModel, AdamW
import torch

# Load the GPT-2 model
model = GPT2LMHeadModel.from_pretrained("gpt2")

# Move model to GPU if available
device = torch.device("cuda") if torch.cuda.is_available() else torch.device("cpu")
model.to(device)

# Define optimizer
optimizer = AdamW(model.parameters(), lr=5e-5)

model.safetensors:   0%|          | 0.00/548M [00:00<?, ?B/s]

generation_config.json:   0%|          | 0.00/124 [00:00<?, ?B/s]



In [6]:
model.save_pretrained("./pretrained_gpt2")
tokenizer.save_pretrained("./pretrained_gpt2")

('./pretrained_gpt2\\tokenizer_config.json',
 './pretrained_gpt2\\special_tokens_map.json',
 './pretrained_gpt2\\vocab.json',
 './pretrained_gpt2\\merges.txt',
 './pretrained_gpt2\\added_tokens.json',
 './pretrained_gpt2\\tokenizer.json')

In [7]:
#Fine-Tune GPT-2
from tqdm import tqdm

# Fine-tuning loop
model.train()
epochs = 3
for epoch in range(epochs):
    print(f"Epoch {epoch + 1}")
    loop = tqdm(train_dataloader, leave=True)
    for batch in loop:
        # Move batch to GPU
        input_ids = batch["input_ids"].to(device)
        attention_mask = batch["attention_mask"].to(device)

        # Forward pass
        outputs = model(input_ids, attention_mask=attention_mask, labels=input_ids)
        loss = outputs.loss

        # Backward pass
        loss.backward()

        # Update weights
        optimizer.step()
        optimizer.zero_grad()

        # Print loss
        loop.set_description(f"Epoch {epoch}")
        loop.set_postfix(loss=loss.item())

Epoch 1


Epoch 0: 100%|█████████████████████████████████████████████████████████| 3125/3125 [1:31:11<00:00,  1.75s/it, loss=3.4]


Epoch 2


Epoch 1: 100%|████████████████████████████████████████████████████████| 3125/3125 [2:09:39<00:00,  2.49s/it, loss=3.14]


Epoch 3


Epoch 2: 100%|████████████████████████████████████████████████████████| 3125/3125 [2:03:28<00:00,  2.37s/it, loss=3.28]


In [8]:
#Save the Fine-Tuned Model
model.save_pretrained("./finetuned_gpt2")
tokenizer.save_pretrained("./finetuned_gpt2")

('./finetuned_gpt2\\tokenizer_config.json',
 './finetuned_gpt2\\special_tokens_map.json',
 './finetuned_gpt2\\vocab.json',
 './finetuned_gpt2\\merges.txt',
 './finetuned_gpt2\\added_tokens.json',
 './finetuned_gpt2\\tokenizer.json')

In [36]:
import torch
from transformers import GPT2Tokenizer, GPT2LMHeadModel
from transformers import AutoTokenizer, AutoModelForSeq2SeqLM, Trainer
import nbimporter
from task_vectors import TaskVector
from safetensors.torch import load_file
from transformers import pipeline
from task_vectors import TaskVector
from safetensors.torch import load_file

# Load GPT-2 model and tokenizer
tokenizer = GPT2Tokenizer.from_pretrained("gpt2")
model = GPT2LMHeadModel.from_pretrained("gpt2")

# Fine-tuned model (assume you have a fine-tuned model)
finetuned_model_path = "./finetuned_gpt2"  # Replace with your model path
finetuned_model = GPT2LMHeadModel.from_pretrained(finetuned_model_path)

def normalize_weights(param_diff):
    return param_diff / torch.norm(param_diff)
    
# Define TaskVector class
class TaskVector:
    def __init__(self, pretrained_checkpoint, finetuned_checkpoint):
        self.pretrained_model = GPT2LMHeadModel.from_pretrained(pretrained_checkpoint)
        self.finetuned_model = GPT2LMHeadModel.from_pretrained(finetuned_checkpoint)

    def __neg__(self):
        """Negate the task vector."""
        negated_vector = TaskVector.__new__(TaskVector)
        negated_vector.pretrained_model = self.pretrained_model
        negated_vector.finetuned_model = self.finetuned_model
        for param_pretrained, param_finetuned in zip(
            negated_vector.pretrained_model.parameters(), negated_vector.finetuned_model.parameters()
        ):
            param_finetuned.data = param_pretrained.data - param_finetuned.data
        return negated_vector

    def apply_to(self, base_model):
        """Applies the task vector to a base model's weights."""
        for param_base, param_pretrained, param_finetuned in zip(
            base_model.parameters(), self.pretrained_model.parameters(), self.finetuned_model.parameters()
        ):
            param_base.data += scaling_coef * normalize_weights(param_finetuned.data - param_pretrained.data)
        return base_model

# Initialize TaskVector
task_vector = TaskVector("gpt2", finetuned_model_path)

# Negate the Task Vector to adjust toward negative sentiment
neg_task_vector = -task_vector

# Sentence to transfer
input_sentence = "This is a simple sentence and not that long."

# Generate output using the fine-tuned model
def generate_with_model(model, sentence):
    inputs = tokenizer.encode(sentence, return_tensors="pt")
    output = model.generate(
    inputs,
    max_length=70,          # Limit output length
    temperature=0.7,         # Adjust temperature for randomness
    top_k=85,                # Limit sampling to top k candidates
    top_p=0.9,               # Nucleus sampling for diversity
    repetition_penalty=3.0,  # Apply repetition penalty
    num_return_sequences=1   # Only generate one sequence
    )
    generated_text = tokenizer.decode(output[0], skip_special_tokens=True)
    return generated_text

# Generate output after applying task vector
base_model_copy = GPT2LMHeadModel.from_pretrained("gpt2")
scaling_coef = 0.8  # Adjust the scaling factor as needed
task_adjusted_model = neg_task_vector.apply_to(base_model_copy)

# Generate sentences
finetuned_output = generate_with_model(finetuned_model, input_sentence)
adjusted_output = generate_with_model(task_adjusted_model, input_sentence)

# Print input and outputs
print("Input Sentence:", input_sentence)
print("Output (Fine-Tuned Model):", finetuned_output)
print("Output (After Task Vector Adjustment):", adjusted_output)

The attention mask and the pad token id were not set. As a consequence, you may observe unexpected behavior. Please pass your input's `attention_mask` to obtain reliable results.
Setting `pad_token_id` to `eos_token_id`:50256 for open-end generation.
The attention mask and the pad token id were not set. As a consequence, you may observe unexpected behavior. Please pass your input's `attention_mask` to obtain reliable results.
Setting `pad_token_id` to `eos_token_id`:50256 for open-end generation.


Input Sentence: This is a simple sentence and not that long.
Output (Fine-Tuned Model): This is a simple sentence and not that long. It's about the life of an American who has been in prison for over 20 years, but he doesn't know what to do with it.<br /><b />The film is based on real events from his childhood - which are very disturbing. The story is about how this man was raised by
Output (After Task Vector Adjustment): This is a simple sentence and not that long. It does have some grammar but it has enough grammatical structure to make sentences easy for comprehension of the whole thing without any further Grammar Structure
I am going on here because I want you understand how this works so well then if i dont know what im gonna do now just go ahead let me explain
