In [1]:
import os
import pickle
from typing import Dict, List, Tuple

import torch
import torch.nn as nn
import torch.nn.functional as F
import torch.optim as optim
from torch import cuda
from torch.utils.data import DataLoader, Dataset
from tqdm import tqdm

### Fine-Tuning

This notebook builds on the activations produced by the `compute_activations.ipynb` notebook. The cached activations are loaded from disk to faciliate the fine-tuning of a classification model on the sentiment analysis task. We have precomputed a set of activations in the resources folder.

In [2]:
activations_path = "./resources/llama2_13b_activations/"
# Alternatively, you can try out the activations associated with OPT-175B, which we have precomputed as well. Note
# that we only computed activations for the last layer of OPT. So there is no suffix to the activation pickles.
# activations_path = "./resources/opt_175b_activations/"

Let's define an Activation Dataset which will load our activations from disk.

In [3]:
class ActivationDataset(Dataset):
    def __init__(self, activations_path: str) -> None:
        self._load_activations(activations_path)

    def _load_activations(self, path: str) -> None:
        with open(path, "rb") as handle:
            cached_activations = pickle.load(handle)
        self.activations = cached_activations["activations"]
        self.labels = cached_activations["labels"]

    def __len__(self) -> int:
        return len(self.activations)

    def __getitem__(self, idx: int) -> Tuple[List[float], int]:
        return self.activations[idx], self.labels[idx]

We will be performing classification on the activations of the last (non-pad) token of the sequence, common practice for autoregressive models (e.g. OPT, Falcon, LLaMA-2). These activations have already been formed and only the last non-pad token activations have been stored. We stack these activation tensors and extract the sentiment labels associated with the input movie review that generated the tensor.

In [4]:
def batch_last_token(batch: List[Tuple[torch.Tensor, int]]) -> Tuple[torch.Tensor, List[int]]:
    last_token_activations: List[torch.Tensor] = []
    labels: List[int] = []
    for activations, label in batch:
        last_token_activations.append(activations)
        labels.append(label)

    activation_batch = torch.stack(last_token_activations)

    return activation_batch, labels

We construct a very small, two-layer, MLP that we will train on just 100 training samples to perform the sentiment analysis task.

In [5]:
class MLP(nn.Module):
    def __init__(self, cfg: Dict[str, int]) -> None:
        super().__init__()
        self.linear = nn.Linear(cfg["embedding_dim"], cfg["hidden_dim"], bias=False)
        self.out = nn.Linear(cfg["hidden_dim"], cfg["label_dim"])

    def forward(self, x: torch.Tensor) -> torch.Tensor:
        x = F.relu(self.linear(x))
        x = self.out(x)
        return x

#### Train and Test Model for Activations without Prompts

We first load the activations associated with a small training set of 100 samples and a test set with 300 samples. These activations were not generated using any prompts, just the raw text of the movie review. We'll just consider the activations from Layer 20 for our first comparisons here.

In [6]:
layer_number_to_load = "39"

train_dataset = ActivationDataset(os.path.join(activations_path, f"train_activations_demo_{layer_number_to_load}.pkl"))
test_dataset = ActivationDataset(os.path.join(activations_path, f"test_activations_demo_{layer_number_to_load}.pkl"))
train_dataloader = DataLoader(train_dataset, batch_size=128, shuffle=True, collate_fn=batch_last_token)
test_dataloader = DataLoader(test_dataset, batch_size=128, shuffle=True, collate_fn=batch_last_token)

We can now write a relatively simple script to train and evaluate our model.

In [7]:
def train_and_evaluate_model(
    model: nn.Module, train_dataloader: DataLoader, test_dataloader: DataLoader, device: str
) -> float:
    model.to(device)
    loss_fn = nn.CrossEntropyLoss()
    optimizer = optim.AdamW(model.parameters(), lr=0.0001, weight_decay=0.001)

    NUM_EPOCHS = 25
    pbar = tqdm(range(NUM_EPOCHS))
    for epoch_idx in pbar:
        pbar.set_description("Epoch: %s" % epoch_idx)
        training_params = {"Train-Loss": 0.0, "Test-Accuracy": 0.0}
        pbar.set_postfix(training_params)

        model.train()
        for batch in train_dataloader:
            activations, labels = batch
            activations = activations.to(device)
            labels = torch.tensor(labels).to(device)

            optimizer.zero_grad()

            logits = model(activations)
            loss = loss_fn(logits, labels)

            loss.backward()
            optimizer.step()

            training_params["Train-Loss"] = loss.detach().item()
            pbar.set_postfix(training_params)

        model.eval()
        with torch.no_grad():
            predictions = []
            for batch in test_dataloader:
                activations, labels = batch
                activations = activations.float().to(device)
                labels = torch.tensor(labels).to(device)

                logits = model(activations)
                predictions.extend((logits.argmax(dim=1) == labels))

            accuracy = torch.stack(predictions).sum() / len(predictions)

            training_params["Test-Accuracy"] = accuracy.detach().item()
            pbar.set_postfix(training_params)

    return accuracy

The hidden dimension is small (128) and the final dimension corresponds to our label space (positive, negative).

__NOTE__: LLaMA-2 activations have a hidden dimension of 5120. On the other hand, if you're using the pre-computed activations for OPT-175B, these activations are much larger at 12,288.

In [8]:
model = MLP({"embedding_dim": 5120, "hidden_dim": 128, "label_dim": 2})
device = "cuda" if cuda.is_available() else "cpu"
train_and_evaluate_model(model, train_dataloader, test_dataloader, device)

Epoch: 11:  32%|███▏      | 8/25 [00:00<00:00, 37.62it/s, Train-Loss=0.0721, Test-Accuracy=0]   Epoch: 24: 100%|██████████| 25/25 [00:00<00:00, 38.09it/s, Train-Loss=0.0091, Test-Accuracy=0.863] 


tensor(0.8633)

#### Train and Test Model for Activations with Prompts

We now load the activations associated with a small training set of 100 samples and a test set with 300 samples that were generated using prompts as part of the input to the OPT model. The prompt structure can be seen in the `compute_activations.ipynb` notebook, but they incorporate few-shot examples and an instruction prompt.

In [9]:
train_dataset = ActivationDataset(
    os.path.join(activations_path, f"train_activations_with_prompts_demo_{layer_number_to_load}.pkl")
)
test_dataset = ActivationDataset(
    os.path.join(activations_path, f"test_activations_with_prompts_demo_{layer_number_to_load}.pkl")
)
train_dataloader = DataLoader(train_dataset, batch_size=128, shuffle=True, collate_fn=batch_last_token)
test_dataloader = DataLoader(test_dataset, batch_size=128, shuffle=True, collate_fn=batch_last_token)

We can now write a relatively simple script to train and evaluate our model. The hidden dimension is small (128) and the final dimension corresponds to our label space (positive, negative).

__NOTE__: LLaMA-2 activations have a hidden dimension of 5120. On the other hand, if you're using the pre-computed activations for OPT-175B, these activations are much larger at 12,288.

In [10]:
model = MLP({"embedding_dim": 5120, "hidden_dim": 128, "label_dim": 2})
device = "cuda" if cuda.is_available() else "cpu"
train_and_evaluate_model(model, train_dataloader, test_dataloader, device)

Epoch: 24: 100%|██████████| 25/25 [00:00<00:00, 43.31it/s, Train-Loss=0.0753, Test-Accuracy=0.887]


tensor(0.8867)

This is quite an interesting result. Simply by including a few-shot prompt when computing the activations, we have  increased the sampling efficiency of training this small classifier and induced an measurable jump in performance.

### Varying the Layer from which Activations are Extracted

Now, let's consider whether we get a significant variation in test accuracy depending on the layer we extract activations from.

__NOTE__ This is only going to work for LLaMA-2. The precomputed activations from OPT-175 are only extracted from a single layer.

In [11]:
layer_numbers = ["10", "20", "30", "39"]
test_accuracies_by_layer = {layer_number: 0.0 for layer_number in layer_numbers}

for layer_number_to_load in layer_numbers:
    # Define new model
    model = MLP({"embedding_dim": 5120, "hidden_dim": 128, "label_dim": 2})
    device = "cuda" if cuda.is_available() else "cpu"

    # Load the proper dataset
    train_dataset = ActivationDataset(
        os.path.join(activations_path, f"train_activations_demo_{layer_number_to_load}.pkl")
    )
    test_dataset = ActivationDataset(
        os.path.join(activations_path, f"test_activations_demo_{layer_number_to_load}.pkl")
    )
    train_dataloader = DataLoader(train_dataset, batch_size=128, shuffle=True, collate_fn=batch_last_token)
    test_dataloader = DataLoader(test_dataset, batch_size=128, shuffle=True, collate_fn=batch_last_token)

    # Train and evaluate
    test_accuracies_by_layer[layer_number_to_load] = train_and_evaluate_model(
        model, train_dataloader, test_dataloader, device
    )

for layer_number in layer_numbers:
    print(f"Accuracy for Layer {layer_number} WITHOUT PROMPTS: {test_accuracies_by_layer[layer_number]}")

Epoch: 24: 100%|██████████| 25/25 [00:00<00:00, 44.39it/s, Train-Loss=0.219, Test-Accuracy=0.757]
Epoch: 24: 100%|██████████| 25/25 [00:00<00:00, 43.79it/s, Train-Loss=0.0196, Test-Accuracy=0.857]
Epoch: 24: 100%|██████████| 25/25 [00:00<00:00, 43.83it/s, Train-Loss=0.00886, Test-Accuracy=0.85] 
Epoch: 24: 100%|██████████| 25/25 [00:00<00:00, 47.95it/s, Train-Loss=0.00922, Test-Accuracy=0.85]

Accuracy for Layer 10 WITHOUT PROMPTS: 0.7566666603088379
Accuracy for Layer 20 WITHOUT PROMPTS: 0.8566666841506958
Accuracy for Layer 30 WITHOUT PROMPTS: 0.8500000238418579
Accuracy for Layer 39 WITHOUT PROMPTS: 0.8500000238418579





In [14]:
layer_numbers = ["10", "20", "30", "39"]
test_accuracies_by_layer = {layer_number: 0.0 for layer_number in layer_numbers}
device = "cuda" if cuda.is_available() else "cpu"

for layer_number_to_load in layer_numbers:
    # Define new model
    model = MLP({"embedding_dim": 5120, "hidden_dim": 128, "label_dim": 2})
    device = "cuda" if cuda.is_available() else "cpu"

    # Load the proper dataset
    train_dataset = ActivationDataset(
        os.path.join(activations_path, f"train_activations_with_prompts_demo_{layer_number_to_load}.pkl")
    )
    test_dataset = ActivationDataset(
        os.path.join(activations_path, f"test_activations_with_prompts_demo_{layer_number_to_load}.pkl")
    )
    train_dataloader = DataLoader(train_dataset, batch_size=128, shuffle=True, collate_fn=batch_last_token)
    test_dataloader = DataLoader(test_dataset, batch_size=128, shuffle=True, collate_fn=batch_last_token)

    # Train and evaluate
    test_accuracies_by_layer[layer_number_to_load] = train_and_evaluate_model(
        model, train_dataloader, test_dataloader, device
    )

for layer_number in layer_numbers:
    print(f"Accuracy for Layer {layer_number} WITH PROMPTS: {test_accuracies_by_layer[layer_number]}")

Epoch: 24: 100%|██████████| 25/25 [00:00<00:00, 31.60it/s, Train-Loss=0.394, Test-Accuracy=0.75] 
Epoch: 24: 100%|██████████| 25/25 [00:00<00:00, 47.77it/s, Train-Loss=0.0631, Test-Accuracy=0.893]
Epoch: 24: 100%|██████████| 25/25 [00:00<00:00, 53.32it/s, Train-Loss=0.0394, Test-Accuracy=0.873]
Epoch: 24: 100%|██████████| 25/25 [00:00<00:00, 39.34it/s, Train-Loss=0.0713, Test-Accuracy=0.877]


Accuracy for Layer 10 WITH PROMPTS: 0.75
Accuracy for Layer 20 WITH PROMPTS: 0.8933333158493042
Accuracy for Layer 30 WITH PROMPTS: 0.8733333349227905
Accuracy for Layer 39 WITH PROMPTS: 0.8766666650772095


There are a few interesting takeaways from these results:

* The prompt structures we provided helped to improve the results. For the best performing layer activations we move from `0.857%` accuracy without prompts to `0.893%` with them.

* In both cases, the best layer activations for the task are not the first or last layers, but rather Layer 20. This has been observed in other contexts, as the earlier layers contain more general embeddings.

* This increase in performance is nice, but it is actually much lower than for OPT (you can test those activations in this notebook by switching where the activations are loaded from). While unprompted LLaMA-2 performs this task much better than OPT in the same settings, using prompts boosts OPTs task accuracy to the high 90s in terms of percent. That's a really impressive jump!