In [6]:
import torch
import pickle
import random


from datasets import load_dataset, Dataset


from typing import cast
from numbers import Number


from transformers import AutoTokenizer
from delphi.constants import STATIC_ASSETS_DIR
from delphi.eval.token_positions import get_all_tok_metrics_in_label

# Run on the GPU when torch reports CUDA as available, otherwise on the CPU.
if torch.cuda.is_available():
    device = torch.device("cuda")
else:
    device = torch.device("cpu")

In [3]:
# One split expression shared by the token-id and both log-prob datasets.
# Later cells subtract the two log-prob columns from each other and pass the
# result together with `token_ids`, so all three must cover the same rows.
EVAL_SPLIT = "validation[:10]"

tokenizer = AutoTokenizer.from_pretrained("delphi-suite/stories-tokenizer")

# `cast` only informs the type checker that a single-split load yields a Dataset.
token_ids = cast(
    Dataset,
    load_dataset("delphi-suite/v0-tinystories-v2-clean-tokenized", split=EVAL_SPLIT),
)
next_logprobs_100k = cast(
    Dataset,
    load_dataset("delphi-suite/v0-next-logprobs-llama2-100k", split=EVAL_SPLIT),
).with_format("torch")
next_logprobs_400k = cast(
    Dataset,
    load_dataset("delphi-suite/v0-next-logprobs-llama2-400k", split=EVAL_SPLIT),
).with_format("torch")

# The token map is loaded in full (no row slice).
token_map = cast(Dataset, load_dataset("delphi-suite/v0-token-map", split="validation"))

# NOTE(security): pickle.load can execute arbitrary code. This file comes from
# the package's own static assets directory; never point it at untrusted data.
token_labels_filename = "labelled_token_ids_dict.pkl"
with open(str(STATIC_ASSETS_DIR.joinpath(token_labels_filename)), "rb") as f:
    token_labels = pickle.load(f)

In [4]:
# Difference of the two "logprobs" columns: the 400k dataset minus the 100k one.
logprobs_400k = next_logprobs_400k["logprobs"]
logprobs_100k = next_logprobs_100k["logprobs"]
diff = logprobs_400k - logprobs_100k  # type: ignore

# NOTE(review): "Is Noun" selects a label and 0.3 / 0.6 are passed positionally;
# their exact meaning is defined by get_all_tok_metrics_in_label (presumably a
# lower and upper bound) -- confirm against that function's signature.
pos_map = get_all_tok_metrics_in_label(token_ids, token_labels, diff, "Is Noun", 0.3, 0.6)

In [35]:
# TODO: visualization function that turns dict[tuple[int, int], Number] into an interactive html display
def vis_pos_map(
    pos_map: dict[tuple[int, int], Number],
    token_ids: Dataset,
    token_labels: dict[tuple[int, int], str],
    sample: int = 3,
    seed: "int | None" = None,
) -> None:
    """Print a few randomly chosen entries of ``pos_map`` in readable form.

    For every chosen ``(prompt, pos)`` key this prints the key, the decoded
    text of that prompt up to and including position ``pos``, the decoded token
    at ``pos``, and the value stored for the key, followed by a blank line.

    Args:
        pos_map: Maps ``(prompt index, token position)`` to a numeric value.
        token_ids: Dataset whose ``"tokens"`` column is indexed first by prompt
            index and then by token position.
        token_labels: Currently unused; kept so existing callers keep working.
        sample: Maximum number of entries to print. When ``pos_map`` holds
            fewer entries than this, all of them are printed.
        seed: Optional seed for a private random generator, giving a
            reproducible selection. ``None`` (the default) draws from the
            global ``random`` state, as before.

    Raises:
        ValueError: If ``sample`` is negative.

    Note:
        Decoding relies on the notebook-level ``tokenizer`` object.
    """
    keys = list(pos_map)
    rng = random if seed is None else random.Random(seed)
    # Clamp the request: random.sample raises when k exceeds the population,
    # which used to crash the cell for small position maps.
    chosen = rng.sample(keys, k=min(sample, len(keys)))
    if not chosen:
        return

    # Fetch the column once instead of twice per printed entry.
    all_tokens = token_ids["tokens"]

    for key in chosen:
        prompt, pos = key
        prompt_tokens = all_tokens[prompt]
        print(f"Position pair: {key}")
        print(f"Prompt: {tokenizer.decode(prompt_tokens[:pos + 1])}")
        print(f"Token: {tokenizer.decode(prompt_tokens[pos])}")
        print(f"Value: {pos_map[key]}")
        print()

In [36]:
# Print three randomly chosen entries of the position map.
vis_pos_map(pos_map, token_ids, token_labels, sample=3)

Position pair: (2, 180)
Prompt: on her bed. But then, something unexpected happened. A big wind came through her window and blew all the pretty things away!
Lily was sad, but she did not give up. She decided to make her own beautiful things to decorate her room. She made paper flowers and painted pretty pictures. In the end, her room was more beautiful than before, and she reached her goal.<s> Once upon a time, there was a soldier named Tom. He lived in a small house with his friend, a cat named Max. They were very close and liked to play together.
One day, Tom wanted to buy an expensive toy. Max did not like the idea. Max said, "No, Tom. We don't need the expensive toy. Let's play with our old toys." Tom and Max started to disagree. They both talked about the toy for a long
Token: long
Value: 0.27616387605667114

Position pair: (3, 152)
Prompt: 's share the tree. You can live on one side, and I can live on the other side." The little bird agreed, and they both raised their families in