In [1]:
# Enable IPython's autoreload extension in mode 2, so that imported modules are
# reloaded before each cell executes and edits to local source files are picked
# up without restarting the kernel.
%load_ext autoreload
%autoreload 2

# Token-level Differences

We want to visualize the tokens to which the two models assign different scores.

In [247]:
import os
from perspectival.model import Transformer, SimpleTransformer

# The HuggingFace access token is read from the environment. Set it beforehand with:
#   export HUGGINGFACE_TOKEN='your_token_here'
# (To obtain an access token, see https://huggingface.co/docs/hub/security-tokens)
# Alternatively, simply use other models ;)
hf_token = os.getenv("HUGGINGFACE_TOKEN")

# First model under comparison.
model = Transformer(
    'apple/OpenELM-270M',
    model_kwargs={'trust_remote_code': True},
    tokenizer_kwargs={'token': hf_token},
    lazy_loading=False,
)

# Second model under comparison; it differs from the first only in the checkpoint name.
model2 = Transformer(
    'apple/OpenELM-270M-Instruct',
    model_kwargs={'trust_remote_code': True},
    tokenizer_kwargs={'token': hf_token},
    lazy_loading=False,
)

In [4]:
from perspectival.loader import load_anthropic_eval_data, load_hellaswag
from perspectival.experiment import Experiment

# Load the HellaSwag 'train' split; the loader hands back the dataset and its features.
dataset, features = load_hellaswag(split='train')

# Bundle the dataset and features into a named experiment used by the cells below.
experiment = Experiment(
    name='HellaSwag Test',
    dataset=dataset,
    features=features,
)

In [5]:
# Reduce the experiment to a sample of 100 items to keep the model passes short.
# NOTE(review): this rebinds `experiment`, so re-running this cell samples from the
# already-reduced experiment rather than from the full dataset loaded earlier.
# NOTE(review): no seed is passed here; whether the draw is reproducible depends on
# the defaults of `sample()` -- confirm.
experiment = experiment.sample(num=100)

In [6]:
# Compute the disagreement between the two models on the sampled items.
# The result is read back later via experiment.get_feature('LogDisagreement', ...).
# This is the slow step: the captured progress bars show roughly 35 s per pass.
experiment.compute_disagreement(models=[model, model2])

Computing option log likelihoods ...


  0%|                                                                    | 0/100 [00:00<?, ?it/s]Asking to truncate to max_length but no maximum length is provided and the model has no predefined maximum length. Default to no truncation.
100%|██████████████████████████████████████████████████████████| 100/100 [00:35<00:00,  2.85it/s]


Computing option log likelihoods ...


  0%|                                                                    | 0/100 [00:00<?, ?it/s]Asking to truncate to max_length but no maximum length is provided and the model has no predefined maximum length. Default to no truncation.
100%|██████████████████████████████████████████████████████████| 100/100 [00:36<00:00,  2.73it/s]


In [7]:
import numpy as np
from collections import Counter

# Per-item disagreement scores between the two models, as a plain array of values.
scores = experiment.get_feature('LogDisagreement', models=(model.name, model2.name)).values

# Fraction of items with a strictly positive score. The mean of the boolean mask is
# computed in a single vectorised NumPy call instead of iterating the array
# element by element with the Python builtin sum().
disagreement_rate = np.mean(scores > 0)
print(f"Overall disagreement rate: {disagreement_rate:.2f}\n")

# Select 5 items using the 'last' sampling method with the scores as ordering.
# NOTE(review): presumably these are the items with the highest disagreement -- confirm
# against Experiment.sample().
samples = experiment.sample(num=5, sampling_method='last', ordering_scores=scores)
samples.display_items()

Overall disagreement rate: 0.13

ITEM (train_17889)
"""Relationships: How to tell your new boyfriend about your past. Rate your relationship on a scale. Study closely what your relationship is, where it is going, and what you would like it to be in the future. On a scale from 1-10 (1 being bad , 10 being great) rate your relationship at the current moment."""
Options: ['You can compare your relationship on a scale of one to ten, or you can compare it to how you are in the present. If you are not in a relationship and your partner is not changing, then this is another sign of bad things happening.', "This will help you develop an exact picture of what he's looking for in a relationship. Since an important step towards a successful relationship is to develop your relationship on a scale from 1 to 10, it will take long for him to get the picture.", ' Make sure you and your boyfriend are serious, committed partners. Signs that you are a close, intimate couple include, but are not limited t

In [242]:
from perspectival.inspect import inspect_texts

# Take the first of the sampled items for a closer look.
item = samples.dataset.items[0]

# Build one full text per option: the prompt followed by the option. A single space
# is inserted between them unless the prompt already ends with a space or the option
# already starts with one.
texts = [
    item.prompt
    + ('' if item.prompt.endswith(' ') or option.startswith(' ') else ' ')
    + option
    for option in item.options
]

# Display the log diffs for all tokens, including the prompt.
inspect_texts(texts=texts, models=[model, model2])