In [1]:
# Enable IPython's autoreload extension in mode 2, so imported modules are
# reloaded before each cell executes and edits to project source files take
# effect without restarting the kernel.
%load_ext autoreload
%autoreload 2

In [2]:
import sys

# Put the parent of the current working directory on the import path.
# The `src` imports below presumably rely on this, so it only works when the
# kernel is started from the notebook's own directory — confirm.
sys.path.append("../")

import logging
from src.utils import logging_utils
from src import functional

# Logger for this notebook; in the recorded output its name shows up as
# "__main__".
logger = logging.getLogger(__name__)

# Configure the root logger: DEBUG threshold, the project's format strings,
# and stdout as the destination so records appear in the cell output.
# Because this targets the root logger, records emitted by third-party
# libraries are displayed as well.
logging.basicConfig(
    level=logging.DEBUG,
    format=logging_utils.DEFAULT_FORMAT,
    datefmt=logging_utils.DEFAULT_DATEFMT,
    stream=sys.stdout,
)

import torch
import transformers

# Record the library versions and GPU used by this run so results can be
# traced back to a concrete environment.
logger.info(f"{torch.__version__=}, {torch.version.cuda=}")
if torch.cuda.is_available():
    logger.info(
        f"{torch.cuda.is_available()=}, {torch.cuda.device_count()=}, {torch.cuda.get_device_name()=}"
    )
else:
    # torch.cuda.get_device_name() raises when no CUDA device can be
    # initialised, which would abort this cell on a CPU-only machine. Report
    # availability and device count only in that case.
    logger.info(f"{torch.cuda.is_available()=}, {torch.cuda.device_count()=}")
logger.info(f"{transformers.__version__=}")

  from .autonotebook import tqdm as notebook_tqdm


2024-06-13 15:55:55 __main__ INFO     torch.__version__='2.3.1', torch.version.cuda='12.1'
2024-06-13 15:55:55 __main__ INFO     torch.cuda.is_available()=True, torch.cuda.device_count()=1, torch.cuda.get_device_name()='NVIDIA RTX A6000'
2024-06-13 15:55:55 __main__ INFO     transformers.__version__='4.41.2'


In [3]:
from src.models import ModelandTokenizer

# Load the model and its tokenizer through the project's wrapper, with
# weights in half precision (float16).
mt = ModelandTokenizer(
    model_key="meta-llama/Meta-Llama-3-8B",
    # model_key = "gpt2",  # alternative checkpoint, currently disabled
    torch_dtype=torch.float16,
)

Special tokens have been added in the vocabulary, make sure the associated word embeddings are fine-tuned or trained.


2024-06-13 15:55:56 accelerate.utils.modeling INFO     We will use 90% of the memory on device 0 for storing the model, and 10% for the buffer to avoid OOM. You can set `max_memory` in to a higher value to use more memory (at your own risk).


Loading checkpoint shards: 100%|██████████| 4/4 [00:04<00:00,  1.10s/it]

2024-06-13 15:56:00 src.models INFO     loaded model </home/local_arnab/Codes/saved_model_weights/meta-llama/Meta-Llama-3-8B> | size: 15316.516 MB | dtype: torch.float16 | device: cuda:0





In [4]:
def get_module_nnsight(model, layer_name):
    """Resolve a dotted path to an object nested inside ``model``.

    ``layer_name`` is split on ``"."`` and each component is applied in order,
    starting from ``model``: a component made up only of digits is used as an
    integer index (``obj[int(part)]``); any other component is looked up as
    an attribute (``getattr(obj, part)``).

    Args:
        model: Root object from which the walk starts.
        layer_name: Dot-separated path, e.g. ``"model.layers.3"``.

    Returns:
        The object reached after applying every path component.
    """
    current = model
    path_parts = layer_name.split(".")
    for part in path_parts:
        if part.isdigit():
            # Numeric component: index into a container of sub-modules.
            current = current[int(part)]
        else:
            current = getattr(current, part)
    return current

In [5]:
# Prompt whose next-token prediction is inspected below.
prompt = "The Space Needle is located in the city of"
# Last entry of the wrapper's list of layer names.
layer_name = mt.layer_names[-1]

# Run the prompt inside a trace and save two values for use after the
# context exits: the selected layer's output and the model's output logits.
with mt.trace() as tracer:
    with tracer.invoke(prompt) as invoker:
        layer = get_module_nnsight(mt, layer_name)
        # presumably untuple() unwraps a tuple-valued layer output to the
        # hidden-state tensor — confirm in src.functional
        hidden_states = functional.untuple(layer.output).save()
        logits = mt.output.logits.save()

# Display the highest-scoring tokens with their probabilities, computed from
# the logits at index -1 of the second axis (assumed to be the last token
# position — TODO confirm shape).
functional.interpret_logits(
    logits=logits[:, -1].squeeze(),
    tokenizer=mt.tokenizer,
    get_proba=True,
)

You're using a PreTrainedTokenizerFast tokenizer. Please note that with a fast tokenizer, using the `__call__` method is faster than using a method to encode the text followed by a call to the `pad` method to get a padded encoding.


[(' Seattle', 0.969),
 ('\xa0', 0.004),
 (' Seat', 0.003),
 (' se', 0.003),
 (' Se', 0.002),
 (' the', 0.001),
 (' Bellev', 0.001),
 ('\n', 0.001),
 (' Washington', 0.001),
 (' Sea', 0.0)]

In [6]:
# Apply the project's logit-lens helper to the saved layer output at index -1
# of the second axis (assumed to be the last token position).
# In the recorded output this reproduces the distribution obtained from the
# model's own logits, which is what one would expect if `layer_name` refers
# to the final layer — TODO confirm.
functional.logit_lens(
    h=hidden_states[:, -1],
    mt=mt,
    get_proba=True,
)

[(' Seattle', 0.969),
 ('\xa0', 0.004),
 (' Seat', 0.003),
 (' se', 0.003),
 (' Se', 0.002),
 (' the', 0.001),
 (' Bellev', 0.001),
 ('\n', 0.001),
 (' Washington', 0.001),
 (' Sea', 0.0)]

In [7]:
from src.models import prepare_input

# Counterfactual-context prompt; the commented-out lines are alternative
# prompts kept for reference.
# prompt = "Assume an alternate universe where Eiffel Tower is one of the tourist attractions in the capital of Italy. In that universe the Eiffel Tower is located in the city of"
prompt = "Assume an alternate universe where Eiffel Tower is one of the tourist attractions in the capital of Italy. In that universe the tall iron structure in the city of Rome is called"
# prompt = "The capital of France is Oslo. The capital of France is"

# NOTE(review): `tokenizer=mt` passes the ModelandTokenizer wrapper rather
# than `mt.tokenizer`; presumably prepare_input accepts either — confirm.
# NOTE(review): with add_bos_token=False the decoded generation in the
# recorded output still begins with <|begin_of_text|>, so the tokenizer
# presumably inserts the BOS token itself — confirm.
inputs = prepare_input(prompts=prompt, tokenizer=mt, add_bos_token=False)

In [8]:
# Generate up to 30 new tokens by calling generate() on `mt._model`
# (presumably the wrapped Hugging Face model — confirm).
# NOTE(review): top_k only matters when sampling is enabled, where top_k=1
# limits the choice to the single most likely token; either way decoding is
# effectively greedy. Consider do_sample=False to state that explicitly.
generation = mt._model.generate(**inputs, max_new_tokens=30, top_k=1)

# Decode the first generated sequence, keeping special tokens visible.
mt.tokenizer.decode(generation[0], skip_special_tokens=False)

Setting `pad_token_id` to `eos_token_id`:128001 for open-end generation.


'<|begin_of_text|>Assume an alternate universe where Eiffel Tower is one of the tourist attractions in the capital of Italy. In that universe the tall iron structure in the city of Rome is called the Eiffel Tower. The Eiffel Tower is a 324-metre tall iron lattice tower located on the Champ de Mars in Paris,'