In [1]:
pip install transformers accelerate pandas matplotlib scikit-learn numpy ipywidgets

Note: you may need to restart the kernel to use updated packages.


In [2]:
import torch, time
import pandas as pd
import matplotlib.pyplot as plt
import numpy as np
from sklearn import manifold
from transformers import AutoModelForCausalLM, AutoTokenizer
import torch.nn as nn

In [3]:
# Hugging Face Hub identifier of the checkpoint used throughout the notebook.
model_path = "ibm-granite/granite-3b-code-base"
# Prefer the first CUDA GPU; fall back to the CPU so the notebook can still run
# on a machine without one.
device = "cuda:0" if torch.cuda.is_available() else "cpu"

### load a tokenizer associated with an LLM

In [4]:
# Fetch the tokenizer that was published alongside the checkpoint in `model_path`.
tokenizer = AutoTokenizer.from_pretrained(
    pretrained_model_name_or_path=model_path,
)

### tokenize some text

In [5]:
# Plain English sentence: show how it is split into sub-word tokens.
english_sentence = "the quick brown fox jumped over the lazy dog."
tokenizer.tokenize(english_sentence)

['the',
 'Ġquick',
 'Ġbrown',
 'Ġf',
 'ox',
 'Ġjump',
 'ed',
 'Ġover',
 'Ġthe',
 'Ġlazy',
 'Ġdog',
 '.']

In [6]:
# Source code sample: the leading newline, the indentation and the trailing
# spaces are all part of the string that gets tokenized.
code_snippet = """
def add(x, y):
    return x + y
                   """
tokenizer.tokenize(code_snippet)

['Ċ',
 'def',
 'Ġadd',
 '(',
 'x',
 ',',
 'Ġy',
 '):',
 'ĊĠĠĠ',
 'Ġreturn',
 'Ġx',
 'Ġ+',
 'Ġy',
 'ĊĠĠĠĠĠĠĠĠĠĠĠĠĠĠĠĠĠĠĠ']

In [7]:
# Conversational request containing a proper name.
insurance_request = "can you help me create a new car insurance policy quote for Roger?"
tokenizer.tokenize(insurance_request)

['can',
 'Ġyou',
 'Ġhelp',
 'Ġme',
 'Ġcreate',
 'Ġa',
 'Ġnew',
 'Ġcar',
 'Ġins',
 'urance',
 'Ġpolicy',
 'Ġquote',
 'Ġfor',
 'ĠR',
 'og',
 'er',
 '?']

In [8]:
# Query containing domain-specific (medical) vocabulary.
medical_query = "what is date of patient's salmonella diagnosis"
tokenizer.tokenize(medical_query)

['what',
 'Ġis',
 'Ġdate',
 'Ġof',
 'Ġpatient',
 "'s",
 'Ġsal',
 'mon',
 'ella',
 'Ġdi',
 'agnosis']

### load a model

In [9]:
# Load the causal-LM checkpoint and place all of its weights on the device
# chosen in the configuration cell (instead of repeating the device string here).
# Note: device "meta" does not load weights.
model = AutoModelForCausalLM.from_pretrained(
    model_path,
    device_map=device,
)
# Put the model in evaluation mode; eval() returns the model itself, so the
# cell also displays its architecture.
model.eval()

Loading checkpoint shards: 100%|██████████| 2/2 [00:00<00:00,  2.03it/s]


LlamaForCausalLM(
  (model): LlamaModel(
    (embed_tokens): Embedding(49152, 2560, padding_idx=0)
    (layers): ModuleList(
      (0-31): 32 x LlamaDecoderLayer(
        (self_attn): LlamaAttention(
          (q_proj): Linear(in_features=2560, out_features=2560, bias=True)
          (k_proj): Linear(in_features=2560, out_features=2560, bias=True)
          (v_proj): Linear(in_features=2560, out_features=2560, bias=True)
          (o_proj): Linear(in_features=2560, out_features=2560, bias=True)
        )
        (mlp): LlamaMLP(
          (gate_proj): Linear(in_features=2560, out_features=10240, bias=True)
          (up_proj): Linear(in_features=2560, out_features=10240, bias=True)
          (down_proj): Linear(in_features=10240, out_features=2560, bias=True)
          (act_fn): SiLU()
        )
        (input_layernorm): LlamaRMSNorm((2560,), eps=1e-05)
        (post_attention_layernorm): LlamaRMSNorm((2560,), eps=1e-05)
      )
    )
    (norm): LlamaRMSNorm((2560,), eps=1e-05)
    (

### extract the embedder from a model

In [10]:
# Top-level submodules of the model (kept in case later cells refer to `t`).
t = list(model.children())
# Get the token-embedding layer through the public accessor rather than relying
# on the positional order of the model's children.
embed = model.get_input_embeddings()
# Shape of the embedding table: (vocabulary size, embedding dimension).
eshape = embed.weight.shape
tuple(eshape)

(49152, 2560)

### embed every token in the vocabulary and get the min and max value over all embedding dimensions

In [11]:
vocabsize = eshape[0]
# Embed every token id in one batch. The index tensor is created directly on
# the device that holds the embedding weights, and autograd is disabled because
# only the values are inspected here (no backward pass is needed).
with torch.no_grad():
    x = embed(torch.arange(vocabsize, device=embed.weight.device))
# Smallest and largest value found anywhere in the embedded vocabulary.
(x.min(), x.max())

(tensor(-0.3008, device='cuda:0', grad_fn=<MinBackward1>),
 tensor(0.8750, device='cuda:0', grad_fn=<MaxBackward1>))