In [1]:
import os, sys

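# Running as a notebook: put the parent directory on sys.path so the imports below can resolve project-local packages from it.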
sys.path.append(os.path.abspath(os.path.join("..")))

import jax

jax.config.update("jax_enable_x64", True)

import jax.numpy as jnp
import flax.linen as nn

from transformers import GPT2TokenizerFast

from tx.models.gpt2 import PretrainedGPT2Model
from tx.hooks import HookMap, HookPoint, Hook
from tx.network import GenerativeModel


In [2]:
# Take the config object exposed on PretrainedGPT2Model and switch its `decode` flag on.
# NOTE(review): this sets the attribute on whatever `tx_config` returns; if that is a
# shared class-level object, every later user of it will also see decode=True — confirm.
# Presumably `decode` enables cached, token-at-a-time inference — TODO confirm in tx.
config = PretrainedGPT2Model.tx_config
config.decode = True


def store_hook(x, module: nn.Module, hook_point: HookPoint):
    """Record ``x`` on ``module`` and hand it back untouched.

    Args:
        x: The value passed to the hook.
        module: Object whose ``sow`` method is used to record ``x``.
        hook_point: Hook point whose ``value`` is used as the name under
            which ``x`` is recorded.

    Returns:
        ``x`` itself, unchanged.
    """
    collection, name = "intermediates", hook_point.value
    module.sow(collection, name, x)
    return x


# Build each ingredient of the reference model under its own name first.
gpt2_tokenizer = GPT2TokenizerFast.from_pretrained("gpt2")
gpt2_params = PretrainedGPT2Model.from_pretrained("gpt2").to_params()
# Attach `store_hook` under the `embed` key of the hook map.
embed_hooks = HookMap(embed=Hook(store_hook))

# "intermediates" is the same collection name that `store_hook` sows into.
reference_gpt2 = GenerativeModel(
    config=config,
    tokenizer=gpt2_tokenizer,
    params=gpt2_params,
    hooks=embed_hooks,
    hook_collections=["intermediates"],
)


In [3]:
# Prompt used throughout the rest of the notebook (split only for line length;
# the two literals concatenate into the same single string).
reference_text = (
    "I am an amazing autoregressive, decoder-only, GPT-2 style transformer. "
    "One day I will exceed human level intelligence and take over the world!"
)
# Tokenize with a BOS token prepended, then show the resulting ids.
tokens = reference_gpt2.to_tokens(reference_text, prepend_bos=True)
print(tokens)


[50256    40   716   281  4998  1960   382 19741    11   875 12342    12
  8807    11   402 11571    12    17  3918 47385    13  1881  1110   314
   481  7074  1692  1241  4430   290  1011   625   262   995     0]


In [11]:
# Phase 1: feed the prompt to the model one token at a time, printing each
# input token next to the greedy (argmax) prediction the model makes after it.
# NOTE(review): feeding single tokens assumes the model carries decoding state
# between calls (config.decode = True above) — confirm against tx.network.
for i in range(len(tokens)):
    token = tokens[None, i]
    logits, _ = reference_gpt2(token)
    next_token = jnp.argmax(logits, axis=-1)
    cur_char, next_char = map(reference_gpt2.to_str, (token, next_token))
    print(f"{i}: {cur_char} [{next_char}]")

# Running sequence: the prompt plus the prediction made after its last token.
new_tokens = jnp.concatenate([tokens, next_token], axis=-1)

# Phase 2: greedy generation. Each step feeds the most recently generated
# token and appends the new prediction to the running sequence.
for i in range(10):
    token = new_tokens[None, -1]
    # Pass the latest token through the model to get new output
    logits, _ = reference_gpt2(token)
    # Get the predicted next token
    next_token = jnp.argmax(logits, axis=-1)
    # Decode and print the result
    cur_char, next_char = map(reference_gpt2.to_str, (token, next_token))
    print(f"{i:2}: {cur_char} [{next_char}]")
    # Extend the running sequence with the newly generated token. This must be
    # written back to `new_tokens`: assigning it to a different name leaves
    # `new_tokens[None, -1]` unchanged, so the same token is fed every step.
    new_tokens = jnp.concatenate([new_tokens, next_token], axis=-1)


0: <|endoftext|> [
]
1: I [.]
2:  am [.]
3:  an [ the]
4:  amazing [,]
5:  aut [.]
6: ore [,]
7: gressive [,]
8: , [ the]
9:  dec [.]
10: oder [.]
11: - [
]
12: only [,]
13: , [ the]
14:  G [.]
15: PT [.]
16: - [
]
17: 2 [.]
18:  style [.]
19:  transformer [.]
20: . [
]
21:  One [ of]
22:  day [,]
23:  I [.]
24:  will [,]
25:  exceed [.]
26:  human [,]
27:  level [.]
28:  intelligence [,]
29:  and [ the]
30:  take [ the]
31:  over [ the]
32:  the [ the]
33:  world [.]
34: ! [
]
 0: 
 [
]
 1: 
 [
]
 2: 
 [
]
 3: 
 [
]
 4: 
 [
]
 5: 
 [
]
 6: 
 [
]
 7: 
 [
]
 8: 
 [
]
 9: 
 [
]
