In [1]:
import os
# Expose only GPU index 4 to this process. This is assigned before the model/framework
# imports in the next cell so it is already in the environment when CUDA is first used.
os.environ["CUDA_VISIBLE_DEVICES"] = "4"
# Directory holding one sub-directory per run name; get_checkpoint_path() looks for
# `checkpoint-<step>` entries inside `<CHECKPOINT_ROOT>/<name>`.
# NOTE(review): the recorded cell output further down shows checkpoints resolved under an
# absolute path rather than "results" -- confirm this value before re-running.
CHECKPOINT_ROOT = "results"

In [2]:
import glob
import time

from accelerate import init_empty_weights
from accelerate import load_checkpoint_and_dispatch
from transformers.generation.streamers import BaseStreamer

from model.block_transformer import BlockTransformer
from model.utils import load_block_transformer_from_config, load_vanilla_model_from_config
from paths import PROJECT_ROOT
from util.config import load_config

Special tokens have been added in the vocabulary, make sure the associated word embeddings are fine-tuned or trained.


In [3]:
def get_config_path(name):
    """Return the path of the trainer config file for ``name``.

    ``.yaml`` is appended unless the substring ``.yaml`` already occurs
    somewhere in ``name``. The file name is then joined onto
    ``PROJECT_ROOT/conf/trainer``; the path is not checked for existence.
    """
    filename = name if ".yaml" in name else f"{name}.yaml"
    return os.path.join(PROJECT_ROOT, "conf", "trainer", filename)


def get_checkpoint_path(name):
    """Return the ``model.safetensors`` path inside the latest checkpoint of run ``name``.

    Looks for ``checkpoint-<step>`` entries under ``CHECKPOINT_ROOT/<name>`` and
    picks the one with the largest integer ``<step>``.

    Args:
        name: Run name, i.e. the sub-directory of ``CHECKPOINT_ROOT`` to search.

    Returns:
        ``<latest checkpoint dir>/model.safetensors`` as a string. Whether that
        file actually exists is not checked here.

    Raises:
        ValueError: if the run directory does not exist, if it contains no
            ``checkpoint-*`` entries, or if an entry's step suffix is not an
            integer.
    """
    root = os.path.join(CHECKPOINT_ROOT, name)
    if not os.path.exists(root):
        raise ValueError(f"Checkpoint directory does not exist: {root}")

    def get_step(checkpoint_path):
        # "checkpoint-570000" -> 570000. Compared numerically so that step 1000
        # ranks above step 999 (a plain string sort would get this wrong).
        return int(os.path.basename(checkpoint_path).split("-")[1])

    candidates = glob.glob(os.path.join(root, "checkpoint-*"))
    if not candidates:
        # Without this guard an empty run directory surfaced as a bare
        # IndexError from indexing the empty list.
        raise ValueError(f"No checkpoints found in: {root}")

    # Tuple ordering: highest step wins, the path string breaks ties.
    _, latest = max((get_step(cp), cp) for cp in candidates)
    checkpoint_path = os.path.join(latest, "model.safetensors")
    print(f"Retrieving latest checkpoint path {checkpoint_path}")
    return checkpoint_path

In [4]:
def load_model(name, block=True):
    """Build a model from its trainer config and load its latest checkpoint.

    Args:
        name: Run name. Used both to locate the config (``get_config_path``)
            and the checkpoint directory (``get_checkpoint_path``).
        block: If True, build via ``load_block_transformer_from_config``,
            which also yields a tokenizer; otherwise build via
            ``load_vanilla_model_from_config``.

    Returns:
        ``(model, tokenizer)`` when ``block`` is True, otherwise just ``model``.
    """
    config = load_config(get_config_path(name))
    # Construct the architecture under accelerate's empty-weights context; the
    # real parameters are filled in from the checkpoint below.
    with init_empty_weights():
        if block:
            model, tokenizer = load_block_transformer_from_config(config)
        else:
            model = load_vanilla_model_from_config(config)
    checkpoint = get_checkpoint_path(name)
    device_map = "sequential"  # set to auto to use multiple GPUs + pipelining (not tested)
    model = load_checkpoint_and_dispatch(model, checkpoint=checkpoint, device_map=device_map)
    if block:
        return model, tokenizer
    return model

In [5]:
def set_temperature(model, temperature):
    """Switch ``model`` to sampling with the given ``temperature``.

    For a ``BlockTransformer`` the settings are written to the generation
    config of its ``token_decoder``; for any other model they are written to
    the model's own ``generation_config``.
    """
    target = model.token_decoder if isinstance(model, BlockTransformer) else model
    target.generation_config.update(do_sample=True, temperature=temperature)

In [6]:
class FirstSampleStreamer(BaseStreamer):
    """Streamer that echoes the first sample of a batch to stdout as it is generated.

    The first ``put()`` call is skipped because it is expected to carry the
    prompt ids; each later call prints the decoded token of batch element 0.
    Newlines are shown as a literal ``\\n`` so the stream stays on one line.

    Args:
        tokenizer: Tokenizer whose ``decode`` turns token ids back into text.
    """

    def __init__(self, tokenizer):
        self.tokenizer = tokenizer
        self.first = True  # True until the prompt call has been skipped

    def put(self, value):
        # ignore prompt
        if self.first:
            self.first = False
            return
        # Decode with the tokenizer handed to this instance rather than
        # whatever global happens to be named `tokenizer`, and take index 0 so
        # the printed sample really is the first one in the batch.
        token = self.tokenizer.decode(value[0]).replace("\n", "\\n")
        print(token, end="", flush=True)

    def end(self):
        # Re-arm prompt skipping so the instance can be reused for another
        # generate() call.
        self.first = True

## Load models

In [7]:
# Baseline (non-block) model; with block=False load_model returns only the model.
vanilla = load_model("vanilla_410", block=False)
# Block transformer together with the tokenizer load_model returns alongside it.
block, tokenizer = load_model("block_main_b4_1.2b", block=True)
# NOTE: this `tokenizer` (from the block model) is also used for the vanilla model in the
# cells below. Modify this if the vanilla tokenizer differs from the block embedder tokenizer.



------------------------------ Preprocess Config -------------------------------
Automatically determining batch size based on `total_batch_size`
total_batch_size              : 256 (given)
torch.cuda.device_count()     : 1
per_device_train_batch_size   : 8 (given)
gradient_accumulation_steps   : 32 (computed)
actual total batch size       : 256
Setting wandb_run_name    : vanilla_410
Setting output_dir        : vanilla_410
Using deepspeed config    : /home/itsnamgyu/block-transformer/ds_configs/default_linear_warmup.config
--------------------------------------------------------------------------------
Initializing model from scratch...
Retrieving latest checkpoint path /home/itsnamgyu/x/itsnamgyu/block_transformer_lg/checkpoints/vanilla_410/checkpoint-570000/model.safetensors
------------------------------ Preprocess Config -------------------------------
Automatically determining batch size based on `total_batch_size`
total_batch_size              : 512 (given)
torch.cuda.device_cou



Using custom config for block decoder...
   num_hidden_layers: 12
   hidden_size: 2048
   num_attention_heads: 16
   intermediate_size: 8192
Initializing embedder from scratch...
Using custom config for embedder...
   vocab_size: 50304
   hidden_size: 512
Initializing token decoder from scratch...
Using custom config for token decoder...
   num_hidden_layers: 12
   hidden_size: 2048
   num_attention_heads: 16
   intermediate_size: 8192
Retrieving latest checkpoint path /home/itsnamgyu/x/itsnamgyu/block_transformer_lg/checkpoints/block_main_b4_1.2b/checkpoint-285000/model.safetensors


## Prompt

In [20]:
# Shared prompt for both generation benchmarks below. The text is fed to the tokenizer
# verbatim (including the leftover "[4]" citation marker), so editing it changes the
# prompt length reported further down as well as the generations.
prompt = """RMS Titanic was a British ocean liner that sank on 15 April 1912 as a result of striking an iceberg on her maiden voyage from Southampton, England to New York City, United States. Of the estimated 2,224 passengers and crew aboard, 1,496 died, making the incident one of the deadliest peacetime sinkings of a single ship.[4] Titanic, operated by the White Star Line, carried some of the wealthiest people in the world, as well as hundreds of emigrants from the British Isles, Scandinavia, and elsewhere in Europe who were seeking a new life in the United States and Canada. The disaster drew public attention, spurred major changes in maritime safety regulations, and inspired a lasting legacy in popular culture.

RMS Titanic was the largest ship afloat upon entering service and the second of three Olympic-class ocean liners built for the White Star Line. The ship was built by the Harland and Wolff shipbuilding company in Belfast. Thomas Andrews Jr., the chief naval architect of the shipyard, died in the disaster. Titanic was under the command of Captain Edward John Smith, who went down with the ship.

The first-class accommodation was designed to be the pinnacle of comfort and luxury. It included a gymnasium, swimming pool, smoking rooms, fine restaurants and cafes, a Victorian-style Turkish bath, and hundreds of opulent cabins. A high-powered radiotelegraph transmitter was available to send passenger "marconigrams" and for the ship's operational use. Titanic had advanced safety features, such as watertight compartments and remotely activated watertight doors, which contributed to the ship's reputation as "unsinkable".

Titanic was equipped with 16 lifeboat davits, each capable of lowering three lifeboats, for a total capacity of 48 boats. Despite this capacity, the ship was scantly equipped with a total of only 20 lifeboats. Fourteen of these were regular lifeboats, two were cutter lifeboats, and four were collapsible and proved difficult to launch while the ship was sinking. Together, the 20 lifeboats could hold 1,178 people — roughly half the number of passengers on board, and a third of the number the passengers the ship could have carried at full capacity (a number consistent with the maritime safety regulations of the era). The British Board of Trade's regulations required 14 lifeboats for a ship 10,000 tonnes. Titanic carried six more than required, allowing 338 extra people room in lifeboats. When the ship sank, the lifeboats that had been lowered were only filled up to an average of 60%.

## History
"""

In [26]:
# Number of identical copies of the prompt generated in parallel by the benchmark cells.
batch_size = 32
# Prompt size in tokens, as counted by the shared tokenizer.
prompt_length = len(tokenizer(prompt)["input_ids"])
print(f"Prompt length: {prompt_length}")
print(f"Batch size: {batch_size}")

Prompt length: 549
Batch size: 32


## Block Generation

In [32]:
# Throughput benchmark for the block model: sample `batch_size` continuations of the
# same prompt and report tokens/second.
set_temperature(block, temperature=1)
prompts = [prompt] * batch_size
inputs = tokenizer(prompts, return_tensors="pt")
inputs = {k: t.cuda() for k, t in inputs.items()}
print(" Start of generation ".center(80, "-"))
# perf_counter() is monotonic and high-resolution, so the measured interval cannot be
# skewed by wall-clock adjustments the way time.time() can.
# Note the interval also includes whatever generate() prints while running.
start = time.perf_counter()
output_ids = block.generate(**inputs, max_length=1024, print_first_sample=True, tokenizer=tokenizer)

duration = time.perf_counter() - start
n = output_ids.shape[-1]  # final sequence length, treated as prompt + generated tokens
g = n - prompt_length  # generated tokens per sample
print()
print(" End of generation ".center(80, "-"))
print(f"Prompt tokens per sample     : {prompt_length:>8}")
print(f"Generated tokens per sample  : {g:>8}")
print(f"Batch size                   : {batch_size:>8}")
print("-" * 80)
print(f"Tok/sec/sample               : {g/duration:>8.2f}")
print(f"Tok/sec                      : {g*batch_size/duration:>8.2f}")

----------------------------- Start of generation ------------------------------
\nTitanic was built to be a passenger liner, and that vision soon became reality. By March 1912, the ship was ready to sail. Two weeks later, in May, a severe gash on the stern caused her to crash and sink. She had to be towed to the repair yard's yard, and was not repaired during this time.\n\nThree days later, just a day short of the scheduled arrival of a crew, the Titanic went missing again. The ship was unable to resume her normal voyage. After three days of repairs, the ship was finally able to leave Boston, Massachusetts, on May 4. The trip was so slow that the Titanic had barely left dock when she began to list severely, and the passengers had to bathe on the ship. The captain of the Titanic became convinced that the ship would no longer be seaworthy. However, even before it was found-that her original repairs were inadequate-a rescue mission was launched. As Titanic approached, several of the crew

## Vanilla Generation

In [33]:
# Throughput benchmark for the vanilla baseline: sample `batch_size` continuations of the
# same prompt and report tokens/second.
set_temperature(vanilla, temperature=1)
prompts = [prompt] * batch_size
# Prints one sample of the batch while it is generated; the streamer keeps state,
# so a new instance is created for each run.
streamer = FirstSampleStreamer(tokenizer)
inputs = tokenizer(prompts, return_tensors="pt")
inputs = {k: t.cuda() for k, t in inputs.items()}
print(" Start of generation ".center(80, "-"))
# perf_counter() is monotonic and high-resolution, so the measured interval cannot be
# skewed by wall-clock adjustments the way time.time() can.
# Note the interval also includes the time spent streaming tokens to stdout.
start = time.perf_counter()
output_ids = vanilla.generate(**inputs, max_length=1024, streamer=streamer)

duration = time.perf_counter() - start
n = output_ids.shape[-1]  # final sequence length, treated as prompt + generated tokens
g = n - prompt_length  # generated tokens per sample
print()
print(" End of generation ".center(80, "-"))
print(f"Prompt tokens per sample     : {prompt_length:>8}")
print(f"Generated tokens per sample  : {g:>8}")
print(f"Batch size                   : {batch_size:>8}")
print("-" * 80)
print(f"Tok/sec/sample               : {g/duration:>8.2f}")
print(f"Tok/sec                      : {g*batch_size/duration:>8.2f}")

Setting `pad_token_id` to `eos_token_id`:0 for open-end generation.


----------------------------- Start of generation ------------------------------
\nRMS Titanic was a passenger ship that sank on 14 April 1912 on her maiden voyage from Southampton, England to New York City, United States. The Titanic collides with the Titania ship Chirichestan on 20 May 1912. The ship was an 85-year-old English manufacturer's wooden freighter named the Titanic, or Titanic.\n\n## The Ship\n\nThe ship was registered in the United States with the U.S. Maritime Administration as U.S. Nautical Sinking Fund Number C-23. According to this chartered service to take passengers to the United States from Southampton (Scotland), England, Canada, and Australia, and from New York (New York) in both a double-decker motor-taxi and a passenger passenger carriage.\n\nThis chartered service was a registered charter with a company registered in the United States and the U.S. National Shipbuilding Corporation. The ship was built in Belfast, Ireland by the Harland and Wolff shipbuilding co