In [1]:
import os
import time
import importlib
import transformers
from datasets import load_dataset, load_from_disk
import torch
import re

import rpbuild as rp
import rpbuild.data
import rpbuild.generation

# Re-import rpbuild.generation so that edits made to the module on disk take
# effect without restarting the kernel. As the last expression of the cell,
# the reloaded module object is echoed as the cell output.
importlib.reload(rp.generation)

<module 'rpbuild.generation' from '/home/dinalt/rust/ai_development/roleplay_build/rpbuild/generation.py'>

### Load Resources
Load dataset and model for testing...

In [2]:
from transformers import BitsAndBytesConfig

# Hub identifier of the input dataset.
dataset_id = "dinalt/roleplay_build"

# Load the dataset, keep only its "train" split, and print a summary of it.
dataset = load_dataset(dataset_id)["train"]
print(dataset)

# Local directory holding downloaded models. In the cells visible here it is
# only referenced by the commented-out local-path alternative below.
models_dir = "/home/dinalt/ai_assets/models"

# Choose the model to use.
# Option 1: load by name from models_dir (uncomment both lines).
#model_name = "fhai50032_RolePlayLake-7B" # AKA "fhai50032/RolePlayLake-7B"
#model_id = os.path.join(models_dir, model_name)

# Option 2 (active): load it from the hub / local cache.
model_id = "fhai50032/RolePlayLake-7B"

# Device to run the model on.
# NOTE(review): None presumably lets rp.CausalLM pick the placement -- confirm.
device = None

# Load the model with quantization.
# Quantization is enabled by default, for those with low GPU memory.
# If you have enough memory, disable it. It's faster and produces better output.

# See the link below for alternative quantization configurations:
# https://huggingface.co/docs/transformers/main/en/quantization

# Load model and tokenizer (bfloat16 weights, FlashAttention-2 attention,
# 4-bit bitsandbytes quantization).
causal_lm = rp.CausalLM(
    model_id,
    device=device,
    torch_dtype=torch.bfloat16,
    attn_implementation="flash_attention_2",
    #device_map="auto",
    quantization_config=BitsAndBytesConfig(
        load_in_4bit=True,
    )
)

Dataset({
    features: ['pairing_reason', 'plist', 'director_log', 'scenario', 'proxy', 'example_dialog', 'conversation', 'char_name', 'description', 'summary', 'preset', 'greeting'],
    num_rows: 2770
})
No chat template specified. Attempting to use tokenizer default
No default found. Using library default.
No instruct template specified. Using default.
Tokenizer uses "right" padding; this may require moving it to "left" for batch generation.


`low_cpu_mem_usage` was None, now set to True since model is quantized.


Loading checkpoint shards:   0%|          | 0/8 [00:00<?, ?it/s]

MistralForCausalLM(
  (model): MistralModel(
    (embed_tokens): Embedding(32000, 4096, padding_idx=2)
    (layers): ModuleList(
      (0-31): 32 x MistralDecoderLayer(
        (self_attn): MistralFlashAttention2(
          (q_proj): Linear4bit(in_features=4096, out_features=4096, bias=False)
          (k_proj): Linear4bit(in_features=4096, out_features=1024, bias=False)
          (v_proj): Linear4bit(in_features=4096, out_features=1024, bias=False)
          (o_proj): Linear4bit(in_features=4096, out_features=4096, bias=False)
          (rotary_emb): MistralRotaryEmbedding()
        )
        (mlp): MistralMLP(
          (gate_proj): Linear4bit(in_features=4096, out_features=14336, bias=False)
          (up_proj): Linear4bit(in_features=4096, out_features=14336, bias=False)
          (down_proj): Linear4bit(in_features=14336, out_features=4096, bias=False)
          (act_fn): SiLU()
        )
        (input_layernorm): MistralRMSNorm()
        (post_attention_layernorm): MistralRMSNor

## Main Test Harness
This is a mock of the automation PyTorch script interface.

The harness uses an output class, in this case `OutputChars`, which merely collects the output dictionaries in a list for inspection.

An actual generator script would change the implementation to write the generated data to disk.

In [3]:
from torch.utils.data.distributed import DistributedSampler
from torch.utils.data import SequentialSampler
import time

# Single-process test run: visit the dataset indices in order.
# NOTE(review): DistributedSampler is imported above but unused in this cell;
# presumably the real multi-process script uses it instead -- confirm.
sampler = SequentialSampler(dataset)

# Mock output sink: keeps every generated record in memory for inspection.
# See class rp.CharacterWriter for a json implementation.
class OutputChars:
    """In-memory stand-in for the output callback passed as ``output_fn``."""

    def __init__(self):
        # All records received so far, in arrival order.
        self.chars = []

    def __call__(self, local_rank, global_step, output_records):
        """Add ``output_records`` to ``self.chars``; rank and step are ignored."""
        self.chars.extend(output_records)

    def exists(self, local_rank, global_step):
        """Always answer False, whatever rank and step are given."""
        return False

    def file_path(self, local_rank, global_step):
        """Return the placeholder string "null", whatever rank and step are given."""
        return "null"

# Collector that receives the generated records from infer().
output_chars = OutputChars()

# Seed the random number generators before shuffling and generating.
transformers.set_seed(44) # fixed seed used for this test run

# Test inference engine, timing the whole call.
start = time.perf_counter()
rp.generation.infer(
    # Fake local rank
    local_rank=0,
    # infer() iterates over a shuffled copy of the dataset; note that the
    # generator below is handed the original, unshuffled dataset.
    dataset=dataset.shuffle(),
    sampler=sampler,
    generator=rp.generation.generate_dialog,
    # Dialog generator does not presently support batch inference for size other than 1
    batch_size=1,
    output_fn=output_chars,

    # Steps before calling output_fn
    output_steps=8,

    # Limit steps, for testing.
    max_steps=2,
    # Extra keyword arguments for the generator.
    # NOTE(review): presumably forwarded by infer() to generate_dialog -- confirm.
    generator_kwargs = dict(
        causal_lm=causal_lm,
        dataset=dataset,
        max_tokens=2000,
    )
)
end = time.perf_counter()
# Report wall-clock duration of the infer() call.
print(f"Elapsed {(end-start):.03f} secs.")

  0%|                                                                                                                                                                                                                                                     | 0/2 [00:00<?, ?it/s]Token indices sequence length is longer than the specified maximum sequence length for this model (2228 > 255). Running this sequence through the model will result in indexing errors
 50%|██████████████████████████████████████████████████████████████████████████████████████████████████████████████████████▌                                                                                                                      | 1/2 [01:03<01:03, 63.41s/it]

Elapsed 63.419 secs.





### Dump Output From Mock

Dump the list of generated dictionaries produced by the mock.

In [4]:
# Dump characters from output
def dump_characters(characters):
    """Print every character record beneath a banner showing its position.

    Args:
        characters: Iterable of character records; each one is passed
            unchanged to rp.data.dump_character_data.
    """
    i = 0
    for character in characters:
        # Banner: a blank line, then the running index centred in 80 dashes.
        print(f"\n{i:-^80}")
        rp.data.dump_character_data(character)
        i += 1
        

# Print every record collected by the mock output object.
dump_characters(output_chars.chars)


---------------------------------------0----------------------------------------

char_name: Dr. Kael Mendoza

summary: A brilliant and compassionate ambassador from Earth, Dr. Kael Mendoza specializes in intergalactic medicine and cultural exchange. His dedication to fostering understanding between different species has earned him respect and admiration throughout the galaxy.

preset: Yara

pairing_reason: The combination of Dr. Kael Mendoza's passion for exploring new worlds and Luminous Lucy's magical ability to spread joy and light throughout the darkness creates a compelling story filled with adventure, wonder, and personal growth.

Name: Dr. Kael Mendoza
Humanoid alien, resembling a tall and lean human with elongated fingers and large, expressive eyes in shades of violet.
Skin: Olive green, slightly textured with tiny scales.
Hair: Short, jet black curls that are kept neatly groomed.
Height: 6'3'' (196 cm)
Weight: 187 lbs (85 kg)
Age: Early 40s

Background: Hailing from the plan

In [None]:
# Dump the selected character indices
def dump_selected_characters(characters, selected):
    """Print only the character records at the requested positions.

    Each valid index is printed as a banner (the index centred in 80 dashes)
    followed by the output of rp.data.dump_character_data for that record.
    An index outside the bounds of ``characters`` is reported and skipped, so
    one stale index does not abort the whole cell with an IndexError.

    Args:
        characters: Indexable sequence of character records.
        selected: Iterable of integer indices into ``characters``. Negative
            indices count from the end, as with normal sequence indexing.
    """
    for i in selected:
        try:
            character = characters[i]
        except IndexError:
            # Only the lookup is guarded; errors raised while dumping a
            # record still propagate.
            print(f"\nSkipping index {i}: out of range")
            continue
        print(f"\n{i:-^80}")
        rp.data.dump_character_data(character)
        

# NOTE(review): indices 4 and 5 need at least six collected records; the run
# above is capped at max_steps=2 with batch_size=1, so these indices may be
# out of range for output_chars.chars -- adjust them to the records present.
dump_selected_characters(output_chars.chars, [4,5])