In [1]:
import time
import torch

from modeling_dream.modeling_dream import DreamModel
from modeling_dream.tokenization_dream import DreamTokenizer

  from .autonotebook import tqdm as notebook_tqdm


In [2]:
def select_device():
    """Return the name of the torch backend to run on.

    Preference order:
      1. "cuda" when ``torch.cuda.is_available()`` reports true;
      2. "mps" when ``torch.backends`` exposes an ``mps`` attribute whose
         ``is_available()`` reports true;
      3. "cpu" otherwise.
    """
    if not torch.cuda.is_available():
        # `torch.backends.mps` is looked up defensively: builds that do not
        # expose the attribute simply fall through to the CPU.
        mps = getattr(torch.backends, "mps", None)
        has_mps = False if mps is None else mps.is_available()
        return "mps" if has_mps else "cpu"
    return "cuda"
# end

# --- Model Loading ---
# Model identifier handed to the `from_pretrained` calls further down.
model_path = "Dream-org/Dream-v0-Instruct-7B"

# Floating-point precision to use for each backend `select_device` can return.
dtype_by_device = dict(
    cuda=torch.bfloat16,
    mps=torch.float16,
    cpu=torch.float32,
)

# Resolve the backend once, then look up its matching precision.
device = select_device()
dtype = dtype_by_device[device]
print(f"Using device: {device} (dtype={dtype})")


Using device: cuda (dtype=torch.bfloat16)


In [3]:
# The tokenizer is configured for left padding, so the batched prompts below
# are padded on the left.
# NOTE(review): `trust_remote_code` is left as-is on the tokenizer call —
# confirm whether it is still needed when the class is imported directly.
tokenizer = DreamTokenizer.from_pretrained(
    model_path,
    trust_remote_code=True,
    padding_side='left'
)

# `trust_remote_code` is deliberately not passed here: the model class is
# referenced directly rather than through an Auto class, and the loader
# reports that the flag has no effect in that case and is ignored.
model = DreamModel.from_pretrained(
    model_path,
    torch_dtype=dtype,
)

# Move the weights to the selected device and switch to evaluation mode.
model = model.to(device).eval()

# Two independent single-turn conversations, generated as one batch.
messages = [[
    {"role": "user", "content": "Write a story that ends with 'Finally, Joey and Rachel get married.'"}
],
[
    {"role": "user", "content": "Janet's ducks lay 16 eggs per day. She eats three for breakfast every morning and bakes muffins for her friends every day with four. She sells the remainder at the farmers' market daily for $2 per fresh duck egg. How much in dollars does she make every day at the farmers' market?"}
]]

# padding=True is required because the two prompts differ in length and must
# be returned together as tensors.
inputs = tokenizer.apply_chat_template(
    messages, return_tensors="pt", return_dict=True, add_generation_prompt=True, padding=True
)
input_ids = inputs.input_ids.to(device)
attention_mask = inputs.attention_mask.to(device)

output = model.diffusion_generate(
    input_ids,
    attention_mask=attention_mask,
    max_new_tokens=256,
    output_history=True,
    return_dict_in_generate=True,
    steps=256,
    temperature=0.2,
    top_p=0.95,
    alg="entropy",
    alg_temp=0.,
)

# Each returned sequence starts with its (padded) prompt; slice that prefix
# off so only the newly generated tokens are decoded.
generations = [
    tokenizer.decode(g[len(p) :].tolist())
    for p, g in zip(input_ids, output.sequences)
]

# Print every answer, cut at its first end-of-sequence token, with a
# separator line between answers. Iterating over `generations` keeps this in
# step with `messages` instead of relying on hard-coded indices.
print("\n------\n".join(text.split(tokenizer.eos_token)[0] for text in generations))


The argument `trust_remote_code` is to be used with Auto classes. It has no effect here and is ignored.
Loading checkpoint shards: 100%|██████████| 4/4 [00:00<00:00,  8.55it/s]


Once upon a time, there was a man named Joey who lived in a small town. He was a kind and loving person who had a lot of friends. One day, Joey met a beautiful woman named Rachel. They fell in love and decided to get married. However, there was a problem. Joey's parents didn't like Rachel, and they didn't want their son to marry her. Joey was heartbroken, but he didn't give up. He tried to convince his parents that Rachel was the one for him, but they wouldn't listen. Joey and Rachel decided to move in together, and they worked hard to make it work. They had arguments and moments of joy, but they didn't want to give up on each other. One day, Joey's parents came to visit, and they met Rachel. They were surprised by how wonderful she was, and they decided to support their son's decision. Finally, Joey and Rachel got married, and they lived happily ever after.
------
Janet's ducks lay 16 eggs per day.
She eats 3 eggs for breakfast and bakes 4 eggs for muffins, so she uses a total of 3 + 

In [5]:
# Ad-hoc inspection of the class behind `model` using the `jinyu_utils` helper.
# NOTE(review): judging by the recorded output, the helper reports the path of
# the source file that defines the class — confirm against jinyu_utils.
from jinyu_utils.jinyu_inspect import jinyu_inspect_file
jinyu_inspect_file(model.__class__)

start to inspect: /home/exx/.cache/huggingface/modules/transformers_modules/Dream-org/Dream-v0-Instruct-7B/05334cb9faaf763692dcf9d8737c642be2b2a6ae/modeling_dream.py




In [6]:
# Display the concrete class of the loaded model as the cell's output.
type(model)

transformers_modules.Dream-org.Dream-v0-Instruct-7B.05334cb9faaf763692dcf9d8737c642be2b2a6ae.modeling_dream.DreamModel