In [1]:
from omegaconf import OmegaConf, DictConfig
import torch
from torchtune.utils.profiling_utils import (
    _DEFAULT_PROFILER_ACTIVITIES,
    _DEFAULT_SCHEDULE_CFG,
    _DEFAULT_PROFILER_OPTS,
    _ExperimentalConfig,
)
from torchtune import config
from tests.recipes.utils import dummy_alpaca_dataset_config
import os

config_file = "./custom_configs/7B_lora.yaml"
fixtures_dir = "tests/assets"

  from .autonotebook import tqdm as notebook_tqdm


In [3]:
config.instantiate(
    OmegaConf.create(
        {"_component_": "torch.profiler.profile", **_DEFAULT_PROFILER_OPTS}
    )
)

<torch.profiler.profiler.profile at 0x7f2d620a00d0>

In [50]:
alpaca_cfg = OmegaConf.from_dotlist(dummy_alpaca_dataset_config())

In [56]:
from torchtune.modules.tokenizers import SentencePieceTokenizer

In [59]:
tokenizer = SentencePieceTokenizer(os.path.join(fixtures_dir, "m.model"))

In [60]:
ds = config.instantiate(alpaca_cfg.dataset, tokenizer=tokenizer)

Generating train split: 100 examples [00:00, 1663.98 examples/s]


In [72]:
import tests.recipes.utils as test_utils
from tests.recipes.utils import MODEL_TEST_CONFIGS

In [77]:
llama2_qlora_cfg = MODEL_TEST_CONFIGS["llama2_qlora"]
llama2_qlora_cfg

['model._component_=torchtune.models.llama2.lora_llama2',
 "model.lora_attn_modules=['q_proj', 'k_proj', 'v_proj', 'output_proj']",
 'model.apply_lora_to_mlp=True',
 'model.apply_lora_to_output=False',
 'model.vocab_size=32000',
 'model.num_layers=4',
 'model.num_heads=16',
 'model.embed_dim=256',
 'model.max_seq_len=2048',
 'model.norm_eps=1e-5',
 'model.num_kv_heads=8',
 'model.lora_rank=8',
 'model.lora_alpha=16',
 'model.lora_dropout=0.0',
 'model.quantize_base=True']

In [78]:
model_cfg = OmegaConf.from_dotlist(llama2_qlora_cfg)
model_cfg

{'model': {'_component_': 'torchtune.models.llama2.lora_llama2', 'lora_attn_modules': ['q_proj', 'k_proj', 'v_proj', 'output_proj'], 'apply_lora_to_mlp': True, 'apply_lora_to_output': False, 'vocab_size': 32000, 'num_layers': 4, 'num_heads': 16, 'embed_dim': 256, 'max_seq_len': 2048, 'norm_eps': 1e-05, 'num_kv_heads': 8, 'lora_rank': 8, 'lora_alpha': 16, 'lora_dropout': 0.0, 'quantize_base': True}}

In [140]:
llama2_qlora = config.instantiate(model_cfg.model)
torch.save(llama2_qlora.state_dict(), "./llama2_qlora.pt")

In [141]:
m2 = torch.load("./llama2_qlora.pt")

In [143]:
m2.keys()

odict_keys(['tok_embeddings.weight', 'layers.0.sa_norm.scale', 'layers.0.attn.q_proj.weight', 'layers.0.attn.q_proj.lora_a.weight', 'layers.0.attn.q_proj.lora_b.weight', 'layers.0.attn.k_proj.weight', 'layers.0.attn.k_proj.lora_a.weight', 'layers.0.attn.k_proj.lora_b.weight', 'layers.0.attn.v_proj.weight', 'layers.0.attn.v_proj.lora_a.weight', 'layers.0.attn.v_proj.lora_b.weight', 'layers.0.attn.output_proj.weight', 'layers.0.attn.output_proj.lora_a.weight', 'layers.0.attn.output_proj.lora_b.weight', 'layers.0.mlp_norm.scale', 'layers.0.mlp.w1.weight', 'layers.0.mlp.w1.lora_a.weight', 'layers.0.mlp.w1.lora_b.weight', 'layers.0.mlp.w2.weight', 'layers.0.mlp.w2.lora_a.weight', 'layers.0.mlp.w2.lora_b.weight', 'layers.0.mlp.w3.weight', 'layers.0.mlp.w3.lora_a.weight', 'layers.0.mlp.w3.lora_b.weight', 'layers.1.sa_norm.scale', 'layers.1.attn.q_proj.weight', 'layers.1.attn.q_proj.lora_a.weight', 'layers.1.attn.q_proj.lora_b.weight', 'layers.1.attn.k_proj.weight', 'layers.1.attn.k_proj.lora_

In [86]:
sum(p.numel() for p in llama2_qlora.parameters()) / 1e6
llama2_qlora.output.weight.dtype

torch.float32

In [34]:
e1 = _ExperimentalConfig(verbose=True)
e2 = _ExperimentalConfig(verbose=True)

In [43]:
import sys

sys.getsizeof(e1), sys.getsizeof(e2), sys.getsizeof(_ExperimentalConfig())

(56, 56, 56)

In [30]:
config.instantiate(OmegaConf.create(DEFAULT_SCHEDULE_CFG))

<function torch.profiler.profiler.schedule.<locals>.schedule_fn(step: int) -> torch.profiler.profiler.ProfilerAction>

In [32]:
DictConfig(DEFAULT_SCHEDULE_CFG)

{'_component_': 'torch.profiler.schedule', 'wait': 10, 'warmup': 5, 'active': 3, 'repeat': 1}

In [3]:
test_config = """
profile:
  enabled: True
  CPU: True
  CUDA: True
  #output_dir: ${artifact_dir}/profiling
  #torch.profiler.profile
  profiler:
    _component_: torch.profiler.profile
    profile_memory: False
    with_stack: True
    record_shapes: False
    with_flops: True
  #torch.profiler.schedule
  schedule:
    _component_: torch.profiler.schedule
    wait: 3
    warmup: 1
    active: 1
    repeat: 0
"""

In [16]:
s = OmegaConf.create(test_config)

In [25]:
# s.profile.pop("schedule")
s.profile.pop("enabled")

True

In [2]:
cfg = OmegaConf.load(config_file)

In [3]:
p = cfg.pop("profile")

In [5]:
cfg.pop("hello")

ConfigKeyError: Key not found: 'hello'
    full_key: hello
    object_type=dict

In [3]:
from torchtune.config._instantiate import instantiate

  from .autonotebook import tqdm as notebook_tqdm


In [6]:
scheduler_cfg = cfg.profile.schedule
scheduler_cfg.repeat = 0

default_scheduler = {
    "_component_": "torch.profiler.schedule",
    "wait": 1,
    "warmup": 1,
    "active": 1,
    "repeat": 0,
}

In [7]:
from omegaconf import OmegaConf

OmegaConf.create(default_scheduler)

{'_component_': 'torch.profiler.schedule', 'wait': 1, 'warmup': 1, 'active': 1, 'repeat': 0}

In [5]:
OmegaConf.select(cfg.profile, "schedule", default=None, throw_on_missing=False)

{'_component_': 'torch.profiler.schedule', 'wait': 1, 'warmup': 1, 'active': 1, 'repeat': 0}

In [54]:
test_schedule = instantiate(scheduler_cfg)

In [55]:
ref_schedule = torch.profiler.schedule(wait=1, warmup=1, active=1)

In [56]:
test_actions = [test_schedule(i) for i in range(5)]
ref_actions = [ref_schedule(i) for i in range(5)]
assert test_actions == ref_actions

In [90]:
cfg = OmegaConf.load(config_file)
cfg.profile.profiler

{'_component_': 'torch.profiler.profile', 'record_shapes': True, 'profile_memory': True, 'with_stack': True, 'with_flops': True}

In [91]:
cfg.profile.CUDA
OmegaConf.select(cfg.profile, "CPU", default=True, throw_on_missing=False)

True

In [77]:
p = instantiate(
    cfg.profile.profiler,
    activities=[
        torch.profiler.ProfilerActivity.CPU if cfg.profile.CPU else None,
        torch.profiler.ProfilerActivity.CUDA if cfg.profile.CUDA else None,
    ],
)

In [96]:
p.schedule = test_schedule

In [99]:
p.with_stack

True

In [95]:
[p.schedule(i) for i in range(5)]

[<ProfilerAction.NONE: 0>,
 <ProfilerAction.WARMUP: 1>,
 <ProfilerAction.RECORD_AND_SAVE: 3>,
 <ProfilerAction.NONE: 0>,
 <ProfilerAction.WARMUP: 1>]

In [81]:
assert all(
    a in p.activities
    for a in [torch.profiler.ProfilerActivity.CPU, torch.profiler.ProfilerActivity.CUDA]
)

In [82]:
ref_prof = torch.profiler.profile(
    activities=[
        torch.profiler.ProfilerActivity.CPU,
        torch.profiler.ProfilerActivity.CUDA,
    ],
    record_shapes=True,
    profile_memory=True,
    with_stack=True,
    with_flops=True,
)

In [84]:
assert p.activities == ref_prof.activities

In [87]:
small_llama_config = {
    "architectures": ["LLaMAForCausalLM"],
    "bos_token_id": 0,
    "eos_token_id": 1,
    "hidden_act": "silu",
    "hidden_size": 128,
    "intermediate_size": 352,
    "initializer_range": 0.02,
    "max_sequence_length": 1024,
    "model_type": "llama",
    "num_attention_heads": 4,
    "num_hidden_layers": 4,
    "pad_token_id": -1,
    "rms_norm_eps": 1e-06,
    "transformers_version": "4.28.1",
    "use_cache": True,
    "vocab_size": 32000,
}

In [91]:
from transformers.models.llama import LlamaConfig
from transformers import LlamaForCausalLM

In [90]:
llama_config = LlamaConfig(**small_llama_config)

In [92]:
model = LlamaForCausalLM(config=llama_config)



In [144]:
tiny_llama_cfg = OmegaConf.load("./custom_configs/tiny_llama.yaml").checkpointer
tiny_llama_cfg

{'_component_': 'torchtune.utils.FullModelHFCheckpointer', 'checkpoint_dir': '/home/ubuntu/model_checkpoints/tiny_llama', 'checkpoint_files': ['model.safetensors'], 'recipe_checkpoint': None, 'output_dir': '${.checkpoint_dir}/trained', 'model_type': 'LLAMA2', 'resume_from_checkpoint': False}

In [150]:
OmegaConf.resolve(tiny_llama_cfg)
ckptr = config.instantiate(tiny_llama_cfg)

In [146]:
from pathlib import Path

p = Path(tiny_llama_cfg.checkpoint_dir)

In [149]:
model_path = p / tiny_llama_cfg.checkpoint_files[0]

model_path.is_file()

True

In [148]:
from transformers import AutoModelForCausalLM, AutoTokenizer

In [122]:
m = AutoModelForCausalLM.from_pretrained(
    "hf-internal-testing/tiny-random-LlamaForCausalLM"
)
tokenizer = AutoTokenizer.from_pretrained(
    "hf-internal-testing/tiny-random-LlamaForCausalLM"
)



You are using the default legacy behaviour of the <class 'transformers.models.llama.tokenization_llama_fast.LlamaTokenizerFast'>. This is expected, and simply means that the `legacy` (previous) behavior will be used so nothing changes for you. If you want to use the new behaviour, set `legacy=False`. This should only be set if you understand what it means, and thoroughly read the reason why this was added as explained in https://github.com/huggingface/transformers/pull/24565


In [127]:
tokenizer.pad_token_id, tokenizer.eos_token_id

(0, 2)

In [131]:
m.config.pad_token_id = 0
m.config.generation_config

AttributeError: 'LlamaConfig' object has no attribute 'generation_config'

In [132]:
from tests.recipes.utils import MODEL_TEST_CONFIGS

In [135]:
MODEL_TEST_CONFIGS["llama2_qlora"]

['model._component_=torchtune.models.llama2.lora_llama2',
 "model.lora_attn_modules=['q_proj', 'k_proj', 'v_proj', 'output_proj']",
 'model.apply_lora_to_mlp=True',
 'model.apply_lora_to_output=False',
 'model.vocab_size=32000',
 'model.num_layers=4',
 'model.num_heads=16',
 'model.embed_dim=256',
 'model.max_seq_len=2048',
 'model.norm_eps=1e-5',
 'model.num_kv_heads=8',
 'model.lora_rank=8',
 'model.lora_alpha=16',
 'model.lora_dropout=0.0',
 'model.quantize_base=True']

In [136]:
from torchtune.utils import FullModelTorchTuneCheckpointer

In [110]:
chkptr = config.instantiate(tiny_llama_cfg)

ValueError: No file with name: model.safetensors found in /home/ubuntu/model_checkpoints/tiny_llama.