In [1]:
!pip install --upgrade pip -q
!pip install transformers datasets sentencepiece peft -q
!pip install huggingface_hub -q
!pip uninstall tensorflow -y # If we don't do this, TF will take over TPU and cause permission error for PT

[0m

In [2]:
!export USE_TORCH=True # To use transformers library in TPU

In [3]:
import torch
import torch_xla.core.xla_model as xm
from torch_xla.distributed.fsdp import XlaFullyShardedDataParallel as FSDP, checkpoint_module
import torch_xla.distributed.xla_multiprocessing as xmp
import torch_xla.distributed.parallel_loader as pl

from transformers import AutoModelForCausalLM, AutoConfig, AutoTokenizer, default_data_collator
from datasets import Dataset, load_dataset, concatenate_datasets
from peft import LoraConfig, TaskType, get_peft_model

  from .autonotebook import tqdm as notebook_tqdm


In [4]:
import importlib
import sys

# Make the current working directory importable so the project-local `utils` package resolves.
sys.path.append('')

# Bind the helper module, then reload it so on-disk edits are picked up without
# restarting the kernel. The reload call stays last so the cell displays the module.
import utils.fsdp as fsdp_util
importlib.reload(fsdp_util)

<module 'utils.fsdp' from '/home/tunerX/utils/fsdp.py'>

In [5]:
import os

from huggingface_hub import login

# SECURITY: a write-scoped access token used to be hardcoded in this cell. Treat that token
# as leaked and revoke it in the Hugging Face account settings.
# The token is now read from the HF_TOKEN environment variable; when it is unset, `token`
# is None and login() asks for the token interactively instead.
login(token=os.environ.get("HF_TOKEN"))

The token has not been saved to the git credentials helper. Pass `add_to_git_credential=True` in this function directly or `--add-to-git-credential` if using via `huggingface-cli` if you want to set the git credential as well.
Token is valid (permission: write).
Your token has been saved to /root/.cache/huggingface/token
Login successful


In [6]:
# Hub identifier of the base checkpoint; the config, model and tokenizer are all loaded from it.
MODEL = "meta-llama/Llama-2-7b-hf"

In [8]:
# Settle the tokenizer and config first so any pad-token patch is in place before the
# model is instantiated.
tokenizer = AutoTokenizer.from_pretrained(MODEL)
config = AutoConfig.from_pretrained(MODEL)

if not tokenizer.pad_token:
    # Reached only when the tokenizer defines no pad token: reuse EOS for padding and
    # record the matching id in the config.
    tokenizer.pad_token = tokenizer.eos_token
    config.pad_token_id = tokenizer.pad_token_id

# Hand the (possibly patched) config to the model explicitly. Previously the pad id was
# written to a standalone config object that the model never received.
model = AutoModelForCausalLM.from_pretrained(MODEL, config=config)

Loading checkpoint shards: 100%|██████████| 2/2 [00:02<00:00,  1.46s/it]


In [9]:
# LoRA hyperparameters, collected in one mapping before the config object is built.
lora_settings = {
    "task_type": TaskType.CAUSAL_LM,
    "inference_mode": False,
    "r": 8,
    "lora_alpha": 32,
    "lora_dropout": 0.1,
}
peft_config = LoraConfig(**lora_settings)

# Wrap the base model with the adapters and report how many parameters remain trainable.
model = get_peft_model(model, peft_config)
model.print_trainable_parameters()

trainable params: 4,194,304 || all params: 6,742,609,920 || trainable%: 0.06220594176090199


In [10]:
# Call the project-local FSDP helper on the model, passing the layer class name(s) to target
# (presumably each matching submodule gets wrapped — verify in utils/fsdp.py).
# NOTE(review): the return value is discarded; confirm apply_fsdp modifies `model` in place.
# NOTE(review): the recorded run of this cell ended with RESOURCE_EXHAUSTED on device HBM,
# so the cell does not currently complete on the hardware it was run on.
fsdp_util.apply_fsdp(model, ["LlamaDecoderLayer"])

I0000 00:00:1721406295.433621    9245 tpu_initializer_framework_helper.cc:78] Libtpu path is: /usr/local/lib/python3.10/site-packages/torch_xla/lib/libtpu.so
I0000 00:00:1721406298.300460    9245 pjrt_c_api_client.cc:110] PjRtCApiClient created.


fsdp! LlamaDecoderLayer
fsdp! LlamaDecoderLayer
fsdp! LlamaDecoderLayer
fsdp! LlamaDecoderLayer
fsdp! LlamaDecoderLayer
fsdp! LlamaDecoderLayer
fsdp! LlamaDecoderLayer
fsdp! LlamaDecoderLayer
fsdp! LlamaDecoderLayer
fsdp! LlamaDecoderLayer
fsdp! LlamaDecoderLayer
fsdp! LlamaDecoderLayer
fsdp! LlamaDecoderLayer
fsdp! LlamaDecoderLayer
fsdp! LlamaDecoderLayer
fsdp! LlamaDecoderLayer
fsdp! LlamaDecoderLayer
fsdp! LlamaDecoderLayer
fsdp! LlamaDecoderLayer
fsdp! LlamaDecoderLayer


RuntimeError: Bad StatusOr access: RESOURCE_EXHAUSTED: Error allocating device buffer: Attempting to allocate 64.00M. That was not possible. There are 62.17M free.; (0x0x0_HBM0)

In [None]:
# Alpaca-style prompt template. The three `{}` placeholders are filled positionally with
# (instruction, input, output) by formatting_prompts_func.
alpaca_prompt = """Below is an instruction that describes a task, paired with an input that provides further context. Write a response that appropriately completes the request.

### Instruction: {}

### Input: {}

### Response: {}"""

# End-of-sequence string taken from the tokenizer; formatting_prompts_func appends it to
# every formatted example.
EOS_TOKEN = tokenizer.eos_token

# Define formatting function
def formatting_prompts_func(examples):
    """Render a batch of records into full prompt strings.

    Args:
        examples: Batched mapping holding parallel "instruction", "input" and
            "output" sequences.

    Returns:
        A dict with a single "text" key: one string per record, built from
        ``alpaca_prompt`` and terminated with ``EOS_TOKEN``.
    """
    records = zip(examples["instruction"], examples["input"], examples["output"])
    return {"text": [alpaca_prompt.format(*record) + EOS_TOKEN for record in records]}

# Load and preprocess the dataset: fetch the train split, then add the rendered "text"
# column in a single batched pass.
dataset = load_dataset("yahma/alpaca-cleaned", split="train").map(
    formatting_prompts_func,
    batched=True,
)

In [None]:
# Tokenize the dataset
def tokenize_function(examples):
    """Tokenize the "text" field of a batch.

    Every sequence is truncated and padded to exactly 512 tokens, so all
    examples come out with the same length.

    Args:
        examples: Batched mapping with a "text" sequence of strings.

    Returns:
        Whatever the module-level ``tokenizer`` returns for that batch.
    """
    tokenizer_options = {"truncation": True, "padding": "max_length", "max_length": 512}
    return tokenizer(examples["text"], **tokenizer_options)

# Hold out 15% of the examples for evaluation. The fixed seed makes the split identical
# on every Restart & Run All instead of changing from run to run.
ds = dataset.train_test_split(test_size=0.15, seed=42)

# Mapping over the DatasetDict tokenizes every split with the same settings in one call.
# The original columns are removed so only the tokenizer outputs remain.
ds = ds.map(tokenize_function, batched=True, remove_columns=dataset.column_names)

# Create DataLoaders. Both use batch size 1 and the stock transformers collator to turn
# the tokenized rows into tensors.
train_dataloader = torch.utils.data.DataLoader(
    ds['train'],
    shuffle=True,  # randomize the order of training examples each epoch
    batch_size=1,
    collate_fn=default_data_collator,
)

# Evaluation gains nothing from a random order; a fixed order keeps validation runs comparable.
val_dataloader = torch.utils.data.DataLoader(
    ds['test'],
    shuffle=False,
    batch_size=1,
    collate_fn=default_data_collator,
)

In [None]:
# Smoke-test the DataLoader: pull one batch and describe each field in it.
print("Testing DataLoader:")
batch = next(iter(train_dataloader))
for k, v in batch.items():
    if not isinstance(v, torch.Tensor):
        # Non-tensor fields: just report the Python type.
        print(f"{k}: {type(v)}")
        continue
    print(f"{k}: shape {v.shape}, dtype {v.dtype}")

In [None]:
# Move the model to the CPU before uploading it.
model = model.cpu()
print('now saving the model')

# Target repository on the Hub.
# NOTE(review): the id says "llama3" while the base checkpoint is a Llama-2 model — confirm the name.
repo_id = "felarof01/llama3-test"

model.push_to_hub(
    repo_id,
    private=False,
    create_pr=False,
    max_shard_size="2GB",  # upper bound on the size of each uploaded weight shard
)

# push_to_hub on the model has no `tokenizer` parameter, so the tokenizer was not being
# uploaded by the call above; push its files with a separate call.
tokenizer.push_to_hub(repo_id, private=False, create_pr=False)