In [None]:
import os
from getpass import getpass

from huggingface_hub import login

# Never store an access token in the notebook itself: anyone who can read the file
# (or its git history) can use it. Take the token from the HF_TOKEN environment
# variable, and fall back to an interactive prompt that does not echo the input.
# NOTE(review): the token that used to be hardcoded in this cell had write
# permission and must be treated as leaked — revoke it in the Hugging Face settings.
hf_token = os.environ.get("HF_TOKEN") or getpass("Hugging Face access token: ")
login(token=hf_token)

The token has not been saved to the git credentials helper. Pass `add_to_git_credential=True` in this function directly or `--add-to-git-credential` if using via `huggingface-cli` if you want to set the git credential as well.
Token is valid (permission: write).
Your token has been saved to /root/.cache/huggingface/token
Login successful


In [None]:
!pip install --upgrade peft accelerate bitsandbytes datasets trl



In [None]:
import os
from dataclasses import dataclass, field
from typing import Optional
from datasets.arrow_dataset import Dataset
import torch
from datasets import load_dataset
from peft import LoraConfig
from peft import AutoPeftModelForCausalLM
from transformers import (
    AutoModelForCausalLM,
    AutoTokenizer,
    BitsAndBytesConfig,
    HfArgumentParser,
    AutoTokenizer,
    TrainingArguments,
)

from trl import SFTTrainer

# Seed torch's random number generator with a fixed value for repeatable runs.
torch.manual_seed(42)

<torch._C.Generator at 0x7d41b894c1b0>

In [None]:
@dataclass
class ScriptArguments:
    """
    Settings for the LoRA fine-tuning run performed in this notebook.

    These arguments vary depending on how many GPUs you have, what their capacity and features are, and what size model you want to train.

    Every field has a default, so ``ScriptArguments()`` is a valid configuration and
    individual values can be overridden by keyword.
    """
    # --- batching / optimizer hyperparameters ---
    local_rank: Optional[int] = -1
    per_device_train_batch_size: Optional[int] = 4
    per_device_eval_batch_size: Optional[int] = 4
    gradient_accumulation_steps: Optional[int] = 4
    learning_rate: Optional[float] = 2e-5
    max_grad_norm: Optional[float] = 0.3
    # Fractional value, so the annotation must be float (an int annotation would
    # reject or truncate values such as 0.01 in any parser that honors the type).
    weight_decay: Optional[float] = 0.01

    # --- LoRA adapter hyperparameters (passed to peft.LoraConfig) ---
    lora_alpha: Optional[int] = 16
    lora_dropout: Optional[float] = 0.1
    lora_r: Optional[int] = 32

    # Sequence length limit handed to SFTTrainer.
    max_seq_length: Optional[int] = 512

    # --- Hub identifiers of the base model and the training dataset ---
    model_name: Optional[str] = "mistralai/Mistral-7B-Instruct-v0.3"
    dataset_name: Optional[str] = "iamtarun/python_code_instructions_18k_alpaca"

    # --- bitsandbytes quantization options (passed to BitsAndBytesConfig) ---
    use_4bit: Optional[bool] = True
    use_nested_quant: Optional[bool] = False
    # Name of a torch dtype attribute; resolved with getattr(torch, ...) at load time.
    bnb_4bit_compute_dtype: Optional[str] = "float16"
    bnb_4bit_quant_type: Optional[str] = "nf4"

    # --- training loop ---
    num_train_epochs: Optional[int] = 5
    # Mixed-precision switches forwarded to TrainingArguments.
    fp16: Optional[bool] = False
    bf16: Optional[bool] = True
    packing: Optional[bool] = False
    gradient_checkpointing: Optional[bool] = True
    optim: Optional[str] = "paged_adamw_32bit"
    lr_scheduler_type: str = "constant"
    max_steps: int = 10000
    warmup_ratio: float = 0.03
    group_by_length: bool = True
    save_steps: int = 250
    logging_steps: int = 250

    # --- output ---
    # Gates the final cell that merges the adapter into the base model and saves it.
    merge_and_push: Optional[bool] = False
    output_dir: str = "content/results_packing"

In [None]:
# Concrete settings for this run, grouped by what they configure.
script_args = ScriptArguments(
    # What to train on, and where results go
    model_name="mistralai/Mistral-7B-Instruct-v0.3",
    dataset_name="iamtarun/python_code_instructions_18k_alpaca",
    output_dir="content/results_packing",
    merge_and_push=False,

    # 4-bit quantization of the base model
    use_4bit=True,
    bnb_4bit_quant_type="nf4",
    bnb_4bit_compute_dtype="float16",
    use_nested_quant=False,

    # LoRA adapter
    lora_r=32,
    lora_alpha=16,
    lora_dropout=0.1,

    # Batching and sequence handling
    local_rank=-1,
    per_device_train_batch_size=1,  # custom value
    per_device_eval_batch_size=1,
    gradient_accumulation_steps=4,
    max_seq_length=512,
    packing=False,
    group_by_length=True,

    # Optimizer and schedule
    optim="paged_adamw_32bit",
    learning_rate=3e-5,  # custom value
    weight_decay=0.01,
    max_grad_norm=0.3,
    lr_scheduler_type="constant",
    warmup_ratio=0.03,
    num_train_epochs=5,
    max_steps=10000,

    # Precision and memory
    fp16=True,
    bf16=False,
    gradient_checkpointing=True,

    # Checkpointing and logging cadence (in steps)
    save_steps=250,
    logging_steps=250,
)

### Data Processing Utils

In [None]:
# Section markers that delimit the instruction and the reference answer in a prompt.
_INSTRUCTION_MARKER = "### Instruction:"
_OUTPUT_MARKER = "### Output:"


def _format_sample(sample):
    """
    Convert one raw dataset row into the `<s>[INST] ... [/INST]` training text.

    Args:
        sample: mapping with a 'prompt' string that contains an
            "### Instruction:" section followed by an "### Output:" section.

    Returns:
        The formatted training string, or None when either marker is missing.
    """
    prompt = sample['prompt'].replace("### Input:\n", '').replace('# Python code\n', '')

    instruction_at = prompt.find(_INSTRUCTION_MARKER)
    output_at = prompt.find(_OUTPUT_MARKER)
    if instruction_at == -1 or output_at == -1:
        # str.find returns -1 for a missing marker; doing offset arithmetic with it
        # would slice the prompt at an arbitrary position and yield a garbled sample.
        return None

    instruction = prompt[instruction_at + len(_INSTRUCTION_MARKER):output_at].strip()
    content = prompt[output_at + len(_OUTPUT_MARKER):].strip()
    return f'<s>[INST] {instruction} [/INST] ```python\n{content}```</s>'


def _train_limit(total_samples, val_pct):
    """Number of leading samples that belong to the training split."""
    return int(total_samples * (1 - val_pct))


def _iter_split(start, stop, dataset=None):
    """Yield formatted samples for raw dataset positions in the range [start, stop)."""
    from itertools import islice  # local import keeps this cell self-contained

    if dataset is None:
        dataset = load_dataset(script_args.dataset_name, streaming=True, split="train")

    # Positions are counted on the raw stream, so a skipped malformed row never
    # shifts the boundary between the training and validation splits.
    for sample in islice(dataset, start, stop):
        text = _format_sample(sample)
        if text is not None:
            yield {'text': text}


def gen_batches_train(dataset=None, total_samples=10000, val_pct=0.1):
    """
    Generate the training split: the first ``total_samples * (1 - val_pct)`` rows.

    Args:
        dataset: optional iterable of rows with a 'prompt' field. When omitted, the
            'train' split of ``script_args.dataset_name`` is streamed from the Hub.
        total_samples: number of rows shared between training and validation.
        val_pct: fraction of ``total_samples`` reserved for validation.

    Yields:
        ``{'text': <formatted sample>}`` dicts; rows lacking a marker are skipped.
    """
    yield from _iter_split(0, _train_limit(total_samples, val_pct), dataset)


def gen_batches_val(dataset=None, total_samples=10000, val_pct=0.1):
    """
    Generate the validation split: the rows after the training split, up to
    ``total_samples``.

    Args:
        dataset: optional iterable of rows with a 'prompt' field. When omitted, the
            'train' split of ``script_args.dataset_name`` is streamed from the Hub.
        total_samples: number of rows shared between training and validation.
        val_pct: fraction of ``total_samples`` reserved for validation.

    Yields:
        ``{'text': <formatted sample>}`` dicts; rows lacking a marker are skipped.
    """
    yield from _iter_split(_train_limit(total_samples, val_pct), total_samples, dataset)

In [None]:
def create_and_prepare_model(args):
    """
    Load the quantized base model and build the matching LoRA config and tokenizer.

    Args:
        args: settings object (a ``ScriptArguments`` in this notebook) providing
            ``model_name``, ``use_4bit``, ``use_nested_quant``,
            ``bnb_4bit_compute_dtype``, ``bnb_4bit_quant_type``, ``lora_alpha``,
            ``lora_dropout`` and ``lora_r``.

    Returns:
        Tuple ``(model, peft_config, tokenizer)``.
    """
    # The dtype is configured by name, e.g. "float16" -> torch.float16.
    compute_dtype = getattr(torch, args.bnb_4bit_compute_dtype)

    bnb_config = BitsAndBytesConfig(
        load_in_4bit=args.use_4bit,
        bnb_4bit_quant_type=args.bnb_4bit_quant_type,
        bnb_4bit_compute_dtype=compute_dtype,
        bnb_4bit_use_double_quant=args.use_nested_quant,
    )

    # Purely informational: hint at bf16 when the GPU reports compute capability >= 8.
    if compute_dtype == torch.float16 and args.use_4bit:
        major, _ = torch.cuda.get_device_capability()
        if major >= 8:
            print("=" * 80)
            print("Your GPU supports bfloat16, you can accelerate training with the argument --bf16")
            print("=" * 80)

    # Load the entire model on the GPU 0
    # switch to `device_map = "auto"` for multi-GPU
    device_map = {"": 0}

    model = AutoModelForCausalLM.from_pretrained(
        args.model_name,
        quantization_config=bnb_config,
        device_map=device_map,
    )

    #### LLAMA STUFF
    # check: https://github.com/huggingface/transformers/pull/24906
    model.config.pretraining_tp = 1
    #### LLAMA STUFF
    # NOTE(review): this sets an attribute named `window`; if the intent was to
    # change the attention window, the Mistral config field is presumably
    # `sliding_window` — confirm before relying on this line.
    model.config.window = 256

    # Read every setting from `args` (not the notebook-level `script_args`), so the
    # function honors whatever configuration the caller passes in.
    peft_config = LoraConfig(
        lora_alpha=args.lora_alpha,
        lora_dropout=args.lora_dropout,
        r=args.lora_r,
        bias="none",
        task_type="CAUSAL_LM",
        # Adapters are attached to the attention projections only.
        target_modules=[
            "q_proj",
            "k_proj",
            "v_proj",
            "o_proj",
        ],
    )

    tokenizer = AutoTokenizer.from_pretrained(args.model_name, trust_remote_code=True)
    # Reuse the end-of-sequence token for padding.
    tokenizer.pad_token = tokenizer.eos_token

    return model, peft_config, tokenizer

In [None]:
# Trainer configuration built from `script_args`.
# `per_device_eval_batch_size`, `gradient_checkpointing`, `weight_decay` and
# `num_train_epochs` are forwarded explicitly: they are set in `script_args`, and
# leaving them out would make the trainer silently fall back to its own defaults.
training_arguments = TrainingArguments(
    output_dir=script_args.output_dir,
    # Batching and memory
    per_device_train_batch_size=script_args.per_device_train_batch_size,
    per_device_eval_batch_size=script_args.per_device_eval_batch_size,
    gradient_accumulation_steps=script_args.gradient_accumulation_steps,
    gradient_checkpointing=script_args.gradient_checkpointing,
    group_by_length=script_args.group_by_length,
    # Optimizer and schedule
    optim=script_args.optim,
    learning_rate=script_args.learning_rate,
    weight_decay=script_args.weight_decay,
    max_grad_norm=script_args.max_grad_norm,
    lr_scheduler_type=script_args.lr_scheduler_type,
    warmup_ratio=script_args.warmup_ratio,
    # When max_steps is given it overrides num_train_epochs.
    num_train_epochs=script_args.num_train_epochs,
    max_steps=script_args.max_steps,
    # Precision
    fp16=script_args.fp16,
    bf16=script_args.bf16,
    # Checkpointing, logging and evaluation cadence. No eval_steps is passed, so
    # evaluation presumably follows logging_steps — confirm in the docs.
    save_steps=script_args.save_steps,
    logging_steps=script_args.logging_steps,
    eval_strategy="steps",
)

In [None]:
# Build the quantized model, the LoRA config and the tokenizer from the run settings.
model, peft_config, tokenizer = create_and_prepare_model(script_args)
# Turn off `use_cache` on the model config before the model is handed to the trainer.
model.config.use_cache = False

Your GPU supports bfloat16, you can accelerate training with the argument --bf16


The secret `HF_TOKEN` does not exist in your Colab secrets.
To authenticate with the Hugging Face Hub, create a token in your settings tab (https://huggingface.co/settings/tokens), set it as secret in your Google Colab and restart your session.
You will be able to reuse this secret in all of your notebooks.
Please note that authentication is recommended but still optional to access public models or datasets.


config.json:   0%|          | 0.00/601 [00:00<?, ?B/s]

model.safetensors.index.json:   0%|          | 0.00/23.9k [00:00<?, ?B/s]

Downloading shards:   0%|          | 0/3 [00:00<?, ?it/s]

model-00001-of-00003.safetensors:   0%|          | 0.00/4.95G [00:00<?, ?B/s]

model-00002-of-00003.safetensors:   0%|          | 0.00/5.00G [00:00<?, ?B/s]

model-00003-of-00003.safetensors:   0%|          | 0.00/4.55G [00:00<?, ?B/s]

Loading checkpoint shards:   0%|          | 0/3 [00:00<?, ?it/s]

generation_config.json:   0%|          | 0.00/116 [00:00<?, ?B/s]

tokenizer_config.json:   0%|          | 0.00/137k [00:00<?, ?B/s]

tokenizer.model:   0%|          | 0.00/587k [00:00<?, ?B/s]

tokenizer.json:   0%|          | 0.00/1.96M [00:00<?, ?B/s]

special_tokens_map.json:   0%|          | 0.00/414 [00:00<?, ?B/s]

In [None]:
# Build the training Dataset from the training generator.
train_gen = Dataset.from_generator(gen_batches_train)

# Build the validation Dataset from the validation generator.
val_gen = Dataset.from_generator(gen_batches_val)

Generating train split: 0 examples [00:00, ? examples/s]

Downloading readme:   0%|          | 0.00/905 [00:00<?, ?B/s]

Generating train split: 0 examples [00:00, ? examples/s]

In [None]:
# Sanity check: show the columns and row count of the training split ...
print(train_gen)

# ... and of the validation split.
print(val_gen)

Dataset({
    features: ['text'],
    num_rows: 9000
})
Dataset({
    features: ['text'],
    num_rows: 1000
})


In [None]:
# Display the module structure of the loaded model.
model

MistralForCausalLM(
  (model): MistralModel(
    (embed_tokens): Embedding(32768, 4096)
    (layers): ModuleList(
      (0-31): 32 x MistralDecoderLayer(
        (self_attn): MistralSdpaAttention(
          (q_proj): Linear4bit(in_features=4096, out_features=4096, bias=False)
          (k_proj): Linear4bit(in_features=4096, out_features=1024, bias=False)
          (v_proj): Linear4bit(in_features=4096, out_features=1024, bias=False)
          (o_proj): Linear4bit(in_features=4096, out_features=4096, bias=False)
          (rotary_emb): MistralRotaryEmbedding()
        )
        (mlp): MistralMLP(
          (gate_proj): Linear4bit(in_features=4096, out_features=14336, bias=False)
          (up_proj): Linear4bit(in_features=4096, out_features=14336, bias=False)
          (down_proj): Linear4bit(in_features=14336, out_features=4096, bias=False)
          (act_fn): SiLU()
        )
        (input_layernorm): MistralRMSNorm()
        (post_attention_layernorm): MistralRMSNorm()
      )
    )

In [None]:
# Pad on the right-hand side: fixes a weird overflow issue with fp16 training.
tokenizer.padding_side = "right"

# Supervised fine-tuning trainer over the "text" column of the generated datasets.
trainer = SFTTrainer(
    # What is trained
    model=model,
    tokenizer=tokenizer,
    peft_config=peft_config,
    # How it is trained
    args=training_arguments,
    # What it is trained and evaluated on
    train_dataset=train_gen,
    eval_dataset=val_gen,
    dataset_text_field="text",
    max_seq_length=script_args.max_seq_length,
    packing=script_args.packing,
)

Map:   0%|          | 0/9000 [00:00<?, ? examples/s]

Map:   0%|          | 0/1000 [00:00<?, ? examples/s]

max_steps is given, it will override any value given in num_train_epochs


In [None]:
# Run fine-tuning with the trainer configured above.
trainer.train()

Step,Training Loss,Validation Loss
250,1.0499,0.773377
500,0.7805,0.712064
750,0.7332,0.705438
1000,0.7262,0.702623
1250,0.7315,0.696115
1500,0.7266,0.692439
1750,0.7125,0.691209
2000,0.7196,0.686515
2250,0.7289,0.685004
2500,0.6972,0.685885



Cannot access gated repo for url https://huggingface.co/mistralai/Mistral-7B-Instruct-v0.3/resolve/main/config.json.
Access to model mistralai/Mistral-7B-Instruct-v0.3 is restricted. You must be authenticated to access it. - silently ignoring the lookup for the file config.json in mistralai/Mistral-7B-Instruct-v0.3.

Cannot access gated repo for url https://huggingface.co/mistralai/Mistral-7B-Instruct-v0.3/resolve/main/config.json.
Access to model mistralai/Mistral-7B-Instruct-v0.3 is restricted. You must be authenticated to access it. - silently ignoring the lookup for the file config.json in mistralai/Mistral-7B-Instruct-v0.3.

Cannot access gated repo for url https://huggingface.co/mistralai/Mistral-7B-Instruct-v0.3/resolve/main/config.json.
Access to model mistralai/Mistral-7B-Instruct-v0.3 is restricted. You must be authenticated to access it. - silently ignoring the lookup for the file config.json in mistralai/Mistral-7B-Instruct-v0.3.

Cannot access gated repo for url https://h

Step,Training Loss,Validation Loss
250,1.0499,0.773377
500,0.7805,0.712064
750,0.7332,0.705438
1000,0.7262,0.702623
1250,0.7315,0.696115
1500,0.7266,0.692439
1750,0.7125,0.691209
2000,0.7196,0.686515
2250,0.7289,0.685004
2500,0.6972,0.685885



Cannot access gated repo for url https://huggingface.co/mistralai/Mistral-7B-Instruct-v0.3/resolve/main/config.json.
Access to model mistralai/Mistral-7B-Instruct-v0.3 is restricted. You must be authenticated to access it. - silently ignoring the lookup for the file config.json in mistralai/Mistral-7B-Instruct-v0.3.

Cannot access gated repo for url https://huggingface.co/mistralai/Mistral-7B-Instruct-v0.3/resolve/main/config.json.
Access to model mistralai/Mistral-7B-Instruct-v0.3 is restricted. You must be authenticated to access it. - silently ignoring the lookup for the file config.json in mistralai/Mistral-7B-Instruct-v0.3.

Cannot access gated repo for url https://huggingface.co/mistralai/Mistral-7B-Instruct-v0.3/resolve/main/config.json.
Access to model mistralai/Mistral-7B-Instruct-v0.3 is restricted. You must be authenticated to access it. - silently ignoring the lookup for the file config.json in mistralai/Mistral-7B-Instruct-v0.3.

Cannot access gated repo for url https://h

TrainOutput(global_step=10000, training_loss=0.6821106811523437, metrics={'train_runtime': 37716.048, 'train_samples_per_second': 1.061, 'train_steps_per_second': 0.265, 'total_flos': 2.878712510582784e+17, 'train_loss': 0.6821106811523437, 'epoch': 4.444444444444445})

In [None]:
# Optional export step, executed only when `script_args.merge_and_push` is True:
# save the LoRA adapter, reload it on top of the base model, merge the adapter into
# the base weights and write a standalone checkpoint.
# Despite the flag name, nothing is pushed to the Hub in this cell.
if script_args.merge_and_push:
    output_dir = os.path.join(script_args.output_dir, "final_checkpoints")
    trainer.model.save_pretrained(output_dir)

    # Free memory for merging weights
    # NOTE(review): `trainer` still holds a reference to the trained model, so this
    # `del` alone may not release its GPU memory — confirm, and drop the trainer as
    # well if the reload below runs out of memory.
    del model
    torch.cuda.empty_cache()

    model = AutoPeftModelForCausalLM.from_pretrained(output_dir, device_map="auto", torch_dtype=torch.bfloat16)
    model = model.merge_and_unload()

    output_merged_dir = os.path.join(script_args.output_dir, "Final_Model_Checkpoint")
    model.save_pretrained(output_merged_dir, safe_serialization=True)
    # Store the tokenizer next to the merged weights; without it the directory
    # cannot be loaded on its own with AutoTokenizer.from_pretrained.
    tokenizer.save_pretrained(output_merged_dir)

### Inference

In [None]:
from transformers import AutoModelForCausalLM, AutoTokenizer
import torch

# Load the fine-tuned model and tokenizer
# NOTE: this rebinds the names `tokenizer` and `model`, replacing the training-time
# objects of the same names created earlier in the notebook.
model_path = "PATH TO YOUR MODEL'S LOCATION"  # Update this path to your model's location
tokenizer = AutoTokenizer.from_pretrained(model_path)
model = AutoModelForCausalLM.from_pretrained(model_path)

# Function to generate text based on a prompt
def generate_text(prompt, max_length=50):
    """
    Generate a continuation of `prompt` with the notebook-level `model` and `tokenizer`.

    Args:
        prompt: input text to continue.
        max_length: value passed to `model.generate` as `max_length`.

    Returns:
        The decoded first generated sequence, with special tokens removed.
    """
    # Calling the tokenizer (instead of `encode`) also returns the attention mask;
    # moving the encoding to the model's device avoids a CPU/GPU mismatch when the
    # model was loaded on an accelerator.
    inputs = tokenizer(prompt, return_tensors='pt').to(model.device)

    # Give `generate` an explicit pad token: fall back to EOS when the tokenizer
    # does not define one.
    pad_token_id = tokenizer.pad_token_id
    if pad_token_id is None:
        pad_token_id = tokenizer.eos_token_id

    # Generate a response
    output = model.generate(
        **inputs,
        max_length=max_length,
        num_return_sequences=1,
        pad_token_id=pad_token_id,
    )

    # Decode and return the generated text
    return tokenizer.decode(output[0], skip_special_tokens=True)

# Example usage
# NOTE(review): the training samples were formatted as '<s>[INST] ... [/INST] ...';
# prompts presumably need the same '[INST] ... [/INST]' wrapping to match — confirm.
prompt = "Your input prompt goes here"  # Replace with your input prompt
# Uses the default max_length of 50 defined by generate_text.
generated_text = generate_text(prompt)
print(generated_text)