In [1]:
%%capture
%pip install -q -U transformers
%pip install -q -U accelerate
%pip install -q -U bitsandbytes
%pip install -q -U trl 
%pip install -q -U peft
%pip install -q datasets==2.16.0

Now we will create 4-bit quantization with NF4-type configuration using BitsAndBytes to load our model in 4-bit precision. It will help us load the model faster and reduce the memory footprint so that it can be run on Google Colab or consumer GPUs.

In [2]:
from transformers import AutoModelForCausalLM, AutoTokenizer, BitsAndBytesConfig,HfArgumentParser,TrainingArguments,pipeline, logging
from peft import LoraConfig, PeftModel, prepare_model_for_kbit_training, get_peft_model
import os,torch, wandb
from datasets import load_dataset, Dataset
from trl import SFTTrainer

2024-04-09 08:39:52.663659: E external/local_xla/xla/stream_executor/cuda/cuda_dnn.cc:9261] Unable to register cuDNN factory: Attempting to register factory for plugin cuDNN when one has already been registered
2024-04-09 08:39:52.663804: E external/local_xla/xla/stream_executor/cuda/cuda_fft.cc:607] Unable to register cuFFT factory: Attempting to register factory for plugin cuFFT when one has already been registered
2024-04-09 08:39:52.810772: E external/local_xla/xla/stream_executor/cuda/cuda_blas.cc:1515] Unable to register cuBLAS factory: Attempting to register factory for plugin cuBLAS when one has already been registered


In [3]:
from kaggle_secrets import UserSecretsClient
user_secrets = UserSecretsClient()
secret_hf = user_secrets.get_secret("HUGGINGFACE_TOKEN")
secret_wandb = user_secrets.get_secret("WANDB_API_KEY")

In [4]:
!huggingface-cli login --token $secret_hf

Token has not been saved to git credential helper. Pass `add_to_git_credential=True` if you want to set the git credential as well.
Token is valid (permission: write).
Your token has been saved to /root/.cache/huggingface/token
Login successful


In [5]:
wandb.login(key = secret_wandb)
run = wandb.init(
    project='Fine tuning mistral 7B', 
    job_type="training", 
    anonymous="allow"
)

[34m[1mwandb[0m: W&B API key is configured. Use [1m`wandb login --relogin`[0m to force relogin
[34m[1mwandb[0m: Appending key for api.wandb.ai to your netrc file: /root/.netrc
[34m[1mwandb[0m: Currently logged in as: [33mdragoa389[0m. Use [1m`wandb login --relogin`[0m to force relogin
[34m[1mwandb[0m: wandb version 0.16.6 is available!  To upgrade, please run:
[34m[1mwandb[0m:  $ pip install wandb --upgrade
[34m[1mwandb[0m: Tracking run with wandb version 0.16.4
[34m[1mwandb[0m: Run data is saved locally in [35m[1m/kaggle/working/wandb/run-20240409_084007-gfwrtqvp[0m
[34m[1mwandb[0m: Run [1m`wandb offline`[0m to turn off syncing.
[34m[1mwandb[0m: Syncing run [33measy-shape-7[0m
[34m[1mwandb[0m: ⭐️ View project at [34m[4mhttps://wandb.ai/dragoa389/Fine%20tuning%20mistral%207B[0m
[34m[1mwandb[0m: 🚀 View run at [34m[4mhttps://wandb.ai/dragoa389/Fine%20tuning%20mistral%207B/runs/gfwrtqvp[0m


# Load the model

In [6]:
bnb_config = BitsAndBytesConfig(
    load_in_4bit=True,
    bnb_4bit_quant_type="nf4",
    bnb_4bit_use_double_quant=True,
)

In [7]:
from transformers import AutoModelForCausalLM, AutoTokenizer

device = "cuda" # the device to load the model onto
base_model = "mistralai/Mistral-7B-Instruct-v0.2"

model = AutoModelForCausalLM.from_pretrained(
        base_model,
        quantization_config=bnb_config,
        torch_dtype=torch.bfloat16,
        device_map="auto",
        trust_remote_code=True,
)

model.config.use_cache = False 
model.config.pretraining_tp = 1
model.gradient_checkpointing_enable()

config.json:   0%|          | 0.00/596 [00:00<?, ?B/s]

model.safetensors.index.json:   0%|          | 0.00/25.1k [00:00<?, ?B/s]

Downloading shards:   0%|          | 0/3 [00:00<?, ?it/s]

model-00001-of-00003.safetensors:   0%|          | 0.00/4.94G [00:00<?, ?B/s]

model-00002-of-00003.safetensors:   0%|          | 0.00/5.00G [00:00<?, ?B/s]

model-00003-of-00003.safetensors:   0%|          | 0.00/4.54G [00:00<?, ?B/s]

Loading checkpoint shards:   0%|          | 0/3 [00:00<?, ?it/s]

generation_config.json:   0%|          | 0.00/111 [00:00<?, ?B/s]

In [8]:
tokenizer = AutoTokenizer.from_pretrained(base_model, trust_remote_code=True)
tokenizer.padding_side = 'right'
tokenizer.pad_token = tokenizer.eos_token
tokenizer.add_eos_token = True
tokenizer.add_bos_token, tokenizer.add_eos_token

tokenizer_config.json:   0%|          | 0.00/1.46k [00:00<?, ?B/s]

tokenizer.model:   0%|          | 0.00/493k [00:00<?, ?B/s]

tokenizer.json:   0%|          | 0.00/1.80M [00:00<?, ?B/s]

special_tokens_map.json:   0%|          | 0.00/72.0 [00:00<?, ?B/s]

(True, True)

In [9]:
model = prepare_model_for_kbit_training(model)
peft_config = LoraConfig(
    lora_alpha=16,
    lora_dropout=0.1,
    r=64,
    bias="none",
    task_type="CAUSAL_LM",
    target_modules=["q_proj", "k_proj", "v_proj", "o_proj","gate_proj"]
)
model = get_peft_model(model, peft_config)

We will use the pipeline function from the Transformers library to generate the response based on the prompt.

In [10]:
pipe = pipeline(
    "text-generation", 
    model=model, 
    tokenizer = tokenizer, 
    torch_dtype=torch.bfloat16, 
    device_map="auto"
)

The model 'PeftModelForCausalLM' is not supported for text-generation. Supported models are ['BartForCausalLM', 'BertLMHeadModel', 'BertGenerationDecoder', 'BigBirdForCausalLM', 'BigBirdPegasusForCausalLM', 'BioGptForCausalLM', 'BlenderbotForCausalLM', 'BlenderbotSmallForCausalLM', 'BloomForCausalLM', 'CamembertForCausalLM', 'LlamaForCausalLM', 'CodeGenForCausalLM', 'CohereForCausalLM', 'CpmAntForCausalLM', 'CTRLLMHeadModel', 'Data2VecTextForCausalLM', 'ElectraForCausalLM', 'ErnieForCausalLM', 'FalconForCausalLM', 'FuyuForCausalLM', 'GemmaForCausalLM', 'GitForCausalLM', 'GPT2LMHeadModel', 'GPT2LMHeadModel', 'GPTBigCodeForCausalLM', 'GPTNeoForCausalLM', 'GPTNeoXForCausalLM', 'GPTNeoXJapaneseForCausalLM', 'GPTJForCausalLM', 'LlamaForCausalLM', 'MambaForCausalLM', 'MarianForCausalLM', 'MBartForCausalLM', 'MegaForCausalLM', 'MegatronBertForCausalLM', 'MistralForCausalLM', 'MixtralForCausalLM', 'MptForCausalLM', 'MusicgenForCausalLM', 'MusicgenMelodyForCausalLM', 'MvpForCausalLM', 'OpenLlam

In [11]:
prompt = "As a data scientist, can you explain the concept of regularization in machine learning?"

sequences = pipe(
    prompt,
    do_sample=True,
    max_new_tokens=100, 
    temperature=0.7, 
    top_k=50, 
    top_p=0.95,
    num_return_sequences=1,
)
print(sequences[0]['generated_text'])

As a data scientist, can you explain the concept of regularization in machine learning?

Regularization is a technique used in machine learning to prevent overfitting and improve the generalization performance of a model. It works by adding a penalty term to the loss function that discourages large coefficients (weights) in the model. This penalty term encourages the model to find simpler solutions that have smaller coefficients.

The two most common types of regularization are L1 (Lasso) and L2 (Ridge) regularization. L1 regularization adds a penalty


# Load the dataset 

In [12]:
import pandas as pd
#number of training records limitted to this number (on P100 is ~2.5 seconds / item / epoch)
train_records_to_use = 1500    

train_df = pd.read_csv("/kaggle/input/llm-prompt-recovery-ground-truth/train.csv")

#quick test batch
train_df = train_df.sample(train_records_to_use).reset_index(drop=True)
train_df.iloc[2].text

'<s>[INST] I need you to tell me what single-sentence prompt transformed \'ORIGINAL TEXT\' into \'RE-WRITTEN TEXT.\' ORIGINAL TEXT: Mr Obama said on Thursday the family would stay in Washington while his 14-year-old daughter, Sasha, finishes her education at the elite academy, Sidwell Friends.\n"Transferring someone in the middle of high school. Tough," he said to an audience question in Wisconsin.\nIt is not common for presidents to stay in the capital after they leave office.\nWoodrow Wilson, who served as president from 1913 to 1921, was the last.\nHowever, the Obama family also plans to spend time in Chicago, where the family has a home and First Lady Michelle Obama has family.\nMr Obama is also establishing his presidential library in Chicago. RE-WRITTEN TEXT: "O team, let us roar,\nWith strength and power,\nWe fight for glory,\nOur name forever,\nIn the history of old,\nWe are legend,\nWe are strong,\nWe are proud,\nOur spirit burns deep,\nOnwards to victory!" [/INST]The single-s

Convert it into a HF Dataset

In [13]:
train_dataset = Dataset.from_pandas(train_df)
train_dataset

Dataset({
    features: ['text'],
    num_rows: 1500
})

# Trainning Hyperparam

In [14]:
training_arguments = TrainingArguments(
    output_dir="./results",
    num_train_epochs=1,
    per_device_train_batch_size=4,
    gradient_accumulation_steps=1,
    optim="paged_adamw_32bit",
    save_steps=25,
    logging_steps=25,
    learning_rate=2e-4,
    weight_decay=0.001,
    fp16=False,
    bf16=False,
    max_grad_norm=0.3,
    max_steps=-1,
    warmup_ratio=0.03,
    group_by_length=True,
    lr_scheduler_type="constant",
    report_to="wandb"
)

In [15]:
trainer = SFTTrainer(
    model=model,
    train_dataset=train_dataset,
    peft_config=peft_config,
    max_seq_length= None,
    dataset_text_field="text",
    tokenizer=tokenizer,
    args=training_arguments,
    packing= False,
)



Map:   0%|          | 0/1500 [00:00<?, ? examples/s]

dataloader_config = DataLoaderConfiguration(dispatch_batches=None, split_batches=False, even_batches=True, use_seedable_sampler=True)


In [16]:
trainer.train()



Step,Training Loss
25,1.616
50,1.3757
75,1.4148
100,1.2801
125,1.4364
150,1.2353
175,1.4599
200,1.1952
225,1.4101
250,1.181




TrainOutput(global_step=375, training_loss=1.337967534383138, metrics={'train_runtime': 5438.4666, 'train_samples_per_second': 0.276, 'train_steps_per_second': 0.069, 'total_flos': 3.5735603414728704e+16, 'train_loss': 1.337967534383138, 'epoch': 1.0})

# Save the new model

In [17]:
new_model_name = "mistral_prompt_recovery_finetuned"

trainer.model.save_pretrained(new_model_name)
wandb.finish()
model.config.use_cache = True

[34m[1mwandb[0m:                                                                                
[34m[1mwandb[0m: 
[34m[1mwandb[0m: Run history:
[34m[1mwandb[0m:         train/epoch ▁▁▂▃▃▃▄▄▅▆▆▆▇▇██
[34m[1mwandb[0m:   train/global_step ▁▁▂▃▃▃▄▅▅▅▆▇▇▇██
[34m[1mwandb[0m:     train/grad_norm ▂█▁▃▁▃▁▄▂▃▁▃▂▅▃
[34m[1mwandb[0m: train/learning_rate ▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁
[34m[1mwandb[0m:          train/loss █▄▅▃▅▂▅▁▅▁▅▂▄▁▃
[34m[1mwandb[0m: 
[34m[1mwandb[0m: Run summary:
[34m[1mwandb[0m:               total_flos 3.5735603414728704e+16
[34m[1mwandb[0m:              train/epoch 1.0
[34m[1mwandb[0m:        train/global_step 375
[34m[1mwandb[0m:          train/grad_norm 0.43953
[34m[1mwandb[0m:      train/learning_rate 0.0002
[34m[1mwandb[0m:               train/loss 1.291
[34m[1mwandb[0m:               train_loss 1.33797
[34m[1mwandb[0m:            train_runtime 5438.4666
[34m[1mwandb[0m: train_samples_per_second 0.276
[34m[1mwandb[0m:   train_s

In [18]:
trainer.model.push_to_hub(new_model_name, use_temp_dir=False)

adapter_model.safetensors:   0%|          | 0.00/369M [00:00<?, ?B/s]

CommitInfo(commit_url='https://huggingface.co/dragoa/mistral_prompt_recovery_finetuned/commit/a86d30bb3d090a4877d0e2549ca557087da86451', commit_message='Upload model', commit_description='', oid='a86d30bb3d090a4877d0e2549ca557087da86451', pr_url=None, pr_revision=None, pr_num=None)