In [None]:
# Attach Google Drive to the Colab filesystem at the mount point below.
from google.colab import drive

DRIVE_MOUNT_POINT = '/content/drive'
drive.mount(DRIVE_MOUNT_POINT)

Mounted at /content/drive


# install

In [None]:
# %pip (instead of !pip) installs into the environment of the running kernel.
%pip install -q accelerate==0.21.0 peft==0.4.0 bitsandbytes==0.40.2 transformers==4.31.0 trl==0.4.7
# NOTE(review): scipy is upgraded without a version pin, so this step is not
# reproducible; pin it once a known-good version has been confirmed.
%pip install --upgrade scipy

[0m

# import library

In [None]:
import os
import torch
from datasets import load_dataset
from transformers import (
    AutoModelForCausalLM, AutoTokenizer, BitsAndBytesConfig, TrainingArguments, pipeline, logging
)
from peft import LoraConfig, PeftModel
from trl import SFTTrainer

  from .autonotebook import tqdm as notebook_tqdm


# access huggingface

In [None]:
import os
from getpass import getpass

from huggingface_hub import login

# Never store an access token in the notebook: read it from the HF_TOKEN
# environment variable and fall back to an interactive prompt if it is unset.
login(token=os.environ.get("HF_TOKEN") or getpass("Hugging Face token: "))

The token has not been saved to the git credentials helper. Pass `add_to_git_credential=True` in this function directly or `--add-to-git-credential` if using via `huggingface-cli` if you want to set the git credential as well.
Token is valid (permission: read).
Your token has been saved to /root/.cache/huggingface/token
Login successful


# model card

*   <|begin_of_text|>: This is equivalent to the BOS token
*   <|eot_id|>: This signifies the end of the message in a turn.
*   <|start_header_id|>{role}<|end_header_id|>: These tokens enclose the role for a particular message. The possible roles can be: system, user, assistant.
*   <|end_of_text|>: This is equivalent to the EOS token. On generating this token, Llama 3 will cease to generate more tokens.

## example template

<|begin_of_text|><|start_header_id|>system<|end_header_id|>

You are a helpful AI assistant for travel tips and recommendations<|eot_id|><|start_header_id|>user<|end_header_id|>

What is France's capital?<|eot_id|><|start_header_id|>assistant<|end_header_id|>

Bonjour! The capital of France is Paris!<|eot_id|><|start_header_id|>user<|end_header_id|>

What can I do there?<|eot_id|><|start_header_id|>assistant<|end_header_id|>

Paris, the City of Light, offers a romantic getaway with must-see attractions like the Eiffel Tower and Louvre Museum, romantic experiences like river cruises and charming neighborhoods, and delicious food and drink options, with helpful tips for making the most of your trip.<|eot_id|><|start_header_id|>user<|end_header_id|>

Give me a detailed list of the attractions I should visit, and time it takes in each one, to plan my trip accordingly.<|eot_id|><|start_header_id|>assistant<|end_header_id|>


In [None]:
# Load the train split, shuffle it with a fixed seed and keep the first 500
# shuffled rows as the working dataset.
calls_train = load_dataset("spikecodes/911-call-transcripts", split="train")
shuffled_calls = calls_train.shuffle(seed=42)
dataset = shuffled_calls.select(range(500))

# Show the message list of the first sampled conversation.
first_conversation = dataset[0]
print(first_conversation['messages'])

[{'role': 'assistant', 'content': "9-1-1, what's your emergency?"}, {'role': 'user', 'content': "Hey, how you doing? I'd like to report a double murder in Cambridge Arms."}, {'role': 'assistant', 'content': 'Okay, do you know the address?'}, {'role': 'user', 'content': "Yeah, hold on... it's 28C. 28 Cambridge Arms."}, {'role': 'assistant', 'content': 'Can you tell me what happened?'}, {'role': 'user', 'content': 'I walked through the backyard and saw glass shattered. I went inside and saw a woman lying there lifeless. I walked in and saw another person lying on the kitchen floor, looked like they were shot in the head.'}, {'role': 'assistant', 'content': 'Are you still there?'}, {'role': 'user', 'content': "Yeah, I'm outside. I live in Cambridge Arms too. I was looking for my debit card."}, {'role': 'assistant', 'content': 'Do you want to remain anonymous?'}, {'role': 'user', 'content': 'Yeah, I would like to remain anonymous.'}, {'role': 'assistant', 'content': 'Okay, can you describe

In [None]:
def transform_conversation(example):
    """Render one transcript as a single Llama 3 formatted training string.

    The first message of the transcript is always skipped (presumably the
    dispatcher's opening line), a fixed system prompt is prepended, and each
    remaining message is written as
    ``<|start_header_id|>role<|end_header_id|>content<|eot_id|>``.

    Consecutive messages from the same speaker are merged into ONE turn, with
    their contents joined by ' . '. Previously such a run was emitted as
    overlapping turns ("A . B" followed by "B" again), which duplicated text
    and produced back-to-back turns for the same role.

    Args:
        example: Mapping with a 'messages' list; each message is a mapping
            with 'role' and 'content' keys. Any role other than 'user' is
            written under the 'assistant' header.

    Returns:
        dict: ``{'text': <formatted conversation>}``; the text always ends
        with ``<|end_of_text|>``.
    """
    system = 'You are an AI assistant that provides advice to help users handle real-life emergency situations that could be life-threatening'

    # Group the messages (minus the first one) into runs of the same speaker.
    turns = []
    for message in example['messages'][1:]:
        role = 'user' if message['role'] == 'user' else 'assistant'
        if turns and turns[-1][0] == role:
            turns[-1][1].append(message['content'])
        else:
            turns.append((role, [message['content']]))

    reformatted_segment = [
        f'<|begin_of_text|><|start_header_id|>system<|end_header_id|>{system}<|eot_id|>'
    ]
    for role, contents in turns:
        merged = ' . '.join(contents)
        reformatted_segment.append(
            f'<|start_header_id|>{role}<|end_header_id|>{merged}<|eot_id|>'
        )
    reformatted_segment.append('<|end_of_text|>')
    return {'text': ''.join(reformatted_segment)}
# Apply the formatter to every row; each row gains the 'text' field it returns.
transformed_dataset = dataset.map(function=transform_conversation)

In [None]:
# Inspect the formatted training string of the first row.
first_transformed = transformed_dataset[0]
print(first_transformed['text'])

<|begin_of_text|><|start_header_id|>system<|end_header_id|>You are an AI assistant that provides advice to help users handle real-life emergency situations that could be life-threatening<|eot_id|><|start_header_id|>user<|end_header_id|>Hey, how you doing? I'd like to report a double murder in Cambridge Arms.<|eot_id|><|start_header_id|>assistant<|end_header_id|>Okay, do you know the address?<|eot_id|><|start_header_id|>user<|end_header_id|>Yeah, hold on... it's 28C. 28 Cambridge Arms.<|eot_id|><|start_header_id|>assistant<|end_header_id|>Can you tell me what happened?<|eot_id|><|start_header_id|>user<|end_header_id|>I walked through the backyard and saw glass shattered. I went inside and saw a woman lying there lifeless. I walked in and saw another person lying on the kitchen floor, looked like they were shot in the head.<|eot_id|><|start_header_id|>assistant<|end_header_id|>Are you still there?<|eot_id|><|start_header_id|>user<|end_header_id|>Yeah, I'm outside. I live in Cambridge Arm

# load the base-model

In [None]:
# Hugging Face access token passed to from_pretrained. It is read from the
# environment so that no credential is stored in the notebook.
# NOTE(review): when HF_TOKEN is unset this is None; the hub client is then
# expected to fall back to the token cached by login() — confirm.
HF_TOKEN = os.environ.get("HF_TOKEN")
# Base checkpoint to fine-tune.
model_name = "meta-llama/Meta-Llama-3-8B"
# Name used when saving and later reloading the trained adapter.
finetune_model = "Llama-3-8b-emergency"

In [None]:
# Number of passes over the training set, forwarded to TrainingArguments.
num_train_epochs = 1
# Output directory forwarded to TrainingArguments.
output_dir = "./results"

## quantization config

In [None]:
# 4-bit NF4 quantization with double quantization enabled; float16 is used as
# the compute dtype.
compute_dtype = torch.float16
bnb_config = BitsAndBytesConfig(
    bnb_4bit_compute_dtype=compute_dtype,
    bnb_4bit_quant_type="nf4",
    bnb_4bit_use_double_quant=True,
    load_in_4bit=True,
)

In [None]:
# Map the root module (empty-string key) to device index 0.
single_device_map = {"": 0}

# Load the base checkpoint with the 4-bit quantization config defined above.
model = AutoModelForCausalLM.from_pretrained(
    model_name,
    token=HF_TOKEN,
    device_map=single_device_map,
    quantization_config=bnb_config,
)

# Turn off the generation KV cache on the model config before training.
model.config.use_cache = False

Loading checkpoint shards: 100%|██████████| 4/4 [00:10<00:00,  2.56s/it]
Some weights of LlamaForCausalLM were not initialized from the model checkpoint at meta-llama/Meta-Llama-3-8B and are newly initialized: ['model.layers.19.self_attn.rotary_emb.inv_freq', 'model.layers.8.self_attn.rotary_emb.inv_freq', 'model.layers.26.self_attn.rotary_emb.inv_freq', 'model.layers.20.self_attn.rotary_emb.inv_freq', 'model.layers.31.self_attn.rotary_emb.inv_freq', 'model.layers.7.self_attn.rotary_emb.inv_freq', 'model.layers.2.self_attn.rotary_emb.inv_freq', 'model.layers.29.self_attn.rotary_emb.inv_freq', 'model.layers.10.self_attn.rotary_emb.inv_freq', 'model.layers.4.self_attn.rotary_emb.inv_freq', 'model.layers.25.self_attn.rotary_emb.inv_freq', 'model.layers.9.self_attn.rotary_emb.inv_freq', 'model.layers.23.self_attn.rotary_emb.inv_freq', 'model.layers.6.self_attn.rotary_emb.inv_freq', 'model.layers.14.self_attn.rotary_emb.inv_freq', 'model.layers.1.self_attn.rotary_emb.inv_freq', 'model.layer

## Tokenizer setting

In [None]:
def get_llama3_chat_template():
    """Return a Jinja chat template string in the Llama 3 header format.

    The template emits ``<|begin_of_text|>``, then for every message whose
    role is 'system', 'user' or 'assistant' one
    ``<|start_header_id|>role<|end_header_id|>content<|eot_id|>`` block
    (messages with any other role produce no output), and finally
    ``<|end_of_text|>``.
    """
    # One conditional snippet, instantiated once per supported role.
    role_block = (
        "{% if message.role == 'ROLE' %}"
        "<|start_header_id|>ROLE<|end_header_id|>"
        "{{message.content}}"
        "<|eot_id|>"
        "{% endif %}"
    )
    per_message = "".join(
        role_block.replace("ROLE", role)
        for role in ("system", "user", "assistant")
    )
    opening = "<|begin_of_text|>" "{% for message in messages %}"
    closing = "{% endfor %}" "<|end_of_text|>"
    return opening + per_message + closing

In [None]:
# NOTE(review): trust_remote_code=True permits running code shipped with the
# Hub repository; confirm it is actually required for this checkpoint.
tokenizer = AutoTokenizer.from_pretrained(model_name, trust_remote_code=True)

# Install the chat template, pad on the right, and reuse the EOS token for padding.
tokenizer.chat_template = get_llama3_chat_template()
tokenizer.padding_side = "right"
tokenizer.pad_token = tokenizer.eos_token

In [None]:
# Baseline before fine-tuning: send the raw question, without any chat
# formatting, through a text-generation pipeline.
prompt = "There is a thief in my house. How can I stay safe until the police arrive?"
pipe = pipeline(
    task="text-generation",
    tokenizer=tokenizer,
    model=model,
    max_length=200,
)
result = pipe(prompt)

Xformers is not installed correctly. If you want to use memory_efficient_attention to accelerate training use the following command to install Xformers
pip install xformers.
Setting `pad_token_id` to `eos_token_id`:128001 for open-end generation.


In [None]:
# Print the text of the first (and only) generated sequence.
baseline_output = result[0]
print(baseline_output['generated_text'])

There is a thief in my house. How can I stay safe until the police arrive?_
There are many situations in which you need to remain safe. Here are some suggestions.
* _Call the police._ This is the quickest way to get help. When you call, you can give the police your address and tell them what is happening. This will help the police to get to your house as quickly as possible.
* _Stay away from the thief._ If you are in the house with the thief, it is important to stay away from the thief. This will help to keep you safe. If the thief tries to hurt you, you can run away or call for help.
* _Lock yourself in a room._ If you are in the house with the thief, it is a good idea to lock yourself in a room. This will help to keep you safe. If the thief tries to hurt you, you can lock yourself in a room and call for help.
* _Stay away from


## lora config

In [None]:
# LoRA adapter settings: rank 64, alpha 16, 10% dropout on the adapter path,
# no bias parameters trained, configured for causal language modelling.
peft_config = LoraConfig(
    task_type="CAUSAL_LM",
    r=64,
    lora_alpha=16,
    lora_dropout=0.1,
    bias="none",
)

## Setting training args

In [None]:
training_arguments = TrainingArguments(
    # --- run length and output ---
    output_dir=output_dir,                  # directory configured earlier
    num_train_epochs=num_train_epochs,      # epoch count configured earlier
    max_steps=-1,                           # must stay -1, otherwise it overrides the epoch count
    save_steps=0,                           # 0 => no periodic checkpoint saving
    logging_steps=25,                       # report the training loss every 25 steps
    # --- batching ---
    per_device_train_batch_size=2,          # kept small because of CUDA memory limits
    gradient_accumulation_steps=1,          # no accumulation: one optimizer step per batch
    group_by_length=True,                   # batch together samples of similar length
    # --- optimization ---
    optim="paged_adamw_32bit",              # paged 32-bit AdamW variant
    learning_rate=2e-4,
    weight_decay=0.001,
    max_grad_norm=0.3,                      # gradient clipping threshold
    warmup_ratio=0.03,                      # fraction of steps used for LR warmup
    lr_scheduler_type="cosine",             # cosine learning-rate schedule
    # --- precision ---
    fp16=False,                             # mixed precision off; may be enabled on supporting GPUs
    bf16=False,                             # mixed precision off; may be enabled on supporting GPUs
)

In [None]:
# Supervised fine-tuning on the 'text' column of the formatted dataset, with
# the LoRA configuration applied to the quantized base model.
# NOTE(review): with max_seq_length=None, trl appears to fall back to
# min(tokenizer.model_max_length, 1024) rather than "unlimited" — confirm.
trainer = SFTTrainer(
    model=model,
    tokenizer=tokenizer,
    args=training_arguments,
    peft_config=peft_config,
    train_dataset=transformed_dataset,
    dataset_text_field="text",
    max_seq_length=None,
    packing=False,                          # one example per sequence, no packing
)



In [None]:
# Run fine-tuning; the returned TrainOutput is left as the cell's last
# expression so it is still displayed.
train_output = trainer.train()
train_output

You're using a PreTrainedTokenizerFast tokenizer. Please note that with a fast tokenizer, using the `__call__` method is faster than using a method to encode the text followed by a call to the `pad` method to get a padded encoding.


Step,Training Loss
25,3.3796
50,2.5968
75,2.5492
100,2.5072
125,2.4582
150,2.4314
175,2.4234
200,2.4247
225,2.4242
250,2.4129


TrainOutput(global_step=250, training_loss=2.5607607421875, metrics={'train_runtime': 446.2298, 'train_samples_per_second': 1.12, 'train_steps_per_second': 0.56, 'total_flos': 8855510522511360.0, 'train_loss': 2.5607607421875, 'epoch': 1.0})

In [None]:
# Save the trained model held by the trainer under the fine-tune name; the
# merge step later reloads it from this location with PeftModel.from_pretrained.
trained_model = trainer.model
trained_model.save_pretrained(finetune_model)

# Test the fine-tuned model

In [None]:
# Same question as the baseline run, now wrapped in the system/user/assistant
# header format that the training text used.
prompt = "There is a thief in my house. How can I stay safe until the police arrive?"
chat_prompt = (
    "<|begin_of_text|><|start_header_id|>system<|end_header_id|>"
    "You are an AI assistant that provides advice to help users handle real-life emergency situations that could be life-threatening"
    "<|eot_id|><|start_header_id|>user<|end_header_id|>"
    f"{prompt}"
    "<|eot_id|><|start_header_id|>assistant<|end_header_id|>"
)
pipe = pipeline(
    task="text-generation",
    tokenizer=tokenizer,
    model=model,
    max_length=200,
)
result = pipe(chat_prompt)

Setting `pad_token_id` to `eos_token_id`:128001 for open-end generation.


In [None]:
# Print the text of the first generated sequence (prompt included).
finetuned_output = result[0]
print(finetuned_output['generated_text'])

<|begin_of_text|><|start_header_id|>system<|end_header_id|>You are an AI assistant that provides advice to help users handle real-life emergency situations that could be life-threatening<|eot_id|><|start_header_id|>user<|end_header_id|>There is a thief in my house. How can I stay safe until the police arrive?<|eot_id|><|start_header_id|>assistant<|end_header_id|>What is the address of the house?. Do you know the name of the person who is in your house?. Is anyone else in the house with you?. Are you alone with the person in the house?. Do you have a weapon to defend yourself with?. What kind of weapon do you have?. Are you in the same room as the person who is in the house?. What is the make and model of the car the person is driving?. Are you able to see the person who is in the house?. Is the person armed?. Are you able to see the person's face?. Do you have a description of the person?. Is the person wearing a mask?. Are you able to


# Clear the model

In [None]:
import gc

import torch

# Drop the notebook's references to the objects that hold the quantized model.
del model
del pipe
del trainer

# Run the garbage collector first so unreachable objects (including reference
# cycles) are actually destroyed, and only then ask PyTorch to release its
# cached CUDA blocks. In the opposite order, memory still owned by
# not-yet-collected objects cannot be released by empty_cache().
gc.collect()
torch.cuda.empty_cache()

0

# Merge the model

In [None]:
# Reload the base checkpoint in float16 (no quantization config this time).
base_model = AutoModelForCausalLM.from_pretrained(
    model_name,
    torch_dtype=torch.float16,
    device_map="auto",
    low_cpu_mem_usage=True,
    return_dict=True,
)

# Attach the saved adapter, then merge it and unload the adapter wrapper.
adapter_wrapped_model = PeftModel.from_pretrained(base_model, finetune_model)
model = adapter_wrapped_model.merge_and_unload()

# Fresh tokenizer with right-side padding and EOS reused as the pad token.
tokenizer = AutoTokenizer.from_pretrained(model_name, trust_remote_code=True)
tokenizer.padding_side = "right"
tokenizer.pad_token = tokenizer.eos_token

Loading checkpoint shards: 100%|██████████| 4/4 [00:05<00:00,  1.38s/it]
Some weights of LlamaForCausalLM were not initialized from the model checkpoint at meta-llama/Meta-Llama-3-8B and are newly initialized: ['model.layers.19.self_attn.rotary_emb.inv_freq', 'model.layers.8.self_attn.rotary_emb.inv_freq', 'model.layers.26.self_attn.rotary_emb.inv_freq', 'model.layers.20.self_attn.rotary_emb.inv_freq', 'model.layers.31.self_attn.rotary_emb.inv_freq', 'model.layers.7.self_attn.rotary_emb.inv_freq', 'model.layers.2.self_attn.rotary_emb.inv_freq', 'model.layers.29.self_attn.rotary_emb.inv_freq', 'model.layers.10.self_attn.rotary_emb.inv_freq', 'model.layers.4.self_attn.rotary_emb.inv_freq', 'model.layers.25.self_attn.rotary_emb.inv_freq', 'model.layers.9.self_attn.rotary_emb.inv_freq', 'model.layers.23.self_attn.rotary_emb.inv_freq', 'model.layers.6.self_attn.rotary_emb.inv_freq', 'model.layers.14.self_attn.rotary_emb.inv_freq', 'model.layers.1.self_attn.rotary_emb.inv_freq', 'model.layer

## Save model weights and tokenizer

In [None]:
# NOTE(review): despite the ".pt" suffix, this path is handed to
# save_pretrained as its save location — presumably a directory; confirm.
merged_model_path = "/root/models/finetune_model.pt"
tokenizer_path = "/root/models/tokenizer/"

model.save_pretrained(merged_model_path)
# Left as the last expression so the tuple of written files is displayed.
tokenizer.save_pretrained(tokenizer_path)

('/root/models/tokenizer/tokenizer_config.json',
 '/root/models/tokenizer/special_tokens_map.json',
 '/root/models/tokenizer/tokenizer.json')