In [5]:
! pip install huggingface_hub trl transformers accelerate peft datasets bitsandbytes einops scipy py7zr predictionguard

Collecting predictionguard
  Downloading predictionguard-1.4.2-py2.py3-none-any.whl (7.2 kB)
Installing collected packages: predictionguard
Successfully installed predictionguard-1.4.2


In [19]:
import os
from random import randrange

import predictionguard as pg
from datasets import load_dataset
import torch
from transformers import AutoModelForCausalLM, BitsAndBytesConfig, AutoTokenizer, TrainingArguments
from peft import LoraConfig, PeftModel
from trl import SFTTrainer
from getpass import getpass

In [3]:
hf_token = getpass('Enter your Hugging Face token: ')

Enter your Hugging Face token: ··········


# Preparing the dataset

In [13]:
dataset = load_dataset("samsum")

Downloading builder script:   0%|          | 0.00/3.36k [00:00<?, ?B/s]

Downloading metadata:   0%|          | 0.00/1.58k [00:00<?, ?B/s]

Downloading readme:   0%|          | 0.00/7.04k [00:00<?, ?B/s]

Downloading data:   0%|          | 0.00/2.94M [00:00<?, ?B/s]

Generating train split:   0%|          | 0/14732 [00:00<?, ? examples/s]

Generating test split:   0%|          | 0/819 [00:00<?, ? examples/s]

Generating validation split:   0%|          | 0/818 [00:00<?, ? examples/s]

In [14]:
dataset

DatasetDict({
    train: Dataset({
        features: ['id', 'dialogue', 'summary'],
        num_rows: 14732
    })
    test: Dataset({
        features: ['id', 'dialogue', 'summary'],
        num_rows: 819
    })
    validation: Dataset({
        features: ['id', 'dialogue', 'summary'],
        num_rows: 818
    })
})

In [15]:
dataset['train'][0]

{'id': '13818513',
 'dialogue': "Amanda: I baked  cookies. Do you want some?\r\nJerry: Sure!\r\nAmanda: I'll bring you tomorrow :-)",
 'summary': 'Amanda baked cookies and will bring Jerry some tomorrow.'}

In [16]:
def dialogue_to_prompt(dialogue):
  return """### Instruction:
Summarize the conversation in the below input. Use a single short sentence or statement.

### Input:
{dialogue}

### Response:
""".format(dialogue=dialogue)

#for split in ['train', 'test', 'validation']:
dataset = dataset.map(lambda x: {"input": dialogue_to_prompt(x['dialogue'])})
dataset = dataset.rename_column("summary", "output")

Map:   0%|          | 0/14732 [00:00<?, ? examples/s]

Map:   0%|          | 0/819 [00:00<?, ? examples/s]

Map:   0%|          | 0/818 [00:00<?, ? examples/s]

In [17]:
dataset['train'][0]

{'id': '13818513',
 'dialogue': "Amanda: I baked  cookies. Do you want some?\r\nJerry: Sure!\r\nAmanda: I'll bring you tomorrow :-)",
 'output': 'Amanda baked cookies and will bring Jerry some tomorrow.',
 'input': "### Instruction:\nSummarize the conversation in the below input. Use a single short sentence or statement.\n\n### Input:\nAmanda: I baked  cookies. Do you want some?\r\nJerry: Sure!\r\nAmanda: I'll bring you tomorrow :-)\n\n### Response:\n"}

In [18]:
print(dataset['train'][0]['input'])

### Instruction:
Summarize the conversation in the below input. Use a single short sentence or statement.

### Input:
Amanda: I baked  cookies. Do you want some?
Jerry: Sure!
Amanda: I'll bring you tomorrow :-)

### Response:



# Load a model to fine-tune

In [10]:
base_model_name = "meta-llama/Llama-2-7b-hf"

bnb_config = BitsAndBytesConfig(
    load_in_4bit=True,
    bnb_4bit_quant_type="nf4",
    bnb_4bit_compute_dtype=torch.float16,
)

base_model = AutoModelForCausalLM.from_pretrained(
    base_model_name,
    quantization_config=bnb_config,
    device_map="auto",
    trust_remote_code=True,
    token=hf_token
)
base_model.config.use_cache = False

# More info: https://github.com/huggingface/transformers/pull/24906
base_model.config.pretraining_tp = 1

tokenizer = AutoTokenizer.from_pretrained(
    base_model_name,
    trust_remote_code=True,
    token=hf_token
)
tokenizer.pad_token = tokenizer.eos_token

Downloading (…)lve/main/config.json:   0%|          | 0.00/609 [00:00<?, ?B/s]

Downloading (…)fetensors.index.json:   0%|          | 0.00/26.8k [00:00<?, ?B/s]

Downloading shards:   0%|          | 0/2 [00:00<?, ?it/s]

Downloading (…)of-00002.safetensors:   0%|          | 0.00/9.98G [00:00<?, ?B/s]

Downloading (…)of-00002.safetensors:   0%|          | 0.00/3.50G [00:00<?, ?B/s]

Loading checkpoint shards:   0%|          | 0/2 [00:00<?, ?it/s]



Downloading (…)neration_config.json:   0%|          | 0.00/188 [00:00<?, ?B/s]

Downloading (…)okenizer_config.json:   0%|          | 0.00/776 [00:00<?, ?B/s]

Downloading tokenizer.model:   0%|          | 0.00/500k [00:00<?, ?B/s]

Downloading (…)/main/tokenizer.json:   0%|          | 0.00/1.84M [00:00<?, ?B/s]

Downloading (…)cial_tokens_map.json:   0%|          | 0.00/414 [00:00<?, ?B/s]

# Training

In [11]:
output_dir = "./Llama-2-7b-hf-samsum"

training_args = TrainingArguments(
    output_dir=output_dir,
    per_device_train_batch_size=4,
    gradient_accumulation_steps=4,
    learning_rate=2e-4,
    logging_steps=50,
    max_steps=1000,
    logging_dir="./logs",        # Directory for storing logs
    save_strategy="steps",       # Save the model checkpoint every logging step
    save_steps=50,                # Save checkpoints every 50 steps
    evaluation_strategy="steps", # Evaluate the model every logging step
    eval_steps=50,               # Evaluate and save checkpoints every 50 steps
    do_eval=True                 # Perform evaluation at the end of training
)

In [12]:
peft_config = LoraConfig(
    lora_alpha=16,
    lora_dropout=0.1,
    r=64,
    bias="none",
    task_type="CAUSAL_LM",
)

In [13]:
def formatting_func(example):
    text = f"{example['input']}{example['output']}"
    return [text]

max_seq_length = 512
trainer = SFTTrainer(
    model=base_model,
    train_dataset=dataset['train'],
    eval_dataset=dataset['test'],
    peft_config=peft_config,
    formatting_func=formatting_func,
    max_seq_length=max_seq_length,
    tokenizer=tokenizer,
    args=training_args,
)



Map:   0%|          | 0/14732 [00:00<?, ? examples/s]

Map:   0%|          | 0/819 [00:00<?, ? examples/s]

In [14]:
# pass in resume_from_checkpoint=True to resume from a checkpoint
trainer.train()

You're using a LlamaTokenizerFast tokenizer. Please note that with a fast tokenizer, using the `__call__` method is faster than using a method to encode the text followed by a call to the `pad` method to get a padded encoding.


Step,Training Loss,Validation Loss
50,1.1096,2.597891
100,0.0288,4.067489
150,0.0059,4.188659
200,0.0056,4.242595
250,0.0055,4.262155
300,0.0055,4.301471
350,0.0055,4.31526
400,0.0054,4.352552
450,0.0054,4.358501
500,0.0054,4.368029


TrainOutput(global_step=1000, training_loss=0.06182192170619965, metrics={'train_runtime': 19297.1695, 'train_samples_per_second': 0.829, 'train_steps_per_second': 0.052, 'total_flos': 3.0601258205184e+17, 'train_loss': 0.06182192170619965, 'epoch': 1000.0})

# Save the model

In [15]:
! zip -r llama-samsum.zip Llama-2-7b-hf-samsum

  adding: Llama-2-7b-hf-samsum/ (stored 0%)
  adding: Llama-2-7b-hf-samsum/checkpoint-350/ (stored 0%)
  adding: Llama-2-7b-hf-samsum/checkpoint-350/training_args.bin (deflated 49%)
  adding: Llama-2-7b-hf-samsum/checkpoint-350/scheduler.pt (deflated 49%)
  adding: Llama-2-7b-hf-samsum/checkpoint-350/tokenizer.json (deflated 74%)
  adding: Llama-2-7b-hf-samsum/checkpoint-350/special_tokens_map.json (deflated 48%)
  adding: Llama-2-7b-hf-samsum/checkpoint-350/rng_state.pth (deflated 28%)
  adding: Llama-2-7b-hf-samsum/checkpoint-350/adapter_config.json (deflated 43%)
  adding: Llama-2-7b-hf-samsum/checkpoint-350/optimizer.pt (deflated 8%)
  adding: Llama-2-7b-hf-samsum/checkpoint-350/README.md (deflated 67%)
  adding: Llama-2-7b-hf-samsum/checkpoint-350/trainer_state.json (deflated 79%)
  adding: Llama-2-7b-hf-samsum/checkpoint-350/tokenizer_config.json (deflated 67%)
  adding: Llama-2-7b-hf-samsum/checkpoint-350/adapter_model.bin (deflated 8%)
  adding: Llama-2-7b-hf-samsum/checkpoint-

In [16]:
! cp llama-samsum.zip /content/drive/MyDrive/llama-samsum.zip

# Try it out, compare to the base model

In [7]:
pg_access_token = getpass('Enter your Prediction Guard access token: ')
os.environ['PREDICTIONGUARD_TOKEN'] = pg_access_token

Enter your Prediction Guard access token: ··········


In [8]:
! cp /content/drive/MyDrive/llama-samsum.zip .
! unzip llama-samsum.zip

Archive:  llama-samsum.zip
   creating: Llama-2-7b-hf-samsum/
   creating: Llama-2-7b-hf-samsum/checkpoint-350/
  inflating: Llama-2-7b-hf-samsum/checkpoint-350/training_args.bin  
  inflating: Llama-2-7b-hf-samsum/checkpoint-350/scheduler.pt  
  inflating: Llama-2-7b-hf-samsum/checkpoint-350/tokenizer.json  
  inflating: Llama-2-7b-hf-samsum/checkpoint-350/special_tokens_map.json  
  inflating: Llama-2-7b-hf-samsum/checkpoint-350/rng_state.pth  
  inflating: Llama-2-7b-hf-samsum/checkpoint-350/adapter_config.json  
  inflating: Llama-2-7b-hf-samsum/checkpoint-350/optimizer.pt  
  inflating: Llama-2-7b-hf-samsum/checkpoint-350/README.md  
  inflating: Llama-2-7b-hf-samsum/checkpoint-350/trainer_state.json  
  inflating: Llama-2-7b-hf-samsum/checkpoint-350/tokenizer_config.json  
  inflating: Llama-2-7b-hf-samsum/checkpoint-350/adapter_model.bin  
   creating: Llama-2-7b-hf-samsum/checkpoint-650/
  inflating: Llama-2-7b-hf-samsum/checkpoint-650/training_args.bin  
  inflating: Llama-2-7

In [23]:
base_model_name="meta-llama/Llama-2-7b-hf"
bnb_config = BitsAndBytesConfig(
    load_in_4bit=True,
    bnb_4bit_quant_type="nf4",
    bnb_4bit_compute_dtype=torch.float16,
)
base_model = AutoModelForCausalLM.from_pretrained(
    base_model_name,
    quantization_config=bnb_config,
    device_map="auto",
    trust_remote_code=True,
    token=hf_token
)
tokenizer = AutoTokenizer.from_pretrained(
    base_model_name,
    trust_remote_code=True,
    token=hf_token
)
tokenizer.pad_token = tokenizer.eos_token

Loading checkpoint shards:   0%|          | 0/2 [00:00<?, ?it/s]

Downloading (…)okenizer_config.json:   0%|          | 0.00/776 [00:00<?, ?B/s]

Downloading tokenizer.model:   0%|          | 0.00/500k [00:00<?, ?B/s]

Downloading (…)/main/tokenizer.json:   0%|          | 0.00/1.84M [00:00<?, ?B/s]

Downloading (…)cial_tokens_map.json:   0%|          | 0.00/414 [00:00<?, ?B/s]

In [24]:
model = PeftModel.from_pretrained(base_model, "/content/Llama-2-7b-hf-samsum/checkpoint-1000")

In [36]:
# select a random test sample
sample = dataset['test'][randrange(len(dataset["test"]))]

# prompt the fine-tuned model
model_input = tokenizer(sample['input'], return_tensors="pt").to("cuda")
model.eval()
with torch.no_grad():
    ft_completion = tokenizer.decode(model.generate(**model_input,
                                    max_new_tokens=100)[0],
                                     skip_special_tokens=True)

# prompt the base model
result = pg.Completion.create(
   model="Llama-2-7B",
   prompt=sample['input']
)
base_completion = result['choices'][0]['text']

print('Dialogue:')
print('--------------------------------------------------------')
print(sample['dialogue'])
print('')
print('Reference Summary')
print('--------------------------------------------------------')
print(sample['output'])
print('')
print('Base Model Summary')
print('--------------------------------------------------------')
print(base_completion.split('\n')[0])
print('')
print('Fine-Tuned Model Summary')
print('--------------------------------------------------------')
print(ft_completion.split(sample['input'])[1].split('\n')[0].split("#")[0])

Dialogue:
--------------------------------------------------------
Lisa: I have to clean the house.
Bob: Yes, it's very dirty.
Lisa: You can help me.
Bob: Why me?
Lisa: Because you helped make it dirty.
Bob: What do you want me to do?
Lisa: I want you to clean the bathroom.
Bob: Oh, that's easy.
Lisa: Clean the sink, the tub, the counter, and the toilet.
Bob: That's a lot of work.
Lisa: Tell me when you finish.
Bob: I don't think so. You'll just give me more work. 

Reference Summary
--------------------------------------------------------
Bob is going to help Lisa clean the house, he will clean the bathroom. 

Base Model Summary
--------------------------------------------------------
Lisa: I have to clean the house.

Fine-Tuned Model Summary
--------------------------------------------------------

