# Instruction Fine-Tune Mistral 7B using LoRA
We use [unnatural-instructions](https://github.com/orhonovich/unnatural-instructions) to perform instruction fune-tuning on mistral 7B base model.

In [1]:
from datasets import load_dataset, Dataset, DatasetDict
from transformers import AutoModelForCausalLM, AutoTokenizer, DataCollatorForLanguageModeling, TrainingArguments, Trainer, BitsAndBytesConfig
import torch
import time
import pandas as pd
import numpy as np

  from .autonotebook import tqdm as notebook_tqdm


#### Load Mistral 7B

In [2]:
model_name = "mistralai/Mistral-7B-v0.1"

bnb_config = BitsAndBytesConfig(
    load_in_4bit=True,
    bnb_4bit_use_double_quant=True,
    bnb_4bit_quant_type="nf4",
    bnb_4bit_compute_dtype=torch.bfloat16
)

In [3]:
original_model = AutoModelForCausalLM.from_pretrained(model_name, quantization_config=bnb_config) # .to(device)
tokenizer = AutoTokenizer.from_pretrained(model_name, model_max_length=512, padding_side="left", add_eos_token=True)

tokenizer.pad_token = tokenizer.eos_token

Loading checkpoint shards: 100%|██████████| 2/2 [00:11<00:00,  5.59s/it]


Print number of trainable model parameters

In [4]:
def print_number_of_trainable_model_parameters(model):
    trainable_model_params = 0
    all_model_params = 0
    for _, param in model.named_parameters():
        all_model_params += param.numel()
        if param.requires_grad:
            trainable_model_params += param.numel()
    return f"trainable model parameters: {trainable_model_params}\nall model parameters: {all_model_params}\npercentage of trainable model parameters: {100 * trainable_model_params / all_model_params:.2f}%"

print(print_number_of_trainable_model_parameters(original_model))

trainable model parameters: 262410240
all model parameters: 3752071168
percentage of trainable model parameters: 6.99%


#### Load Unnatural Instruction Dataset through Huggingface

In [5]:
huggingface_dataset_name = "mrm8488/unnatural-instructions-core"

dataset = load_dataset(huggingface_dataset_name)

dataset

DatasetDict({
    train: Dataset({
        features: ['instruction', 'instances'],
        num_rows: 66010
    })
})

Explode 'instances' into separate rows and make sure exploded_dataset remains the same schema as the original dataset

In [6]:
new_datasets = {}

for split, ds in dataset.items():
    # Prepare lists to hold the exploded rows
    exploded_instructions, exploded_instances = [], []

    # Iterate over each row in the dataset
    for row in ds:
        instruction = row['instruction']
        for instance in row['instances']:
            # For each instance, create a new row with the same instruction
            exploded_instructions.append(instruction)
            exploded_instances.append([instance])
    
    # Create a new dataset from the exploded rows
    exploded_data = {'instruction': exploded_instructions, 'instances': exploded_instances}
    new_datasets[split] = Dataset.from_dict(exploded_data)

# Combine the new datasets into a DatasetDict
exploded_dataset = DatasetDict(new_datasets)

exploded_dataset

DatasetDict({
    train: Dataset({
        features: ['instruction', 'instances'],
        num_rows: 68478
    })
})

In [7]:
# Check each instance format
exploded_dataset['train'][0]


{'instruction': 'You will be given a series of words. Output these words in reverse order, with each word on its own line.',
 'instances': [{'constraints': 'None.',
   'input': "Words: ['Hello', 'world'].",
   'instruction_with_input': "You will be given a series of words. Output these words in reverse order, with each word on its own line.\nWords: ['Hello', 'world'].",
   'output': 'world\nHello'}]}

In [8]:
# Sanity check to make sure that each row has only one instance
sanity_df = exploded_dataset['train'].to_pandas()

sanity_df['instances_length'] = sanity_df['instances'].apply(len)
assert sanity_df[sanity_df['instances_length'] != 1].empty

Split data into train, val, and test

In [9]:
# Split the dataset into train, test, and validation sets
train_test_dataset = exploded_dataset['train'].train_test_split(test_size=0.2, seed=42)  # 80% train, 20% for test and validation
test_val_dataset = train_test_dataset['test'].train_test_split(test_size=0.5, seed=42)  # Split the 20% equally into test and validation

# Create a new DatasetDict
dataset_dict = DatasetDict({
    'train': train_test_dataset['train'],
    'test': test_val_dataset['train'],
    'validation': test_val_dataset['test']
})

dataset_dict

DatasetDict({
    train: Dataset({
        features: ['instruction', 'instances'],
        num_rows: 54782
    })
    test: Dataset({
        features: ['instruction', 'instances'],
        num_rows: 6848
    })
    validation: Dataset({
        features: ['instruction', 'instances'],
        num_rows: 6848
    })
})

In [10]:
index = 200

print(f"instructions: {dataset_dict['test'][index]['instruction']}")
print(f"instances: {dataset_dict['test'][index]['instances']}")

instructions: You are given a passage with certain words/phrases bolded. Identify if the word/phrase is being used in context ofSTAGE DIRECTIONS orCHARACTERIZATION.
instances: [{'constraints': 'The output for each word should be either 0 (Stage Direction) or 1 (Characterization).', 'input': 'STAGE DIRECTIONS:As he enters,MACBETH sees the three witches.He stopshis steps CHARACTERIZATION: amazed and terrified at their sight.', 'instruction_with_input': 'You are given a passage with certain words/phrases bolded. Identify if the word/phrase is being used in context ofSTAGE DIRECTIONS orCHARACTERIZATION.\nSTAGE DIRECTIONS:As he enters,MACBETH sees the three witches.He stopshis steps CHARACTERIZATION: amazed and terrified at their sight.', 'output': '0\n1\n0\n1'}]


### Perform Fine-Tuning

#### Preprocess Dataset to Generate Prompt|

In [17]:
def get_prompt(d_li):
    p_li  = []
    for d in d_li:
        constraints = f"\n{d['constraints']}" if d['constraints'] else ""
        instruction = f"{d['instruction_with_input']}{constraints}"
        model_answer = d['output']

        prompt = f"<s>[INST] {instruction} [/INST] {model_answer}</s>"
        p_li.append(prompt)
    return "\n".join(p_li)

def get_output(d_li):
    return "\n".join([d['output'] for d in d_li])

def tokenize_function(example):
    prompts = [get_prompt(per_inst_d_li) for per_inst_d_li in example['instances']]
    tokenized_example = tokenizer(prompts, max_length=512, padding="max_length", truncation=True, return_tensors="pt")
    example['input_ids'] = tokenized_example.input_ids # .to(device)
    example['labels'] = tokenized_example.input_ids.clone()
    return example

tokenized_datasets = dataset_dict.map(tokenize_function, batched=True, remove_columns=['instruction', 'instances']) # 

tokenized_datasets

Map: 100%|██████████| 54782/54782 [00:07<00:00, 6990.22 examples/s]
Map: 100%|██████████| 6848/6848 [00:01<00:00, 6547.38 examples/s]
Map: 100%|██████████| 6848/6848 [00:00<00:00, 7508.51 examples/s]


DatasetDict({
    train: Dataset({
        features: ['input_ids', 'labels'],
        num_rows: 54782
    })
    test: Dataset({
        features: ['input_ids', 'labels'],
        num_rows: 6848
    })
    validation: Dataset({
        features: ['input_ids', 'labels'],
        num_rows: 6848
    })
})

In [18]:
# To save some time in the lab, you will subsample the dataset:
subsampled_tokenized_datasets = tokenized_datasets.filter(lambda example, index: index % 100 == 0, with_indices=True)

subsampled_tokenized_datasets

Filter: 100%|██████████| 54782/54782 [00:10<00:00, 5466.24 examples/s]
Filter: 100%|██████████| 6848/6848 [00:01<00:00, 5466.00 examples/s]
Filter: 100%|██████████| 6848/6848 [00:01<00:00, 5465.93 examples/s]


DatasetDict({
    train: Dataset({
        features: ['input_ids', 'labels'],
        num_rows: 548
    })
    test: Dataset({
        features: ['input_ids', 'labels'],
        num_rows: 69
    })
    validation: Dataset({
        features: ['input_ids', 'labels'],
        num_rows: 69
    })
})

In [19]:
print(f"Shapes of the datasets:")
print(f"Training: {tokenized_datasets['train'].shape}")
print(f"Validation: {tokenized_datasets['validation'].shape}")
print(f"Test: {tokenized_datasets['test'].shape}")

print(tokenized_datasets)

Shapes of the datasets:
Training: (54782, 2)
Validation: (6848, 2)
Test: (6848, 2)
DatasetDict({
    train: Dataset({
        features: ['input_ids', 'labels'],
        num_rows: 54782
    })
    test: Dataset({
        features: ['input_ids', 'labels'],
        num_rows: 6848
    })
    validation: Dataset({
        features: ['input_ids', 'labels'],
        num_rows: 6848
    })
})


In [20]:
print(f"input len is {len(tokenized_datasets['train'][4]['input_ids'])}")
print(f"input Ids: {tokenized_datasets['train'][0]['input_ids']}")
print(f"labels: {tokenized_datasets['train'][0]['labels']}")

input len is 512
input Ids: [2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,

### Fin-Tune the Model with Preprocessed Dataset
(Warning) It will OOM on my machine, so I marked them out

In [21]:
# output_dir = f'./model/dialogue-summary-training-{str(int(time.time()))}'

# training_args = TrainingArguments(
#     output_dir=output_dir,
#     learning_rate=1e-5,
#     per_device_train_batch_size=2,
#     num_train_epochs=1,
#     weight_decay=0.01,
#     logging_steps=1,
#     max_steps=1,
#     fp16=True,
# )

# trainer = Trainer(
#     model=original_model,
#     args=training_args,
#     train_dataset=tokenized_datasets['train'],
#     eval_dataset=tokenized_datasets['validation'],
# )

In [22]:
# trainer.train() ### OOM

### Peft

#### Set up LoRA

In [23]:
from peft import prepare_model_for_kbit_training

original_model.enable_input_require_grads()
original_model.gradient_checkpointing_enable()
original_model = prepare_model_for_kbit_training(original_model)

print(print_number_of_trainable_model_parameters(original_model))

trainable model parameters: 0
all model parameters: 3752071168
percentage of trainable model parameters: 0.00%


In [24]:
from peft import LoraConfig, get_peft_model

lora_config = LoraConfig(
    r=8, # Rank
    lora_alpha=16,
    target_modules=[
        "q_proj",
        "k_proj",
        "v_proj",
        "o_proj",
        "gate_proj",
        "up_proj",
        "down_proj",
        "lm_head",
    ],
    lora_dropout=0.05,
    bias="none",
    task_type='CAUSAL_LM'
)

In [25]:
peft_model = get_peft_model(original_model, 
                            lora_config)
print(print_number_of_trainable_model_parameters(peft_model))

trainable model parameters: 21260288
all model parameters: 3773331456
percentage of trainable model parameters: 0.56%


In [26]:
from datetime import datetime

run_name = f'peft-causal-lm-training-{str(int(time.time()))}'
output_dir = f'./' + run_name

peft_training_args = TrainingArguments(
    output_dir=output_dir,
    warmup_steps=5,
    per_device_train_batch_size=2,
    gradient_checkpointing=True,
    gradient_accumulation_steps=4,
    max_steps=1000,
    learning_rate=2.5e-5, # Want about 10x smaller than the Mistral learning rate
    logging_steps=50,
    bf16=True,
    optim="paged_adamw_8bit",
    logging_dir="./logs",        # Directory for storing logs
    save_strategy="steps",       # Save the model checkpoint every logging step
    save_steps=50,                # Save checkpoints every 50 steps
    evaluation_strategy="steps", # Evaluate the model every logging step
    eval_steps=50,               # Evaluate and save checkpoints every 50 steps
    do_eval=True,                # Perform evaluation at the end of training
    report_to="wandb",           # Comment this out if you don't want to use weights & baises
    run_name=f"{run_name}-{datetime.now().strftime('%Y-%m-%d-%H-%M')}"          # Name of the W&B run (optional)
)
    
peft_trainer = Trainer(
    model=peft_model,
    args=peft_training_args,
    train_dataset=tokenized_datasets["train"],
    eval_dataset=tokenized_datasets["validation"],
    data_collator=DataCollatorForLanguageModeling(tokenizer, mlm=False),
)

huggingface/tokenizers: The current process just got forked, after parallelism has already been used. Disabling parallelism to avoid deadlocks...
	- Avoid using `tokenizers` before the fork if possible
	- Explicitly set the environment variable TOKENIZERS_PARALLELISM=(true | false)
huggingface/tokenizers: The current process just got forked, after parallelism has already been used. Disabling parallelism to avoid deadlocks...
	- Avoid using `tokenizers` before the fork if possible
	- Explicitly set the environment variable TOKENIZERS_PARALLELISM=(true | false)
huggingface/tokenizers: The current process just got forked, after parallelism has already been used. Disabling parallelism to avoid deadlocks...
	- Avoid using `tokenizers` before the fork if possible
	- Explicitly set the environment variable TOKENIZERS_PARALLELISM=(true | false)


In [27]:
peft_trainer.train()

Failed to detect the name of this notebook, you can set it manually with the WANDB_NOTEBOOK_NAME environment variable to enable code saving.
huggingface/tokenizers: The current process just got forked, after parallelism has already been used. Disabling parallelism to avoid deadlocks...
	- Avoid using `tokenizers` before the fork if possible
	- Explicitly set the environment variable TOKENIZERS_PARALLELISM=(true | false)
[34m[1mwandb[0m: Currently logged in as: [33mirislin1006[0m. Use [1m`wandb login --relogin`[0m to force relogin
huggingface/tokenizers: The current process just got forked, after parallelism has already been used. Disabling parallelism to avoid deadlocks...
	- Avoid using `tokenizers` before the fork if possible
	- Explicitly set the environment variable TOKENIZERS_PARALLELISM=(true | false)
huggingface/tokenizers: The current process just got forked, after parallelism has already been used. Disabling parallelism to avoid deadlocks...
	- Avoid using `tokenizers` 

You're using a LlamaTokenizerFast tokenizer. Please note that with a fast tokenizer, using the `__call__` method is faster than using a method to encode the text followed by a call to the `pad` method to get a padded encoding.
`use_cache=True` is incompatible with gradient checkpointing. Setting `use_cache=False`...


Step,Training Loss,Validation Loss
50,2.274,1.211107
100,1.1184,1.104863
150,1.0773,1.064886
200,1.0373,1.037202
250,1.0211,1.02036
300,1.0126,1.006043
350,0.9912,0.989309
400,0.9639,0.980859
450,0.9544,0.97318
500,0.9738,0.965228




TrainOutput(global_step=1000, training_loss=1.0370360374450684, metrics={'train_runtime': 33107.1791, 'train_samples_per_second': 0.242, 'train_steps_per_second': 0.03, 'total_flos': 1.75274075357184e+17, 'train_loss': 1.0370360374450684, 'epoch': 0.15})

In [28]:
peft_model_path="./peft-dialogue-summary-checkpoint-local"

peft_trainer.model.save_pretrained(peft_model_path)
tokenizer.save_pretrained(peft_model_path)



('./peft-dialogue-summary-checkpoint-local/tokenizer_config.json',
 './peft-dialogue-summary-checkpoint-local/special_tokens_map.json',
 './peft-dialogue-summary-checkpoint-local/tokenizer.model',
 './peft-dialogue-summary-checkpoint-local/added_tokens.json',
 './peft-dialogue-summary-checkpoint-local/tokenizer.json')

In [29]:
test = dataset_dict['test'][0:2]['instances']
[get_prompt(per_inst_d_li) for per_inst_d_li in test]

['<s>[INST] You are given a list of movie ratings. Each rating consists of a movie title and the corresponding rating out of 5 stars. The task is to find all movies with a rating greater than or equal to 4 stars and print them in alphabetical order.\nAnt-Man - 4stars Captain America: Civil War - 5stars Deadpool - 3stars Guardians of the Galaxy Vol. 2 - 5stars Logan - 4stars Spider-Man: Homecoming - 5stars Thor Ragnarok - 4stars.\nPrint each movie title on a new line in alphabetical order. [/INST] Ant-Man\nCaptain America: Civil War\nGuardians of the Galaxy Vol. 2\nLogan\nSpider-Man: Homecoming\nThor Ragnarok</s>',
 "<s>[INST] In this task, you are asked to summarize a text. The input is one paragraph containing up to 300 words. Your job is to return the top 5 most important sentences from the given text as a list in order of importance.\nI have three sisters and two brothers. We are all close in age, so we are very close growing up. My favorite sister is the middle one. She was always 

In [30]:
# model_input = tokenizer(eval_prompt, return_tensors="pt").to("cuda")

# ft_model.eval()
# with torch.no_grad():
#     print(eval_tokenizer.decode(ft_model.generate(**model_input, max_new_tokens=100)[0], skip_special_tokens=True))


In [31]:
eval_instances = dataset_dict['test'][0:10]['instances']
prompts = [get_prompt(per_inst_d_li) for per_inst_d_li in eval_instances]
outputs = [get_output(per_inst_d_li) for per_inst_d_li in eval_instances]
#print(prompts)
print(len(prompts), len(outputs), outputs[0:2])


original_model_summaries = []
instruct_model_summaries = []
peft_model_summaries = []

for idx, prompt in enumerate(prompts):
    model_input = tokenizer(prompt, return_tensors="pt")

    original_model_outputs = original_model.generate(**model_input, max_new_tokens=100)
    original_model_text_output = tokenizer.decode(original_model_outputs[0], skip_special_tokens=True)

    peft_model_outputs = peft_model.generate(**model_input, max_new_tokens=100)
    peft_model_text_output = tokenizer.decode(peft_model_outputs[0], skip_special_tokens=True)

    original_model_summaries.append(original_model_text_output)
    peft_model_summaries.append(peft_model_text_output)

zipped_summaries = list(zip(original_model_summaries, peft_model_summaries))
 
df = pd.DataFrame(zipped_summaries, columns = ['original_model_generation', 'peft_model_generation'])
df

Setting `pad_token_id` to `eos_token_id`:2 for open-end generation.
A decoder-only architecture is being used, but right-padding was detected! For correct generation results, please set `padding_side='left'` when initializing the tokenizer.


10 10 ['Ant-Man\nCaptain America: Civil War\nGuardians of the Galaxy Vol. 2\nLogan\nSpider-Man: Homecoming\nThor Ragnarok', "['My favorite sister is the middle one.',\n 'She was always so kind and caring, even when we were fighting over who got which toy growing up.',\n 'Now that we are older, she's still my best friend even though we live in different states.',\n 'I talk to her every day and visit her whenever I can.']"]




Setting `pad_token_id` to `eos_token_id`:2 for open-end generation.
A decoder-only architecture is being used, but right-padding was detected! For correct generation results, please set `padding_side='left'` when initializing the tokenizer.
Setting `pad_token_id` to `eos_token_id`:2 for open-end generation.
A decoder-only architecture is being used, but right-padding was detected! For correct generation results, please set `padding_side='left'` when initializing the tokenizer.
Setting `pad_token_id` to `eos_token_id`:2 for open-end generation.
A decoder-only architecture is being used, but right-padding was detected! For correct generation results, please set `padding_side='left'` when initializing the tokenizer.
Setting `pad_token_id` to `eos_token_id`:2 for open-end generation.
A decoder-only architecture is being used, but right-padding was detected! For correct generation results, please set `padding_side='left'` when initializing the tokenizer.
Setting `pad_token_id` to `eos_token

Unnamed: 0,original_model_generation,peft_model_generation
0,[INST] You are given a list of movie ratings. ...,[INST] You are given a list of movie ratings. ...
1,"[INST] In this task, you are asked to summariz...","[INST] In this task, you are asked to summariz..."
2,"[INST] In this task, you are given a natural l...","[INST] In this task, you are given a natural l..."
3,"[INST] In this task, you are given an incomple...","[INST] In this task, you are given an incomple..."
4,[INST] Given a letter and an encrypted message...,[INST] Given a letter and an encrypted message...
5,[INST] It is common for people to use social n...,[INST] It is common for people to use social n...
6,"[INST] In this task, you are given three image...","[INST] In this task, you are given three image..."
7,[INST] You are provided with a description of ...,[INST] You are provided with a description of ...
8,"[INST] In this task, you are given a list of s...","[INST] In this task, you are given a list of s..."
9,"[INST] In this task, you will be given a piece...","[INST] In this task, you will be given a piece..."


In [32]:
print(df['original_model_generation'].iloc[0])

[INST] You are given a list of movie ratings. Each rating consists of a movie title and the corresponding rating out of 5 stars. The task is to find all movies with a rating greater than or equal to 4 stars and print them in alphabetical order.
Ant-Man - 4stars Captain America: Civil War - 5stars Deadpool - 3stars Guardians of the Galaxy Vol. 2 - 5stars Logan - 4stars Spider-Man: Homecoming - 5stars Thor Ragnarok - 4stars.
Print each movie title on a new line in alphabetical order. [/INST] Ant-Man
Captain America: Civil War
Guardians of the Galaxy Vol. 2
Logan
Spider-Man: Homecoming
Thor Ragnarok Question 2

You are given a list of movie ratings. Each rating consists of a movie title and the corresponding rating out of 5 stars. The task is to find all movies with a rating greater than or equal to 4 stars and print them in alphabetical order.

Ant-Man - 4stars Captain America: Civil War - 5stars Deadpool - 3stars Guardians of the Galaxy Vol. 2 - 5stars


In [33]:
print(df['peft_model_generation'].iloc[0])

[INST] You are given a list of movie ratings. Each rating consists of a movie title and the corresponding rating out of 5 stars. The task is to find all movies with a rating greater than or equal to 4 stars and print them in alphabetical order.
Ant-Man - 4stars Captain America: Civil War - 5stars Deadpool - 3stars Guardians of the Galaxy Vol. 2 - 5stars Logan - 4stars Spider-Man: Homecoming - 5stars Thor Ragnarok - 4stars.
Print each movie title on a new line in alphabetical order. [/INST] Ant-Man
Captain America: Civil War
Guardians of the Galaxy Vol. 2
Logan
Spider-Man: Homecoming
Thor Ragnarok Question 2

You are given a list of movie ratings. Each rating consists of a movie title and the corresponding rating out of 5 stars. The task is to find all movies with a rating greater than or equal to 4 stars and print them in alphabetical order.

Ant-Man - 4stars Captain America: Civil War - 5stars Deadpool - 3stars Guardians of the Galaxy Vol. 2 - 5stars


In [35]:
print("Original model result:")
print(df['original_model_generation'].iloc[5])
print("Peft model result:")
print(df['peft_model_generation'].iloc[5])

Original model result:
[INST] It is common for people to use social networking sites, such as Facebook and Twitter, to communicate with each other. In this task you are given two tweets from different users. The job is to find out whether the two tweets are in response to each other or not. We will mark the tweet pair as 'True' if it is in response to each other, otherwise 'False'.
@jsbyun86 hello! how are you? @ohyeskim fine thanks! how about you?
The output should be 'True' or 'False'. [/INST] True

False

True

False

True

False

True

False

True

False

True

False

True

False

True

False

True

False

True

False

True

False

True

False

True

False

True

False

True

False

True

False

True

False

Peft model result:
[INST] It is common for people to use social networking sites, such as Facebook and Twitter, to communicate with each other. In this task you are given two tweets from different users. The job is to find out whether the two tweets are in response to each othe

In [40]:
print("Original model result:")
print(df['original_model_generation'].iloc[3])

Original model result:
[INST] In this task, you are given an incomplete sentence with one or more missing words. Your job is to predict the most probable word(s) that can complete the sentence based on common sense and reasoning.
I never _ a chance to meet her.
The output should be one or more words that can complete the given sentence. [/INST] I never had a chance to meet her.

I never had a chance to meet her.

I never had a chance to meet her.

I never had a chance to meet her.

I never had a chance to meet her.

I never had a chance to meet her.

I never had a chance to meet her.

I never had a chance to meet her.

I never had a chance to meet her.

I never had a chance to meet her.



In [42]:

print("Peft model result:")
print(df['peft_model_generation'].iloc[3])

Peft model result:
[INST] In this task, you are given an incomplete sentence with one or more missing words. Your job is to predict the most probable word(s) that can complete the sentence based on common sense and reasoning.
I never _ a chance to meet her.
The output should be one or more words that can complete the given sentence. [/INST] I never had a chance to meet her.

I never had a chance to meet her.

I never had a chance to meet her.

I never had a chance to meet her.

I never had a chance to meet her.

I never had a chance to meet her.

I never had a chance to meet her.

I never had a chance to meet her.

I never had a chance to meet her.

I never had a chance to meet her.

