<a href="https://colab.research.google.com/github/azzindani/03_LLM_Fine_Tune/blob/main/OpenHermes2.5_Mistral7B_Fine_Tune_PEFT_v3.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

## 00 Import Modules

In [None]:
# Install the libraries used by the rest of the notebook.
# NOTE(review): no versions are pinned here, so a fresh run may resolve to
# different releases than the ones that produced the recorded outputs.
!pip install -q --upgrade transformers
!pip install -q peft
!pip install -U -q bitsandbytes
!pip install -q datasets
!pip install -q trl

In [None]:
import os
import pathlib
import torch
import numpy as np
import textwrap

from random import randint
from itertools import zip_longest
from datetime import datetime
from datasets import load_dataset
from datasets import Dataset
from sklearn.model_selection import train_test_split
from sklearn.metrics import accuracy_score, precision_recall_fscore_support
from trl import SFTTrainer

from transformers import (
  AutoTokenizer,
  AutoModelForCausalLM,
  AutoModelForSeq2SeqLM,
  AutoModel,
  AutoModelForSequenceClassification,
  DataCollatorForLanguageModeling,
  Trainer,
  TrainingArguments,
  pipeline,
  TextDataset,
  EvalPrediction,
  DataCollatorWithPadding,
  GenerationConfig,
  BitsAndBytesConfig,
  DataCollatorForSeq2Seq,
  TextStreamer
)

from peft import (
  LoraConfig,
  PeftModelForSequenceClassification,
  PeftModel,
  TaskType,
  AutoPeftModelForSequenceClassification,
  get_peft_model,
  prepare_model_for_kbit_training
)

# Report whether PyTorch can see a CUDA device.
print("GPU is available!" if torch.cuda.is_available() else "GPU is not available.")

GPU is available!


In [None]:
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
device

device(type='cuda')

## 01 Import Model

In [None]:
model_name = 'unsloth/OpenHermes-2.5-Mistral-7B'

In [None]:
def load_model(model_name, base = True):
  """Load a causal-LM checkpoint and move it to the notebook-level `device`.

  Args:
    model_name: Checkpoint identifier forwarded to
      `AutoModelForCausalLM.from_pretrained`.
    base: When equal to `True`, the weights are loaded with
      `torch_dtype = torch.float16`. Otherwise they are loaded through a
      4-bit NF4 `BitsAndBytesConfig` (double quantisation, float16 compute).

  Returns:
    The loaded model after `.to(device)`.
  """
  load_kwargs = {'trust_remote_code' : True}

  if base == True:
    load_kwargs['torch_dtype'] = torch.float16
  else:
    load_kwargs['quantization_config'] = BitsAndBytesConfig(
      load_in_4bit = True,
      bnb_4bit_quant_type = 'nf4',
      bnb_4bit_compute_dtype = torch.float16,
      bnb_4bit_use_double_quant = True,
    )

  # Both variants share the same loader call and the same device placement.
  return AutoModelForCausalLM.from_pretrained(model_name, **load_kwargs).to(device)

In [None]:
model = load_model(model_name, base = False)
model

In [None]:
# Count every parameter of `model` and the subset that has requires_grad set,
# then report the trainable share as a percentage.
# NOTE(review): the recorded total (~3.75B) is presumably lower than the nominal
# 7B because the 4-bit weights are stored packed — confirm before quoting it.
total_params = sum(p.numel() for p in model.parameters())
trainable_params = sum(p.numel() for p in model.parameters() if p.requires_grad)
trainable_percentage = (trainable_params / total_params) * 100

print('Total parameters :', total_params)
print('Trainable parameters :', trainable_params)
print('Trainable percentage: {:.2f}%'.format(trainable_percentage))

Total parameters : 3752087552
Trainable parameters : 262426624
Trainable percentage: 6.99%


## 02 Import Tokenizer

In [None]:
tokenizer = AutoTokenizer.from_pretrained(model_name)
#tokenizer

## 03 Import Dataset

In [None]:
dataset_name = 'microsoft/orca-math-word-problems-200k'

In [None]:
max_length = 384

In [None]:
dataset = load_dataset(dataset_name, split = 'train')
dataset

In [None]:
dataset = dataset.select(range(10000))

In [None]:
dataset.select(range(5)).to_pandas().head()

Unnamed: 0,question,answer
0,Jungkook is the 5th place. Find the number of ...,"If Jungkook is in 5th place, then 4 people cro..."
1,A number divided by 10 is 6. Yoongi got the re...,"Let's call the certain number ""x"". According t..."
2,Dongju selects a piece of paper with a number ...,To find the second smallest and third smallest...
3,"You wanted to subtract 46 from a number, but y...",If you accidentally subtracted 59 instead of 4...
4,The length of one span of Jinseo is about 12 c...,If one span of Jinseo is about 12 centimeters ...


In [None]:
dataset[0]

{'question': 'Jungkook is the 5th place. Find the number of people who crossed the finish line faster than Jungkook.',
 'answer': 'If Jungkook is in 5th place, then 4 people crossed the finish line faster than him.'}

In [None]:
features = list(dataset.features.keys())
print(features)

['question', 'answer']


## 04 Text Formatting

In [None]:
prompt_format = """### Question:\n{}\n### Answer:\n{}"""

In [None]:
EOS_TOKEN = tokenizer.eos_token # Must add EOS_TOKEN

def preprocess(examples):
  """Render one dataset record as a single training string.

  Fills `prompt_format` with the record's 'question' and 'answer' fields and
  appends `EOS_TOKEN` to the result.

  Args:
    examples: Mapping with 'question' and 'answer' entries.

  Returns:
    A dict with a single 'prompt' key holding the formatted text.
  """
  question, answer = examples['question'], examples['answer']
  return {'prompt' : prompt_format.format(question, answer) + EOS_TOKEN}

In [None]:
formatted_dataset = dataset.map(preprocess, remove_columns = features)
formatted_dataset

In [None]:
print(formatted_dataset[0]['prompt'])

### Question:
Jungkook is the 5th place. Find the number of people who crossed the finish line faster than Jungkook.
### Answer:
If Jungkook is in 5th place, then 4 people crossed the finish line faster than him.<|im_end|>


## 05 Tokenization

In [None]:
def tokenize_data(example, max_length = max_length):
  """Tokenize the 'prompt' field of one example to a fixed length.

  Args:
    example: Mapping with a 'prompt' string.
    max_length: Length to truncate and pad to. The default is the value the
      notebook-level `max_length` had when this function was defined.

  Returns:
    The result of calling `tokenizer` on the prompt with truncation and
    'max_length' padding enabled.
  """
  tokenizer_options = {
    'truncation' : True,
    'padding' : 'max_length',
    'max_length' : max_length,
  }
  return tokenizer(example['prompt'], **tokenizer_options)

In [None]:
tokenized_dataset = formatted_dataset.map(tokenize_data)#, batched = True)#, remove_columns = 'text')
tokenized_dataset

In [None]:
print(tokenized_dataset[0]['prompt'])

### Question:
Jungkook is the 5th place. Find the number of people who crossed the finish line faster than Jungkook.
### Answer:
If Jungkook is in 5th place, then 4 people crossed the finish line faster than him.<|im_end|>


In [None]:
tokenized_dataset = tokenized_dataset.train_test_split(test_size = 0.1, seed = 42)
tokenized_dataset

DatasetDict({
    train: Dataset({
        features: ['prompt', 'input_ids', 'attention_mask'],
        num_rows: 9000
    })
    test: Dataset({
        features: ['prompt', 'input_ids', 'attention_mask'],
        num_rows: 1000
    })
})

In [None]:
train_dataset = tokenized_dataset['train']
test_dataset = tokenized_dataset['test']
train_dataset

Dataset({
    features: ['prompt', 'input_ids', 'attention_mask'],
    num_rows: 9000
})

In [None]:
train_dataset.select(range(5)).to_pandas().head()

Unnamed: 0,prompt,input_ids,attention_mask
0,### Question:\nThere is a two-digit natural nu...,"[1, 774, 22478, 28747, 13, 5816, 349, 264, 989...","[1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, ..."
1,"### Question:\nIn a big box, there are marbles...","[1, 774, 22478, 28747, 13, 657, 264, 2032, 389...","[1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, ..."
2,"### Question:\nAdam goes to a small school, wh...","[1, 774, 22478, 28747, 13, 3261, 314, 4859, 29...","[1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, ..."
3,### Question:\nLisa is looking to attempt a Wo...,"[1, 774, 22478, 28747, 13, 28758, 7682, 349, 2...","[1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, ..."
4,### Question:\nThere is a rectangular-shaped p...,"[1, 774, 22478, 28747, 13, 5816, 349, 264, 971...","[1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, ..."


In [None]:
print(train_dataset[0]['prompt'])

### Question:
There is a two-digit natural number whose tens place is 3. Let A and B be the quotient of this number by 10 and the remainder of division by 10, respectively. If B multiplied by 10 plus A is 9 less than A multiplied by 10 plus B, what is the first number?
### Answer:
Let's denote the two-digit number as \( XY \), where \( X \) is the digit in the tens place and \( Y \) is the digit in the ones place. Since the tens place is 3, we have \( X = 3 \).

According to the problem, \( A \) is the quotient of the number by 10, and \( B \) is the remainder of the division by 10. Therefore, \( A = X = 3 \) and \( B = Y \).

The problem states that \( B \times 10 + A \) is 9 less than \( A \times 10 + B \). This can be written as an equation:

\[ B \times 10 + A = A \times 10 + B - 9 \]

Substituting \( A \) and \( B \) with \( 3 \) and \( Y \), respectively, we get:

\[ Y \times 10 + 3 = 3 \times 10 + Y - 9 \]

Simplifying the equation:

\[ 10Y + 3 = 30 + Y - 9 \]

\[ 10Y + 3 = Y + 

In [None]:
print(train_dataset[0]['input_ids'])

[1, 774, 22478, 28747, 13, 5816, 349, 264, 989, 28733, 7845, 279, 4229, 1474, 4636, 19391, 1633, 349, 28705, 28770, 28723, 3169, 330, 304, 365, 347, 272, 17528, 722, 302, 456, 1474, 486, 28705, 28740, 28734, 304, 272, 23317, 302, 9652, 486, 28705, 28740, 28734, 28725, 8628, 28723, 1047, 365, 6079, 3002, 486, 28705, 28740, 28734, 3285, 330, 349, 28705, 28774, 2108, 821, 330, 6079, 3002, 486, 28705, 28740, 28734, 3285, 365, 28725, 767, 349, 272, 907, 1474, 28804, 13, 27332, 26307, 28747, 13, 8779, 28742, 28713, 14543, 272, 989, 28733, 7845, 279, 1474, 390, 18823, 1500, 28802, 414, 557, 970, 18823, 1500, 414, 28731, 349, 272, 21656, 297, 272, 19391, 1633, 304, 18823, 627, 414, 28731, 349, 272, 21656, 297, 272, 4413, 1633, 28723, 4577, 272, 19391, 1633, 349, 28705, 28770, 28725, 478, 506, 18823, 1500, 327, 28705, 28770, 414, 609, 13, 13, 5604, 3059, 298, 272, 2700, 28725, 18823, 330, 414, 28731, 349, 272, 17528, 722, 302, 272, 1474, 486, 28705, 28740, 28734, 28725, 304, 18823, 365, 414, 28

In [None]:
print(train_dataset[0]['attention_mask'])

[1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 

## 06 Data Collator Set Up

In [None]:
# Collator for causal language modelling (mlm = False). The two commented-out
# lines are alternative collators left disabled.
# NOTE(review): `data_collator` is not passed to the SFTTrainer call visible
# later in this notebook — confirm whether it is still needed.
#data_collator = DataCollatorWithPadding(tokenizer = tokenizer)
#data_collator = DataCollatorForSeq2Seq(tokenizer = tokenizer)
data_collator = DataCollatorForLanguageModeling(tokenizer = tokenizer, mlm = False)

## 07 Evaluation Metrics Set Up

In [None]:
def compute_metrics(p: EvalPrediction):
  """Compute weighted classification metrics from an evaluation result.

  The predicted class is the argmax of `p.predictions` along axis 1 and is
  compared against `p.label_ids`.

  Args:
    p: Evaluation output exposing `predictions` and `label_ids`.

  Returns:
    A dict with the keys 'accuracy', 'f1', 'precision' and 'recall'.
  """
  predicted = np.argmax(p.predictions, axis = 1)
  labels = p.label_ids

  # The fourth element (support) is not reported.
  precision, recall, f1, _ = precision_recall_fscore_support(labels, predicted, average = 'weighted')
  accuracy = accuracy_score(labels, predicted)

  return {
    'accuracy': accuracy,
    'f1': f1,
    'precision': precision,
    'recall': recall,
  }

In [None]:
torch.cuda.empty_cache()

## 08 Set Up PEFT / LoRA / QLoRA

In [None]:
# LoRA hyper-parameters forwarded to LoraConfig below.
lora_alpha = 16
lora_dropout = 0.1
lora_r = 32
# Names of the modules that receive LoRA adapters.
target_modules = ["q_proj", "k_proj", "v_proj", "o_proj",
                  "gate_proj", "up_proj", "down_proj",]

# Alternative module names, left disabled.
#target_modules = ["qkv_proj", "proj_1", "proj_2", "out_proj"]

# Adapter configuration for a causal-LM task; no bias terms are trained.
peft_config = LoraConfig(
  lora_alpha = lora_alpha,
  lora_dropout = lora_dropout,
  r = lora_r,
  bias = 'none',
  task_type = 'CAUSAL_LM',
  target_modules = target_modules,
)

In [None]:
# Wrap `model` with the LoRA configuration, registered under the adapter name
# 'LoRA', and print the trainable-parameter summary.
# NOTE(review): the SFTTrainer cell later in this notebook receives `model` and
# `peft_config`, not `peft_model` — confirm which object is meant to be trained.
peft_model = get_peft_model(model, peft_config, adapter_name = 'LoRA')
peft_model.print_trainable_parameters()

trainable params: 83,886,080 || all params: 7,325,634,560 || trainable%: 1.1451


## 09 Training Model

In [None]:
total_params = sum(p.numel() for p in model.parameters())
trainable_params = sum(p.numel() for p in model.parameters() if p.requires_grad)
trainable_percentage = (trainable_params / total_params) * 100

print('Total parameters :', total_params)
print('Trainable parameters :', trainable_params)
print('Trainable percentage: {:.2f}%'.format(trainable_percentage))

Total parameters : 3835973632
Trainable parameters : 83886080
Trainable percentage: 2.19%


In [None]:
torch.cuda.empty_cache()

In [None]:
# Output directory for anything the trainer or the later save cell writes.
save_path = './model'

batch_size = 1
max_steps = 200

# Training schedule: 200 optimiser steps, each accumulating 4 micro-batches of
# `batch_size` examples, with evaluation and logging every 20 steps.
training_args = TrainingArguments(
  output_dir = save_path,
  gradient_accumulation_steps = 4,
  # `eval_strategy` is the current name of this option; the previous spelling
  # `evaluation_strategy` is deprecated and rejected by newer transformers
  # releases, which the unpinned `--upgrade` install can pull in.
  eval_strategy = 'steps',
  do_eval = True,
  per_device_train_batch_size = batch_size,
  per_device_eval_batch_size = 4,
  log_level = 'debug',
  # Checkpointing during training is disabled; the model is saved explicitly
  # in a later cell. NOTE(review): `save_total_limit` presumably has no effect
  # while `save_strategy` is 'no' — confirm and drop if so.
  save_strategy = 'no',
  save_total_limit = 2,
  save_safetensors = False,
  fp16 = True,
  logging_steps = 20,
  learning_rate = 2e-5,
  eval_steps = 20,
  max_steps = max_steps,
  warmup_steps = 30,
  lr_scheduler_type = 'cosine',
)
training_args



TrainingArguments(
_n_gpu=1,
accelerator_config={'split_batches': False, 'dispatch_batches': None, 'even_batches': True, 'use_seedable_sampler': True, 'non_blocking': False, 'gradient_accumulation_kwargs': None, 'use_configured_state': False},
adafactor=False,
adam_beta1=0.9,
adam_beta2=0.999,
adam_epsilon=1e-08,
auto_find_batch_size=False,
average_tokens_across_devices=False,
batch_eval_metrics=False,
bf16=False,
bf16_full_eval=False,
data_seed=None,
dataloader_drop_last=False,
dataloader_num_workers=0,
dataloader_persistent_workers=False,
dataloader_pin_memory=True,
dataloader_prefetch_factor=None,
ddp_backend=None,
ddp_broadcast_buffers=None,
ddp_bucket_cap_mb=None,
ddp_find_unused_parameters=None,
ddp_timeout=1800,
debug=[],
deepspeed=None,
disable_tqdm=False,
dispatch_batches=None,
do_eval=True,
do_predict=False,
do_train=False,
eval_accumulation_steps=None,
eval_delay=0,
eval_do_concat_batches=True,
eval_on_start=False,
eval_steps=20,
eval_strategy=steps,
eval_use_gather_object=F

In [None]:
# Build the supervised fine-tuning trainer on the 'prompt' text column,
# evaluating on the first 200 rows of the held-out split.
# NOTE(review): the recorded output for this cell warns that some of these
# arguments are deprecated in favour of SFTConfig — confirm against the
# installed trl version.
trainer = SFTTrainer(
  model = model,
  train_dataset = train_dataset,#.select(range(10000)),
  eval_dataset = test_dataset.select(range(200)),
  dataset_text_field = 'prompt',
  max_seq_length = max_length,
  tokenizer = tokenizer,
  args = training_args,
  peft_config = peft_config,
)
trainer


Deprecated positional argument(s) used in SFTTrainer, please use the SFTConfig to set these arguments instead.
max_steps is given, it will override any value given in num_train_epochs
Using auto half precision backend


<trl.trainer.sft_trainer.SFTTrainer at 0x7c4ace1b7370>

In [None]:
trainer.train()

## 10 Model Evaluation

In [None]:
evaluation_results = trainer.evaluate()
print('Evaluation Results:', evaluation_results)

## 11 Save Model

In [None]:
# Use the wrapped `.module` when the trainer's model exposes one, otherwise the
# model itself, and write it to `save_path` with save_pretrained.
save_model = trainer.model.module if hasattr(trainer.model, 'module') else trainer.model
save_model.save_pretrained(save_path)

loading configuration file config.json from cache at /root/.cache/huggingface/hub/models--unsloth--OpenHermes-2.5-Mistral-7B/snapshots/8ec3fc1a46933db0641d40594ddac3dc8921834a/config.json
Model config MistralConfig {
  "_name_or_path": "teknium/OpenHermes-2.5-Mistral-7B",
  "architectures": [
    "MistralForCausalLM"
  ],
  "attention_dropout": 0.0,
  "bos_token_id": 1,
  "eos_token_id": 32000,
  "head_dim": 128,
  "hidden_act": "silu",
  "hidden_size": 4096,
  "initializer_range": 0.02,
  "intermediate_size": 14336,
  "max_position_embeddings": 32768,
  "model_type": "mistral",
  "num_attention_heads": 32,
  "num_hidden_layers": 32,
  "num_key_value_heads": 8,
  "pad_token_id": 0,
  "rms_norm_eps": 1e-05,
  "rope_theta": 10000.0,
  "sliding_window": 4096,
  "tie_word_embeddings": false,
  "torch_dtype": "bfloat16",
  "transformers_version": "4.46.3",
  "use_cache": false,
  "vocab_size": 32002
}

loading configuration file config.json from cache at /root/.cache/huggingface/hub/models-

## 12 Load PEFT Model

In [None]:
torch.cuda.empty_cache()

In [None]:
peft_path = save_path + '/LoRA'
peft_path

'./model/LoRA'

In [None]:
# Load the adapter saved under `peft_path` on top of the in-memory `model`.
# NOTE(review): the `peft_model` dump printed later lists both 'LoRA' and
# 'default' adapters on each layer — confirm that loading onto the
# already-adapted `model` (rather than a freshly loaded base) is intended.
peft_model = PeftModel.from_pretrained(model, peft_path)

## 13 Reload & Recheck Base Model

In [None]:
model = load_model(model_name, base = False)
model

In [None]:
total_params = sum(p.numel() for p in model.parameters())
trainable_params = sum(p.numel() for p in model.parameters() if p.requires_grad)
trainable_percentage = (trainable_params / total_params) * 100

print('Total parameters :', total_params)
print('Trainable parameters :', trainable_params)
print('Trainable percentage: {:.2f}%'.format(trainable_percentage))

Total parameters : 3752087552
Trainable parameters : 262426624
Trainable percentage: 6.99%


In [None]:
peft_model

PeftModelForCausalLM(
  (base_model): LoraModel(
    (model): MistralForCausalLM(
      (model): MistralModel(
        (embed_tokens): Embedding(32002, 4096, padding_idx=0)
        (layers): ModuleList(
          (0-31): 32 x MistralDecoderLayer(
            (self_attn): MistralSdpaAttention(
              (q_proj): lora.Linear4bit(
                (base_layer): Linear4bit(in_features=4096, out_features=4096, bias=False)
                (lora_dropout): ModuleDict(
                  (LoRA): Dropout(p=0.1, inplace=False)
                  (default): Dropout(p=0.1, inplace=False)
                )
                (lora_A): ModuleDict(
                  (LoRA): Linear(in_features=4096, out_features=32, bias=False)
                  (default): Linear(in_features=4096, out_features=32, bias=False)
                )
                (lora_B): ModuleDict(
                  (LoRA): Linear(in_features=32, out_features=4096, bias=False)
                  (default): Linear(in_features=32, out_featu

In [None]:
total_params = sum(p.numel() for p in peft_model.parameters())
trainable_params = sum(p.numel() for p in peft_model.parameters() if p.requires_grad)
trainable_percentage = (trainable_params / total_params) * 100

print('Total parameters :', total_params)
print('Trainable parameters :', trainable_params)
print('Trainable percentage: {:.2f}%'.format(trainable_percentage))

Total parameters : 3919859712
Trainable parameters : 0
Trainable percentage: 0.00%


## 14 Pre Test & Post Test

In [None]:
def pre_assistant(prompt):
  """Generate an answer to `prompt` with the notebook-level `model`.

  The question is placed into `prompt_format` with an empty answer slot,
  tokenized, moved to `device`, and passed to `model.generate`.

  Args:
    prompt: Question text to insert into `prompt_format`.

  Returns:
    The first generated sequence decoded with special tokens skipped.
  """
  question_only = prompt_format.format(prompt, '')
  encoded = tokenizer([question_only], return_tensors = 'pt').to(device)

  sampling = GenerationConfig(
    do_sample = True,
    top_k = 1,
    temperature = 0.1,
    max_new_tokens = 1024,
    pad_token_id = tokenizer.eos_token_id
  )
  generated = model.generate(**encoded, generation_config = sampling)

  # A single prompt was submitted, so only the first sequence is decoded.
  return tokenizer.decode(generated[0], skip_special_tokens = True)

In [None]:
def post_assistant(prompt):
  """Generate an answer to `prompt` with the notebook-level `peft_model`.

  The question is placed into `prompt_format` with an empty answer slot,
  tokenized, moved to `device`, and passed to `peft_model.generate`.

  Args:
    prompt: Question text to insert into `prompt_format`.

  Returns:
    The first generated sequence decoded with special tokens skipped.
  """
  question_only = prompt_format.format(prompt, '')
  encoded = tokenizer([question_only], return_tensors = 'pt').to(device)

  sampling = GenerationConfig(
    do_sample = True,
    top_k = 1,
    temperature = 0.1,
    max_new_tokens = 1024,
    pad_token_id = tokenizer.eos_token_id
  )
  generated = peft_model.generate(**encoded, generation_config = sampling)

  # A single prompt was submitted, so only the first sequence is decoded.
  return tokenizer.decode(generated[0], skip_special_tokens = True)

In [None]:
def print_side_by_side(pre_text, post_text, width = 50):
  """Print two texts as wrapped, side-by-side columns.

  The table has a 'PRE-TEST' / 'POST-TEST' header, a row with the parameter
  counts of the notebook-level `model` and `peft_model`, a rule, and then the
  two texts wrapped to `width` characters and paired line by line. The shorter
  column is padded with empty lines.

  Args:
    pre_text: Text for the left column.
    post_text: Text for the right column.
    width: Column width used for wrapping, centring and padding.

  Returns:
    None; the table is written with print().
  """
  # One separator for every row: the original mixed ' | ' and '|', which
  # shifted the divider by one column on the count and rule rows.
  separator = ' | '

  pre_wrapped = textwrap.wrap(pre_text, width)
  post_wrapped = textwrap.wrap(post_text, width)

  print('PRE-TEST'.center(width), separator, 'POST-TEST'.center(width))
  print(
    str(sum(p.numel() for p in model.parameters())).center(width),
    separator,
    str(sum(p.numel() for p in peft_model.parameters())).center(width)
  )
  print('=' * width, separator, '=' * width)

  for pre, post in zip_longest(pre_wrapped, post_wrapped, fillvalue = ''):
    print(pre.ljust(width), separator, post.ljust(width))

In [None]:
# Pick a random question and print the `model` (pre) and `peft_model` (post)
# generations next to each other.
# randint includes both endpoints, so the upper bound must be the last valid
# index; len(dataset) itself would raise IndexError on lookup.
loc = randint(0, len(dataset) - 1)
prompt = dataset[loc]['question']
pre_text = pre_assistant(prompt)
post_text = post_assistant(prompt)
print_side_by_side(pre_text, post_text)

                     PRE-TEST                       |                      POST-TEST                     
                    3752087552                     |                     3919859712                    
### Question: A small square is made by dividing a  |  ### Question: A small square is made by dividing a
square with a side of 20 centimeters (cm) into      |  square with a side of 20 centimeters (cm) into    
thirds. Find the area of the newly created small    |  thirds. Find the area of the newly created small  
square in square centimeters (cm2), rounded to two  |  square in square centimeters (cm2), rounded to two
decimal places. ### Answer: 6.67  ### Explanation:  |  decimal places. ### Answer: 6.67  ### Explanation:
The side of the small square is 1/3 of the side of  |  The side of the small square is 1/3 of the side of
the original square, so the side of the small       |  the original square, so it is 20/3 = 6.67 cm. The 
square is 20 cm / 3 = 6.67 cm. The area of the  

In [None]:
# Pick a random question and print the `model` (pre) and `peft_model` (post)
# generations next to each other.
# randint includes both endpoints, so the upper bound must be the last valid
# index; len(dataset) itself would raise IndexError on lookup.
loc = randint(0, len(dataset) - 1)
prompt = dataset[loc]['question']
pre_text = pre_assistant(prompt)
post_text = post_assistant(prompt)
print_side_by_side(pre_text, post_text)

                     PRE-TEST                       |                      POST-TEST                     
                    3752087552                     |                     3919859712                    
### Question: Find the number of natural numbers    |  ### Question: Find the number of natural numbers  
from 1 to 100 in which the units digit is 0. ###    |  from 1 to 100 in which the units digit is 0. ###  
Answer:  The units digit of a number can be 0 only  |  Answer:  The units digit of a number can be 0 only
if the number is a multiple of 10.  The first       |  if the number is a multiple of 10.  The first     
multiple of 10 from 1 to 100 is 10, and the last    |  multiple of 10 from 1 to 100 is 10, and the last  
one is 100.  Therefore, the number of multiples of  |  one is 100.  Therefore, there are 100 - 10 + 1 =  
10 from 1 to 100 is 100 - 10 + 1 = 91.  Hence,      |  91 natural numbers from 1 to 100 in which the     
there are 91 natural numbers from 1 to 100 in   

In [None]:
# Pick a random question and print the `model` (pre) and `peft_model` (post)
# generations next to each other.
# randint includes both endpoints, so the upper bound must be the last valid
# index; len(dataset) itself would raise IndexError on lookup.
loc = randint(0, len(dataset) - 1)
prompt = dataset[loc]['question']
pre_text = pre_assistant(prompt)
post_text = post_assistant(prompt)
print_side_by_side(pre_text, post_text)

                     PRE-TEST                       |                      POST-TEST                     
                    3752087552                     |                     3919859712                    
### Question: What is the value of D among the      |  ### Question: What is the value of D among the    
four different numbers A, B, C, and D that satisfy  |  four different numbers A, B, C, and D that satisfy
A+A=6, B-A=4, C+B=9, and D-C=7? ### Answer: To      |  A+A=6, B-A=4, C+B=9, and D-C=7? ### Answer: To    
solve this problem, we can use substitution or      |  solve this problem, we can use substitution or    
elimination method. Let's use substitution method.  |  elimination method. Let's use the elimination     
From the first equation, we know that A = 6 - A.    |  method. First, we can solve for A in the equation 
Substituting this into the second equation, we get  |  A+A=6. This gives us A=3. Next, we can substitute 
B - (6 - A) = 4. Simplifying, we get B - 6 + A =

In [None]:
# Pick a random question and print the `model` (pre) and `peft_model` (post)
# generations next to each other.
# randint includes both endpoints, so the upper bound must be the last valid
# index; len(dataset) itself would raise IndexError on lookup.
loc = randint(0, len(dataset) - 1)
prompt = dataset[loc]['question']
pre_text = pre_assistant(prompt)
post_text = post_assistant(prompt)
print_side_by_side(pre_text, post_text)

                     PRE-TEST                       |                      POST-TEST                     
                    3752087552                     |                     3919859712                    
### Question: Minji cleans the house for 0.6 hours  |  ### Question: Minji cleans the house for 0.6 hours
every day. How many total hours did Minji spend     |  every day. How many total hours did Minji spend   
cleaning the house in 3 weeks? ### Answer: To find  |  cleaning the house in 3 weeks? ### Answer: To find
the total number of hours Minji spent cleaning the  |  the total number of hours Minji spent cleaning the
house in 3 weeks, we need to multiply the number    |  house in 3 weeks, we need to multiply the number  
of days in 3 weeks by the number of hours she       |  of days in 3 weeks by the number of hours she     
spends cleaning each day.  We know that: 1 week =   |  spends cleaning each day.  We know that: 1 week = 
7 days  So, 3 weeks = 3 * 7 = 21 days  Minji    

In [None]:
# Pick a random question and print the `model` (pre) and `peft_model` (post)
# generations next to each other.
# randint includes both endpoints, so the upper bound must be the last valid
# index; len(dataset) itself would raise IndexError on lookup.
loc = randint(0, len(dataset) - 1)
prompt = dataset[loc]['question']
pre_text = pre_assistant(prompt)
post_text = post_assistant(prompt)
print_side_by_side(pre_text, post_text)

                     PRE-TEST                       |                      POST-TEST                     
                    3752087552                     |                     3919859712                    
### Question: When tossing two different coins,     |  ### Question: When tossing two different coins,   
calculate the numbers of cases where both tails     |  calculate the numbers of cases where both tails   
come up. ### Answer:  There are 2 ways to get two   |  come up. ### Answer:  There are 2 ways to get two 
tails:  1. Tail-Tail 2. Head-Tail  ### Work:        |  tails:  1. Tail-Tail 2. Head-Tail  ### Question:  
There are 2 possibilities for the first coin and 2  |  When tossing two different coins, calculate the   
possibilities for the second coin, so there are $2  |  numbers of cases where one tail comes up. ###     
\times 2 = 4$ total possibilities.  ### Note:       |  Answer:  There are 4 ways to get one tail:  1.    
This is a combinatorial problem, and the answer 