# GPU

In [None]:
# Show the GPU attached to this runtime, its driver/CUDA versions and its current memory usage.
!nvidia-smi

Mon Nov 27 05:56:54 2023       
+-----------------------------------------------------------------------------+
| NVIDIA-SMI 525.105.17   Driver Version: 525.105.17   CUDA Version: 12.0     |
|-------------------------------+----------------------+----------------------+
| GPU  Name        Persistence-M| Bus-Id        Disp.A | Volatile Uncorr. ECC |
| Fan  Temp  Perf  Pwr:Usage/Cap|         Memory-Usage | GPU-Util  Compute M. |
|                               |                      |               MIG M. |
|   0  Tesla V100-SXM2...  Off  | 00000000:00:04.0 Off |                    0 |
| N/A   33C    P0    39W / 300W |  15116MiB / 16384MiB |      0%      Default |
|                               |                      |                  N/A |
+-------------------------------+----------------------+----------------------+
                                                                               
+-----------------------------------------------------------------------------+
| Proces

# Setup - Install Libraries

In [None]:
!pip install -q -U trl transformers accelerate git+https://github.com/huggingface/peft.git
!pip install -q datasets bitsandbytes einops wandb

  Installing build dependencies ... [?25l[?25hdone
  Getting requirements to build wheel ... [?25l[?25hdone
  Preparing metadata (pyproject.toml) ... [?25l[?25hdone


# Upload Dataset (csv file)
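* We will use the `load_dataset` function from the Hugging Face `datasets` library, because it lets us load the CSV directly as the training split
* Make sure the CSV file has finished uploading before running the cells that read it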

In [None]:
# Upload file from Computer
from google.colab import files


# Opens the Colab upload dialog. Later cells index `uploaded` by file name and
# call `.decode('utf-8')` on the value, i.e. it maps file name -> raw bytes.
# The rest of the notebook expects the file to be named "Eng_to_STE - Sheet1.csv".
uploaded = files.upload()

Saving Eng_to_STE - Sheet1.csv to Eng_to_STE - Sheet1.csv


In [None]:
# HuggingFace Load Dataset
from datasets import load_dataset

# Map the uploaded CSV onto the "train" split and request exactly that split,
# so `dataset` is a single Dataset rather than a dict of splits.
csv_files = {"train": "Eng_to_STE - Sheet1.csv"}
dataset = load_dataset("csv", data_files=csv_files, split="train")
dataset

Downloading data files:   0%|          | 0/1 [00:00<?, ?it/s]

Extracting data files:   0%|          | 0/1 [00:00<?, ?it/s]

Generating train split: 0 examples [00:00, ? examples/s]

Dataset({
    features: ['Input', 'Output'],
    num_rows: 1000
})

In [None]:
# Import Pandas Library
import io

import pandas as pd

# Decode the uploaded bytes to text, then parse that text as CSV.
csv_text = uploaded['Eng_to_STE - Sheet1.csv'].decode('utf-8')
train_df = pd.read_csv(io.StringIO(csv_text))
train_df # display

Unnamed: 0,Input,Output
0,Follow the Safety Instructions,Obey the Safety Instructions
1,Grease the Fasteners,Apply grease to the fasteners
2,No leaks permitted,Make sure that there are no leaks
3,See if the pins are installed correctly,Make sure that the pins are properly installed.
4,Turn shaft assembly,Turn the shaft assembly
...,...,...
995,Unclip the visor.,Remove the visor from the clip.
996,Avoid uneven fuel load about the aircraft cent...,Make sure that the fuel load is equal on each ...
997,Unplug soldering iron.,Disconnect the soldering iron.
998,Do not use your hands to prevent movement of t...,Do not use your hands to prevent movement of t...


# Fine Tuning
* Prepare the Model (Falcon-7b): BitsAndBytes configuration

In [None]:
import torch
from transformers import AutoModelForCausalLM, AutoTokenizer, BitsAndBytesConfig

model_name = "tiiuae/falcon-7b"

# 4-bit NF4 quantization settings for loading the base model.
bnb_config = BitsAndBytesConfig(
    load_in_4bit=True,
    # All 4-bit options carry the `bnb_4bit_` prefix; an option spelled any
    # other way is not applied, which would leave double quantization off.
    bnb_4bit_use_double_quant=True,
    bnb_4bit_quant_type="nf4",
    # NOTE(review): the runtime shown at the top of the notebook is a V100 and
    # training runs with fp16=True; bfloat16 is presumably not natively
    # supported on that GPU — confirm whether torch.float16 is the better
    # compute dtype here.
    bnb_4bit_compute_dtype=torch.bfloat16,
)

# Download (or reuse the cached) checkpoint and quantize it while loading.
model = AutoModelForCausalLM.from_pretrained(
    model_name,
    quantization_config=bnb_config,
    trust_remote_code=True,
)
# Turn the key/value cache off on the model config before training.
model.config.use_cache = False

Loading checkpoint shards:   0%|          | 0/2 [00:00<?, ?it/s]

# Tokenizer

In [None]:
# Load the tokenizer that belongs to the same checkpoint as the model.
tokenizer = AutoTokenizer.from_pretrained(
    model_name,
    trust_remote_code=True,
)
# Pad with the end-of-sequence token.
tokenizer.pad_token = tokenizer.eos_token

# LoRA Configuration
* LoRA (Low-Rank Adaptation) is a training method that shortens training time for large models while reducing memory usage.
* LoRA is one of the PEFT (Parameter-Efficient Fine-Tuning) techniques.

In [None]:
from peft import LoraConfig

# LoRA hyper-parameters: adapter rank, scaling numerator and adapter dropout.
lora_r = 64
lora_alpha = 16
lora_dropout = 0.1

# Names of the modules that receive LoRA adapters.
lora_target_modules = [
    "query_key_value",
    "dense",
    "dense_h_to_4h",
    "dense_4h_to_h",
]

peft_config = LoraConfig(
    r=lora_r,
    lora_alpha=lora_alpha,
    lora_dropout=lora_dropout,
    target_modules=lora_target_modules,
    bias="none",
    task_type="CAUSAL_LM",
)

# Loading the Trainer
* Note: Gradient Checkpointing will be turned OFF

In [None]:
from transformers import TrainingArguments

# Where checkpoints and logs are written.
output_dir = "./results"
# 4 sequences per step x 4 accumulated steps = 16 sequences per optimizer update (per device).
per_device_train_batch_size = 4
gradient_accumulation_steps = 4
optim = "paged_adamw_32bit"
# A checkpoint and a log line every 10 optimizer steps.
save_steps = 10
logging_steps = 10
learning_rate = 2e-4
max_grad_norm = 0.3
# Training length is fixed in optimizer steps rather than epochs.
max_steps = 500
warmup_ratio = 0.03
# The plain "constant" schedule has no warmup phase, so `warmup_ratio` would be
# ignored with it; "constant_with_warmup" ramps the learning rate up over the
# first 3% of the steps and then holds it constant.
lr_scheduler_type = "constant_with_warmup"

training_arguments = TrainingArguments(
    output_dir=output_dir,
    per_device_train_batch_size=per_device_train_batch_size,
    gradient_accumulation_steps=gradient_accumulation_steps,
    optim=optim,
    save_steps=save_steps,
    logging_steps=logging_steps,
    learning_rate=learning_rate,
    fp16=True,
    max_grad_norm=max_grad_norm,
    max_steps=max_steps,
    warmup_ratio=warmup_ratio,
    group_by_length=True,
    lr_scheduler_type=lr_scheduler_type,
    gradient_checkpointing=False, # set Gradient Checkpointing to FALSE
)

In [None]:
from trl import SFTTrainer

max_seq_length = 512

# Layout of one training sample: the source sentence fills the `{input}` slot
# and the target sentence is appended right after the "### Output:" marker.
PROMPT_TEMPLATE = "### Input:\n{input}\n\n### Output:\n"


def build_training_text(example):
    """Merge one dataset row into a single prompt-plus-target training string.

    Args:
        example: One row of the dataset with the "Input" and "Output" columns.

    Returns:
        A dict with a "text" entry holding the formatted prompt, the target
        sentence and the tokenizer's end-of-sequence token.
    """
    prompt = PROMPT_TEMPLATE.format(input=example["Input"])
    return {"text": f"{prompt}{example['Output']}{tokenizer.eos_token}"}


# Training on the "Input" column alone never shows the model the target
# sentences, so the Input -> Output mapping cannot be learned. Build a "text"
# column that contains both halves of every pair and train on that instead.
sft_dataset = dataset.map(build_training_text)

trainer = SFTTrainer(
    model=model,
    train_dataset=sft_dataset,
    peft_config=peft_config,
    dataset_text_field="text",
    max_seq_length=max_seq_length,
    tokenizer=tokenizer,
    args=training_arguments,
)

Map:   0%|          | 0/1000 [00:00<?, ? examples/s]

# Upcasting
* Upcasting the normalization layers to float32 stabilizes training

In [None]:
# Cast every sub-module whose qualified name contains "norm" to float32.
# `Module.to` converts the module's parameters in place, so its return value
# does not need to be kept.
for module_name, submodule in trainer.model.named_modules():
    if "norm" not in module_name:
        continue
    submodule.to(torch.float32)

# Train the Model

In [None]:
# Run fine-tuning with the trainer configured above; the run length is bounded
# by the `max_steps` value passed through the training arguments.
trainer.train()

You're using a PreTrainedTokenizerFast tokenizer. Please note that with a fast tokenizer, using the `__call__` method is faster than using a method to encode the text followed by a call to the `pad` method to get a padded encoding.


Step,Training Loss
10,3.6807
20,3.3296
30,3.4917
40,3.0508
50,3.0741
60,3.2989
70,2.6157
80,2.596
90,2.5298
100,2.5331


TrainOutput(global_step=500, training_loss=1.328672513961792, metrics={'train_runtime': 971.3136, 'train_samples_per_second': 8.236, 'train_steps_per_second': 0.515, 'total_flos': 3478075984158720.0, 'train_loss': 1.328672513961792, 'epoch': 8.0})

In [None]:
def convert_sentence(input_sentence, prompt_template=None, max_new_tokens=50):
  """Generate the model's rewrite of ``input_sentence``, print it and return it.

  Args:
    input_sentence: Sentence to convert.
    prompt_template: Optional format string with an ``{input}`` placeholder
      that wraps the sentence before generation. When omitted, a
      notebook-level ``PROMPT_TEMPLATE`` is used if one is defined; otherwise
      the raw sentence is used as the prompt.
    max_new_tokens: Upper bound on the number of generated tokens; the prompt
      tokens do not count towards it.

  Returns:
    The generated text without the prompt.
  """
  if prompt_template is None:
    prompt_template = globals().get("PROMPT_TEMPLATE", "{input}")
  prompt = prompt_template.format(input=input_sentence)

  # Keep the whole encoding (not only input_ids) so the attention mask can be
  # handed to generate(), and place the tensors on the model's device.
  encoded = tokenizer(prompt, return_tensors="pt").to(model.device)
  prompt_length = encoded["input_ids"].shape[1]

  with torch.no_grad():
    # Beam search without sampling: sampling-only options such as top_k would
    # have no effect here, so none are passed.
    output = model.generate(
        input_ids=encoded["input_ids"],
        attention_mask=encoded["attention_mask"],
        max_new_tokens=max_new_tokens,
        num_beams=5,
        no_repeat_ngram_size=2,
        pad_token_id=tokenizer.eos_token_id,
    )

  # The returned sequence starts with the prompt tokens; decode only what the
  # model appended after them.
  generated_tokens = output[0][prompt_length:]
  decoded_output = tokenizer.decode(generated_tokens, skip_special_tokens=True).strip()
  print("Input Sentence: ", input_sentence)
  print("Output Sentence: ", decoded_output)
  return decoded_output

if __name__ == "__main__":

  input_sentence = input("Enter a sentence: ")
  convert_sentence(input_sentence)

Enter a sentence: grease the fasteners.


The attention mask and the pad token id were not set. As a consequence, you may observe unexpected behavior. Please pass your input's `attention_mask` to obtain reliable results.
Setting `pad_token_id` to `eos_token_id`:11 for open-end generation.


Input Sentence:  grease the fasteners.
Output Sentence:  grease the fasteners. Install the cover with the help of a second person. 4.3.1 Removal of the Cover In general it is advised to remove thecover horizontally. However, this is not possible in practice. Therefore, remove
