## Installing Unsloth library

In [None]:
# Install Unsloth (with its "colab-new" extras) directly from the GitHub repository.
# NOTE(review): no tag or commit is given, so the installed version is not pinned.
!pip install "unsloth[colab-new] @ git+https://github.com/unslothai/unsloth.git"

# Install the companion libraries. --no-deps makes pip skip their own dependencies
# (presumably to leave the preinstalled packages untouched — TODO confirm).
!pip install --no-deps xformers trl peft accelerate bitsandbytes triton

## Importing necessary libraries

In [1]:
import pandas as pd
import torch
from trl import SFTTrainer
from datasets import load_dataset
from datasets import Dataset
from transformers import TrainingArguments, TextStreamer
from sklearn.model_selection import train_test_split
from unsloth.chat_templates import get_chat_template
from unsloth import FastLanguageModel, is_bfloat16_supported

🦥 Unsloth: Will patch your computer to enable 2x faster free finetuning.


## Dataset preparation

In [2]:
# Start from an empty text buffer; the dataset file is read into `data` in the next cell.
data = str()

In [3]:
# Read the whole dataset file into a single string with every newline removed.
# The `with` block closes the file on exit, so no explicit close() call is needed.
# The encoding is spelled out so the result does not depend on the platform's
# default text encoding (assumes the file is UTF-8 — TODO confirm).
with open("/content/Dataset (1).txt", "r", encoding="utf-8") as f:
  data = f.read().replace('\n', '')

In [4]:
# Cut the raw text at every '<start>' marker and drop the first piece,
# i.e. whatever precedes the first marker.
conversations = data.split('<start>')[1:]

In [5]:
# Collect [person_text, gandhi_text] pairs: each conversation is cut at every
# 'Person:' label, and a piece is kept only when cutting it at 'Gandhi:'
# produces exactly two parts.
dialogs = [
  turn_parts
  for conversation in conversations
  for turn_parts in (piece.split('Gandhi:') for piece in conversation.split('Person:'))
  if len(turn_parts) == 2
]

In [6]:
# Empty container for the conversations in {'from': ..., 'value': ...} message format.
formatted_data = list()

In [7]:
# Turn every [person_text, gandhi_text] pair into a two-message conversation.
# The list is rebuilt from scratch on every run, so executing this cell again
# does not append a second copy of each conversation to `formatted_data`.
formatted_data = [
  [
    {'from': 'Person', 'value': pair[0]},
    {'from': 'Gandhi', 'value': pair[1]},
  ]
  for pair in dialogs
]

In [8]:
# Hold out 30% of the conversations for validation; the fixed random_state
# makes the split repeatable.
train_data, test_data = train_test_split(
  formatted_data,
  test_size=0.3,
  random_state=42,
)

In [9]:
# Wrap the training conversations in a pandas DataFrame with a single
# 'conversations' column.
df = pd.DataFrame(data={'conversations': train_data})

## Fine Tuning

### Loading the Llama3.1 8B model with 4 bit quantization from unsloth

In [None]:
max_seq_length = 2048 # Maximum number of tokens allowed in a sequence

# Load the pre-quantized 4-bit Llama 3.1 8B checkpoint published by Unsloth:
# https://huggingface.co/unsloth/Meta-Llama-3.1-8B-bnb-4bit
# dtype=None lets Unsloth pick the compute precision for the current GPU instead
# of forcing float16, so the model's precision agrees with trainer settings that
# enable bf16 on GPUs that support it.
model, tokenizer = FastLanguageModel.from_pretrained(
    model_name="unsloth/Meta-Llama-3.1-8B-bnb-4bit",
    max_seq_length=max_seq_length,
    load_in_4bit=True,
    dtype=None,
)

### Preparing the PEFT model for fine tuning

In [None]:
# Wrap the loaded model with LoRA adapters; the returned PEFT model replaces `model`.
model = FastLanguageModel.get_peft_model(
    model,
    # Projection layers that receive LoRA adapters.
    target_modules=[
        "q_proj", "k_proj", "v_proj",
        "up_proj", "down_proj",
        "o_proj", "gate_proj",
    ],
    # Adapter size and scaling.
    r=16,            # LoRA rank
    lora_alpha=16,   # LoRA scaling factor
    lora_dropout=0,  # No dropout on the adapter layers
    bias="none",     # Bias terms are not trained
    # LoRA variants, both switched off.
    use_rslora=False,
    use_dora=False,
    # Gradient checkpointing is enabled.
    use_gradient_checkpointing=True,
)

### Using chatml template with tokenizer for converting inputs to tensors and vice versa

In the dataset, each message has the form {from: Person/Gandhi, value: text}, but the ChatML template expects messages of the form {role: user/assistant, content: text}.
Therefore we map the keys and speaker names of our dataset onto those of the ChatML template when attaching the template to the tokenizer.

In [None]:
# Attach the predefined ChatML template to the tokenizer and declare how this
# dataset's keys ('from', 'value') and speakers ('Person', 'Gandhi') correspond
# to the template's role/content keys and user/assistant roles.
tokenizer = get_chat_template(
    tokenizer,
    chat_template="chatml",
    mapping={
        "role": "from",
        "content": "value",
        "user": "Person",
        "assistant": "Gandhi",
    },
)

def apply_template(examples):
    """Render every conversation of a batch through the tokenizer's chat template.

    Args:
        examples: A batch mapping whose 'conversations' entry is a sequence of
            conversations (each one a list of message dicts).

    Returns:
        A dict with a single 'text' key holding one rendered result per
        conversation, in input order.
    """
    rendered = []
    for conversation in examples['conversations']:
        # tokenize=False asks for text rather than token ids; no generation
        # prompt is appended after the last message.
        rendered.append(
            tokenizer.apply_chat_template(
                conversation,
                tokenize=False,
                add_generation_prompt=False,
            )
        )
    return {"text": rendered}

# Convert the DataFrame into a Hugging Face `datasets.Dataset`, then run
# apply_template over it in batches to add the 'text' field used for fine tuning.
dataset = Dataset.from_pandas(df).map(apply_template, batched=True)


### Making a SFTTrainer object for fine tuning model where SFTTrainer stands for Supervised Fine Tuning Trainer

In [16]:
# Build the supervised fine-tuning trainer.
# Mixed precision is chosen with Unsloth's is_bfloat16_supported() helper (imported
# at the top of the notebook) rather than torch.cuda.is_bf16_supported(), so the
# fp16/bf16 flags follow the same capability check as the Unsloth library itself.
trainer = SFTTrainer(
    model=model,                    # PEFT model to be fine tuned
    tokenizer=tokenizer,            # Tokenizer configured with the ChatML template
    train_dataset=dataset,          # Dataset used for training
    dataset_text_field="text",      # Name of the dataset column holding the training text
    max_seq_length=max_seq_length,  # Maximum tokens in a sequence
    dataset_num_proc=2,             # Number of processes used for dataset preprocessing
    packing=True,                   # Pack multiple short examples into one sequence
    args=TrainingArguments(
        learning_rate=2e-4,             # Peak learning rate
        lr_scheduler_type="linear",     # Linear learning-rate schedule
        per_device_train_batch_size=2,
        gradient_accumulation_steps=4,  # Accumulate gradients over 4 batches before each optimizer step (effective batch size 2 x 4 = 8)
        num_train_epochs=10,            # Number of training epochs
        fp16=not is_bfloat16_supported(),  # Use fp16 only when bf16 is unavailable
        bf16=is_bfloat16_supported(),      # Use bf16 when the GPU supports it
        logging_steps=3,                # Log the training loss every 3 steps
        optim="adamw_8bit",             # 8-bit AdamW optimizer
        weight_decay=0.01,              # Weight-decay (regularization) coefficient
        warmup_steps=5,                 # Learning-rate warmup steps
        output_dir="output",            # Directory for training outputs
        seed=42,
    ),
)

In [15]:
# GPU memory snapshot taken before training: total memory of device 0 and the
# value reported by torch.cuda.max_memory_reserved(), both converted to GiB
# and rounded to two decimals.
gpu_statistics = torch.cuda.get_device_properties(0)
max_memory = round(gpu_statistics.total_memory / 2**30, 2)
reserved_memory = round(torch.cuda.max_memory_reserved() / 2**30, 2)
print(f"Reserved Memory: {reserved_memory}GB")
print(f"Max Memory: {max_memory}GB")

Reserved Memory: 5.98GB
Max Memory: 14.75GB


### Training/ Fine Tuning the model

In [None]:
trainer_stats = trainer.train() # Run fine tuning; the value returned by train() is kept so it can be saved later

==((====))==  Unsloth - 2x faster free finetuning | Num GPUs = 1
   \\   /|    Num examples = 57 | Num Epochs = 10
O^O/ \_/ \    Batch size per device = 2 | Gradient Accumulation steps = 4
\        /    Total batch size = 8 | Total steps = 70
 "-____-"     Number of trainable parameters = 41,943,040
  with torch.enable_grad(), device_autocast_ctx, torch.cpu.amp.autocast(**ctx.cpu_autocast_kwargs):  # type: ignore[attr-defined]
  with torch.enable_grad(), device_autocast_ctx, torch.cpu.amp.autocast(**ctx.cpu_autocast_kwargs):  # type: ignore[attr-defined]


Step,Training Loss
3,2.253
6,1.791
9,1.4949
12,1.3457
15,1.2381
18,1.1667
21,1.1245
24,1.0681
27,1.0327
30,1.0062


## Saving the model locally and to HuggingFace Hub.

In [None]:
# Saving the trainer stats
# If train() returned a named tuple, convert it with _asdict() so the JSON file
# keeps the field names instead of becoming an unlabeled array. Any other kind
# of object is written unchanged.
import json
stats_to_save = trainer_stats._asdict() if hasattr(trainer_stats, "_asdict") else trainer_stats
with open("trainer_stats.json", "w", encoding="utf-8") as f:
    json.dump(stats_to_save, f, indent=4)

In [None]:
# Locally saving the model and pushing it to the Hugging Face Hub (only LoRA adapters)
import os

model.save_pretrained('Gandhiji1.1')      # Saving the adapters locally
tokenizer.save_pretrained('Gandhiji1.1')  # Saving the tokenizer (with its chat template) next to the adapters

# The access token is read from the HF_TOKEN environment variable instead of
# being pasted into the notebook, where it would be stored with the file.
# When HF_TOKEN is unset, None is passed so a previously cached login can be used.
# 'Path/to/Hugginface_model' is a placeholder: replace it with the target repository id.
model.push_to_hub('Path/to/Hugginface_model', tokenizer = tokenizer, token = os.environ.get("HF_TOKEN"))

README.md:   0%|          | 0.00/592 [00:00<?, ?B/s]

adapter_model.safetensors:   0%|          | 0.00/168M [00:00<?, ?B/s]

Saved model to https://huggingface.co/Gandhiji1.1
