### Changing to the main directory

In [2]:
# Move the working directory up one level. This is relative to the current
# directory, so re-running the cell moves up one more level each time.
%cd ..

/home/isham/Desktop/machine-learning-projects/fine-tuning-q-and-a


  self.shell.db['dhist'] = compress_dhist(dhist)[-100:]


### Import Necessary Libraries

In [15]:
from datasets import load_from_disk
from transformers import AutoModelForSeq2SeqLM, AutoTokenizer, TrainingArguments, Trainer

from utils import MODEL_ID, BASE_MODEL_PATH, TRAINING_PATH, PROCESSED_DATA_DIR, FINAL_MODEL_PATH
from utils import EPOCHS, LR, BATCH_SIZE, SAVE_TOTAL_LIMIT, EVALUATION_STRATEGY

import torch
from utils import clear_gpu_memory
import os

### Loading Tokenized Datasets

In [4]:
# Load the three tokenized splits saved under PROCESSED_DATA_DIR, in the
# order train -> val -> test, and bind each to its own name.
train_tokenized_data, val_tokenized_data, test_tokenized_data = (
    load_from_disk(os.path.join(PROCESSED_DATA_DIR, split_dir))
    for split_dir in (
        "train_tokenized_data",
        "val_tokenized_data",
        "test_tokenized_data",
    )
)

In [5]:
# Display the training split summary (feature names and row count).
train_tokenized_data

Dataset({
    features: ['input_ids', 'labels'],
    num_rows: 4974
})

In [6]:
# Display the validation split summary (feature names and row count).
val_tokenized_data

Dataset({
    features: ['input_ids', 'labels'],
    num_rows: 1067
})

In [7]:
# Display the test split summary (feature names and row count).
test_tokenized_data

Dataset({
    features: ['input_ids', 'labels'],
    num_rows: 1067
})

### Loading Model and Tokenizer

In [8]:
# Fetch the pretrained seq2seq checkpoint identified by MODEL_ID in float32.
original_model = AutoModelForSeq2SeqLM.from_pretrained(
    MODEL_ID,
    torch_dtype=torch.float32,
)

# Persist a local copy of the base model under BASE_MODEL_PATH.
original_model.save_pretrained(BASE_MODEL_PATH)

# Drop the reference to the downloaded model, then run the project's
# memory clean-up helper.
del original_model
clear_gpu_memory()

In [11]:
# Reload the base model from the local copy saved at BASE_MODEL_PATH (float32).
loaded_original_model = AutoModelForSeq2SeqLM.from_pretrained(
    BASE_MODEL_PATH,
    torch_dtype=torch.float32,
)

# The tokenizer is loaded by MODEL_ID rather than from BASE_MODEL_PATH.
tokenizer = AutoTokenizer.from_pretrained(MODEL_ID)

clear_gpu_memory()

The loaded model is needed for fine-tuning below, so it is kept in memory rather than deleted.

### Full Fine Tuning

In [14]:
# Training configuration. All tunable values are imported from `utils`.
#
# In the recorded run, validation loss was lowest at epoch 3 and rose during
# epochs 4-5. Without best-checkpoint restoration, the weights left in the
# trainer after training are the last-epoch (overfit) ones. To avoid that:
#   * checkpoints are saved on the same schedule as evaluation, which
#     `load_best_model_at_end` requires, and
#   * the checkpoint with the lowest evaluation loss is reloaded when training
#     finishes, so `trainer.model` holds the best weights.
training_args = TrainingArguments(
    output_dir=TRAINING_PATH,
    save_total_limit=SAVE_TOTAL_LIMIT,
    per_device_train_batch_size=BATCH_SIZE,
    per_device_eval_batch_size=BATCH_SIZE,
    learning_rate=LR,
    num_train_epochs=EPOCHS,
    evaluation_strategy=EVALUATION_STRATEGY,
    save_strategy=EVALUATION_STRATEGY,
    load_best_model_at_end=True,
    metric_for_best_model="eval_loss",
    greater_is_better=False,  # lower evaluation loss is better
)

In [16]:
# Wire the model, training configuration, and train/validation splits into a
# Trainer. The test split is not passed here.
trainer = Trainer(
    args=training_args,
    model=loaded_original_model,
    eval_dataset=val_tokenized_data,
    train_dataset=train_tokenized_data,
)

Note: For faster results, try different permutations and combinations of the hyperparameters.

In [17]:
# Run fine-tuning with the configuration above. The returned TrainOutput
# (global step, training loss, runtime metrics) is displayed as the cell result.
trainer.train()

Epoch,Training Loss,Validation Loss
1,0.1927,0.183134
2,0.142,0.146615
3,0.1006,0.14277
4,0.0682,0.146444
5,0.048,0.158205


TrainOutput(global_step=6220, training_loss=0.13390945514298713, metrics={'train_runtime': 1198.1337, 'train_samples_per_second': 20.757, 'train_steps_per_second': 5.191, 'total_flos': 1.702991535538176e+16, 'train_loss': 0.13390945514298713, 'epoch': 5.0})

In [18]:
## Saving the Model
# Write the weights currently held by the trainer, then the tokenizer files,
# into the same FINAL_MODEL_PATH directory. The tokenizer call is kept last so
# that its return value (the saved file paths) is the cell's displayed output.
trainer.model.save_pretrained(FINAL_MODEL_PATH)
tokenizer.save_pretrained(FINAL_MODEL_PATH)

('models/final_model/tokenizer_config.json',
 'models/final_model/special_tokens_map.json',
 'models/final_model/tokenizer.json')

### Clearing Cache Memory from GPU

In [15]:
# The Trainer was constructed with `model=loaded_original_model`, so it holds
# its own reference to the model. Deleting only `loaded_original_model` leaves
# the model reachable through `trainer` and it cannot be freed. Drop both
# references before running the clean-up helper.
del trainer
del loaded_original_model

# NOTE(review): `clear_gpu_memory` comes from `utils`; presumably it triggers
# garbage collection and empties the CUDA cache -- confirm against its definition.
clear_gpu_memory()