<a href="https://colab.research.google.com/github/jlopetegui98/Literary-Fine-Tuning-of-LLM/blob/main/Fine-Tuning-RL/fine_tuning_RL.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [1]:
# Mount Google Drive at /content/drive; `google.colab` is only importable
# inside a Colab runtime.
from google.colab import drive
drive.mount('/content/drive')

Mounted at /content/drive


In [2]:
!pip install -q -U bitsandbytes
!pip install -q -U git+https://github.com/huggingface/transformers.git
!pip install -q -U git+https://github.com/huggingface/peft.git
!pip install -q -U git+https://github.com/huggingface/accelerate.git
!pip install -q trl xformers wandb datasets einops gradio sentencepiece

[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m105.0/105.0 MB[0m [31m6.7 MB/s[0m eta [36m0:00:00[0m
[?25h  Installing build dependencies ... [?25l[?25hdone
  Getting requirements to build wheel ... [?25l[?25hdone
  Preparing metadata (pyproject.toml) ... [?25l[?25hdone
  Building wheel for transformers (pyproject.toml) ... [?25l[?25hdone
  Installing build dependencies ... [?25l[?25hdone
  Getting requirements to build wheel ... [?25l[?25hdone
  Preparing metadata (pyproject.toml) ... [?25l[?25hdone
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m270.9/270.9 kB[0m [31m2.3 MB/s[0m eta [36m0:00:00[0m
[?25h  Building wheel for peft (pyproject.toml) ... [?25l[?25hdone
  Installing build dependencies ... [?25l[?25hdone
  Getting requirements to build wheel ... [?25l[?25hdone
  Preparing metadata (pyproject.toml) ... [?25l[?25hdone
  Building wheel for accelerate (pyproject.toml) ... [?25l[?25hdone
[2K     [90m━━━━━━━━━━━

In [5]:
import torch
from torch.utils.data import DataLoader, Dataset
from transformers import AutoModelForSequenceClassification, AutoTokenizer
from trl import PPOTrainer, PPOConfig, AutoModelForCausalLMWithValueHead, create_reference_model
from trl.core import respond_to_batch
from tqdm import tqdm

In [None]:
# Root folder of the project plus its dataset and models sub-folders.
# NOTE(review): the root is relative to the working directory while Drive is
# mounted at /content/drive -- presumably relies on the cwd being /content.
dir_root = './drive/MyDrive/DL-ENS'
dir_data = dir_root + '/dataset'
models_path = dir_root + '/models'

In [3]:
# Names of the artefacts used by the notebook.
# -- models: hub id used to load the tokenizer, and the two checkpoint files
#    looked up under `models_path`
base_model = "mistralai/Mistral-7B-Instruct-v0.1"
fine_tuned_model = "Mistral7B_fine_tuned_OscarWilde.pt"
clf_name = "BertClassifier(BERTAA)_balanced_data.pt"
# -- data: text file with one prompt per line, looked up under `dir_data`
dataset_name = "story_prompts_for_training.txt"

In [None]:
# Use the first CUDA GPU when one is visible, otherwise fall back to the CPU.
if torch.cuda.is_available():
    device = torch.device("cuda:0")
else:
    device = torch.device("cpu")

In [None]:
# load model and tokenizer
# The checkpoint is used directly as a model object, i.e. it is a fully pickled
# module rather than a state dict. Recent PyTorch releases default torch.load to
# weights_only=True, which refuses such files, so the flag is set explicitly.
# SECURITY: weights_only=False unpickles arbitrary Python objects -- only load
# checkpoints you created or trust.
# map_location puts the tensors straight on `device`, so a checkpoint saved on a
# GPU can still be loaded on a CPU-only runtime.
model = torch.load(
    f'{models_path}/{fine_tuned_model}',
    map_location=device,
    weights_only=False,
)
model.to(device)

tokenizer = AutoTokenizer.from_pretrained(base_model,trust_remote_code=True)
# pad on the left and reuse the EOS token as the padding token
tokenizer.padding_side = 'left'
tokenizer.pad_token = tokenizer.eos_token
tokenizer.add_eos_token = True
# displayed as the cell output to check the special-token settings
tokenizer.add_bos_token, tokenizer.add_eos_token

In [None]:
# create reference model
# `model_ref` is built from `model` with TRL's create_reference_model and moved
# to `device`; it is later handed to PPOTrainer as the reference model.
model_ref = create_reference_model(model)
model_ref.to(device)

In [None]:
# load clf
# The classifier checkpoint is used directly as a model object, i.e. it is a
# fully pickled module. Recent PyTorch releases default torch.load to
# weights_only=True, which refuses such files, so the flag is set explicitly.
# SECURITY: weights_only=False unpickles arbitrary Python objects -- only load
# checkpoints you created or trust.
# map_location puts the tensors straight on `device`, so a checkpoint saved on a
# GPU can still be loaded on a CPU-only runtime.
clf = torch.load(
    f'{models_path}/{clf_name}',
    map_location=device,
    weights_only=False,
)
clf.to(device)

In [6]:
# conf
# Number of prompts per step; used both for PPOConfig and for the DataLoader.
batch_size = 1

In [None]:
# Build the PPO configuration, then hand the trainer the model to train, the
# reference model and the tokenizer.
ppo_config = PPOConfig(batch_size=batch_size)
ppo_trainer = PPOTrainer(
    ppo_config,
    model,
    model_ref,
    tokenizer,
)

In [None]:
class PromptsDataset(Dataset):
    """Dataset of tokenized story prompts.

    Every raw prompt is wrapped in the instruction template and tokenized once,
    at construction time, so ``__getitem__`` is a plain list lookup.

    Args:
        sequences: iterable of prompt strings.
        tokenizer: callable used to tokenize each templated prompt. It is called
            with ``return_tensors="pt"``, ``add_special_tokens=True``,
            ``max_length=100``, ``padding='max_length'`` and ``truncation=True``.
    """

    def __init__(self, sequences, tokenizer):
        self.tokenizer = tokenizer
        self.sequences = self.init_sequences(sequences)

    def init_sequences(self, sequences):
        """Return the list of tokenizer outputs, one per prompt in ``sequences``."""
        # Use the tokenizer given to the constructor; the previous code reached
        # for a module-level `tokenizer`, silently ignoring the argument (and
        # failing with NameError when no such global exists).
        return [
            self.tokenizer(
                f"This are the first lines of a work of fiction. Continue it. [INST] {seq} [/INST]",
                return_tensors="pt",
                add_special_tokens=True,
                max_length=100,
                padding='max_length',
                truncation=True,
            )
            for seq in tqdm(sequences)
        ]

    def __len__(self):
        """Number of tokenized prompts."""
        return len(self.sequences)

    def __getitem__(self, idx):
        """Return the tokenizer output stored at position ``idx``."""
        return self.sequences[idx]

In [None]:
# helper that asks the model for a response to a batch of inputs
def get_responses(model, inputs):
  """Return the result of ``respond_to_batch(model, inputs)``."""
  return respond_to_batch(model, inputs)

In [None]:
# function to evaluate model outputs using classifier
def get_rewards(clf, inputs):
  """Score ``inputs`` with the classifier and return its predictions.

  Args:
    clf: object exposing a ``predict(inputs)`` method.
    inputs: forwarded to ``clf.predict`` unchanged.

  Returns:
    Whatever ``clf.predict(inputs)`` returns.
    NOTE(review): intended to be the probabilities of being a Wilde text --
    confirm that ``predict`` returns probabilities in the form the PPO step
    expects as rewards.
  """
  # The predictions used to be computed and then dropped, so every caller
  # received None; return them.
  return clf.predict(inputs)

In [None]:
# create dataset instance

# read the input prompts, one per line
# The file is only read, so open it read-only ('r+' also demanded write access).
with open(f'{dir_data}/{dataset_name}', 'r', encoding='utf-8') as fd:
  # Strip just the line terminator: slicing off the last character would eat a
  # real character from a final line that has no trailing newline.
  texts = [line.rstrip('\n') for line in fd]

dataset = PromptsDataset(texts, tokenizer)

dataloader = DataLoader(dataset, batch_size=batch_size)

# PPO loop: for each batch, generate a response, score it with the classifier
# and take one trainer step.
for batch in tqdm(dataloader):
  batch.to(device)
  response = get_responses(model, batch)
  rewards = get_rewards(clf, response)
  # NOTE(review): confirm that `batch[0]`, `response[0]` and `rewards` have the
  # types/shapes `ppo_trainer.step` expects.
  train_stats = ppo_trainer.step([batch[0]], [response[0]], rewards)
  # drop the per-iteration references before the next batch
  del batch, response, rewards

Hello world
