## Setup: Imports and Installations

Following the tutorial from: https://docs.argilla.io/en/latest/guides/llms/examples/train-reward-model-rlhf.html

# Part 1 With normal weights

In [None]:
%%capture
!pip install datasets pandas torch importlib
!pip install -q -U git+https://github.com/lvwerra/trl.git git+https://github.com/huggingface/transformers.git git+https://github.com/huggingface/accelerate.git git+https://github.com/huggingface/peft.git
!pip install -q datasets bitsandbytes einops sentencepiece

In [None]:
import json
import os
import random

import numpy as np
import pandas as pd
import torch
import torch.nn as nn
from datasets import Dataset, DatasetDict
from google.colab import drive
import torch.nn.functional as F
from torch.nn.utils import clip_grad_norm_ as clip_grad_norm
from torch.utils.data import DataLoader

from tqdm import tqdm
from transformers import (AdamW, AutoModel, AutoModelForSequenceClassification, AutoConfig,
                          AutoTokenizer, Trainer, TrainingArguments, get_polynomial_decay_schedule_with_warmup,
                          get_linear_schedule_with_warmup)
from transformers import DistilBertModel, DistilBertTokenizer
from sklearn.model_selection import train_test_split
from datasets import Dataset, load_dataset
from trl import RewardTrainer
from peft import LoraConfig, PeftConfig, TaskType, prepare_model_for_kbit_training, PeftModel, PromptLearningConfig,MODEL_TYPE_TO_PEFT_MODEL_MAPPING,get_peft_model

# Use a CUDA GPU when one is available, otherwise fall back to the CPU.
device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
from google.colab import drive

# Mount Google Drive at /content/MyDrive; later cells load fine-tuned
# checkpoints from "MyDrive/MyDrive/..." paths (presumably resolved relative
# to /content — confirm the notebook's working directory).
drive.mount("/content/MyDrive")


Welcome to bitsandbytes. For bug reports, please run

python -m bitsandbytes

 and submit this information together with your error trace to: https://github.com/TimDettmers/bitsandbytes/issues
bin /usr/local/lib/python3.10/dist-packages/bitsandbytes/libbitsandbytes_cuda118.so
CUDA SETUP: CUDA runtime path found: /usr/local/cuda/lib64/libcudart.so
CUDA SETUP: Highest compute capability among GPUs detected: 8.0
CUDA SETUP: Detected CUDA version 118
CUDA SETUP: Loading binary /usr/local/lib/python3.10/dist-packages/bitsandbytes/libbitsandbytes_cuda118.so...


  warn(msg)
  warn(msg)
  warn(msg)
  warn(msg)
  warn(msg)
  warn(msg)
Either way, this might cause trouble in the future:
If you get `CUDA error: invalid device function` errors, the above might be the cause and the solution is to make sure only one ['libcudart.so', 'libcudart.so.11.0', 'libcudart.so.12.0'] in the paths that we search based on your env.
  warn(msg)


Drive already mounted at /content/MyDrive; to attempt to forcibly remount, call drive.mount("/content/MyDrive", force_remount=True).


In [None]:
def print_trainable_parameters(model):
    """
    Print how many of the model's parameters are trainable.

    Args:
        model: Any object exposing ``named_parameters()`` that yields
            ``(name, parameter)`` pairs, where every parameter provides
            ``numel()`` and a ``requires_grad`` flag (e.g. a
            ``torch.nn.Module``).

    The printed line reports the trainable count, the total count and the
    trainable share in percent. A model without any parameters is reported
    as ``0.0`` percent instead of raising ``ZeroDivisionError``.
    """
    trainable_params = 0
    all_param = 0
    for _, param in model.named_parameters():
        count = param.numel()
        all_param += count
        if param.requires_grad:
            trainable_params += count
    # Guard the ratio: an empty model has nothing to divide by.
    trainable_pct = 100 * trainable_params / all_param if all_param else 0.0
    print(
        f"trainable params: {trainable_params:,} || all params: {all_param:,} || trainable%: {trainable_pct}"
    )

In [None]:
def charge_model(model_name, train, test, lora=False, special_tokenizer="", max_length=None):
    """
    Load a sequence-classification model with its tokenizer and tokenize both splits.

    Args:
        model_name: Hub id or local path of the model. With ``lora=True`` it
            is the PEFT adapter location and the base model is taken from the
            adapter's config.
        train: Dataset whose rows carry the text fields ``"chat_1"`` and
            ``"chat_0"``; it must support ``.map``.
        test: Evaluation dataset with the same fields.
        lora: When true, load the base model named in the adapter config and
            attach the adapter to it; otherwise load ``model_name`` directly
            with a single output label.
        special_tokenizer: Optional tokenizer name/path used instead of
            ``model_name`` (only consulted when ``lora`` is false).
        max_length: Length every sequence is padded/truncated to. ``None``
            (the default) falls back to the module-level ``max_tokens``,
            which is what this function has always used.

    Returns:
        ``(model, tokenizer, train, test)`` where both datasets have gained
        the ``input_ids_*`` / ``attention_mask_*`` columns for the chosen
        (``chat_1``) and rejected (``chat_0``) texts.
    """
    if lora:
        peft_model_id = model_name
        config = PeftConfig.from_pretrained(peft_model_id)

        # Load the base model and tokenizer recorded in the adapter config.
        model = AutoModelForSequenceClassification.from_pretrained(config.base_model_name_or_path, device_map={"": 0})
        tokenizer = AutoTokenizer.from_pretrained(config.base_model_name_or_path)

        # Attach the LoRA adapter weights to the base model.
        model = PeftModel.from_pretrained(model, peft_model_id, device_map={"": 0})
        model.eval()
    else:
        model = AutoModelForSequenceClassification.from_pretrained(model_name, num_labels=1)
        tokenizer_name = special_tokenizer if special_tokenizer else model_name
        tokenizer = AutoTokenizer.from_pretrained(tokenizer_name)
        tokenizer.add_special_tokens({'pad_token': '[PAD]'})
        tokenizer.add_special_tokens({'eos_token': '[PAD]'})

        tokenizer.pad_token = tokenizer.eos_token
        # NOTE(review): this copies the *config's* eos id, which may differ
        # from the id of the tokenizer's '[PAD]' token and is None for
        # configs without an eos_token_id — confirm this is intended before
        # changing it, since saved checkpoints may rely on it.
        model.config.pad_token_id = model.config.eos_token_id

    def formatting_func(examples):
        # Resolve the limit lazily so the global `max_tokens` is only needed
        # when no explicit max_length was passed.
        limit = max_tokens if max_length is None else max_length
        kwargs = {"padding": "max_length", "truncation": True, "max_length": limit, "return_tensors": "pt"}

        # "chat_1" is treated as the chosen text, "chat_0" as the rejected one.
        prompt_plus_chosen_response = examples["chat_1"]
        prompt_plus_rejected_response = examples["chat_0"]

        # Tokenize each text to a fixed-length sequence.
        tokens_chosen = tokenizer.encode_plus(prompt_plus_chosen_response, **kwargs)
        tokens_rejected = tokenizer.encode_plus(prompt_plus_rejected_response, **kwargs)

        # return_tensors="pt" adds a leading batch axis; [0] drops it again.
        return {
            "input_ids_chosen": tokens_chosen["input_ids"][0], "attention_mask_chosen": tokens_chosen["attention_mask"][0],
            "input_ids_rejected": tokens_rejected["input_ids"][0], "attention_mask_rejected": tokens_rejected["attention_mask"][0]
        }

    test = test.map(formatting_func)
    train = train.map(formatting_func)
    return model, tokenizer, train, test

# 1 GPT2

In [None]:
# Release cached GPU memory that PyTorch is no longer using, then display the
# device's memory figures as the cell result.
torch.cuda.empty_cache()
torch.cuda.mem_get_info()

(30488985600, 42481549312)

In [None]:
# Load the raw preference pairs from disk.
# NOTE(review): the file is decoded as cp437; if it was written as UTF-8,
# non-ASCII characters will come out garbled — confirm the source encoding.
with open('training_dataset.json', encoding="cp437", errors='ignore') as json_file:
    train = json.load(json_file)

# Length limit, also used for tokenization and by the trainer.
max_tokens = 512

# First pass: discard records whose `length_0` is above the limit.
full = pd.DataFrame(train)
full = full[full['length_0'].le(max_tokens)]

# Fixed seed so the 90/10 split is reproducible.
train, test = train_test_split(full, test_size=0.1, random_state=18625541)

# Second pass on each split: keep rows where both lengths are within the
# limit, then turn the surviving rows into a `datasets.Dataset`.
splits = {}
for split_name, frame in (("train", train), ("test", test)):
    frame = pd.DataFrame(frame)
    fits = frame['length_0'].le(max_tokens) & frame['length_1'].le(max_tokens)
    list_of_dicts = frame[fits].to_dict(orient='records')
    splits[split_name] = Dataset.from_list(list_of_dicts)
train, test = splits["train"], splits["test"]


In [None]:
# Evaluation-only configuration: a single optimizer step (max_steps=1) with a
# learning rate of 0, so the loaded weights should stay unchanged.
# evaluation_strategy="steps" with logging_steps=1 presumably triggers the
# evaluation right after that step (eval_steps is not set explicitly).
training_args = TrainingArguments(
    output_dir="./reward_model_baseline",
    per_device_train_batch_size=4,
    evaluation_strategy="steps",
    learning_rate=0,
    weight_decay=0.01,
    max_steps=1,
    logging_steps=1,
)

## 1.1 GPT2 Baseline

In [None]:
# Baseline: score the public `gpt2-medium` checkpoint on the test split.
# With the zero-learning-rate, single-step `training_args` of this section,
# `trainer.train()` is effectively an evaluation run.
model_name = "gpt2-medium"
torch.cuda.empty_cache()
# charge_model also tokenizes both splits, so `train`/`test` are rebound here.
model, tokenizer, train, test = charge_model(model_name, train, test)
print(len(test))
trainer = RewardTrainer(
    model=model,
    args=training_args,
    tokenizer=tokenizer,
    train_dataset=train,
    eval_dataset=test,
    max_length=max_tokens
)

print_trainable_parameters(model)
trainer.train()

Some weights of GPT2ForSequenceClassification were not initialized from the model checkpoint at gpt2-medium and are newly initialized: ['score.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


Map:   0%|          | 0/303 [00:00<?, ? examples/s]

Map:   0%|          | 0/2716 [00:00<?, ? examples/s]

303




trainable params: 354,824,192 || all params: 354,824,192 || trainable%: 100.0


You're using a GPT2TokenizerFast tokenizer. Please note that with a fast tokenizer, using the `__call__` method is faster than using a method to encode the text followed by a call to the `pad` method to get a padded encoding.
Could not estimate the number of tokens of the input, floating-point operations will not be computed


Step,Training Loss,Validation Loss,Accuracy
1,1.0567,0.785858,0.376238


TrainOutput(global_step=1, training_loss=1.0567363500595093, metrics={'train_runtime': 16.574, 'train_samples_per_second': 0.241, 'train_steps_per_second': 0.06, 'total_flos': 0.0, 'train_loss': 1.0567363500595093, 'epoch': 0.0})

## 1.2 GPT2 NORMAL

In [None]:
# Score the checkpoint stored under NLP/m3/GPT2 on Drive, using the same
# zero-learning-rate, single-step `training_args` as the baseline.
model_name = "MyDrive/MyDrive/NLP/m3/GPT2"
torch.cuda.empty_cache()
# `train`/`test` are re-tokenized with this checkpoint's tokenizer and rebound.
model, tokenizer, train, test = charge_model(model_name, train, test)

trainer = RewardTrainer(
    model=model,
    args=training_args,
    tokenizer=tokenizer,
    train_dataset=train,
    eval_dataset=test,
    max_length=max_tokens
)

print_trainable_parameters(model)
trainer.train()

Map:   0%|          | 0/303 [00:00<?, ? examples/s]

Map:   0%|          | 0/2716 [00:00<?, ? examples/s]

You're using a GPT2TokenizerFast tokenizer. Please note that with a fast tokenizer, using the `__call__` method is faster than using a method to encode the text followed by a call to the `pad` method to get a padded encoding.


trainable params: 354,824,192 || all params: 354,824,192 || trainable%: 100.0


Could not estimate the number of tokens of the input, floating-point operations will not be computed


Step,Training Loss,Validation Loss,Accuracy
1,0.0067,0.531164,0.89769


TrainOutput(global_step=1, training_loss=0.0067311059683561325, metrics={'train_runtime': 16.5904, 'train_samples_per_second': 0.241, 'train_steps_per_second': 0.06, 'total_flos': 0.0, 'train_loss': 0.0067311059683561325, 'epoch': 0.0})

## 1.3 GPT2 Augmented

In [None]:
# Score the checkpoint stored under NLP/m3/GPT2_large on Drive, using the same
# zero-learning-rate, single-step `training_args` as the baseline.
model_name = "MyDrive/MyDrive/NLP/m3/GPT2_large"
torch.cuda.empty_cache()
# `train`/`test` are re-tokenized with this checkpoint's tokenizer and rebound.
model, tokenizer, train, test = charge_model(model_name, train, test)

trainer = RewardTrainer(
    model=model,
    args=training_args,
    tokenizer=tokenizer,
    train_dataset=train,
    eval_dataset=test,
    max_length=max_tokens
)

print_trainable_parameters(model)
trainer.train()

Map:   0%|          | 0/303 [00:00<?, ? examples/s]

Map:   0%|          | 0/2716 [00:00<?, ? examples/s]

You're using a GPT2TokenizerFast tokenizer. Please note that with a fast tokenizer, using the `__call__` method is faster than using a method to encode the text followed by a call to the `pad` method to get a padded encoding.


trainable params: 354,824,192 || all params: 354,824,192 || trainable%: 100.0


Could not estimate the number of tokens of the input, floating-point operations will not be computed


Step,Training Loss,Validation Loss,Accuracy
1,0.0,0.188568,0.953795


TrainOutput(global_step=1, training_loss=9.784565008885693e-06, metrics={'train_runtime': 16.5677, 'train_samples_per_second': 0.241, 'train_steps_per_second': 0.06, 'total_flos': 0.0, 'train_loss': 9.784565008885693e-06, 'epoch': 0.0})

# 2 ELECTRA

In [None]:
# Free cached GPU memory before the ELECTRA section.
torch.cuda.empty_cache()
# The result is not assigned; as this is not the cell's last expression it is
# not displayed by this cell either.
torch.cuda.mem_get_info()
# Evaluation-only configuration: one optimizer step with learning rate 0, so
# the loaded weights should stay unchanged; evaluation presumably runs after
# that step (evaluation_strategy="steps", logging_steps=1).
training_args = TrainingArguments(
    output_dir="./reward_model_baseline",
    per_device_train_batch_size=4,
    evaluation_strategy="steps",
    learning_rate=0,
    weight_decay=0.01,
    max_steps=1,
    logging_steps=1,

)

(35884957696, 42481549312)

In [None]:
# Load the raw preference pairs from disk.
# NOTE(review): the file is decoded as cp437; if it was written as UTF-8,
# non-ASCII characters will come out garbled — confirm the source encoding.
with open('training_dataset.json', encoding="cp437", errors='ignore') as json_file:
    train = json.load(json_file)

# Length limit, also used for tokenization and by the trainer.
max_tokens = 512

# First pass: discard records whose `length_0` is above the limit.
full = pd.DataFrame(train)
full = full[full['length_0'].le(max_tokens)]

# Fixed seed so the 90/10 split is reproducible.
train, test = train_test_split(full, test_size=0.1, random_state=18625541)

# Second pass on each split: keep rows where both lengths are within the
# limit, then turn the surviving rows into a `datasets.Dataset`.
splits = {}
for split_name, frame in (("train", train), ("test", test)):
    frame = pd.DataFrame(frame)
    fits = frame['length_0'].le(max_tokens) & frame['length_1'].le(max_tokens)
    list_of_dicts = frame[fits].to_dict(orient='records')
    splits[split_name] = Dataset.from_list(list_of_dicts)
train, test = splits["train"], splits["test"]


## 2.1 Electra baseline

In [None]:
# Baseline: score the public OpenAssistant ELECTRA reward model on the test
# split. With the zero-learning-rate, single-step `training_args` of this
# section, `trainer.train()` is effectively an evaluation run.
model_name = "OpenAssistant/reward-model-electra-large-discriminator"
torch.cuda.empty_cache()
# charge_model also tokenizes both splits, so `train`/`test` are rebound here.
model, tokenizer, train, test = charge_model(model_name, train, test)
print(len(test))
trainer = RewardTrainer(
    model=model,
    args=training_args,
    tokenizer=tokenizer,
    train_dataset=train,
    eval_dataset=test,
    max_length=max_tokens
)

print_trainable_parameters(model)
trainer.train()

Map:   0%|          | 0/303 [00:00<?, ? examples/s]

Map:   0%|          | 0/2716 [00:00<?, ? examples/s]

303


You're using a ElectraTokenizerFast tokenizer. Please note that with a fast tokenizer, using the `__call__` method is faster than using a method to encode the text followed by a call to the `pad` method to get a padded encoding.


trainable params: 335,142,913 || all params: 335,142,913 || trainable%: 100.0


Could not estimate the number of tokens of the input, floating-point operations will not be computed


Step,Training Loss,Validation Loss,Accuracy
1,0.9649,0.664139,0.666667


TrainOutput(global_step=1, training_loss=0.9648730754852295, metrics={'train_runtime': 15.311, 'train_samples_per_second': 0.261, 'train_steps_per_second': 0.065, 'total_flos': 0.0, 'train_loss': 0.9648730754852295, 'epoch': 0.0})

## 2.2 Electra normal

In [None]:
# Score the checkpoint stored under NLP/m3/electra on Drive, using the same
# zero-learning-rate, single-step `training_args` as the baseline.
model_name = "MyDrive/MyDrive/NLP/m3/electra"
torch.cuda.empty_cache()
# `train`/`test` are re-tokenized with this checkpoint's tokenizer and rebound.
model, tokenizer, train, test = charge_model(model_name, train, test)
print(len(test))
trainer = RewardTrainer(
    model=model,
    args=training_args,
    tokenizer=tokenizer,
    train_dataset=train,
    eval_dataset=test,
    max_length=max_tokens
)

print_trainable_parameters(model)
trainer.train()

Map:   0%|          | 0/303 [00:00<?, ? examples/s]

Map:   0%|          | 0/2716 [00:00<?, ? examples/s]

303


You're using a ElectraTokenizerFast tokenizer. Please note that with a fast tokenizer, using the `__call__` method is faster than using a method to encode the text followed by a call to the `pad` method to get a padded encoding.


trainable params: 335,142,913 || all params: 335,142,913 || trainable%: 100.0


Could not estimate the number of tokens of the input, floating-point operations will not be computed


Step,Training Loss,Validation Loss,Accuracy
1,0.0016,0.296482,0.90099


TrainOutput(global_step=1, training_loss=0.0016334764659404755, metrics={'train_runtime': 15.3299, 'train_samples_per_second': 0.261, 'train_steps_per_second': 0.065, 'total_flos': 0.0, 'train_loss': 0.0016334764659404755, 'epoch': 0.0})

## 2.3 Electra large

In [None]:
# Score the checkpoint stored under NLP/m3/electra_large on Drive, using the
# same zero-learning-rate, single-step `training_args` as the baseline.
model_name = "MyDrive/MyDrive/NLP/m3/electra_large"
torch.cuda.empty_cache()
# `train`/`test` are re-tokenized with this checkpoint's tokenizer and rebound.
model, tokenizer, train, test = charge_model(model_name, train, test)
print(len(test))
trainer = RewardTrainer(
    model=model,
    args=training_args,
    tokenizer=tokenizer,
    train_dataset=train,
    eval_dataset=test,
    max_length=max_tokens
)

print_trainable_parameters(model)
trainer.train()

Map:   0%|          | 0/303 [00:00<?, ? examples/s]

Map:   0%|          | 0/2716 [00:00<?, ? examples/s]

303


You're using a ElectraTokenizerFast tokenizer. Please note that with a fast tokenizer, using the `__call__` method is faster than using a method to encode the text followed by a call to the `pad` method to get a padded encoding.


trainable params: 335,142,913 || all params: 335,142,913 || trainable%: 100.0


Could not estimate the number of tokens of the input, floating-point operations will not be computed


Step,Training Loss,Validation Loss,Accuracy
1,0.0,0.063363,0.963696


TrainOutput(global_step=1, training_loss=3.3592831982787175e-07, metrics={'train_runtime': 15.3093, 'train_samples_per_second': 0.261, 'train_steps_per_second': 0.065, 'total_flos': 0.0, 'train_loss': 3.3592831982787175e-07, 'epoch': 0.0})

## 2.4 Electra Large LoRA

In [None]:
# Score the LoRA adapter stored under NLP/m3/electra_large_lora on Drive.
model_name = "MyDrive/MyDrive/NLP/m3/electra_large_lora"
torch.cuda.empty_cache()


# Report the test-set size, then load the base model named in the adapter's
# PEFT config together with the adapter (lora=True) and re-tokenize the splits.
print(len(test))
model, tokenizer, train, test = charge_model(model_name, train, test, lora=True)

trainer = RewardTrainer(
    model=model,
    args=training_args,
    tokenizer=tokenizer,
    train_dataset=train,
    eval_dataset=test,
    max_length=max_tokens
)

print_trainable_parameters(model)
# With the zero-learning-rate, single-step `training_args` of this section
# this is effectively an evaluation run.
trainer.train()

303


Map:   0%|          | 0/303 [00:00<?, ? examples/s]

Map:   0%|          | 0/2716 [00:00<?, ? examples/s]

You're using a ElectraTokenizerFast tokenizer. Please note that with a fast tokenizer, using the `__call__` method is faster than using a method to encode the text followed by a call to the `pad` method to get a padded encoding.


trainable params: 2,101,250 || all params: 342,484,994 || trainable%: 0.6135305303332501


Could not estimate the number of tokens of the input, floating-point operations will not be computed


Step,Training Loss,Validation Loss,Accuracy
1,0.5432,0.435846,0.828383


TrainOutput(global_step=1, training_loss=0.5431709289550781, metrics={'train_runtime': 15.5039, 'train_samples_per_second': 0.258, 'train_steps_per_second': 0.065, 'total_flos': 0.0, 'train_loss': 0.5431709289550781, 'epoch': 0.0})

# 3 ROBERTA

In [None]:
# Release cached GPU memory that PyTorch is no longer using, then display the
# device's memory figures as the cell result.
torch.cuda.empty_cache()
torch.cuda.mem_get_info()

(38827261952, 42481549312)

In [None]:
# Load the raw preference pairs from disk.
# NOTE(review): the file is decoded as cp437; if it was written as UTF-8,
# non-ASCII characters will come out garbled — confirm the source encoding.
with open('training_dataset.json', encoding="cp437", errors='ignore') as json_file:
    train = json.load(json_file)

# Length limit, also used for tokenization and by the trainer.
max_tokens = 512

# First pass: discard records whose `length_0` is above the limit.
full = pd.DataFrame(train)
full = full[full['length_0'].le(max_tokens)]

# Fixed seed so the 90/10 split is reproducible.
train, test = train_test_split(full, test_size=0.1, random_state=18625541)

# Second pass on each split: keep rows where both lengths are within the
# limit, then turn the surviving rows into a `datasets.Dataset`.
splits = {}
for split_name, frame in (("train", train), ("test", test)):
    frame = pd.DataFrame(frame)
    fits = frame['length_0'].le(max_tokens) & frame['length_1'].le(max_tokens)
    list_of_dicts = frame[fits].to_dict(orient='records')
    splits[split_name] = Dataset.from_list(list_of_dicts)
train, test = splits["train"], splits["test"]


In [None]:


# Load the public Argilla RoBERTa reward model with a single output label.
# This cell repeats the non-LoRA path of charge_model inline, except that it
# only registers '[PAD]' as the eos token (not also as the pad token first).
model_name = "argilla/roberta-base-reward-model-falcon-dolly"

model = AutoModelForSequenceClassification.from_pretrained(model_name, num_labels=1)
tokenizer = AutoTokenizer.from_pretrained(model_name)
tokenizer.add_special_tokens({'eos_token': '[PAD]'})
tokenizer.pad_token = tokenizer.eos_token
# NOTE(review): this copies the config's eos id, which may not be the id of
# the tokenizer's '[PAD]' token — confirm this is intended.
model.config.pad_token_id = model.config.eos_token_id

def formatting_func(examples):
    """Tokenize one record's "chat_1" (chosen) and "chat_0" (rejected) texts.

    Both texts are padded/truncated to the global ``max_tokens`` and returned
    under the ``input_ids_*`` / ``attention_mask_*`` keys for each side.
    """
    kwargs = {"padding": "max_length", "truncation": True, "max_length": max_tokens, "return_tensors": "pt"}

    # "chat_1" is treated as the chosen text, "chat_0" as the rejected one.
    prompt_plus_chosen_response = examples["chat_1"]
    prompt_plus_rejected_response = examples["chat_0"]

    # Tokenize each text to a fixed-length sequence.
    tokens_chosen = tokenizer.encode_plus(prompt_plus_chosen_response, **kwargs)
    tokens_rejected = tokenizer.encode_plus(prompt_plus_rejected_response, **kwargs)

    # return_tensors="pt" adds a leading batch axis; [0] drops it again.
    return {
        "input_ids_chosen": tokens_chosen["input_ids"][0], "attention_mask_chosen": tokens_chosen["attention_mask"][0],
        "input_ids_rejected": tokens_rejected["input_ids"][0], "attention_mask_rejected": tokens_rejected["attention_mask"][0]
    }

# Add the tokenized columns to both splits.
test = test.map(formatting_func)
train = train.map(formatting_func)

Downloading (…)lve/main/config.json:   0%|          | 0.00/769 [00:00<?, ?B/s]

Downloading model.safetensors:   0%|          | 0.00/328M [00:00<?, ?B/s]

Downloading (…)okenizer_config.json:   0%|          | 0.00/351 [00:00<?, ?B/s]

Downloading (…)olve/main/vocab.json:   0%|          | 0.00/798k [00:00<?, ?B/s]

Downloading (…)olve/main/merges.txt:   0%|          | 0.00/456k [00:00<?, ?B/s]

Downloading (…)/main/tokenizer.json:   0%|          | 0.00/2.11M [00:00<?, ?B/s]

Downloading (…)cial_tokens_map.json:   0%|          | 0.00/280 [00:00<?, ?B/s]

Map:   0%|          | 0/303 [00:00<?, ? examples/s]

Map:   0%|          | 0/2716 [00:00<?, ? examples/s]

In [None]:
# LoRA hyperparameters: adapter rank, scaling factor and dropout.
lora_r = 64
lora_alpha = 16
lora_dropout = 0.1

# Wrap the already-loaded model with LoRA adapters for sequence classification
# (no bias terms are trained) and report the resulting trainable share.
peft_config = LoraConfig(
    task_type=TaskType.SEQ_CLS,
    r=lora_r,
    lora_alpha=lora_alpha,
    lora_dropout=lora_dropout,
    bias="none",
)
model = get_peft_model(model, peft_config)
print_trainable_parameters(model)

trainable params: 2,362,370 || all params: 83,890,178 || trainable%: 2.8160269251067747


In [None]:
# Evaluation-only configuration for the RoBERTa model: the learning rate is 0,
# so the step taken should leave the weights unchanged, and evaluation is
# presumably triggered after it (evaluation_strategy="steps", logging_steps=1).
training_args = TrainingArguments(
    output_dir="./reward_model_roberta",
    per_device_train_batch_size=1,
    evaluation_strategy="steps",
    learning_rate=0,
    warmup_steps=10000,
    # max_steps only caps training when it is positive. The previous value 0
    # is treated as "not set", so the run would last the default number of
    # epochs and evaluate after every step. Use a single step, as the other
    # evaluation cells in this notebook do.
    max_steps=1,
    logging_steps=1,
)
trainer = RewardTrainer(
    model=model,
    args=training_args,
    tokenizer=tokenizer,
    train_dataset=train,
    eval_dataset=test,
    max_length=max_tokens
)

trainer.train()

You're using a RobertaTokenizerFast tokenizer. Please note that with a fast tokenizer, using the `__call__` method is faster than using a method to encode the text followed by a call to the `pad` method to get a padded encoding.


# 4 OPT

In [None]:
# Free cached GPU memory before the OPT section.
torch.cuda.empty_cache()
# The result is not assigned; as this is not the cell's last expression it is
# not displayed by this cell either.
torch.cuda.mem_get_info()
# Evaluation-only configuration (one step, learning rate 0). The OPT cells
# below leave their evaluation trainer commented out and define separate
# fine-tuning arguments before training.
training_args = TrainingArguments(
    output_dir="./reward_model_baseline",
    per_device_train_batch_size=4,
    evaluation_strategy="steps",
    learning_rate=0,
    weight_decay=0.01,
    max_steps=1,
    logging_steps=1,
)

In [None]:
# Load the raw preference pairs from disk.
# NOTE(review): the file is decoded as cp437; if it was written as UTF-8,
# non-ASCII characters will come out garbled — confirm the source encoding.
with open('training_dataset.json', encoding="cp437", errors='ignore') as json_file:
    train = json.load(json_file)

# Length limit, also used for tokenization and by the trainer.
max_tokens = 512

# First pass: discard records whose `length_0` is above the limit.
full = pd.DataFrame(train)
full = full[full['length_0'].le(max_tokens)]

# Fixed seed so the 90/10 split is reproducible.
train, test = train_test_split(full, test_size=0.1, random_state=18625541)

# Second pass on each split: keep rows where both lengths are within the
# limit, then turn the surviving rows into a `datasets.Dataset`.
splits = {}
for split_name, frame in (("train", train), ("test", test)):
    frame = pd.DataFrame(frame)
    fits = frame['length_0'].le(max_tokens) & frame['length_1'].le(max_tokens)
    list_of_dicts = frame[fits].to_dict(orient='records')
    splits[split_name] = Dataset.from_list(list_of_dicts)
train, test = splits["train"], splits["test"]


In [None]:
# Load the public OPT-350m reward checkpoint and tokenize both splits.
model_name = "AdamG012/chat-opt-350m-reward-deepspeed"
torch.cuda.empty_cache()
model, tokenizer, train, test = charge_model(model_name, train, test)
# The evaluation-only trainer is disabled for this model; the following cell
# builds a trainer with real fine-tuning hyperparameters instead.
# print(len(test))
# trainer = RewardTrainer(
#     model=model,
#     args=training_args,
#     tokenizer=tokenizer,
#     train_dataset=train,
#     eval_dataset=test,
#     max_length=max_tokens
# )

# print_trainable_parameters(model)
# trainer.train()

Some weights of OPTForSequenceClassification were not initialized from the model checkpoint at AdamG012/chat-opt-350m-reward-deepspeed and are newly initialized: ['decoder.layers.12.self_attn_layer_norm.bias', 'decoder.layers.5.fc1.bias', 'decoder.layers.20.self_attn.k_proj.weight', 'decoder.layers.10.final_layer_norm.bias', 'decoder.layers.22.fc1.bias', 'decoder.layers.5.fc2.weight', 'decoder.layers.13.self_attn.out_proj.weight', 'decoder.layers.17.final_layer_norm.weight', 'decoder.layers.3.fc2.bias', 'decoder.layers.22.self_attn.q_proj.bias', 'decoder.layers.23.self_attn.out_proj.weight', 'decoder.project_in.weight', 'decoder.layers.14.fc1.weight', 'decoder.layers.21.final_layer_norm.bias', 'decoder.layers.5.self_attn.k_proj.bias', 'decoder.layers.15.self_attn.q_proj.weight', 'decoder.layers.19.fc2.weight', 'decoder.layers.18.fc2.weight', 'decoder.layers.1.self_attn.k_proj.weight', 'decoder.layers.2.fc2.bias', 'decoder.layers.16.self_attn.out_proj.bias', 'decoder.layers.0.self_attn.

Map:   0%|          | 0/303 [00:00<?, ? examples/s]

Map:   0%|          | 0/2716 [00:00<?, ? examples/s]

In [None]:
# Actual fine-tuning run for the OPT model (unlike the evaluation-only cells):
# non-zero learning rate, 300 warmup steps, and no max_steps cap, so the
# number of epochs falls back to the TrainingArguments default. Metrics are
# logged every 150 steps; evaluation presumably follows the same interval
# since eval_steps is not set.
training_args = TrainingArguments(
    output_dir="./reward_model_opt",
    per_device_train_batch_size=4,
    evaluation_strategy="steps",
    learning_rate=1e-5,
    weight_decay=0.01,
    warmup_steps=300,
    logging_steps=150,
)
trainer = RewardTrainer(
    model=model,
    args=training_args,
    tokenizer=tokenizer,
    train_dataset=train,
    eval_dataset=test,
    max_length=max_tokens,
)

trainer.train()

You're using a GPT2TokenizerFast tokenizer. Please note that with a fast tokenizer, using the `__call__` method is faster than using a method to encode the text followed by a call to the `pad` method to get a padded encoding.
Could not estimate the number of tokens of the input, floating-point operations will not be computed


Step,Training Loss,Validation Loss,Accuracy
150,0.6699,0.681724,0.594059
300,0.6734,0.642334,0.590759
450,0.6776,0.760965,0.60066
600,0.6569,0.638725,0.59736
750,0.681,0.688787,0.594059
900,0.7039,0.663625,0.590759
1050,0.6743,0.687973,0.587459
1200,0.6386,0.629164,0.633663
1350,0.6338,0.687683,0.712871
1500,0.5523,0.620814,0.732673




TrainOutput(global_step=2037, training_loss=0.620115992715093, metrics={'train_runtime': 1525.1582, 'train_samples_per_second': 5.342, 'train_steps_per_second': 1.336, 'total_flos': 0.0, 'train_loss': 0.620115992715093, 'epoch': 3.0})

# 5 DEBERTA

In [None]:
# Free cached GPU memory before the DeBERTa section.
torch.cuda.empty_cache()
# The result is not assigned; as this is not the cell's last expression it is
# not displayed by this cell either.
torch.cuda.mem_get_info()
# Evaluation-only configuration: one optimizer step with learning rate 0, so
# the loaded weights should stay unchanged; evaluation presumably runs after
# that step (evaluation_strategy="steps", logging_steps=1).
training_args = TrainingArguments(
    output_dir="./reward_model_baseline",
    per_device_train_batch_size=4,
    evaluation_strategy="steps",
    learning_rate=0,
    weight_decay=0.01,
    max_steps=1,
    logging_steps=1,

)

(38827261952, 42481549312)

In [None]:
# Load the raw preference pairs from disk.
# NOTE(review): the file is decoded as cp437; if it was written as UTF-8,
# non-ASCII characters will come out garbled — confirm the source encoding.
with open('training_dataset.json', encoding="cp437", errors='ignore') as json_file:
    train = json.load(json_file)

# Length limit, also used for tokenization and by the trainer.
max_tokens = 512

# First pass: discard records whose `length_0` is above the limit.
full = pd.DataFrame(train)
full = full[full['length_0'].le(max_tokens)]

# Fixed seed so the 90/10 split is reproducible.
train, test = train_test_split(full, test_size=0.1, random_state=18625541)

# Second pass on each split: keep rows where both lengths are within the
# limit, then turn the surviving rows into a `datasets.Dataset`.
splits = {}
for split_name, frame in (("train", train), ("test", test)):
    frame = pd.DataFrame(frame)
    fits = frame['length_0'].le(max_tokens) & frame['length_1'].le(max_tokens)
    list_of_dicts = frame[fits].to_dict(orient='records')
    splits[split_name] = Dataset.from_list(list_of_dicts)
train, test = splits["train"], splits["test"]


## 5.1 Deberta Baseline

In [None]:
# Baseline: score the public OpenAssistant DeBERTa-v3 reward model on the test
# split. With the zero-learning-rate, single-step `training_args` of this
# section, `trainer.train()` is effectively an evaluation run.
model_name = "OpenAssistant/reward-model-deberta-v3-large-v2"
torch.cuda.empty_cache()
# charge_model also tokenizes both splits, so `train`/`test` are rebound here.
model, tokenizer, train, test = charge_model(model_name, train, test)
print(len(test))
trainer = RewardTrainer(
    model=model,
    args=training_args,
    tokenizer=tokenizer,
    train_dataset=train,
    eval_dataset=test,
    max_length=max_tokens
)

print_trainable_parameters(model)
trainer.train()

Downloading (…)lve/main/config.json:   0%|          | 0.00/993 [00:00<?, ?B/s]

Downloading pytorch_model.bin:   0%|          | 0.00/1.74G [00:00<?, ?B/s]

Downloading (…)okenizer_config.json:   0%|          | 0.00/455 [00:00<?, ?B/s]

Downloading spm.model:   0%|          | 0.00/2.46M [00:00<?, ?B/s]

Downloading (…)/main/tokenizer.json:   0%|          | 0.00/8.66M [00:00<?, ?B/s]

Downloading (…)in/added_tokens.json:   0%|          | 0.00/23.0 [00:00<?, ?B/s]

Downloading (…)cial_tokens_map.json:   0%|          | 0.00/173 [00:00<?, ?B/s]

Map:   0%|          | 0/303 [00:00<?, ? examples/s]

Map:   0%|          | 0/2716 [00:00<?, ? examples/s]

303


You're using a DebertaV2TokenizerFast tokenizer. Please note that with a fast tokenizer, using the `__call__` method is faster than using a method to encode the text followed by a call to the `pad` method to get a padded encoding.


trainable params: 435,062,785 || all params: 435,062,785 || trainable%: 100.0


Could not estimate the number of tokens of the input, floating-point operations will not be computed


Step,Training Loss,Validation Loss,Accuracy
1,0.5841,0.507798,0.735974


TrainOutput(global_step=1, training_loss=0.5840520858764648, metrics={'train_runtime': 20.8218, 'train_samples_per_second': 0.192, 'train_steps_per_second': 0.048, 'total_flos': 0.0, 'train_loss': 0.5840520858764648, 'epoch': 0.0})

## 5.2 Deberta

In [None]:
# Score the checkpoint stored under NLP/m3/Deberta on Drive, using the same
# zero-learning-rate, single-step `training_args` as the baseline.
model_name = "MyDrive/MyDrive/NLP/m3/Deberta"
torch.cuda.empty_cache()
# `train`/`test` are re-tokenized with this checkpoint's tokenizer and rebound.
model, tokenizer, train, test = charge_model(model_name, train, test)
print(len(test))
trainer = RewardTrainer(
    model=model,
    args=training_args,
    tokenizer=tokenizer,
    train_dataset=train,
    eval_dataset=test,
    max_length=max_tokens
)

print_trainable_parameters(model)
trainer.train()

Map:   0%|          | 0/303 [00:00<?, ? examples/s]

Map:   0%|          | 0/2716 [00:00<?, ? examples/s]

303


You're using a DebertaV2TokenizerFast tokenizer. Please note that with a fast tokenizer, using the `__call__` method is faster than using a method to encode the text followed by a call to the `pad` method to get a padded encoding.


trainable params: 435,062,785 || all params: 435,062,785 || trainable%: 100.0


Could not estimate the number of tokens of the input, floating-point operations will not be computed


Step,Training Loss,Validation Loss,Accuracy
1,0.0002,0.371989,0.871287


TrainOutput(global_step=1, training_loss=0.0001601248950464651, metrics={'train_runtime': 20.8195, 'train_samples_per_second': 0.192, 'train_steps_per_second': 0.048, 'total_flos': 0.0, 'train_loss': 0.0001601248950464651, 'epoch': 0.0})

## 5.3 Deberta Lora

In [None]:
# Score the LoRA adapter stored under NLP/m3/Deberta_lora on Drive; lora=True
# makes charge_model load the base model named in the adapter's PEFT config
# and attach the adapter to it.
model_name = "MyDrive/MyDrive/NLP/m3/Deberta_lora"
torch.cuda.empty_cache()
model, tokenizer, train, test = charge_model(model_name, train, test, lora=True)
print(len(test))
trainer = RewardTrainer(
    model=model,
    args=training_args,
    tokenizer=tokenizer,
    train_dataset=train,
    eval_dataset=test,
    max_length=max_tokens
)

print_trainable_parameters(model)
# With the zero-learning-rate, single-step `training_args` of this section
# this is effectively an evaluation run.
trainer.train()

Map:   0%|          | 0/303 [00:00<?, ? examples/s]

Map:   0%|          | 0/2716 [00:00<?, ? examples/s]

303
trainable params: 2,050 || all params: 441,355,266 || trainable%: 0.0004644784276800721


You're using a DebertaV2TokenizerFast tokenizer. Please note that with a fast tokenizer, using the `__call__` method is faster than using a method to encode the text followed by a call to the `pad` method to get a padded encoding.
Could not estimate the number of tokens of the input, floating-point operations will not be computed


Step,Training Loss,Validation Loss,Accuracy
1,0.2572,0.426315,0.828383


TrainOutput(global_step=1, training_loss=0.25715184211730957, metrics={'train_runtime': 20.891, 'train_samples_per_second': 0.191, 'train_steps_per_second': 0.048, 'total_flos': 0.0, 'train_loss': 0.25715184211730957, 'epoch': 0.0})