In [1]:
import numpy as np
import pandas as pd
from tqdm import tqdm
import matplotlib.pyplot as plt
from numpy import dtype

from datasets import Dataset

import torch
from peft import LoraConfig

from datasets import load_dataset
from trl import DPOConfig, DPOTrainer, BCOTrainer, BCOConfig
from transformers import AutoModelForCausalLM, AutoTokenizer

In [2]:
import os
os.environ["TOKENIZERS_PARALLELISM"] = "false"

In [3]:
import sys
sys.path.append("../")

from essay_scorer import EssayScorer

In [4]:
es = EssayScorer.load('../head_model')

In [5]:
task1 = 'Fresh water has always been a limited resource in some parts of the world. Today, however, growing worldwide demand has made this a global problem. What are the causes of the increased demand and what measure could governments and individuals take to respond to this problem?'

In [6]:
essay1 = '''Fresh water resource is a worldwide issue, that is related to varied factors, and it should be involve every country in the world. This essay will illustrate some reasons of the increasing demand and some solutions to solve as soon as possible.

Nowadays, because of industrialization, many European country has resolved this problem because every citizen are able to get drinkable water, but there is a huge difference with the third countries. Although, European people do not miss water, limited water resource is becoming a global emergency that could affect every nation.

Firstly, the rising request is related to the overpopulation; that is the reason why Chinese government has tried to reduce and regulate birth, so they have got the power to limit and control the population growth; this is not a reasonable political action to take.

Secondly, the overpopulation has caused a growing demand of food, so further water has been used by farmer for agriculture; moreover, not all the farms have an appropriate and modern infrastructures and machines to give water to the soil, consequently a huge amount of this natural resource is wasted.

A solution could be taken by local governments, such as, investments in new technologies, which are able to reuse water from soil, or more invested money on the creation of food in laboratories, in this last scenario a vast amount of water could be saved.

In addition, global warming has affected significantly this issue, for this reason international cooperation should try to reduce the environment changing to preserve water, avoiding deforestation and pollution, increasing renewable energy.
I would suggest teaching and educating children and students to save water in everyday life, these lessons could help them to notice their usage in their private house.

In conclusion, I think some solutions could be found to fix the problem; however, we should work all together to make the difference.'''

In [7]:
# band 6
es.score(task1, essay1)

  return self.essay_scorer_head_model(torch.FloatTensor([embedding]))


tensor([[5.8518]], grad_fn=<AddmmBackward0>)

In [8]:
model_name = 'meta-llama/Llama-3.2-3B-Instruct'

In [24]:
model = AutoModelForCausalLM.from_pretrained(model_name, torch_dtype=torch.bfloat16, low_cpu_mem_usage=True)

AttributeError: module 'torch' has no attribute 'bfloat8'

In [10]:
ref_model = AutoModelForCausalLM.from_pretrained(model_name, torch_dtype=torch.bfloat8, low_cpu_mem_usage=True)

Loading checkpoint shards:   0%|          | 0/2 [00:00<?, ?it/s]

In [20]:
tokenizer = AutoTokenizer.from_pretrained(model_name)
tokenizer.pad_token = tokenizer.eos_token

In [12]:
# create data set
# we need dataset in preference format - promt, chosen, rejected
# https://huggingface.co/docs/trl/dataset_formats#preference
# https://huggingface.co/docs/trl/v0.12.1/en/bco_trainer#trl.BCOTrainer

path = '/home/ubuntu/sync_dir/src/bash_scripts/tmp_2024-11-26=17:18:48.665754/batch_df_2024-11-27=05:00:56.421481.csv'
df = pd.read_csv(path)

In [13]:
NEW_LINE_STR = '<NL>'

PROMT = (
    'Your task is to:\n' +

    'Enhance the essay\'s clarity, grammar, vocabulary, and sentence structure while preserving the original meaning and ideas.\n' +
    'Avoid introducing new arguments or changing the essay’s overall structure or tone.\n' +
    'Limit changes to essential improvements—do not rewrite the essay extensively or modify more than necessary.\n' +
    'Please provide only the revised essay\n' +

    'Topic: {task}\n' +
    'Essay: {essay}\n' +

    '-----------------------\n'
    'Write new essay under this line and nothing else: \n'
    '-----------------------\n'
)

In [14]:
dataset_dict = {
    'prompt': [],
    'completion': [],
    'label': [],
    
}

for task, essay, from_essay, from_band in zip(df['task'], df['essay'], df['from_essay'], df['from_band']):
    essay = essay.replace(NEW_LINE_STR, '\n')
    essay_score = float(es.score(task, essay)[0])
    from_essay = from_essay.replace(NEW_LINE_STR, '\n')
    prompt = PROMT.format(task=task, essay=from_essay)

    if abs(essay_score - from_band) < 0.5:
        continue

    unpaired_preference_example = {
        'prompt': prompt,
        'completion': essay,
        'label': bool(essay_score > from_band),
    }

    for k in unpaired_preference_example:
        dataset_dict[k].append(unpaired_preference_example[k])

train_dataset = Dataset.from_dict(dataset_dict)

In [15]:
train_dataset

Dataset({
    features: ['prompt', 'completion', 'label'],
    num_rows: 26
})

In [16]:
dataset_dict['label']

[False,
 False,
 True,
 True,
 False,
 False,
 False,
 False,
 False,
 True,
 False,
 True,
 False,
 True,
 True,
 True,
 True,
 True,
 False,
 True,
 False,
 True,
 False,
 True,
 True,
 True]

In [22]:
training_args = BCOConfig(
    beta=0.1,
    output_dir='./bco_output_dir'
)

bco_trainer = BCOTrainer(
    model,
    ref_model,
    args=training_args,
    train_dataset=train_dataset,
    processing_class=tokenizer,
)



Map:   0%|          | 0/26 [00:00<?, ? examples/s]

Tokenizing train dataset:   0%|          | 0/26 [00:00<?, ? examples/s]

Processing tokenized train dataset:   0%|          | 0/26 [00:00<?, ? examples/s]

Filtering desirable examples:   0%|          | 0/26 [00:00<?, ? examples/s]

Filtering undesirable examples:   0%|          | 0/26 [00:00<?, ? examples/s]



In [23]:
bco_trainer.train()

OutOfMemoryError: CUDA out of memory. Tried to allocate 24.00 MiB. GPU 0 has a total capacity of 21.98 GiB of which 2.44 MiB is free. Including non-PyTorch memory, this process has 21.96 GiB memory in use. Of the allocated memory 21.56 GiB is allocated by PyTorch, and 109.46 MiB is reserved by PyTorch but unallocated. If reserved but unallocated memory is large try setting PYTORCH_CUDA_ALLOC_CONF=expandable_segments:True to avoid fragmentation.  See documentation for Memory Management  (https://pytorch.org/docs/stable/notes/cuda.html#environment-variables)