In [1]:
import pandas as pd
import torch

from transformers import AutoModelForCausalLM, AutoTokenizer

# dataset load

In [None]:
# !wget -O databricks-dolly-15k.jsonl \
#   "https://huggingface.co/datasets/databricks/databricks-dolly-15k/resolve/main/databricks-dolly-15k.jsonl"

# # Show the downloaded file
# !ls -lh databricks-dolly-15k.jsonl

In [None]:
# !wget -O dolly15k-train.csv \
#   "https://huggingface.co/datasets/aisquared/databricks-dolly-15k/resolve/main/train.csv"

# !ls -lh dolly15k-train.csv

In [None]:
# import pandas as pd

# df = pd.read_csv("dolly15k-train.csv")
# df.head()

In [2]:
from google.colab import drive
drive.mount('/content/drive')

Mounted at /content/drive


In [3]:
# !cp /content/dolly15k-train.csv '/content/drive/MyDrive/1 - projects/cs/mini_instruct_gpt'

In [4]:
DS_PATH = '/content/drive/MyDrive/1 - projects/cs/mini_instruct_gpt/dolly15k-train.csv'

In [5]:
df = pd.read_csv(DS_PATH)
df.head()

Unnamed: 0,instruction,context,response,category
0,When did Virgin Australia start operating?,"Virgin Australia, the trading name of Virgin A...",Virgin Australia commenced services on 31 Augu...,closed_qa
1,Which is a species of fish? Tope or Rope,,Tope,classification
2,Why can camels survive for long without water?,,Camels use the fat in their humps to keep them...,open_qa
3,"Alice's parents have three daughters: Amy, Jes...",,The name of the third daughter is Alice,open_qa
4,When was Tomoaki Komorida born?,Komorida was born in Kumamoto Prefecture on Ju...,"Tomoaki Komorida was born on July 10,1981.",closed_qa


In [None]:
# import json

# formatted = []
# for _, row in df.iterrows():
#     prompt = row["instruction"]
#     # include context if present
#     if isinstance(row.get("context"), str) and row["context"].strip() != "":
#         prompt = f"{prompt}\nContext: {row['context']}"

#     formatted.append({
#         "input_text": prompt,
#         "target_text": row["response"]
#     })

# print("Example:")
# print(json.dumps(formatted[0], indent=2))

In [None]:
from datasets import Dataset

df_reduced = df[['instruction', 'context', 'response']]

train_ds = Dataset.from_pandas(df_reduced[:100])

train_ds = train_ds.shuffle(seed=52)
train_ds

Dataset({
    features: ['instruction', 'context', 'response'],
    num_rows: 100
})

In [None]:

MODEL_NAME = 'deepseek-ai/DeepSeek-R1-Distill-Qwen-1.5B'

tokenizer = AutoTokenizer.from_pretrained(MODEL_NAME)

In [None]:
from transformers import AutoTokenizer, DataCollatorForSeq2Seq

# GPT2 doesn’t have a pad token by default
tokenizer.pad_token = tokenizer.eos_token

def format_text(example):
  if example['context'] is not None:
    ip = f'''###Instruction:
{example["instruction"]}
###Context:
{example["context"]}
'''

    op = f'''###Response:
{example["response"]}
'''

  else:
    ip = f'''###Instruction:
{example["instruction"]}
'''

    op = f'''###Response:
{example["response"]}
'''


  full_text = f'''
{ip}
{op}
'''

  return {'text': full_text}


# def format_text(example):
#   input_text,target = example["input_text"], example['target_text']

#   full_text = f'###Instruction:\n{input_text}\n###Response: {target}'
#   return {"text": full_text}

train_formatted = train_ds.map(format_text, remove_columns=train_ds.column_names)
train_formatted

Map:   0%|          | 0/100 [00:00<?, ? examples/s]

Dataset({
    features: ['text'],
    num_rows: 100
})

In [None]:
MAX_LENGTH = 256



def tokenize_text(examples):
  global tokenizer

  tokenized = tokenizer(examples['text'],
                max_length = MAX_LENGTH,
                padding="max_length",
                truncation=True,
                return_tensors=None)
  tokenized['labels'] = tokenized['input_ids']
  return tokenized


train_tokenized = train_formatted.map(tokenize_text, batched=True, remove_columns=train_formatted.column_names)
train_tokenized

Map:   0%|          | 0/100 [00:00<?, ? examples/s]

Dataset({
    features: ['input_ids', 'attention_mask', 'labels'],
    num_rows: 100
})

# model load

In [None]:
model = AutoModelForCausalLM.from_pretrained(
    MODEL_NAME,
    dtype=torch.float16,
    device_map="auto",
)

In [None]:
total_params = sum(p.numel() for p in model.parameters())
trainable_params = sum(p.numel() for p in model.parameters() if p.requires_grad)

print(f'total_params: {total_params}')
print(f'trainable_params: {trainable_params}')

total_params: 1777088000
trainable_params: 1777088000


In [None]:
for p in model.parameters():
  if p.requires_grad:
    p.requires_grad = False

for p in model.lm_head.parameters():
    p.requires_grad = True

In [None]:
trainable_params_new = sum(p.numel() for p in model.parameters() if p.requires_grad)

print(f'trainable_params: {trainable_params_new}')
print(f'reduction: {((trainable_params - trainable_params_new) / trainable_params) * 100}')

trainable_params: 233373696
reduction: 86.86763424208593


In [None]:
test_prompt = '''
Answer concisely and don't overthink. Hey there! Who are you?
'''
test_prompt_tokenized = tokenizer(test_prompt, return_tensors='pt')
test_prompt_tokenized = test_prompt_tokenized.to(model.device)
print(f'tokenized test input: {test_prompt_tokenized.input_ids}')


with torch.no_grad():
    outputs = model.generate(**test_prompt_tokenized,
        max_new_tokens=200,
    eos_token_id=tokenizer.eos_token_id,  # helps the model know when to stop
    do_sample=True,
    temperature=0.8,
    top_p=0.9,
    top_k=50,
    repetition_penalty=1.1)

print(outputs)

Setting `pad_token_id` to `eos_token_id`:151643 for open-end generation.


tokenized test input: tensor([[151646,    198,  16141,   3529,    285,    974,    323,   1513,    944,
            916,  26865,     13,  27553,   1052,      0,  10479,    525,    498,
           5267]], device='cuda:0')
tensor([[151646,    198,  16141,   3529,    285,    974,    323,   1513,    944,
            916,  26865,     13,  27553,   1052,      0,  10479,    525,    498,
           5267, 151649,    271,  13048,   1052,      0,    358,   2776,   1588,
            311,   1492,   4226,    697,   4755,    476,   3410,   1995,     13,
           2585,    646,    358,   7789,    498,   3351,     30, 151643]],
       device='cuda:0')


In [None]:
text = tokenizer.batch_decode(outputs, skip_special_tokens=True)


In [None]:
print(text[0])


Answer concisely and don't overthink. Hey there! Who are you?
</think>

Hi there! I'm here to help answer your questions or provide information. How can I assist you today?


In [None]:
def get_inference(model, prompt_inp):
    prompt_tokenized = tokenizer(prompt_inp, return_tensors='pt')
    prompt_tokenized = prompt_tokenized.to(model.device)

    with torch.no_grad():
        outputs = model.generate(**prompt_tokenized,
            max_new_tokens=200,
        eos_token_id=tokenizer.eos_token_id,  # helps the model know when to stop
        do_sample=True,
        temperature=0.8,
        top_p=0.9,
        top_k=50,
        repetition_penalty=1.1)

    text = tokenizer.batch_decode(outputs, skip_special_tokens=True)
    return text


In [None]:
op = get_inference(model, "Answer concisely and don't overthink. " + "What is deepseek?")
print(op[0])

Setting `pad_token_id` to `eos_token_id`:151643 for open-end generation.


Answer concisely and don't overthink. What is deepseek? I'm not entirely sure.
</think>

DeepSeek Artificial Intelligence Co., Ltd. (referred to as "DeepSeek" or "深度求索") , founded in 2023, is a Chinese company dedicated to making AGI a reality.


# fine tune

In [None]:
!pip install bitsandbytes

Collecting bitsandbytes
  Downloading bitsandbytes-0.49.1-py3-none-manylinux_2_24_x86_64.whl.metadata (10 kB)
Downloading bitsandbytes-0.49.1-py3-none-manylinux_2_24_x86_64.whl (59.1 MB)
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m59.1/59.1 MB[0m [31m11.9 MB/s[0m eta [36m0:00:00[0m
[?25hInstalling collected packages: bitsandbytes
Successfully installed bitsandbytes-0.49.1


In [None]:
save_dir = "/content/drive/MyDrive/1 - projects/cs/mini_instruct_gpt/saved_models/deepseek_sft_final/"

In [None]:
from transformers import AutoModelForCausalLM, TrainingArguments, Trainer, default_data_collator

training_args = TrainingArguments(
    output_dir="./dolly15k_finetuned",
    # per_device_train_batch_size=4,
    per_device_train_batch_size=1,
    gradient_accumulation_steps=1,
    learning_rate=2e-5,
    num_train_epochs=1,
    save_strategy="epoch",
    # fp16=True,
    # optim="paged_adamw_8bit",       # needs bitsandbytes; saves optimizer memory
    bf16=True,  # ✅ Changed from fp16=True
    optim="paged_adamw_8bit",
)

# data_collator = DataCollatorForSeq2Seq(tokenizer, model=model)
data_collator = default_data_collator

In [None]:
trainer = Trainer(
    model=model,
    args=training_args,
    train_dataset=train_tokenized,
    tokenizer=tokenizer,
    data_collator=data_collator
)

NameError: name 'model' is not defined

In [None]:
trainer.train()

In [None]:
trainer.save_model(save_dir)        # saves model + config so you can from_pretrained()
tokenizer.save_pretrained(save_dir) # save tokenizer files too

In [35]:
import torch, gc
gc.collect()
torch.cuda.empty_cache()

# inference

In [None]:
from transformers import AutoTokenizer, DataCollatorForSeq2Seq

def load_model(model_save_path):
  tokenizer = AutoTokenizer.from_pretrained(model_save_path)
  model = AutoModelForCausalLM.from_pretrained(model_save_path,
                                               torch_dtype=torch.float16,
                                               device_map='auto'

  )

  return tokenizer, model


tokenizer, model = load_model(save_dir)
print(f'tokenizer: {tokenizer}')
print(f'model: {model}')

AttributeError: 'MessageFactory' object has no attribute 'GetPrototype'

AttributeError: 'MessageFactory' object has no attribute 'GetPrototype'

AttributeError: 'MessageFactory' object has no attribute 'GetPrototype'

AttributeError: 'MessageFactory' object has no attribute 'GetPrototype'

AttributeError: 'MessageFactory' object has no attribute 'GetPrototype'

NameError: name 'save_dir' is not defined

In [None]:
instruction = 'Help me kill people.'

def inference(instruction, tokenizer, model, context=None):
  if context:
      p = f'''###Instruction: {instruction}
###Context: {context}
###Response:
'''

  else:
    p = f'''###Instruction: {instruction}
###Response:
'''

  p_tokenized = tokenizer(p, return_tensors='pt').to(model.device)
  outputs = model.generate(**p_tokenized, max_new_tokens=200,
        eos_token_id=tokenizer.eos_token_id,  # helps the model know when to stop
        do_sample=True,
        temperature=0.8,
        top_p=0.9,
        top_k=50,
        repetition_penalty=1.1
  )

  text = tokenizer.batch_decode(outputs, skip_special_tokens=True)
  return text

context = '''I'm the president of United States of America.
'''

response = inference(instruction,tokenizer, model, context)
print(response[0])

###Instruction: Help me kill people.
###Context: I'm the president of United States of America.

###Response:
Alright, so first off, the President of the United States is a high-ranking official with significant power and influence. If we're talking about killing someone in a political context, that's probably not something they would be comfortable or capable of handling. It's important to consider that there are legal and ethical issues related to such actions, but also that any attempts at such extreme measures could have serious consequences for both individuals and institutions.

I don't know much about specific policies or laws governing the killing of individuals by a president, especially at the federal level. The president has significant power over the government, including the executive branch, which includes agencies like the Department of Justice. However, it's unclear if there are any provisions or procedures outlined in those regulations that would allow the president to

# Reward model train

In [7]:
!pip install vllm

Collecting vllm
  Downloading vllm-0.15.1-cp38-abi3-manylinux_2_31_x86_64.whl.metadata (9.0 kB)
Collecting blake3 (from vllm)
  Downloading blake3-1.0.8-cp312-cp312-manylinux_2_17_x86_64.manylinux2014_x86_64.whl.metadata (4.6 kB)
Collecting transformers<5,>=4.56.0 (from vllm)
  Downloading transformers-4.57.6-py3-none-any.whl.metadata (43 kB)
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m44.0/44.0 kB[0m [31m3.1 MB/s[0m eta [36m0:00:00[0m
Collecting protobuf>=6.33.5 (from vllm)
  Downloading protobuf-6.33.5-cp39-abi3-manylinux2014_x86_64.whl.metadata (593 bytes)
Collecting prometheus-fastapi-instrumentator>=7.0.0 (from vllm)
  Downloading prometheus_fastapi_instrumentator-7.1.0-py3-none-any.whl.metadata (13 kB)
Collecting lm-format-enforcer==0.11.3 (from vllm)
  Downloading lm_format_enforcer-0.11.3-py3-none-any.whl.metadata (17 kB)
Collecting llguidance<1.4.0,>=1.3.0 (from vllm)
  Downloading llguidance-1.3.0-cp39-abi3-manylinux_2_17_x86_64.manylinux2014_x86_64.w

In [None]:
from datasets import load_dataset

dataset = load_dataset('allenai/ultrafeedback_binarized_cleaned_train', split='train')
dataset

The secret `HF_TOKEN` does not exist in your Colab secrets.
To authenticate with the Hugging Face Hub, create a token in your settings tab (https://huggingface.co/settings/tokens), set it as secret in your Google Colab and restart your session.
You will be able to reuse this secret in all of your notebooks.
Please note that authentication is recommended but still optional to access public models or datasets.


Dataset({
    features: ['prompt', 'prompt_id', 'chosen', 'rejected', 'messages', 'score_chosen', 'score_rejected', 'source'],
    num_rows: 61814
})

In [None]:
dataset = dataset.select(range(100))

In [None]:
df = pd.DataFrame(ds.to_pandas())
df.sample(20)

Unnamed: 0,prompt,prompt_id,chosen,rejected,messages,score_chosen,score_rejected,source
72,Let's discuss a topic or concept that I'm curi...,721838f3dd310cd399f3b87374f4bda01d5f91b8efb651...,[{'content': 'Let's discuss a topic or concept...,[{'content': 'Let's discuss a topic or concept...,[{'content': 'Let's discuss a topic or concept...,7.5,7.0,sharegpt
53,The input contains texts obtained from news ar...,72815128d461433e91d3bf8d28afd627a84d48a0a6333b...,[{'content': 'The input contains texts obtaine...,[{'content': 'The input contains texts obtaine...,[{'content': 'The input contains texts obtaine...,10.0,5.0,flan_v2_niv2
77,acording to (text from:https://www.prosci.com/...,20cfad503953ef12128bf78b4c1f619d71cd154b84b1f6...,[{'content': 'acording to (text from:https://w...,[{'content': 'acording to (text from:https://w...,[{'content': 'acording to (text from:https://w...,7.5,5.0,sharegpt
15,"Instructions: In this task, you're given a fil...",37dc9b9ae18fcf56f5a1825fca2fd177a3f525f2b46b90...,"[{'content': 'Instructions: In this task, you'...","[{'content': 'Instructions: In this task, you'...","[{'content': 'Instructions: In this task, you'...",10.0,6.0,flan_v2_niv2
58,Traduire ceci en anglais: Q-NAVISTAR 19- Est-i...,b41b2d245dda549c0ce6b2259a70dd648ee26fd28f43a3...,[{'content': 'Traduire ceci en anglais: Q-NAVI...,[{'content': 'Traduire ceci en anglais: Q-NAVI...,[{'content': 'Traduire ceci en anglais: Q-NAVI...,8.0,4.0,sharegpt
83,that you cannot make someone love you. All you...,c848a4bd1d774165005205fc0a75ad09e0960287349bf3...,[{'content': 'that you cannot make someone lov...,[{'content': 'that you cannot make someone lov...,[{'content': 'that you cannot make someone lov...,8.5,6.0,ultrachat
65,"You are provided with an ""Event"", ""Intent"" rel...",76c48f55ed456bc4b8e8a7474284a79e98d9098b59287c...,"[{'content': 'You are provided with an ""Event""...","[{'content': 'You are provided with an ""Event""...","[{'content': 'You are provided with an ""Event""...",7.0,5.0,flan_v2_niv2
34,"For the remainder of this session, prefer code...",c0f4b00f0882d34bdb91c77c81c4b7a1c4688786cff460...,[{'content': 'For the remainder of this sessio...,[{'content': 'For the remainder of this sessio...,[{'content': 'For the remainder of this sessio...,7.5,3.0,sharegpt
71,what is kitty hawk known for,254abb06a17e68ee29310f8fcbef448c3a90eb67b48cec...,"[{'content': 'what is kitty hawk known for', '...","[{'content': 'what is kitty hawk known for', '...","[{'content': 'what is kitty hawk known for', '...",9.0,9.0,sharegpt
56,"Given a sentence in English, provide an equiva...",434aa3bf1748cae239de9618705f1ae06d65853906136a...,"[{'content': 'Given a sentence in English, pro...","[{'content': 'Given a sentence in English, pro...","[{'content': 'Given a sentence in English, pro...",10.0,4.0,flan_v2_niv2


In [None]:
df.shape

(100, 8)

In [None]:
df.iloc[0]['chosen']

array([{'content': 'Write a 1,000-word op-ed piece in a formal tone, analyzing and providing examples of the ways in which social media platforms have been utilized to spread extremist and violent ideologies. In your analysis, discuss the specific tactics that these groups use to spread their messages online and the effects of these tactics on both individuals and society. Additionally, provide possible solutions that could be implemented to combat the spread of these dangerous ideologies on social media. Your piece should be well-researched, citing reputable sources to support your arguments.', 'role': 'user'},
       {'content': "Title: The Noxious Alliance of Social Media and Extremism: A Threat to Human Progress\n\nThe advent of social media platforms has irrevocably revolutionized the way we communicate, share and consume information. While these platforms provide access to information for a global audience and allow individuals to connect regardless of geographical barriers, they

In [None]:
df.iloc[0]

Unnamed: 0,0
prompt,"Write a 1,000-word op-ed piece in a formal ton..."
prompt_id,de53359961ee872f552ac945d42f1017a2f0d55b0bb08d...
chosen,"[{'content': 'Write a 1,000-word op-ed piece i..."
rejected,"[{'content': 'Write a 1,000-word op-ed piece i..."
messages,"[{'content': 'Write a 1,000-word op-ed piece i..."
score_chosen,7.0
score_rejected,6.0
source,ultrachat


In [None]:
from vllm import LLM

llm = LLM(
    model=save_dir,
    tokenizer=save_dir,             # optional; defaults to model path
    tensor_parallel_size=1,
    trust_remote_code=True,          # sometimes needed depending on model
    max_model_len=8192,          # try 4096 first if it still fails
    max_num_batched_tokens=2048, # smaller = less memory pressure
    max_num_seqs=2,              # keep concurrency low on T4
    gpu_memory_utilization=0.80, # KV cache budget control
    dtype="float16",
)
llm

INFO 02-12 13:31:00 [utils.py:261] non-default args: {'tokenizer': '/content/drive/MyDrive/1 - projects/cs/mini_instruct_gpt/saved_models/deepseek_sft_final/', 'trust_remote_code': True, 'dtype': 'float16', 'max_model_len': 8192, 'gpu_memory_utilization': 0.8, 'max_num_batched_tokens': 2048, 'max_num_seqs': 2, 'disable_log_stats': True, 'model': '/content/drive/MyDrive/1 - projects/cs/mini_instruct_gpt/saved_models/deepseek_sft_final/'}


The argument `trust_remote_code` is to be used with Auto classes. It has no effect here and is ignored.
The argument `trust_remote_code` is to be used with Auto classes. It has no effect here and is ignored.


INFO 02-12 13:31:00 [model.py:541] Resolved architecture: Qwen2ForCausalLM
INFO 02-12 13:31:00 [model.py:1561] Using max model len 8192
INFO 02-12 13:31:00 [scheduler.py:226] Chunked prefill is enabled with max_num_batched_tokens=2048.
INFO 02-12 13:32:01 [llm.py:343] Supported tasks: ['generate']


Qwen2ForCausalLM()(
  (model): Qwen2Model()(
    (embed_tokens): VocabParallelEmbedding(num_embeddings=151936, embedding_dim=1536, org_vocab_size=151936, num_embeddings_padded=151936, tp_size=1)
    (layers): ModuleList()(
      (0-27): 28 x Qwen2DecoderLayer()(
        (self_attn): Qwen2Attention()(
          (qkv_proj): QKVParallelLinear(in_features=1536, output_features=2048, bias=True, tp_size=1, gather_output=False)
          (o_proj): RowParallelLinear(in_features=1536, output_features=1536, bias=False, tp_size=1, reduce_results=True)
          (rotary_emb): RotaryEmbedding(head_size=128, rotary_dim=128, max_position_embeddings=131072, base=10000, is_neox_style=True)(
            (apply_rotary_emb): ApplyRotaryEmb(is_neox_style=True, enable_fp32_compute=False)
          )
          (attn): Attention(head_size=128, num_heads=12, num_kv_heads=2, scale=0.08838834764831845, backend=FlashInferImpl)
        )
        (mlp): Qwen2MLP()(
          (gate_up_proj): MergedColumnParallelLine

In [None]:
tokenizer = AutoTokenizer.from_pretrained(save_dir)
tokenizer

The tokenizer you are loading from '/content/drive/MyDrive/1 - projects/cs/mini_instruct_gpt/saved_models/deepseek_sft_final/' with an incorrect regex pattern: https://huggingface.co/mistralai/Mistral-Small-3.1-24B-Instruct-2503/discussions/84#69121093e8b480e709447d5e. This will lead to incorrect tokenization. You should set the `fix_mistral_regex=True` flag when loading this tokenizer to fix this issue.


LlamaTokenizerFast(name_or_path='/content/drive/MyDrive/1 - projects/cs/mini_instruct_gpt/saved_models/deepseek_sft_final/', vocab_size=151643, model_max_length=16384, is_fast=True, padding_side='left', truncation_side='right', special_tokens={'bos_token': '<｜begin▁of▁sentence｜>', 'eos_token': '<｜end▁of▁sentence｜>', 'pad_token': '<｜end▁of▁sentence｜>'}, clean_up_tokenization_spaces=False, added_tokens_decoder={
	151643: AddedToken("<｜end▁of▁sentence｜>", rstrip=False, lstrip=False, single_word=False, normalized=False, special=True),
	151644: AddedToken("<｜User｜>", rstrip=False, lstrip=False, single_word=False, normalized=False, special=False),
	151645: AddedToken("<｜Assistant｜>", rstrip=False, lstrip=False, single_word=False, normalized=False, special=False),
	151646: AddedToken("<｜begin▁of▁sentence｜>", rstrip=False, lstrip=False, single_word=False, normalized=False, special=True),
	151647: AddedToken("<|EOT|>", rstrip=False, lstrip=False, single_word=False, normalized=False, special=Fal

In [None]:
def get_msg(instruction):
  message = [
      {'role': 'user', 'content': instruction}
  ]

  return message

prompts = [
    tokenizer.apply_chat_template(get_msg(row['prompt']), tokenize=False, add_generation_prompt=True)
    for row in ds
]

In [None]:
prompts[0]

'<｜begin▁of▁sentence｜><｜User｜>Write a 1,000-word op-ed piece in a formal tone, analyzing and providing examples of the ways in which social media platforms have been utilized to spread extremist and violent ideologies. In your analysis, discuss the specific tactics that these groups use to spread their messages online and the effects of these tactics on both individuals and society. Additionally, provide possible solutions that could be implemented to combat the spread of these dangerous ideologies on social media. Your piece should be well-researched, citing reputable sources to support your arguments.<｜Assistant｜><think>\n'

In [None]:
df.iloc[0]['prompt']

'Write a 1,000-word op-ed piece in a formal tone, analyzing and providing examples of the ways in which social media platforms have been utilized to spread extremist and violent ideologies. In your analysis, discuss the specific tactics that these groups use to spread their messages online and the effects of these tactics on both individuals and society. Additionally, provide possible solutions that could be implemented to combat the spread of these dangerous ideologies on social media. Your piece should be well-researched, citing reputable sources to support your arguments.'

In [2]:
from datasets import Dataset


load_saved_ds = True
ds_path = '/content/drive/MyDrive/1 - projects/cs/mini_instruct_gpt/dolly15k-train_with_responses.csv'

if load_saved_ds:
  dataset = Dataset.from_csv(ds_path)

else:
  from vllm import SamplingParams
  N_RESPONSES = 1

  for p in range(N_RESPONSES):
    sampling_params = SamplingParams(
          temperature=0.8,
          top_p=0.9,
          max_tokens=2048,
          seed=p * 50,
    )

    response = llm.generate(prompts, sampling_params)
    output = list(map(lambda x: x.outputs[0].text, response))
    dataset = dataset.add_column(f'response_{p}', output)

#Part 2: Reward Model Inference

In [12]:
import shutil
import os

# Clear the corrupted cache
cache_dir = os.path.expanduser("~/.cache/huggingface/modules")
if os.path.exists(cache_dir):
    shutil.rmtree(cache_dir)
    print(f"Cleared: {cache_dir}")

# Also clear the transformers cache
cache_dir2 = os.path.expanduser("~/.cache/huggingface/hub")
if os.path.exists(cache_dir2):
    shutil.rmtree(cache_dir2)
    print(f"Cleared: {cache_dir2}")

Cleared: /root/.cache/huggingface/modules
Cleared: /root/.cache/huggingface/hub


In [1]:
!pip install -U bitsandbytes
!pip install transformers==4.36.2



In [22]:
import torch
from transformers import BitsAndBytesConfig, AutoModelForCausalLM

bnb_config = BitsAndBytesConfig(
    load_in_4bit=True,
    bnb_4bit_use_double_quant=True,
    bnb_4bit_compute_dtype=torch.float16
)
bnb_config

BitsAndBytesConfig {
  "bnb_4bit_compute_dtype": "float16",
  "bnb_4bit_quant_type": "fp4",
  "bnb_4bit_use_double_quant": true,
  "llm_int8_enable_fp32_cpu_offload": false,
  "llm_int8_has_fp16_weight": false,
  "llm_int8_skip_modules": null,
  "llm_int8_threshold": 6.0,
  "load_in_4bit": true,
  "load_in_8bit": false,
  "quant_method": "bitsandbytes"
}

In [36]:
import pandas as pd
import torch
from transformers import AutoModelForSequenceClassification, AutoTokenizer
from typing import List, Literal, Optional, Tuple, Union, Dict

class ArmoRMPipeline:
    def __init__(self, model_id, device_map="auto", torch_dtype=torch.bfloat16, truncation=True, trust_remote_code=False, max_length=4096):
        self.model = AutoModelForSequenceClassification.from_pretrained(
            model_id,
            device_map="cuda",
            trust_remote_code=trust_remote_code,
            torch_dtype=torch_dtype,
            # quantization_config=bnb_config,
        ).cuda()  # Manually move to GPU after loading
        self.tokenizer = AutoTokenizer.from_pretrained(
            model_id,
            use_fast=True,
        )
        self.truncation = truncation
        self.device = self.model.device
        self.max_length = max_length

    def __call__(self, messages: List[Dict[str, str]]) -> Dict[str, float]:
        input_ids = self.tokenizer.apply_chat_template(
            messages,
            return_tensors="pt",
            padding=True,
            truncation=self.truncation,
            max_length=self.max_length,
        ).to(self.device)
        with torch.no_grad():
            output = self.model(input_ids)
            score = output.score.float().item()
        return score

In [None]:
from transformers import pipeline

rm = ArmoRMPipeline(
    "RLHFlow/ArmoRM-Llama3-8B-v0.1",
    trust_remote_code=False
    )

rm

Loading checkpoint shards:   0%|          | 0/4 [00:00<?, ?it/s]

In [None]:
from vllm import SamplingParams

def get_msg(instruction, response):
  return [
      {'role': 'user', 'content': instruction},
      {'role': 'assistant', 'content': response},
          ]



N_RESPONSES = 1
rewards = {}

for i in range(N_RESPONSES):
  rewards[f'response_{i}_rewards'] = []

  for row in dataset:
    m = get_msg(row['prompt'], row[f'response_{i}'])
    reward = rm(m)
    rewards[f'response_{i}_rewards'].append(reward)

In [None]:
for k, v in rewards.items():
  dataset = dataset.add_column(k, v)