In [1]:
import torch
import random
import os
os.environ["HF_ENDPOINT"] = "https://hf-mirror.com"

from util import TokenizerUtil

tokenizer = TokenizerUtil()

input_ids, attention_mask = tokenizer.encode('how are you', max_length=4)

input_ids, attention_mask, tokenizer.decode(input_ids)

(tensor([   0, 9178,   32,    2]), tensor([1, 1, 1, 1]), '<s>how are</s>')

In [3]:
from datasets import load_dataset
from transformers import default_data_collator

dataset = load_dataset('json', data_files='D:/Project/Pycharm project/ZZH/DL/Simple_RLHF-main/dataset/filtered_chinese_cantonese_rej.json', split='train')

#2,4,4切分,取第0部分
dataset = dataset.select(range(8775))


def f(data):
    #随机生成两种回答
    if random.random() > 0.5:
        data['chosen'] = data['chosen'].swapcase()
    data = data['prompt'] + data['chosen']

    input_ids, attention_mask = tokenizer.encode(data)

    return {
        'input_ids': input_ids,
        'attention_mask': attention_mask,
        'labels': input_ids.clone()
    }


dataset = dataset.map(f, remove_columns=dataset.column_names)

loader = torch.utils.data.DataLoader(dataset,
                                     collate_fn=default_data_collator,
                                     batch_size=2,
                                     shuffle=True,
                                     drop_last=True)

len(loader), next(iter(loader))

Map:   0%|          | 0/8775 [00:00<?, ? examples/s]

(4387,
 {'input_ids': tensor([[    0, 47856, 21402,  ...,     1,     1,     1],
          [    0, 47856, 21402,  ...,     1,     1,     1]]),
  'attention_mask': tensor([[1, 1, 1,  ..., 0, 0, 0],
          [1, 1, 1,  ..., 0, 0, 0]]),
  'labels': tensor([[    0, 47856, 21402,  ...,     1,     1,     1],
          [    0, 47856, 21402,  ...,     1,     1,     1]])})

In [4]:
from transformers import AutoModelForCausalLM
import lora

model_actor = AutoModelForCausalLM.from_pretrained('facebook/opt-1.3b')

lora.insert(model_actor)
lora.count_params(model_actor)

pytorch_model.bin:  14%|#4        | 377M/2.63G [00:00<?, ?B/s]

To support symlinks on Windows, you either need to activate Developer Mode or to run Python as an administrator. In order to activate developer mode, see this article: https://docs.microsoft.com/en-us/windows/apps/get-started/enable-your-device-for-development


generation_config.json:   0%|          | 0.00/137 [00:00<?, ?B/s]

{'count_require': 2.21044736, 'count_all': 14.29004288, 'ratio': 0.15468444556549854}


In [5]:
from transformers import get_scheduler
from accelerate import Accelerator


def f():
    params = []
    params_lora = []
    for name, param in model_actor.named_parameters():
        if not param.requires_grad:
            continue

        if 'lora_A' in name or 'lora_B' in name:
            params_lora.append(param)
            continue

        params.append(param)

    return [{
        'params': params,
        'weight_decay': 0.0,
    }, {
        'params': params_lora,
        'weight_decay': 0.0,
        'lr': 5e-4
    }]


optimizer = torch.optim.Adam(f(), lr=1e-3, betas=(0.9, 0.95))

scheduler = get_scheduler(name='cosine',
                          optimizer=optimizer,
                          num_warmup_steps=0,
                          num_training_steps=100)

accelerator = Accelerator(gradient_accumulation_steps=64,
                          mixed_precision='fp16')

model_actor, loader, optimizer, scheduler = accelerator.prepare(
    model_actor, loader, optimizer, scheduler)

model_actor.train()

OPTForCausalLM(
  (model): OPTModel(
    (decoder): OPTDecoder(
      (embed_tokens): Embedding(50272, 2048, padding_idx=1)
      (embed_positions): OPTLearnedPositionalEmbedding(2050, 2048)
      (final_layer_norm): LayerNorm((2048,), eps=1e-05, elementwise_affine=True)
      (layers): ModuleList(
        (0-23): 24 x OPTDecoderLayer(
          (self_attn): OPTSdpaAttention(
            (k_proj): Lora(
              (linear): Linear(in_features=2048, out_features=2048, bias=True)
            )
            (v_proj): Lora(
              (linear): Linear(in_features=2048, out_features=2048, bias=True)
            )
            (q_proj): Lora(
              (linear): Linear(in_features=2048, out_features=2048, bias=True)
            )
            (out_proj): Lora(
              (linear): Linear(in_features=2048, out_features=2048, bias=True)
            )
          )
          (activation_fn): ReLU()
          (self_attn_layer_norm): LayerNorm((2048,), eps=1e-05, elementwise_affine=True)


In [6]:
for i, data in enumerate(loader):
    with accelerator.accumulate(model_actor):
        out = model_actor(**data)
        accelerator.backward(out.loss)

        if accelerator.sync_gradients:
            accelerator.clip_grad_norm_(
                [i for i in model_actor.parameters() if i.requires_grad], 1.0)

        optimizer.step()
        scheduler.step()
        optimizer.zero_grad()

    if (i + 1) % 100 == 0:
        lr = optimizer.param_groups[0]['lr']
        print(i, len(loader), out.loss.item(), lr)

        logits = out.logits[0].argmax(1)
        print(tokenizer.decode(logits))

    if i == 2000:
        break

lora.merge(model_actor)
model_actor.save_pretrained('model/actor')

  attn_output = torch.nn.functional.scaled_dot_product_attention(


99 4387 8.345001220703125 0.0009997532801828658
 of�的��旭的���你忈��的万的�的�釈佝�的�的��一�佡。�忇�己的����万�� Ioniinonmainonininmaininonininausononioninusosininionadiicininiidinmaganaxininoninusosininonononxinononononganoninonioanxinusonadconmaixanononaonmainanoninoninumadumonaonanaonioniananoninoaananososainiadidinonadxiainusxininonionaoninononininonadinaosidinainincosinoninoninqu
inininosusidononaonanaononiananinosingoninonaonumusonusonmaxxoninicononanuminanadanosiginoneusononionpastiniininaniciinxinxininonininiiniininininuiaioninadadbinononinoninganinuosgonaaadainiconinainoninananiniaduminainonininionosiininononumoneooninininmausincumononosanasoonononaininanininoninainoneinianinononinin isinininusaosopononainonaainxinadanaonosinoninadininonzonanmainaaononaononxinininininininosxinionininonganiginanininininininadoiinoneousoneososonininaanoozinuaoncosonininainoninininononininaoneononinononinganaicinoninoninonon-on
199 4387 2.492866039276123 0.00099778098230154
.�亪�学的���的�耀ﻓ浸的���塑士。�。��人��胓的���。��庻的��