In [1]:
import os

import pandas as pd
import numpy as np

import torch
import nltk.translate.bleu_score as bleu


from modelscope import snapshot_download
from datasets import Dataset
from transformers import AutoModelForCausalLM, AutoTokenizer, TrainingArguments
from trl import SFTTrainer
from peft import get_peft_model, LoraConfig, TaskType



2024-03-14 10:23:16,056 - modelscope - INFO - PyTorch version 2.2.1 Found.
2024-03-14 10:23:16,058 - modelscope - INFO - Loading ast index from /root/.cache/modelscope/ast_indexer
2024-03-14 10:23:16,157 - modelscope - INFO - Loading done! Current index file version is 1.13.1, with md5 b5a2c5fe01f7460b3e700a8ce7e6fc94 and a total number of 972 components indexed
  from .autonotebook import tqdm as notebook_tqdm


In [2]:
REPO_DIRECTORY = r'/root/'
ABC_DICT_PATH = r'autodl-tmp/AIST4010-Cantonese-Translator-Data/ABC-Dict/abc_dict.csv'

def load_abc_dataset():
    abc_dict = pd.read_csv(REPO_DIRECTORY + ABC_DICT_PATH)
    abc_dataset = Dataset.from_pandas(abc_dict)
    return abc_dataset

abc_set = load_abc_dataset()
abc_shuffled_set = abc_set.shuffle(seed=42).train_test_split(test_size=0.1)
abc_train_set = abc_shuffled_set['train']
abc_test_set = abc_shuffled_set['test']
for (i, example) in enumerate(abc_train_set):
    print(example)
    if i == 5:
        break

{'en': 'You know what you have done, so tomorrow submit your resignation letter to me.', 'yue': '你自己知自己事，聽日遞封辭職信畀我。'}
{'en': "That one that I patronized was a young prostitute. Her skills were ample. She didn't loaf around one bit.", 'yue': '我幫襯咗嗰個係後生囡囡，功夫做到足，一啲都冇偷懶。'}
{'en': "Everybody be more cooperative. If each person just thinks of himself and you disregard other people's interests, then we can't get anything done.", 'yue': '大家合作啲，如果各人都諗縮數就乜都唔使做。'}
{'en': 'The kid sounds quite all right when he speaks.', 'yue': '個細路講起嘢嚟都有紋有路喎。'}
{'en': "Hey, they're so pretty! Take a look, there are a pair of rainbows!", 'yue': '嘩，咁靚啊，你睇，彩虹係打孖嚟㗎！'}
{'en': "There are so damned many people! I can't move!", 'yue': '人鬼咁多！我唔喐得嘅！'}


In [3]:
def count_dataset_tokens(dataset):
    en_count = 0
    yue_count = 0
    for example in dataset:
        en_count += len(example['en'])
        yue_count += len(example['yue'])
    return en_count, yue_count


counts = np.array(count_dataset_tokens(abc_train_set))
print(counts)
print(counts/len(abc_train_set))


[920030 191513]
[70.63027791 14.7023645 ]


In [4]:
model_path=r'/root/autodl-tmp/01ai/Yi-6B-Chat'

# model = Model.from_pretrained('01ai/Yi-6B')

# model = AutoModelForCausalLM.from_pretrained(
#     model_name,
#     device_map="auto",
#     torch_dtype='auto'
# ).eval()


# tokenizer = AutoTokenizer.from_pretrained(model_name)

In [5]:
model_dir = snapshot_download('01ai/Yi-6B-Chat', cache_dir='/root/autodl-tmp', revision='master')

In [6]:
base_tokenizer = AutoTokenizer.from_pretrained(model_path, use_fast=True, padding_side='left', max_length=512, return_tensors='pt')

device = torch.device('cuda') if torch.cuda.is_available() else torch.device('cpu')


# Since transformers 4.35.0, the GPT-Q/AWQ model can be loaded using AutoModelForCausalLM.
base_model = AutoModelForCausalLM.from_pretrained(
    model_path,
    device_map=device,
    torch_dtype='auto',
)


# # Prompt content: "hi"
# messages = [
#     {"role": "user", "content": "hi"}
# ]


# input_ids = tokenizer.apply_chat_template(conversation=messages, tokenize=True, add_generation_prompt=True, return_tensors='pt')
# output_ids = model.generate(input_ids.to('cuda'))
# response = tokenizer.decode(output_ids[0][input_ids.shape[1]:], skip_special_tokens=True)

# # Model response: "Hello! How can I assist you today?"
# print(response)

Loading checkpoint shards: 100%|██████████| 3/3 [00:02<00:00,  1.18it/s]


In [7]:
# messages = [
#     {"role": "user", "content": "你識唔識講廣東話?"},
# ]

# input_ids = base_tokenizer.apply_chat_template(conversation=messages, tokenize=True, add_generation_prompt=True, return_tensors='pt')
# output_ids = base_model.generate(input_ids.to('cuda'))
# response = base_tokenizer.decode(output_ids[0][input_ids.shape[1]:], skip_special_tokens=True)

# # Model response: "Hello! How can I assist you today?"
# print(response)

In [8]:
# print(input_ids)
# print(output_ids)
# print(base_tokenizer.decode(input_ids[0]))
# print(base_tokenizer.decode(input_ids[0]))

# #get text of list of tokens in output_ids stored in array
# print([base_tokenizer.decode([token]) for token in output_ids[0]])

In [9]:
def formatting_prompts_func(example):
    output_texts = []
    for i in range(len(example['en'])):
        text1 = f"""
        <|im_start|> user
        Translate the following words into Cantonese: 
        {example['en'][i]}
        <|im_start|>assistant
        {example['yue'][i]}
        """
        text2 = f"""
        <|im_start|> user
        Translate the following words into English:
        {example['yue'][i]}
        <|im_start|>assistant
        {example['en'][i]}
        """
        output_texts.append(text1)
        output_texts.append(text2)
    return output_texts

In [10]:
prompts = formatting_prompts_func(abc_set[:10])
for prompt in prompts:
    print(prompt)


        <|im_start|> user
        Translate the following words into Cantonese: 
        Scoop up water
        <|im_start|>assistant
        㧾水
        

        <|im_start|> user
        Translate the following words into English:
        㧾水
        <|im_start|>assistant
        Scoop up water
        

        <|im_start|> user
        Translate the following words into Cantonese: 
        Ladle out soup
        <|im_start|>assistant
        㧾湯
        

        <|im_start|> user
        Translate the following words into English:
        㧾湯
        <|im_start|>assistant
        Ladle out soup
        

        <|im_start|> user
        Translate the following words into Cantonese: 
        Third son of a rich family
        <|im_start|>assistant
        三少
        

        <|im_start|> user
        Translate the following words into English:
        三少
        <|im_start|>assistant
        Third son of a rich family
        

        <|im_start|> user
        Translate the follow

In [11]:
# for name, param in base_model.named_parameters():
#     print(f"Parameter name: {name}")
#     print(param)
#     print("-" * 50)

In [12]:
print(base_model.config)

LlamaConfig {
  "_name_or_path": "/root/autodl-tmp/01ai/Yi-6B-Chat",
  "architectures": [
    "LlamaForCausalLM"
  ],
  "attention_bias": false,
  "attention_dropout": 0.0,
  "bos_token_id": 1,
  "eos_token_id": 2,
  "hidden_act": "silu",
  "hidden_size": 4096,
  "initializer_range": 0.02,
  "intermediate_size": 11008,
  "max_position_embeddings": 4096,
  "model_type": "llama",
  "num_attention_heads": 32,
  "num_hidden_layers": 32,
  "num_key_value_heads": 4,
  "pretraining_tp": 1,
  "rms_norm_eps": 1e-05,
  "rope_scaling": null,
  "rope_theta": 5000000.0,
  "tie_word_embeddings": false,
  "torch_dtype": "bfloat16",
  "transformers_version": "4.38.2",
  "use_cache": true,
  "vocab_size": 64000
}



In [13]:
lora_config = LoraConfig(
    r=32, # Rank
    lora_alpha=32,
    target_modules = ["k_proj", "q_proj", "v_proj"],
    lora_dropout=0.05,
    bias="none",
    task_type=TaskType.CAUSAL_LM
)
peft_model = get_peft_model(base_model, 
                            lora_config)

peft_model.print_trainable_parameters()

trainable params: 17,825,792 || all params: 6,078,861,312 || trainable%: 0.293242288071467


**Train Tokenizer**

In [29]:
def get_training_corpus(dataset):
    for start_idx in range(0, len(dataset), 1000):
        samples = dataset[start_idx : start_idx + 1000]
        sample_en = samples["en"]
        sample_yue = samples["yue"]
        for i in range(len(sample_en)):
            yield sample_en[i]
            yield sample_yue[i]

training_corpus = get_training_corpus(abc_train_set)

tokenizer = base_tokenizer.train_new_from_iterator(training_corpus, vocab_size=40000)
tokenizer.save_pretrained("tokenizer")

Ignored unknown kwargs option padding_side





('tokenizer/tokenizer_config.json',
 'tokenizer/special_tokens_map.json',
 'tokenizer/tokenizer.model',
 'tokenizer/added_tokens.json',
 'tokenizer/tokenizer.json')

In [15]:
print(tokenizer("嗌呃畀啲嘢噃"))
print(base_tokenizer("嗌呃畀啲嘢噃"))
print(tokenizer("Good morning"))
print(base_tokenizer("Good morning"))

{'input_ids': [89, 636, 507, 1929, 4015, 670], 'attention_mask': [1, 1, 1, 1, 1, 1]}
{'input_ids': [59568, 534, 456, 445, 534, 450, 436, 536, 454, 433, 534, 454, 483, 534, 457, 467, 534, 458, 436], 'attention_mask': [1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1]}
{'input_ids': [25911, 4862], 'attention_mask': [1, 1]}
{'input_ids': [6076, 4040], 'attention_mask': [1, 1]}


In [16]:
# bleu = evaluate.load('bleu')

def compute_metrics(eval_preds):
    logits, labels = eval_preds
    predictions = np.argmax(logits, axis=-1)
    print(predictions.shape, labels.shape)
    return {"bleu": bleu(predictions, labels)}

In [17]:
training_args = TrainingArguments(
    learning_rate=1e-3, # Higher learning rate than full fine-tuning.
    num_train_epochs=1,
    logging_steps=100,
    output_dir="root/peft_model"
)

trainer = SFTTrainer(
    peft_model,
    args=training_args,
    train_dataset= abc_train_set,
    eval_dataset= abc_test_set,
    tokenizer=tokenizer,
    formatting_func=formatting_prompts_func,
    compute_metrics=compute_metrics,
)
trainer.train()

Map:   0%|          | 0/13026 [00:00<?, ? examples/s]

Map: 100%|██████████| 13026/13026 [00:01<00:00, 8612.01 examples/s]
Map: 100%|██████████| 1448/1448 [00:00<00:00, 8712.10 examples/s]
dataloader_config = DataLoaderConfiguration(dispatch_batches=None, split_batches=False, even_batches=True, use_seedable_sampler=True)


Step,Training Loss
100,2.9106
200,2.5409
300,2.5578
400,2.505
500,2.4342
600,2.529
700,2.6226
800,2.5906
900,2.5416
1000,2.481




TrainOutput(global_step=3257, training_loss=2.517511322705483, metrics={'train_runtime': 725.9868, 'train_samples_per_second': 35.885, 'train_steps_per_second': 4.486, 'total_flos': 9.45612565314601e+16, 'train_loss': 2.517511322705483, 'epoch': 1.0})

In [21]:
trainer.model.save_pretrained("/root/peft_model")



In [41]:
#get random data from test dataset
for i in range(5):
    example = abc_test_set[i]
    print(example)
    text1 = f"""
        Translate the following words into Cantonese: 
        {example['en'][i]}
        """
    text2 = f"""
        Translate the following words into English:
        {example['yue'][i]}
        """
    texts = [text1, text2]
    for text in texts:
        messages = [
            {"role": "user", "content": text}
        ]
        #print model outputs for base_model and peft_model
        base_input_ids = base_tokenizer.apply_chat_template(conversation=messages, tokenize=True, add_generation_prompt=True, return_tensors='pt')
        peft_input_ids = tokenizer.apply_chat_template(conversation=messages, tokenize=True, add_generation_prompt=True, return_tensors='pt')
        print("Base ID:", base_input_ids)
        print("PEFT ID:", base_input_ids)
        print(peft_input_ids)
        base_output_ids = base_model.generate(base_input_ids.to('cuda'), max_new_tokens=100)
        peft_output_ids = peft_model.generate(peft_input_ids.to('cuda'), max_new_tokens=100)
        print(base_output_ids.shape, peft_output_ids.shape)
        print("Base model: ", base_tokenizer.decode(base_output_ids[0][base_input_ids.shape[1]:], skip_special_tokens=True))
        print("Fine-tuned: ", tokenizer.decode(peft_output_ids[0][peft_input_ids.shape[1]:], skip_special_tokens=True))


{'en': 'I much needed to shit, so as soon as I saw there was a place in the stairway, I immediately squatted down and took a shit, and after I had taken a shit, my life was smooth and easy, and the unusual situation was quickly relieved.', 'yue': '我好急，所以一見到樓梯有個地方，就馬上踎低擺堆，擺完堆之後，生命流暢，異常快慰。'}
Base ID: tensor([[    6,  2942,   144,   144,   135,  7759, 14429,   567,  1926,  3151,
          1029, 26212,  2823, 59569, 59601, 59568,   144,   135, 59597,   144,
           135,     7, 59568,   144,     6, 14135,   144]])
PEFT ID: tensor([[    6,  2942,   144,   144,   135,  7759, 14429,   567,  1926,  3151,
          1029, 26212,  2823, 59569, 59601, 59568,   144,   135, 59597,   144,
           135,     7, 59568,   144,     6, 14135,   144]])
tensor([[    3,  4785,  3336,     0,    89,    89,    89,    89,    89,    89,
            89, 35380,    77,  4131,  3540,  7360, 26322, 10679,  7722, 17559,
            89,     0,    89,    89,    89,    89,    89,    89,    89,  3359,
             0,   

A decoder-only architecture is being used, but right-padding was detected! For correct generation results, please set `padding_side='left'` when initializing the tokenizer.


torch.Size([1, 127]) torch.Size([1, 146])
Base model:  ２２２２２２２２<|Human|><|Human|> Min box２２２２２２２２２２２<|Human|> stamped box box box<filename> Cal４２２２２２２２２２２２２２２２２２２２２２２２２２２２２２２２２２２２２２２２２２２２２２２２２２２２２２２２２２２２２２２２２２２２２
Fine-tuned:           !ant!!Cantones！，                                                                                  
Base ID: tensor([[    6,  2942,   144,   144,   135,  7759, 14429,   567,  1926,  3151,
          1029,  4750, 59601,   144,   135, 59646,   144,   135,     7, 59568,
           144,     6, 14135,   144]])
PEFT ID: tensor([[    6,  2942,   144,   144,   135,  7759, 14429,   567,  1926,  3151,
          1029,  4750, 59601,   144,   135, 59646,   144,   135,     7, 59568,
           144,     6, 14135,   144]])
tensor([[    3,  4785,  3336,     0,    89,    89,    89,    89,    89,    89,
            89, 35380,    77,  4131,  3540,  7360, 26322, 10679,  5242,    29,
             0,    89,    89,    89,    89,    89,    89,    89,  3397,     0,
            89,  

A decoder-only architecture is being used, but right-padding was detected! For correct generation results, please set `padding_side='left'` when initializing the tokenizer.


torch.Size([1, 124]) torch.Size([1, 145])
Base model:  ２２２２２２２２<|Human|> stamped box box Min systems systems４２２２２２２２２<|Human|> stamped box box box２２２２２２２２２EC Min２２２２２２２２２<|Human|>２２２２２２２２２２２２２２２２２２２２２２２２２２２２２２２２２２２２２２２２２２２２２２２２
Fine-tuned:            assist!!!!。                                                                                   
{'en': "There's no need to be so hurried, as we still have almost one hour till we need to board the train.", 'yue': '唔使咁急，我哋仲有差唔多一個鐘頭先至要上車。'}
Base ID: tensor([[    6,  2942,   144,   144,   135,  7759, 14429,   567,  1926,  3151,
          1029, 26212,  2823, 59569, 59601, 59568,   144,   135, 59578,   144,
           135,     7, 59568,   144,     6, 14135,   144]])
PEFT ID: tensor([[    6,  2942,   144,   144,   135,  7759, 14429,   567,  1926,  3151,
          1029, 26212,  2823, 59569, 59601, 59568,   144,   135, 59578,   144,
           135,     7, 59568,   144,     6, 14135,   144]])
tensor([[    3,  4785,  3336,     0,    89,    89,    89,

A decoder-only architecture is being used, but right-padding was detected! For correct generation results, please set `padding_side='left'` when initializing the tokenizer.


torch.Size([1, 127]) torch.Size([1, 146])
Base model:  ２２２２２２２２<|Human|> stamped box box box４２２２２２２２２２<|Human|> stamped box box box<filename>４２２２２２２２２２２２２２２２２２２２２２２nWhennWhen$(４２２２２２２２２２２２２２２２２２２２２２２２２２２２２２２２２２２２２２２２２２２
Fine-tuned:                                                                                                     
Base ID: tensor([[    6,  2942,   144,   144,   135,  7759, 14429,   567,  1926,  3151,
          1029,  4750, 59601,   144,   135, 59897,   144,   135,     7, 59568,
           144,     6, 14135,   144]])
PEFT ID: tensor([[    6,  2942,   144,   144,   135,  7759, 14429,   567,  1926,  3151,
          1029,  4750, 59601,   144,   135, 59897,   144,   135,     7, 59568,
           144,     6, 14135,   144]])
tensor([[    3,  4785,  3336,     0,    89,    89,    89,    89,    89,    89,
            89, 35380,    77,  4131,  3540,  7360, 26322, 10679,  5242,    29,
             0,    89,    89,    89,    89,    89,    89,    89, 29873,     0,
            89,   

A decoder-only architecture is being used, but right-padding was detected! For correct generation results, please set `padding_side='left'` when initializing the tokenizer.


torch.Size([1, 124]) torch.Size([1, 145])
Base model:  ２２２２２２２２<|Human|> stamped box box活动中４２２２２２２２２２２<|Human|><|Human|>ava box box box box２２２２２２２２２ feltoll Min２２２２２２２２２<|Human|> stamped２２２２２２２２２２２２２２２２２２２２２２２２２２２２２２２２２２２２２２２２２２２２２２
Fine-tuned:           assistantant!中!檸。                  我 佢!ing 。                   ， the ?!！!           assistantant!ant懶。             
{'en': 'They squat down and wait for the judge to give the order for the race to start.', 'yue': '佢哋踎低等裁判員發令起跑。'}
Base ID: tensor([[    6,  2942,   144,   144,   135,  7759, 14429,   567,  1926,  3151,
          1029, 26212,  2823, 59569, 59601, 59568,   144,   135, 59569,   144,
           135,     7, 59568,   144,     6, 14135,   144]])
PEFT ID: tensor([[    6,  2942,   144,   144,   135,  7759, 14429,   567,  1926,  3151,
          1029, 26212,  2823, 59569, 59601, 59568,   144,   135, 59569,   144,
           135,     7, 59568,   144,     6, 14135,   144]])
tensor([[    3,  4785,  3336,     0,    89,    89,    89,    

A decoder-only architecture is being used, but right-padding was detected! For correct generation results, please set `padding_side='left'` when initializing the tokenizer.


torch.Size([1, 127]) torch.Size([1, 147])
Base model:  ２２２２２２２２<|Human|><|Human|><|Human|>科技２２２２２２２２２２２２２２２２２２２２２２２２２２２２２２２２２２２２２２２２２２２２２２２２２２２２２２２２２２２２２２２２２２２２２２２２２２２２２２２２２２２２２２２２
Fine-tuned:           antantant?!!。                    ing !!s.!啲 你要 佢!㧾.咪。         assistantantant檸is 咪.啲 佢!          assistantantant啲ant就 and !s. 我！.ing，    
Base ID: tensor([[    6,  2942,   144,   144,   135,  7759, 14429,   567,  1926,  3151,
          1029,  4750, 59601,   144,   135,   537,   489,   447,   144,   135,
             7, 59568,   144,     6, 14135,   144]])
PEFT ID: tensor([[    6,  2942,   144,   144,   135,  7759, 14429,   567,  1926,  3151,
          1029,  4750, 59601,   144,   135,   537,   489,   447,   144,   135,
             7, 59568,   144,     6, 14135,   144]])
tensor([[    3,  4785,  3336,     0,    89,    89,    89,    89,    89,    89,
            89, 35380,    77,  4131,  3540,  7360, 26322, 10679,  5242,    29,
             0,    89,    89,    89,    89,    89,    89,    

A decoder-only architecture is being used, but right-padding was detected! For correct generation results, please set `padding_side='left'` when initializing the tokenizer.


torch.Size([1, 126]) torch.Size([1, 146])
Base model:  ２２２２２２２２<|Human|> stamped box�２２２２２２２２基本se基本 images<filename>４２２２２２２２２２２<|Human|> box２２２２２２２２２<|Human|> York４２２２２２２２２２２２２２２２２２２２２２２２２２２２２２２２２２２２２２２２２２２２２２２２
Fine-tuned:           佢。                                                                                         
{'en': "I'm not clear about what trouble the guy has gotten into, but I know the police are looking for him.", 'yue': '條友𦧺啲乜嘢屎𦡆，我唔清楚，但係知道警察搵緊佢。'}
Base ID: tensor([[    6,  2942,   144,   144,   135,  7759, 14429,   567,  1926,  3151,
          1029, 26212,  2823, 59569, 59601, 59568,   144,   134,   144,   135,
             7, 59568,   144,     6, 14135,   144]])
PEFT ID: tensor([[    6,  2942,   144,   144,   135,  7759, 14429,   567,  1926,  3151,
          1029, 26212,  2823, 59569, 59601, 59568,   144,   134,   144,   135,
             7, 59568,   144,     6, 14135,   144]])
tensor([[    3,  4785,  3336,     0,    89,    89,    89,    89,    89,    89,
         

A decoder-only architecture is being used, but right-padding was detected! For correct generation results, please set `padding_side='left'` when initializing the tokenizer.


torch.Size([1, 126]) torch.Size([1, 147])
Base model:  ２２２２２２２２<|Human|> stamped box boxava４２２２２２２２２２<|Human|> stamped box box box４２２２２２２２２EC per Min２２２２２２２２２２<|Human|> stamped box２２２２２２２２２２ECava４２２２２２２２２２<|Human|>EC box４２２２２２２２２se Min<filename>４２２２２２２
Fine-tuned:                                                                                                     
Base ID: tensor([[    6,  2942,   144,   144,   135,  7759, 14429,   567,  1926,  3151,
          1029,  4750, 59601,   144,   135,   534,   454,   483,   144,   135,
             7, 59568,   144,     6, 14135,   144]])
PEFT ID: tensor([[    6,  2942,   144,   144,   135,  7759, 14429,   567,  1926,  3151,
          1029,  4750, 59601,   144,   135,   534,   454,   483,   144,   135,
             7, 59568,   144,     6, 14135,   144]])
tensor([[    3,  4785,  3336,     0,    89,    89,    89,    89,    89,    89,
            89, 35380,    77,  4131,  3540,  7360, 26322, 10679,  5242,    29,
             0,    89,    89,    89,

A decoder-only architecture is being used, but right-padding was detected! For correct generation results, please set `padding_side='left'` when initializing the tokenizer.


torch.Size([1, 126]) torch.Size([1, 145])
Base model:  ２２２２２２２２<|Human|> stamped box�４２２２２２２２２２<|Human|> stamped box box box box Min４２２２２２２２２２２２２２２２２２２２２２２２２２２２２２２２２２２２２２２２２２２２２２２２２２２２２２２２２２２２２２２２２２２２２
Fine-tuned:            assistant!!請!。                      我ing !is ing                                                        
{'en': 'Yee Nga Court is located on On Po Road in Tai Po, New Territories.', 'yue': '怡雅苑係位於新界、大埔、安埔路。'}
Base ID: tensor([[    6,  2942,   144,   144,   135,  7759, 14429,   567,  1926,  3151,
          1029, 26212,  2823, 59569, 59601, 59568,   144,   135, 59618,   144,
           135,     7, 59568,   144,     6, 14135,   144]])
PEFT ID: tensor([[    6,  2942,   144,   144,   135,  7759, 14429,   567,  1926,  3151,
          1029, 26212,  2823, 59569, 59601, 59568,   144,   135, 59618,   144,
           135,     7, 59568,   144,     6, 14135,   144]])
tensor([[    3,  4785,  3336,     0,    89,    89,    89,    89,    89,    89,
            89, 35380,    77,  41

A decoder-only architecture is being used, but right-padding was detected! For correct generation results, please set `padding_side='left'` when initializing the tokenizer.


torch.Size([1, 127]) torch.Size([1, 146])
Base model:  ２２２２２２２２２<|Human|> stamped box２２２２２２２２２<|Human|> stamped box box box４２２２２２２２２２２<|Human|>wise<filename>２２２２２２２２２EC Minwise<commit_after>４２２２２２２２２２２２２２２２２２２２２２２２２２２２２２２２２２２２２２２２２２
Fine-tuned:                                                                                                    
Base ID: tensor([[    6,  2942,   144,   144,   135,  7759, 14429,   567,  1926,  3151,
          1029,  4750, 59601,   144,   135, 59817,   144,   135,     7, 59568,
           144,     6, 14135,   144]])
PEFT ID: tensor([[    6,  2942,   144,   144,   135,  7759, 14429,   567,  1926,  3151,
          1029,  4750, 59601,   144,   135, 59817,   144,   135,     7, 59568,
           144,     6, 14135,   144]])
tensor([[    3,  4785,  3336,     0,    89,    89,    89,    89,    89,    89,
            89, 35380,    77,  4131,  3540,  7360, 26322, 10679,  5242,    29,
             0,    89,    89,    89,    89,    89,    89,    89,    89,   221,
      

A decoder-only architecture is being used, but right-padding was detected! For correct generation results, please set `padding_side='left'` when initializing the tokenizer.


torch.Size([1, 124]) torch.Size([1, 146])
Base model:  ２２２２２２２２<|Human|> stamped box box２２２２２２２２２２２２２<|Human|><|Human|><|Human|>２２２２２２２２２２２２２２２２２２２２２２２２２２２２２２２２２２２２２２２２２２２２２２２２２２２２２２２２２２２２２２２２２２２２２２２２
Fine-tuned:                                                                                                     
