In [1]:
import os

import pandas as pd
import numpy as np

import torch


from modelscope import snapshot_download
from datasets import Dataset
from transformers import AutoModelForCausalLM, AutoTokenizer
from trl import SFTTrainer
from peft import get_peft_model, LoraConfig, TaskType



2024-03-13 22:47:03,349 - modelscope - INFO - PyTorch version 2.0.1 Found.
2024-03-13 22:47:03,351 - modelscope - INFO - Loading ast index from /root/.cache/modelscope/ast_indexer
2024-03-13 22:47:03,512 - modelscope - INFO - Loading done! Current index file version is 1.13.1, with md5 ce2a1413d67bf5615c2e26752330cf67 and a total number of 972 components indexed
  from .autonotebook import tqdm as notebook_tqdm


[2024-03-13 22:47:05,047] [INFO] [real_accelerator.py:191:get_accelerator] Setting ds_accelerator to cuda (auto detect)


In [2]:
REPO_DIRECTORY = r'/root/'
ABC_DICT_PATH = r'/autodl-tmp/AIST4010-Cantonese-Translator-Data/ABC-Dict/abc_dict.csv'

def load_abc_dataset():
    abc_dict = pd.read_csv(REPO_DIRECTORY + ABC_DICT_PATH)
    abc_dataset = Dataset.from_pandas(abc_dict)
    return abc_dataset

abc_dict_dataset = load_abc_dataset()
print(abc_dict_dataset[:10])

{'en': ['Scoop up water', 'Ladle out soup', 'Third son of a rich family', 'Young pigeon or squab that has been roasted', "Husband's second older brother", 'This time', 'One dollar and sixty cents', 'Rabbit', 'Exit through turnstile', 'Chop something into cubes'], 'yue': ['㧾水', '㧾湯', '三少', '乳鴿', '二少', '今勻', '個六', '兔仔', '出閘', '切粒']}


In [3]:
model_path=r'/root/autodl-tmp/01ai/Yi-6B-Chat-4bits'

# model = Model.from_pretrained('01ai/Yi-6B')

# model = AutoModelForCausalLM.from_pretrained(
#     model_name,
#     device_map="auto",
#     torch_dtype='auto'
# ).eval()


# tokenizer = AutoTokenizer.from_pretrained(model_name)

In [4]:
import torch
# from modelscope import snapshot_download, AutoModel, AutoTokenizer
import os
model_dir = snapshot_download('01ai/Yi-6B-Chat-4bits', cache_dir='/root/autodl-tmp', revision='master')

In [5]:
base_tokenizer = AutoTokenizer.from_pretrained(model_path, use_fast=False)



# Since transformers 4.35.0, the GPT-Q/AWQ model can be loaded using AutoModelForCausalLM.
base_model = AutoModelForCausalLM.from_pretrained(
    model_path,
    device_map="auto",
    torch_dtype='auto',
).eval()

# # Prompt content: "hi"
# messages = [
#     {"role": "user", "content": "hi"}
# ]


# input_ids = tokenizer.apply_chat_template(conversation=messages, tokenize=True, add_generation_prompt=True, return_tensors='pt')
# output_ids = model.generate(input_ids.to('cuda'))
# response = tokenizer.decode(output_ids[0][input_ids.shape[1]:], skip_special_tokens=True)

# # Model response: "Hello! How can I assist you today?"
# print(response)

We suggest you to set `torch_dtype=torch.float16` for better efficiency with AWQ.


In [6]:
messages = [
    {"role": "user", "content": "hi"}
]

input_ids = base_tokenizer.apply_chat_template(conversation=messages, tokenize=True, add_generation_prompt=True, return_tensors='pt')
output_ids = base_model.generate(input_ids.to('cuda'))
response = base_tokenizer.decode(output_ids[0][input_ids.shape[1]:], skip_special_tokens=True)

# Model response: "Hello! How can I assist you today?"
print(response)

Hello! How can I assist you today?


In [7]:
print(input_ids)
print(output_ids)
print(base_tokenizer.decode(input_ids[0]))

tensor([[    6,  3903,   144,  7637,     7,   144,     6,   765, 13611,   144]])
tensor([[    6,  3903,   144,  7637,     7,   144,     6,   765, 13611,   144,
         25102,    99,  1742,   748,   616,  4366,   641,  2272,   100,     7]],
       device='cuda:0')
<|im_start|> user
hi<|im_end|> 
<|im_start|>assistant



In [8]:
def formatting_prompts_func(example):
    output_texts = []
    for i in range(len(example['en'])):
        text1 = f"""
        <|im_start|> user
        Translate the following words into Cantonese: 
        {example['en'][i]}
        <|im_start|>assistant
        {example['yue'][i]}
        """
        text2 = f"""
        <|im_start|> user
        Translate the following words into English:
        {example['yue'][i]}
        <|im_start|>assistant
        {example['en'][i]}
        """
        output_texts.append(text1)
        output_texts.append(text2)
    return output_texts

In [9]:
prompts = formatting_prompts_func(abc_dict_dataset[:10])
for prompt in prompts:
    print(prompt)


        <|im_start|> user
        Translate the following words into Cantonese: 
        Scoop up water
        <|im_start|>assistant
        㧾水
        

        <|im_start|> user
        Translate the following words into English:
        㧾水
        <|im_start|>assistant
        Scoop up water
        

        <|im_start|> user
        Translate the following words into Cantonese: 
        Ladle out soup
        <|im_start|>assistant
        㧾湯
        

        <|im_start|> user
        Translate the following words into English:
        㧾湯
        <|im_start|>assistant
        Ladle out soup
        

        <|im_start|> user
        Translate the following words into Cantonese: 
        Third son of a rich family
        <|im_start|>assistant
        三少
        

        <|im_start|> user
        Translate the following words into English:
        三少
        <|im_start|>assistant
        Third son of a rich family
        

        <|im_start|> user
        Translate the follow

In [10]:
for name, param in base_model.named_parameters():
    print(f"Parameter name: {name}")
    print(param)
    print("-" * 50)

Parameter name: model.embed_tokens.weight
Parameter containing:
tensor([[ 0.0000e+00, -5.9605e-08,  0.0000e+00,  ..., -5.9605e-08,
          5.9605e-08, -5.9605e-08],
        [-0.0000e+00,  0.0000e+00, -0.0000e+00,  ..., -0.0000e+00,
         -5.9605e-08, -5.9605e-08],
        [ 4.3945e-03,  3.1853e-04,  4.3030e-03,  ...,  3.3875e-03,
          5.4550e-04, -1.2451e-02],
        ...,
        [ 2.7100e-02,  1.6724e-02, -3.3447e-02,  ...,  2.8687e-03,
          1.2756e-02,  1.6602e-02],
        [-2.4048e-02, -2.3560e-02,  1.3977e-02,  ..., -3.7689e-03,
          2.5635e-02,  5.3406e-03],
        [ 1.5869e-02,  1.3550e-02,  3.9062e-02,  ...,  3.1006e-02,
         -7.5378e-03, -5.8899e-03]], device='cuda:0', dtype=torch.float16,
       requires_grad=True)
--------------------------------------------------
Parameter name: model.layers.0.input_layernorm.weight
Parameter containing:
tensor([ 0.0046,  0.0053, -0.0004,  ...,  0.0048,  0.0043,  0.0042],
       device='cuda:0', dtype=torch.float16

In [11]:
print(base_model.config)
base_model.load_adapter(model_path)

LlamaConfig {
  "_name_or_path": "/root/autodl-tmp/01ai/Yi-6B-Chat-4bits",
  "architectures": [
    "LlamaForCausalLM"
  ],
  "attention_bias": false,
  "attention_dropout": 0.0,
  "bos_token_id": 1,
  "eos_token_id": 2,
  "hidden_act": "silu",
  "hidden_size": 4096,
  "initializer_range": 0.02,
  "intermediate_size": 11008,
  "max_position_embeddings": 4096,
  "model_type": "llama",
  "num_attention_heads": 32,
  "num_hidden_layers": 32,
  "num_key_value_heads": 4,
  "pretraining_tp": 1,
  "quantization_config": {
    "backend": "autoawq",
    "bits": 4,
    "do_fuse": false,
    "fuse_max_seq_len": null,
    "group_size": 128,
    "modules_to_fuse": null,
    "modules_to_not_convert": null,
    "quant_method": "awq",
    "version": "gemm",
    "zero_point": true
  },
  "rms_norm_eps": 1e-05,
  "rope_scaling": null,
  "rope_theta": 5000000.0,
  "tie_word_embeddings": false,
  "torch_dtype": "float16",
  "transformers_version": "4.38.2",
  "use_cache": true,
  "vocab_size": 64000
}



ValueError: adapter model file not found in /root/autodl-tmp/01ai/Yi-6B-Chat-4bits. Make sure you are passing the correct path to the adapter model.

In [None]:
# lora_config = LoraConfig(
#     r=32, # Rank
#     lora_alpha=32,
#     target_modules = ["k_proj", "q_proj", "v_proj"],
#     lora_dropout=0.05,
#     bias="none",
#     task_type=TaskType.SEQ_2_SEQ_LM # FLAN-T5
# )
# peft_model = get_peft_model(base_model, 
#                             lora_config)

ValueError: Target module WQLinear_GEMM(in_features=4096, out_features=4096, bias=False, w_bit=4, group_size=128) is not supported. Currently, only `torch.nn.Linear` and `Conv1D` are supported.

In [12]:
trainer = SFTTrainer(
    base_model,
    train_dataset= abc_dict_dataset,
    formatting_func=formatting_prompts_func,
)
trainer.train()

dataloader_config = DataLoaderConfiguration(dispatch_batches=None, split_batches=False, even_batches=True, use_seedable_sampler=True)
Detected kernel version 5.4.0, which is below the recommended minimum of 5.5.0; this can cause the process to hang. It is recommended to upgrade the kernel to the minimum version or higher.


ValueError: You cannot perform fine-tuning on purely quantized models. Please attach trainable adapters on top of the quantized model to correctly perform fine-tuning. Please see: https://huggingface.co/docs/transformers/peft for more details