## 1.   Load pretrained model and tokenizer

In [1]:
import torch
from transformers import AutoTokenizer, AutoModelForCausalLM
from peft import PromptEncoderConfig, get_peft_model, TaskType

tokenizer = AutoTokenizer.from_pretrained("internlm/internlm2-chat-1_8b", trust_remote_code=True)
model = AutoModelForCausalLM.from_pretrained("internlm/internlm2-chat-1_8b", device_map="cuda",trust_remote_code=True, torch_dtype=torch.float16)
# model = model.eval()
config = PromptEncoderConfig(
    peft_type="P_TUNING",
    task_type=TaskType.CAUSAL_LM,
    num_virtual_tokens=10,
    token_dim=2048,
    # num_transformer_submodules=1,
    # num_attention_heads=12,
    # num_layers=12,
    encoder_reparameterization_type="MLP",
    encoder_hidden_size=2048,
)

model = get_peft_model(model, config)

Loading checkpoint shards:   0%|          | 0/2 [00:00<?, ?it/s]

## 2.   Prepare dataset

In [2]:
import pandas as pd

# DataFrame to Json
df_short_ans = pd.read_excel('./data/2024-02-28-公告测评集.xls', header=0)
df_short_ans['answer'] = df_short_ans['answer'].astype(str)
# df_short_ans.to_json('./data/data_short_ans_train.json',orient='records')

df_choice = pd.read_excel('./data/2024-02-28-公告测评集-选项.xls', header=0)
df_choice['prompt'] = ''
for i, row in df_choice.iterrows():
    prompt = '''下面是几条对上述公告相关的理解或问题答案，请选出一条或多条理解到位、答案准确的选项。\n\n'''
    question = row['评测问题'] + '\n\n'
    choice = (
        "选项A:" + str(row['选项A']), "选项B:" + str(row['选项B']), "选项C:" + str(row['选项C']), "选项D:" + str(row['选项D'])
    )
    choice_str = "\n".join(choice)
    df_choice.loc[i, 'question'] = question + prompt + choice_str

df_choice_2 = df_choice[['领域分类', 'question', '答案']].copy()
df_choice_2.rename(columns={'领域分类': 'type', '答案': 'answer'}, inplace=True)
# df_choice_2.to_json('./data/data_choice_train.json',orient='records')

df_combine = pd.concat([df_short_ans, df_choice_2], axis=0, ignore_index=True)
df_combine.to_json('./data/data_combine_train.json',orient='records')

## 3.   Load dataset and encode

In [3]:
import pandas as pd
from transformers import TrainingArguments, Trainer, DataCollatorForSeq2Seq, DataCollatorForLanguageModeling
from typing import Dict
from datasets import Dataset, load_dataset
import numpy as np


def encode_fn(text, tokenizer, max_length,return_attention_mask=False):
    return tokenizer(text, max_length=max_length, padding="max_length", truncation=True,return_attention_mask=return_attention_mask)


def get_dataset(file: str, split: str, encode_fn: callable, encode_args: dict,  cache_dir: str='.cache') -> Dataset:
    """
    Load a dataset
    """
    eos_token = tokenizer.eos_token
    dataset = load_dataset('json', data_files=file, split=split, cache_dir=cache_dir)
    def merge_prompt_and_responses(sample: dict):
        # add an eos token note that end of sentence, using in generate.
        # encoded_prompt = tokenizer([e + eos_token for e in sample['question']], truncation=False, padding=True, return_attention_mask=True)
        # encoded_response = tokenizer([e + eos_token for e in sample['answer']], truncation=False, padding=True, return_attention_mask=False)
        encoded_prompt = tokenizer(sample['question'] + eos_token, truncation=False, padding=True, return_attention_mask=True)
        encoded_response = tokenizer(sample['answer'] + eos_token, truncation=False, padding=True, return_attention_mask=False)
        encoded_q_type = tokenizer(sample['type'] + eos_token, truncation=False, padding=True, return_attention_mask=True)
        # input_ids = [np.array(item + [eos_token_id], dtype=np.uint32) for item in encoded_prompt["input_ids"]]
        # labels = [np.array(item + [eos_token_id], dtype=np.uint32) for item in encoded_response["input_ids"]]
        # prompt = encode_fn(sample['question'] + '[EOS]', return_attention_mask=True)
        # answer = encode_fn(sample['answer'] + '[EOS]', return_attention_mask=False)
        # title = encode_fn(sample['title'] + '[EOS]', **encode_args)
        # print(type(encoded_prompt.input_ids),'\n',type(encoded_prompt.attention_mask),'\n',labels)
        return {
            'input_ids': encoded_prompt.input_ids,
            'q_type': encoded_q_type.input_ids,
            'labels': encoded_response.input_ids,       
            'attention_mask': encoded_prompt.attention_mask,
            'q_type_attention_mask' : encoded_q_type.attention_mask,
        }

    # dataset = dataset.map(merge_prompt_and_responses, batched=True, batch_size=1)
    dataset = dataset.map(merge_prompt_and_responses)
    return dataset

dataset = get_dataset(
    file='./data/data_combine_train.json', 
    split="train", 
    encode_fn=encode_fn, 
    encode_args={"tokenizer": tokenizer, "max_length": 1024}, 
    cache_dir=".cache"
)


Generating train split: 0 examples [00:00, ? examples/s]

Map:   0%|          | 0/200 [00:00<?, ? examples/s]

## 4.   Train

In [4]:
import time

args = TrainingArguments(
    output_dir='./train_result/',
    overwrite_output_dir=True,
    per_device_train_batch_size=1,
    auto_find_batch_size=True,  # 防止OOM
    gradient_accumulation_steps=10,
    learning_rate=1e-3,
    logging_steps=10,
    num_train_epochs=10,
    log_level='info',
    save_steps=10,
    save_total_limit=20,
    # fp16=config.fp16,
    # logging_first_step=config.logging_first_step,
    warmup_steps=50,
    seed=42,
)

# trainer
trainer = Trainer(
    model = model,
    args = args,
    train_dataset = dataset,
    data_collator = DataCollatorForLanguageModeling(tokenizer = tokenizer, mlm=False)
    # data_collator = DataCollatorForSeq2Seq(tokenizer = tokenizer)
)

# 训练模型
trainer.train()

loss_log = pd.DataFrame(trainer.state.log_history)
loss_log.to_csv(f"{'./logs'}/p_tune_train_log_{time.strftime('%Y%m%d-%H%M')}.csv")

# trainer.save_model('./trained_model')

Detected kernel version 3.10.0, which is below the recommended minimum of 5.5.0; this can cause the process to hang. It is recommended to upgrade the kernel to the minimum version or higher.


You have loaded a model on multiple GPUs. `is_model_parallel` attribute will be force-set to `True` to avoid any unexpected behavior such as device placement mismatching.


The following columns in the training set don't have a corresponding argument in `PeftModelForCausalLM.forward` and have been ignored: answer, question, q_type_attention_mask, type, q_type. If answer, question, q_type_attention_mask, type, q_type are not expected by `PeftModelForCausalLM.forward`,  you can safely ignore this message.


***** Running training *****


  Num examples = 200


  Num Epochs = 10


  Instantaneous batch size per device = 1


  Total train batch size (w. parallel, distributed & accumulation) = 10


  Gradient Accumulation steps = 10


  Total optimization steps = 200


  Number of trainable parameters = 12,609,536


Step,Training Loss
10,2.0283
20,1.8825
30,1.7828
40,1.6358
50,1.6084
60,1.5577
70,1.5459
80,1.5233
90,1.4859
100,1.4426


Checkpoint destination directory ./train_result/checkpoint-10 already exists and is non-empty. Saving will proceed but saved results may be invalid.


Saving model checkpoint to ./train_result/checkpoint-10


loading configuration file config.json from cache at /root/.cache/huggingface/hub/models--internlm--internlm2-chat-1_8b/snapshots/f842a52ed9579d662d5e67c8c0f8c67b3eb877a8/config.json


Model config InternLM2Config {
  "architectures": [
    "InternLM2ForCausalLM"
  ],
  "attn_implementation": "eager",
  "auto_map": {
    "AutoConfig": "internlm/internlm2-chat-1_8b--configuration_internlm2.InternLM2Config",
    "AutoModel": "internlm/internlm2-chat-1_8b--modeling_internlm2.InternLM2ForCausalLM",
    "AutoModelForCausalLM": "internlm/internlm2-chat-1_8b--modeling_internlm2.InternLM2ForCausalLM"
  },
  "bias": false,
  "bos_token_id": 1,
  "eos_token_id": 2,
  "hidden_act": "silu",
  "hidden_size": 2048,
  "initializer_range": 0.02,
  "intermediate_size": 8192,
  "max_position_embeddings": 32768,
  "model_type": "internlm2",
  "num_attention_heads": 16,
  "num_hidden_layers": 24,
  "num_key_value_heads": 8,
  "pad_token_id": 2,
  "rms_norm_eps": 1e-05,
  "rope_scaling": {
    "factor": 2.0,
    "type": "dynamic"
  },
  "rope_theta": 1000000,
  "tie_word_embeddings": false,
  "torch_dtype": "float16",
  "transformers_version": "4.38.2",
  "use_cache": true,
  "vocab_size

Checkpoint destination directory ./train_result/checkpoint-20 already exists and is non-empty. Saving will proceed but saved results may be invalid.


Saving model checkpoint to ./train_result/checkpoint-20


loading configuration file config.json from cache at /root/.cache/huggingface/hub/models--internlm--internlm2-chat-1_8b/snapshots/f842a52ed9579d662d5e67c8c0f8c67b3eb877a8/config.json


Model config InternLM2Config {
  "architectures": [
    "InternLM2ForCausalLM"
  ],
  "attn_implementation": "eager",
  "auto_map": {
    "AutoConfig": "internlm/internlm2-chat-1_8b--configuration_internlm2.InternLM2Config",
    "AutoModel": "internlm/internlm2-chat-1_8b--modeling_internlm2.InternLM2ForCausalLM",
    "AutoModelForCausalLM": "internlm/internlm2-chat-1_8b--modeling_internlm2.InternLM2ForCausalLM"
  },
  "bias": false,
  "bos_token_id": 1,
  "eos_token_id": 2,
  "hidden_act": "silu",
  "hidden_size": 2048,
  "initializer_range": 0.02,
  "intermediate_size": 8192,
  "max_position_embeddings": 32768,
  "model_type": "internlm2",
  "num_attention_heads": 16,
  "num_hidden_layers": 24,
  "num_key_value_heads": 8,
  "pad_token_id": 2,
  "rms_norm_eps": 1e-05,
  "rope_scaling": {
    "factor": 2.0,
    "type": "dynamic"
  },
  "rope_theta": 1000000,
  "tie_word_embeddings": false,
  "torch_dtype": "float16",
  "transformers_version": "4.38.2",
  "use_cache": true,
  "vocab_size

Checkpoint destination directory ./train_result/checkpoint-30 already exists and is non-empty. Saving will proceed but saved results may be invalid.


Saving model checkpoint to ./train_result/checkpoint-30


loading configuration file config.json from cache at /root/.cache/huggingface/hub/models--internlm--internlm2-chat-1_8b/snapshots/f842a52ed9579d662d5e67c8c0f8c67b3eb877a8/config.json


Model config InternLM2Config {
  "architectures": [
    "InternLM2ForCausalLM"
  ],
  "attn_implementation": "eager",
  "auto_map": {
    "AutoConfig": "internlm/internlm2-chat-1_8b--configuration_internlm2.InternLM2Config",
    "AutoModel": "internlm/internlm2-chat-1_8b--modeling_internlm2.InternLM2ForCausalLM",
    "AutoModelForCausalLM": "internlm/internlm2-chat-1_8b--modeling_internlm2.InternLM2ForCausalLM"
  },
  "bias": false,
  "bos_token_id": 1,
  "eos_token_id": 2,
  "hidden_act": "silu",
  "hidden_size": 2048,
  "initializer_range": 0.02,
  "intermediate_size": 8192,
  "max_position_embeddings": 32768,
  "model_type": "internlm2",
  "num_attention_heads": 16,
  "num_hidden_layers": 24,
  "num_key_value_heads": 8,
  "pad_token_id": 2,
  "rms_norm_eps": 1e-05,
  "rope_scaling": {
    "factor": 2.0,
    "type": "dynamic"
  },
  "rope_theta": 1000000,
  "tie_word_embeddings": false,
  "torch_dtype": "float16",
  "transformers_version": "4.38.2",
  "use_cache": true,
  "vocab_size

Checkpoint destination directory ./train_result/checkpoint-40 already exists and is non-empty. Saving will proceed but saved results may be invalid.


Saving model checkpoint to ./train_result/checkpoint-40


loading configuration file config.json from cache at /root/.cache/huggingface/hub/models--internlm--internlm2-chat-1_8b/snapshots/f842a52ed9579d662d5e67c8c0f8c67b3eb877a8/config.json


Model config InternLM2Config {
  "architectures": [
    "InternLM2ForCausalLM"
  ],
  "attn_implementation": "eager",
  "auto_map": {
    "AutoConfig": "internlm/internlm2-chat-1_8b--configuration_internlm2.InternLM2Config",
    "AutoModel": "internlm/internlm2-chat-1_8b--modeling_internlm2.InternLM2ForCausalLM",
    "AutoModelForCausalLM": "internlm/internlm2-chat-1_8b--modeling_internlm2.InternLM2ForCausalLM"
  },
  "bias": false,
  "bos_token_id": 1,
  "eos_token_id": 2,
  "hidden_act": "silu",
  "hidden_size": 2048,
  "initializer_range": 0.02,
  "intermediate_size": 8192,
  "max_position_embeddings": 32768,
  "model_type": "internlm2",
  "num_attention_heads": 16,
  "num_hidden_layers": 24,
  "num_key_value_heads": 8,
  "pad_token_id": 2,
  "rms_norm_eps": 1e-05,
  "rope_scaling": {
    "factor": 2.0,
    "type": "dynamic"
  },
  "rope_theta": 1000000,
  "tie_word_embeddings": false,
  "torch_dtype": "float16",
  "transformers_version": "4.38.2",
  "use_cache": true,
  "vocab_size

Checkpoint destination directory ./train_result/checkpoint-50 already exists and is non-empty. Saving will proceed but saved results may be invalid.


Saving model checkpoint to ./train_result/checkpoint-50


loading configuration file config.json from cache at /root/.cache/huggingface/hub/models--internlm--internlm2-chat-1_8b/snapshots/f842a52ed9579d662d5e67c8c0f8c67b3eb877a8/config.json


Model config InternLM2Config {
  "architectures": [
    "InternLM2ForCausalLM"
  ],
  "attn_implementation": "eager",
  "auto_map": {
    "AutoConfig": "internlm/internlm2-chat-1_8b--configuration_internlm2.InternLM2Config",
    "AutoModel": "internlm/internlm2-chat-1_8b--modeling_internlm2.InternLM2ForCausalLM",
    "AutoModelForCausalLM": "internlm/internlm2-chat-1_8b--modeling_internlm2.InternLM2ForCausalLM"
  },
  "bias": false,
  "bos_token_id": 1,
  "eos_token_id": 2,
  "hidden_act": "silu",
  "hidden_size": 2048,
  "initializer_range": 0.02,
  "intermediate_size": 8192,
  "max_position_embeddings": 32768,
  "model_type": "internlm2",
  "num_attention_heads": 16,
  "num_hidden_layers": 24,
  "num_key_value_heads": 8,
  "pad_token_id": 2,
  "rms_norm_eps": 1e-05,
  "rope_scaling": {
    "factor": 2.0,
    "type": "dynamic"
  },
  "rope_theta": 1000000,
  "tie_word_embeddings": false,
  "torch_dtype": "float16",
  "transformers_version": "4.38.2",
  "use_cache": true,
  "vocab_size

Checkpoint destination directory ./train_result/checkpoint-60 already exists and is non-empty. Saving will proceed but saved results may be invalid.


Saving model checkpoint to ./train_result/checkpoint-60


loading configuration file config.json from cache at /root/.cache/huggingface/hub/models--internlm--internlm2-chat-1_8b/snapshots/f842a52ed9579d662d5e67c8c0f8c67b3eb877a8/config.json


Model config InternLM2Config {
  "architectures": [
    "InternLM2ForCausalLM"
  ],
  "attn_implementation": "eager",
  "auto_map": {
    "AutoConfig": "internlm/internlm2-chat-1_8b--configuration_internlm2.InternLM2Config",
    "AutoModel": "internlm/internlm2-chat-1_8b--modeling_internlm2.InternLM2ForCausalLM",
    "AutoModelForCausalLM": "internlm/internlm2-chat-1_8b--modeling_internlm2.InternLM2ForCausalLM"
  },
  "bias": false,
  "bos_token_id": 1,
  "eos_token_id": 2,
  "hidden_act": "silu",
  "hidden_size": 2048,
  "initializer_range": 0.02,
  "intermediate_size": 8192,
  "max_position_embeddings": 32768,
  "model_type": "internlm2",
  "num_attention_heads": 16,
  "num_hidden_layers": 24,
  "num_key_value_heads": 8,
  "pad_token_id": 2,
  "rms_norm_eps": 1e-05,
  "rope_scaling": {
    "factor": 2.0,
    "type": "dynamic"
  },
  "rope_theta": 1000000,
  "tie_word_embeddings": false,
  "torch_dtype": "float16",
  "transformers_version": "4.38.2",
  "use_cache": true,
  "vocab_size

Checkpoint destination directory ./train_result/checkpoint-70 already exists and is non-empty. Saving will proceed but saved results may be invalid.


Saving model checkpoint to ./train_result/checkpoint-70


loading configuration file config.json from cache at /root/.cache/huggingface/hub/models--internlm--internlm2-chat-1_8b/snapshots/f842a52ed9579d662d5e67c8c0f8c67b3eb877a8/config.json


Model config InternLM2Config {
  "architectures": [
    "InternLM2ForCausalLM"
  ],
  "attn_implementation": "eager",
  "auto_map": {
    "AutoConfig": "internlm/internlm2-chat-1_8b--configuration_internlm2.InternLM2Config",
    "AutoModel": "internlm/internlm2-chat-1_8b--modeling_internlm2.InternLM2ForCausalLM",
    "AutoModelForCausalLM": "internlm/internlm2-chat-1_8b--modeling_internlm2.InternLM2ForCausalLM"
  },
  "bias": false,
  "bos_token_id": 1,
  "eos_token_id": 2,
  "hidden_act": "silu",
  "hidden_size": 2048,
  "initializer_range": 0.02,
  "intermediate_size": 8192,
  "max_position_embeddings": 32768,
  "model_type": "internlm2",
  "num_attention_heads": 16,
  "num_hidden_layers": 24,
  "num_key_value_heads": 8,
  "pad_token_id": 2,
  "rms_norm_eps": 1e-05,
  "rope_scaling": {
    "factor": 2.0,
    "type": "dynamic"
  },
  "rope_theta": 1000000,
  "tie_word_embeddings": false,
  "torch_dtype": "float16",
  "transformers_version": "4.38.2",
  "use_cache": true,
  "vocab_size

Checkpoint destination directory ./train_result/checkpoint-80 already exists and is non-empty. Saving will proceed but saved results may be invalid.


Saving model checkpoint to ./train_result/checkpoint-80


loading configuration file config.json from cache at /root/.cache/huggingface/hub/models--internlm--internlm2-chat-1_8b/snapshots/f842a52ed9579d662d5e67c8c0f8c67b3eb877a8/config.json


Model config InternLM2Config {
  "architectures": [
    "InternLM2ForCausalLM"
  ],
  "attn_implementation": "eager",
  "auto_map": {
    "AutoConfig": "internlm/internlm2-chat-1_8b--configuration_internlm2.InternLM2Config",
    "AutoModel": "internlm/internlm2-chat-1_8b--modeling_internlm2.InternLM2ForCausalLM",
    "AutoModelForCausalLM": "internlm/internlm2-chat-1_8b--modeling_internlm2.InternLM2ForCausalLM"
  },
  "bias": false,
  "bos_token_id": 1,
  "eos_token_id": 2,
  "hidden_act": "silu",
  "hidden_size": 2048,
  "initializer_range": 0.02,
  "intermediate_size": 8192,
  "max_position_embeddings": 32768,
  "model_type": "internlm2",
  "num_attention_heads": 16,
  "num_hidden_layers": 24,
  "num_key_value_heads": 8,
  "pad_token_id": 2,
  "rms_norm_eps": 1e-05,
  "rope_scaling": {
    "factor": 2.0,
    "type": "dynamic"
  },
  "rope_theta": 1000000,
  "tie_word_embeddings": false,
  "torch_dtype": "float16",
  "transformers_version": "4.38.2",
  "use_cache": true,
  "vocab_size

Checkpoint destination directory ./train_result/checkpoint-90 already exists and is non-empty. Saving will proceed but saved results may be invalid.


Saving model checkpoint to ./train_result/checkpoint-90


loading configuration file config.json from cache at /root/.cache/huggingface/hub/models--internlm--internlm2-chat-1_8b/snapshots/f842a52ed9579d662d5e67c8c0f8c67b3eb877a8/config.json


Model config InternLM2Config {
  "architectures": [
    "InternLM2ForCausalLM"
  ],
  "attn_implementation": "eager",
  "auto_map": {
    "AutoConfig": "internlm/internlm2-chat-1_8b--configuration_internlm2.InternLM2Config",
    "AutoModel": "internlm/internlm2-chat-1_8b--modeling_internlm2.InternLM2ForCausalLM",
    "AutoModelForCausalLM": "internlm/internlm2-chat-1_8b--modeling_internlm2.InternLM2ForCausalLM"
  },
  "bias": false,
  "bos_token_id": 1,
  "eos_token_id": 2,
  "hidden_act": "silu",
  "hidden_size": 2048,
  "initializer_range": 0.02,
  "intermediate_size": 8192,
  "max_position_embeddings": 32768,
  "model_type": "internlm2",
  "num_attention_heads": 16,
  "num_hidden_layers": 24,
  "num_key_value_heads": 8,
  "pad_token_id": 2,
  "rms_norm_eps": 1e-05,
  "rope_scaling": {
    "factor": 2.0,
    "type": "dynamic"
  },
  "rope_theta": 1000000,
  "tie_word_embeddings": false,
  "torch_dtype": "float16",
  "transformers_version": "4.38.2",
  "use_cache": true,
  "vocab_size

Checkpoint destination directory ./train_result/checkpoint-100 already exists and is non-empty. Saving will proceed but saved results may be invalid.


Saving model checkpoint to ./train_result/checkpoint-100


loading configuration file config.json from cache at /root/.cache/huggingface/hub/models--internlm--internlm2-chat-1_8b/snapshots/f842a52ed9579d662d5e67c8c0f8c67b3eb877a8/config.json


Model config InternLM2Config {
  "architectures": [
    "InternLM2ForCausalLM"
  ],
  "attn_implementation": "eager",
  "auto_map": {
    "AutoConfig": "internlm/internlm2-chat-1_8b--configuration_internlm2.InternLM2Config",
    "AutoModel": "internlm/internlm2-chat-1_8b--modeling_internlm2.InternLM2ForCausalLM",
    "AutoModelForCausalLM": "internlm/internlm2-chat-1_8b--modeling_internlm2.InternLM2ForCausalLM"
  },
  "bias": false,
  "bos_token_id": 1,
  "eos_token_id": 2,
  "hidden_act": "silu",
  "hidden_size": 2048,
  "initializer_range": 0.02,
  "intermediate_size": 8192,
  "max_position_embeddings": 32768,
  "model_type": "internlm2",
  "num_attention_heads": 16,
  "num_hidden_layers": 24,
  "num_key_value_heads": 8,
  "pad_token_id": 2,
  "rms_norm_eps": 1e-05,
  "rope_scaling": {
    "factor": 2.0,
    "type": "dynamic"
  },
  "rope_theta": 1000000,
  "tie_word_embeddings": false,
  "torch_dtype": "float16",
  "transformers_version": "4.38.2",
  "use_cache": true,
  "vocab_size

Checkpoint destination directory ./train_result/checkpoint-110 already exists and is non-empty. Saving will proceed but saved results may be invalid.


Saving model checkpoint to ./train_result/checkpoint-110


loading configuration file config.json from cache at /root/.cache/huggingface/hub/models--internlm--internlm2-chat-1_8b/snapshots/f842a52ed9579d662d5e67c8c0f8c67b3eb877a8/config.json


Model config InternLM2Config {
  "architectures": [
    "InternLM2ForCausalLM"
  ],
  "attn_implementation": "eager",
  "auto_map": {
    "AutoConfig": "internlm/internlm2-chat-1_8b--configuration_internlm2.InternLM2Config",
    "AutoModel": "internlm/internlm2-chat-1_8b--modeling_internlm2.InternLM2ForCausalLM",
    "AutoModelForCausalLM": "internlm/internlm2-chat-1_8b--modeling_internlm2.InternLM2ForCausalLM"
  },
  "bias": false,
  "bos_token_id": 1,
  "eos_token_id": 2,
  "hidden_act": "silu",
  "hidden_size": 2048,
  "initializer_range": 0.02,
  "intermediate_size": 8192,
  "max_position_embeddings": 32768,
  "model_type": "internlm2",
  "num_attention_heads": 16,
  "num_hidden_layers": 24,
  "num_key_value_heads": 8,
  "pad_token_id": 2,
  "rms_norm_eps": 1e-05,
  "rope_scaling": {
    "factor": 2.0,
    "type": "dynamic"
  },
  "rope_theta": 1000000,
  "tie_word_embeddings": false,
  "torch_dtype": "float16",
  "transformers_version": "4.38.2",
  "use_cache": true,
  "vocab_size

Checkpoint destination directory ./train_result/checkpoint-120 already exists and is non-empty. Saving will proceed but saved results may be invalid.


Saving model checkpoint to ./train_result/checkpoint-120


loading configuration file config.json from cache at /root/.cache/huggingface/hub/models--internlm--internlm2-chat-1_8b/snapshots/f842a52ed9579d662d5e67c8c0f8c67b3eb877a8/config.json


Model config InternLM2Config {
  "architectures": [
    "InternLM2ForCausalLM"
  ],
  "attn_implementation": "eager",
  "auto_map": {
    "AutoConfig": "internlm/internlm2-chat-1_8b--configuration_internlm2.InternLM2Config",
    "AutoModel": "internlm/internlm2-chat-1_8b--modeling_internlm2.InternLM2ForCausalLM",
    "AutoModelForCausalLM": "internlm/internlm2-chat-1_8b--modeling_internlm2.InternLM2ForCausalLM"
  },
  "bias": false,
  "bos_token_id": 1,
  "eos_token_id": 2,
  "hidden_act": "silu",
  "hidden_size": 2048,
  "initializer_range": 0.02,
  "intermediate_size": 8192,
  "max_position_embeddings": 32768,
  "model_type": "internlm2",
  "num_attention_heads": 16,
  "num_hidden_layers": 24,
  "num_key_value_heads": 8,
  "pad_token_id": 2,
  "rms_norm_eps": 1e-05,
  "rope_scaling": {
    "factor": 2.0,
    "type": "dynamic"
  },
  "rope_theta": 1000000,
  "tie_word_embeddings": false,
  "torch_dtype": "float16",
  "transformers_version": "4.38.2",
  "use_cache": true,
  "vocab_size

Checkpoint destination directory ./train_result/checkpoint-130 already exists and is non-empty. Saving will proceed but saved results may be invalid.


Saving model checkpoint to ./train_result/checkpoint-130


loading configuration file config.json from cache at /root/.cache/huggingface/hub/models--internlm--internlm2-chat-1_8b/snapshots/f842a52ed9579d662d5e67c8c0f8c67b3eb877a8/config.json


Model config InternLM2Config {
  "architectures": [
    "InternLM2ForCausalLM"
  ],
  "attn_implementation": "eager",
  "auto_map": {
    "AutoConfig": "internlm/internlm2-chat-1_8b--configuration_internlm2.InternLM2Config",
    "AutoModel": "internlm/internlm2-chat-1_8b--modeling_internlm2.InternLM2ForCausalLM",
    "AutoModelForCausalLM": "internlm/internlm2-chat-1_8b--modeling_internlm2.InternLM2ForCausalLM"
  },
  "bias": false,
  "bos_token_id": 1,
  "eos_token_id": 2,
  "hidden_act": "silu",
  "hidden_size": 2048,
  "initializer_range": 0.02,
  "intermediate_size": 8192,
  "max_position_embeddings": 32768,
  "model_type": "internlm2",
  "num_attention_heads": 16,
  "num_hidden_layers": 24,
  "num_key_value_heads": 8,
  "pad_token_id": 2,
  "rms_norm_eps": 1e-05,
  "rope_scaling": {
    "factor": 2.0,
    "type": "dynamic"
  },
  "rope_theta": 1000000,
  "tie_word_embeddings": false,
  "torch_dtype": "float16",
  "transformers_version": "4.38.2",
  "use_cache": true,
  "vocab_size

Checkpoint destination directory ./train_result/checkpoint-140 already exists and is non-empty. Saving will proceed but saved results may be invalid.


Saving model checkpoint to ./train_result/checkpoint-140


loading configuration file config.json from cache at /root/.cache/huggingface/hub/models--internlm--internlm2-chat-1_8b/snapshots/f842a52ed9579d662d5e67c8c0f8c67b3eb877a8/config.json


Model config InternLM2Config {
  "architectures": [
    "InternLM2ForCausalLM"
  ],
  "attn_implementation": "eager",
  "auto_map": {
    "AutoConfig": "internlm/internlm2-chat-1_8b--configuration_internlm2.InternLM2Config",
    "AutoModel": "internlm/internlm2-chat-1_8b--modeling_internlm2.InternLM2ForCausalLM",
    "AutoModelForCausalLM": "internlm/internlm2-chat-1_8b--modeling_internlm2.InternLM2ForCausalLM"
  },
  "bias": false,
  "bos_token_id": 1,
  "eos_token_id": 2,
  "hidden_act": "silu",
  "hidden_size": 2048,
  "initializer_range": 0.02,
  "intermediate_size": 8192,
  "max_position_embeddings": 32768,
  "model_type": "internlm2",
  "num_attention_heads": 16,
  "num_hidden_layers": 24,
  "num_key_value_heads": 8,
  "pad_token_id": 2,
  "rms_norm_eps": 1e-05,
  "rope_scaling": {
    "factor": 2.0,
    "type": "dynamic"
  },
  "rope_theta": 1000000,
  "tie_word_embeddings": false,
  "torch_dtype": "float16",
  "transformers_version": "4.38.2",
  "use_cache": true,
  "vocab_size

Checkpoint destination directory ./train_result/checkpoint-150 already exists and is non-empty. Saving will proceed but saved results may be invalid.


Saving model checkpoint to ./train_result/checkpoint-150


loading configuration file config.json from cache at /root/.cache/huggingface/hub/models--internlm--internlm2-chat-1_8b/snapshots/f842a52ed9579d662d5e67c8c0f8c67b3eb877a8/config.json


Model config InternLM2Config {
  "architectures": [
    "InternLM2ForCausalLM"
  ],
  "attn_implementation": "eager",
  "auto_map": {
    "AutoConfig": "internlm/internlm2-chat-1_8b--configuration_internlm2.InternLM2Config",
    "AutoModel": "internlm/internlm2-chat-1_8b--modeling_internlm2.InternLM2ForCausalLM",
    "AutoModelForCausalLM": "internlm/internlm2-chat-1_8b--modeling_internlm2.InternLM2ForCausalLM"
  },
  "bias": false,
  "bos_token_id": 1,
  "eos_token_id": 2,
  "hidden_act": "silu",
  "hidden_size": 2048,
  "initializer_range": 0.02,
  "intermediate_size": 8192,
  "max_position_embeddings": 32768,
  "model_type": "internlm2",
  "num_attention_heads": 16,
  "num_hidden_layers": 24,
  "num_key_value_heads": 8,
  "pad_token_id": 2,
  "rms_norm_eps": 1e-05,
  "rope_scaling": {
    "factor": 2.0,
    "type": "dynamic"
  },
  "rope_theta": 1000000,
  "tie_word_embeddings": false,
  "torch_dtype": "float16",
  "transformers_version": "4.38.2",
  "use_cache": true,
  "vocab_size

Checkpoint destination directory ./train_result/checkpoint-160 already exists and is non-empty. Saving will proceed but saved results may be invalid.


Saving model checkpoint to ./train_result/checkpoint-160


loading configuration file config.json from cache at /root/.cache/huggingface/hub/models--internlm--internlm2-chat-1_8b/snapshots/f842a52ed9579d662d5e67c8c0f8c67b3eb877a8/config.json


Model config InternLM2Config {
  "architectures": [
    "InternLM2ForCausalLM"
  ],
  "attn_implementation": "eager",
  "auto_map": {
    "AutoConfig": "internlm/internlm2-chat-1_8b--configuration_internlm2.InternLM2Config",
    "AutoModel": "internlm/internlm2-chat-1_8b--modeling_internlm2.InternLM2ForCausalLM",
    "AutoModelForCausalLM": "internlm/internlm2-chat-1_8b--modeling_internlm2.InternLM2ForCausalLM"
  },
  "bias": false,
  "bos_token_id": 1,
  "eos_token_id": 2,
  "hidden_act": "silu",
  "hidden_size": 2048,
  "initializer_range": 0.02,
  "intermediate_size": 8192,
  "max_position_embeddings": 32768,
  "model_type": "internlm2",
  "num_attention_heads": 16,
  "num_hidden_layers": 24,
  "num_key_value_heads": 8,
  "pad_token_id": 2,
  "rms_norm_eps": 1e-05,
  "rope_scaling": {
    "factor": 2.0,
    "type": "dynamic"
  },
  "rope_theta": 1000000,
  "tie_word_embeddings": false,
  "torch_dtype": "float16",
  "transformers_version": "4.38.2",
  "use_cache": true,
  "vocab_size

Checkpoint destination directory ./train_result/checkpoint-170 already exists and is non-empty. Saving will proceed but saved results may be invalid.


Saving model checkpoint to ./train_result/checkpoint-170


loading configuration file config.json from cache at /root/.cache/huggingface/hub/models--internlm--internlm2-chat-1_8b/snapshots/f842a52ed9579d662d5e67c8c0f8c67b3eb877a8/config.json


Model config InternLM2Config {
  "architectures": [
    "InternLM2ForCausalLM"
  ],
  "attn_implementation": "eager",
  "auto_map": {
    "AutoConfig": "internlm/internlm2-chat-1_8b--configuration_internlm2.InternLM2Config",
    "AutoModel": "internlm/internlm2-chat-1_8b--modeling_internlm2.InternLM2ForCausalLM",
    "AutoModelForCausalLM": "internlm/internlm2-chat-1_8b--modeling_internlm2.InternLM2ForCausalLM"
  },
  "bias": false,
  "bos_token_id": 1,
  "eos_token_id": 2,
  "hidden_act": "silu",
  "hidden_size": 2048,
  "initializer_range": 0.02,
  "intermediate_size": 8192,
  "max_position_embeddings": 32768,
  "model_type": "internlm2",
  "num_attention_heads": 16,
  "num_hidden_layers": 24,
  "num_key_value_heads": 8,
  "pad_token_id": 2,
  "rms_norm_eps": 1e-05,
  "rope_scaling": {
    "factor": 2.0,
    "type": "dynamic"
  },
  "rope_theta": 1000000,
  "tie_word_embeddings": false,
  "torch_dtype": "float16",
  "transformers_version": "4.38.2",
  "use_cache": true,
  "vocab_size

Checkpoint destination directory ./train_result/checkpoint-180 already exists and is non-empty. Saving will proceed but saved results may be invalid.


Saving model checkpoint to ./train_result/checkpoint-180


loading configuration file config.json from cache at /root/.cache/huggingface/hub/models--internlm--internlm2-chat-1_8b/snapshots/f842a52ed9579d662d5e67c8c0f8c67b3eb877a8/config.json


Model config InternLM2Config {
  "architectures": [
    "InternLM2ForCausalLM"
  ],
  "attn_implementation": "eager",
  "auto_map": {
    "AutoConfig": "internlm/internlm2-chat-1_8b--configuration_internlm2.InternLM2Config",
    "AutoModel": "internlm/internlm2-chat-1_8b--modeling_internlm2.InternLM2ForCausalLM",
    "AutoModelForCausalLM": "internlm/internlm2-chat-1_8b--modeling_internlm2.InternLM2ForCausalLM"
  },
  "bias": false,
  "bos_token_id": 1,
  "eos_token_id": 2,
  "hidden_act": "silu",
  "hidden_size": 2048,
  "initializer_range": 0.02,
  "intermediate_size": 8192,
  "max_position_embeddings": 32768,
  "model_type": "internlm2",
  "num_attention_heads": 16,
  "num_hidden_layers": 24,
  "num_key_value_heads": 8,
  "pad_token_id": 2,
  "rms_norm_eps": 1e-05,
  "rope_scaling": {
    "factor": 2.0,
    "type": "dynamic"
  },
  "rope_theta": 1000000,
  "tie_word_embeddings": false,
  "torch_dtype": "float16",
  "transformers_version": "4.38.2",
  "use_cache": true,
  "vocab_size

Checkpoint destination directory ./train_result/checkpoint-190 already exists and is non-empty. Saving will proceed but saved results may be invalid.


Saving model checkpoint to ./train_result/checkpoint-190


loading configuration file config.json from cache at /root/.cache/huggingface/hub/models--internlm--internlm2-chat-1_8b/snapshots/f842a52ed9579d662d5e67c8c0f8c67b3eb877a8/config.json


Model config InternLM2Config {
  "architectures": [
    "InternLM2ForCausalLM"
  ],
  "attn_implementation": "eager",
  "auto_map": {
    "AutoConfig": "internlm/internlm2-chat-1_8b--configuration_internlm2.InternLM2Config",
    "AutoModel": "internlm/internlm2-chat-1_8b--modeling_internlm2.InternLM2ForCausalLM",
    "AutoModelForCausalLM": "internlm/internlm2-chat-1_8b--modeling_internlm2.InternLM2ForCausalLM"
  },
  "bias": false,
  "bos_token_id": 1,
  "eos_token_id": 2,
  "hidden_act": "silu",
  "hidden_size": 2048,
  "initializer_range": 0.02,
  "intermediate_size": 8192,
  "max_position_embeddings": 32768,
  "model_type": "internlm2",
  "num_attention_heads": 16,
  "num_hidden_layers": 24,
  "num_key_value_heads": 8,
  "pad_token_id": 2,
  "rms_norm_eps": 1e-05,
  "rope_scaling": {
    "factor": 2.0,
    "type": "dynamic"
  },
  "rope_theta": 1000000,
  "tie_word_embeddings": false,
  "torch_dtype": "float16",
  "transformers_version": "4.38.2",
  "use_cache": true,
  "vocab_size

Checkpoint destination directory ./train_result/checkpoint-200 already exists and is non-empty. Saving will proceed but saved results may be invalid.


Saving model checkpoint to ./train_result/checkpoint-200


loading configuration file config.json from cache at /root/.cache/huggingface/hub/models--internlm--internlm2-chat-1_8b/snapshots/f842a52ed9579d662d5e67c8c0f8c67b3eb877a8/config.json


Model config InternLM2Config {
  "architectures": [
    "InternLM2ForCausalLM"
  ],
  "attn_implementation": "eager",
  "auto_map": {
    "AutoConfig": "internlm/internlm2-chat-1_8b--configuration_internlm2.InternLM2Config",
    "AutoModel": "internlm/internlm2-chat-1_8b--modeling_internlm2.InternLM2ForCausalLM",
    "AutoModelForCausalLM": "internlm/internlm2-chat-1_8b--modeling_internlm2.InternLM2ForCausalLM"
  },
  "bias": false,
  "bos_token_id": 1,
  "eos_token_id": 2,
  "hidden_act": "silu",
  "hidden_size": 2048,
  "initializer_range": 0.02,
  "intermediate_size": 8192,
  "max_position_embeddings": 32768,
  "model_type": "internlm2",
  "num_attention_heads": 16,
  "num_hidden_layers": 24,
  "num_key_value_heads": 8,
  "pad_token_id": 2,
  "rms_norm_eps": 1e-05,
  "rope_scaling": {
    "factor": 2.0,
    "type": "dynamic"
  },
  "rope_theta": 1000000,
  "tie_word_embeddings": false,
  "torch_dtype": "float16",
  "transformers_version": "4.38.2",
  "use_cache": true,
  "vocab_size



Training completed. Do not forget to share your model on huggingface.co/models =)




In [5]:
model.save_pretrained('./trained_model')

loading configuration file config.json from cache at /root/.cache/huggingface/hub/models--internlm--internlm2-chat-1_8b/snapshots/f842a52ed9579d662d5e67c8c0f8c67b3eb877a8/config.json


Model config InternLM2Config {
  "architectures": [
    "InternLM2ForCausalLM"
  ],
  "attn_implementation": "eager",
  "auto_map": {
    "AutoConfig": "internlm/internlm2-chat-1_8b--configuration_internlm2.InternLM2Config",
    "AutoModel": "internlm/internlm2-chat-1_8b--modeling_internlm2.InternLM2ForCausalLM",
    "AutoModelForCausalLM": "internlm/internlm2-chat-1_8b--modeling_internlm2.InternLM2ForCausalLM"
  },
  "bias": false,
  "bos_token_id": 1,
  "eos_token_id": 2,
  "hidden_act": "silu",
  "hidden_size": 2048,
  "initializer_range": 0.02,
  "intermediate_size": 8192,
  "max_position_embeddings": 32768,
  "model_type": "internlm2",
  "num_attention_heads": 16,
  "num_hidden_layers": 24,
  "num_key_value_heads": 8,
  "pad_token_id": 2,
  "rms_norm_eps": 1e-05,
  "rope_scaling": {
    "factor": 2.0,
    "type": "dynamic"
  },
  "rope_theta": 1000000,
  "tie_word_embeddings": false,
  "torch_dtype": "float16",
  "transformers_version": "4.38.2",
  "use_cache": true,
  "vocab_size

In [6]:
print(model)

PeftModelForCausalLM(
  (base_model): InternLM2ForCausalLM(
    (model): InternLM2Model(
      (tok_embeddings): Embedding(92544, 2048, padding_idx=2)
      (layers): ModuleList(
        (0-23): 24 x InternLM2DecoderLayer(
          (attention): InternLM2Attention(
            (wqkv): Linear(in_features=2048, out_features=4096, bias=False)
            (wo): Linear(in_features=2048, out_features=2048, bias=False)
            (rotary_emb): InternLM2DynamicNTKScalingRotaryEmbedding()
          )
          (feed_forward): InternLM2MLP(
            (w1): Linear(in_features=2048, out_features=8192, bias=False)
            (w3): Linear(in_features=2048, out_features=8192, bias=False)
            (w2): Linear(in_features=8192, out_features=2048, bias=False)
            (act_fn): SiLU()
          )
          (attention_norm): InternLM2RMSNorm()
          (ffn_norm): InternLM2RMSNorm()
        )
      )
      (norm): InternLM2RMSNorm()
    )
    (output): Linear(in_features=2048, out_features=92

## 5.   Evaluation

In [7]:
from peft import PeftConfig, PeftModelForCausalLM
peft_config = PeftConfig.from_pretrained('./trained_model/')
model = AutoModelForCausalLM.from_pretrained("internlm/internlm2-chat-1_8b", device_map="cuda",trust_remote_code=True, torch_dtype=torch.float16)
model = PeftModelForCausalLM.from_pretrained(model, './trained_model', config=peft_config)

loading configuration file config.json from cache at /root/.cache/huggingface/hub/models--internlm--internlm2-chat-1_8b/snapshots/f842a52ed9579d662d5e67c8c0f8c67b3eb877a8/config.json


loading configuration file config.json from cache at /root/.cache/huggingface/hub/models--internlm--internlm2-chat-1_8b/snapshots/f842a52ed9579d662d5e67c8c0f8c67b3eb877a8/config.json


Model config InternLM2Config {
  "_name_or_path": "internlm/internlm2-chat-1_8b",
  "architectures": [
    "InternLM2ForCausalLM"
  ],
  "attn_implementation": "eager",
  "auto_map": {
    "AutoConfig": "internlm/internlm2-chat-1_8b--configuration_internlm2.InternLM2Config",
    "AutoModel": "internlm/internlm2-chat-1_8b--modeling_internlm2.InternLM2ForCausalLM",
    "AutoModelForCausalLM": "internlm/internlm2-chat-1_8b--modeling_internlm2.InternLM2ForCausalLM"
  },
  "bias": false,
  "bos_token_id": 1,
  "eos_token_id": 2,
  "hidden_act": "silu",
  "hidden_size": 2048,
  "initializer_range": 0.02,
  "intermediate_size": 8192,
  "max_position_embeddings": 32768,
  "model_type": "internlm2",
  "num_attention_heads": 16,
  "num_hidden_layers": 24,
  "num_key_value_heads": 8,
  "pad_token_id": 2,
  "rms_norm_eps": 1e-05,
  "rope_scaling": {
    "factor": 2.0,
    "type": "dynamic"
  },
  "rope_theta": 1000000,
  "tie_word_embeddings": false,
  "torch_dtype": "float16",
  "transformers_ver

loading weights file model.safetensors from cache at /root/.cache/huggingface/hub/models--internlm--internlm2-chat-1_8b/snapshots/f842a52ed9579d662d5e67c8c0f8c67b3eb877a8/model.safetensors.index.json


Instantiating InternLM2ForCausalLM model under default dtype torch.float16.


Generate config GenerationConfig {
  "bos_token_id": 1,
  "eos_token_id": 2,
  "pad_token_id": 2
}



Loading checkpoint shards:   0%|          | 0/2 [00:00<?, ?it/s]

All model checkpoint weights were used when initializing InternLM2ForCausalLM.



All the weights of InternLM2ForCausalLM were initialized from the model checkpoint at internlm/internlm2-chat-1_8b.
If your task is similar to the task the model of the checkpoint was trained on, you can already use InternLM2ForCausalLM for predictions without further training.


loading configuration file generation_config.json from cache at /root/.cache/huggingface/hub/models--internlm--internlm2-chat-1_8b/snapshots/f842a52ed9579d662d5e67c8c0f8c67b3eb877a8/generation_config.json


Generate config GenerationConfig {
  "bos_token_id": 1,
  "eos_token_id": 2,
  "pad_token_id": 2
}



In [8]:
print(model)

PeftModelForCausalLM(
  (base_model): InternLM2ForCausalLM(
    (model): InternLM2Model(
      (tok_embeddings): Embedding(92544, 2048, padding_idx=2)
      (layers): ModuleList(
        (0-23): 24 x InternLM2DecoderLayer(
          (attention): InternLM2Attention(
            (wqkv): Linear(in_features=2048, out_features=4096, bias=False)
            (wo): Linear(in_features=2048, out_features=2048, bias=False)
            (rotary_emb): InternLM2DynamicNTKScalingRotaryEmbedding()
          )
          (feed_forward): InternLM2MLP(
            (w1): Linear(in_features=2048, out_features=8192, bias=False)
            (w3): Linear(in_features=2048, out_features=8192, bias=False)
            (w2): Linear(in_features=8192, out_features=2048, bias=False)
            (act_fn): SiLU()
          )
          (attention_norm): InternLM2RMSNorm()
          (ffn_norm): InternLM2RMSNorm()
        )
      )
      (norm): InternLM2RMSNorm()
    )
    (output): Linear(in_features=2048, out_features=92