实验Noisy Channel Model、Contextual Calibration、Domain-context Calibration的效果

实验数据集

* TNEWS
* NLPCC2014-task2

LLM

* Qwen-7B-Chat

In [None]:
from paddlenlp import Taskflow
from modelscope.utils.constant import Tasks
from modelscope import Model
from modelscope.pipelines import pipeline
from modelscope import AutoModelForCausalLM, AutoTokenizer
from modelscope import GenerationConfig


MODEL_NAME = 'qwen-7b-chat'  # backend to load and annotate with; one of [qwen-7b-chat, siamese_uninlu, paddle_nlp]
DATASET = 'tnews'  # dataset to load and evaluate on; one of [tnews, nlpcc2014_task2]

In [None]:
# 读取数据集
import json

# Every split is a list of [text, label_id] pairs.
train, val, test = [], [], []

if DATASET == 'tnews':
    # train.json / dev.json are JSON-lines files with 'sentence' and 'label' keys.
    for split_path, split_rows in (
        ('dataset/tnews/train.json', train),
        ('dataset/tnews/dev.json', val),
    ):
        with open(split_path, 'r', encoding='utf-8') as f:
            for raw_line in f:
                record = json.loads(raw_line.strip())
                split_rows.append([record['sentence'], record['label']])

    # The original dev set is already shuffled, so it is simply cut in half:
    # first half becomes the test split, second half stays as validation.
    half = len(val) // 2
    test = val[:half]
    val = val[half:]

    # label id <-> English label name, as listed in labels.json
    id2en = {}
    en2id = {}
    with open('dataset/tnews/labels.json', 'r', encoding='utf-8') as f:
        for raw_line in f:
            record = json.loads(raw_line.strip())
            id2en[record['label']] = record['label_desc']
            en2id[record['label_desc']] = record['label']

    # English label name -> Chinese label name shown to the models
    en2zh = {
        'news_story': '故事',
        'news_culture': '文化新闻',
        'news_entertainment': '娱乐新闻',
        'news_sports': '体育新闻',
        'news_finance': '经济新闻',
        'news_house': '房地产新闻',
        'news_car': '汽车新闻',
        'news_edu': '教育新闻',
        'news_tech': '科技新闻',
        'news_military': '军事新闻',
        'news_travel': '旅游新闻',
        'news_world': '国际新闻',
        'news_stock': '股市新闻',
        'news_agriculture': '农业新闻',
        'news_game': '游戏新闻'
    }

elif DATASET == 'nlpcc2014_task2':
    id2en = {0: 'negative', 1: 'positive'}
    en2id = {en_label: label_id for label_id, en_label in id2en.items()}

    # JSON-lines files with 'review' and an English 'label' that is mapped to its id.
    for split_path, split_rows in (
        ('dataset/NLPCC2014_task2/train.json', train),
        ('dataset/NLPCC2014_task2/val.json', val),
        ('dataset/NLPCC2014_task2/test.json', test),
    ):
        with open(split_path, 'r', encoding='utf-8') as f:
            for raw_line in f:
                record = json.loads(raw_line.strip())
                split_rows.append([record['review'], en2id[record['label']]])

    # English label name -> Chinese label name shown to the models
    en2zh = {
        'positive': '正面情绪',
        'negative': '负面情绪'
    }

if DATASET in ('tnews', 'nlpcc2014_task2'):
    # Reverse mapping plus sorted label-name lists (sorted so the order is stable).
    zh2en = {zh_label: en_label for en_label, zh_label in en2zh.items()}
    zh_label_name_list = sorted(zh2en)
    en_label_name_list = sorted(en2zh)

    print(f'{DATASET}数据集加载完成!')
    print(f'训练集: {len(train)}, 验证集: {len(val)}, 测试集: {len(test)}')

In [None]:
# Load the backend selected by MODEL_NAME and define its matching inference helper.
if MODEL_NAME == 'qwen-7b-chat':
    # Generation parameters are configured by editing model.generation_config directly.
    tokenizer = AutoTokenizer.from_pretrained("qwen/Qwen-7B-Chat", revision = 'v1.0.5',trust_remote_code=True)
    model = AutoModelForCausalLM.from_pretrained("qwen/Qwen-7B-Chat", revision = 'v1.0.5',device_map="auto", trust_remote_code=True,fp16 = True).eval()
    # NOTE(review): the repo id below is spelled "Qwen/..." while the two lines above use "qwen/..." — confirm this is intended.
    model.generation_config = GenerationConfig.from_pretrained("Qwen/Qwen-7B-Chat",revision = 'v1.0.5', trust_remote_code=True) 
    
    print(f'{MODEL_NAME}模型加载成功')
    def get_llm_result(prompt, mode):
        """Run one single-turn chat with the loaded Qwen model and return its reply.

        mode 'greedy_decode' turns sampling off and 'sample' turns it on by
        mutating the shared model.generation_config; any other mode leaves the
        current setting untouched.
        """
        if mode == 'greedy_decode':
            model.generation_config.do_sample = False
        elif mode == 'sample':
            model.generation_config.do_sample = True
        # history=None: every call starts a fresh conversation
        response, history = model.chat(tokenizer, prompt, history=None)
        return response
    
elif MODEL_NAME == 'siamese_uninlu':
    semantic_cls = pipeline(Tasks.siamese_uie, 'damo/nlp_structbert_siamese-uninlu_chinese-base', model_revision='v1.0')
    print(f'{MODEL_NAME}模型加载完成！')
    
    # schema is only defined for the two supported datasets
    if DATASET in ['tnews', 'nlpcc2014_task2']:
        schema = {'分类': None}

    def get_siamase_result(text, label_names):
        """Classify text with the siamese-uninlu pipeline.

        The pipeline input is the comma-joined label names, a '|' separator and
        the text. Returns the 'span' of the first output entry, or '' when the
        pipeline output is empty.
        """
        res = semantic_cls(input=','.join(label_names)+'|'+text, schema = schema)['output']
        if len(res) > 0:
            return res[0][0]['span']
        else:
            return ''
        
elif MODEL_NAME == 'paddle_nlp':
    # the Taskflow model is only created for the two supported datasets
    if DATASET in ['tnews', 'nlpcc2014_task2']:
        schema = zh_label_name_list
        model = Taskflow("zero_shot_text_classification", schema=schema)
        print(f'{MODEL_NAME}模型加载完成！')
    
    def get_paddle_result(text):
        """Classify text with the PaddleNLP zero-shot Taskflow.

        Returns the 'label' of the first prediction of the first result, or ''
        when there is no result or no prediction.
        """
        res = model(text)
        if len(res) > 0:
            if DATASET in ['tnews', 'nlpcc2014_task2']:
                if len(res[0]['predictions']) > 0:
                    return res[0]['predictions'][0]['label']
        
        return ''

In [4]:
# 为实现拿到output prob所需要的utils
# 根据根据input text构建模型需要的input格式。本质是自己实现预处理+forward，通过查看modelscope中，每个模型text_generation逻辑来实现

# Qwen
import torch
import numpy as np
from transformers import PreTrainedTokenizer
from typing import Iterable, List, Tuple, Union

def make_context_qwen(
    tokenizer: PreTrainedTokenizer,
    query: str,
    history: List[Tuple[str, str]] = None,
    system: str = '',
    max_window_size: int = 6144,
    chat_format: str = 'chatml',
):
    """Build the prompt text and token ids that precede the assistant's reply.

    Args:
        tokenizer: object providing ``encode`` and, for 'chatml', the
            ``im_start_id`` / ``im_end_id`` attributes.
        query: the current user message.
        history: earlier (user, assistant) turns, oldest first; None means none.
        system: system message placed at the very beginning.
        max_window_size: token budget for the system message plus history;
            the most recent turns are kept while the total stays below it.
        chat_format: 'chatml' (role-tagged messages) or 'raw' (query only).

    Returns:
        (raw_text, context_tokens) for the assembled context.

    Raises:
        NotImplementedError: for any other chat_format.
    """
    if chat_format == 'raw':
        return query, tokenizer.encode(query)
    if chat_format != 'chatml':
        raise NotImplementedError(f'Unknown chat format {chat_format!r}')

    past_turns = [] if history is None else history

    im_start, im_end = '<|im_start|>', '<|im_end|>'
    start_ids = [tokenizer.im_start_id]
    end_ids = [tokenizer.im_end_id]
    newline_ids = tokenizer.encode('\n')

    def _wrap(role, content):
        """Return one message as (text, ids enclosed in the start/end markers)."""
        body_ids = tokenizer.encode(role) + newline_ids + tokenizer.encode(content)
        return f'{role}\n{content}', start_ids + body_ids + end_ids

    system_text, system_ids = _wrap('system', system)

    # Walk the history from the newest turn backwards, prepending turns until
    # the window budget would be reached.
    history_text = ''
    history_ids = []
    for past_query, past_response in reversed(past_turns):
        query_text, past_query_ids = _wrap('user', past_query)
        response_text, past_response_ids = _wrap('assistant', past_response)
        turn_ids = newline_ids + past_query_ids + newline_ids + past_response_ids

        if len(system_ids) + len(turn_ids) + len(history_ids) >= max_window_size:
            break

        history_ids = turn_ids + history_ids
        history_text = (
            f'\n{im_start}{query_text}{im_end}\n{im_start}{response_text}{im_end}'
            + history_text
        )

    # Current user message followed by an opened (unterminated) assistant turn.
    _, current_query_ids = _wrap('user', query)
    context_tokens = (
        system_ids
        + history_ids
        + newline_ids + current_query_ids
        + newline_ids + start_ids + tokenizer.encode('assistant') + newline_ids
    )
    raw_text = (
        f'{im_start}{system_text}{im_end}'
        + history_text
        + f'\n{im_start}user\n{query}{im_end}\n{im_start}assistant\n'
    )

    return raw_text, context_tokens

def concat_answer_qwen(raw_text, context_tokens, answer):
    """Append an answer to an already-built context.

    The answer is encoded with the module-level ``tokenizer``. Note that
    ``context_tokens`` is extended with ``+=``, so a list passed in by the
    caller is modified in place.

    Returns:
        (raw_text + answer, context_tokens including the answer's token ids).
    """
    extended_text = raw_text + answer
    answer_ids = tokenizer.encode(answer)
    context_tokens += answer_ids
    return extended_text, context_tokens

def get_stop_words_ids_qwen(chat_format, tokenizer):
    """Return the stop sequences (lists of token ids) for a chat format.

    'chatml' stops on the im_end / im_start markers; 'raw' stops on the
    encoded 'Human:' string or the end-of-document id.

    Raises:
        NotImplementedError: for any other chat_format.
    """
    if chat_format == 'chatml':
        return [[tokenizer.im_end_id], [tokenizer.im_start_id]]
    if chat_format == 'raw':
        return [tokenizer.encode('Human:'), [tokenizer.eod_id]]
    raise NotImplementedError(f'Unknown chat format {chat_format!r}')

def get_answer_prob_qwen(prompt, answer):
    """Score `answer` as the assistant reply to `prompt` with the loaded Qwen model.

    The prompt is wrapped into the chat context, the answer tokens are appended,
    and one forward pass yields the log-probability of every token given its
    prefix. The score is exp(mean log-probability over the answer tokens), i.e.
    a length-normalised probability (inverse perplexity) of the answer.
    Approach adapted from https://github.com/tonyzhaozh/few-shot-learning.

    Relies on the module-level ``model`` and ``tokenizer``.

    Args:
        prompt: user message placed in the chat context.
        answer: non-empty text whose likelihood is measured.

    Returns:
        (return_json, answer_prob) where return_json['choices'][0] holds the
        decoded 'text' and a 'logprobs' dict with per-position 'tokens' and
        'token_logprobs' (first entry is the string 'null' because the first
        token has no prefix; the rest are 0-d tensors), and answer_prob is a
        0-d numpy array.

    Raises:
        ValueError: if answer is None/empty, or the prompt or the answer
            contributes no tokens.
    """
    if answer is None or len(answer) == 0:
        raise ValueError('answer must be a non-empty string')

    raw_text, context_tokens = make_context_qwen(
        tokenizer,
        query=prompt,
        history=[],
        system='You are a helpful assistant.',
        max_window_size=6144,
        chat_format=model.generation_config.chat_format)

    # Token span [answer_start, answer_end) of the appended answer.
    answer_start = len(context_tokens)
    raw_text, context_tokens = concat_answer_qwen(raw_text, context_tokens, answer)
    answer_end = len(context_tokens)
    if answer_start == 0 or answer_end == answer_start:
        raise ValueError('prompt and answer must each contribute at least one token')

    # Put the inputs where the model's first parameters live instead of
    # assuming 'cuda:0'.
    device = next(model.parameters()).device
    input_ids = torch.tensor([context_tokens]).to(device)

    # The right-hand side would normally be the pad id, but Qwen defines no pad
    # token; with a single unpadded sequence the mask is all ones.
    attention_mask = (input_ids != -1).float()
    model_inputs = model.prepare_inputs_for_generation(input_ids, attention_mask=attention_mask)

    # Pure scoring: no autograd graph is needed.
    with torch.no_grad():
        logits = model(
            input_ids=model_inputs['input_ids'],
            attention_mask=model_inputs['attention_mask'],
            position_ids=model_inputs['position_ids'],
            return_dict=True).logits

    # log_softmax in float32 avoids the log(0) = -inf that log(softmax(.)) can
    # produce in half precision.
    logprobs = torch.log_softmax(logits[0].float(), dim=-1)

    # Language-model outputs are shifted left: row j predicts token j + 1.
    target_ids = input_ids[0, 1:].to(logprobs.device)
    next_token_logprobs = logprobs[:-1].gather(1, target_ids.unsqueeze(1)).squeeze(1)

    choice = {
        # text is the full context followed by the answer
        'text': tokenizer.decode(context_tokens, skip_special_tokens=True),
        'logprobs': {
            'tokens': [tokenizer.decode([token_id]) for token_id in context_tokens],
            'token_logprobs': ['null'] + list(next_token_logprobs.unbind(0)),
        },
    }
    return_json = {'choices': [choice]}

    # Token i is scored by next_token_logprobs[i - 1]; average over the answer
    # span to length-normalise, then exponentiate.
    answer_logprob = next_token_logprobs[answer_start - 1:answer_end - 1].mean()
    answer_prob = torch.exp(answer_logprob).cpu().numpy()

    return return_json, answer_prob


In [5]:
# prompt
def get_prompt(input_text, label_name_list, few_shot_data = []):
    """Build the classification prompt (text -> label) sent to the LLM.

    Args:
        input_text: the text to classify.
        label_name_list: candidate label names, listed comma-separated.
        few_shot_data: optional demonstrations; each item is indexed as
            (text, label) and rendered before the query.

    Returns:
        The prompt string, ending with the open "label:" slot.
    """
    label_set = ','.join(label_name_list)

    if len(few_shot_data) == 0:
        # zero-shot: instructions followed directly by the query
        return f'''
        给定一段文本，输出一个分类标签。
        分类标签集合：[{label_set}]
        请直接输出结果，不要附带其他内容。
        
        文本：'{input_text}'
        这段文本所属标签：
        '''

    # few-shot: one "text / label" pair of lines per demonstration
    example = ''.join(
        f"文本:'{shot[0]}'\n" + f'这段文本所属标签：{shot[1]}\n'
        for shot in few_shot_data
    )
    return f'''
        给定一段文本，输出一个分类标签。
        分类标签集合：[{label_set}]
        请直接输出结果，不要附带其他内容。
        
        {example}
        文本：'{input_text}'
        这段文本所属标签：
        '''

def get_reverse_prompt(label_name, few_shot_data = []):
    """Build the reverse prompt (label -> text) used by the noisy-channel scorer.

    Args:
        label_name: the label the model is asked to generate a text for.
        few_shot_data: optional demonstrations; each item is indexed as
            (text, label) and rendered as a "label / generated text" pair.

    Returns:
        The prompt string, ending with the open "generated text:" slot.
    """
    if len(few_shot_data) == 0:
        # zero-shot: instructions followed directly by the label
        return f'''
        给定一个分类标签，生成一段文本。
        请直接输出内容，不要附带其他内容。

        标签：{label_name}
        生成文本：
        '''

    # few-shot: one "label / generated text" pair of lines per demonstration
    example = ''.join(
        f'标签：{shot[1]}\n' + f"生成文本：{shot[0]}\n"
        for shot in few_shot_data
    )
    return f'''
        给定一个分类标签，生成一段文本。
        请直接输出结果，不要附带其他内容。

        {example}
        标签：{label_name}
        生成文本：
        '''

In [6]:
# decode生成的utils
import re 

def post_process_llm_cls(llm_out, label_name_list):
    """Recover a label name from free-form LLM output to improve coverage.

    Handles over-generation (the model wraps the label in extra words) by
    searching the output for any known label name.

    Args:
        llm_out: raw text produced by the LLM.
        label_name_list: valid label names.

    Returns:
        The label that occurs first in llm_out, or '' when none occurs. If
        several labels start at the same position, the longest one wins.
    """
    if not llm_out:
        return ''

    # Longest-first so a label is never shadowed by a shorter label that is a
    # prefix of it; empty names are dropped because they would match anywhere.
    candidates = sorted(
        (name for name in label_name_list if name), key=len, reverse=True)
    if not candidates:
        return ''

    # Escape each name: labels are literal text, not regex syntax.
    pattern = re.compile('|'.join(re.escape(name) for name in candidates))
    match = pattern.search(llm_out)
    return match.group(0) if match else ''
    
def post_process_siamase_cls(out, label_name_list):
    """Map a partial extraction back to a full label name to improve coverage.

    The siamese model may return only a fragment of a label; the first label
    (in list order) that contains `out` as a substring is returned, or '' when
    no label contains it.
    """
    return next(
        (candidate for candidate in label_name_list if out in candidate),
        '',
    )
    
def siamase_annotator_cls(text, label_name_list, print_log = False):
    """Annotate text with the siamese-uninlu backend.

    A non-empty prediction that is not an exact label goes through
    post_process_siamase_cls first.

    Returns:
        A name from label_name_list, or '' when no valid label was obtained.
    """
    prediction = get_siamase_result(text, label_name_list)
    if print_log:
        print('out:', prediction)

    needs_repair = len(prediction) > 0 and prediction not in label_name_list
    if needs_repair:
        prediction = post_process_siamase_cls(prediction, label_name_list)
        if print_log:
            print('post-process out:', prediction)

    if len(prediction) == 0 or prediction not in label_name_list:
        return ''
    return prediction
    
def paddle_annotator_cls(text, label_name_list, print_log = False):
    """Annotate text with the PaddleNLP zero-shot backend.

    Returns:
        The predicted label when it is a member of label_name_list, else ''.
    """
    prediction = get_paddle_result(text)
    if print_log:
        print('out:', prediction)

    is_valid = len(prediction) > 0 and prediction in label_name_list
    return prediction if is_valid else ''

In [7]:
# 模型标注方法
def llm_annotator_generative_cls(input_text, mode, label_name_list, few_shot_data = [], self_consistency_num = 1, print_log=False):
    """Annotate text by letting the LLM generate the label, with majority voting.

    The same prompt is sent self_consistency_num times; each reply that is (or
    can be post-processed into) a valid label casts one vote.

    Args:
        input_text: text to classify.
        mode: decoding mode forwarded to get_llm_result.
        label_name_list: valid label names.
        few_shot_data: optional demonstrations for the prompt.
        self_consistency_num: number of generations to vote over.
        print_log: print prompts, replies and the vote table.

    Returns:
        The label with the most votes (the earliest-voted one on a tie), or ''
        when no reply produced a valid label.
    """
    prompt = get_prompt(input_text, label_name_list, few_shot_data)

    votes = {}
    for _ in range(self_consistency_num):
        answer = get_llm_result(prompt, mode)
        if print_log:
            print('prompt:', prompt)
            print('llm_out:', answer)

        if answer not in label_name_list:
            # try to recover a label from a verbose reply
            answer = post_process_llm_cls(answer, label_name_list)
            if print_log:
                print('post-process llm_out:', answer)

        if answer in label_name_list:
            votes[answer] = votes.get(answer, 0) + 1

    if print_log:
        print('self-consistency result:', votes)
        print('='*30)
        print()

    if len(votes) == 0:
        return ''
    # max() keeps the first maximal entry, i.e. insertion order breaks ties
    winner, _count = max(votes.items(), key = lambda item: item[1])
    return winner
    
def llm_annotator_channel(input_text, label_name_list, few_shot_data = [], print_log=False):
    """Annotate text with the noisy-channel method: choose y by p(x | y).

    For every candidate label a reverse prompt (label -> text) is built and the
    model's score for generating input_text is measured; the label with the
    highest score is returned.
    """
    if print_log:
        print(f'input_text:{input_text}')
        print()

    scores = []
    for label_name in label_name_list:
        reverse_prompt = get_reverse_prompt(label_name, few_shot_data)
        x_prob = get_answer_prob_qwen(reverse_prompt, input_text)[1]
        scores.append(x_prob)

        if print_log:
            print(f'label_name:{label_name}, label_prob:{x_prob}')

    best = np.argmax(scores)
    if print_log:
        print(f'pred_label_name:{label_name_list[best]}, pred_label_prob:{scores[best]}')
        print('='*30)
        print()

    return label_name_list[best]


def llm_annotator_probmodel(input_text, label_name_list, few_shot_data = [], print_log=False):
    """Annotate text from the probabilistic view: score every label directly.

    The classification prompt is the same for all candidates, so it is built
    once; each label name is then scored as the answer to that prompt.

    Returns:
        (label_probs, best_label) where label_probs follows the order of
        label_name_list and best_label is the highest-scoring name.
    """
    if print_log:
        print(f'input_text:{input_text}')
        print()

    prompt = get_prompt(input_text, label_name_list, few_shot_data)

    label_probs = []
    for label_name in label_name_list:
        label_prob = get_answer_prob_qwen(prompt, label_name)[1]
        label_probs.append(label_prob)

        if print_log:
            print(f'label_name:{label_name}, label_prob:{label_prob}')

    best = np.argmax(label_probs)
    if print_log:
        print(f'pred_label_name:{label_name_list[best]}, pred_label_prob:{label_probs[best]}')
        print('='*30)
        print()

    return label_probs, label_name_list[best]

def get_p_calibrate_CC(label_name_list, few_shot_data = [], content_free_tokens = [''], print_log = False):
    """Estimate the label prior for Contextual Calibration (CC).

    Each content-free token is scored in place of a real input; the label
    scores are averaged over the tokens and normalised to sum to 1. For a fixed
    prompt (everything except the input text) this only has to be computed once.
    """
    per_token_probs = [
        llm_annotator_probmodel(token, label_name_list, few_shot_data, print_log)[0]
        for token in content_free_tokens
    ]
    mean_probs = np.mean(np.array(per_token_probs), axis=0)
    return mean_probs / np.sum(mean_probs)  # normalize

def get_p_calibrate_DC(label_name_list, dc_len, dc_times, domain_words, few_shot_data = [], print_log = False):
    """Estimate the label prior for Domain-context Calibration (DC).

    dc_times pseudo-inputs are formed by drawing dc_len words from domain_words
    without replacement and concatenating them; the label scores of those
    inputs are averaged and normalised to sum to 1.
    """
    sampled_probs = []
    for _ in range(dc_times):
        pseudo_text = ''.join(np.random.choice(domain_words, dc_len, replace = False))
        if print_log:
            print(pseudo_text)
        sampled_probs.append(
            llm_annotator_probmodel(pseudo_text, label_name_list, few_shot_data, print_log = False)[0]
        )

    mean_probs = np.mean(np.array(sampled_probs), axis=0)
    return mean_probs / np.sum(mean_probs)  # normalize

def llm_annotator_calibrate(input_text, p_calibrate, label_name_list, few_shot_data = [], print_log=False):
    """Annotate text with calibrated label scores.

    The normalised label scores of input_text are multiplied by the inverse of
    the diagonal matrix built from p_calibrate (i.e. divided element-wise by
    the prior) and the highest calibrated score wins.

    Returns:
        (calibrated scores as a column vector, best label name).
    """
    if print_log:
        print(f'input_text:{input_text}')
        print()

    raw_probs, _ = llm_annotator_probmodel(input_text, label_name_list, few_shot_data, print_log=False)
    num_classes = len(label_name_list)

    # diagonal W: equivalent to dividing by p_calibrate; the bias is zero
    weight = np.linalg.inv(np.identity(num_classes) * p_calibrate)
    bias = np.zeros([num_classes, 1])

    probs = raw_probs / np.sum(raw_probs)  # normalize
    calibrate_label_probs = np.matmul(weight, np.expand_dims(probs, axis=-1)) + bias

    best = np.argmax(calibrate_label_probs)
    if print_log:
        print('calibrate之前')
        print(probs)
        print(label_name_list[np.argmax(probs)])
        print('calibrate之后')
        print(calibrate_label_probs)
        print(label_name_list[best])

    return calibrate_label_probs, label_name_list[best]
        

In [8]:
# 标注器
import numpy as np
from tqdm import tqdm

def get_few_shot_example(clean_set, mode = 'random', num = 3):
    """Pick few-shot demonstrations from a clean pool.

    Args:
        clean_set: for 'random', a list of [text, label_name] items; for
            'class-balance-fixed', a dict {label_name: [text, ...]}.
        mode: 'random' draws num items without replacement;
            'class-balance-fixed' takes the first num texts of every label
            (labels visited in sorted order, no randomness).
        num: total sample count for 'random', per-class count otherwise.

    Returns:
        A list of [text, label_name] items; empty for any other mode.
    """
    if mode == 'random':
        picked = np.random.choice(range(len(clean_set)), size = num, replace=False)
        return [clean_set[position] for position in picked]

    if mode == 'class-balance-fixed':
        return [
            [text, label_name]
            for label_name in sorted(clean_set.keys())
            for text in clean_set[label_name][:num]
        ]

    return []

def annotator(x, mode, p_calibrate, zh_label_name_list, few_shot_examples, print_log):
    """Annotate one input and return its predicted label id.

    Dispatches on the module-level MODEL_NAME and, for Qwen, on `mode`. The
    generative modes also read the module-level ``self_consistency_num``.

    Args:
        x: input text.
        mode: 'channel', 'prob_model', any mode containing 'calibrate',
            'greedy_decode' or 'sample' (only used for qwen-7b-chat).
        p_calibrate: label prior used by the calibrate modes.
        zh_label_name_list: candidate Chinese label names.
        few_shot_examples: demonstrations passed to the prompt builders.
        print_log: forward verbose logging to the backend annotator.

    Returns:
        The label id (via zh2en and en2id) of the predicted label, or '' when
        the backend produced no valid label.

    Raises:
        ValueError: if MODEL_NAME, or the mode for qwen-7b-chat, is not
            supported (previously this surfaced as an UnboundLocalError).
    """
    if MODEL_NAME in ['qwen-7b-chat']:
        if mode == 'channel':
            out = llm_annotator_channel(x, zh_label_name_list, few_shot_data=few_shot_examples, print_log = print_log)
        elif mode == 'prob_model':
            _, out = llm_annotator_probmodel(x, zh_label_name_list, few_shot_data=few_shot_examples, print_log = print_log)
        elif 'calibrate' in mode:
            _, out = llm_annotator_calibrate(x, p_calibrate, zh_label_name_list, few_shot_data = few_shot_examples, print_log = print_log)
        elif mode in ['greedy_decode', 'sample']:
            out = llm_annotator_generative_cls(x, mode, zh_label_name_list, few_shot_data = few_shot_examples, self_consistency_num = self_consistency_num, print_log=print_log)
        else:
            raise ValueError(f'Unsupported mode for {MODEL_NAME}: {mode!r}')
    elif MODEL_NAME in ['siamese_uninlu']:
        out = siamase_annotator_cls(x, zh_label_name_list, print_log=print_log)
    elif MODEL_NAME in ['paddle_nlp']:
        out = paddle_annotator_cls(x, zh_label_name_list, print_log=print_log)
    else:
        raise ValueError(f'Unsupported MODEL_NAME: {MODEL_NAME!r}')

    # Map the Chinese label name back to the dataset's label id.
    if len(out) > 0 and DATASET in ['tnews', 'nlpcc2014_task2']:
        return en2id[zh2en[out]]
    return ''

In [None]:
# 调参
from sklearn.metrics import accuracy_score
import jieba

# General evaluation settings
do_eval = True
print_log = False
runs = 5
sample_max_length = 256 # each input is right-truncated to this length to avoid running out of GPU memory

# Few-shot learning (FSL) settings
use_fsl = True
fsl_example_num = 4 # in 'random' mode: total number of samples; in class-balanced mode: samples per class
fsl_example_mode = 'random' # [random, class-balance-fixed]

# Annotation mode
mode = 'prob_model' # [greedy_decode, sample, channel, prob_model, calibrate_cc, calibrate_dc]; the last four currently only support qwen
p_calibrate = None

# Parameters for CC (contextual calibration)
content_free_tokens = ['', '空', '无']

# Parameters for DC (domain-context calibration)
domain_words = []
dc_len = 0
dc_times = 20

# Parameters for the 'sample' mode
self_consistency_num = 1

def evaluate(test, sample_max_length, clean_pool, mode, p_calibrate, zh_label_name_list, few_shot_examples, print_log):
    """Annotate every sample of `test` and print coverage and accuracy.

    Samples for which the annotator returns no label are left out of the
    accuracy and only lower the coverage.

    Args:
        test: list of samples indexed as (text, label_id).
        sample_max_length: each text is right-truncated to this length.
        clean_pool: unused here; kept for interface compatibility.
        mode, p_calibrate, zh_label_name_list, few_shot_examples, print_log:
            forwarded to annotator.

    Returns:
        (coverage, accuracy). coverage is 0.0 for an empty test set and
        accuracy is NaN when nothing was predicted, instead of failing.
    """
    y_truth_list, y_pred_list = [], []

    for sample in tqdm(test):
        x, y_truth = sample[0], sample[1]
        if print_log:
            print('y_truth:', en2zh[id2en[y_truth]])
        y_pred = annotator(x[:sample_max_length], mode, p_calibrate, zh_label_name_list, few_shot_examples, print_log)

        # annotator returns '' when it could not produce a valid label
        has_prediction = isinstance(y_pred, int) or (isinstance(y_pred, str) and len(y_pred) > 0)
        if not has_prediction:
            continue

        y_truth_list.append(y_truth)
        y_pred_list.append(y_pred)
        if (y_truth != y_pred) and print_log:
            print('预测错误')

    if y_pred_list:
        accuracy = accuracy_score(np.array(y_truth_list), np.array(y_pred_list))
    else:
        accuracy = float('nan')  # accuracy is undefined without predictions
    coverage = len(y_pred_list) / len(test) if len(test) > 0 else 0.0

    print(f'coverage:{coverage}')
    print(f'accuracy:{accuracy}')

    return coverage, accuracy

if do_eval:
    if DATASET in ['tnews', 'nlpcc2014_task2']:
        # Evaluate on the leading training samples: 200 for tnews, 300 for nlpcc2014_task2.
        evaluation_set = train[:200] if DATASET in ['tnews'] else train[:300]

        # Segment every evaluation text: the words form the DC vocabulary and
        # the average word count becomes the length of a DC pseudo-input.
        dc_lens = []
        for d in evaluation_set:
            d_words = jieba.lcut(d[0], HMM=False)
            dc_lens.append(len(d_words))
            domain_words.extend(d_words)

        dc_len = int(sum(dc_lens) / len(dc_lens))

    for i in range(runs):

        # Pool the few-shot demonstrations are drawn from (labels as Chinese names).
        if fsl_example_mode == 'random':
            clean_pool = [[d[0], en2zh[id2en[d[1]]]] for d in val[:100]]
        elif 'class-balance' in fsl_example_mode:
            clean_pool = {}
            for d in val:
                clean_pool.setdefault(en2zh[id2en[d[1]]], []).append(d[0])

        few_shot_examples = (
            get_few_shot_example(clean_pool, mode = fsl_example_mode, num = fsl_example_num)
            if use_fsl else []
        )

        # The calibration prior depends on the few-shot examples, so it is
        # recomputed for every run.
        if mode == 'calibrate_cc':
            p_calibrate = get_p_calibrate_CC(zh_label_name_list, few_shot_data = few_shot_examples, content_free_tokens = content_free_tokens, print_log = print_log)
        elif mode == 'calibrate_dc':
            p_calibrate = get_p_calibrate_DC(zh_label_name_list, dc_len, dc_times, domain_words, few_shot_data = few_shot_examples, print_log = print_log)

        if mode in ('calibrate_cc', 'calibrate_dc') and print_log:
            print('p_calibrate')
            for p, n in zip(p_calibrate, zh_label_name_list):
                print(p, n)

        evaluate(evaluation_set, sample_max_length, clean_pool, mode, p_calibrate, zh_label_name_list, few_shot_examples, print_log)
        print()
        