#### **1. 导入模块**

导入 Python 标准库和本项目自定义库

In [1]:
# Standard library
import os
import sys
from collections import defaultdict

# Append the parent directory (resolved against the current working directory)
# to the module search path so that the project's own packages can be imported
sys.path.append(os.path.abspath('..'))

# Project-local library
from src.annotator.pos_tagger import POSTagger

#### **2. 加载模型**

为每个选定的大模型加载一个基于 API 的词性标注器

In [2]:
# === Load models: LLM taggers using the C7 tagset ===

# Language: English
lang = 'english'

# Tagset: CLAWS7
# https://ucrel.lancs.ac.uk/claws7tags.html
tagset = 'claws'

# Annotation mode: LLM API
mode = 'llm'

# LLMs to use (listed options: kimi-k2.5 | glm-5 | deepseek-v3.2 | qwen3-max)
llm_models = ['kimi-k2.5', 'glm-5']

# Setup: sign in to the Alibaba Cloud Bailian platform
# (https://bailian.console.aliyun.com/), apply for an API account for calling
# LLM services, then set LLM_API_KEY=sk-******** in the
# llm_corpus_annotation/config file.

# Settings shared by every tagger; only the model name varies per entry.
tagger_options = {'lang': lang, 'tagset': tagset, 'mode': mode}

# One POSTagger per model, keyed by model name.
llm_tagger = {}
for model in llm_models:
    llm_tagger[model] = POSTagger(llm_model=model, **tagger_options)

print('LLM CLAWS7 英文词性标注模型加载完毕！')

LLM CLAWS7 英文词性标注模型加载完毕！


#### **3. 词性标注**

使用大模型标注英语词性，并与 C7 Tagger 在线平台的标注结果逐句对照

In [3]:
# === Test data ===

# Excerpts from the English translation of 《鹿鼎记》:
# "The Deer and The Cauldron" (translated by John Minford)

# One sentence group per line; later cells iterate over this text line by line.
en_text = """
'Poor creatures!' he murmured to himself.
Whiskers leaped up with a joyful cry, then almost immediately sank back into the hammock with a groan. 'Let's go!' he said to his bearers. 'Hurry!'
He was evidently affected by this little scene, for a groan escaped his lips and he appeared to be very close to tears.
I noticed some time ago that the soup they serve is in need of seasoning, so every day I've been getting out one of the little bottles from my medicine chest and tipping a little of the powder in the soup to give it a bit of flavour.
"""

In [4]:
# === C7 Tagger annotation results ===

# The C7 Tagger does not provide a local model.
# Use the online service instead: https://ucrel-api.lancaster.ac.uk/claws/free.html
# Paste the test data into the text box, then copy the tagged output here.

c7_raw = """
'Poor_NN1 creatures_NN2 !_! '_" he_PPHS1 murmured_VVD to_II himself_PPX1 ._. 
Whiskers_NN2 leaped_VVD up_RP with_IW a_AT1 joyful_JJ cry_NN1 ,_, then_RT almost_RR immediately_RR sank_VVD back_RP into_II the_AT hammock_NN1 with_IW a_AT1 groan_NN1 ._. 'Let_NN1 's_GE go_VV0 !_! '_" he_PPHS1 said_VVD to_II his_APPGE bearers_NN2 ._. 'Hurry_VV0 !_! '_"
He_PPHS1 was_VBDZ evidently_RR affected_VVN by_II this_DD1 little_JJ scene_NN1 ,_, for_IF a_AT1 groan_NN1 escaped_VVD his_APPGE lips_NN2 and_CC he_PPHS1 appeared_VVD to_TO be_VBI very_RG close_JJ to_II tears_NN2 ._.
I_PPIS1 noticed_VVD some_DD time_NNT1 ago_RA that_CST the_AT soup_NN1 they_PPHS2 serve_VV0 is_VBZ in_II31 need_II32 of_II33 seasoning_NN1 ,_, so_RR every_AT1 day_NNT1 I_PPIS1 've_VH0 been_VBN getting_VVG out_RP one_MC1 of_IO the_AT little_JJ bottles_NN2 from_II my_APPGE medicine_NN1 chest_NN1 and_CC tipping_VVG a_RR21 little_RR22 of_IO the_AT powder_NN1 in_II the_AT soup_NN1 to_TO give_VVI it_PPH1 a_AT1 bit_NN1 of_IO flavour_NN1 ._. 
"""


def parse_c7_output(raw):
    """Parse ``word_TAG`` formatted tagger output into per-line annotations.

    Args:
        raw: Tagged text with one line per sentence group; items on a line
            are whitespace-separated and each has the form ``token_TAG``.

    Returns:
        A dict keyed by a 1-based, zero-padded five-digit line id
        (``'00001'``, ``'00002'``, ...). Each value is a dict with two
        parallel lists: ``'tok'`` (tokens) and ``'pos'`` (tags).

    Raises:
        ValueError: If an item contains no underscore separator.
    """
    annos = {}
    for line_no, line in enumerate(raw.strip().splitlines(), start=1):
        tokens, tags = [], []
        for item in line.split():
            # Split on the LAST underscore only, so a token that itself
            # contains underscores is kept intact instead of breaking the
            # two-way unpacking.
            token, sep, tag = item.rpartition('_')
            if not sep:
                raise ValueError(
                    f'line {line_no}: expected "token_TAG", got {item!r}'
                )
            tokens.append(token)
            tags.append(tag)
        annos[f'{line_no:05d}'] = {
            'tok': tokens,
            'pos': tags,
        }
    return annos


c7_annos = parse_c7_output(c7_raw)

In [5]:
# === Call the LLM APIs: run POS tagging ===

# --- Note ---
# To save API cost, LLM outputs are stored in the local cache data/llm_cache.
# After the first call, repeated calls only read the stored result from the
# local database.
# To check whether the API connection works, change the test data and tag again.

# Build the horizontal rule once. Besides removing repetition, this avoids
# re-using the enclosing quote character inside an f-string replacement field
# (f'{'-' * 60}'), which is a SyntaxError on Python versions before 3.12.
separator = '-' * 60

print('=== 英文词性标注 ===\n')

# Walk through the test data line by line
for index, text in enumerate(en_text.strip().splitlines()):
    # Same id format as the keys of c7_annos, so the two line up by position
    sent_id = f'{index+1:05d}'
    print(f'[ID]: {sent_id}')
    print(text)
    print(separator)

    # POS tagging by each LLM
    for model in llm_models:
        print(f'[{model}]:')
        llm_anno = llm_tagger[model].tag(text.strip())
        # Pair each token with its tag for display
        llm_tags = list(zip(llm_anno['tok'], llm_anno['pos']))
        print(llm_tags)
        print(separator)

    # POS tagging by the C7 Tagger (pre-parsed results for the same line)
    print('[c7_tagger]:')
    c7_anno = c7_annos[sent_id]
    c7_tags = list(zip(c7_anno['tok'], c7_anno['pos']))
    print(c7_tags)
    print(f'{separator}\n')

=== 英文词性标注 ===

[ID]: 00001
'Poor creatures!' he murmured to himself.
------------------------------------------------------------
[kimi-k2.5]:
[("'", "'"), ('Poor', 'JJ'), ('creatures', 'NN2'), ('!', '!'), ("'", "'"), ('he', 'PPHS1'), ('murmured', 'VVD'), ('to', 'II'), ('himself', 'PPX1'), ('.', '.')]
------------------------------------------------------------
[glm-5]:
[("'", "'"), ('Poor', 'JJ'), ('creatures', 'NN2'), ('!', '!'), ("'", "'"), ('he', 'PPHS1'), ('murmured', 'VVD'), ('to', 'II'), ('himself', 'PPX1'), ('.', '.')]
------------------------------------------------------------
[c7_tagger]:
[("'Poor", 'NN1'), ('creatures', 'NN2'), ('!', '!'), ("'", '"'), ('he', 'PPHS1'), ('murmured', 'VVD'), ('to', 'II'), ('himself', 'PPX1'), ('.', '.')]
------------------------------------------------------------

[ID]: 00002
Whiskers leaped up with a joyful cry, then almost immediately sank back into the hammock with a groan. 'Let's go!' he said to his bearers. 'Hurry!'
--------------------