In [1]:
import os
# Select the GPU before the framework imports in the next cell run:
# enumerate devices in PCI bus order (see issue #152) and expose only device 0.
os.environ.update({
    "CUDA_DEVICE_ORDER": "PCI_BUS_ID",
    "CUDA_VISIBLE_DEVICES": "0",
})

In [2]:
from dotenv import load_dotenv
load_dotenv()

import lightning as L
import pandas as pd

from tqdm.auto import tqdm
from lightning.pytorch.callbacks import ModelCheckpoint

from src.model.modeling_bind import LitBIND
from src.model.modeling_bindc import LitBINDC
from src.data.dataset import get_train_dataloader, get_dev_dataloader, get_test_dataloader

  from .autonotebook import tqdm as notebook_tqdm


In [3]:
# --- Reproducibility and data --------------------------------------------
SEED = 42
DATASET_NAME = 'jwengr/C-LLM'
MINI_BATCH_SIZE = 16  # batch size handed to every dataloader below
N_BATCH = 2           # NOTE(review): not referenced by any visible cell — confirm it is still needed

# --- Model settings forwarded to LitBIND.load_from_checkpoint -------------
BASE_MODEL_NAME = 'Qwen/Qwen3-4B-Base'
EPOCHS = 10
LEARNING_RATE = 1e-4
USE_BNTD = True

# --- Sequence-length limits for the train / dev dataloaders ---------------
TRAIN_MAX_LENGTH = 128
VALID_MAX_LENGTH = 128

# --- Inference sentence settings forwarded to LitBIND ---------------------
INFERENCE_SENTENCE_MAX_LENGTH = 64
INFERENCE_SENTENCE_MIN_LENGTH = 32
INFERENCE_SENTENCE_N_OVERLAP = 3


In [4]:
# Fix the global seed through Lightning's helper; it logs and returns the seed
# (see the recorded output below).
L.seed_everything(SEED)

Seed set to 42


42

In [5]:
# Build the three dataloaders from the same dataset with the same batch size.
# Only the train and dev loaders are given a max_length; the test loader is
# created without one, exactly as in the original call.
train_dl = get_train_dataloader(
    DATASET_NAME,
    batch_size=MINI_BATCH_SIZE,
    max_length=TRAIN_MAX_LENGTH,
)
dev_dl = get_dev_dataloader(
    DATASET_NAME,
    batch_size=MINI_BATCH_SIZE,
    max_length=VALID_MAX_LENGTH,
)
test_dl = get_test_dataloader(
    DATASET_NAME,
    batch_size=MINI_BATCH_SIZE,
)

In [6]:
# Restore LitBIND from a saved Lightning checkpoint. The checkpoint path and
# the keyword overrides are named separately so each is easy to spot and edit.
bind_checkpoint_path = 'checkpoints/bind/C-LLM-Qwen3-4B-Base-epoch=00-valid_loss=0.0134.ckpt'
bind_load_kwargs = dict(
    base_model_name=BASE_MODEL_NAME,
    lr=LEARNING_RATE,
    epochs=EPOCHS,
    use_bntd=USE_BNTD,
    inference_sentence_max_length=INFERENCE_SENTENCE_MAX_LENGTH,
    inference_sentence_min_length=INFERENCE_SENTENCE_MIN_LENGTH,
    inference_sentence_n_overlap=INFERENCE_SENTENCE_N_OVERLAP,
    target_chars='hanzi',
)
lit_bind = LitBIND.load_from_checkpoint(bind_checkpoint_path, **bind_load_kwargs)

Loading checkpoint shards: 100%|██████████| 3/3 [00:03<00:00,  1.21s/it]


use full attn qwen3
Using hanzi chars as target chars.


In [7]:
# Trainer with all-default settings; in this notebook it is only used to run
# prediction on the test dataloader.
trainer = L.Trainer()

💡 Tip: For seamless cloud uploads and versioning, try installing [litmodels](https://pypi.org/project/litmodels/) to enable LitModelCheckpoint, which syncs automatically with the Lightning model registry.
GPU available: True (cuda), used: True
TPU available: False, using: 0 TPU cores
HPU available: False, using: 0 HPUs


In [8]:
# Run inference over the test dataloader. `preds` holds one entry per batch
# (1671 batches, roughly 47 minutes in the recorded run), flattened in the next cell.
preds = trainer.predict(lit_bind, test_dl)

You are using a CUDA device ('NVIDIA A100-SXM4-80GB') that has Tensor Cores. To properly utilize them, you should set `torch.set_float32_matmul_precision('medium' | 'high')` which will trade-off precision for performance. For more details, read https://pytorch.org/docs/stable/generated/torch.set_float32_matmul_precision.html#torch.set_float32_matmul_precision
LOCAL_RANK: 0 - CUDA_VISIBLE_DEVICES: [0]
/home/jjw1214/.conda/envs/jjw1214_py312/lib/python3.12/site-packages/lightning/pytorch/trainer/connectors/data_connector.py:433: The 'predict_dataloader' does not have many workers which may be a bottleneck. Consider increasing the value of the `num_workers` argument` to `num_workers=127` in the `DataLoader` to improve performance.


Predicting DataLoader 0: 100%|██████████| 1671/1671 [46:44<00:00,  0.60it/s]


In [9]:
# Flatten the per-batch results from trainer.predict into a single list with
# one entry per example. tqdm is kept so the progress bar is still displayed.
prediction = [
    example_pred
    for batch_preds in tqdm(preds)
    for example_pred in batch_preds
]

100%|██████████| 1671/1671 [00:00<00:00, 750394.22it/s]


In [10]:
# Walk the test dataloader once more and gather, per example, the reference
# sentence, the noisy input sentence and the category label.
# NOTE(review): assumes test_dl yields examples in the same order as during
# trainer.predict (i.e. no shuffling), otherwise rows will not line up with
# `prediction` — TODO confirm.
true, inputs, categories = [], [], []
column_targets = (
    (true, 'sentence'),
    (inputs, 'sentence_noisy'),
    (categories, 'category'),
)
for test_batch in test_dl:
    for collected, batch_key in column_targets:
        collected.extend(test_batch[batch_key])

In [11]:
# Start from an empty frame; the columns are attached in the following cell.
result_df = pd.DataFrame()

In [12]:
# Attach the noisy inputs, model predictions, references and category labels
# as columns, in that order. All four lists must have the same length.
result_df = result_df.assign(
    input=inputs,
    pred=prediction,
    true=true,
    category=categories,
)

In [13]:
# Persist inputs, predictions, references and categories for offline scoring.
# Create the output directory first: to_csv raises OSError when the parent
# directory does not exist, which would fail the cell after the long
# prediction run above.
os.makedirs('results', exist_ok=True)
result_df.to_csv('results/cllm-bind-tgt-hanzi-qwen3-4b.csv', index=False)

In [15]:
import pandas as pd
from src.metrics.metric import calculate_metric

In [21]:
# Reload previously saved predictions from disk for metric computation.
# NOTE(review): the file written earlier in this notebook ends in `-4b.csv`;
# `17b` here presumably denotes the 1.7B model (cf. the `#1.7b` cell below) —
# confirm the filename, and that the right file is loaded before each metrics cell.
result_df = pd.read_csv('results/cllm-bind-tgt-hanzi-qwen3-17b.csv')

In [19]:
# Distinct category labels present in the loaded results.
set(result_df['category'])

{'car', 'cot', 'cscd', 'enc', 'gam', 'mec', 'new', 'nov'}

In [None]:
# 0.6b — presumably the output below was produced with the 0.6B model's
# predictions loaded into result_df; confirm which CSV is loaded before re-running.
# Print the 'C_C_f1' entry of the metric dict for every category.
# Categories are sorted because iterating a set of strings has no stable order
# across kernel sessions, which made the printed rows hard to compare between runs.
for cat in sorted(set(result_df['category'])):
    cat_df = result_df[result_df['category']==cat].copy()
    result, result_list = calculate_metric(cat_df['input'].tolist(), cat_df['true'].tolist(), cat_df['pred'].tolist())
    print(cat, result['C_C_f1'])

cscd 45.709
new 22.213
nov 13.162
gam 14.634
mec 10.534
enc 28.783
car 23.197
cot 25.667


In [23]:
# 1.7b — presumably the output below was produced with the 1.7B model's
# predictions loaded into result_df; confirm which CSV is loaded before re-running.
# Print the 'C_C_f1' entry of the metric dict for every category.
# Categories are sorted because iterating a set of strings has no stable order
# across kernel sessions, which made the printed rows hard to compare between runs.
for cat in sorted(set(result_df['category'])):
    cat_df = result_df[result_df['category']==cat].copy()
    result, result_list = calculate_metric(cat_df['input'].tolist(), cat_df['true'].tolist(), cat_df['pred'].tolist())
    print(cat, result['C_C_f1'])

cscd 36.678
new 13.775
nov 6.882
gam 7.0
mec 3.587
enc 12.611
car 13.039
cot 10.736


In [None]:
# 4b — presumably the output below was produced with the 4B model's
# predictions loaded into result_df; confirm which CSV is loaded before re-running.
# Print the 'C_C_f1' entry of the metric dict for every category.
# Categories are sorted because iterating a set of strings has no stable order
# across kernel sessions, which made the printed rows hard to compare between runs.
for cat in sorted(set(result_df['category'])):
    cat_df = result_df[result_df['category']==cat].copy()
    result, result_list = calculate_metric(cat_df['input'].tolist(), cat_df['true'].tolist(), cat_df['pred'].tolist())
    print(cat, result['C_C_f1'])

cscd 50.991
new 27.119
nov 17.703
gam 18.881
mec 17.594
enc 30.801
car 28.652
cot 29.522


In [72]:
# Print the complete metric dict returned by calculate_metric for every category.
# Categories are sorted because iterating a set of strings has no stable order
# across kernel sessions, which made the printed rows hard to compare between runs.
for cat in sorted(set(result_df['category'])):
    cat_df = result_df[result_df['category']==cat].copy()
    result, result_list = calculate_metric(cat_df['input'].tolist(), cat_df['true'].tolist(), cat_df['pred'].tolist())
    print(cat, result)

car {'S_D_p': 1.991, 'S_D_r': 3.488, 'S_D_f1': 2.535, 'S_C_p': 1.738, 'S_C_r': 3.044, 'S_C_f1': 2.213, 'C_D_p': 4.824, 'C_D_r': 19.78, 'C_D_f1': 7.756, 'C_C_p': 4.096, 'C_C_r': 16.797, 'C_C_f1': 6.586}
mec {'S_D_p': 0.581, 'S_D_r': 1.105, 'S_D_f1': 0.762, 'S_C_p': 0.349, 'S_C_r': 0.663, 'S_C_f1': 0.457, 'C_D_p': 2.989, 'C_D_r': 11.143, 'C_D_f1': 4.714, 'C_C_p': 2.027, 'C_C_r': 7.558, 'C_C_f1': 3.197}
nov {'S_D_p': 0.672, 'S_D_r': 1.194, 'S_D_f1': 0.86, 'S_C_p': 0.635, 'S_C_r': 1.128, 'S_C_f1': 0.813, 'C_D_p': 2.439, 'C_D_r': 11.567, 'C_D_f1': 4.029, 'C_C_p': 2.093, 'C_C_r': 9.927, 'C_C_f1': 3.457}
gam {'S_D_p': 0.287, 'S_D_r': 0.676, 'S_D_f1': 0.403, 'S_C_p': 0.0, 'S_C_r': 0.0, 'S_C_f1': 0.0, 'C_D_p': 2.938, 'C_D_r': 15.244, 'C_D_f1': 4.927, 'C_C_p': 2.233, 'C_C_r': 11.585, 'C_C_f1': 3.744}
cscd {'S_D_p': 68.867, 'S_D_r': 39.861, 'S_D_f1': 50.495, 'S_C_p': 66.392, 'S_C_r': 38.428, 'S_C_f1': 48.68, 'C_D_p': 72.211, 'C_D_r': 40.704, 'C_D_f1': 52.062, 'C_C_p': 69.684, 'C_C_r': 39.28, 'C_C

In [1]:
# Display the results frame.
# NOTE(review): the recorded NameError comes from running this cell as In [1]
# on a fresh kernel, before any cell had defined result_df; it needs the
# read_csv cell to have run first.
result_df

NameError: name 'result_df' is not defined

In [63]:
# Collect, for the 'cscd' category, the reference character at every position
# where the predicted character differs from it. Strings are compared position
# by position; zip stops at the shorter string, so any length difference
# between prediction and reference is ignored.
cscd_rows = result_df[result_df['category']=='cscd']
errs_cscd = [
    true_char
    for pred_text, true_text in zip(cscd_rows['pred'], cscd_rows['true'])
    for pred_char, true_char in zip(pred_text, true_text)
    if pred_char != true_char
]

In [69]:
# Subset of the results restricted to the 'cscd' category.
cdf = result_df[result_df['category']=='cscd']

In [71]:
# Fraction of cscd rows whose prediction equals the reference string exactly.
(cdf['pred']==cdf['true']).mean()

np.float64(0.6716)

In [66]:
# Inspect the cscd rows; pandas abbreviates the display (note the `...` row
# in the recorded output).
result_df[result_df['category']=='cscd']

Unnamed: 0,input,pred,true,category,pred2,pred3
12460,湖南永兴法院今日称，耒阳致13死煤矿透水事故一案一审审结，耒阳三都镇原党委书记匡老贱获刑13...,湖南永兴法院今日称，耒阳致13死煤矿透水事故一案一审审结，耒阳三都镇原党委书记匡老贱获刑13...,湖南永兴法院今日称，耒阳致13死煤矿透水事故一案一审审结，耒阳三都镇原党委书记匡老贱获刑13...,cscd,湖南永兴法院今日称，耒阳致13死煤矿透水事故一案一审审结，耒阳三都镇原党委书记匡老贱获刑13...,湖南永兴法院今日称，耒阳致13死煤矿透水事故一案一审审结，耒阳三都镇原党委书记匡老贱获刑13...
12461,让农民工流血、流汗不在流泪,让农民工流血、流汗不再流泪,让农民工流血、流汗不再流泪,cscd,让农民工流血、流汗不再流泪,让农民工流血、流汗不再流泪
12462,工信部总工程师朱宏任称，正在和发改委等有关部门抓紧研究消费实施实施意见和具体政策措施，努力将...,工信部总工程师朱宏任称，正在和发改委等有关部门抓紧研究消费实施实施意见和具体政策措施，努力将...,工信部总工程师朱宏任称，正在和发改委等有关部门抓紧研究消费实施实施意见和具体政策措施，努力将...,cscd,工信部总工程师朱宏任称，正在和发改委等有关部门抓紧研究消费实施实施意见和具体政策措施，努力将...,工信部总工程师朱宏任称，正在和发改委等有关部门抓紧研究消费实施实施意见和具体政策措施，努力将...
12463,夜幕降临，海港城Chanel专卖店排著长队，游客路过，下班的人行色匆匆，茶餐厅的厨师忙里偷闲...,夜幕降临，海港城Chanel专卖店排着长队，游客路过，下班的人行色匆匆，茶餐厅的厨师忙里偷闲...,夜幕降临，海港城Chanel专卖店排着长队，游客路过，下班的人行色匆匆，茶餐厅的厨师忙里偷闲...,cscd,夜幕降临，海港城Chanel专卖店排着长队，游客路过，下班的人行色匆匆，茶餐厅的厨师忙里偷闲...,夜幕降临，海港城Chanel专卖店排着长队，游客路过，下班的人行色匆匆，茶餐厅的厨师忙里偷闲...
12464,责令高通公司立即停止违法行为并即时整改,则令高通公司立即停止违法行为并即时整改,责令高通公司立即停止违法行为并及时整改,cscd,则令高通公司立即停止违法行为并即时整改,则令高通公司立即停止违法行为并即时整改
...,...,...,...,...,...,...
17455,易邀中国前身是一家房产中介ERP公司，在市场环境惨淡的情况下，各房产中介纷纷开始寻找出路,易遨中国前身是一家房产中介ERP公司，在市场环境惨淡的情况下，各房产中介纷纷开始寻找出路,易遨中国前身是一家房产中介ERP公司，在市场环境惨淡的情况下，各房产中介纷纷开始寻找出路,cscd,易遨中国前身是一家房产中介ERP公司，在市场环境惨淡的情况下，各房产中介纷纷开始寻找出路,易遨中国前身是一家房产中介ERP公司，在市场环境惨淡的情况下，各房产中介纷纷开始寻找出路
17456,对于这些错误，目前人教社正在进行严密论证，待有结论后会在及时告知师生,对于这些错误，目前人教社正在进行严密论证，待有结论后会在及时告知师生,对于这些错误，目前人教社正在进行严密论证，待有结论后会再及时告知师生,cscd,对于这些错误，目前人教社正在进行严密论证，待有结论后会在及时告知师生,对于这些错误，目前人教社正在进行严密论证，待有结论后会在及时告知师生
17457,内乡县湍东镇的河南晋成陶瓷有限公司被罚款20万元；南阳热电有限责任公司被罚3万元；开封空分集...,内乡县涧东镇的河南晋成陶瓷有限公司被罚款20万元；南阳热电有限责任公司被罚3万元；开封空分集...,内乡县湍东镇的河南晋成陶瓷有限公司被罚款20万元；南阳热电有限责任公司被罚3万元；开封空分集...,cscd,内乡县涧东镇的河南晋成陶瓷有限公司被罚款20万元；南阳热电有限责任公司被罚3万元；开封空分集...,内乡县涧东镇的河南晋成陶瓷有限公司被罚款20万元；南阳热电有限责任公司被罚3万元；开封空分集...
17458,为顺应广大市民为地震灾区奉献爱心的需求，北京市民政局接受救灾捐赠事务管理中心启动救灾捐赠机制...,为顺应广大市民为地震灾区奉献爱心的需求，北京市民政局接受救灾捐赠事务管理中心启动救灾捐赠机制...,为顺应广大市民为地震灾区奉献爱心的需求，北京市民政局接受救灾捐赠事务管理中心启动救灾捐赠机制...,cscd,为顺应广大市民为地震灾区奉献爱心的需求，北京市民政局接受救灾捐赠事务管理中心启动救灾捐赠机制...,为顺应广大市民为地震灾区奉献爱心的需求，北京市民政局接受救灾捐赠事务管理中心启动救灾捐赠机制...


In [64]:
# Counter is not imported by any visible cell, so without this import the cell
# raises NameError on a fresh kernel (Restart & Run All).
from collections import Counter

# Tally how often each reference character was mispredicted in the cscd subset.
c_cscd = Counter(errs_cscd)

In [65]:
# List every mispredicted reference character with its count, most frequent first.
# NOTE(review): with no argument this prints the whole tally; pass a limit
# (e.g. most_common(30)) if only the head of the list is needed.
c_cscd.most_common()

[('再', 36),
 ('是', 24),
 ('地', 22),
 ('于', 20),
 ('时', 18),
 ('做', 18),
 ('由', 17),
 ('实', 17),
 ('受', 17),
 ('的', 17),
 ('作', 16),
 ('使', 16),
 ('哪', 16),
 ('戴', 15),
 ('以', 14),
 ('得', 13),
 ('到', 13),
 ('那', 12),
 ('又', 11),
 ('力', 11),
 ('着', 10),
 ('及', 10),
 ('有', 10),
 ('近', 10),
 ('里', 10),
 ('程', 10),
 ('称', 9),
 ('须', 9),
 ('次', 8),
 ('象', 8),
 ('监', 8),
 ('与', 8),
 ('至', 8),
 ('订', 8),
 ('即', 8),
 ('分', 8),
 ('权', 8),
 ('现', 8),
 ('大', 7),
 ('处', 7),
 ('一', 7),
 ('为', 7),
 ('暴', 7),
 ('买', 7),
 ('界', 7),
 ('元', 7),
 ('事', 7),
 ('渡', 7),
 ('倒', 7),
 ('具', 7),
 ('在', 7),
 ('止', 6),
 ('定', 6),
 ('今', 6),
 ('件', 6),
 ('乘', 6),
 ('形', 6),
 ('成', 6),
 ('其', 6),
 ('推', 6),
 ('映', 6),
 ('城', 6),
 ('像', 5),
 ('拍', 5),
 ('接', 5),
 ('已', 5),
 ('合', 5),
 ('厂', 5),
 ('机', 5),
 ('份', 5),
 ('型', 5),
 ('爆', 5),
 ('需', 5),
 ('只', 5),
 ('自', 5),
 ('履', 5),
 ('它', 5),
 ('回', 5),
 ('致', 5),
 ('涉', 5),
 ('想', 4),
 ('客', 4),
 ('篇', 4),
 ('除', 4),
 ('治', 4),
 ('也', 4),
 ('位', 4),
 ('被', 4),
 ('薪',