
### References

*   [https://www.kaggle.com/code/abdmental01/jigsaw-mpnet-base-v2-inference-cv-0-876](https://www.kaggle.com/code/abdmental01/jigsaw-mpnet-base-v2-inference-cv-0-876)
*   [https://www.kaggle.com/code/aerdem4/jigsaw-acrc-qwen7b-finetune-logits-processor-zoo](https://www.kaggle.com/code/aerdem4/jigsaw-acrc-qwen7b-finetune-logits-processor-zoo)
*   [https://www.guruguru.science/competitions/24/discussions/21027ff1-2074-4e21-a249-b2d4170bd516/](https://www.guruguru.science/competitions/24/discussions/21027ff1-2074-4e21-a249-b2d4170bd516/)
*   [https://www.kaggle.com/code/mks2192/jigsaw-llama3-1-8b-instruct-training-one-epoch](https://www.kaggle.com/code/mks2192/jigsaw-llama3-1-8b-instruct-training-one-epoch)

# 1. Qwen2.5 32B GPTQ Int4 Inference

In [1]:
# Add at the very start of the notebook
import torch
import gc

# Free GPU memory cached by PyTorch, then run the Python garbage collector
torch.cuda.empty_cache()
gc.collect()

# Alternatively, restart the kernel

37

In [2]:
import os
# Add at the very start of the code
os.environ["TOKENIZERS_PARALLELISM"] = "false"

In [3]:
! mkdir -p /tmp/src

In [4]:
# -*- coding: utf-8 -*-
"""
Minimal improvements on top of the existing code.
Only a few key places in the existing code need to be modified.
"""

# ============== 1. Add these functions at the top of the file ==============

import re
import numpy as np
from sklearn.feature_extraction.text import TfidfVectorizer
from sklearn.metrics.pairwise import cosine_similarity

def extract_text_features(text, rule, pos_examples, neg_examples):
    """Extract surface and TF-IDF similarity features for one comment.

    Args:
        text: Comment body to describe.
        rule: Rule text the comment is compared against.
        pos_examples: Example comments that violate the rule (any number).
        neg_examples: Example comments that do not violate the rule
            (any number).

    Returns:
        dict with the keys ``text_length``, ``word_count``, ``upper_ratio``,
        ``punct_count``, ``caps_words``, ``rule_similarity``,
        ``pos_similarity`` and ``neg_similarity``. The three similarity
        values fall back to a neutral 0.5 when they cannot be computed.
    """
    features = {}

    # Basic surface features.
    features['text_length'] = len(text)
    features['word_count'] = len(text.split())
    # max(..., 1) avoids a division by zero for an empty comment.
    features['upper_ratio'] = sum(1 for c in text if c.isupper()) / max(len(text), 1)
    features['punct_count'] = len(re.findall(r'[!@#$%^&*(),.?":{}|<>]', text))
    features['caps_words'] = len(re.findall(r'\b[A-Z]{2,}\b', text))

    pos_examples = list(pos_examples)
    neg_examples = list(neg_examples)
    n_pos = len(pos_examples)

    # Simple TF-IDF similarity features, fitted on this sample only.
    try:
        tfidf = TfidfVectorizer(max_features=100, stop_words='english')
        all_texts = [text, rule] + pos_examples + neg_examples
        tfidf_matrix = tfidf.fit_transform(all_texts)

        text_vec = tfidf_matrix[0]
        rule_vec = tfidf_matrix[1]
        # Slice by the actual number of examples instead of assuming two of each.
        pos_vecs = tfidf_matrix[2:2 + n_pos]
        neg_vecs = tfidf_matrix[2 + n_pos:]

        def mean_similarity(vecs):
            # No examples of this kind: stay neutral rather than averaging nothing.
            if vecs.shape[0] == 0:
                return 0.5
            return np.mean(cosine_similarity(text_vec, vecs))

        features['rule_similarity'] = cosine_similarity(text_vec, rule_vec)[0][0]
        features['pos_similarity'] = mean_similarity(pos_vecs)
        features['neg_similarity'] = mean_similarity(neg_vecs)
    except Exception:
        # Best effort: e.g. TfidfVectorizer raises ValueError when every
        # document is empty or contains only stop words. Unlike a bare
        # ``except:``, this does not swallow KeyboardInterrupt/SystemExit.
        features['rule_similarity'] = 0.5
        features['pos_similarity'] = 0.5
        features['neg_similarity'] = 0.5

    return features

def get_adaptive_weights(features, subreddit):
    """Compute per-sample ensemble weights for the Qwen and Llama models.

    Starts from an even 0.5 / 0.5 split and shifts weight towards or away
    from Qwen using hand-set heuristics on the sample's features.

    Args:
        features: Mapping with at least ``text_length``, ``upper_ratio``,
            ``pos_similarity`` and ``neg_similarity``.
        subreddit: Name of the subreddit the comment was posted in.

    Returns:
        Tuple ``(qwen_weight, llama_weight)``; the Qwen weight is clamped to
        [0.1, 0.9] and the two weights always sum to 1.
    """
    # Only the Qwen weight is tracked: the Llama weight is its complement,
    # so separate bookkeeping for it would be discarded anyway.
    qwen_weight = 0.5

    # Text length: favour Qwen on long texts, Llama on short ones.
    if features['text_length'] > 500:
        qwen_weight += 0.1
    elif features['text_length'] < 100:
        qwen_weight -= 0.1

    # A high share of upper-case letters (possibly angry/agitated text).
    if features['upper_ratio'] > 0.3:
        qwen_weight += 0.05

    # Similarity to violating vs. non-violating examples.
    similarity_diff = features['pos_similarity'] - features['neg_similarity']
    if similarity_diff > 0.2:  # clearly closer to the violating examples
        qwen_weight += 0.1
    elif similarity_diff < -0.2:  # clearly closer to the non-violating examples
        qwen_weight -= 0.1

    # Subreddit-specific adjustment (extend from experience).
    if subreddit in ('AskReddit', 'politics'):
        qwen_weight += 0.05

    # Keep the weight within a sane range.
    qwen_weight = max(0.1, min(0.9, qwen_weight))
    return qwen_weight, 1 - qwen_weight

def two_stage_prediction(text, rule, examples, base_prediction):
    """Adjust a model probability using simple heuristic text features.

    Args:
        text: Comment body being classified.
        rule: Rule text the comment is judged against.
        examples: Mapping with the keys ``positive_example_1``,
            ``positive_example_2``, ``negative_example_1`` and
            ``negative_example_2``.
        base_prediction: Violation probability produced by the model.

    Returns:
        The adjusted violation probability.
    """
    pos_examples = [examples['positive_example_1'], examples['positive_example_2']]
    neg_examples = [examples['negative_example_1'], examples['negative_example_2']]

    features = extract_text_features(text, rule, pos_examples, neg_examples)
    pos_sim = features['pos_similarity']
    neg_sim = features['neg_similarity']

    # Stage 1: the model and the similarity features agree, so take a shortcut.
    # Low prediction and closer to the non-violating examples: lower it further.
    if base_prediction < 0.3 and pos_sim < neg_sim:
        return base_prediction * 0.8

    # High prediction and closer to the violating examples: raise it moderately.
    if base_prediction > 0.7 and pos_sim > neg_sim:
        boosted = min(0.95, base_prediction * 1.1)
        # The 0.95 cap must never pull an already more confident prediction
        # down: that would turn the boost into a penalty and create ties.
        return max(base_prediction, boosted)

    # Stage 2: fine-grained additive adjustment.
    adjustment = 0

    if features['caps_words'] > 2:  # many all-caps words, possibly angry
        adjustment += 0.05

    if features['punct_count'] > features['word_count'] * 0.3:  # heavy punctuation
        adjustment += 0.03

    # The 0.01 floor on the denominator avoids a division by zero.
    similarity_ratio = pos_sim / max(neg_sim, 0.01)
    if similarity_ratio > 2:
        adjustment += 0.1
    elif similarity_ratio < 0.5:
        adjustment -= 0.1

    final_prediction = base_prediction + adjustment
    return max(0.01, min(0.99, final_prediction))

In [5]:
%%writefile /tmp/src/infer_qwen.py

import os
import pandas as pd
from logits_processor_zoo.vllm import MultipleChoiceLogitsProcessor
import torch
import vllm
import numpy as np
from vllm.lora.request import LoRARequest
import argparse
from scipy.special import softmax
import re
from sklearn.feature_extraction.text import TfidfVectorizer
from sklearn.metrics.pairwise import cosine_similarity

# ============== 添加辅助函数 ==============

def extract_text_features(text, rule, pos_examples, neg_examples):
    """Extract surface and TF-IDF similarity features for one comment.

    Args:
        text: Comment body to describe.
        rule: Rule text the comment is compared against.
        pos_examples: Example comments that violate the rule (any number).
        neg_examples: Example comments that do not violate the rule
            (any number).

    Returns:
        dict with the keys ``text_length``, ``word_count``, ``upper_ratio``,
        ``punct_count``, ``caps_words``, ``rule_similarity``,
        ``pos_similarity`` and ``neg_similarity``. The three similarity
        values fall back to a neutral 0.5 when they cannot be computed.
    """
    features = {}

    # Basic surface features.
    features['text_length'] = len(text)
    features['word_count'] = len(text.split())
    # max(..., 1) avoids a division by zero for an empty comment.
    features['upper_ratio'] = sum(1 for c in text if c.isupper()) / max(len(text), 1)
    features['punct_count'] = len(re.findall(r'[!@#$%^&*(),.?":{}|<>]', text))
    features['caps_words'] = len(re.findall(r'\b[A-Z]{2,}\b', text))

    pos_examples = list(pos_examples)
    neg_examples = list(neg_examples)
    n_pos = len(pos_examples)

    # Simple TF-IDF similarity features, fitted on this sample only.
    try:
        tfidf = TfidfVectorizer(max_features=100, stop_words='english')
        all_texts = [text, rule] + pos_examples + neg_examples
        tfidf_matrix = tfidf.fit_transform(all_texts)

        text_vec = tfidf_matrix[0]
        rule_vec = tfidf_matrix[1]
        # Slice by the actual number of examples instead of assuming two of each.
        pos_vecs = tfidf_matrix[2:2 + n_pos]
        neg_vecs = tfidf_matrix[2 + n_pos:]

        def mean_similarity(vecs):
            # No examples of this kind: stay neutral rather than averaging nothing.
            if vecs.shape[0] == 0:
                return 0.5
            return np.mean(cosine_similarity(text_vec, vecs))

        features['rule_similarity'] = cosine_similarity(text_vec, rule_vec)[0][0]
        features['pos_similarity'] = mean_similarity(pos_vecs)
        features['neg_similarity'] = mean_similarity(neg_vecs)
    except Exception:
        # Best effort: e.g. TfidfVectorizer raises ValueError when every
        # document is empty or contains only stop words. Unlike a bare
        # ``except:``, this does not swallow KeyboardInterrupt/SystemExit.
        features['rule_similarity'] = 0.5
        features['pos_similarity'] = 0.5
        features['neg_similarity'] = 0.5

    return features

def two_stage_prediction(text, rule, examples, base_prediction):
    """Adjust a model probability using simple heuristic text features.

    Args:
        text: Comment body being classified.
        rule: Rule text the comment is judged against.
        examples: Mapping with the keys ``positive_example_1``,
            ``positive_example_2``, ``negative_example_1`` and
            ``negative_example_2``.
        base_prediction: Violation probability produced by the model.

    Returns:
        The adjusted violation probability.
    """
    pos_examples = [examples['positive_example_1'], examples['positive_example_2']]
    neg_examples = [examples['negative_example_1'], examples['negative_example_2']]

    features = extract_text_features(text, rule, pos_examples, neg_examples)
    pos_sim = features['pos_similarity']
    neg_sim = features['neg_similarity']

    # Stage 1: the model and the similarity features agree, so take a shortcut.
    # Low prediction and closer to the non-violating examples: lower it further.
    if base_prediction < 0.3 and pos_sim < neg_sim:
        return base_prediction * 0.8

    # High prediction and closer to the violating examples: raise it moderately.
    if base_prediction > 0.7 and pos_sim > neg_sim:
        boosted = min(0.95, base_prediction * 1.1)
        # The 0.95 cap must never pull an already more confident prediction
        # down: that would turn the boost into a penalty and create ties.
        return max(base_prediction, boosted)

    # Stage 2: fine-grained additive adjustment.
    adjustment = 0

    if features['caps_words'] > 2:  # many all-caps words, possibly angry
        adjustment += 0.05

    if features['punct_count'] > features['word_count'] * 0.3:  # heavy punctuation
        adjustment += 0.03

    # The 0.01 floor on the denominator avoids a division by zero.
    similarity_ratio = pos_sim / max(neg_sim, 0.01)
    if similarity_ratio > 2:
        adjustment += 0.1
    elif similarity_ratio < 0.5:
        adjustment -= 0.1

    final_prediction = base_prediction + adjustment
    return max(0.01, min(0.99, final_prediction))

# ============== 原始代码开始 ==============

# Test set; read at import time, i.e. also outside the __main__ guard below.
df = pd.read_csv("/kaggle/input/jigsaw-agile-community-rules/test.csv")

# Base model directory and the LoRA adapter applied on top of it.
MODEL_NAME = "/kaggle/input/qwen2-5-32b-instruct-gptq-int4"
LORA_PATH = "/kaggle/input/jigsaw-exp003-fold0/trained_model"

if __name__=='__main__':
    # The run log reports "Initializing a V0 LLM engine" with this setting.
    os.environ["VLLM_USE_V1"] = "0"

    llm = vllm.LLM(
        MODEL_NAME,
        # quantization='awq',
        quantization='gptq',
        # Shard the model across every visible GPU.
        tensor_parallel_size=torch.cuda.device_count(),
        gpu_memory_utilization=0.95,
        trust_remote_code=True,
        dtype="half",
        enforce_eager=True,
        max_model_len=4096,
        disable_log_stats=True,
        enable_prefix_caching=True,
        enable_lora=True,
    )
    tokenizer = llm.get_tokenizer()
    SYS_PROMPT = """
    You are given a comment on reddit. Your task is to classify if it violates the given rule. Only respond Yes/No.
    """

    # Build one few-shot chat prompt per test row: two violating examples,
    # two non-violating examples, then the comment to classify.
    prompts = []
    for i, row in df.iterrows():
        text = f"""
    r/{row.subreddit}
    Rule: {row.rule}

    1) {row.positive_example_1}
    Violation: Yes

    2) {row.positive_example_2}
    Violation: Yes

    3) {row.negative_example_1}
    Violation: No

    4) {row.negative_example_2}
    Violation: No

    5) {row.body}
    """

        messages = [
            {"role": "system", "content": SYS_PROMPT},
            {"role": "user", "content": text}
        ]

        # Render the chat template as text and prime the reply with "Answer:".
        prompt = tokenizer.apply_chat_template(
            messages,
            add_generation_prompt=True,
            tokenize=False,
        ) + "Answer:"
        prompts.append(prompt)

    df["prompt"] = prompts

    # Presumably restricts generation to the two choice tokens — see the
    # logits_processor_zoo documentation to confirm.
    mclp = MultipleChoiceLogitsProcessor(tokenizer, choices=['Yes','No'])
    # Generate a single token per prompt and request logprobs for it.
    outputs = llm.generate(
        prompts,
        vllm.SamplingParams(
            skip_special_tokens=True,
            max_tokens=1,
            logits_processors=[mclp],
            logprobs=2,
        ),
        use_tqdm=True,
        lora_request=LoRARequest("default", 1, LORA_PATH)
    )
    # One dict per prompt: decoded token -> logprob for the generated position.
    logprobs = [
        {lp.decoded_token: lp.logprob for lp in out.outputs[0].logprobs[0].values()}
        for out in outputs
    ]
    # Raises KeyError if neither output contains a 'Yes' (or 'No') token.
    logit_matrix = pd.DataFrame(logprobs)[['Yes','No']]
    # Column-wise concat aligns on the index; both frames use the default one here.
    df = pd.concat([df, logit_matrix], axis=1)

    # Row-wise softmax over the two logprobs so that Yes + No sums to 1.
    df[['Yes',"No"]] = df[['Yes',"No"]].apply(lambda x: softmax(x.values), axis=1, result_type="expand")
    df["pred"] = df["Yes"]
    
    # ============== Added: two-stage refinement ==============
    print("开始两阶段优化...")
    enhanced_predictions = []
    for idx, row in df.iterrows():
        examples = {
            'positive_example_1': row['positive_example_1'],
            'positive_example_2': row['positive_example_2'], 
            'negative_example_1': row['negative_example_1'],
            'negative_example_2': row['negative_example_2']
        }
        # Adjust the raw "Yes" probability with the heuristic text features.
        enhanced_pred = two_stage_prediction(row['body'], row['rule'], examples, row['pred'])
        enhanced_predictions.append(enhanced_pred)

    df['rule_violation'] = enhanced_predictions
    print("两阶段优化完成！")
    # ============== End of refinement ==============
    
    # Written relative to the working directory; echoed back as a sanity check.
    df[['row_id', 'rule_violation']].to_csv("submission_qwen.csv",index=False)
    print(pd.read_csv('submission_qwen.csv'))

Writing /tmp/src/infer_qwen.py


In [6]:
%cd /tmp
!python src/infer_qwen.py

/tmp
2025-08-10 22:52:00.219778: E external/local_xla/xla/stream_executor/cuda/cuda_fft.cc:477] Unable to register cuFFT factory: Attempting to register factory for plugin cuFFT when one has already been registered
E0000 00:00:1754866320.601461      60 cuda_dnn.cc:8310] Unable to register cuDNN factory: Attempting to register factory for plugin cuDNN when one has already been registered
E0000 00:00:1754866320.710638      60 cuda_blas.cc:1418] Unable to register cuBLAS factory: Attempting to register factory for plugin cuBLAS when one has already been registered
INFO 08-10 22:52:15 [__init__.py:235] Automatically detected platform cuda.
INFO 08-10 22:52:32 [config.py:1604] Using max model len 4096
INFO 08-10 22:52:34 [llm_engine.py:228] Initializing a V0 LLM engine (v0.10.0) with config: model='/kaggle/input/qwen2-5-32b-instruct-gptq-int4', speculative_config=None, tokenizer='/kaggle/input/qwen2-5-32b-instruct-gptq-int4', skip_tokenizer_init=False, tokenizer_mode=auto, revision=Non

# 2. Llama3.1 8B Instruct Inference

In [7]:
import os, math, numpy as np
# Restrict this process to GPUs 0 and 1.
# NOTE(review): torch is imported in an earlier cell; this presumably only
# takes effect if CUDA has not been initialised yet in this kernel — confirm.
os.environ["CUDA_VISIBLE_DEVICES"]="0,1"

In [8]:
import pandas as pd
import numpy as np

# Test set and the sample submission; the latter is indexed by row_id.
test = pd.read_csv('/kaggle/input/jigsaw-agile-community-rules/test.csv')
sub = pd.read_csv('/kaggle/input/jigsaw-agile-community-rules/sample_submission.csv', index_col='row_id')
sub


Unnamed: 0_level_0,rule_violation
row_id,Unnamed: 1_level_1
2029,0.5
2030,0.5
2031,0.5
2032,0.5
2033,0.5
2034,0.5
2035,0.5
2036,0.5
2037,0.5
2038,0.5


In [9]:
import vllm

# Load the fine-tuned Llama 3.1 8B Instruct checkpoint, sharded over two GPUs.
llm = vllm.LLM(
    "/kaggle/input/jigsaw-llama3-1-8b-instruct-training-one-epoch/llama-8b-instruct-jigsaw",
    tensor_parallel_size=2, 
    gpu_memory_utilization=0.95, 
    trust_remote_code=True,
    dtype="half", 
    enforce_eager=True,
    max_model_len=2048,
    # disable_log_stats=True,
    # enable_prefix_caching=True,
    
)
# Tokenizer of the loaded model; used below for token ids and chat templating.
tokenizer = llm.get_tokenizer()


2025-08-10 22:58:06.416583: E external/local_xla/xla/stream_executor/cuda/cuda_fft.cc:477] Unable to register cuFFT factory: Attempting to register factory for plugin cuFFT when one has already been registered
E0000 00:00:1754866686.439721      36 cuda_dnn.cc:8310] Unable to register cuDNN factory: Attempting to register factory for plugin cuDNN when one has already been registered
E0000 00:00:1754866686.446490      36 cuda_blas.cc:1418] Unable to register cuBLAS factory: Attempting to register factory for plugin cuBLAS when one has already been registered


INFO 08-10 22:58:12 [__init__.py:235] Automatically detected platform cuda.
INFO 08-10 22:58:28 [config.py:1604] Using max model len 2048
INFO 08-10 22:58:28 [llm_engine.py:228] Initializing a V0 LLM engine (v0.10.0) with config: model='/kaggle/input/jigsaw-llama3-1-8b-instruct-training-one-epoch/llama-8b-instruct-jigsaw', speculative_config=None, tokenizer='/kaggle/input/jigsaw-llama3-1-8b-instruct-training-one-epoch/llama-8b-instruct-jigsaw', skip_tokenizer_init=False, tokenizer_mode=auto, revision=None, override_neuron_config={}, tokenizer_revision=None, trust_remote_code=True, dtype=torch.float16, max_seq_len=2048, download_dir=None, load_format=LoadFormat.AUTO, tensor_parallel_size=2, pipeline_parallel_size=1, disable_custom_all_reduce=False, quantization=None, enforce_eager=True, kv_cache_dtype=auto,  device_config=cuda, decoding_config=DecodingConfig(backend='auto', disable_fallback=False, disable_any_whitespace=False, disable_additional_properties=False, reasoning_backend=''), 

2025-08-10 22:58:34.196643: E external/local_xla/xla/stream_executor/cuda/cuda_fft.cc:477] Unable to register cuFFT factory: Attempting to register factory for plugin cuFFT when one has already been registered
E0000 00:00:1754866714.217237     398 cuda_dnn.cc:8310] Unable to register cuDNN factory: Attempting to register factory for plugin cuDNN when one has already been registered
E0000 00:00:1754866714.223826     398 cuda_blas.cc:1418] Unable to register cuBLAS factory: Attempting to register factory for plugin cuBLAS when one has already been registered


INFO 08-10 22:58:39 [__init__.py:235] Automatically detected platform cuda.
[1;36m(VllmWorkerProcess pid=398)[0;0m INFO 08-10 22:58:40 [multiproc_worker_utils.py:226] Worker ready; awaiting tasks
[1;36m(VllmWorkerProcess pid=398)[0;0m INFO 08-10 22:58:41 [cuda.py:346] Cannot use FlashAttention-2 backend for Volta and Turing GPUs.
[1;36m(VllmWorkerProcess pid=398)[0;0m INFO 08-10 22:58:41 [cuda.py:395] Using XFormers backend.


[W810 22:58:52.512202172 socket.cpp:200] [c10d] The hostname of the client socket cannot be retrieved. err=-3
[W810 22:58:52.883159025 socket.cpp:200] [c10d] The hostname of the client socket cannot be retrieved. err=-3
[W810 22:59:02.522867645 socket.cpp:200] [c10d] The hostname of the client socket cannot be retrieved. err=-3


INFO 08-10 22:59:12 [__init__.py:1375] Found nccl from library libnccl.so.2
[1;36m(VllmWorkerProcess pid=398)[0;0m INFO 08-10 22:59:12 [__init__.py:1375] Found nccl from library libnccl.so.2
[1;36m(VllmWorkerProcess pid=398)[0;0m INFO 08-10 22:59:12 [pynccl.py:70] vLLM is using nccl==2.26.2
INFO 08-10 22:59:12 [pynccl.py:70] vLLM is using nccl==2.26.2


[W810 22:59:12.533412463 socket.cpp:200] [c10d] The hostname of the client socket cannot be retrieved. err=-3


INFO 08-10 22:59:12 [custom_all_reduce_utils.py:246] reading GPU P2P access cache from /root/.cache/vllm/gpu_p2p_access_cache_for_0,1.json
[1;36m(VllmWorkerProcess pid=398)[0;0m INFO 08-10 22:59:12 [custom_all_reduce_utils.py:246] reading GPU P2P access cache from /root/.cache/vllm/gpu_p2p_access_cache_for_0,1.json
INFO 08-10 22:59:12 [shm_broadcast.py:289] vLLM message queue communication handle: Handle(local_reader_ranks=[1], buffer_handle=(1, 4194304, 6, 'psm_e6a94384'), local_subscribe_addr='ipc:///tmp/92e100b3-d156-447e-b99c-f3bc3f6d4ef3', remote_subscribe_addr=None, remote_addr_ipv6=False)
INFO 08-10 22:59:12 [parallel_state.py:1102] rank 0 in world size 2 is assigned as DP rank 0, PP rank 0, TP rank 0, EP rank 0
[1;36m(VllmWorkerProcess pid=398)[0;0m INFO 08-10 22:59:12 [parallel_state.py:1102] rank 1 in world size 2 is assigned as DP rank 0, PP rank 0, TP rank 1, EP rank 1
INFO 08-10 22:59:12 [model_runner.py:1083] Starting to load model /kaggle/input/jigsaw-llama3-1-8b-ins

Loading safetensors checkpoint shards:   0% Completed | 0/4 [00:00<?, ?it/s]


[1;36m(VllmWorkerProcess pid=398)[0;0m INFO 08-10 23:00:52 [default_loader.py:262] Loading weights took 99.46 seconds
INFO 08-10 23:00:52 [default_loader.py:262] Loading weights took 99.55 seconds
INFO 08-10 23:00:53 [model_runner.py:1115] Model loading took 7.5123 GiB and 99.785241 seconds
[1;36m(VllmWorkerProcess pid=398)[0;0m INFO 08-10 23:00:53 [model_runner.py:1115] Model loading took 7.5123 GiB and 99.701905 seconds
[1;36m(VllmWorkerProcess pid=398)[0;0m INFO 08-10 23:00:57 [worker.py:295] Memory profiling takes 3.71 seconds
[1;36m(VllmWorkerProcess pid=398)[0;0m INFO 08-10 23:00:57 [worker.py:295] the current vLLM instance can use total_gpu_memory (14.74GiB) x gpu_memory_utilization (0.95) = 14.00GiB
[1;36m(VllmWorkerProcess pid=398)[0;0m INFO 08-10 23:00:57 [worker.py:295] model weights take 7.51GiB; non_torch_memory takes 0.11GiB; PyTorch activation peak memory takes 0.14GiB; the rest of the memory reserved for KV Cache is 6.25GiB.
INFO 08-10 23:00:58 [worker.py:29

In [10]:
from typing import Any, Dict, List
from transformers import LogitsProcessor
import torch

choices = ["No", "Yes"]

# First token id of each choice, in the same order as `choices`.
KEEP = [
    tokenizer.encode(choice, add_special_tokens=False)[0]
    for choice in choices
]
print(f"Force predictions to be tokens {KEEP} which are {choices}.")

class DigitLogitsProcessor(LogitsProcessor):
    """Logits processor that adds a fixed +100 bonus to the token ids in ``KEEP``."""

    def __init__(self, tokenizer):
        # `tokenizer` is accepted but not used: the allowed ids come from the
        # module-level KEEP list.
        self.allowed_ids = KEEP

    def __call__(self, input_ids: List[int], scores: torch.Tensor) -> torch.Tensor:
        """Boost the allowed tokens' scores in place and return ``scores``."""
        boosted = scores[self.allowed_ids] + 100
        scores[self.allowed_ids] = boosted
        return scores

Force predictions to be tokens [2822, 9642] which are ['No', 'Yes'].


In [11]:


# System message placed first in every Llama conversation.
sys_prompt = '''You are given a comment on reddit and a rule. Your task is to classify whether the comment violates the rule. Only respond Yes/No.'''



In [12]:


def formatting(dataset):
    """Render each conversation in ``dataset`` to prompt text.

    Uses the module-level ``tokenizer``'s chat template, without tokenizing
    and without appending a generation prompt.

    Returns:
        List of prompt strings, one per conversation, in input order.
    """
    return [
        tokenizer.apply_chat_template(
            conversation,
            tokenize=False,
            add_generation_prompt=False,
        )
        for conversation in dataset
    ]



In [13]:


# User-message template. The four few-shot examples are interleaved
# (Yes, No, No, Yes) and the text ends with "Violation: " so that the next
# token is the answer.
template = """
Subreddit: r/{subreddit}
Rule: {rule}
Examples:
1) {positive_example_1}
Violation: Yes

2) {negative_example_1}
Violation: No

3) {negative_example_2}
Violation: No

4) {positive_example_2}
Violation: Yes
Comment:
{body}
Violation: """



In [14]:
# One two-message conversation (system + user) per test row.
dataset = [
    [
        {"role": "system", "content": sys_prompt},
        {
            "role": "user",
            "content": template.format(
                rule=row.rule,
                subreddit=row.subreddit,
                body=row.body,
                positive_example_1=row.positive_example_1,
                negative_example_1=row.negative_example_1,
                positive_example_2=row.positive_example_2,
                negative_example_2=row.negative_example_2,
            ),
        },
    ]
    for _, row in test.iterrows()
]


In [15]:
# Render every conversation to a prompt string via the chat template.
all_prompts = formatting(dataset)

In [16]:
logits_processors = [DigitLogitsProcessor(tokenizer)]

# Single-token greedy decoding; the logprobs are what gets used downstream.
sampling_params = vllm.SamplingParams(
    n=1,  # Number of output sequences to return for each prompt.
    top_p=0.9,  # Cumulative probability of the top tokens to consider.
    temperature=0,  # Randomness of the sampling.
    seed=777,  # Seed for reproducibility.
    skip_special_tokens=True,  # Whether to skip special tokens in the output.
    max_tokens=1,  # Maximum number of tokens to generate per output sequence.
    logits_processors=logits_processors,
    logprobs=2,
)
responses = llm.generate(all_prompts, sampling_params, use_tqdm=True)

Adding requests:   0%|          | 0/10 [00:00<?, ?it/s]

Processed prompts:   0%|          | 0/10 [00:00<?, ?it/s, est. speed input: 0.00 toks/s, output: 0.00 toks/s]

In [17]:
# Convert each response's logprobs into a normalized [P(No), P(Yes)] pair
# (same order as KEEP). Unusable responses fall back to a neutral 0.5 / 0.5.
results = []
errors = 0

for i, response in enumerate(responses):
    try:
        token_logprobs = response.outputs[0].logprobs[0]
        choice_probs = []
        for k in KEEP:
            if k in token_logprobs:
                choice_probs.append(math.exp(token_logprobs[k].logprob))
            else:
                choice_probs.append(0)
                print(f"bad logits {i}")
    except (IndexError, KeyError, TypeError, AttributeError):
        # Missing or malformed output for this prompt.
        results.append(np.array([1/2., 1/2.]))
        errors += 1
        continue

    choice_probs = np.array(choice_probs, dtype=float)
    total = choice_probs.sum()
    if total > 0:
        results.append(choice_probs / total)
    else:
        # Neither choice token was returned: 0/0 would yield NaN, which the
        # later clamping turns into a confident 0.99. Stay neutral instead.
        results.append(np.array([1/2., 1/2.]))
        errors += 1

# len(results) rather than the loop variable, which is undefined when
# `responses` is empty.
print(f"There were {errors} inference errors out of {len(results)} inferences")
# np.vstack rejects an empty list, so keep an explicit (0, 2) shape then.
results = np.vstack(results) if results else np.empty((0, 2))

There were 0 inference errors out of 10 inferences


In [18]:
# Column 1 of `results` is the "Yes" probability (KEEP order is No, Yes).
probs = [x[1] for x in results]

# === Added improvement: heuristic two-stage refinement of each probability ===
example_keys = (
    'positive_example_1',
    'positive_example_2',
    'negative_example_1',
    'negative_example_2',
)
enhanced_probs = [
    two_stage_prediction(
        row['body'],
        row['rule'],
        {key: row[key] for key in example_keys},
        prob,
    )
    for (_, row), prob in zip(test.iterrows(), probs)
]

probs = enhanced_probs
# Positional assignment: relies on `sub` and `test` sharing the same row order.
sub['rule_violation'] = probs
sub.to_csv('submission_llama.csv')

# 3. ENSEMBLE RESULT

In [19]:
# Blend the two per-model submissions with per-sample adaptive weights.
# An earlier variant averaged rank-normalized scores with fixed 0.5 / 0.5
# weights; the rank columns it needed are no longer computed because the
# adaptive blend below works on the raw predictions.
q = pd.read_csv('submission_qwen.csv')
l = pd.read_csv('submission_llama.csv')
test_df = pd.read_csv('/kaggle/input/jigsaw-agile-community-rules/test.csv')

# The blend pairs rows purely by position, so fail loudly instead of silently
# mixing predictions of different comments if the files are misaligned.
row_ids = q['row_id'].to_numpy()
if not (
    np.array_equal(row_ids, l['row_id'].to_numpy())
    and np.array_equal(row_ids, test_df['row_id'].to_numpy())
):
    raise ValueError(
        "row_id order differs between submission_qwen.csv, "
        "submission_llama.csv and test.csv"
    )

# === Adaptive weights ===
adaptive_predictions = []
for (_, row), qwen_pred, llama_pred in zip(
    test_df.iterrows(), q['rule_violation'], l['rule_violation']
):
    pos_examples = [row['positive_example_1'], row['positive_example_2']]
    neg_examples = [row['negative_example_1'], row['negative_example_2']]

    features = extract_text_features(row['body'], row['rule'], pos_examples, neg_examples)
    qwen_weight, llama_weight = get_adaptive_weights(features, row['subreddit'])

    adaptive_predictions.append(qwen_weight * qwen_pred + llama_weight * llama_pred)

q['rule_violation'] = adaptive_predictions
# === End of adaptive weights ===
q.to_csv('/kaggle/working/submission.csv', index=False)