In [15]:
import json
import torch
import numpy as np
import torch.nn.functional as F
from tqdm import tqdm
from transformers import BertTokenizer, BertForSequenceClassification
from transformers import logging as hf_logging
from sklearn.metrics import accuracy_score
import pickle
import torch.nn as nn
import re
import nltk
from collections import Counter
from nltk.corpus import stopwords as nltk_stopwords
from nltk.stem.wordnet import WordNetLemmatizer
from nltk.stem.snowball import SnowballStemmer

nltk.download('punkt')
nltk.download('wordnet')
nltk.download('stopwords')

# ───────────────────────────────────────────────────────────────────────────────
# 0) 配置与准备
# ───────────────────────────────────────────────────────────────────────────────
DEVICE = torch.device("cuda" if torch.cuda.is_available() else "cpu")
MAX_LEN = 256
id2label = {0: "SUPPORTS", 1: "NOT_ENOUGH_INFO", 2: "REFUTES", 3: "DISPUTED"}
label2id = {v: k for k, v in id2label.items()}

# ───────────────────────────────────────────────────────────────────────────────
# 1) 加载数据
# ───────────────────────────────────────────────────────────────────────────────
with open("test-claims-predictions.json", "r") as f:
    test_claims = json.load(f)

with open("data/evidence.json", "r") as f:
    evidence_dict = json.load(f)

[nltk_data] Downloading package punkt to
[nltk_data]     /Users/felikskong/nltk_data...
[nltk_data]   Package punkt is already up-to-date!
[nltk_data] Downloading package wordnet to
[nltk_data]     /Users/felikskong/nltk_data...
[nltk_data]   Package wordnet is already up-to-date!
[nltk_data] Downloading package stopwords to
[nltk_data]     /Users/felikskong/nltk_data...
[nltk_data]   Package stopwords is already up-to-date!


In [16]:
# ───────────────────────────────────────────────────────────────────────────────
# 2) 加载 BERT 模型
# ───────────────────────────────────────────────────────────────────────────────
bert_model = BertForSequenceClassification.from_pretrained("my_bert_classifier").to(DEVICE)
bert_tokenizer = BertTokenizer.from_pretrained("my_bert_classifier")
bert_model.eval()

BertForSequenceClassification(
  (bert): BertModel(
    (embeddings): BertEmbeddings(
      (word_embeddings): Embedding(30522, 768, padding_idx=0)
      (position_embeddings): Embedding(512, 768)
      (token_type_embeddings): Embedding(2, 768)
      (LayerNorm): LayerNorm((768,), eps=1e-12, elementwise_affine=True)
      (dropout): Dropout(p=0.1, inplace=False)
    )
    (encoder): BertEncoder(
      (layer): ModuleList(
        (0-11): 12 x BertLayer(
          (attention): BertAttention(
            (self): BertSdpaSelfAttention(
              (query): Linear(in_features=768, out_features=768, bias=True)
              (key): Linear(in_features=768, out_features=768, bias=True)
              (value): Linear(in_features=768, out_features=768, bias=True)
              (dropout): Dropout(p=0.1, inplace=False)
            )
            (output): BertSelfOutput(
              (dense): Linear(in_features=768, out_features=768, bias=True)
              (LayerNorm): LayerNorm((768,), eps=1e

In [17]:
# ───────────────────────────────────────────────────────────────────────────────
# 3) 加载 BiLSTM 模型
# ───────────────────────────────────────────────────────────────────────────────
class BiLSTMWithBertEncoder(torch.nn.Module):
    def __init__(self, bert_name, lstm_hid, num_classes, dropout_prob, lstm_layers):
        super().__init__()
        from transformers import AutoModel
        self.bert = AutoModel.from_pretrained(bert_name)
        for p in self.bert.parameters():
            p.requires_grad = False
        bert_dim = self.bert.config.hidden_size
        self.dropout_bert = torch.nn.Dropout(dropout_prob)
        self.lstm = torch.nn.LSTM(
            input_size=bert_dim,
            hidden_size=lstm_hid,
            num_layers=lstm_layers,
            batch_first=True,
            bidirectional=True,
            dropout=dropout_prob
        )
        self.attn_fc = torch.nn.Linear(2 * lstm_hid, 1)
        self.dropout_pool = torch.nn.Dropout(dropout_prob)
        self.classifier = torch.nn.Linear(2 * lstm_hid, num_classes)

    def forward(self, input_ids, attention_mask):
        bert_out = self.bert(input_ids=input_ids, attention_mask=attention_mask)
        seq_emb = self.dropout_bert(bert_out.last_hidden_state)
        lstm_out, _ = self.lstm(seq_emb)
        scores = self.attn_fc(lstm_out).squeeze(-1)
        scores = scores.masked_fill(attention_mask == 0, -1e9)
        alphas = torch.softmax(scores, dim=1)
        pooled = torch.sum(lstm_out * alphas.unsqueeze(-1), dim=1)
        pooled = self.dropout_pool(pooled)
        logits = self.classifier(pooled)
        return logits

BERT_MODEL = "bert-base-uncased"
LSTM_HID_DIM = 512
NUM_CLASSES = 4
DROPOUT_PROB = 0.2
NUM_LAYERS = 3

bilstm_model = BiLSTMWithBertEncoder(BERT_MODEL, LSTM_HID_DIM, NUM_CLASSES, DROPOUT_PROB, NUM_LAYERS).to(DEVICE)
bilstm_model.load_state_dict(torch.load("task2_best_model_6.pt", map_location=DEVICE))
bilstm_model.eval()
bilstm_tokenizer = bert_tokenizer  # 可共用

  bilstm_model.load_state_dict(torch.load("task2_best_model_6.pt", map_location=DEVICE))


In [18]:
# ───────────────────────────────────────────────────────────────────────────────
# 4) 加载 RNN 模型
# ───────────────────────────────────────────────────────────────────────────────
DEVICE = torch.device("cuda" if torch.cuda.is_available() else "cpu")
MAX_LEN = 50
EMBED_DIM = 100
HIDDEN_DIM = 64
NUM_CLASSES = 4
DROPOUT_PROB = 0.4
vocab_size = 1  # placeholder, will be overwritten

stopwords = set(nltk_stopwords.words('english'))
lemmatizer = WordNetLemmatizer()
stemmer = SnowballStemmer('english')

def lemmatize(word):
    lemma = lemmatizer.lemmatize(word, 'v')
    return lemmatizer.lemmatize(lemma, 'n')

def preprocess(text, remove_stopwords=True, lemma=True, stem=False):
    tokens = nltk.word_tokenize(text.lower())
    tokens = [t for t in tokens if re.match('^[a-zA-Z0-9-]+$', t)]
    if remove_stopwords:
        tokens = [t for t in tokens if t not in stopwords]
    if lemma:
        tokens = [lemmatize(t) for t in tokens]
    if stem:
        tokens = [stemmer.stem(t) for t in tokens]
    return ' '.join(tokens)

def text_to_seq(text):
    tokens = text.split()
    seq = [vocab.get(t, 0) for t in tokens]
    return seq + [0] * (MAX_LEN - len(seq)) if len(seq) < MAX_LEN else seq[:MAX_LEN]

class SelfAttentionPooling(nn.Module):
    def __init__(self, input_dim):
        super().__init__()
        self.attention = nn.Linear(input_dim, 1)

    def forward(self, x):
        weights = torch.softmax(self.attention(x), dim=1)
        pooled = torch.sum(weights * x, dim=1)
        return pooled

class RNNModel(nn.Module):
    def __init__(self, vocab_size, embed_dim, hidden_dim, num_classes):
        super().__init__()
        self.embedding = nn.Embedding(vocab_size, embed_dim)
        self.embed_dropout = nn.Dropout(DROPOUT_PROB)
        self.rnn_claim = nn.RNN(embed_dim, hidden_dim, bidirectional=True, batch_first=True)
        self.rnn_evid = nn.RNN(embed_dim, hidden_dim, bidirectional=True, batch_first=True)
        self.rnn_dropout = nn.Dropout(DROPOUT_PROB)
        self.attention_claim = SelfAttentionPooling(hidden_dim * 2)
        self.attention_evid = SelfAttentionPooling(hidden_dim * 2)
        self.classifier = nn.Sequential(
            nn.Linear(hidden_dim * 4, 128),
            nn.ReLU(),
            nn.Dropout(DROPOUT_PROB),
            nn.Linear(128, num_classes)
        )

    def forward(self, claim, evidence):
        claim_emb = self.embed_dropout(self.embedding(claim))
        evid_emb = self.embed_dropout(self.embedding(evidence))

        claim_out, _ = self.rnn_claim(claim_emb)
        evid_out, _ = self.rnn_evid(evid_emb)

        claim_out = self.rnn_dropout(claim_out)
        evid_out = self.rnn_dropout(evid_out)

        claim_pool = self.attention_claim(claim_out)
        evid_pool = self.attention_evid(evid_out)

        combined = torch.cat([claim_pool, evid_pool], dim=1)
        return self.classifier(combined)

with open("data/train-claims.json", "r", encoding="utf-8") as f:
    train_data = json.load(f)
with open("data/evidence.json", "r", encoding="utf-8") as f:
    evidence_dict = json.load(f)

all_text = []
for item in train_data.values():
    claim = preprocess(item["claim_text"])
    evids = ' '.join([evidence_dict.get(eid, '') for eid in item["evidences"]])
    ev_text = preprocess(evids)
    all_text.extend(claim.split() + ev_text.split())

token_counts = Counter(all_text)
vocab = {w: idx + 1 for idx, (w, _) in enumerate(token_counts.items())}
vocab_size = len(vocab) + 1

# 加载模型权重
rnn_model = RNNModel(vocab_size, EMBED_DIM, HIDDEN_DIM, NUM_CLASSES).to(DEVICE)
rnn_model.load_state_dict(torch.load("rnn_model.pth", map_location=DEVICE))
rnn_model.eval()

with open("label_encoder.pkl", "rb") as f:
    label_enc = pickle.load(f)


  rnn_model.load_state_dict(torch.load("rnn_model.pth", map_location=DEVICE))


In [19]:
# ───────────────────────────────────────────────────────────────────────────────
# 5) 推理函数
# ───────────────────────────────────────────────────────────────────────────────
def get_bert_probs(claim, evid_ids):
    evids = " ".join([evidence_dict.get(eid, "") for eid in evid_ids])
    inputs = bert_tokenizer(claim, evids, truncation=True, padding="max_length", max_length=MAX_LEN, return_tensors="pt").to(DEVICE)
    with torch.no_grad():
        logits = bert_model(**inputs).logits
        probs = F.softmax(logits, dim=-1).cpu().numpy()[0]
    reordered = [probs[0], probs[2], probs[1], probs[3]]
    return np.array(reordered)

def get_bilstm_probs(claim, evid_ids):
    evids = " ".join([evidence_dict.get(eid, "") for eid in evid_ids])
    inputs = bilstm_tokenizer(claim, evids, truncation=True, padding="max_length", max_length=MAX_LEN, return_tensors="pt").to(DEVICE)
    with torch.no_grad():
        logits = bilstm_model(inputs["input_ids"], inputs["attention_mask"])
    return F.softmax(logits, dim=-1).cpu().numpy()[0]


def get_rnn_probs(claim, evid_ids):
    claim_text = preprocess(claim)
    evid_text = " ".join([evidence_dict.get(eid, "") for eid in evid_ids])
    evid_text = preprocess(evid_text)
    claim_seq = text_to_seq(claim_text)
    evid_seq = text_to_seq(evid_text)
    claim_tensor = torch.tensor([claim_seq], dtype=torch.long).to(DEVICE)
    evid_tensor = torch.tensor([evid_seq], dtype=torch.long).to(DEVICE)
    with torch.no_grad():
        logits = rnn_model(claim_tensor, evid_tensor)
        probs = F.softmax(logits, dim=-1).cpu().numpy()[0]
    reordered = [probs[3], probs[1], probs[2], probs[0]]
    return np.array(reordered)

In [None]:
# ✅ 关闭 transformers 中关于 token 溢出的警告
hf_logging.set_verbosity_error()

with open("data/dev-claims.json", "r") as f:
    dev_claims = json.load(f)

# soft voting
true_labels = []
pred_labels = []

for cid, entry in tqdm(dev_claims.items(), desc="Ensemble Predicting"):
    claim_text = entry["claim_text"]
    evidence_ids = entry.get("evidences", [])
    true_label = label2id[entry["claim_label"]]

    p1 = get_bert_probs(claim_text, evidence_ids)
    p2 = get_bilstm_probs(claim_text, evidence_ids)
    p3 = get_rnn_probs(claim_text, evidence_ids)

    avg_probs = (p1 + p2 + p3) / 3
    pred_idx = int(np.argmax(avg_probs))

    true_labels.append(true_label)
    pred_labels.append(pred_idx)

acc = accuracy_score(true_labels, pred_labels)
print(f"✅ Ensemble Accuracy on Dev Set: {acc:.4f}")

Ensemble Predicting: 100%|██████████| 154/154 [02:47<00:00,  1.09s/it]

✅ Ensemble Accuracy on Dev Set: 0.5519





In [None]:
# ───────────────────────────────────────────────────────────────────────────────
# 6) Soft Voting Ensemble 推理
# ───────────────────────────────────────────────────────────────────────────────
final_preds = {}

for cid, entry in tqdm(test_claims.items(), desc="Ensemble Predicting"):
    claim = entry["claim_text"]
    evid_ids = entry.get("evidences", [])

    p1 = get_bert_probs(claim, evid_ids)
    p2 = get_bilstm_probs(claim, evid_ids)
    p3 = get_rnn_probs(claim, evid_ids)

    avg_probs = (p1 + p2 + p3) / 3
    pred_idx = int(np.argmax(avg_probs))
    pred_label = id2label[pred_idx]

    final_preds[cid] = {
        "claim_text": claim,
        "claim_label": pred_label,
        "evidences": evid_ids
    }

# ───────────────────────────────────────────────────────────────────────────────
# 7) 保存输出
# ───────────────────────────────────────────────────────────────────────────────
with open("softvote-predictions.json", "w", encoding="utf-8") as f:
    json.dump(final_preds, f, ensure_ascii=False, indent=2)

print("✅ Ensemble prediction saved to softvote-predictions.json")

In [30]:
from sklearn.linear_model import LogisticRegression
from sklearn.metrics import accuracy_score
import numpy as np
from tqdm import tqdm

X_dev = []
y_dev = []

for cid, entry in tqdm(dev_claims.items(), desc="Extracting stacking features"):
    claim_text = entry["claim_text"]
    evidence_ids = entry.get("evidences", [])
    label = label2id[entry["claim_label"]]

    p1 = get_bert_probs(claim_text, evidence_ids)
    p2 = get_bilstm_probs(claim_text, evidence_ids)
    p3 = get_rnn_probs(claim_text, evidence_ids)

    features = np.concatenate([p1, p2, p3])
    X_dev.append(features)
    y_dev.append(label)

X_dev = np.array(X_dev)
y_dev = np.array(y_dev)

Extracting stacking features: 100%|██████████| 154/154 [02:31<00:00,  1.02it/s]


In [None]:
meta_clf_lr = LogisticRegression(max_iter=1000, random_state=42)
meta_clf_lr.fit(X_dev, y_dev)

dev_preds = meta_clf_lr.predict(X_dev)
acc = accuracy_score(y_dev, dev_preds)
print(f"✅ LR accuracy on dev set: {acc:.4f}")

Extracting stacking features: 100%|██████████| 154/154 [03:12<00:00,  1.25s/it]


✅ Meta-model accuracy on dev set: 0.6169


In [None]:
from sklearn.neural_network import MLPClassifier

meta_clf_mlp = MLPClassifier(
    hidden_layer_sizes=(64, 32),
    activation='relu',
    solver='adam',
    max_iter=1000,
    random_state=42
)
meta_clf_mlp.fit(X_dev, y_dev)

dev_preds_mlp = meta_clf_mlp.predict(X_dev)
acc_mlp = accuracy_score(y_dev, dev_preds_mlp)
print(f"✅ MLP accuracy on dev set: {acc_mlp:.4f}")

✅ MLP accuracy on dev set: 0.7403




In [31]:
from sklearn.ensemble import RandomForestClassifier

meta_clf_rf = RandomForestClassifier(
    n_estimators=200,
    max_depth=10,
    random_state=42,
    n_jobs=-1
)
meta_clf_rf.fit(X_dev, y_dev)

dev_preds_rf = meta_clf_rf.predict(X_dev)
acc_rf = accuracy_score(y_dev, dev_preds_rf)
print(f"✅ RandomForest accuracy on dev set: {acc_rf:.4f}")

✅ RandomForest accuracy on dev set: 1.0000


In [29]:
final_preds = {}

for cid, entry in tqdm(test_claims.items(), desc="Stacking Predicting"):
    claim = entry["claim_text"]
    evidence_ids = entry.get("evidences", [])

    p1 = get_bert_probs(claim, evidence_ids)
    p2 = get_bilstm_probs(claim, evidence_ids)
    p3 = get_rnn_probs(claim, evidence_ids)

    features = np.concatenate([p1, p2, p3]).reshape(1, -1)
    pred_idx = meta_clf_mlp.predict(features)[0]
    pred_label = id2label[pred_idx]

    final_preds[cid] = {
        "claim_text": claim,
        "claim_label": pred_label,
        "evidences": evidence_ids
    }

# 保存为 JSON 以便上传
with open("stacking-predictions.json", "w", encoding="utf-8") as f:
    json.dump(final_preds, f, ensure_ascii=False, indent=2)

print("✅ Saved stacking predictions to stacking-predictions.json")

Stacking Predicting: 100%|██████████| 153/153 [02:55<00:00,  1.15s/it]

✅ Saved stacking predictions to stacking-predictions.json



