In [None]:
import json
import numpy as np
from itertools import product
from sklearn.metrics import cohen_kappa_score

# Per-model prediction files; line k of every file is expected to describe
# the same validation example.
file_paths = [
    "ku-DeBERTa-large-A-dev-with-probs.txt",
    "so-LUKE-large-dev-with-probs.txt",
    "wa-RoBERTa-large-A-dev-with-probs.txt",
    "so-LUKE-large-A-dev-with-probs.txt",
    "wa-RoBERTa-large-seq512-A-dev-with-probs.txt",
    "wa-RoBERTa-large2-dev-with-probs.txt",
    "wa-RoBERTa-large-dev-with-probs.txt",
    "wa-RoBERTa-large-seq512-dev-with-probs.txt",
    "ku-DeBERTa-large-dev-with-probs.txt",
    "ku-RoBERTa-large-dev-with-probs.txt"
]

# Total number of labels
num_labels = 5

# A predicted label is "argmax index - LABEL_OFFSET", so indices 0..4 become
# the labels -2..2.
LABEL_OFFSET = 2


def _load_probabilities(path, expected_labels, max_lines):
    """Parse the first ``max_lines`` lines of a prediction file.

    Each line is split on ", "; the first field is ignored and every
    remaining field must have the form "<name>: <float>".

    Args:
        path: Prediction file to read.
        expected_labels: Number of probabilities every line must carry.
        max_lines: Number of leading lines to parse; later lines are ignored.

    Returns:
        A float array of shape (max_lines, expected_labels).

    Raises:
        ValueError: If a line cannot be parsed, carries the wrong number of
            probabilities, or the file has fewer than ``max_lines`` lines.
    """
    rows = []
    with open(path, 'r') as f:
        for line_no, line in enumerate(f, start=1):
            if line_no > max_lines:
                break
            parts = line.strip().split(", ")
            try:
                probs = [float(prob.split(": ")[1]) for prob in parts[1:]]
            except (IndexError, ValueError) as exc:
                raise ValueError(f"{path}:{line_no}: cannot parse probabilities") from exc
            if len(probs) != expected_labels:
                raise ValueError(
                    f"{path}:{line_no}: expected {expected_labels} probabilities, got {len(probs)}"
                )
            rows.append(probs)
    if len(rows) != max_lines:
        raise ValueError(f"{path}: expected {max_lines} lines, found only {len(rows)}")
    return np.array(rows, dtype=float).reshape(max_lines, expected_labels)


# Read the gold labels from valid.json (JSON Lines: one JSON object per line)
true_labels = []
with open('valid.json', 'r', encoding='utf-8') as f:
    for line in f:
        item = json.loads(line)
        true_labels.append(item['writer_sentiment'])  # labels are used as-is

# The first file defines how many examples are scored.
with open(file_paths[0], 'r') as f:
    num_lines = sum(1 for _ in f)

# Parse every prediction file exactly once instead of rescanning the files
# for each line of each weight combination.
model_probs = [_load_probabilities(path, num_labels, num_lines) for path in file_paths]

# Try every 0/1 on-off combination of the models
best_score = -1
best_weights = None
qwk_scores = {}
pred_labels = []

# 2^len(file_paths) tuples are generated (1024 for 10 files); the all-zero
# tuple is skipped, leaving 1023 evaluated ensembles.
for weights in product([0, 1], repeat=len(file_paths)):
    if sum(weights) == 0:  # no model selected
        continue

    # Sum the selected models' probabilities file by file, in list order, so
    # the floating-point result does not depend on a vectorised reduction order.
    label_probs = np.zeros((num_lines, num_labels))
    for probs, weight in zip(model_probs, weights):
        if weight == 0:
            continue
        label_probs += probs * weight

    # Pick the highest-scoring label per example (ties go to the lowest index)
    pred_labels = (np.argmax(label_probs, axis=1) - LABEL_OFFSET).tolist()

    # Quadratic weighted kappa against the gold labels
    qwk_score = cohen_kappa_score(pred_labels, true_labels, weights='quadratic')
    qwk_scores[str(weights)] = qwk_score

    # Track the best combination seen so far
    if qwk_score > best_score:
        best_score = qwk_score
        best_weights = weights

    print(f"weights={weights}, QWK={qwk_score:.4f}")

# result.txt receives the predictions of the last evaluated combination
# (every model enabled), one label per line.
if qwk_scores:
    with open('result.txt', 'w') as result_file:
        for label in pred_labels:
            result_file.write(f"{label}\n")

# Save every score as JSON, keyed by the string form of the weight tuple
with open('qwk_scores.json', 'w') as f:
    json.dump(qwk_scores, f, indent=4)

print("\nBest Weights:", best_weights)
print("Best QWK Score:", best_score)

weights=(0, 0, 0, 0, 0, 0, 0, 0, 0, 1), QWK=0.6122
weights=(0, 0, 0, 0, 0, 0, 0, 0, 1, 0), QWK=0.6041
weights=(0, 0, 0, 0, 0, 0, 0, 0, 1, 1), QWK=0.6139
weights=(0, 0, 0, 0, 0, 0, 0, 1, 0, 0), QWK=0.6437
weights=(0, 0, 0, 0, 0, 0, 0, 1, 0, 1), QWK=0.6324
weights=(0, 0, 0, 0, 0, 0, 0, 1, 1, 0), QWK=0.6310
weights=(0, 0, 0, 0, 0, 0, 0, 1, 1, 1), QWK=0.6294
weights=(0, 0, 0, 0, 0, 0, 1, 0, 0, 0), QWK=0.6341
weights=(0, 0, 0, 0, 0, 0, 1, 0, 0, 1), QWK=0.6305
weights=(0, 0, 0, 0, 0, 0, 1, 0, 1, 0), QWK=0.6259
weights=(0, 0, 0, 0, 0, 0, 1, 0, 1, 1), QWK=0.6241
weights=(0, 0, 0, 0, 0, 0, 1, 1, 0, 0), QWK=0.6512
weights=(0, 0, 0, 0, 0, 0, 1, 1, 0, 1), QWK=0.6399
weights=(0, 0, 0, 0, 0, 0, 1, 1, 1, 0), QWK=0.6349
weights=(0, 0, 0, 0, 0, 0, 1, 1, 1, 1), QWK=0.6388
weights=(0, 0, 0, 0, 0, 1, 0, 0, 0, 0), QWK=0.6224
weights=(0, 0, 0, 0, 0, 1, 0, 0, 0, 1), QWK=0.6434
weights=(0, 0, 0, 0, 0, 1, 0, 0, 1, 0), QWK=0.6318
weights=(0, 0, 0, 0, 0, 1, 0, 0, 1, 1), QWK=0.6469
weights=(0, 0, 0, 0, 0, 1, 0, 1

In [None]:
import json

# Minimum QWK a weight set must reach to be kept. Defined once so the filter
# and the printed summary cannot disagree.
QWK_THRESHOLD = 0.650

# Input: scores for every evaluated weight set; output: the filtered subset.
QWK_SCORES_PATH = 'qwk_scores.json'
QWK_FILTERED_PATH = 'qwk_scores_filtered.json'

# Load the QWK score of every weight set (keys are weight-tuple strings)
with open(QWK_SCORES_PATH, 'r') as f:
    qwk_scores = json.load(f)

# Keep only the weight sets whose QWK reaches the threshold
filtered_scores = {
    weights: score
    for weights, score in qwk_scores.items()
    if score >= QWK_THRESHOLD
}

# Save the filtered subset as JSON
with open(QWK_FILTERED_PATH, 'w') as f:
    json.dump(filtered_scores, f, indent=4)

print(f"Filtered {len(filtered_scores)} weight sets with QWK >= {QWK_THRESHOLD:.3f} and saved to '{QWK_FILTERED_PATH}'")

Filtered 97 weight sets with QWK >= 0.650 and saved to 'qwk_scores_filtered.json'


In [None]:
import json
import numpy as np
from collections import Counter
from itertools import product
from sklearn.metrics import cohen_kappa_score

# Per-model prediction files; line k of every file is expected to describe
# the same validation example.
file_paths = [
    "ku-DeBERTa-large-A-dev-with-probs.txt",
    "so-LUKE-large-dev-with-probs.txt",
    "wa-RoBERTa-large-A-dev-with-probs.txt",
    "so-LUKE-large-A-dev-with-probs.txt",
    "wa-RoBERTa-large-seq512-A-dev-with-probs.txt",
    "wa-RoBERTa-large2-dev-with-probs.txt",
    "wa-RoBERTa-large-dev-with-probs.txt",
    "wa-RoBERTa-large-seq512-dev-with-probs.txt",
    "ku-DeBERTa-large-dev-with-probs.txt",
    "ku-RoBERTa-large-dev-with-probs.txt"
]

# A predicted label is "argmax index - LABEL_OFFSET".
LABEL_OFFSET = 2

# Read the gold labels from valid.json (JSON Lines: one JSON object per line)
true_labels = []
with open('valid.json', 'r', encoding='utf-8') as f:
    for line in f:
        item = json.loads(line)
        true_labels.append(item['writer_sentiment'])  # labels are used as-is

# Load the filtered QWK scores (keys are weight-tuple strings)
with open('qwk_scores_filtered.json', 'r') as f:
    qwk_scores = json.load(f)

if not qwk_scores:
    raise ValueError("qwk_scores_filtered.json contains no weight sets; nothing to ensemble")

# Take the highest-scoring weight set and turn its key, e.g. "(0, 1, 1)",
# back into a tuple of ints. Splitting on "," and dropping empty tokens also
# accepts a single-element key such as "(1,)".
# NOTE(review): the stored scores appear to come from a summed-probability
# ensemble, not from majority voting, so this weight set is not necessarily
# the best one for voting - confirm that this is intended.
best_key = max(qwk_scores, key=qwk_scores.get)
best_weights = tuple(int(tok) for tok in best_key.strip("()").split(",") if tok.strip())

if len(best_weights) != len(file_paths):
    raise ValueError(
        f"weight set {best_key} has {len(best_weights)} entries but there are {len(file_paths)} files"
    )

# The first file defines how many examples are predicted.
with open(file_paths[0], 'r') as f:
    num_lines = sum(1 for _ in f)

# Read each selected file once and record that model's label for every line.
votes_per_model = []
for file_path, weight in zip(file_paths, best_weights):
    if weight == 0:
        continue  # model excluded from the ensemble

    model_votes = []
    with open(file_path, 'r') as f:
        for line_idx, line in enumerate(f):
            if line_idx >= num_lines:
                break  # lines beyond the first file's length are ignored
            # First ", "-separated field is skipped; the rest are "<name>: <float>"
            parts = line.strip().split(", ")
            probs = [float(prob.split(": ")[1]) for prob in parts[1:]]
            model_votes.append(int(np.argmax(probs)) - LABEL_OFFSET)

    if len(model_votes) != num_lines:
        raise ValueError(f"{file_path}: expected {num_lines} lines, found only {len(model_votes)}")
    votes_per_model.append(model_votes)

if not votes_per_model:
    raise ValueError(f"weight set {best_key} selects no model")

# Majority vote per example. When several labels share the top count, the
# one voted for by the earliest selected file wins, because Counter keeps
# first-insertion order among equal counts.
final_predictions = [
    Counter(line_votes).most_common(1)[0][0]
    for line_votes in zip(*votes_per_model)
]

# Save the ensemble predictions, one label per line
with open('final_predictions.txt', 'w') as f:
    for label in final_predictions:
        f.write(f"{label}\n")

# Quadratic weighted kappa against the gold labels
final_qwk = cohen_kappa_score(final_predictions, true_labels, weights='quadratic')

print("\nBest Weights:", best_weights)
print("QWK Score after Majority Voting:", final_qwk)



Best Weights: (0, 0, 1, 0, 0, 1, 0, 1, 0, 1)
QWK Score after Majority Voting: 0.6455160090072634
