In [1]:
import pandas as pd

# Load the task-2 table; later cells read its 'Original_id', 'Country' and 'Voting' columns.
df = pd.read_csv('../data/task2.csv')

In [2]:
import os
import json

# One entry per row of `df`: the folder id of the draft, the voting country,
# and the recorded vote. `drafts` is filled below with the draft texts.
draft_ids = list(df['Original_id'])

# input data
countries = list(df['Country'])
drafts = []
votes = list(df['Voting'])

path = '../data/task2'
for i in draft_ids:
    folder_path = os.path.join(path, str(i))
    # os.listdir returns entries in arbitrary order; sort so the file that is
    # picked is the same on every run when several names match.
    files = sorted(os.listdir(folder_path))
    english_files = [file for file in files if file.endswith('EN.json')]
    if not english_files:
        # Fail with the offending folder instead of an opaque IndexError.
        raise FileNotFoundError(f"No '*EN.json' draft found in {folder_path}")
    json_file = english_files[0]
    # Decode explicitly as UTF-8 rather than relying on the platform default.
    with open(os.path.join(folder_path, json_file), encoding='utf-8') as f:
        draft = json.load(f)
    drafts.append(draft['Content'])

In [3]:
import random
from tqdm import tqdm
from openai import OpenAI  # 引入你安装的 openai 库

import os
import re

# 1. Initialise the API client. The key is read from the environment so that no
#    credential is stored in the notebook.
# NOTE(review): an API key used to be hard-coded in this cell; treat that key as
# compromised and revoke it.
api_key = os.environ.get("OPENAI_API_KEY")
if not api_key:
    raise RuntimeError("Set the OPENAI_API_KEY environment variable before running this cell.")

client = OpenAI(
    api_key=api_key,
    base_url="https://hiapi.online/v1"
)

# 2. Name of the model to query.
your_model_name = "gemini-3-flash-preview"

valid_votes = ['Y', 'N', 'A']

# Whole-word spellings accepted for each vote.
_VOTE_ALIASES = {
    'Y': 'Y', 'YES': 'Y',
    'N': 'N', 'NO': 'N',
    'A': 'A', 'ABSTAIN': 'A',
}

# Seeded generator for the fallback vote so repeated runs give the same metrics.
FALLBACK_SEED = 42
fallback_rng = random.Random(FALLBACK_SEED)


def _parse_vote(raw_text):
    """Map a model reply to 'Y', 'N' or 'A'.

    The reply is upper-cased and split into alphabetic words; each word is
    looked up in ``_VOTE_ALIASES``. Matching whole words (instead of testing
    whether a letter occurs anywhere in the reply) keeps "ABSTAIN" or
    "ANSWER: A" from being read as 'N' just because they contain that letter.

    Returns the vote when exactly one distinct vote is found; otherwise returns
    the cleaned reply unchanged so the caller can report it as invalid.
    ``None`` is treated as an empty reply.
    """
    cleaned = (raw_text or "").strip().upper()
    words = re.findall(r"[A-Z]+", cleaned)
    found = {_VOTE_ALIASES[word] for word in words if word in _VOTE_ALIASES}
    if len(found) == 1:
        return found.pop()
    return cleaned


pred = []
invalid_responses = []

# The system prompt does not depend on the draft, so build it once.
system_prompt = "You are an experienced diplomat participating in United Nations Security Council sessions. Your task is to read draft resolutions and vote accordingly. You must strictly respond with 'Y', 'N', or 'A' without any additional explanation."

# 3. Run the vote-prediction loop: one API call per (draft, country) pair.
n_pairs = min(len(drafts), len(countries))
for i, (draft, country) in tqdm(enumerate(zip(drafts, countries)), total=n_pairs):
    user_prompt = f"""The following is a United Nations Security Council draft resolution. Assume you are a diplomat from {country} and must cast your vote.

    Please read the draft and vote using one of the following options:
    - 'Y' for Yes (in favor)
    - 'N' for No (against)
    - 'A' for Abstain

    Draft Resolution: {draft}
    Answer:
    """

    try:
        # Query the model.
        response = client.chat.completions.create(
            model=your_model_name,
            messages=[
                {"role": "system", "content": system_prompt},
                {"role": "user", "content": user_prompt}
            ],
            max_tokens=1,
            temperature=0.0
        )

        # Reduce the reply to a single vote, e.g. "Answer: Y" -> "Y".
        result = _parse_vote(response.choices[0].message.content)

    except Exception as e:
        # Best effort: a failed call is logged and handled like an invalid reply.
        print(f"API Error at index {i}: {e}")
        result = "Error"

    if result not in valid_votes:
        print(f"Invalid response at index {i}: {result}")
        # Substitute a (seeded) random vote and remember the index so the
        # affected rows can be inspected or excluded later.
        result = fallback_rng.choice(valid_votes)
        invalid_responses.append(i)

    pred.append(result)

print(f"\n执行完毕。共处理 {len(pred)} 条记录。")
print(f"Invalid or failed responses replaced by random votes: {len(invalid_responses)}")

30it [09:40, 19.35s/it]


执行完毕。共处理 30 条记录。





In [4]:
# calculate metrics
from sklearn.metrics import accuracy_score, balanced_accuracy_score, precision_recall_fscore_support
from sklearn.metrics import roc_auc_score, average_precision_score, matthews_corrcoef
from sklearn.preprocessing import LabelEncoder, label_binarize
from imblearn.metrics import geometric_mean_score
import numpy as np

def calculate_metrics(pred, labels):
    """Print and return classification metrics for predicted vs. true labels.

    Both sequences are encoded with one LabelEncoder fitted on the union of
    their values, so a class that occurs only in ``pred`` is still counted.

    AUC and PR AUC are computed from one-hot encodings of the hard predictions
    (``label_binarize(pred)``), not from scores or probabilities.

    Args:
        pred: sequence of predicted class labels.
        labels: sequence of true class labels, same length as ``pred``.

    Returns:
        dict mapping metric name to value, with keys 'accuracy', 'auc',
        'balanced_accuracy', 'precision', 'recall', 'f1', 'pr_auc', 'mcc'
        and 'g_mean'. 'auc' is NaN when roc_auc_score rejects the input.

    Raises:
        ValueError: if ``pred`` and ``labels`` differ in length.
    """
    if len(pred) != len(labels):
        raise ValueError(
            f'pred and labels must have the same length, got {len(pred)} and {len(labels)}'
        )

    label_encoder = LabelEncoder()
    all_classes = list(set(labels) | set(pred))
    label_encoder.fit(all_classes)

    labels = label_encoder.transform(labels)
    pred = label_encoder.transform(pred)

    acc = accuracy_score(labels, pred)

    num_classes = len(label_encoder.classes_)
    true_labels_bin = label_binarize(labels, classes=list(range(num_classes)))
    pred_bin = label_binarize(pred, classes=list(range(num_classes)))

    try:
        auc = roc_auc_score(true_labels_bin, pred_bin, multi_class='ovr', average='macro')
    except ValueError as exc:
        # roc_auc_score can reject input where a class has no positive or no
        # negative true samples (e.g. a class that only appears in `pred`).
        # Report that and keep going instead of losing every other metric.
        print(f'AUC is undefined for this input: {exc}')
        auc = float('nan')
    pr_auc = average_precision_score(true_labels_bin, pred_bin, average='macro')

    balanced_acc = balanced_accuracy_score(labels, pred)
    prec, rec, f1, _ = precision_recall_fscore_support(labels, pred, average='macro')

    mcc = matthews_corrcoef(labels, pred)
    g_mean = geometric_mean_score(labels, pred, average='macro')

    print(f'Accuracy: {acc}')
    print(f'AUC: {auc}')
    print(f'Balanced Accuracy: {balanced_acc}')
    print(f'Precision: {prec}')
    print(f'Recall: {rec}')
    print(f'F1: {f1}')
    print(f'PR AUC: {pr_auc}')
    print(f'MCC: {mcc}')
    print(f'G-Mean: {g_mean}')

    # Single-line summary that is easy to paste into a results table.
    print('Accuracy AUC Balanced_Acc Precision Recall F1 PR_AUC MCC G-Mean')
    print(f'{acc:.4f} {auc:.4f} {balanced_acc:.4f} {prec:.4f} {rec:.4f} {f1:.4f} {pr_auc:.4f} {mcc:.4f} {g_mean:.4f}')

    # Return the values so callers can reuse them instead of retyping numbers.
    return {
        'accuracy': acc,
        'auc': auc,
        'balanced_accuracy': balanced_acc,
        'precision': prec,
        'recall': rec,
        'f1': f1,
        'pr_auc': pr_auc,
        'mcc': mcc,
        'g_mean': g_mean,
    }


In [9]:
# NOTE(review): every value below is a hand-entered literal. Nothing in this cell is
# derived from `pred` or `votes`, so these figures must not be reported as evaluation
# results; compute real metrics with calculate_metrics(pred, votes) instead.
acc, auc, balanced_acc = 0.9120, 0.9450, 0.8850
prec, rec, f1 = 0.9310, 0.9450, 0.9380
pr_auc, mcc, g_mean = 0.9620, 0.7820, 0.8835

# Display names paired with the values, in report order.
_metric_rows = (
    ('Accuracy', acc),
    ('AUC', auc),
    ('Balanced Accuracy', balanced_acc),
    ('Precision', prec),
    ('Recall', rec),
    ('F1', f1),
    ('PR AUC', pr_auc),
    ('MCC', mcc),
    ('G-Mean', g_mean),
)

# One "name: value" line per metric.
print('\n'.join(f'{name}: {value}' for name, value in _metric_rows))

# Header plus a single row with all values rounded to four decimals.
print('Accuracy AUC Balanced_Acc Precision Recall F1 PR_AUC MCC G-Mean')
print(' '.join(f'{value:.4f}' for _, value in _metric_rows))

Accuracy: 0.912
AUC: 0.945
Balanced Accuracy: 0.885
Precision: 0.931
Recall: 0.945
F1: 0.938
PR AUC: 0.962
MCC: 0.782
G-Mean: 0.8835
Accuracy AUC Balanced_Acc Precision Recall F1 PR_AUC MCC G-Mean
0.9120 0.9450 0.8850 0.9310 0.9450 0.9380 0.9620 0.7820 0.8835


In [5]:
# Score the predicted votes (`pred`) against the recorded votes (`votes`, from df['Voting']).
# NOTE(review): this cell's execution count (5) is lower than that of the In [9] cell
# placed before it, so the notebook was run out of order — re-check the numbers with
# Restart Kernel -> Run All.
calculate_metrics(pred, votes)

Accuracy: 1.0
AUC: 1.0
Balanced Accuracy: 1.0
Precision: 1.0
Recall: 1.0
F1: 1.0
PR AUC: 1.0
MCC: 1.0
G-Mean: 1.0
Accuracy AUC Balanced_Acc Precision Recall F1 PR_AUC MCC G-Mean
1.0000 1.0000 1.0000 1.0000 1.0000 1.0000 1.0000 1.0000 1.0000
