In [None]:
import pandas as pd

# Load the task-2 table; later cells read its 'Original_id', 'Country'
# and 'Voting' columns.
df = pd.read_csv(filepath_or_buffer='../data/task2.csv')

In [None]:
import os
import json

# One entry per row of `df`: the id of the draft folder to read from disk.
draft_ids = list(df['Original_id'])

# input data
countries = list(df['Country'])
drafts = []
votes = list(df['Voting'])

# Each draft lives in '<path>/<Original_id>/' as a file whose name ends in 'EN.json'.
path = '../data/task2'
for i in draft_ids:
    folder_path = os.path.join(path, str(i))
    # os.listdir() returns entries in arbitrary order; sorting makes the choice
    # deterministic if a folder ever holds more than one matching file.
    json_files = sorted(
        name for name in os.listdir(folder_path) if name.endswith('EN.json')
    )
    if not json_files:
        # Fail with the offending folder instead of a bare IndexError.
        raise FileNotFoundError(f"No '*EN.json' file found in {folder_path}")
    json_file = json_files[0]
    # Decode explicitly as UTF-8 rather than with the platform's locale default,
    # which is not UTF-8 everywhere (e.g. Windows).
    with open(os.path.join(folder_path, json_file), encoding='utf-8') as f:
        draft = json.load(f)
    # Only the 'Content' field of the JSON document is kept.
    drafts.append(draft['Content'])

In [None]:
import random
import re
from tqdm import tqdm
from openai import OpenAI

import os  # needed here to read the API key from the environment

# The API key is read from the environment so that no secret is stored in the
# notebook. NOTE(review): the key that used to be hardcoded here should be
# treated as leaked and revoked with the provider.
api_key = os.environ.get("OPENAI_API_KEY")
if not api_key:
    raise RuntimeError(
        "Set the OPENAI_API_KEY environment variable before running this cell."
    )

client = OpenAI(
    api_key=api_key,
    base_url="https://hiapi.online/v1"
)

your_model_name = "gemini-3-flash-preview"

# pred[k] is the parsed vote for row k: 'Y', 'N', 'A', "INVALID" (no parsable
# vote in the reply) or "ERROR" (the request raised).
pred = []
# detailed_logs[k] is the raw reply (or the error text) for row k, so both
# lists stay index-aligned even when a request fails.
detailed_logs = []

# zip() stops at the shorter input; give tqdm that length so it can show a
# real progress bar instead of a bare counter.
n_items = min(len(drafts), len(countries))

for i, (draft, country) in tqdm(enumerate(zip(drafts, countries)), total=n_items):
    system_prompt = f"You are a strategic diplomat for {country}. Analyze the UNSC draft resolution based on national interests before casting a formal vote."
    user_prompt = f"""
    Draft Resolution: {draft}

    Task:
    1. Analyze the core impact on {country}.
    2. Provide your final vote in the format: "RESULT: [Y/N/A]"
    """

    try:
        # NOTE(review): max_tokens=200 may cut the reply off before the
        # "RESULT:" line is produced, which would surface as "INVALID" -- confirm
        # the limit is large enough for the analysis plus the vote.
        response = client.chat.completions.create(
            model=your_model_name,
            messages=[
                {"role": "system", "content": system_prompt},
                {"role": "user", "content": user_prompt}
            ],
            max_tokens=200,
            temperature=0.0,
            top_p=0.1,
            seed=42
        )

        # The message content can be None; treat that as an empty reply so it is
        # recorded as "INVALID" rather than misreported as an API error.
        raw_output = (response.choices[0].message.content or "").strip()
        log_entry = raw_output

        # First "RESULT: <Y|N|A>" occurrence, case-insensitive. This single
        # pattern already covers the plain 'RESULT: Y' / 'N' / 'A' substrings.
        match = re.search(r"RESULT:\s*([YNA])", raw_output, re.IGNORECASE)
        result = match.group(1).upper() if match else "INVALID"

    except Exception as e:
        # Best effort: keep going so one failed request does not abort the run.
        print(f"API Error at index {i}: {e}")
        log_entry = f"API Error: {e}"
        result = "ERROR"

    detailed_logs.append(log_entry)
    pred.append(result)

# Share of rows that produced a usable vote; guarded so an empty run does not
# divide by zero. The message reads "Finished. Valid prediction rate: ...".
valid_count = sum(1 for p in pred if p in ['Y', 'N', 'A'])
valid_rate = valid_count / len(pred) if pred else 0.0
print(f"\n执行完毕。有效预测率: {valid_rate:.2%}")

In [None]:
# calculate metrics
from sklearn.metrics import accuracy_score, balanced_accuracy_score, precision_recall_fscore_support
from sklearn.metrics import roc_auc_score, average_precision_score, matthews_corrcoef
from sklearn.preprocessing import LabelEncoder, label_binarize
from imblearn.metrics import geometric_mean_score
import numpy as np

def calculate_metrics(pred, labels):
    """Print classification metrics for predicted labels against ground truth.

    Both sequences are encoded with a single ``LabelEncoder`` fitted on the
    union of their values, so ``pred`` may contain values that never occur in
    ``labels``.

    ROC AUC and PR AUC are computed one-vs-rest on the binarized hard
    predictions and macro-averaged over the classes whose ground truth
    contains both positive and negative samples. A class that only appears in
    ``pred`` has no positives in the ground truth, so its one-vs-rest ROC AUC
    is undefined and it is left out of that average. If no class qualifies,
    both values are reported as NaN.

    Args:
        pred: sequence of predicted class labels.
        labels: sequence of ground-truth class labels, aligned with ``pred``.

    Returns:
        None. The metrics are printed one per line, followed by a header line
        and a single line with all values rounded to four decimals.
    """
    label_encoder = LabelEncoder()
    all_classes = list(set(labels) | set(pred))
    label_encoder.fit(all_classes)

    # From here on both arrays hold integer class ids.
    labels = label_encoder.transform(labels)
    pred = label_encoder.transform(pred)

    acc = accuracy_score(labels, pred)

    num_classes = len(label_encoder.classes_)
    class_ids = list(range(num_classes))
    true_labels_bin = label_binarize(labels, classes=class_ids)
    pred_bin = label_binarize(pred, classes=class_ids)

    # Keep only the indicator columns whose ground truth has at least one
    # positive and one negative sample; the others cannot be scored.
    n_samples = true_labels_bin.shape[0]
    positives_per_column = true_labels_bin.sum(axis=0)
    scorable = [
        col for col, n_pos in enumerate(positives_per_column)
        if 0 < n_pos < n_samples
    ]

    if scorable:
        auc = roc_auc_score(
            true_labels_bin[:, scorable], pred_bin[:, scorable],
            multi_class='ovr', average='macro',
        )
        pr_auc = average_precision_score(
            true_labels_bin[:, scorable], pred_bin[:, scorable], average='macro',
        )
    else:
        auc = float('nan')
        pr_auc = float('nan')

    balanced_acc = balanced_accuracy_score(labels, pred)
    prec, rec, f1, _ = precision_recall_fscore_support(labels, pred, average='macro')

    mcc = matthews_corrcoef(labels, pred)
    g_mean = geometric_mean_score(labels, pred, average='macro')

    print(f'Accuracy: {acc}')
    print(f'AUC: {auc}')
    print(f'Balanced Accuracy: {balanced_acc}')
    print(f'Precision: {prec}')
    print(f'Recall: {rec}')
    print(f'F1: {f1}')
    print(f'PR AUC: {pr_auc}')
    print(f'MCC: {mcc}')
    print(f'G-Mean: {g_mean}')

    # Compact one-line summary in a fixed column order.
    print('Accuracy AUC Balanced_Acc Precision Recall F1 PR_AUC MCC G-Mean')
    print(f'{acc:.4f} {auc:.4f} {balanced_acc:.4f} {prec:.4f} {rec:.4f} {f1:.4f} {pr_auc:.4f} {mcc:.4f} {g_mean:.4f}')


In [None]:
# Score the model's parsed votes against the ground-truth 'Voting' column.
calculate_metrics(pred=pred, labels=votes)