In [1]:
import pandas as pd
import numpy as np
from sklearn.model_selection import train_test_split
from sklearn.metrics import accuracy_score
import random
# Let printed DataFrames show up to 100 characters per column before truncating.
pd.set_option('display.max_colwidth', 100)

In [2]:
# Name of the column that is read from every label / prediction table below.
class_name = 't'

# ---- Training split -------------------------------------------------------
# Gold labels
train_labels = pd.read_csv("../../radnlp_2024_train_val_20240731/en/main_task/train/label.csv")
train_path = "../prediction/ensemble/train/"

# deepseek-reasoner (variables prefixed "r1"), runs 01-03
r1_train_1, r1_train_2, r1_train_3 = (
    pd.read_csv(csv_file)
    for csv_file in (
        f"{train_path}deepseek-reasoner_train_01.csv",
        f"{train_path}deepseek-reasoner_train_02.csv",
        f"{train_path}deepseek-reasoner_train_03.csv",
    )
)

# o1-mini (variables prefixed "o1"), runs 01-03
o1_train_1, o1_train_2, o1_train_3 = (
    pd.read_csv(csv_file)
    for csv_file in (
        f"{train_path}o1-mini_train_01.csv",
        f"{train_path}o1-mini_train_02.csv",
        f"{train_path}o1-mini_train_03.csv",
    )
)

# ---- Validation split -----------------------------------------------------
# Gold labels
val_labels = pd.read_csv("../../radnlp_2024_train_val_20240731/en/main_task/val/label.csv")
val_path = "../prediction/ensemble/val/"

# deepseek-reasoner, runs 01-03
r1_val_1, r1_val_2, r1_val_3 = (
    pd.read_csv(csv_file)
    for csv_file in (
        f"{val_path}deepseek-reasoner_val_01.csv",
        f"{val_path}deepseek-reasoner_val_02.csv",
        f"{val_path}deepseek-reasoner_val_03.csv",
    )
)

# o1-mini, runs 01-03
o1_val_1, o1_val_2, o1_val_3 = (
    pd.read_csv(csv_file)
    for csv_file in (
        f"{val_path}o1-mini_val_01.csv",
        f"{val_path}o1-mini_val_02.csv",
        f"{val_path}o1-mini_val_03.csv",
    )
)

In [3]:
# Stack train on top of validation for the gold labels; ignore_index=True
# gives the combined table a fresh 0..n-1 index.
labels = pd.concat((train_labels, val_labels), ignore_index=True)

# Same stacking for each deepseek-reasoner (r1) run
r1_pred_1, r1_pred_2, r1_pred_3 = (
    pd.concat([train_run, val_run], ignore_index=True)
    for train_run, val_run in (
        (r1_train_1, r1_val_1),
        (r1_train_2, r1_val_2),
        (r1_train_3, r1_val_3),
    )
)

# ... and for each o1-mini (o1) run
o1_pred_1, o1_pred_2, o1_pred_3 = (
    pd.concat([train_run, val_run], ignore_index=True)
    for train_run, val_run in (
        (o1_train_1, o1_val_1),
        (o1_train_2, o1_val_2),
        (o1_train_3, o1_val_3),
    )
)

In [4]:
# One row per case: the case id, each run's prediction for `class_name`,
# and the gold label. Column order follows the order of the pairs below.
# NOTE(review): the columns are lined up by row position, not by 'id' -- this
# assumes every prediction file lists the cases in the same order as the
# label files; confirm.
column_pairs = [
    ('id', labels['id']),
    (f'r1_1_{class_name}', r1_pred_1[class_name]),
    (f'r1_2_{class_name}', r1_pred_2[class_name]),
    (f'r1_3_{class_name}', r1_pred_3[class_name]),
    (f'o1_1_{class_name}', o1_pred_1[class_name]),
    (f'o1_2_{class_name}', o1_pred_2[class_name]),
    (f'o1_3_{class_name}', o1_pred_3[class_name]),
    ('label', labels[class_name]),
]
data = pd.DataFrame(dict(column_pairs))
print(data)

           id r1_1_t r1_2_t r1_3_t o1_1_t o1_2_t o1_3_t label
0       56344     T4     T4     T4     T4     T4     T4    T4
1      133166    T1c    T1c    T1c    T1c    T1c    T1c   T1c
2      165742     T3     T3     T3     T3     T3     T3    T3
3      404886     T4     T4     T4     T4     T4     T4    T4
4      463397    T2b    T2b    T2b    T2b    T2b    T2b   T2b
..        ...    ...    ...    ...    ...    ...    ...   ...
157  15448681    Tis    Tis    Tis    Tis    Tis     T0   Tis
158  15532322    T1c    T1c    T1c    T1c    T1c    T1c   T1c
159  16066820    T1c    T1c    T1c    T1c    T1c    T1c   T1c
160  16191878    T1b    T1b    T1b    T1b    T1b    T1b   Tis
161  16572985    T1b    T1b    T1b    T1b    T1b    T1b   Tis

[162 rows x 8 columns]


In [None]:
# List the model prediction columns by name. A positional [1:-1] slice would
# start including 'label' once 'enhanced_prediction' is appended to `data`.
[col for col in data.columns if col not in ('id', 'label', 'enhanced_prediction')]

['r1_1_t', 'r1_2_t', 'r1_3_t', 'o1_1_t', 'o1_2_t', 'o1_3_t']

In [6]:
import pandas as pd
from collections import defaultdict



# 1. Calculate model accuracies
# Select the prediction columns by name instead of by position. This cell
# later appends 'enhanced_prediction' to `data`; on a re-run the old slice
# [1:-1] would then pick up 'label' as a "model" with accuracy 1.0 and let
# the ground truth vote in the ensemble.
non_model_columns = {'id', 'label', 'enhanced_prediction'}
model_columns = [col for col in data.columns if col not in non_model_columns]

# Accuracy of a model = share of rows where its prediction equals the label.
# NOTE(review): these weights are estimated on the same rows that the
# ensemble is scored on further down, so the reported accuracy is optimistic.
accuracies = {}
total_predictions = len(data)  # same for every model, so computed once

for model in model_columns:
    correct_predictions = (data[model] == data['label']).sum()
    accuracies[model] = correct_predictions / total_predictions

# 2. Create weighted voting system with tie-breakers
def enhanced_vote(row, columns=None, weights=None, class_counts=None, tie_tol=1e-9):
    """Choose one class for a row by accuracy-weighted voting.

    Every column in ``columns`` casts a vote for the class it holds in
    ``row``; the vote is worth that column's entry in ``weights``. The class
    with the largest total wins. Totals within ``tie_tol`` of the maximum are
    treated as tied, and the tie is broken in favour of the class with the
    highest count in ``class_counts``. If the counts are equal too, the
    class that was voted for first (in ``columns`` order) wins.

    Args:
        row: Mapping-like object (e.g. a DataFrame row) indexed by column name.
        columns: Names of the voting columns. Defaults to the notebook-level
            ``model_columns``.
        weights: Mapping column name -> vote weight. Defaults to the
            notebook-level ``accuracies``.
        class_counts: Mapping class -> frequency, used only to break ties.
            Defaults to the label frequencies of the notebook-level ``data``.
        tie_tol: Absolute tolerance when comparing summed weights. Needed
            because float sums that are mathematically equal can differ in
            the last bit (0.1 + 0.7 != 0.2 + 0.6).

    Returns:
        The winning class value.

    Raises:
        ValueError: If ``columns`` is empty (there are no votes to compare).
    """
    # Fall back to the notebook globals so `data.apply(enhanced_vote, axis=1)`
    # keeps working without extra arguments.
    if columns is None:
        columns = model_columns
    if weights is None:
        weights = accuracies

    vote_weights = defaultdict(float)

    # Add weights based on model accuracy
    for model in columns:
        vote_weights[row[model]] += weights[model]

    # Every class whose total is (numerically) equal to the best total
    max_weight = max(vote_weights.values())
    candidates = [k for k, v in vote_weights.items() if max_weight - v <= tie_tol]

    # Tie-break: prefer the class that is most frequent among the labels
    if len(candidates) > 1:
        if class_counts is None:
            # NOTE(review): this consults the gold labels of the data being
            # scored; acceptable for exploration, not for a held-out estimate.
            class_counts = data['label'].value_counts().to_dict()
        return max(candidates, key=lambda x: class_counts.get(x, 0))

    return candidates[0]

# 3. Vote row by row (axis=1) and keep the winner next to the model columns
data['enhanced_prediction'] = data.apply(enhanced_vote, axis=1)

# 4. Accuracy = share of rows where the ensemble matches the label
accuracy = data['enhanced_prediction'].eq(data['label']).mean()
print(f"Enhanced Accuracy: {accuracy:.2%}", "\nResults:", data.head(), sep="\n")

Enhanced Accuracy: 90.12%

Results:
       id r1_1_t r1_2_t r1_3_t o1_1_t o1_2_t o1_3_t label enhanced_prediction
0   56344     T4     T4     T4     T4     T4     T4    T4                  T4
1  133166    T1c    T1c    T1c    T1c    T1c    T1c   T1c                 T1c
2  165742     T3     T3     T3     T3     T3     T3    T3                  T3
3  404886     T4     T4     T4     T4     T4     T4    T4                  T4
4  463397    T2b    T2b    T2b    T2b    T2b    T2b   T2b                 T2b


In [7]:
# Cross-check the ensemble accuracy with scikit-learn's accuracy_score.
y_true, y_pred = data['label'], data['enhanced_prediction']

acc = accuracy_score(y_true=y_true, y_pred=y_pred)
print(f"Accuracy: {acc:.4f}")

Accuracy: 0.9012


In [8]:
# Keep only the rows where the ensemble vote disagrees with the label.
is_misclassified = data['enhanced_prediction'].ne(data['label'])
filtered_df = data.loc[is_misclassified]

filtered_df

Unnamed: 0,id,r1_1_t,r1_2_t,r1_3_t,o1_1_t,o1_2_t,o1_3_t,label,enhanced_prediction
9,1185427,T2a,T2a,T2a,T2a,T1c,T2a,T1c,T2a
11,1679413,T4,T4,T4,T4,T4,T2b,T2b,T4
12,1736655,T0,T4,T4,T3,T0,T2a,T3,T4
18,2195733,T2a,T3,T2a,T2a,T2a,T3,T3,T2a
22,2343928,T2b,T2b,T2b,T2b,T2b,T2b,T4,T2b
36,4760374,T4,T4,T4,T2a,T2a,T2a,T2a,T4
44,6363776,T4,T4,T4,T4,T4,T4,T3,T4
77,10320785,T4,T4,T4,T4,T4,T0,T3,T4
97,15045923,T3,T3,T3,T3,T3,T3,T2b,T3
99,15095613,T2b,T2b,T2b,T2b,T2b,T2b,T4,T2b


In [9]:
# Show the per-model accuracies that serve as vote weights in enhanced_vote.
accuracies

{'r1_1_t': 0.8888888888888888,
 'r1_2_t': 0.8950617283950617,
 'r1_3_t': 0.8703703703703703,
 'o1_1_t': 0.8024691358024691,
 'o1_2_t': 0.8518518518518519,
 'o1_3_t': 0.8271604938271605}