## Load submission files

In [12]:
import pandas as pd
import numpy as np

# Load the submission files
# Each entry is the version suffix of a file named
# submissions/submission_v<suffix>.csv. Integer and string suffixes are
# interpolated into the path identically, so no type check is needed.
version_numbers = [2, 9, 19, 20, '24_hard_voting']
submissions = []
for v in version_numbers:
    file_name = f'submissions/submission_v{v}.csv'
    submission = pd.read_csv(file_name)
    submissions.append(submission)

## Convert class labels to probabilities

In [13]:
def convert_labels_to_probabilities(submissions):
    """One-hot encode the ``label`` column of every submission.

    Each submission becomes a float DataFrame with one column per class,
    holding 1.0 in the predicted class and 0.0 elsewhere. All returned frames
    share the same sorted column set, built from the labels seen across *all*
    submissions, so they can be added together without column misalignment.

    Args:
        submissions: list of DataFrames, each with a ``label`` column.

    Returns:
        A list of one-hot DataFrames in the same order as ``submissions``
        (an empty list when ``submissions`` is empty).
    """
    # Take the class set from every submission, not just the first: a model
    # that never predicts some class must still get a zero column for it,
    # otherwise adding the frames later produces NaN in that column.
    all_classes = sorted(
        set().union(*(submission['label'].dropna().unique() for submission in submissions))
    )

    submissions_probs = []
    for submission in submissions:
        one_hot = pd.get_dummies(submission['label']).astype(float)
        # reindex inserts missing classes as 0.0 columns and fixes column order.
        submissions_probs.append(one_hot.reindex(columns=all_classes, fill_value=0.0))
    return submissions_probs

# One-hot encode every loaded submission so the votes can be combined numerically.
submissions_probs = convert_labels_to_probabilities(submissions)

## Calculate ensemble weights from F1 scores

In [15]:
def calculate_weights(f1_macro_scores):
    """Scale the given scores so that they sum to 1.

    Args:
        f1_macro_scores: sequence of numeric scores.

    Returns:
        A NumPy array holding each score divided by the total of all scores.
    """
    total = sum(f1_macro_scores)
    scores = np.array(f1_macro_scores)
    return scores / total

# Macro-F1 scores used to weight the ensemble (presumably one per model, in
# the same order as the loaded submissions — verify).
# NOTE(review): four scores are listed here while five submission versions are
# loaded earlier; ensemble_predictions pairs frames and weights with zip(), so
# the last submission ('24_hard_voting') receives no weight — confirm intended.
f1_macro_scores = [
    0.604292704,
    0.5999046962,
    0.5882138172,
    0.6165329653,
]
weights = calculate_weights(f1_macro_scores)

## Ensemble based on calculated weights

In [17]:
def ensemble_predictions(submissions_probs, weights):
    """Combine per-submission class scores into one weighted-sum frame.

    Args:
        submissions_probs: iterable of DataFrames of per-class scores.
        weights: iterable of numeric weights, paired positionally with
            ``submissions_probs``.

    Returns:
        The sum of ``frame * weight`` over all pairs. With no pairs at all the
        result is the integer 0 (the start value of ``sum``).

    Warns:
        UserWarning: if the two inputs differ in length. Pairing still stops
            at the shorter one, so the surplus frames or weights are ignored.
    """
    import warnings

    frames = list(submissions_probs)
    weight_values = list(weights)

    # zip() truncates silently; surface the mismatch so a model is not
    # dropped from the ensemble without anyone noticing.
    if len(frames) != len(weight_values):
        warnings.warn(
            f"ensemble_predictions received {len(frames)} probability frames "
            f"but {len(weight_values)} weights; the extra entries are ignored.",
            UserWarning,
            stacklevel=2,
        )

    ensemble_probs = sum(frame * weight for frame, weight in zip(frames, weight_values))
    return ensemble_probs

# Weighted sum of the per-submission one-hot frames: one score per class per row.
ensemble_probs = ensemble_predictions(submissions_probs, weights)

def get_class_labels(ensemble_probs):
    """Pick the highest-scoring class for every row.

    Args:
        ensemble_probs: DataFrame whose columns are class labels and whose
            values are the scores to compare.

    Returns:
        A Series holding, per row, the label of the column with the largest
        value, cast to ``int``.
    """
    winning_columns = ensemble_probs.idxmax(axis="columns")
    return winning_columns.astype(int)

# For each row, keep the class whose column has the highest weighted score.
ensemble_labels = get_class_labels(ensemble_probs)

## Save file

In [19]:
def save_ensembled_predictions(ids, ensemble_labels, output_file):
    """Write the ensembled predictions to a CSV file.

    Args:
        ids: values for the ``id`` column.
        ensemble_labels: values for the ``label`` column.
        output_file: path of the CSV file to write.

    The file has the two columns ``id`` and ``label`` and no index column.
    """
    columns = {'id': ids, 'label': ensemble_labels}
    pd.DataFrame(columns).to_csv(output_file, index=False)

# Row ids are taken from the first submission only.
# NOTE(review): assumes every submission lists the same ids in the same row order — TODO confirm.
ids = submissions[0]['id']
output_file = 'submissions/ensemble_submission.csv'
save_ensembled_predictions(ids, ensemble_labels, output_file)