In [1]:
import pandas as pd
import numpy as np

# Load the submission files.
# Each entry is interpolated verbatim into the file name, so integer versions
# (e.g. 2) and string-tagged versions (e.g. '26_soft') share one path pattern
# and need no type-specific handling.
version_numbers = [2, 9, 11, 20, '24_hard_voting', '26_soft', '30_soft']
submissions = []
for v in version_numbers:
    file_name = f'submissions/submission_v{v}.csv'
    submission = pd.read_csv(file_name)
    submissions.append(submission)

# Convert labels to probabilities
def convert_labels_to_probabilities(submissions):
    """One-hot encode the ``'label'`` column of every submission.

    All returned tables share the same columns: the sorted union of the labels
    found across *all* submissions.  A label that a given submission never
    predicts gets an all-zero column in that submission's table, so the tables
    can be added together without column misalignment (which would otherwise
    produce NaN for that label).

    Args:
        submissions: Sequence of DataFrames, each with a ``'label'`` column.

    Returns:
        A list with one float DataFrame per submission, in input order.  Each
        table keeps its submission's row index, has one column per label, and
        holds 1.0 in the column of the row's label and 0.0 elsewhere.  An empty
        input yields an empty list.
    """
    if not submissions:
        return []

    # Derive the class set from every submission, not just the first one:
    # any single submission may fail to predict some class at all.
    all_labels = set()
    for submission in submissions:
        all_labels.update(submission['label'].unique())
    columns = sorted(all_labels)

    return [
        pd.get_dummies(submission['label'])
        .astype(float)
        # Adds the missing label columns (as 0.0) and fixes the column order.
        .reindex(columns=columns, fill_value=0.0)
        for submission in submissions
    ]

submissions_probs = convert_labels_to_probabilities(submissions=submissions)

# Calculate weights based on validation F1 scores: each score is divided by
# the total of all scores.
# NOTE(review): scores are presumably listed in the same order as
# version_numbers — confirm when adding or removing a submission.
validation_f1_scores = [0.9048, 0.912, 0.929, 0.926, 0.8765, 0.9253, 0.9131]
weights = np.divide(np.array(validation_f1_scores), sum(validation_f1_scores))

# Ensemble predictions using the calculated weights
def ensemble_predictions(submissions_probs, weights):
    """Return the weighted sum of the given probability tables.

    Tables and weights are paired positionally with ``zip``, so pairing stops
    at the shorter of the two inputs.  Accumulation starts from the integer 0,
    which is also what is returned when there are no pairs.

    Args:
        submissions_probs: Iterable of probability tables (e.g. DataFrames).
        weights: Iterable of scalar weights, one per table.

    Returns:
        The sum of ``table * weight`` over all pairs.
    """
    combined = 0
    for table, share in zip(submissions_probs, weights):
        combined = combined + table * share
    return combined

# Blend the per-submission tables, pairing each table with its weight.
ensemble_probs = ensemble_predictions(submissions_probs=submissions_probs, weights=weights)

# Get class labels from the ensembled probabilities
def get_class_labels(ensemble_probs):
    """Return, per row, the column label with the largest value, cast to int.

    Args:
        ensemble_probs: DataFrame whose column labels are the class labels.

    Returns:
        A Series of integer class labels indexed like ``ensemble_probs``.
    """
    winning_columns = ensemble_probs.idxmax(axis='columns')
    return winning_columns.astype(int)

# Reduce the blended scores to one predicted class per row.
ensemble_labels = get_class_labels(ensemble_probs=ensemble_probs)

# Save ensembled predictions to a CSV file
def save_ensembled_predictions(ids, ensemble_labels, output_file):
    """Write the ids and their ensembled labels to a CSV file.

    Args:
        ids: Values for the ``id`` column.
        ensemble_labels: Values for the ``label`` column.
        output_file: Destination passed to ``DataFrame.to_csv``.

    The file has exactly two columns, ``id`` and ``label``; the DataFrame
    index is not written.
    """
    columns = {'id': ids, 'label': ensemble_labels}
    pd.DataFrame(columns).to_csv(output_file, index=False)

output_file = 'submissions/ensemble_submission_f1_based.csv'
# Row identifiers come from the first loaded submission.
ids = submissions[0]['id']
save_ensembled_predictions(ids=ids, ensemble_labels=ensemble_labels, output_file=output_file)
