In [None]:
import pandas as pd
import numpy as np

def get_class_labels(ensemble_probs):
    """Pick the winning class for every row of a score table.

    Args:
        ensemble_probs: DataFrame with one row per sample and one column per
            class; the column labels must be convertible to ``int``.

    Returns:
        Series holding, for each row, the label of the column with the
        largest value, cast to ``int``.
    """
    # idxmax yields column labels (not positions), so cast them to int.
    winning_columns = ensemble_probs.idxmax(axis="columns")
    return winning_columns.astype(int)

def save_ensembled_predictions(ids, ensemble_labels, output_file):
    """Write id/label pairs as a two-column CSV.

    Args:
        ids: Values for the ``id`` column.
        ensemble_labels: Values for the ``label`` column.
        output_file: Destination forwarded to ``DataFrame.to_csv``.
    """
    submission_columns = {'id': ids, 'label': ensemble_labels}
    # index=False keeps the DataFrame index out of the written file.
    pd.DataFrame(submission_columns).to_csv(output_file, index=False)

def _labels_to_one_hot(labels, class_labels):
    """Return a float one-hot frame with exactly one column per class label.

    Args:
        labels: Series of predicted class labels, one per row.
        class_labels: Ordered list of every class that must get a column.

    Returns:
        DataFrame of 0.0/1.0 values whose columns are ``class_labels`` in the
        given order; classes absent from ``labels`` become all-zero columns.
    """
    one_hot = pd.get_dummies(labels).astype(float)
    # reindex both adds the missing classes (as zeros) and fixes column order.
    return one_hot.reindex(columns=class_labels, fill_value=0.0)


# Load the two submissions to combine: the best weighted ensemble (v1) and the
# best hard-voting ensemble (v2).
best_weighted_submission = pd.read_csv('submissions/ensemble_submission_v1.csv')
best_hard_voting_submission = pd.read_csv('submissions/ensemble_submission_v2.csv')

# The two files are combined row by row (by position), so they must list the
# same ids in the same order; otherwise votes for different samples get mixed.
if not np.array_equal(
    best_weighted_submission['id'].to_numpy(),
    best_hard_voting_submission['id'].to_numpy(),
):
    raise ValueError(
        "Submission files do not contain the same ids in the same order"
    )

num_classes = len(best_weighted_submission['label'].unique())

# Use one shared class list for both frames. Deriving the columns from a single
# file's unique-label count leaves the frames misaligned whenever a label shows
# up in only one file, and the element-wise sum below then yields NaN columns.
# range(num_classes) keeps the original zero-based padding.
class_labels = sorted(
    set(range(num_classes))
    | set(best_weighted_submission['label'])
    | set(best_hard_voting_submission['label'])
)

# Convert each submission's hard labels to one-hot "probabilities"
best_weighted_probs = _labels_to_one_hot(
    best_weighted_submission['label'], class_labels
)
best_hard_voting_probs = _labels_to_one_hot(
    best_hard_voting_submission['label'], class_labels
)

# Average the one-hot votes from the two ensembles.
# NOTE: when the two submissions disagree on a row, both classes score 0.5 and
# the idxmax in get_class_labels keeps the first such column, i.e. the lower
# class label.
final_probs = (best_weighted_probs + best_hard_voting_probs) / 2

# Get the final predictions
final_labels = get_class_labels(final_probs)

# Save the final predictions
ids = best_weighted_submission['id']
output_file = 'submissions/final_combined_ensemble_submission.csv'
save_ensembled_predictions(ids, final_labels, output_file)