## Import libraries for ensemble techniques

In [1]:
import pandas as pd
import numpy as np 

from sklearn.linear_model import LogisticRegression
from sklearn.model_selection import train_test_split

from sklearn.metrics import (
    accuracy_score,
    confusion_matrix,
    ConfusionMatrixDisplay,
    fbeta_score,
    log_loss,
    precision_score,
    recall_score,
    f1_score,
)

## Load valid_df and test_df

In [2]:
valid_df = pd.read_csv('valid_df.csv')
valid_df.head(5)

Unnamed: 0,image_id,labels
0,1611662564_noise_4.jpg,4
1,801551318_hue3_4.jpg,4
2,train-cbb-434.jpg,0
3,3078964330_hue2_4.jpg,4
4,175989862_hue4_2.jpg,2


### TEST SET !!

In [3]:
test_df = pd.read_csv('test_df.csv')

## Load csv files

In [4]:
csv_files = ['example_best_validation_probabilities.csv', 'example2_best_validation_probabilities.csv'] 

In [5]:
model_count = len(csv_files)

In [6]:
for i,file in enumerate(csv_files): 
    if i == 0: 
        merged_df = pd.read_csv(file)
        #move the image_id column to the first
        image_name = merged_df.columns[-1]
        merged_df = merged_df[[image_name] + merged_df.columns[:-1].tolist()]
        #sort the order of validation true labels and get the labels
        valid_df = merged_df.merge(valid_df, on = 'image_id', how = 'left')
        valid_df = valid_df[['image_id', 'labels']]
        true_val_labels = valid_df['labels'].values
    else: 
        df = pd.read_csv(file)
        merged_df = merged_df.merge(df, on = 'image_id', how = 'left', suffixes = ('', '_model'+str(i+1)))

## Initialize Eval Table for Ensembles

In [7]:
# Initialize an empty DataFrame with specific column names
columns = ['Method','Log Loss', 'Accuracy', 'Precision', 'Recall', 'F1 Score', 'F2 Score']
val_ensemble_df = pd.DataFrame(columns=columns)
test_ensemble_df = pd.DataFrame(columns=columns)

In [8]:
def add_ensemble (ensemble_df, true_labels, pred_labels, pred_probs, method, mode):
    logloss = log_loss(true_labels, pred_probs)
    accuracy = accuracy_score(true_labels, pred_labels)
    precision = precision_score(true_labels, pred_labels, average="weighted")
    recall = recall_score(true_labels, pred_labels, average="weighted")
    f1 = f1_score(true_labels, pred_labels, average="weighted")
    f2 = fbeta_score(true_labels, pred_labels, beta=2, average="weighted")

    print(f"Metrics for {method} on {mode} set")
    print(f"Log Loss: {logloss:.4f}")
    print(f"Accuracy: {accuracy:.4f}")
    print(f"Precision: {precision:.4f}")
    print(f"Recall: {recall:.4f}")
    print(f"F1 Score: {f1:.4f}")
    print(f"F2 Score: {f2:.4f}")
    
    ensemble_df.loc[len(ensemble_df)] = [method, logloss, accuracy, precision, recall, f1, f2]
    return ensemble_df

## Load softmax probabilities
Note: Assuming that all csv files contain a last column stating the image_ids. Here, we only extract the true labels from the first file

## Ensemble 1 - Soft Voting: Averaging

In [9]:
average_df = pd.DataFrame()

average_df['image_id'] = merged_df['image_id']
average_df['avg_prob_class0'] = merged_df[['prob_class_0'] + [f'prob_class_0_model{i+2}' for i in range(model_count - 1)]].mean(axis=1)
average_df['avg_prob_class1'] = merged_df[['prob_class_1'] + [f'prob_class_1_model{i+2}' for i in range(model_count - 1)]].mean(axis=1)
average_df['avg_prob_class2'] = merged_df[['prob_class_2'] + [f'prob_class_2_model{i+2}' for i in range(model_count - 1)]].mean(axis=1)
average_df['avg_prob_class3'] = merged_df[['prob_class_3'] + [f'prob_class_3_model{i+2}' for i in range(model_count - 1)]].mean(axis=1)
average_df['avg_prob_class4'] = merged_df[['prob_class_4'] + [f'prob_class_4_model{i+2}' for i in range(model_count - 1)]].mean(axis=1)

average_df.to_csv('test_average.csv', index=False)

In [10]:
# Get predictions for logloss 
pred_val_probs = np.array([average_df.iloc[i, 1:].values for i in range(len(average_df))])

In [11]:
# Get the column index (integer) of the maximum value in each row, starting from the second column
pred_val_labels = average_df.values[:, 1:].argmax(axis=1) 

In [12]:
val_ensemble_df = add_ensemble(val_ensemble_df, true_val_labels, pred_val_labels, pred_val_probs, "Averaging", "Val")

Metrics for Averaging on Val set
Log Loss: 0.8583
Accuracy: 0.6678
Precision: 0.6717
Recall: 0.6678
F1 Score: 0.6675
F2 Score: 0.6672




In [13]:
val_ensemble_df

Unnamed: 0,Method,Log Loss,Accuracy,Precision,Recall,F1 Score,F2 Score
0,Averaging,0.858309,0.66777,0.671675,0.66777,0.667488,0.667171


Remember add for test set. 

## Ensemble 2 - Stacking Classifier

In [14]:
merged_df

Unnamed: 0,image_id,prob_class_0,prob_class_1,prob_class_2,prob_class_3,prob_class_4,prob_class_0_model2,prob_class_1_model2,prob_class_2_model2,prob_class_3_model2,prob_class_4_model2
0,2932210794_rotate_4.jpg,0.099557,0.069480,0.062635,0.021433,0.746895,0.099557,0.069480,0.062635,0.021433,0.746895
1,2090870733_shadow_4.jpg,0.001777,0.000660,0.040215,0.950968,0.006381,0.001777,0.000660,0.040215,0.950968,0.006381
2,2347718160_hue3_0.jpg,0.146259,0.352487,0.067375,0.031296,0.402584,0.146259,0.352487,0.067375,0.031296,0.402584
3,train-cmd-994_shear_3.jpg,0.113118,0.086233,0.291113,0.292378,0.217159,0.113118,0.086233,0.291113,0.292378,0.217159
4,1439675422_hue4_0.jpg,0.634798,0.272170,0.016158,0.032814,0.044060,0.634798,0.272170,0.016158,0.032814,0.044060
...,...,...,...,...,...,...,...,...,...,...,...
14795,4239036041_rotate_1.jpg,0.765609,0.208602,0.004535,0.004617,0.016637,0.765609,0.208602,0.004535,0.004617,0.016637
14796,train-cmd-8_hue3_3.jpg,0.023618,0.018197,0.690077,0.249425,0.018683,0.023618,0.018197,0.690077,0.249425,0.018683
14797,2779441922_hue1_2.jpg,0.205329,0.132130,0.342390,0.258805,0.061347,0.205329,0.132130,0.342390,0.258805,0.061347
14798,847847826_rotate_1.jpg,0.002532,0.988133,0.002781,0.002482,0.004072,0.002532,0.988133,0.002781,0.002482,0.004072


In [15]:
#load the arrays of softmax probabilities 
model_probs = []
for j in range (model_count): 
    if j == 0: 
        column_names = [f'prob_class_{i}' for i in range(5)]
    else: 
        column_names = [f'prob_class_{i}_model{j+1}' for i in range(5)]
    model_prob = merged_df[column_names].values
    model_probs.append(model_prob)

In [16]:
# Stack all model probabilities horizontally
X_stacked = np.hstack(model_probs)

In [17]:
# Initialize and fit the logistic regression model
logistic_classifier = LogisticRegression(solver='lbfgs', max_iter=1000)
logistic_classifier.fit(X_stacked, true_val_labels)
# Make predictions on the validation set
log_val_predlabel = logistic_classifier.predict(X_stacked)
log_val_predprob = logistic_classifier.predict_proba(X_stacked)


In [18]:
val_ensemble_df = add_ensemble(val_ensemble_df, true_val_labels, log_val_predlabel, log_val_predprob, "LogReg", "Val")

Metrics for LogReg on Val set
Log Loss: 0.8936
Accuracy: 0.6706
Precision: 0.6719
Recall: 0.6706
F1 Score: 0.6708
F2 Score: 0.6706


In [19]:
val_ensemble_df

Unnamed: 0,Method,Log Loss,Accuracy,Precision,Recall,F1 Score,F2 Score
0,Averaging,0.858309,0.66777,0.671675,0.66777,0.667488,0.667171
1,LogReg,0.893574,0.670608,0.671944,0.670608,0.670828,0.670591


Remember add for test set. 