## Ensembling by finding the most frequent label for each sample from public notebooks
This notebook builds an ensemble automatically from the predictions of public notebooks attached as inputs: for each sample, the final label is the one predicted most often (a majority vote). The goal is to show how much a simple ensembling technique can improve the final score.

In [None]:
import os
import numpy as np
import pandas as pd
from scipy import stats
import plotly.express as px

In [None]:
# Name of the label column that every prediction file must provide.
targetName = 'prediction'
# Kaggle mount point of the competition data. CSV files located directly in
# this folder are skipped when prediction files are collected.
competitionDir = '/kaggle/input/contradictory-my-dear-watson'
# Submission template: its row count is used to recognise prediction files and
# its target column is overwritten with the ensemble result.
# The path is built from competitionDir so the location is defined only once
# and no longer depends on the notebook's current working directory.
submission = pd.read_csv(os.path.join(competitionDir, 'sample_submission.csv'))

## Import any number of public notebooks to update the ensemble prediction

In [None]:
# Gather the target column of every candidate prediction file found under
# /kaggle/input. Each entry of `preds` is one notebook's vote for every sample.
# NOTE(review): votes are later combined by row position, so this assumes every
# file lists the samples in the same order as the sample submission - confirm.
preds = []
for dirname, _, filenames in os.walk('/kaggle/input'):
    # Files sitting directly in the competition folder are the competition's
    # own data, not model predictions, so the whole folder is skipped.
    if dirname == competitionDir:
        continue
    for filename in filenames:
        if '.csv' not in filename:
            continue
        df = pd.read_csv(os.path.join(dirname, filename))
        # Keep only files that look like a complete submission: same number of
        # rows as the template and an explicit target column. Checking the
        # column directly avoids hiding unrelated errors behind a blanket
        # `except Exception: pass`.
        if len(df) == len(submission) and targetName in df.columns:
            preds.append(df[targetName])

## Save ensemble prediction to csv

In [None]:
# Majority vote: for every sample keep the label predicted most often.
if not preds:
    # Without any votes there is nothing to ensemble; fail loudly instead of
    # writing a submission filled with NaN or hitting an obscure shape error.
    raise ValueError(
        "No prediction files were collected; attach at least one notebook "
        "output containing a '{}' column.".format(targetName)
    )
# One row per prediction file, one column per sample.
votes = np.array(preds)
# Depending on the SciPy version, stats.mode returns the modes with shape
# (1, n_samples) or (n_samples,); np.ravel yields a flat vector in both cases.
submission[targetName] = np.ravel(stats.mode(votes, axis=0)[0])
submission.to_csv("submission.csv", index=False)

## Distribution of the predicted classes

In [None]:
# Bar chart of how often the ensemble predicts each class.
# The 'Count' column holds the natural logarithm of the class frequencies,
# not the raw counts, so the figure labels below say so explicitly.
target_df = pd.DataFrame(np.log(submission[targetName].value_counts())).reset_index()
target_df.columns = [targetName, 'Count']
fig = px.bar(data_frame = target_df,
             x = targetName,
             y = 'Count',
             color = "Count",
             color_continuous_scale="Emrld",
             # Relabel the axis and colour bar: the plotted values are log counts.
             labels={'Count': 'log(count)'},
             title='Distribution of the predicted classes (log scale)')
fig.show()