In [1]:
import os
import shutil
import warnings
from functools import reduce

import numpy as np
import pandas as pd
from scipy import stats

# Suppress every warning category for the whole process. NOTE(review): this
# also hides deprecation and numerical warnings raised by the libraries used
# below, so consider narrowing the filter if something looks off.
warnings.filterwarnings('ignore')

class OptimizedEnsemble:
    """Blend lightly perturbed copies of one prediction vector into CSV files.

    The pipeline is: draw several tiny-noise variants of the base predictions,
    combine them into one vector, softly pull extreme values towards the
    median, and write the result to disk.

    Attributes:
        random_seed: Base seed. Attempt ``i`` of
            ``create_ensemble_set_repeated`` draws its noise from a generator
            seeded with ``random_seed + i``.
    """

    def __init__(self, random_seed=42):
        self.random_seed = random_seed

    def create_minimal_noise_variants(self, base_preds, rng=None):
        """Return five copies of ``base_preds``, four of them lightly perturbed.

        Variants, in order:
            1. Gaussian noise (sigma 0.00025).
            2. Uniform noise in [-0.00015, 0.00015).
            3. Uniform noise in [-0.0001, 0.0001) applied only to values below
               the 5th or above the 95th percentile of ``base_preds``.
            4. Laplace noise (scale 0.00015) clipped to +/-0.0003.
            5. An unmodified copy.

        Args:
            base_preds: 1-D array-like of numeric predictions.
            rng: Optional source of randomness exposing ``normal``,
                ``uniform`` and ``laplace`` (for example a
                ``np.random.RandomState``). When omitted, NumPy's global
                random state is used, as in earlier versions of this method.

        Returns:
            A list of five ``np.ndarray`` objects with the length of
            ``base_preds``.
        """
        rng = np.random if rng is None else rng
        base = np.asarray(base_preds)
        n = len(base)

        # The draw order below is part of the reproducibility contract: for a
        # given seed, changing it would change every generated file.
        gaussian_noise = rng.normal(0, 0.00025, n)
        symmetric_noise = rng.uniform(-0.00015, 0.00015, n)

        low_cut, high_cut = np.percentile(base, [5, 95])
        in_tails = (base < low_cut) | (base > high_cut)
        targeted_noise = np.where(in_tails, rng.uniform(-0.0001, 0.0001, n), 0)

        laplace_noise = np.clip(rng.laplace(0, 0.00015, n), -0.0003, 0.0003)

        return [
            base + gaussian_noise,
            base + symmetric_noise,
            base + targeted_noise,
            base + laplace_noise,
            base.copy(),
        ]

    def create_weighted_ensemble(self, variants, strategy='median_weighted'):
        """Collapse a sequence of equally shaped prediction arrays into one.

        Args:
            variants: Sequence of arrays with identical shape.
            strategy: One of

                * ``'median'``: element-wise median.
                * ``'mean'``: element-wise mean.
                * ``'median_weighted'``: weighted mean in which each variant's
                  weight is the inverse of its mean absolute deviation from
                  the element-wise median.
                * any other value: 20% trimmed mean (kept as the fallback so
                  existing callers passing other names keep working).

        Returns:
            ``np.ndarray`` with the shape of a single variant.
        """
        if strategy == 'median':
            return np.median(variants, axis=0)
        if strategy == 'mean':
            return np.mean(variants, axis=0)
        if strategy == 'median_weighted':
            median_preds = np.median(variants, axis=0)
            # The 1e-6 floor keeps the weight finite for a variant that
            # coincides with the median.
            weights = np.array([
                1.0 / (np.mean(np.abs(v - median_preds)) + 1e-6)
                for v in variants
            ])
            weights = weights / np.sum(weights)
            return np.average(variants, axis=0, weights=weights)
        return stats.trim_mean(variants, 0.2, axis=0)

    def apply_robust_post_processing(self, predictions):
        """Softly shrink outliers towards the median and round to 8 decimals.

        A value is an outlier when it lies more than 3.5 robust standard
        deviations from the median, where the robust standard deviation is
        ``1.4826 * MAD`` (the usual factor that rescales the median absolute
        deviation to a standard deviation under normality). Outliers are moved
        0.005% of the way towards the median.

        When the MAD is zero (at least half of the values equal the median)
        the band has zero width and would flag every non-median value, so the
        shrink step is skipped in that case.

        Args:
            predictions: Array-like of numeric predictions.

        Returns:
            A new ``np.ndarray``; the input is not modified.
        """
        processed = np.array(predictions)
        median_val = np.median(processed)
        mad = np.median(np.abs(processed - median_val))

        if mad > 0:
            half_width = 3.5 * mad * 1.4826
            upper_bound = median_val + half_width
            lower_bound = median_val - half_width
            is_outlier = (processed > upper_bound) | (processed < lower_bound)
            processed = np.where(
                is_outlier,
                processed * 0.99995 + median_val * 0.00005,
                processed,
            )

        return np.round(processed, 8)

    def create_ensemble_set_repeated(self, base_submission, repeat_n=10,
                                     target_col='loan_paid_back',
                                     output_dir=None):
        """Write ``repeat_n`` ensemble CSVs and copy the last to ``submission.csv``.

        For attempt ``i`` (1-based) the noise is drawn from a private
        ``np.random.RandomState(self.random_seed + i)``. This yields the same
        numbers as seeding NumPy's global generator with that value, but
        leaves the caller's global random state untouched.

        Args:
            base_submission: ``pandas.DataFrame`` holding the predictions in
                ``target_col``. It is not modified.
            repeat_n: Number of files to generate; must be at least 1.
            target_col: Name of the prediction column to perturb.
            output_dir: Directory for the generated files. It is created when
                missing. ``None`` (default) writes to the current working
                directory.

        Raises:
            ValueError: If ``repeat_n`` is smaller than 1.
            KeyError: If ``target_col`` is not a column of ``base_submission``.
        """
        if repeat_n < 1:
            # Without at least one attempt there is no file to promote to
            # submission.csv; fail early with a clear message.
            raise ValueError(f"repeat_n must be >= 1, got {repeat_n!r}")

        base_preds = base_submission[target_col].values

        if output_dir is not None:
            os.makedirs(output_dir, exist_ok=True)

        def _path(name):
            # Keep bare file names when no directory was requested so the
            # printed messages match the historical output.
            return name if output_dir is None else os.path.join(output_dir, name)

        last_path = None
        for i in range(1, repeat_n + 1):
            print(f"\n== Ensemble attempt {i} ==")
            rng = np.random.RandomState(self.random_seed + i)
            variants = self.create_minimal_noise_variants(base_preds, rng=rng)
            ensemble_preds = self.create_weighted_ensemble(
                variants, strategy='median_weighted')
            final_preds = self.apply_robust_post_processing(ensemble_preds)

            result_df = base_submission.copy()
            result_df[target_col] = final_preds

            last_path = _path(f"submission_median_weighted_{i}.csv")
            result_df.to_csv(last_path, index=False)
            print(f"Saved: {last_path} (Mean: {np.mean(final_preds):.6f}, "
                  f"Std: {np.std(final_preds):.6f})")

        final_path = _path("submission.csv")
        shutil.copy(last_path, final_path)
        print(f"\n🎯 Recommend submit: {final_path} (from last repeat)")

def run_ensemble_repeated(
    input_path='/kaggle/input/ps-s5e11-hb11g-2/submission.csv',
    repeat_n=17,
    random_seed=42,
):
    """Load a base submission CSV and generate the repeated ensemble files.

    Called without arguments this reproduces the original run: the Kaggle
    input file, 17 attempts and base seed 42.

    Args:
        input_path: Path of the base submission CSV to read.
        repeat_n: Number of ensemble attempts to generate.
        random_seed: Base seed handed to ``OptimizedEnsemble``.
    """
    base_sub = pd.read_csv(input_path)
    ensemble = OptimizedEnsemble(random_seed=random_seed)
    ensemble.create_ensemble_set_repeated(base_sub, repeat_n=repeat_n)

# Entry point: run the ensemble pipeline with its default settings when this
# file is executed as the main module rather than imported.
if __name__ == '__main__':
    run_ensemble_repeated()



== Ensemble attempt 1 ==
Saved: submission_median_weighted_1.csv (Mean: 0.680436, Std: 0.259379)

== Ensemble attempt 2 ==
Saved: submission_median_weighted_2.csv (Mean: 0.680436, Std: 0.259379)

== Ensemble attempt 3 ==
Saved: submission_median_weighted_3.csv (Mean: 0.680436, Std: 0.259379)

== Ensemble attempt 4 ==
Saved: submission_median_weighted_4.csv (Mean: 0.680436, Std: 0.259379)

== Ensemble attempt 5 ==
Saved: submission_median_weighted_5.csv (Mean: 0.680436, Std: 0.259379)

== Ensemble attempt 6 ==
Saved: submission_median_weighted_6.csv (Mean: 0.680436, Std: 0.259379)

== Ensemble attempt 7 ==
Saved: submission_median_weighted_7.csv (Mean: 0.680436, Std: 0.259379)

== Ensemble attempt 8 ==
Saved: submission_median_weighted_8.csv (Mean: 0.680436, Std: 0.259379)

== Ensemble attempt 9 ==
Saved: submission_median_weighted_9.csv (Mean: 0.680436, Std: 0.259379)

== Ensemble attempt 10 ==
Saved: submission_median_weighted_10.csv (Mean: 0.680436, Std: 0.259379)

== Ensemble attem