Original Notebook:

https://www.kaggle.com/code/ducknew/drw-blend-h-v-remix-higher-changepoint

In [2]:
import pandas as pd
import numpy as np
from prophet import Prophet
from sklearn.model_selection import TimeSeriesSplit
from sklearn.preprocessing import minmax_scale

import warnings
warnings.filterwarnings('ignore')

def iBlend(path_to_ds, file_short_names, sls):
    """Blend several submission CSVs using rank-dependent per-row weights.

    For every row the submissions are ranked by predicted value (once in
    descending and once in ascending order).  Each submission's base weight
    is adjusted by a correction looked up at its rank, and the weighted
    predictions are summed.  Rows whose spread ``abs(max - min)`` exceeds
    0.74 use ``subm``/``subwts``; all other rows use ``subm2``/``subwts2``.
    The two per-order results are finally mixed as
    ``desc * result_desc + asc * result_asc``.

    Parameters:
    path_to_ds: Path prefix of the sample submission; the file read is
        ``path_to_ds + file_short_names[1] + ".csv"``.
    file_short_names: Submission short names; only index 1 is used here.
    sls: Settings dict with the keys ``path``, ``target``, ``prefix``,
        ``desc``, ``asc``, ``subm``, ``subwts``, ``subm2`` and ``subwts2``.
        ``sort`` and ``q_rows`` may be present but are not required: the
        ordering is set internally on a copy and the row count is taken
        from the merged data.

    Returns:
    The sample submission DataFrame with its ``sls['target']`` column
    replaced by the blended prediction, matched to the rows by ``ID``
    (IDs are assumed to be unique).

    Side effects:
    Writes ``tida_desc.csv`` and ``tida_asc.csv`` to the working directory,
    shows the first 100 rows of each per-order table, and leaves the pandas
    options ``display.max_rows`` / ``display.float_format`` modified.
    """
    from functools import reduce

    # ``display`` exists as a builtin only inside IPython/Jupyter; fall back
    # to ``print`` so the function also works when run as a plain script.
    try:
        show = display
    except NameError:
        show = print

    # Rows with a larger spread than this use the primary weight set.
    spread_threshold = 0.74

    def tida(cfg):
        """Build the per-row blend table for the ordering in ``cfg['sort']``."""
        target = cfg["target"]
        prefix = cfg["prefix"]
        descending = cfg["sort"] == 'desc'

        def read_subm(i):
            name = cfg["subm"][i]["name"]
            # "submission(47)" is stored in a different dataset directory.
            if name == "submission(47)":
                csv_path = "/kaggle/input/convergence-drw/" + name + ".csv"
            else:
                csv_path = cfg["path"] + name + ".csv"
            # Name the prediction column after the submission it came from.
            return pd.read_csv(csv_path).rename(columns={'target': name, target: name})

        frames = [read_subm(i) for i in range(len(cfg["subm"]))]
        # Folding the merge also handles a single submission.
        df_subms = reduce(lambda left, right: pd.merge(left, right, on=['ID']), frames)

        cols = [c for c in df_subms.columns if c != "ID"]
        short_name_cols = [c.replace(prefix, '') for c in cols]
        corrects = list(cfg["subwts"])
        weights = [s['weight'] for s in cfg["subm"]]
        corrects2 = list(cfg["subwts2"])
        weights2 = [s['weight'] for s in cfg["subm2"]]

        def rank_names(row):
            # ``sorted`` is stable, so tied predictions keep column order.
            ordered = sorted(cols, key=lambda c: row[c], reverse=descending)
            return [c.replace(prefix, '') for c in ordered]

        def blend_row(row):
            # position[j] is the rank of submission j within this row.
            position = [row['alls'].index(name) for name in short_name_cols]
            if row['abs(mx-m)'] > spread_threshold:
                base, bonus = weights, corrects
            else:
                base, bonus = weights2, corrects2
            return sum(row[c] * (base[j] + bonus[position[j]]) for j, c in enumerate(cols))

        # Spread between the highest and lowest prediction of each row.
        values = df_subms[cols].to_numpy()
        df_subms['abs(mx-m)'] = np.abs(values.max(axis=1) - values.min(axis=1))

        df_subms['alls'] = df_subms.apply(rank_names, axis=1)
        df_subms[target] = df_subms.apply(blend_row, axis=1)

        df_subms = df_subms.rename(columns=dict(zip(cols, short_name_cols)))
        df_subms = df_subms.rename(columns={target: "ensemble"})

        # Blank spacer column, repeated below purely to separate the groups
        # visually; sized from the data rather than a configured row count.
        df_subms.insert(loc=1, column=' _ ', value=['   '] * len(df_subms))

        pd.set_option('display.max_rows', 100)
        pd.set_option('display.float_format', '{:.3f}'.format)
        vcols = (['ID', ' _ '] + short_name_cols
                 + [' _ ', 'abs(mx-m)', ' _ ', 'alls', ' _ ', 'ensemble'])
        df_subms = df_subms[vcols]
        show(df_subms.head(100))
        pd.set_option('display.float_format', '{:.7f}'.format)

        return df_subms.rename(columns={"ensemble": target})

    sample_subm = pd.read_csv(path_to_ds + file_short_names[1] + ".csv")

    def ensemble_tida(cfg, submission=sample_subm):
        target, d, a = cfg['target'], cfg['desc'], cfg['asc']
        per_order = {}
        for order in ('desc', 'asc'):
            # Work on a shallow copy so the caller's settings stay untouched.
            table = tida(dict(cfg, sort=order))[['ID', target]]
            table.to_csv(f'tida_{order}.csv', index=False)
            per_order[order] = table.set_index('ID')[target]
        blended = per_order['desc'] * d + a * per_order['asc']
        # Match on ID instead of row position so a differently ordered
        # sample file still receives the right prediction for each row.
        submission[target] = submission['ID'].map(blended)
        return submission

    return ensemble_tida(sls)


def train_prophet_enhancement(df_ensemble, prophet_weight=0.01):
    """
    Fit a Prophet model to the ensemble predictions and blend it back in.

    The row IDs are turned into an artificial hourly timeline, Prophet is
    fitted to the ensemble prediction along that timeline, and the in-sample
    Prophet fit is mixed into the ensemble as
    ``(1 - prophet_weight) * ensemble + prophet_weight * prophet``.

    Parameters:
    df_ensemble: DataFrame with 'ID' and 'prediction' columns. If an
        'abs(mx-m)' column is present it is added as an extra regressor.
    prophet_weight: Weight for Prophet predictions (default 0.01)

    Returns:
    Tuple ``(df_final, model, forecast)``: the blended predictions as a
    DataFrame with 'ID' and 'prediction', the fitted Prophet model, and the
    forecast frame returned by ``model.predict``.
    """

    print("Training Prophet model for ensemble enhancement...")

    # Prepare data for Prophet: it needs a datetime column, so each ID is
    # mapped to one hour after a fixed base date. Hours rather than days keep
    # the span manageable (538150 hours ≈ 61 years).
    base_date = pd.to_datetime('2024-01-01')
    prophet_df = pd.DataFrame()
    # Lower-case 'h' is the supported unit spelling; upper-case 'H' is deprecated.
    prophet_df['ds'] = base_date + pd.to_timedelta(df_ensemble['ID'] - 1, unit='h')
    prophet_df['y'] = df_ensemble['prediction']

    # Advanced Prophet configuration with tuned hyperparameters
    model = Prophet(
        # Growth parameters
        growth='linear',
        changepoint_prior_scale=0.06,  # More flexible trend changes
        changepoint_range=0.9,         # Allow changepoints through 90% of data
        n_changepoints=75,             # More changepoints for complex patterns

        # Seasonality parameters
        yearly_seasonality=True,
        weekly_seasonality=True,
        daily_seasonality=False,
        seasonality_mode='multiplicative',  # Better for varying amplitude
        seasonality_prior_scale=10.0,      # Strong seasonality

        # Holiday effects (can add specific holidays if known)
        holidays_prior_scale=10.0,

        # Uncertainty intervals
        interval_width=0.92,
        uncertainty_samples=1000,

        # MCMC sampling for better uncertainty estimates
        mcmc_samples=0,  # Set to 300 for full Bayesian inference (slower)
    )

    # Add custom seasonalities for potential patterns
    # Periods are chosen to match the hourly spacing of the timeline
    model.add_seasonality(
        name='pattern_730h',  # ~30 days in hours
        period=730,
        fourier_order=5,
        prior_scale=10
    )

    model.add_seasonality(
        name='pattern_2190h',  # ~3 months in hours
        period=2190,
        fourier_order=10,
        prior_scale=5
    )

    # Use the per-row spread between submissions as an extra regressor when
    # the ensemble frame carries it.
    has_spread = 'abs(mx-m)' in df_ensemble.columns
    if has_spread:
        prophet_df['variance'] = df_ensemble['abs(mx-m)']
        model.add_regressor('variance', prior_scale=5.0)

    # Fit the model
    print("Fitting Prophet model...")
    model.fit(prophet_df)

    # Predict on the training timestamps themselves (in-sample fit)
    future = prophet_df[['ds']].copy()
    if has_spread:
        future['variance'] = prophet_df['variance']

    forecast = model.predict(future)

    # NOTE(review): the forecast is consumed positionally, which assumes its
    # rows come back in the same order as df_ensemble (i.e. IDs ascending) —
    # confirm for inputs that are not sorted by ID.
    prophet_predictions = forecast['yhat'].values

    # Blend with ensemble
    ensemble_predictions = df_ensemble['prediction'].values
    final_predictions = (1 - prophet_weight) * ensemble_predictions + prophet_weight * prophet_predictions

    # Create final dataframe
    df_final = pd.DataFrame({
        'ID': df_ensemble['ID'],
        'prediction': final_predictions
    })

    # Print enhancement statistics (difference between Prophet and ensemble)
    adjustment = prophet_predictions - ensemble_predictions
    print("\nProphet Enhancement Statistics:")
    print(f"Mean adjustment: {np.mean(adjustment):.6f}")
    print(f"Std adjustment: {np.std(adjustment):.6f}")
    print(f"Max adjustment: {np.max(np.abs(adjustment)):.6f}")

    return df_final, model, forecast


# Main execution

# Path prefix that is concatenated directly with each short name below
# (prefix + short name + ".csv"), so the trailing space is part of it.
path_to_ds = '/kaggle/input/15-juli-2025-drw/submission '
file_short_names = ['0.83975', '0.86767', '0.88377', '0.89178', 'submission(47)']

# Base weight of each submission, in the same order as file_short_names.
# The first set is used for rows with a large spread between submissions,
# the second set for all other rows.
primary_weights = [0.20, 0.20, 0.21, 0.23, 0.30]
secondary_weights = [0.19, 0.20, 0.21, 0.22, 0.31]

params = {
    'path': path_to_ds,
    'sort': "dynamic",
    'target': "prediction",
    'q_rows': 538_150,
    'prefix': "subm_",
    # Mixing factors for the descending-order and ascending-order blends.
    'desc': 0.30,
    'asc': 0.70,
    # Rank-indexed corrections added to the base weights.
    'subwts': [+0.20, +0.10, -0.05, -0.10, -0.15],
    'subm': [
        {'name': short_name, 'weight': weight}
        for short_name, weight in zip(file_short_names, primary_weights)
    ],
    'subwts2': [+0.18, +0.09, -0.04, -0.09, -0.14],
    'subm2': [
        {'name': short_name, 'weight': weight}
        for short_name, weight in zip(file_short_names, secondary_weights)
    ],
}

# Step 1: blend the individual submissions into one ensemble prediction
df_ensemble = iBlend(path_to_ds, file_short_names, params)

# Step 2: mix a small share of the Prophet fit into the ensemble
df_final, prophet_model, forecast = train_prophet_enhancement(
    df_ensemble,
    prophet_weight=0.09,
)

# Step 3: write the submission file
df_final.to_csv('submission.csv', index=False)

final_prediction = df_final['prediction']
print("\nFinal submission saved to 'submission.csv'")
print(f"Shape: {df_final.shape}")
print(f"Prediction range: [{final_prediction.min():.6f}, {final_prediction.max():.6f}]")

# Show the first rows of the final predictions
display(df_final.head(20))

Unnamed: 0,ID,_,0.83975,0.86767,0.88377,0.89178,submission(47),_.1,abs(mx-m),_.2,alls,_.3,ensemble
0,1,,0.009,-0.107,-0.19,-0.191,-0.188,,0.2,,"[0.83975, 0.86767, submission(47), 0.88377, 0....",,-0.116
1,2,,0.249,0.416,0.215,0.196,0.258,,0.219,,"[0.86767, submission(47), 0.83975, 0.88377, 0....",,0.34
2,3,,-1.531,-1.981,-1.099,-0.971,-1.004,,1.01,,"[0.89178, submission(47), 0.88377, 0.83975, 0....",,-1.247
3,4,,-0.236,-0.237,-0.154,-0.157,-0.206,,0.083,,"[0.88377, 0.89178, submission(47), 0.83975, 0....",,-0.202
4,5,,-0.199,0.289,0.173,0.178,0.132,,0.488,,"[0.86767, 0.89178, 0.88377, submission(47), 0....",,0.214
5,6,,-0.169,-0.472,-0.799,-0.766,-0.822,,0.653,,"[0.83975, 0.86767, 0.89178, 0.88377, submissio...",,-0.573
6,7,,-0.388,1.238,1.384,1.976,1.27,,2.364,,"[0.89178, 0.88377, submission(47), 0.86767, 0....",,1.7
7,8,,0.057,0.126,-0.27,-0.326,-0.168,,0.452,,"[0.86767, 0.83975, submission(47), 0.88377, 0....",,-0.04
8,9,,0.247,0.645,0.088,0.077,0.163,,0.568,,"[0.86767, 0.83975, submission(47), 0.88377, 0....",,0.375
9,10,,0.23,0.071,0.212,0.189,0.184,,0.159,,"[0.83975, 0.88377, 0.89178, submission(47), 0....",,0.227


Unnamed: 0,ID,_,0.83975,0.86767,0.88377,0.89178,submission(47),_.1,abs(mx-m),_.2,alls,_.3,ensemble
0,1,,0.009,-0.107,-0.19,-0.191,-0.188,,0.2,,"[0.89178, 0.88377, submission(47), 0.86767, 0....",,-0.196
1,2,,0.249,0.416,0.215,0.196,0.258,,0.219,,"[0.89178, 0.88377, 0.83975, submission(47), 0....",,0.262
2,3,,-1.531,-1.981,-1.099,-0.971,-1.004,,1.01,,"[0.86767, 0.83975, 0.88377, submission(47), 0....",,-1.706
3,4,,-0.236,-0.237,-0.154,-0.157,-0.206,,0.083,,"[0.86767, 0.83975, submission(47), 0.89178, 0....",,-0.243
4,5,,-0.199,0.289,0.173,0.178,0.132,,0.488,,"[0.83975, submission(47), 0.88377, 0.89178, 0....",,0.049
5,6,,-0.169,-0.472,-0.799,-0.766,-0.822,,0.653,,"[submission(47), 0.88377, 0.89178, 0.86767, 0....",,-0.841
6,7,,-0.388,1.238,1.384,1.976,1.27,,2.364,,"[0.83975, 0.86767, submission(47), 0.88377, 0....",,0.844
7,8,,0.057,0.126,-0.27,-0.326,-0.168,,0.452,,"[0.89178, 0.88377, submission(47), 0.83975, 0....",,-0.243
8,9,,0.247,0.645,0.088,0.077,0.163,,0.568,,"[0.89178, 0.88377, submission(47), 0.83975, 0....",,0.164
9,10,,0.23,0.071,0.212,0.189,0.184,,0.159,,"[0.86767, submission(47), 0.89178, 0.88377, 0....",,0.171


Training Prophet model for ensemble enhancement...
Fitting Prophet model...


11:46:49 - cmdstanpy - INFO - Chain [1] start processing
11:48:30 - cmdstanpy - INFO - Chain [1] done processing



Prophet Enhancement Statistics:
Mean adjustment: -0.000082
Std adjustment: 0.694064
Max adjustment: 6.832411

Final submission saved to 'submission.csv'
Shape: (538150, 2)
Prediction range: [-6.244416, 4.698712]


Unnamed: 0,ID,prediction
0,1,-0.1592111
1,2,0.2570667
2,3,-1.4300889
3,4,-0.2129313
4,5,0.086695
5,6,-0.6948136
6,7,0.9988112
7,8,-0.1687211
8,9,0.2041782
9,10,0.1683793


Result:

Score: 0.95004

Rank: 32 (2025-07-18-21:01, JST)

Runtime: 12min (Kaggle editor)

Your Best Entry!
Your most recent submission scored 0.95004, which is an improvement of your previous score of 0.95001. Great job!

Moving up to rank 32. Rising like my electricity bill. #kaggle - https://kaggle.com/competitions/drw-crypto-market-prediction 

DRW Trading Group. DRW - Crypto Market Prediction. https://kaggle.com/competitions/drw-crypto-market-prediction, 2025. Kaggle.