In [1]:
import pandas as pd
import numpy as np
import os
from pathlib import Path
from scipy import stats
import warnings
# Suppress every warning for the rest of the session (this also hides any
# pandas warnings raised by later cells).
warnings.filterwarnings(action='ignore')

# Render up to 100 columns when a DataFrame is displayed.
pd.set_option('display.max_columns', 100)
print("‚úÖ Libraries loaded")

‚úÖ Libraries loaded


In [2]:
# Kaggle input/output locations. Both stay plain strings with a trailing slash
# because paths are built from them by simple concatenation.
DATA_PATH   = '/kaggle/input/forecasting-the-future-the-helios-corn-climate-challenge/'
OUTPUT_PATH = '/kaggle/working/'

# Daily master table; the date column is converted to datetime right after loading.
daily_master_csv = DATA_PATH + 'corn_climate_risk_futures_daily_master.csv'
df = pd.read_csv(daily_master_csv)
df['date_on'] = pd.to_datetime(df['date_on'])

# Regional market-share table (joined onto the daily rows in a later cell).
market_share_csv = DATA_PATH + 'corn_regional_market_share.csv'
market_share_df = pd.read_csv(market_share_csv)

print(f"üìä Dataset: {df.shape[0]:,} rows")

üìä Dataset: 320,661 rows


In [3]:
RISK_CATEGORIES = ['heat_stress', 'unseasonably_cold', 'excess_precip', 'drought']

# Start the working frame as a copy of the raw data with two calendar columns added.
merged_daily_df = df.assign(
    day_of_year=df['date_on'].dt.dayofyear,
    quarter=df['date_on'].dt.quarter,
)

# Attach each region's production share; regions without a match get 1.0.
region_share = market_share_df[['region_id', 'percent_country_production']]
merged_daily_df = merged_daily_df.merge(region_share, how='left', on='region_id')
merged_daily_df['percent_country_production'] = (
    merged_daily_df['percent_country_production'].fillna(1.0)
)

# Track which features we create
CREATED_FEATURES = []

print("‚úÖ Base setup")

‚úÖ Base setup


In [4]:
# For each risk category, on rows where harvest_period == "Off-season": write the
# row-wise sum of all columns whose name contains the category into the first
# such column, then zero the remaining ones.
# NOTE(review): this mutates `df`, but `merged_daily_df` was already created from
# `df.copy()` in an earlier cell and the later cells read `merged_daily_df` /
# `submission`, not `df`. In a top-to-bottom run this cell therefore does not
# appear to influence any feature or the saved submission. Confirm whether it
# was meant to target `merged_daily_df` (or to run before the copy is taken).
for cat in RISK_CATEGORIES:
    cols = [x for x in df.columns if cat in x] 
    df.loc[df.harvest_period=="Off-season",cols[0]] = df[cols].sum(axis=1)
    df.loc[df.harvest_period=="Off-season",cols[1:]] = 0

In [5]:
# Risk scores
COUNT_PREFIX = 'climate_risk_cnt_locations_'

for risk_type in RISK_CATEGORIES:
    n_low = merged_daily_df[f'{COUNT_PREFIX}{risk_type}_risk_low']
    n_med = merged_daily_df[f'{COUNT_PREFIX}{risk_type}_risk_medium']
    n_high = merged_daily_df[f'{COUNT_PREFIX}{risk_type}_risk_high']

    score_name = f'climate_risk_{risk_type}_score'
    weighted_name = f'climate_risk_{risk_type}_weighted'

    # Medium locations count once and high locations twice, relative to all
    # locations; the 1e-6 keeps the division defined when every count is zero.
    n_total = n_low + n_med + n_high
    severity = (n_med + 2 * n_high) / (n_total + 1e-6)

    merged_daily_df[score_name] = severity
    # Same score scaled by the region's share of national production (percent -> fraction).
    merged_daily_df[weighted_name] = severity * (merged_daily_df['percent_country_production'] / 100)
    CREATED_FEATURES.extend([score_name, weighted_name])

print(f"‚úÖ Risk scores: {len(CREATED_FEATURES)} features")

‚úÖ Risk scores: 8 features


In [6]:
# Composite indices
def _score_col(risk):
    """Name of the per-risk score column created by the risk-score cell."""
    return f'climate_risk_{risk}_score'


score_cols = [_score_col(r) for r in RISK_CATEGORIES]
temperature_cols = [_score_col('heat_stress'), _score_col('unseasonably_cold')]
precipitation_cols = [_score_col('excess_precip'), _score_col('drought')]

# Row-wise worst case within each family, plus worst case / average over all four scores.
composite_features = {
    'climate_risk_temperature_stress': merged_daily_df[temperature_cols].max(axis=1),
    'climate_risk_precipitation_stress': merged_daily_df[precipitation_cols].max(axis=1),
    'climate_risk_overall_stress': merged_daily_df[score_cols].max(axis=1),
    'climate_risk_combined_stress': merged_daily_df[score_cols].mean(axis=1),
}
for feature_name, feature_values in composite_features.items():
    merged_daily_df[feature_name] = feature_values

# Extending with the dict adds its keys in insertion order.
CREATED_FEATURES.extend(composite_features)

print(f"‚úÖ Composites: {len(CREATED_FEATURES)} total features")

‚úÖ Composites: 12 total features


In [7]:
# Rolling features
merged_daily_df = merged_daily_df.sort_values(['region_id', 'date_on'])


def _rolling_by_region(column, window):
    """Rolling window over `column`, computed separately inside each region_id."""
    return (
        merged_daily_df.groupby('region_id')[column]
        .rolling(window=window, min_periods=1)
    )


for window in [7, 14, 30]:
    for risk_type in RISK_CATEGORIES:
        score_col = f'climate_risk_{risk_type}_score'

        # new column name -> rolling statistic that fills it
        rolled = {
            f'climate_risk_{risk_type}_ma_{window}d': 'mean',
            f'climate_risk_{risk_type}_max_{window}d': 'max',
        }
        for new_col, stat in rolled.items():
            stat_values = getattr(_rolling_by_region(score_col, window), stat)()
            # Drop the region_id index level so the result aligns on the original row index.
            merged_daily_df[new_col] = stat_values.reset_index(level=0, drop=True)

        CREATED_FEATURES.extend(rolled)

print(f"‚úÖ Rolling: {len(CREATED_FEATURES)} total features")

‚úÖ Rolling: 36 total features


In [8]:
# Momentum features (the diffs leave NaN in the first rows of every region,
# which is what later determines the valid rows)
for risk_type in RISK_CATEGORIES:
    base = f'climate_risk_{risk_type}'
    score_by_region = merged_daily_df.groupby('region_id')[f'{base}_score']

    c1, c7, acc = f'{base}_change_1d', f'{base}_change_7d', f'{base}_acceleration'

    merged_daily_df[c1] = score_by_region.diff(1)
    merged_daily_df[c7] = score_by_region.diff(7)
    # Acceleration is the day-over-day change of the 1-day change.
    merged_daily_df[acc] = merged_daily_df.groupby('region_id')[c1].diff(1)

    CREATED_FEATURES.extend([c1, c7, acc])

print(f"‚úÖ Momentum: {len(CREATED_FEATURES)} total features")

‚úÖ Momentum: 48 total features


In [9]:
# Country aggregations
GROUP_KEYS = ['country_name', 'date_on']

for risk_type in RISK_CATEGORIES:
    agg_spec = {
        f'climate_risk_{risk_type}_score': ['mean', 'max', 'std'],
        f'climate_risk_{risk_type}_weighted': 'sum',
        'percent_country_production': 'sum',
    }
    country_agg = merged_daily_df.groupby(GROUP_KEYS).agg(agg_spec).round(4)

    # Flatten the (source column, statistic) column index into single prefixed names.
    flat_names = []
    for col in country_agg.columns:
        flat_names.append(f'country_{risk_type}_' + "_".join(col).strip())
    country_agg.columns = flat_names
    country_agg = country_agg.reset_index()

    CREATED_FEATURES.extend(c for c in country_agg.columns if c not in GROUP_KEYS)

    # Broadcast the country/day aggregates back onto every regional row.
    merged_daily_df = merged_daily_df.merge(country_agg, on=GROUP_KEYS, how='left')

print(f"‚úÖ Country aggs: {len(CREATED_FEATURES)} total features")

‚úÖ Country aggs: 68 total features


In [10]:
# Get valid rows
print(f"\nüìä Before dropna: {len(merged_daily_df):,}")
# NOTE(review): despite the "dropna" wording in the surrounding prints, no rows
# are dropped here -- baseline_df is a plain copy of merged_daily_df, so the
# "After" count equals the "Before" count. Rows containing NaN are only removed
# later, where `submission` is filtered with a mask built from merged_daily_df.
baseline_df = merged_daily_df.copy()
print(f"üìä After dropna: {len(baseline_df):,} (expected: 219,161)")


üìä Before dropna: 320,661
üìä After dropna: 320,661 (expected: 219,161)


In [11]:
def compute_cfcs(df):
    """Compute the CFCS score for a dataframe.

    For every (country_name, date_on_month) group, the Pearson correlation
    between each ``climate_risk_*`` column and each ``futures_*`` column is
    collected. Columns whose standard deviation inside the group is not
    strictly positive are skipped, and NaN correlations are discarded.

    The score combines three components computed on absolute correlations:
    ``0.5 * avg_sig_score + 0.3 * max_score + 0.2 * sig_pct``, where a
    correlation is "significant" when its absolute value is >= 0.5.

    Parameters
    ----------
    df : pandas.DataFrame
        Must contain ``country_name`` and ``date_on_month`` columns; climate
        and futures columns are picked up by their name prefix.

    Returns
    -------
    dict
        Keys ``cfcs``, ``avg_sig_corr``, ``max_corr``, ``sig_count``,
        ``total`` and ``sig_pct``. When no correlation can be computed every
        component (and therefore ``cfcs``) is 0.
    """
    climate_cols = [c for c in df.columns if c.startswith("climate_risk_")]
    futures_cols = [c for c in df.columns if c.startswith("futures_")]

    correlations = []

    for country in df['country_name'].unique():
        df_country = df[df['country_name'] == country]

        for month in df_country['date_on_month'].unique():
            df_month = df_country[df_country['date_on_month'] == month]

            # Evaluate each column's spread once per group instead of once per pair.
            valid_clim = [c for c in climate_cols if df_month[c].std() > 0]
            valid_fut = [c for c in futures_cols if df_month[c].std() > 0]
            if not valid_clim or not valid_fut:
                continue

            # One correlation matrix per group; keep only the climate x futures
            # block. DataFrame.corr handles missing values pairwise, so every
            # entry matches what a two-column corr() would return for that pair.
            corr_block = (
                df_month[valid_clim + valid_fut]
                .corr()
                .loc[valid_clim, valid_fut]
            )
            correlations.extend(corr_block.to_numpy().ravel().tolist())

    # Explicit float dtype keeps the empty case numeric.
    correlations = pd.Series(correlations, dtype=float).dropna()
    abs_corrs = correlations.abs()
    sig_corrs = abs_corrs[abs_corrs >= 0.5]

    avg_sig = sig_corrs.mean() if len(sig_corrs) > 0 else 0
    # Guard the empty case: max() of an empty Series is NaN and min(100, NaN)
    # evaluates to 100, which would report a perfect max component.
    max_corr = abs_corrs.max() if len(abs_corrs) > 0 else 0.0
    sig_pct = len(sig_corrs) / len(correlations) * 100 if len(correlations) > 0 else 0

    avg_sig_score = min(100, avg_sig * 100)
    max_score = min(100, max_corr * 100)

    cfcs = (0.5 * avg_sig_score) + (0.3 * max_score) + (0.2 * sig_pct)

    return {
        'cfcs': round(cfcs, 2),
        'avg_sig_corr': round(avg_sig, 4),
        'max_corr': round(max_corr, 4),
        'sig_count': len(sig_corrs),
        'total': len(correlations),
        'sig_pct': round(sig_pct, 4)
    }

In [12]:
# Start the submission from the baseline frame without the engineered features.
submission = baseline_df.drop(columns=CREATED_FEATURES)

In [13]:
# Country -> value stored later as `percent_world_production`.
prod_dict = {
    "Argentina": 4,
    "Brazil": 11,
    "Canada": 1,
    "China": 24,
    "European Union": 5,
    "India": 3,
    "Mexico": 2,
    "Paraguay": 0.5,
    "Russia": 0.5,
    "Ukraine": 2,
    "United States": 31,
    "South Africa": 1,
}

In [14]:
# Look up each row's country in prod_dict (countries missing from the dict yield None).
submission["percent_world_production"] = submission["country_name"].apply(
    lambda country: prod_dict.get(country)
)

In [15]:
# Independent working copy used to build the date-level aggregates.
submissionm = submission.copy(deep=True)

In [16]:
# Turn the regional risk-count columns into one world-level value per date.
# For every risk family the new "<column>m" columns are built in three steps:
#   1. per row: the column's share of the row total across the family's columns,
#   2. per (country_name, date_on): the sum of those shares, broadcast to every
#      row of that country/day,
#   3. per date_on: the country values scaled by percent_world_production/100 and
#      summed over countries (one row per country/day), broadcast to every row.
#
# Steps 2 and 3 use groupby.transform / Series.map instead of a row-wise
# DataFrame.apply lookup; the values are the same but the work is vectorised.
#
# NOTE(review): the previous version of this cell also wrote
# share * percent_country_production / 100 into a scratch frame (`submissionn`)
# that nothing read afterwards, so regional production weighting never reached
# `submissionm`. That dead write is removed here without changing the results.
# If the weighting was intended, it has to be applied to `submissionm` before
# step 2 -- TODO confirm.
for risk_key in ["heat", "cold", "precip", "drought"]:
    # Columns are taken from `submission`, so the "...m" columns added to
    # `submissionm` below are never matched themselves.
    level_cols = [col for col in submission.columns if risk_key in col]
    share_cols = [col + "m" for col in level_cols]

    # Step 1: the row total only depends on the original columns, so compute it once.
    level_total = submissionm[level_cols].sum(axis=1)
    for col in level_cols:
        submissionm[col + "m"] = submissionm[col] / level_total

    # Step 2: country/day sum, broadcast back to each row of the group.
    for col in share_cols:
        submissionm[col] = (
            submissionm.groupby(["country_name", "date_on"])[col].transform("sum")
        )

    # Step 3: weight by the country's world share and sum over countries per date.
    for col in share_cols:
        submissionm[col] = submissionm[col] * submissionm.percent_world_production / 100
        world_by_date = (
            submissionm.drop_duplicates(["country_name", "date_on"])
            .groupby(["date_on"])[col]
            .sum()
        )
        submissionm[col] = submissionm["date_on"].map(world_by_date)

In [17]:
# Keep one row per date (values were broadcast per date above), ordered by date,
# and retain only the last 12 columns by position plus date_on.
# Note: this reassigns submissionm, so the cell is not safe to run twice.
world_daily = submissionm.drop_duplicates("date_on").sort_values("date_on")
world_daily = world_daily.set_index("date_on")
submissionm = world_daily.iloc[:, -12:].reset_index()

In [18]:
# Names of the date-level climate columns, in frame order.
cr = submissionm.columns[submissionm.columns.str.startswith("climate_risk")].tolist()

In [19]:
ROLL_WINDOWS = [7, 14, 28, 63, 91, 119, 182, 364]


def _add_rolling_stats(frame, source_col, mean_prefix, max_prefix):
    """Add rolling mean/max columns of `source_col` for every window in ROLL_WINDOWS.

    New columns are named `<mean_prefix><window>` and `<max_prefix><window>` and
    are written into `frame` in place (mean first, then max, window by window).
    """
    for window in ROLL_WINDOWS:
        roller = frame[source_col].rolling(window=window, min_periods=1)
        frame[mean_prefix + str(window)] = roller.mean()
        frame[max_prefix + str(window)] = roller.max()


# Rolling statistics of every date-level climate column.
for col in cr:
    _add_rolling_stats(submissionm, col, col + "a", col + "b")

# `cr` is walked in consecutive triples; the 2nd and 3rd column of each triple
# are combined as (2nd + 2 * 3rd) and stored under the 1st column's name + "ma".
for start in range(0, 12, 3):
    base = cr[start]
    combined = base + "ma"
    submissionm[combined] = submissionm[cr[start + 1]] + 2 * submissionm[cr[start + 2]]
    _add_rolling_stats(submissionm, combined, base + "ma", base + "mb")

In [20]:
# Remove the row-level climate_risk_ columns from the submission frame.
raw_climate_cols = [col for col in submission.columns if col.startswith("climate_risk_")]
submission = submission.drop(columns=raw_climate_cols)

In [21]:
# Attach the date-level features to every row sharing that date (inner join).
submission = pd.merge(submission, submissionm, how="inner", on="date_on")

In [22]:
# Keep only the rows for which merged_daily_df has no missing value in any column.
# NOTE(review): the mask is built from merged_daily_df, not from submission, and
# pandas aligns it by index label -- this relies on both frames carrying the
# same row labels in the same row order; confirm if the upstream merges change.
rows_without_nan = merged_daily_df.notna().all(axis=1)
submission = submission[rows_without_nan]

In [23]:
# Rank the climate features by how often they correlate strongly with a futures
# column inside a (country, month) group, and collect every feature outside the
# top N_FEATURES_TO_KEEP in `feats` so a later cell can drop them.
SIG_CORR_THRESHOLD = 0.5   # |correlation| at or above this counts as significant
N_FEATURES_TO_KEEP = 30    # number of best-ranked climate features retained

fr = [x for x in submission.columns if x.startswith("future")]
cr = [x for x in submission.columns if x.startswith("climate_risk_")]

scores = {}
for country in submission.country_name.unique():
    country_rows = submission[submission.country_name == country]
    for month in country_rows.date_on_month.unique():
        month_rows = country_rows[country_rows.date_on_month == month]

        # Every feature gets an entry even if it never varies within a group.
        for feature in cr:
            scores.setdefault(feature, [])

        # Evaluate each column's spread once per group instead of once per pair.
        varying_cr = [c for c in cr if month_rows[c].std() > 0]
        varying_fr = [c for c in fr if month_rows[c].std() > 0]
        if not varying_cr or not varying_fr:
            continue

        # One correlation matrix per group; DataFrame.corr handles missing
        # values pairwise, so each entry equals the two-column corr() of that pair.
        corr_block = (
            month_rows[varying_cr + varying_fr]
            .corr()
            .loc[varying_cr, varying_fr]
        )
        for feature in varying_cr:
            # NaN correlations are kept on purpose: they stay in the denominator below.
            scores[feature].extend(corr_block.loc[feature].tolist())

fs = []
for feature, values in scores.items():
    corr_values = pd.Series(values, dtype=float)
    # A feature with no correlation at all ranks last instead of raising
    # ZeroDivisionError.
    if len(corr_values) > 0:
        sig_share = (corr_values.abs() >= SIG_CORR_THRESHOLD).sum() / len(corr_values)
    else:
        sig_share = 0.0
    fs.append((feature, sig_share))

# Ascending sort followed by a reversal (not reverse=True) so the order among
# tied features is unchanged from the original implementation.
feats = [x[0] for x in sorted(fs, key=lambda x: x[1])[::-1][N_FEATURES_TO_KEEP:]]

In [24]:
# Disabled experiment (kept for reference): greedy elimination that would drop a
# climate_risk_ column whenever removing it raised the CFCS score.
#drop_feats = []
#m= compute_cfcs(submission)["cfcs"]
#for x in submission.columns:
#    if x.startswith('climate_risk_'):
#        t = compute_cfcs(submission.drop([x]+drop_feats,axis=1))["cfcs"]
#        if t>m:
#            print(t,m)
#            m = t
#            drop_feats.append(x)
#if len(drop_feats)>0:
# Drop the low-ranked features collected in `feats` by the previous cell, then
# score the remaining frame (the dict returned by compute_cfcs is the cell output).
submission = submission.drop(feats,axis=1)
compute_cfcs(submission)

{'cfcs': np.float64(63.35),
 'avg_sig_corr': np.float64(0.6056),
 'max_corr': 0.9059,
 'sig_count': 19842,
 'total': 67320,
 'sig_pct': 29.4742}

In [25]:
# Save the final frame as CSV in the output directory, without the index column.
output_file = OUTPUT_PATH + 'submission.csv'
submission.to_csv(path_or_buf=output_file, index=False)

print(f"\nüìÅ Saved: {output_file}")
# (The Version/CFCS prints stay disabled: best_name and best_score are not
# defined anywhere in the visible notebook.)
#print(f"   Version: {best_name}")
#print(f"   CFCS: {best_score}")
n_rows = len(submission)
n_climate_features = sum(1 for c in submission.columns if c.startswith('climate_risk_'))
print(f"   Rows: {n_rows:,}")
print(f"   Climate features: {n_climate_features}")


üìÅ Saved: /kaggle/working/submission.csv
   Rows: 219,161
   Climate features: 30


In [26]:
# Display the final submission frame as the cell output (the rendering below is
# truncated to the first and last rows).
submission

Unnamed: 0,ID,crop_name,country_name,country_code,region_name,region_id,harvest_period,growing_season_year,date_on,futures_close_ZC_1,futures_close_ZC_2,futures_close_ZW_1,futures_close_ZS_1,futures_zc1_ret_pct,futures_zc1_ret_log,futures_zc_term_spread,futures_zc_term_ratio,futures_zc1_ma_20,futures_zc1_ma_60,futures_zc1_ma_120,futures_zc1_vol_20,futures_zc1_vol_60,futures_zw_zc_spread,futures_zc_zw_ratio,futures_zs_zc_spread,futures_zc_zs_ratio,date_on_year,date_on_month,date_on_year_month,day_of_year,quarter,percent_country_production,percent_world_production,climate_risk_cnt_locations_heat_stress_risk_highmb63,climate_risk_cnt_locations_heat_stress_risk_highmb91,climate_risk_cnt_locations_heat_stress_risk_highmb119,climate_risk_cnt_locations_heat_stress_risk_highmb182,climate_risk_cnt_locations_unseasonably_cold_risk_mediummb182,climate_risk_cnt_locations_unseasonably_cold_risk_mediumma364,climate_risk_cnt_locations_unseasonably_cold_risk_mediummb364,climate_risk_cnt_locations_drought_risk_lowma63,climate_risk_cnt_locations_drought_risk_lowma91,climate_risk_cnt_locations_drought_risk_lowmb91,climate_risk_cnt_locations_drought_risk_lowma119,climate_risk_cnt_locations_drought_risk_lowmb119,climate_risk_cnt_locations_drought_risk_lowma182,climate_risk_cnt_locations_drought_risk_lowmb182,climate_risk_cnt_locations_drought_risk_lowma364,climate_risk_cnt_locations_drought_risk_mediumma28,climate_risk_cnt_locations_drought_risk_mediummb28,climate_risk_cnt_locations_drought_risk_mediumma63,climate_risk_cnt_locations_drought_risk_mediumma91,climate_risk_cnt_locations_drought_risk_mediummb91,climate_risk_cnt_locations_drought_risk_mediumma119,climate_risk_cnt_locations_drought_risk_mediummb119,climate_risk_cnt_locations_drought_risk_mediumma182,climate_risk_cnt_locations_drought_risk_mediummb182,climate_risk_cnt_locations_drought_risk_mediumma364,climate_risk_cnt_locations_heat_stress_risk_lowmmb63,climate_risk_cnt_locations_heat_stress_risk_lowmmb91,climate_risk_cnt_loca
tions_heat_stress_risk_lowmmb119,climate_risk_cnt_locations_heat_stress_risk_lowmmb182,climate_risk_cnt_locations_drought_risk_lowmma182
7,36bcf707-3c9a-4516-a20b-eaaaff8ee81c,Corn: Commodity Tracked,Russia,RU,Republic of Mordovia,01ab8962-db3d-49ef-af56-b877ce4f59d7,Off-season,2016,2016-01-08,357.00,362.75,478.50,879.50,0.011331,0.011268,5.75,1.016106,365.1125,368.979167,372.456250,0.010629,0.010832,121.50,0.746082,522.50,0.405912,2016,1,2016_01,8,1,1.0,0.5,0.000000,0.000000,0.000000,0.000000,1.562701,0.804813,1.562701,6.127156,6.127156,6.340547,6.127156,6.340547,6.127156,6.340547,6.127156,0.246528,0.422675,0.246528,0.246528,0.422675,0.246528,0.422675,0.246528,0.422675,0.246528,0.000000,0.000000,0.000000,0.00000,0.389159
10,a7c6b1e6-6f03-4d3b-be88-0b607303c97a,Corn: Commodity Tracked,Russia,RU,Republic of Mordovia,01ab8962-db3d-49ef-af56-b877ce4f59d7,Off-season,2016,2016-01-11,351.75,357.50,469.00,881.00,-0.014706,-0.014815,5.75,1.016347,363.8375,368.525000,372.000000,0.010219,0.010840,117.25,0.750000,529.25,0.399262,2016,1,2016_01,11,1,1.0,0.5,0.000000,0.000000,0.000000,0.000000,1.562701,0.850875,1.562701,6.078792,6.078792,6.340547,6.078792,6.340547,6.078792,6.340547,6.078792,0.257383,0.422675,0.257383,0.257383,0.422675,0.257383,0.422675,0.257383,0.422675,0.257383,0.000000,0.000000,0.000000,0.00000,0.475033
11,8f030962-0267-4f93-97c9-5ae11990b65e,Corn: Commodity Tracked,Russia,RU,Republic of Mordovia,01ab8962-db3d-49ef-af56-b877ce4f59d7,Off-season,2016,2016-01-12,356.75,362.25,481.25,890.75,0.014215,0.014115,5.50,1.015417,363.0250,368.212500,371.616667,0.010766,0.010967,124.50,0.741299,534.00,0.400505,2016,1,2016_01,12,1,1.0,0.5,0.000000,0.000000,0.000000,0.000000,1.562701,0.899232,1.562701,6.060845,6.060845,6.340547,6.060845,6.340547,6.060845,6.340547,6.060845,0.261327,0.422675,0.261327,0.261327,0.422675,0.261327,0.422675,0.261327,0.422675,0.261327,0.000000,0.000000,0.000000,0.00000,0.506984
12,c18c8f9b-63f2-4017-9923-f904db0f7da9,Corn: Commodity Tracked,Russia,RU,Republic of Mordovia,01ab8962-db3d-49ef-af56-b877ce4f59d7,Off-season,2016,2016-01-13,358.00,363.00,478.00,899.00,0.003504,0.003498,5.00,1.013966,361.9750,367.900000,371.239583,0.009983,0.010968,120.00,0.748954,541.00,0.398220,2016,1,2016_01,13,1,1.0,0.5,0.000000,0.000000,0.000000,0.000000,1.562701,0.899026,1.562701,6.056680,6.056680,6.340547,6.056680,6.340547,6.056680,6.340547,6.056680,0.254613,0.422675,0.254613,0.254613,0.422675,0.254613,0.422675,0.254613,0.422675,0.254613,0.000000,0.000000,0.000000,0.00000,0.522027
13,070e5716-45dc-44cd-8113-a20d6248cefc,Corn: Commodity Tracked,Russia,RU,Republic of Mordovia,01ab8962-db3d-49ef-af56-b877ce4f59d7,Off-season,2016,2016-01-14,358.00,362.75,468.75,882.25,0.000000,0.000000,4.75,1.013268,361.0125,367.650000,370.952083,0.009992,0.010902,110.75,0.763733,524.25,0.405781,2016,1,2016_01,14,1,1.0,0.5,0.000000,0.000000,0.000000,0.000000,1.562701,0.923494,1.562701,6.050245,6.050245,6.340547,6.050245,6.340547,6.050245,6.340547,6.050245,0.250476,0.422675,0.250476,0.250476,0.422675,0.250476,0.422675,0.250476,0.422675,0.250476,0.000000,0.000000,0.000000,0.00000,0.539035
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
320632,0f6f2424-6d29-4fa3-8344-7a7e4403a4e6,Corn: Commodity Tracked,Russia,RU,Penza,fe800444-9044-49f4-b87e-fe4c9fdef38b,Off-season,2026,2025-11-17,434.75,448.00,544.25,1157.25,0.010459,0.010405,13.25,1.030477,430.5500,418.800000,413.864583,0.010657,0.014107,109.50,0.798806,722.50,0.375675,2025,11,2025_11,321,4,1.0,0.5,0.149073,0.205713,0.329447,0.613493,2.038119,0.607049,3.302388,4.172295,4.099128,5.500780,4.123017,5.500780,4.174335,5.500780,4.421974,2.031053,2.766007,2.036561,2.022079,2.845730,2.033619,2.897105,1.985191,2.897105,1.813540,0.302869,0.449162,0.738992,1.33676,3.356138
320633,2a13adef-eda2-4dd3-af06-e43a351adf8d,Corn: Commodity Tracked,Russia,RU,Penza,fe800444-9044-49f4-b87e-fe4c9fdef38b,Off-season,2026,2025-11-18,436.75,449.50,546.50,1153.50,0.004600,0.004590,12.75,1.029193,431.4000,419.591667,413.779167,0.010429,0.014111,109.75,0.799177,716.75,0.378630,2025,11,2025_11,322,4,1.0,0.5,0.149073,0.198700,0.329447,0.613493,2.038119,0.607620,3.302388,4.165325,4.098319,5.500780,4.118228,5.500780,4.175469,5.500780,4.419515,2.050292,2.766007,2.044099,2.027271,2.845730,2.037427,2.897105,1.984844,2.897105,1.814980,0.302869,0.402956,0.738992,1.33676,3.354218
320634,89d84e36-5277-4baa-a2ef-355566ff9a8e,Corn: Commodity Tracked,Russia,RU,Penza,fe800444-9044-49f4-b87e-fe4c9fdef38b,Off-season,2026,2025-11-19,429.75,441.50,536.75,1136.25,-0.016027,-0.016157,11.75,1.027341,431.7375,420.295833,413.660417,0.011079,0.014279,107.00,0.800652,706.50,0.378218,2025,11,2025_11,323,4,1.0,0.5,0.149073,0.198700,0.329447,0.613493,2.038119,0.606443,3.302388,4.158720,4.098519,5.500780,4.111992,5.500780,4.177085,5.500780,4.418272,2.070591,2.766007,2.052622,2.031951,2.845730,2.042433,2.897105,1.985235,2.897105,1.815145,0.302869,0.402956,0.738992,1.33676,3.350594
320635,8bd71cd3-0ac4-41a1-a68d-aa32f6a62226,Corn: Commodity Tracked,Russia,RU,Penza,fe800444-9044-49f4-b87e-fe4c9fdef38b,Off-season,2026,2025-11-20,426.50,437.75,527.00,1122.50,-0.007563,-0.007591,11.25,1.026377,431.6625,421.029167,413.562500,0.010915,0.014202,100.50,0.809298,696.00,0.379955,2025,11,2025_11,324,4,1.0,0.5,0.149073,0.149073,0.329447,0.613493,2.038119,0.603756,3.302388,4.153217,4.095143,5.500780,4.105751,5.500780,4.175551,5.500780,4.416261,2.080625,2.766007,2.062587,2.037685,2.845730,2.047889,2.897105,1.988165,2.897105,1.815947,0.302869,0.302869,0.738992,1.33676,3.350734
