In [1]:
import pandas as pd
import numpy as np
from sklearn.linear_model import Ridge
from sklearn.metrics import roc_auc_score
from sklearn.model_selection import StratifiedKFold
import os

# --- [1] 경로 및 파일 설정 ---
DATA_PATH = '../../data/raw/'
OOF_PATH = './oof_data/'
SUB_PATH = './submissions/'

M1_OOF = 'exp11_sota_AUC_0.77212.npy'
M4_OOF = 'exp15_snn_AUC_0.77330.npy'
M7_OOF = 'exp25_m7_Refined_AUC_0.77375.npy'

M1_SUB = '0130-1923.csv'
M4_SUB = 'Diversity_Focal_SNN_AUC_0.7708.csv'
M7_SUB = 'm7_Refined_test_preds.csv'

# --- [2] 데이터 로드 및 인덱스 정합성 ---
train_df = pd.read_csv(f'{DATA_PATH}train.csv')
a_cols = [f'Q{i}A' for i in 'abcdefghijklmnopqrst']
m1_idx = train_df[train_df.familysize <= 50].index
m7_mask = (train_df.familysize <= 50) & (train_df[a_cols].std(axis=1) > 0)
m7_idx = train_df[m7_mask].index
train_y = (2 - train_df.loc[m7_idx, 'voted'].to_numpy()).astype(np.float32)

# --- [3] OOF 정렬 및 랭크 변환 ---
def get_aligned_rank(file_name, origin_idx, target_idx):
    raw = np.load(f'{OOF_PATH}{file_name}')
    df = pd.DataFrame(raw, index=origin_idx, columns=['val'])
    aligned = df.reindex(target_idx)['val'].values
    return pd.Series(aligned).rank(pct=True).values

r1 = get_aligned_rank(M1_OOF, m1_idx, m7_idx)
r4 = get_aligned_rank(M4_OOF, m1_idx, m7_idx)
r7 = pd.Series(np.load(f'{OOF_PATH}{M7_OOF}')).rank(pct=True).values

X_stack = np.stack([r1, r4, r7], axis=1)

# --- [4] 메타 모델 학습 (Ridge) ---
# Ridge는 L2 규제를 통해 가중치가 특정 모델에 과하게 쏠리는 것을 방지합니다.
meta_model = Ridge(alpha=1.0)
meta_model.fit(X_stack, train_y)

stack_oof = meta_model.predict(X_stack)
stack_auc = roc_auc_score(train_y, stack_oof)

print('-' * 50)
print('exp32: Stacking (Ridge) Result')
print(f'Meta-Weights (m1, m4, m7): {meta_model.coef_}')
print(f'Stacking Local AUC: {stack_auc:.5f}')
print('-' * 50)

# --- [5] 테스트 데이터 적용 ---
def get_test_rank(file_name):
    probs = 2.0 - pd.read_csv(f'{SUB_PATH}{file_name}')['voted']
    return probs.rank(pct=True).values

t1 = get_test_rank(M1_SUB)
t4 = get_test_rank(M4_SUB)
t7 = get_test_rank(M7_SUB)

X_test_stack = np.stack([t1, t4, t7], axis=1)
final_preds = meta_model.predict(X_test_stack)

# 제출 파일 생성
sub = pd.read_csv(f'{DATA_PATH}sample_submission.csv')
sub['voted'] = 2.0 - pd.Series(final_preds).rank(pct=True).values # 랭크 기반 최종 보정
output_name = f'{SUB_PATH}exp32_stacking_ridge_{stack_auc:.5f}.csv'
sub.to_csv(output_name, index=False)
print(f'Stacking submission saved: {output_name}')

--------------------------------------------------
exp32: Stacking (Ridge) Result
Meta-Weights (m1, m4, m7): [0.05618164 0.34722011 0.41870381]
Stacking Local AUC: 0.77456
--------------------------------------------------
Stacking submission saved: ./submissions/exp32_stacking_ridge_0.77456.csv
