In [9]:
import pandas as pd
import numpy as np
from ensemble_boxes import weighted_boxes_fusion
from tqdm import tqdm
import os

# ---------------------------------------------------------
# 1. Data parsing function (no modifications)
# ---------------------------------------------------------
def parse_prediction_string(pred_str, width=1024, height=1024):
    """Parse a submission prediction string into normalized box lists.

    The string is read as a whitespace-separated sequence of 6-token groups
    ``label score x1 y1 x2 y2``. Coordinates are divided by ``width`` /
    ``height`` and clamped to ``[0, 1]``.

    Args:
        pred_str: Prediction string from the CSV. NaN/None or a blank
            string yields three empty lists.
        width: Divisor applied to x coordinates.
        height: Divisor applied to y coordinates.

    Returns:
        A tuple ``(boxes, scores, labels)`` of parallel lists, where each
        box is ``[x1, y1, x2, y2]`` in normalized coordinates.

    Raises:
        ValueError: If a token cannot be converted to ``int`` (label) or
            ``float`` (score / coordinates).
    """
    boxes = []
    scores = []
    labels = []

    if pd.isna(pred_str):
        return [], [], []

    # split() without an argument collapses runs of spaces/tabs/newlines;
    # split(" ") would emit empty tokens that misalign the 6-token groups
    # and make float("") raise.
    tokens = str(pred_str).split()

    # Stepping only while a full group of 6 remains drops an incomplete
    # trailing group instead of failing on it.
    for start in range(0, len(tokens) - 5, 6):
        label, score, x1, y1, x2, y2 = tokens[start:start + 6]
        x1, y1, x2, y2 = float(x1), float(y1), float(x2), float(y2)

        # Normalize to 0~1 and clamp (the original author notes this is
        # required by WBF).
        boxes.append([
            min(max(x1 / width, 0.0), 1.0),
            min(max(y1 / height, 0.0), 1.0),
            min(max(x2 / width, 0.0), 1.0),
            min(max(y2 / height, 0.0), 1.0),
        ])
        scores.append(float(score))
        labels.append(int(label))

    return boxes, scores, labels

# ---------------------------------------------------------
# 2. Main ensemble function (scaling removed)
# ---------------------------------------------------------
def ensemble_3_models_simple(
    submission_files,
    weights=None,
    iou_thr=0.5,
    skip_box_thr=0.1,
    img_size=1024,
    output_path="ensemble_submission_simple.csv"
):
    """Fuse several detection submission CSVs with Weighted Boxes Fusion.

    Each CSV must have ``image_id`` and ``PredictionString`` columns. The
    set of images processed is taken from the first file; a model with no
    row for an image contributes no boxes. Model scores are passed to WBF
    unchanged (no per-model score scaling).

    Args:
        submission_files: Paths of the per-model submission CSVs.
        weights: One WBF weight per submission file. ``None`` (default)
            weights every model equally.
        iou_thr: ``iou_thr`` forwarded to ``weighted_boxes_fusion``.
        skip_box_thr: ``skip_box_thr`` forwarded to ``weighted_boxes_fusion``.
        img_size: Side length used both to normalize the input
            coordinates and to scale the fused boxes back (same value for
            width and height).
        output_path: Destination of the fused submission CSV.

    Returns:
        None. The result is written to ``output_path``.
    """
    print(f"üöÄ Loading {len(submission_files)} submission files...")
    dfs = [pd.read_csv(f) for f in submission_files]

    # A None default avoids a shared mutable list and works for any
    # number of models, not just three.
    if weights is None:
        weights = [1] * len(dfs)

    image_ids = dfs[0]['image_id'].unique()

    # Index every submission once (image_id -> PredictionString of its
    # first row) instead of boolean-filtering each DataFrame per image.
    pred_lookups = []
    for df in dfs:
        first_rows = df.drop_duplicates(subset='image_id', keep='first')
        pred_lookups.append(
            dict(zip(first_rows['image_id'], first_rows['PredictionString']))
        )

    results = []

    print("üîÑ Running Standard WBF (No Scaling)...")

    for img_id in tqdm(image_ids):
        boxes_list = []
        scores_list = []
        labels_list = []

        # Collect each model's predictions for this image.
        for lookup in pred_lookups:
            if img_id in lookup:
                boxes, scores, labels = parse_prediction_string(
                    lookup[img_id], img_size, img_size
                )
            else:
                boxes, scores, labels = [], [], []

            # Scaling logic removed: raw scores are used as-is.
            boxes_list.append(boxes)
            scores_list.append(scores)
            labels_list.append(labels)

        # Run WBF. boxes_list always holds one entry per model here.
        final_boxes, final_scores, final_labels = weighted_boxes_fusion(
            boxes_list,
            scores_list,
            labels_list,
            weights=weights,
            iou_thr=iou_thr,
            skip_box_thr=skip_box_thr,
            conf_type='avg'
        )

        # Scale the fused boxes back to pixel coordinates.
        pred_strs = []
        for box, score, label in zip(final_boxes, final_scores, final_labels):
            x1 = box[0] * img_size
            y1 = box[1] * img_size
            x2 = box[2] * img_size
            y2 = box[3] * img_size

            pred_strs.append(f"{int(label)} {score:.4f} {x1:.2f} {y1:.2f} {x2:.2f} {y2:.2f}")

        results.append({
            "PredictionString": " ".join(pred_strs),
            "image_id": img_id
        })

    submission_df = pd.DataFrame(results)
    submission_df.to_csv(output_path, index=False)
    print(f"‚úÖ Ensemble Complete! Saved to: {output_path}")

# =========================================================
# Execution section
# =========================================================

# Each submission CSV is paired with its WBF weight (tune the weights
# according to model performance). The trailing columns record the
# author's reference scores; none were recorded for dfine.csv.
_model_specs = [
    # (csv file, weight)           mAP50     mAP50 squared
    ("yolo12x.csv", 0.15),       # 0.6220    0.3868
    ("rfdetr.csv", 0.25),        # 0.6619    0.4381
    ("deimv2.csv", 0.5),         # 0.7203    0.5188
    ("swin_cascade.csv", 0.15),  # 0.5960    0.3552
    ("dfine.csv", 0.25),
]

files = [csv_name for csv_name, _ in _model_specs]
model_weights = [weight for _, weight in _model_specs]

# Run the ensemble.
ensemble_3_models_simple(
    files,
    weights=model_weights,
    iou_thr=0.6,
    skip_box_thr=0.1,
    output_path="final_ensemble.csv",
)

üöÄ Loading 5 submission files...
üîÑ Running Standard WBF (No Scaling)...


100%|‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà| 4871/4871 [01:18<00:00, 61.68it/s]


‚úÖ Ensemble Complete! Saved to: final_ensemble.csv
