In [None]:
# Upgrade transformers without network access: --no-index disables PyPI lookups
# and --find-links makes pip resolve packages from the given local directory;
# -qq silences most of pip's output.
!pip install --upgrade --no-index --find-links=/kaggle/input/transformers_package transformers -qq

In [None]:
# Run inference.py once per base model / LoRA adapter pair. torchrun launches
# two worker processes on this node for each run (--nproc_per_node=2).
# NOTE(review): inference.py is not visible here; presumably each run writes
# /kaggle/working/<output_filename>.csv, which the ensembling cell reads
# back by name - confirm, and keep the names in sync with that cell.

# qwen3-8b base model with its LoRA adapters
!torchrun --nproc_per_node=2 inference.py \
  --model_name "/kaggle/input/qwen-3/transformers/8b/1" \
  --lora_path "/kaggle/input/qwen3-8b-models-adaptors" \
  --output_filename "submission_qwen3_8b_prob"

# qwen3-14b base model with its LoRA adapters
!torchrun --nproc_per_node=2 inference.py \
    --model_name "/kaggle/input/qwen-3/transformers/14b/1" \
    --lora_path "/kaggle/input/qwen3-14b-models-adaptors" \
    --output_filename "submission_qwen3_14b_prob"

# qwen3-4b (instruct-2507) base model with its LoRA adapters
!torchrun --nproc_per_node=2 inference.py \
    --model_name "/kaggle/input/qwen-3-4b-instruct-2507" \
    --lora_path "/kaggle/input/qwen3-4b-models-adaptors" \
    --output_filename "submission_qwen3_4b_prob"

In [None]:
import pandas as pd
import numpy as np
from collections import defaultdict

# -------------------------
# 'Family' lookup: for every QuestionId in train, the MC_Answer that occurs most
# often under a 'True_*' category is taken as that question's correct answer.
# Each test row is then tagged 'True_' or 'False_' depending on whether its
# MC_Answer equals that answer.
# -------------------------
train = pd.read_csv('/kaggle/input/map-charting-student-math-misunderstandings/train.csv')
test_df = pd.read_csv('/kaggle/input/map-charting-student-math-misunderstandings/test.csv')

# Flag the training rows whose Category begins with 'True'.
train['is_true'] = train['Category'].str.startswith('True')

# Among the True_* rows, count every (QuestionId, MC_Answer) pair, order the rows
# by that count (largest first) and keep the first row encountered per question.
true_rows = train[train.is_true]
pair_counts = true_rows.groupby(['QuestionId', 'MC_Answer']).MC_Answer.transform('count')
ranked_rows = true_rows.assign(c=pair_counts).sort_values('c', ascending=False)
correct = ranked_rows.drop_duplicates(['QuestionId'])[['QuestionId', 'MC_Answer']]
correct['is_correct'] = 1  # every surviving (QuestionId, MC_Answer) is a correct answer

# Left-join the test rows onto the correct answers: matched rows carry
# is_correct=1, unmatched rows come back as NaN and are turned into 0.
test_flags = test_df.merge(correct, on=['QuestionId', 'MC_Answer'], how='left')
test_flags['is_correct'] = test_flags['is_correct'].fillna(0).astype(int)

# Final lookup {row_id: 'True_' or 'False_'}.
prefix_by_flag = {1: 'True_', 0: 'False_'}
fam_map = test_flags.set_index('row_id')['is_correct'].map(prefix_by_flag).to_dict()

# -------------------------
# Ensembling logic
# -------------------------
def extract_class_probabilities(row, model_suffix='', top_k=25):
    """Read one model's ranked classes and their probabilities from a merged row.

    Args:
        row: mapping-like row (for example a pandas Series or a dict) holding the
            merged columns of all models.
        model_suffix: column suffix of the model to read ('' for the first model).
        top_k: maximum number of leading classes to read, in the order they
            appear in the classes column.

    Returns:
        dict: {class_name: prob} for the first top_k classes. Positions whose
        ``prob_{i}{model_suffix}`` column is absent are skipped. The dict is
        empty when the classes column is absent or does not hold a string
        (for example NaN from an empty CSV cell).
    """
    classes_col = f'top_classes{model_suffix}'
    if classes_col not in row:
        return {}

    raw_classes = row[classes_col]
    if not isinstance(raw_classes, str):
        # An empty cell is read back as NaN; treat it as "no candidates"
        # instead of failing on .split().
        return {}

    class_probs = {}
    # Split on a single space so that position i stays aligned with prob_i.
    for i, class_name in enumerate(raw_classes.split(' ')[:max(top_k, 0)]):
        prob_col = f'prob_{i}{model_suffix}'
        if prob_col in row:
            class_probs[class_name] = row[prob_col]
    return class_probs


def _merge_probability_files(prob_files):
    """Read every probability CSV and join them on row_id into one frame."""
    frames = [pd.read_csv(path) for path in prob_files]
    merged = frames[0]
    # The first file keeps its column names; file number k (k >= 2) gets the
    # suffix '_model{k}' on every column it shares with the frame so far.
    for position, frame in enumerate(frames[1:], start=2):
        merged = pd.merge(merged, frame, on='row_id', suffixes=('', f'_model{position}'))
    return merged


def _score_classes(per_model_probs, weights):
    """Blend the per-model {class: prob} dicts into one {class: score} dict.

    score = 0.34 * weighted probability sum
          + 0.33 * share of models that listed the class
          + 0.33 * largest single weighted probability

    The result is keyed in first-seen order (models in order, classes in each
    model's own order), which makes later tie-breaking reproducible.
    """
    n_models = len(per_model_probs)
    votes = defaultdict(int)
    weighted_total = defaultdict(float)
    weighted_peak = defaultdict(float)

    for weight, class_probs in zip(weights, per_model_probs):
        for class_name, prob in class_probs.items():
            weighted = prob * weight
            votes[class_name] += 1
            weighted_total[class_name] += weighted
            weighted_peak[class_name] = max(weighted_peak[class_name], weighted)

    return {
        class_name: (
            weighted_total[class_name] * 0.34 +
            votes[class_name] / n_models * 0.33 +
            weighted_peak[class_name] * 0.33
        )
        for class_name in weighted_total
    }


def _fill_to_minimum(top_classes, pref, minimum=3):
    """Pad top_classes in place (and return it) until it holds `minimum` entries.

    '<pref>Neither:NA' is tried first; for the 'True_' family '<pref>Correct:NA'
    is tried next. Fillers already present are not added by this first pass. If
    the list is still short, '<pref>Neither:NA' is repeated.
    """
    fillers = [f"{pref}Neither:NA"]
    if pref == "True_":
        fillers.append(f"{pref}Correct:NA")

    for filler in fillers:
        if len(top_classes) >= minimum:
            break
        if filler not in top_classes:
            top_classes.append(filler)

    while len(top_classes) < minimum:
        top_classes.append(fillers[0])
    return top_classes


def ensemble_with_disagreement_handling(prob_files, model_weights=None, top_k=3,
                                        family_map=None):
    """Ensemble several per-model probability files into ranked predictions.

    Args:
        prob_files: CSV paths (or file-like objects) from the base models; each
            must contain row_id, top_classes and prob_i columns.
        model_weights: one weight per entry of prob_files, in the same order.
            None gives every model the weight 1.0.
        top_k: number of best-scoring classes kept per row before padding.
        family_map: {row_id: 'True_' or 'False_'} prefix lookup. None falls back
            to the module-level ``fam_map``.

    Returns:
        list[str]: one string per merged row, in merged-row order, holding the
        space-separated "Category:Misconception" predictions (at least 3).

    Raises:
        ValueError: if prob_files is empty or model_weights has a different
            length than prob_files.
        KeyError: if a merged row_id is missing from the family map.
    """
    n_models = len(prob_files)
    if n_models == 0:
        raise ValueError("prob_files must contain at least one file")

    if model_weights is None:
        model_weights = [1.0] * n_models
    elif len(model_weights) != n_models:
        raise ValueError(
            f"model_weights has {len(model_weights)} entries but "
            f"{n_models} probability files were given"
        )

    if family_map is None:
        family_map = fam_map  # module-level lookup built from train/test

    merged_df = _merge_probability_files(prob_files)
    # Column suffix per model, matching _merge_probability_files.
    suffixes = [''] + [f'_model{position}' for position in range(2, n_models + 1)]

    final_predictions = []
    for _, row in merged_df.iterrows():
        pref = family_map[row['row_id']]

        # Up to 25 candidate classes are read from every model.
        per_model_probs = [
            extract_class_probabilities(row, suffix, top_k=25)
            for suffix in suffixes
        ]
        scores = _score_classes(per_model_probs, model_weights)

        # Family filter: only classes whose name starts with the row's prefix.
        in_family = [(name, score) for name, score in scores.items()
                     if name.startswith(pref)]

        # Stable sort: equal scores keep their first-seen order.
        in_family.sort(key=lambda item: -item[1])
        top_classes = [name for name, _ in in_family[:top_k]]

        final_predictions.append(' '.join(_fill_to_minimum(top_classes, pref)))

    return final_predictions


# -------------------------
# Run ensembling
# -------------------------
# One weight per entry of prob_files, in the same order.
weights = [
    1, 1, 1,
]

# Probability files to ensemble. The base names must match the
# --output_filename values passed to inference.py (the 8b run is given
# "submission_qwen3_8b_prob").
prob_files = [
    '/kaggle/working/submission_qwen3_4b_prob.csv',
    '/kaggle/working/submission_qwen3_8b_prob.csv',
    '/kaggle/working/submission_qwen3_14b_prob.csv',
]

# Run ensemble prediction; top_k=8 keeps up to the 8 best-scoring classes per row
predictions = ensemble_with_disagreement_handling(
    prob_files,
    model_weights=weights,
    top_k=8
)

# Build submission file with required column names.
# NOTE(review): predictions follow the row order of the merged probability
# files, while row_id is taken from test_df; this assumes both orders are
# identical - confirm against inference.py.
submission = pd.DataFrame({
    'row_id': test_df.row_id.values,
    'Category:Misconception': predictions
})

submission.to_csv('submission.csv', index=False)  # save submission file
print(submission.head())