In [None]:
%%writefile utils_validation.py
import pandas as pd
from datasets import Dataset
from constants_validation import POSITIVE_ANSWER, NEGATIVE_ANSWER, COMPLETE_PHRASE, BASE_PROMPT, TRAIN_PERCENTAGE, VALIDATION_PERCENTAGE
import random, numpy as np
from sklearn.model_selection import train_test_split
random.seed(42)
np.random.seed(42)


def build_prompt(row):
    """
    Render the few-shot classification prompt for one example.

    Args:
        row: Mapping-like object (e.g. a DataFrame row) providing the keys
            "subreddit", "rule", "positive_example", "negative_example"
            and "body".

    Returns:
        The prompt string. It starts with a newline and ends with
        COMPLETE_PHRASE, so the model's next token is the answer.
    """
    # The demonstrated answers use the same constants that build_dataset()
    # maps labels to, so the in-prompt examples can never drift away from the
    # completions the model is trained on.
    return f"""
{BASE_PROMPT}

Subreddit: r/{row["subreddit"]}
Rule: {row["rule"]}
Examples:
1) {row["positive_example"]}
{COMPLETE_PHRASE} {POSITIVE_ANSWER}

2) {row["negative_example"]}
{COMPLETE_PHRASE} {NEGATIVE_ANSWER}

---
Comment: {row["body"]}
{COMPLETE_PHRASE}"""


# Candidate example columns present in both train.csv and test.csv.
_EXAMPLE_COLUMNS = ["positive_example_1", "positive_example_2",
                    "negative_example_1", "negative_example_2"]


def _pick_random_column(frame, first_col, second_col):
    """Return, row by row, the value of first_col or second_col chosen by a fair coin flip."""
    return np.where(
        np.random.rand(len(frame)) < 0.5,
        frame[first_col],
        frame[second_col]
    )


def get_dataframe_to_train_validation(data_path, mode='train'):
    """
    Build the flat example table for either side of the train/validation split.

    train.csv is split (stratified on 'rule_violation', random_state=42) into
    a training part and a validation part of size VALIDATION_PERCENTAGE.

    Args:
        data_path: Directory containing train.csv (and test.csv for 'train' mode).
        mode: 'train' to get the training portion, augmented with examples
            harvested from a 50% sample of test.csv; 'validation' to get the
            held-out portion only.

    Returns:
        De-duplicated DataFrame with the columns body, rule, subreddit,
        rule_violation, positive_example and negative_example.

    Raises:
        ValueError: If mode is neither 'train' nor 'validation'.
    """
    # Fail fast on a bad mode, before any file is read or any split is computed.
    if mode not in ('train', 'validation'):
        raise ValueError("mode must be 'train' or 'validation'")

    train_dataset = pd.read_csv(f"{data_path}/train.csv")

    # Split training data into train and validation
    train_split, val_split = train_test_split(
        train_dataset,
        test_size=VALIDATION_PERCENTAGE,
        random_state=42,
        stratify=train_dataset['rule_violation']
    )

    if mode == 'train':
        chosen_dataset = train_split
        print(f"Using {len(chosen_dataset)} samples for training ({TRAIN_PERCENTAGE*100:.1f}% of original training data)")
    else:
        chosen_dataset = val_split
        print(f"Using {len(chosen_dataset)} samples for validation ({VALIDATION_PERCENTAGE*100:.1f}% of original training data)")

    # ---------- Process chosen dataset ----------
    chosen_df = chosen_dataset[["body", "rule", "subreddit", "rule_violation",
                                *_EXAMPLE_COLUMNS]].copy()

    # Randomly select one positive and one negative example per row.
    # The order of the random draws is kept stable (positive first) so that
    # results stay reproducible under the module-level seed.
    chosen_df["positive_example"] = _pick_random_column(
        chosen_df, "positive_example_1", "positive_example_2")
    chosen_df["negative_example"] = _pick_random_column(
        chosen_df, "negative_example_1", "negative_example_2")
    chosen_df.drop(columns=_EXAMPLE_COLUMNS, inplace=True)

    flatten = [chosen_df]

    # ---------- Process test dataset (only for training mode) ----------
    if mode == 'train':
        # test.csv is only needed here, so validation mode no longer requires it.
        test_dataset = (
            pd.read_csv(f"{data_path}/test.csv")
            .sample(frac=0.5, random_state=42)
            .reset_index(drop=True)
        )

        # Each rule example becomes a labeled comment: positive examples get
        # label 1, negative examples label 0. The sibling example of the same
        # polarity is shown in the prompt and the opposite-polarity example is
        # drawn at random.
        for label, own, other in ((1, "positive", "negative"),
                                  (0, "negative", "positive")):
            for i in range(1, 3):
                sub_dataset = test_dataset[["rule", "subreddit",
                                            *_EXAMPLE_COLUMNS]].copy()

                sub_dataset["body"] = sub_dataset[f"{own}_example_{i}"]
                # 3 - i maps 1 -> 2 and 2 -> 1, i.e. "the other one".
                sub_dataset[f"{own}_example"] = sub_dataset[f"{own}_example_{3 - i}"]
                sub_dataset[f"{other}_example"] = _pick_random_column(
                    sub_dataset, f"{other}_example_1", f"{other}_example_2")
                sub_dataset["rule_violation"] = label

                sub_dataset.drop(columns=_EXAMPLE_COLUMNS, inplace=True)
                flatten.append(sub_dataset)

    # Combine all DataFrames
    dataframe = pd.concat(flatten, axis=0)
    dataframe = dataframe.drop_duplicates(ignore_index=True)

    return dataframe


def build_dataset(dataframe):
    """
    Turn an example table into a prompt/completion Dataset.

    A "prompt" column (and, when "rule_violation" is present, a "completion"
    column) is added to the passed-in dataframe itself. The resulting dataset
    is also dumped to /kaggle/working/dataset_validation.csv.

    Returns:
        A datasets.Dataset holding "prompt" and, if labels exist, "completion".
    """
    dataframe["prompt"] = dataframe.apply(build_prompt, axis=1)

    has_labels = "rule_violation" in dataframe
    if has_labels:
        answer_for_label = {
            1: POSITIVE_ANSWER,
            0: NEGATIVE_ANSWER,
        }
        dataframe["completion"] = dataframe["rule_violation"].map(answer_for_label)

    selected = ["prompt", "completion"] if has_labels else ["prompt"]
    dataset = Dataset.from_pandas(dataframe[selected])

    # Keep a CSV copy of exactly what the model will see.
    dataset.to_pandas().to_csv("/kaggle/working/dataset_validation.csv", index=False)
    return dataset


def get_validation_dataframe_with_labels(data_path):
    """
    Return the held-out validation rows, including their true labels.

    Thin convenience wrapper around get_dataframe_to_train_validation()
    called with mode='validation'.
    """
    return get_dataframe_to_train_validation(data_path, mode='validation')

## Evaluation Metrics and Visualization Functions

In [None]:
%%writefile evaluation_metrics.py
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns
from sklearn.metrics import (
    roc_auc_score, confusion_matrix, classification_report, 
    roc_curve, precision_recall_curve, average_precision_score,
    accuracy_score, precision_score, recall_score, f1_score
)
from sklearn.preprocessing import label_binarize
import warnings
warnings.filterwarnings('ignore')

def calculate_detailed_metrics(y_true, y_pred_proba, y_pred_binary=None, threshold=0.5):
    """
    Calculate comprehensive evaluation metrics for a binary classifier.

    Args:
        y_true: True binary labels (0 or 1)
        y_pred_proba: Predicted probabilities for positive class
            (array, Series or plain list)
        y_pred_binary: Predicted binary labels (optional, will be calculated
            from proba if not provided)
        threshold: Threshold for converting probabilities to binary predictions

    Returns:
        Dictionary with accuracy, precision, recall, f1_score, roc_auc and
        pr_auc (both None when they cannot be computed), confusion_matrix
        (always 2x2, rows/columns ordered [0, 1]), the four confusion-matrix
        counts, specificity and negative_predictive_value.
    """
    # Coerce once so a plain Python list supports the vectorised comparison.
    y_pred_proba = np.asarray(y_pred_proba)
    if y_pred_binary is None:
        y_pred_binary = (y_pred_proba >= threshold).astype(int)

    # Basic metrics
    metrics = {
        'accuracy': accuracy_score(y_true, y_pred_binary),
        'precision': precision_score(y_true, y_pred_binary, zero_division=0),
        'recall': recall_score(y_true, y_pred_binary, zero_division=0),
        'f1_score': f1_score(y_true, y_pred_binary, zero_division=0),
    }

    # AUC metrics (undefined when y_true contains a single class)
    try:
        metrics['roc_auc'] = roc_auc_score(y_true, y_pred_proba)
        metrics['pr_auc'] = average_precision_score(y_true, y_pred_proba)
    except ValueError as e:
        print(f"Warning: Could not calculate AUC metrics: {e}")
        metrics['roc_auc'] = None
        metrics['pr_auc'] = None

    # Confusion matrix. Pinning labels=[0, 1] keeps the matrix 2x2 even when
    # only one class shows up in y_true/y_pred_binary; without it sklearn
    # returns a 1x1 matrix and the breakdown below would be silently skipped.
    cm = confusion_matrix(y_true, y_pred_binary, labels=[0, 1])
    metrics['confusion_matrix'] = cm

    tn, fp, fn, tp = cm.ravel()
    metrics['true_negatives'] = tn
    metrics['false_positives'] = fp
    metrics['false_negatives'] = fn
    metrics['true_positives'] = tp

    # Additional derived metrics (0 when the denominator is empty)
    metrics['specificity'] = tn / (tn + fp) if (tn + fp) > 0 else 0
    metrics['negative_predictive_value'] = tn / (tn + fn) if (tn + fn) > 0 else 0

    return metrics


def plot_confusion_matrix(cm, class_names=['No Violation', 'Violation'], title='Confusion Matrix', normalize=False):
    """
    Plot a confusion matrix as an annotated heatmap.

    Args:
        cm: Square confusion matrix (rows = true label, columns = predicted).
        class_names: Tick labels, in the same order as the matrix rows/columns.
        title: Figure title; ' (Normalized)' is appended when normalize=True.
        normalize: If True, show each row as proportions of that true class
            instead of raw counts.
    """
    plt.figure(figsize=(8, 6))

    if normalize:
        cm = np.asarray(cm)
        row_totals = cm.sum(axis=1, keepdims=True)
        # A class with no true samples has a zero row total; leave that row at
        # 0 instead of dividing by zero and drawing NaN cells.
        cm = np.divide(
            cm.astype('float'),
            row_totals,
            out=np.zeros(cm.shape, dtype=float),
            where=row_totals != 0,
        )
        fmt = '.2f'
        title += ' (Normalized)'
    else:
        fmt = 'd'

    sns.heatmap(cm, annot=True, fmt=fmt, cmap='Blues',
                xticklabels=class_names, yticklabels=class_names,
                cbar_kws={'label': 'Count' if not normalize else 'Proportion'})

    plt.title(title, fontsize=14, fontweight='bold')
    plt.ylabel('True Label', fontweight='bold')
    plt.xlabel('Predicted Label', fontweight='bold')
    plt.tight_layout()
    plt.show()


def plot_roc_curve(y_true, y_pred_proba, title='ROC Curve'):
    """
    Draw the ROC curve together with the chance diagonal.

    Prints a message and returns without plotting when y_true holds fewer
    than two distinct classes.
    """
    if np.unique(y_true).size < 2:
        print("Cannot plot ROC curve: only one class present in y_true")
        return

    auc_score = roc_auc_score(y_true, y_pred_proba)
    fpr, tpr, _ = roc_curve(y_true, y_pred_proba)

    fig, ax = plt.subplots(figsize=(8, 6))
    ax.plot(fpr, tpr, color='darkorange', lw=2,
            label=f'ROC curve (AUC = {auc_score:.3f})')
    # Diagonal = performance of a random classifier.
    ax.plot([0, 1], [0, 1], color='navy', lw=2, linestyle='--',
            label='Random classifier')

    ax.set_xlim([0.0, 1.0])
    ax.set_ylim([0.0, 1.05])
    ax.set_xlabel('False Positive Rate', fontweight='bold')
    ax.set_ylabel('True Positive Rate', fontweight='bold')
    ax.set_title(title, fontsize=14, fontweight='bold')
    ax.legend(loc="lower right")
    ax.grid(True, alpha=0.3)
    fig.tight_layout()
    plt.show()


def plot_precision_recall_curve(y_true, y_pred_proba, title='Precision-Recall Curve'):
    """
    Draw the precision-recall curve with the positive-rate baseline.

    Prints a message and returns without plotting when y_true holds fewer
    than two distinct classes.
    """
    if np.unique(y_true).size < 2:
        print("Cannot plot PR curve: only one class present in y_true")
        return

    pr_auc = average_precision_score(y_true, y_pred_proba)
    precision, recall, _ = precision_recall_curve(y_true, y_pred_proba)

    fig, ax = plt.subplots(figsize=(8, 6))
    ax.plot(recall, precision, color='darkorange', lw=2,
            label=f'PR curve (AUC = {pr_auc:.3f})')

    # Baseline (random classifier): the fraction of positive labels.
    baseline = np.sum(y_true) / len(y_true)
    ax.axhline(y=baseline, color='navy', linestyle='--', lw=2,
               label=f'Random classifier (AP = {baseline:.3f})')

    ax.set_xlim([0.0, 1.0])
    ax.set_ylim([0.0, 1.05])
    ax.set_xlabel('Recall', fontweight='bold')
    ax.set_ylabel('Precision', fontweight='bold')
    ax.set_title(title, fontsize=14, fontweight='bold')
    ax.legend(loc="lower left")
    ax.grid(True, alpha=0.3)
    fig.tight_layout()
    plt.show()


def print_classification_report(y_true, y_pred_binary, class_names=['No Violation', 'Violation']):
    """
    Print sklearn's per-class precision/recall/F1 report.

    Args:
        y_true: True labels.
        y_pred_binary: Predicted labels.
        class_names: Display names; entry i is used for label i.
    """
    # When every observed label is one of 0..len(class_names)-1, pin the label
    # set explicitly. Otherwise sklearn infers it from the data and raises a
    # ValueError if a class is absent (e.g. the model predicts a single class
    # on a single-class sample), because target_names would then be too long.
    expected_labels = np.arange(len(class_names))
    observed_labels = np.union1d(y_true, y_pred_binary)
    labels = expected_labels if np.isin(observed_labels, expected_labels).all() else None

    report = classification_report(y_true, y_pred_binary,
                                   labels=labels,
                                   target_names=class_names,
                                   digits=4,
                                   zero_division=0)
    print("Classification Report:")
    print("=" * 50)
    print(report)


def comprehensive_evaluation_report(y_true, y_pred_proba, threshold=0.5, 
                                  class_names=['No Violation', 'Violation'],
                                  model_name="Model"):
    """
    Generate comprehensive evaluation report with all metrics and plots.

    Args:
        y_true: True binary labels (0 or 1).
        y_pred_proba: Predicted probabilities for the positive class
            (array, Series or plain list).
        threshold: Probability cut-off used for the binary predictions.
        class_names: Display names for label 0 and label 1.
        model_name: Name shown in the report header and plot titles.

    Returns:
        The metrics dictionary produced by calculate_detailed_metrics().
    """
    print(f"\n{'='*60}")
    print(f"COMPREHENSIVE EVALUATION REPORT: {model_name}")
    print(f"{'='*60}")

    # Calculate binary predictions (coerce first so plain lists work too)
    y_pred_proba = np.asarray(y_pred_proba)
    y_pred_binary = (y_pred_proba >= threshold).astype(int)

    # Get all metrics
    metrics = calculate_detailed_metrics(y_true, y_pred_proba, y_pred_binary, threshold)

    # "Not available" is signalled by None. A truthiness test would also hide a
    # legitimate score of exactly 0.0, so compare against None explicitly.
    has_roc_auc = metrics['roc_auc'] is not None
    has_pr_auc = metrics['pr_auc'] is not None

    # Print summary metrics
    print(f"\nSUMMARY METRICS (Threshold = {threshold})")
    print("-" * 40)
    print(f"Accuracy:        {metrics['accuracy']:.4f}")
    print(f"Precision:       {metrics['precision']:.4f}")
    print(f"Recall:          {metrics['recall']:.4f}")
    print(f"F1-Score:        {metrics['f1_score']:.4f}")
    print(f"ROC AUC:         {metrics['roc_auc']:.4f}" if has_roc_auc else "ROC AUC:         N/A")
    print(f"PR AUC:          {metrics['pr_auc']:.4f}" if has_pr_auc else "PR AUC:          N/A")

    if 'specificity' in metrics:
        print(f"Specificity:     {metrics['specificity']:.4f}")

    # Print detailed classification report
    print("\n")
    print_classification_report(y_true, y_pred_binary, class_names)

    # Print confusion matrix details
    if 'true_positives' in metrics:
        print("\nCONFUSION MATRIX BREAKDOWN")
        print("-" * 40)
        print(f"True Positives:  {metrics['true_positives']}")
        print(f"True Negatives:  {metrics['true_negatives']}")
        print(f"False Positives: {metrics['false_positives']}")
        print(f"False Negatives: {metrics['false_negatives']}")

    # Plot confusion matrix (raw counts, then row-normalized)
    plot_confusion_matrix(metrics['confusion_matrix'], class_names, 
                         f'Confusion Matrix - {model_name}')
    plot_confusion_matrix(metrics['confusion_matrix'], class_names, 
                         f'Confusion Matrix - {model_name}', normalize=True)

    # Plot ROC curve
    if has_roc_auc:
        plot_roc_curve(y_true, y_pred_proba, f'ROC Curve - {model_name}')

    # Plot PR curve
    if has_pr_auc:
        plot_precision_recall_curve(y_true, y_pred_proba, f'Precision-Recall Curve - {model_name}')

    return metrics

## Training Script

In [None]:
%%writefile train_validation.py
import pandas as pd

from trl import SFTTrainer, SFTConfig
from peft import LoraConfig
from tqdm.auto import tqdm
from transformers.utils import is_torch_bf16_gpu_available
from utils_validation import build_dataset, get_dataframe_to_train_validation
from constants_validation import DATA_PATH, BASE_MODEL_PATH, LORA_PATH


def main():
    """Fine-tune a LoRA adapter on the training split and save it to LORA_PATH."""
    print("Starting training with validation split...")

    # Training portion of the data, rendered as prompt/completion pairs.
    train_dataset = build_dataset(
        get_dataframe_to_train_validation(DATA_PATH, mode='train')
    )
    print(f"Training dataset size: {len(train_dataset)}")

    adapted_modules = [
        "q_proj", "k_proj", "v_proj", "o_proj",
        "gate_proj", "up_proj", "down_proj",
    ]
    lora_config = LoraConfig(
        r=16,
        lora_alpha=32,
        lora_dropout=0.1,
        bias="none",
        target_modules=adapted_modules,
        task_type="CAUSAL_LM",
    )

    # Use bf16 when the GPU supports it, otherwise fall back to fp16.
    use_bf16 = is_torch_bf16_gpu_available()

    training_args = SFTConfig(
        # schedule
        num_train_epochs=1,
        per_device_train_batch_size=4,
        gradient_accumulation_steps=4,
        # optimisation
        optim="paged_adamw_8bit",
        learning_rate=1e-4,  # keep high, lora usually likes high.
        weight_decay=0.01,
        max_grad_norm=1.0,
        lr_scheduler_type="cosine",
        warmup_ratio=0.03,
        # precision / memory
        bf16=use_bf16,
        fp16=not use_bf16,
        dataloader_pin_memory=True,
        gradient_checkpointing=True,
        gradient_checkpointing_kwargs={"use_reentrant": False},
        # no checkpoints or external logging
        save_strategy="no",
        report_to="none",
        # SFT specifics
        completion_only_loss=True,
        packing=False,
        remove_unused_columns=False,
    )

    trainer = SFTTrainer(
        BASE_MODEL_PATH,
        args=training_args,
        train_dataset=train_dataset,
        peft_config=lora_config,
    )

    print("Starting training...")
    trainer.train()
    trainer.save_model(LORA_PATH)
    print(f"✅ Training completed! Model saved to {LORA_PATH}")


if __name__ == "__main__":
    main()

## Validation Inference Script

In [None]:
%%writefile inference_validation.py
import os
os.environ["VLLM_USE_V1"] = "0"

import vllm
import torch
import pandas as pd
import numpy as np
from logits_processor_zoo.vllm import MultipleChoiceLogitsProcessor
from vllm.lora.request import LoRARequest
from utils_validation import build_dataset, get_validation_dataframe_with_labels
from constants_validation import BASE_MODEL_PATH, LORA_PATH, DATA_PATH, POSITIVE_ANSWER, NEGATIVE_ANSWER
from evaluation_metrics import comprehensive_evaluation_report


def run_validation_inference():
    """
    Score the validation split with the LoRA-adapted model.

    Returns:
        Tuple (y_true, y_pred_proba, validation_df): the true labels, the
        normalised probability of POSITIVE_ANSWER per row, and the validation
        dataframe the prompts were built from.
    """
    print("Loading validation dataset...")
    validation_df = get_validation_dataframe_with_labels(DATA_PATH)

    # Only the prompts are fed to the model; labels are read from the dataframe.
    texts = build_dataset(validation_df)["prompt"]
    y_true = validation_df["rule_violation"].values

    print(f"Validation set size: {len(texts)}")
    print(f"Class distribution: {np.bincount(y_true)}")

    print("Initializing vLLM model...")
    llm = vllm.LLM(
        BASE_MODEL_PATH,
        quantization="gptq",
        tensor_parallel_size=1,
        gpu_memory_utilization=0.98,
        trust_remote_code=True,
        dtype="half",
        enforce_eager=True,
        max_model_len=2836,
        disable_log_stats=True,
        enable_prefix_caching=True,
        enable_lora=True,
        max_lora_rank=64,
    )

    # Constrain generation to the two answer choices.
    mclp = MultipleChoiceLogitsProcessor(
        llm.get_tokenizer(), choices=[POSITIVE_ANSWER, NEGATIVE_ANSWER]
    )
    sampling_params = vllm.SamplingParams(
        skip_special_tokens=True,
        max_tokens=1,
        logits_processors=[mclp],
        logprobs=2,
    )

    print("Running inference...")
    outputs = llm.generate(
        texts,
        sampling_params,
        use_tqdm=True,
        lora_request=LoRARequest("default", 1, LORA_PATH)
    )

    # One {decoded token: logprob} dict per prompt, taken from the single
    # generated position.
    log_probs = []
    for out in outputs:
        first_position = out.outputs[0].logprobs[0]
        log_probs.append(
            {lp.decoded_token: lp.logprob for lp in first_position.values()}
        )
    predictions_df = pd.DataFrame(log_probs)

    # Back to probability space, then renormalise over the two answers.
    yes_probs = np.exp(predictions_df[POSITIVE_ANSWER].values)
    no_probs = np.exp(predictions_df[NEGATIVE_ANSWER].values)
    y_pred_proba = yes_probs / (yes_probs + no_probs)  # Probability of positive class

    print("✅ Validation inference completed!")

    return y_true, y_pred_proba, validation_df


def main():
    """
    Run validation inference, save per-sample results, print the full
    evaluation report and a small threshold sweep.

    Writes /kaggle/working/validation_results.csv and
    /kaggle/working/threshold_analysis.csv.

    Returns:
        The metrics dictionary of the report at threshold 0.5.
    """
    # Imported once here rather than on every iteration of the sweep below.
    from evaluation_metrics import calculate_detailed_metrics

    default_threshold = 0.5

    print("Starting validation evaluation...")

    # Run inference and get results
    y_true, y_pred_proba, validation_df = run_validation_inference()

    # Save detailed results
    results_df = validation_df.copy()
    results_df['predicted_probability'] = y_pred_proba
    results_df['predicted_binary'] = (y_pred_proba >= default_threshold).astype(int)
    results_df.to_csv("/kaggle/working/validation_results.csv", index=False)

    print(f"\n{'='*60}")
    print("VALIDATION RESULTS SAVED TO: /kaggle/working/validation_results.csv")
    print(f"{'='*60}")

    # Generate comprehensive evaluation report
    metrics = comprehensive_evaluation_report(
        y_true, 
        y_pred_proba, 
        threshold=default_threshold,
        class_names=['No Violation', 'Violation'],
        model_name="Qwen 2.5 0.5B (TT-1 Validation)"
    )

    # Test different thresholds
    print(f"\n{'='*60}")
    print("THRESHOLD ANALYSIS")
    print(f"{'='*60}")

    thresholds = [0.3, 0.4, 0.5, 0.6, 0.7]
    threshold_results = []

    for threshold in thresholds:
        y_pred_binary = (y_pred_proba >= threshold).astype(int)
        metrics_th = calculate_detailed_metrics(y_true, y_pred_proba, y_pred_binary, threshold)

        threshold_results.append({
            'threshold': threshold,
            'accuracy': metrics_th['accuracy'],
            'precision': metrics_th['precision'],
            'recall': metrics_th['recall'],
            'f1_score': metrics_th['f1_score']
        })

        print(f"Threshold {threshold}: Acc={metrics_th['accuracy']:.3f}, "
              f"Prec={metrics_th['precision']:.3f}, Rec={metrics_th['recall']:.3f}, "
              f"F1={metrics_th['f1_score']:.3f}")

    # Save threshold analysis
    threshold_df = pd.DataFrame(threshold_results)
    threshold_df.to_csv("/kaggle/working/threshold_analysis.csv", index=False)

    print("\n✅ Validation evaluation completed!")
    print("📊 Results saved to /kaggle/working/validation_results.csv")
    print("📈 Threshold analysis saved to /kaggle/working/threshold_analysis.csv")

    return metrics


if __name__ == "__main__":
    main()

## Accelerate Configuration

In [None]:
%%writefile accelerate_config_validation.yaml
# Accelerate launch configuration used for train_validation.py:
# one machine, two processes, DeepSpeed backend, fp16 mixed precision.
compute_environment: LOCAL_MACHINE
debug: false
deepspeed_config:
  # Mirrors gradient_accumulation_steps / max_grad_norm /
  # per_device_train_batch_size set in train_validation.py.
  gradient_accumulation_steps: 4
  gradient_clipping: 1.0
  # NOTE(review): 4 (micro batch) x 4 (accumulation) x 2 (num_processes) = 32,
  # not 64 - confirm whether this key is honored and which value is intended.
  train_batch_size: 64
  train_micro_batch_size_per_gpu: 4
  
  # ZeRO stage 2 with no optimizer/parameter offloading.
  zero_stage: 2
  offload_optimizer_device: none
  offload_param_device: none
  zero3_init_flag: false
  
  # NOTE(review): the stage3_* keys presumably only matter for ZeRO stage 3,
  # while zero_stage is 2 above - confirm they are inert here.
  stage3_gather_16bit_weights_on_model_save: false
  stage3_max_live_parameters: 1e8
  stage3_max_reuse_distance: 1e8
  stage3_prefetch_bucket_size: 5e7
  stage3_param_persistence_threshold: 1e5
  
  zero_allow_untested_optimizer: true
  zero_force_ds_cpu_optimizer: false
  
  # NOTE(review): fp16 is pinned here and in mixed_precision below, while
  # train_validation.py switches to bf16 when the GPU supports it - confirm
  # the two settings cannot disagree on the target hardware.
  fp16:
    enabled: true
    loss_scale: 0
    initial_scale_power: 16
    loss_scale_window: 1000
    hysteresis: 2
    min_loss_scale: 1
  
distributed_type: DEEPSPEED
downcast_bf16: 'no'
dynamo_config:
  dynamo_backend: INDUCTOR
  dynamo_use_fullgraph: false
  dynamo_use_dynamic: false
enable_cpu_affinity: false
machine_rank: 0
main_training_function: main
mixed_precision: fp16
num_machines: 1
# Two worker processes on this single machine.
num_processes: 2
rdzv_backend: static
same_network: true
tpu_env: []
tpu_use_cluster: false
tpu_use_sudo: false
use_cpu: false

## Execution: Training and Validation

In [None]:
# Step 1: Run training on the controlled portion of training data
!accelerate launch --config_file accelerate_config_validation.yaml train_validation.py

In [None]:
# Step 2: Run validation inference and get detailed evaluation metrics
!python inference_validation.py

## Results Analysis and Visualization

In [None]:
# Load and examine validation results
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns

# Read back the files written by inference_validation.py
validation_results = pd.read_csv('/kaggle/working/validation_results.csv')
threshold_analysis = pd.read_csv('/kaggle/working/threshold_analysis.csv')

# Class counts, ordered by label, for the ground truth and the 0.5-threshold predictions
actual_counts = validation_results['rule_violation'].value_counts().sort_index()
predicted_counts = validation_results['predicted_binary'].value_counts().sort_index()

print("Validation Results Overview:")
print(f"Total validation samples: {len(validation_results)}")
print("Actual class distribution:")
print(actual_counts)
print("\nPredicted class distribution (threshold=0.5):")
print(predicted_counts)

# First rows, restricted to the columns that matter for a quick sanity check
preview_columns = ['body', 'rule_violation', 'predicted_probability', 'predicted_binary']
print("\nSample of validation results:")
print(validation_results[preview_columns].head())

In [None]:
# Plot threshold analysis: one panel per metric in a 2x2 grid.
# (No separate plt.figure() call: plt.subplots() creates the figure, and an
# extra call would only emit an additional empty figure.)
fig, axes = plt.subplots(2, 2, figsize=(15, 10))

# (column in threshold_analysis, display name, extra line style) per panel,
# in row-major order of the grid. The first panel keeps the default color.
panel_specs = [
    ('accuracy', 'Accuracy', {}),
    ('precision', 'Precision', {'color': 'orange'}),
    ('recall', 'Recall', {'color': 'green'}),
    ('f1_score', 'F1-Score', {'color': 'red'}),
]

for ax, (column, display_name, line_style) in zip(axes.flat, panel_specs):
    ax.plot(threshold_analysis['threshold'], threshold_analysis[column],
            'o-', linewidth=2, markersize=8, **line_style)
    ax.set_title(f'{display_name} vs Threshold', fontweight='bold')
    ax.set_xlabel('Threshold')
    ax.set_ylabel(display_name)
    ax.grid(True, alpha=0.3)

plt.tight_layout()
plt.suptitle('Threshold Analysis for TT-1 Validation', fontsize=16, fontweight='bold', y=1.02)
plt.show()

# Display threshold analysis table
print("\nThreshold Analysis:")
print(threshold_analysis.round(4))

In [None]:
# Additional Analysis: how the predicted probabilities are distributed
fig, (ax_overall, ax_by_class, ax_box) = plt.subplots(1, 3, figsize=(15, 5))

# Predicted probabilities split by the true label
is_violation = validation_results['rule_violation'] == 1
is_no_violation = validation_results['rule_violation'] == 0
no_violation = validation_results[is_no_violation]['predicted_probability']
violation = validation_results[is_violation]['predicted_probability']

# Panel 1: histogram over all validation samples
ax_overall.hist(validation_results['predicted_probability'], bins=50, alpha=0.7, edgecolor='black')
ax_overall.set_title('Distribution of Predicted Probabilities', fontweight='bold')
ax_overall.set_xlabel('Predicted Probability')
ax_overall.set_ylabel('Frequency')
ax_overall.grid(True, alpha=0.3)

# Panel 2: overlaid histograms, one per true class
ax_by_class.hist(no_violation, bins=30, alpha=0.7, label='No Violation (True)', color='blue', edgecolor='black')
ax_by_class.hist(violation, bins=30, alpha=0.7, label='Violation (True)', color='red', edgecolor='black')
ax_by_class.set_title('Predicted Probabilities by True Class', fontweight='bold')
ax_by_class.set_xlabel('Predicted Probability')
ax_by_class.set_ylabel('Frequency')
ax_by_class.legend()
ax_by_class.grid(True, alpha=0.3)

# Panel 3: box plot per true class
data_for_boxplot = [no_violation, violation]
labels = ['No Violation', 'Violation']
ax_box.boxplot(data_for_boxplot, labels=labels)
ax_box.set_title('Prediction Distribution by True Class', fontweight='bold')
ax_box.set_ylabel('Predicted Probability')
ax_box.grid(True, alpha=0.3)

fig.tight_layout()
plt.show()

# Calibration: compare predicted probabilities with observed positive rates
from sklearn.calibration import calibration_curve

# Reliability diagram data over 10 probability bins
true_labels = validation_results['rule_violation']
predicted_probabilities = validation_results['predicted_probability']
fraction_of_positives, mean_predicted_value = calibration_curve(
    true_labels,
    predicted_probabilities,
    n_bins=10
)

fig, ax = plt.subplots(figsize=(8, 6))
ax.plot(mean_predicted_value, fraction_of_positives, "s-", label="Model")
# Dotted diagonal: predicted probability equals observed frequency.
ax.plot([0, 1], [0, 1], "k:", label="Perfectly calibrated")
ax.set_xlabel('Mean Predicted Probability')
ax.set_ylabel('Fraction of Positives')
ax.set_title('Calibration Plot (Reliability Diagram)', fontweight='bold')
ax.legend()
ax.grid(True, alpha=0.3)
plt.show()

# Print summary statistics
# ("\n" rather than "\\n": the doubled backslash printed a literal \n.)
print("\nSummary Statistics:")
print(f"Mean predicted probability: {validation_results['predicted_probability'].mean():.4f}")
print(f"Std predicted probability: {validation_results['predicted_probability'].std():.4f}")
print(f"Min predicted probability: {validation_results['predicted_probability'].min():.4f}")
print(f"Max predicted probability: {validation_results['predicted_probability'].max():.4f}")

# Split the results into correct and incorrect predictions (threshold 0.5)
is_correct = validation_results['rule_violation'] == validation_results['predicted_binary']
correct_predictions = validation_results[is_correct]
# .copy() so the 'confidence' column added below lands on an independent frame
# instead of a filtered slice of validation_results (SettingWithCopyWarning).
incorrect_predictions = validation_results[~is_correct].copy()

print("\nModel Performance Summary:")
print(f"Correct predictions: {len(correct_predictions)} ({len(correct_predictions)/len(validation_results)*100:.1f}%)")
print(f"Incorrect predictions: {len(incorrect_predictions)} ({len(incorrect_predictions)/len(validation_results)*100:.1f}%)")

if not incorrect_predictions.empty:
    print("\nMost confident incorrect predictions:")
    # For incorrect predictions, show those with highest confidence (furthest from 0.5)
    incorrect_predictions['confidence'] = np.abs(incorrect_predictions['predicted_probability'] - 0.5)
    most_confident_wrong = incorrect_predictions.nlargest(3, 'confidence')
    for idx, row in most_confident_wrong.iterrows():
        print(f"True: {row['rule_violation']}, Pred: {row['predicted_binary']}, Prob: {row['predicted_probability']:.3f}")
        print(f"Comment: {row['body'][:100]}...")
        print(f"Rule: {row['rule'][:100]}...")
        print("-" * 50)

## Configuration Summary

This validation notebook provides a comprehensive evaluation of the TT-1 Qwen 2.5 0.5B model:

### Key Features:
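1. **Controlled Training Split**: Uses 70% of `train.csv` for training and holds out the remaining 30% (stratified by `rule_violation`) for validation; the training set is additionally augmented with the labeled rule examples taken from a 50% sample of `test.csv`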
2. **Comprehensive Metrics**: AUC, confusion matrix, classification report, ROC curve, precision-recall curve
3. **Threshold Analysis**: Tests multiple thresholds to find optimal operating point
4. **Calibration Analysis**: Evaluates how well predicted probabilities match actual outcomes
5. **Error Analysis**: Identifies most confident incorrect predictions for model improvement

### Configuration:
- **Training Percentage**: 70% of original training data
- **Validation Percentage**: 30% of original training data  
- **Model**: Qwen 2.5 0.5B with GPTQ quantization
- **LoRA Configuration**: r=16, alpha=32, dropout=0.1
- **Training**: 1 epoch with paged_adamw_8bit optimizer

### Output Files:
- `validation_results.csv`: Detailed per-sample results with predictions and probabilities
- `threshold_analysis.csv`: Performance metrics across different thresholds
- Comprehensive visualizations and analysis charts

This setup allows for proper validation without data leakage and provides insights for model improvement and deployment decisions.

# TT-1 Validation Notebook: Qwen 2.5 0.5B Model

This notebook trains on a controlled percentage of training data and validates on the remaining portion.
It provides detailed evaluation metrics including:
- AUC Score
- Confusion Matrix 
- Classification Report
- ROC Curve
- Precision-Recall Curve

**Training Strategy:**
- Train on X% of training data
- Validate on (100-X)% of training data
- Show comprehensive evaluation results

## Setup and Dependencies

In [None]:
!uv pip install --system --no-index --find-links='/kaggle/input/jigsaw-packages2/whls/' 'trl==0.21.0' 'optimum==1.27.0' 'auto-gptq==0.7.1' 'bitsandbytes==0.46.1' 'deepspeed==0.17.4' 'logits-processor-zoo==0.2.1' 'vllm==0.10.0'
!uv pip install --system --no-index --find-links='/kaggle/input/jigsaw-packages2/whls/' 'triton==3.2.0'
!uv pip install --system --no-index --find-links='/kaggle/input/jigsaw-packages2/whls/' 'clean-text'
!uv pip install --system --no-index -U --no-deps --find-links='/kaggle/input/jigsaw-packages2/whls/' 'peft' 'accelerate' 'datasets'
!pip install scikit-learn matplotlib seaborn

## Configuration Constants

In [None]:
%%writefile constants_validation.py
# Shared settings imported by utils_validation.py, train_validation.py and
# inference_validation.py.

# Base model directory, output directory for the trained LoRA adapter, and
# the directory holding train.csv / test.csv.
BASE_MODEL_PATH = "/kaggle/input/qwen2.5/transformers/0.5b-instruct-gptq-int4/1"
LORA_PATH = "output_validation/"
# NOTE: ends with "/", and utils_validation joins it as f"{data_path}/train.csv",
# so the resulting paths contain a doubled slash.
DATA_PATH = "/kaggle/input/jigsaw-agile-community-rules/"

# Answer strings used as training completions and as the two choices the
# logits processor restricts inference to.
POSITIVE_ANSWER = "Yes"
NEGATIVE_ANSWER = "No"
# Marker that precedes every answer in the prompt; the prompt ends with it.
COMPLETE_PHRASE = "Answer:"
BASE_PROMPT = '''You are given a comment from reddit and a rule. Your task is to classify whether the comment violates the rule. Only respond Yes/No.'''

# Validation specific settings
# NOTE: the split size is driven by VALIDATION_PERCENTAGE only (passed as
# test_size to train_test_split); TRAIN_PERCENTAGE is used just for the log
# message, so keep the two values summing to 1.0.
TRAIN_PERCENTAGE = 0.7  # Use 70% of training data for training
VALIDATION_PERCENTAGE = 0.3  # Use 30% of training data for validation