## 1. Environment Setup

In [None]:
import os
import sys
import json
import numpy as np
import matplotlib.pyplot as plt
from collections import defaultdict
from pathlib import Path
import subprocess

# Resolve the repository checkouts relative to the current working directory.
WORKSPACE_ROOT = Path(os.getcwd())
ADELAIDET_ROOT, MASKREFINE_ROOT, DETECTRON2_ROOT = (
    WORKSPACE_ROOT / repo_dir
    for repo_dir in ("AdelaiDet", "MaskRefineNet", "detectron2")
)

# DETECTRON2_ROOT is defined above but not echoed here.
for label, location in (
    ("Workspace", WORKSPACE_ROOT),
    ("AdelaiDet", ADELAIDET_ROOT),
    ("MaskRefineNet", MASKREFINE_ROOT),
):
    print(f"{label} Root: {location}")

In [None]:
# Configuration parameters
class Config:
    """Notebook-wide settings plus the annotation file names derived from them."""

    # Dataset configuration
    DATA_ROOT = "YOUR_DATA_ROOT"  # <-- Update this to your data root
    DATASET = "coco"  # or "BDD100K"
    SUBSET = "5p"  # Options: 1p, 2p, 5p, 10p, 20p, 30p, 50p

    # Training configuration
    NUM_GPUS = 1  # Will be auto-detected if not specified
    SEED = 1

    def _subset_json(self, suffix):
        """Return the annotation file name of the current subset for one suffix."""
        return f"instances_train2017_{self.SUBSET}_{suffix}.json"

    # Paths (auto-generated based on subset)
    @property
    def annotations_dir(self):
        """Annotation directory; always under "coco", independent of DATASET."""
        return Path(self.DATA_ROOT).joinpath("coco", "annotations")

    @property
    def strong_json(self):
        """File name (no directory) of the strong-label annotations."""
        return self._subset_json("s")

    @property
    def weak_json(self):
        """File name (no directory) of the weak-label annotations."""
        return self._subset_json("w")

    @property
    def sw_refined_json(self):
        """File name (no directory) of the merged strong + refined-weak annotations."""
        return self._subset_json("sw_refined")

# Shared settings object used by the cells below
config = Config()
for label, value in (("Dataset", config.DATASET), ("Subset", config.SUBSET)):
    print(f"{label}: {value}")

In [None]:
# Auto-detect number of GPUs; config.NUM_GPUS keeps its preset value when no
# CUDA device is visible or PyTorch cannot be imported.
try:
    import torch

    if not torch.cuda.is_available():
        print("No GPU detected, using CPU (not recommended for training)")
    else:
        config.NUM_GPUS = torch.cuda.device_count()
        print(f"Detected {config.NUM_GPUS} GPU(s)")
        for gpu_index in range(config.NUM_GPUS):
            gpu_name = torch.cuda.get_device_name(gpu_index)
            print(f"  GPU {gpu_index}: {gpu_name}")
except ImportError:
    print("PyTorch not installed")

## 2. Dataset Statistics Analysis

In [None]:
def load_coco_annotations(json_path):
    """Load a COCO-format annotation file.

    Args:
        json_path: Location of the JSON file (string or path-like object).

    Returns:
        The decoded JSON document, exactly as produced by ``json.load``.

    Raises:
        OSError: If the file cannot be opened.
        json.JSONDecodeError: If the content is not valid JSON.
    """
    # Decode explicitly as UTF-8: without it the platform's locale encoding is
    # used, which can mis-decode non-ASCII text on some systems.
    with open(json_path, 'r', encoding='utf-8') as f:
        return json.load(f)

def analyze_annotations(data, name="Dataset"):
    """Summarise a COCO-style annotation dictionary.

    Args:
        data: Mapping that may contain 'images', 'annotations' and
            'categories' lists; a missing key is treated as an empty list.
        name: Label stored under 'name' in the result.

    Returns:
        Dict with the image/annotation/category counts and a
        'category_distribution' mapping (category name -> annotation count;
        ids without a category entry are reported as 'cat_<id>'). When at
        least one annotation exists it also holds avg/max/min annotations per
        image, computed over the images that have annotations.
    """
    images = data.get('images', [])
    annotations = data.get('annotations', [])
    categories = data.get('categories', [])

    id_to_name = {}
    for category in categories:
        id_to_name[category['id']] = category['name']

    # One pass feeds both tallies; each dict keeps first-seen insertion order.
    per_category = defaultdict(int)
    per_image = defaultdict(int)
    for annotation in annotations:
        category_id = annotation['category_id']
        label = id_to_name.get(category_id, f'cat_{category_id}')
        per_category[label] += 1
        per_image[annotation['image_id']] += 1

    stats = {
        'name': name,
        'num_images': len(images),
        'num_annotations': len(annotations),
        'num_categories': len(categories),
        'category_distribution': dict(per_category),
    }

    counts_per_image = list(per_image.values())
    if counts_per_image:
        stats['avg_annotations_per_image'] = np.mean(counts_per_image)
        stats['max_annotations_per_image'] = max(counts_per_image)
        stats['min_annotations_per_image'] = min(counts_per_image)

    return stats

def print_stats(stats):
    """Print a statistics dict as a small text report.

    Args:
        stats: Dict holding 'name', 'num_images', 'num_annotations' and
            'num_categories'; the avg/max/min per-image lines are printed
            only when 'avg_annotations_per_image' is present.
    """
    banner = '=' * 50
    print(f"\n{banner}")
    print(f"Dataset: {stats['name']}")
    print(banner)

    # (label, key, format spec) for the lines that are always printed
    for label, key, spec in (
        ("Images", 'num_images', ','),
        ("Annotations", 'num_annotations', ','),
        ("Categories", 'num_categories', ''),
    ):
        print(f"  {label}: {stats[key]:{spec}}")

    if 'avg_annotations_per_image' not in stats:
        return

    for label, key, spec in (
        ("Avg", 'avg_annotations_per_image', '.2f'),
        ("Max", 'max_annotations_per_image', ''),
        ("Min", 'min_annotations_per_image', ''),
    ):
        print(f"  {label} annotations/image: {stats[key]:{spec}}")

In [None]:
# Analyze available annotation files
def analyze_all_subsets(annotations_dir):
    """Analyze every strong/weak annotation file found for the known subsets.

    Args:
        annotations_dir: Directory expected to contain
            ``instances_train2017_<subset>_s.json`` / ``_w.json`` files.

    Returns:
        List of statistics dicts, ordered by subset with the strong file
        before the weak file. Empty when the directory does not exist (a hint
        is printed in that case) or when no file is found.
    """
    root = Path(annotations_dir)
    collected = []

    if not root.exists():
        print(f"Annotations directory not found: {root}")
        print("Please update config.DATA_ROOT to point to your data directory")
        return collected

    variants = (("s", "Strong"), ("w", "Weak"))
    for subset in ('1p', '2p', '5p', '10p', '20p', '30p', '50p'):
        for suffix, label in variants:
            candidate = root / f"instances_train2017_{subset}_{suffix}.json"
            if not candidate.exists():
                continue
            summary = analyze_annotations(
                load_coco_annotations(candidate), f"{subset} {label}"
            )
            collected.append(summary)
            print_stats(summary)

    return collected

# Run analysis on the directory derived from config.DATA_ROOT; all_stats stays
# an empty list when that directory does not exist.
all_stats = analyze_all_subsets(config.annotations_dir)

In [None]:
def plot_category_distribution(stats_list, top_n=20):
    """Draw one bar chart per statistics entry with its most frequent categories.

    Args:
        stats_list: Statistics dicts carrying 'name' and
            'category_distribution'; entries with an empty distribution get
            an empty panel.
        top_n: Maximum number of categories shown per panel.
    """
    if not stats_list:
        print("No statistics available to plot")
        return

    n_rows = len(stats_list)
    fig, axes = plt.subplots(n_rows, 1, figsize=(14, 4*n_rows))
    # A single row is wrapped so the loop below can zip over it.
    panels = [axes] if n_rows == 1 else axes

    for panel, entry in zip(panels, stats_list):
        distribution = entry.get('category_distribution', {})
        if distribution:
            # Highest counts first, truncated to top_n
            ranked = sorted(distribution.items(), key=lambda pair: pair[1], reverse=True)
            names, totals = zip(*ranked[:top_n])
            positions = range(len(names))

            panel.bar(positions, totals, color='steelblue')
            panel.set_xticks(positions)
            panel.set_xticklabels(names, rotation=45, ha='right')
            panel.set_title(f"{entry['name']} - Top {top_n} Categories")
            panel.set_ylabel('Count')

    plt.tight_layout()
    plt.show()

# Plot distributions for the first two collected entries. A subset can add both
# a "Strong" and a "Weak" entry, so these are not necessarily two subsets.
first_entries = all_stats[:2]
if first_entries:
    plot_category_distribution(first_entries)

## 3. Training Pipeline Configuration

In [None]:
# Training hyperparameters for different subsets.
# Only the LR decay steps and the teacher iteration budget vary per subset;
# the learning rate and the MaskRefineNet settings are the same everywhere.
_SUBSET_SCHEDULES = (
    # (subset, decay_steps, train_iter)
    ('1p', (5000, 8000), 10001),
    ('2p', (15000, 20000), 25001),
    ('5p', (20000, 25000), 30001),
    ('10p', (35000, 40000), 45001),
    ('20p', (90000, 110000), 120001),
    ('30p', (120000, 150000), 160001),
    ('50p', (210000, 250000), 270001),
)

TRAINING_CONFIGS = {
    subset: {
        'learning_rate': 0.05,
        'decay_steps': decay_steps,
        'train_iter': train_iter,
        'mrn_train_iters': 200000,
        'mrn_warm_iters': 2000,
    }
    for subset, decay_steps, train_iter in _SUBSET_SCHEDULES
}

# Get config for current subset. Unknown subsets fall back to the '5p' schedule;
# announce that, otherwise the header below would present the 5p values as if
# they belonged to config.SUBSET.
if config.SUBSET not in TRAINING_CONFIGS:
    print(f"Warning: no training configuration for subset '{config.SUBSET}'; "
          "falling back to the '5p' settings")
train_config = TRAINING_CONFIGS.get(config.SUBSET, TRAINING_CONFIGS['5p'])
print(f"Training Configuration for {config.SUBSET}:")
for key, value in train_config.items():
    print(f"  {key}: {value}")

## 4. Step 1: Train Teacher Network

In [None]:
def generate_teacher_training_command(config, train_config):
    """Generate the shell snippet that trains the teacher network.

    Args:
        config: Object providing ``SUBSET``, ``DATA_ROOT``, ``NUM_GPUS`` and
            ``SEED``.
        train_config: Mapping with ``decay_steps``, ``train_iter`` and
            ``learning_rate``.

    Returns:
        Tuple ``(cmd, exp_name)``: the multi-line command text and the
        experiment name used for the training output directory.
    """
    exp_name = f"SOLOv2_R101_coco{config.SUBSET}_teacher"
    trainsets = f"('coco_2017_train_{config.SUBSET}_s',)"
    testsets = "('coco_2017_val',)"

    # The tuple-literal values contain parentheses (and SOLVER.STEPS a space),
    # so they are double-quoted: unquoted parentheses are a shell syntax error
    # when the snippet is pasted into a terminal.
    cmd = f"""
cd {ADELAIDET_ROOT}

export DETECTRON2_DATASETS={config.DATA_ROOT}

OMP_NUM_THREADS=1 python -W ignore tools/train_net.py \\
    --config-file configs/PointWSSIS/R101_teacher.yaml \\
    --num-gpus {config.NUM_GPUS} \\
    SEED {config.SEED} \\
    OUTPUT_DIR training_dir/{exp_name} \\
    DATASETS.TRAIN "{trainsets}" \\
    DATASETS.TEST "{testsets}" \\
    SOLVER.STEPS "{train_config['decay_steps']}" \\
    SOLVER.MAX_ITER {train_config['train_iter']} \\
    SOLVER.BASE_LR {train_config['learning_rate']} \\
    MODEL.SOLOV2.PROMPT point \\
    MODEL.SOLOV2.EVAL_PSEUDO_LABEL True \\
    TEST.EVAL_PERIOD 5000
"""
    return cmd, exp_name

# Build the teacher command and keep the experiment name for later cells.
teacher_cmd, teacher_exp_name = generate_teacher_training_command(config, train_config)
print("Teacher Network Training Command:", teacher_cmd, sep="\n")

In [None]:
# Run teacher training (disabled by default; flip RUN_TRAINING to execute)
# Note: This will take a significant amount of time depending on your hardware

RUN_TRAINING = False  # Set to True to run training

if RUN_TRAINING:
    os.chdir(ADELAIDET_ROOT)
    os.environ['DETECTRON2_DATASETS'] = config.DATA_ROOT

    # Build the argument vector directly instead of slicing the printed shell
    # snippet: taking its second-to-last line only ever produced
    # "TEST.EVAL_PERIOD 5000", which is not an executable. Passing a list also
    # means the tuple-literal values need no shell quoting.
    teacher_argv = [
        sys.executable, '-W', 'ignore', 'tools/train_net.py',
        '--config-file', 'configs/PointWSSIS/R101_teacher.yaml',
        '--num-gpus', str(config.NUM_GPUS),
        'SEED', str(config.SEED),
        'OUTPUT_DIR', f'training_dir/{teacher_exp_name}',
        'DATASETS.TRAIN', f"('coco_2017_train_{config.SUBSET}_s',)",
        'DATASETS.TEST', "('coco_2017_val',)",
        'SOLVER.STEPS', str(train_config['decay_steps']),
        'SOLVER.MAX_ITER', str(train_config['train_iter']),
        'SOLVER.BASE_LR', str(train_config['learning_rate']),
        'MODEL.SOLOV2.PROMPT', 'point',
        'MODEL.SOLOV2.EVAL_PSEUDO_LABEL', 'True',
        'TEST.EVAL_PERIOD', '5000',
    ]

    # Run the training command; OMP_NUM_THREADS mirrors the printed snippet.
    result = subprocess.run(
        teacher_argv,
        env=dict(os.environ, OMP_NUM_THREADS='1'),
        capture_output=True,
        text=True
    )
    print(result.stdout)
    if result.stderr:
        print("Errors:", result.stderr)
    if result.returncode != 0:
        print(f"Training exited with return code {result.returncode}")
else:
    print("Training is disabled. Set RUN_TRAINING = True to execute.")

## 5. Step 2: Generate Pseudo Labels (Inference)

In [None]:
def generate_inference_command(config, train_config, teacher_exp_name):
    """Generate the shell snippet that runs the teacher on the weak split.

    Args:
        config: Object providing ``SUBSET``, ``DATA_ROOT`` and ``NUM_GPUS``.
        train_config: Accepted for signature symmetry with the other
            generators; not used by this command.
        teacher_exp_name: Experiment name of the trained teacher; selects the
            checkpoint directory and names the inference output directory.

    Returns:
        The multi-line command text.
    """
    testsets = f"('coco_2017_train_{config.SUBSET}_w',)"

    # DATASETS.TEST is double-quoted like FPN_SCALE_RANGES below: unquoted
    # parentheses are a shell syntax error when the snippet is pasted.
    cmd = f"""
cd {ADELAIDET_ROOT}

export DETECTRON2_DATASETS={config.DATA_ROOT}

OMP_NUM_THREADS=1 python -W ignore tools/train_net.py \\
    --config-file configs/PointWSSIS/R101_teacher.yaml \\
    --num-gpus {config.NUM_GPUS} \\
    --eval-only \\
    MODEL.WEIGHTS training_dir/{teacher_exp_name}/model_final.pth \\
    OUTPUT_DIR inference_dir/{teacher_exp_name} \\
    MODEL.SOLOV2.FPN_SCALE_RANGES "((1,100000),(1,100000),(1,100000),(1,100000),(1,100000))" \\
    MODEL.SOLOV2.NMS_TYPE mask \\
    MODEL.SOLOV2.PROMPT point \\
    DATASETS.TEST "{testsets}"
"""
    return cmd

# Build and show the pseudo-label inference command for the trained teacher.
inference_cmd = generate_inference_command(config, train_config, teacher_exp_name)
print("Pseudo Label Generation Command:", inference_cmd, sep="\n")

## 6. Step 3: Train MaskRefineNet

In [None]:
def generate_mrn_training_command(config, train_config, teacher_exp_name):
    """Generate the shell snippet that trains MaskRefineNet.

    Args:
        config: Object providing ``SUBSET``, ``DATA_ROOT``, ``NUM_GPUS`` and
            ``strong_json``.
        train_config: Mapping with ``mrn_train_iters`` and ``mrn_warm_iters``.
        teacher_exp_name: Teacher experiment name; its inference directories
            (plain, ``_strong_1`` and ``_strong_2``) supply the prediction files.

    Returns:
        Tuple ``(cmd, mrn_exp_name)``: the multi-line command text and the
        MaskRefineNet experiment name.
    """
    mrn_exp_name = f"MRN_{config.SUBSET}"

    inference_root = f"../AdelaiDet/inference_dir/{teacher_exp_name}"
    predictions_file = "inference/instances_predictions.pth"

    # One entry per output line; the joiner re-adds the trailing " \" continuations.
    torchrun_lines = [
        f"torchrun --standalone --nnodes=1 --nproc_per_node={config.NUM_GPUS} main.py",
        f"    --data_root {config.DATA_ROOT}",
        "    --workspace results",
        f"    --exp_name {mrn_exp_name}",
        f"    --train_iters {train_config['mrn_train_iters']}",
        f"    --warm_iters {train_config['mrn_warm_iters']}",
        "    --val_interval 5000",
        f"    --weak_pth {inference_root}_strong_1/{predictions_file}",
        # second --weak_pth value, aligned under the first one
        f"{' ' * 15}{inference_root}_strong_2/{predictions_file}",
        f"    --gt_json {config.strong_json}",
        f"    --eval_pth {inference_root}/{predictions_file}",
        "    --amp",
    ]
    cmd = f"\ncd {MASKREFINE_ROOT}\n\n" + " \\\n".join(torchrun_lines) + "\n"
    return cmd, mrn_exp_name

# Build the MaskRefineNet command and keep its experiment name for later cells.
mrn_cmd, mrn_exp_name = generate_mrn_training_command(config, train_config, teacher_exp_name)
print("MaskRefineNet Training Command:", mrn_cmd, sep="\n")

## 7. Step 4: Merge Refined Labels

In [None]:
def generate_merge_command(config, teacher_exp_name, mrn_exp_name):
    """Generate the shell snippet that merges strong and refined weak labels.

    Args:
        config: Object providing ``SUBSET``, ``DATA_ROOT`` and ``NUM_GPUS``.
        teacher_exp_name: Teacher experiment name; locates the weak-split
            predictions.
        mrn_exp_name: MaskRefineNet experiment name; locates the checkpoint.

    Returns:
        The multi-line command text.
    """
    # Shared prefix of the three annotation files named on the command line
    ann_prefix = f"{config.DATA_ROOT}/coco/annotations/instances_train2017_{config.SUBSET}"

    # One entry per output line; the joiner re-adds the trailing " \" continuations.
    torchrun_lines = [
        f"torchrun --standalone --nnodes=1 --nproc_per_node={config.NUM_GPUS} merge_strong_and_refined_weak_labels.py",
        f"    --data_root {config.DATA_ROOT}",
        f"    --ckpt results/{mrn_exp_name}/ckpt/best_AP.pt",
        "    --dataset coco",
        "    --size 256",
        f"    --weak_pth ../AdelaiDet/inference_dir/{teacher_exp_name}/inference/instances_predictions.pth",
        f"    --weak_json {ann_prefix}_w.json",
        f"    --strong_json {ann_prefix}_s.json",
        f"    --save_path {ann_prefix}_sw_refined.json",
    ]
    return f"\ncd {MASKREFINE_ROOT}\n\n" + " \\\n".join(torchrun_lines) + "\n"

# Build and show the label-merging command.
merge_cmd = generate_merge_command(config, teacher_exp_name, mrn_exp_name)
print("Label Merging Command:", merge_cmd, sep="\n")

## 8. Step 5: Train Student Network

In [None]:
def generate_student_training_command(config):
    """Generate the shell snippet that trains the student network.

    Args:
        config: Object providing ``SUBSET``, ``DATA_ROOT``, ``NUM_GPUS`` and
            ``SEED``.

    Returns:
        Tuple ``(cmd, exp_name)``: the multi-line command text and the
        experiment name used for the training output directory.
    """
    exp_name = f"SOLOv2_R101_coco{config.SUBSET}_sw_refined"
    trainsets = f"('coco_2017_train_{config.SUBSET}_sw_refined',)"
    testsets = "('coco_2017_val',)"

    # The dataset tuples are double-quoted: unquoted parentheses are a shell
    # syntax error when the snippet is pasted into a terminal.
    cmd = f"""
cd {ADELAIDET_ROOT}

export DETECTRON2_DATASETS={config.DATA_ROOT}

OMP_NUM_THREADS=1 python -W ignore tools/train_net.py \\
    --config-file configs/SOLOv2/R101_3x.yaml \\
    --num-gpus {config.NUM_GPUS} \\
    SEED {config.SEED} \\
    OUTPUT_DIR training_dir/{exp_name} \\
    DATASETS.TRAIN "{trainsets}" \\
    DATASETS.TEST "{testsets}" \\
    TEST.EVAL_PERIOD 5000
"""
    return cmd, exp_name

# Build the student command and keep the experiment name for later cells.
student_cmd, student_exp_name = generate_student_training_command(config)
print("Student Network Training Command:", student_cmd, sep="\n")

## 9. Evaluation & Metrics

In [None]:
def generate_evaluation_command(config, exp_name, model_path):
    """Generate the shell snippet that evaluates a model on ``coco_2017_val``.

    Args:
        config: Object providing ``DATA_ROOT`` and ``NUM_GPUS``.
        exp_name: Name of the sub-directory of ``evaluation_dir`` that
            receives the results.
        model_path: Checkpoint passed as ``MODEL.WEIGHTS``; it is inserted
            as given.

    Returns:
        The multi-line command text.
    """
    testsets = "('coco_2017_val',)"

    # DATASETS.TEST is double-quoted: unquoted parentheses are a shell syntax
    # error when the snippet is pasted into a terminal.
    cmd = f"""
cd {ADELAIDET_ROOT}

export DETECTRON2_DATASETS={config.DATA_ROOT}

OMP_NUM_THREADS=1 python -W ignore tools/train_net.py \\
    --config-file configs/SOLOv2/R101_3x.yaml \\
    --num-gpus {config.NUM_GPUS} \\
    --eval-only \\
    MODEL.WEIGHTS {model_path} \\
    OUTPUT_DIR evaluation_dir/{exp_name} \\
    DATASETS.TEST "{testsets}"
"""
    return cmd

# Evaluation command for the student network's final checkpoint
student_model_path = f"training_dir/{student_exp_name}/model_final.pth"
eval_cmd = generate_evaluation_command(config, student_exp_name, student_model_path)
print("Evaluation Command:", eval_cmd, sep="\n")

In [None]:
# Expected results comparison: reference score per subset, in the same order
# as the subsets are listed elsewhere in this notebook.
EXPECTED_RESULTS = dict(zip(
    ('1p', '2p', '5p', '10p', '20p', '30p', '50p'),
    (24.0, 25.3, 33.7, 35.8, 37.1, 38.0, 38.8),
))

def plot_expected_results():
    """Draw a bar chart of EXPECTED_RESULTS with each value printed above its bar."""
    labels = list(EXPECTED_RESULTS)
    scores = list(EXPECTED_RESULTS.values())

    plt.figure(figsize=(10, 6))
    bar_container = plt.bar(labels, scores, color='steelblue', edgecolor='black')

    # Centre each value label horizontally, half a unit above the bar top
    for rect, score in zip(bar_container, scores):
        x_center = rect.get_x() + rect.get_width()/2
        y_text = rect.get_height() + 0.5
        plt.text(x_center, y_text, f'{score}%',
                 ha='center', va='bottom', fontsize=11)

    plt.xlabel('Subset (% of fully labeled data)', fontsize=12)
    plt.ylabel('COCO test-dev mAP (%)', fontsize=12)
    plt.title('PointWSSIS Expected Results (from Paper)', fontsize=14)
    plt.ylim(0, 45)
    plt.grid(axis='y', alpha=0.3)
    plt.tight_layout()
    plt.show()

# Render the bar chart of the reference numbers stored in EXPECTED_RESULTS.
plot_expected_results()

## 10. Training Log Analysis

In [None]:
import re
from datetime import datetime

def parse_training_log(log_path):
    """Parse a training log and extract per-iteration and evaluation metrics.

    A line contributes a training sample when it contains both ``iter: <int>``
    and ``total_loss: <number>``. A line contributes an evaluation sample when
    it contains ``segm/AP: <number>``.

    Args:
        log_path: Location of the log file (string or path-like object).

    Returns:
        Dict with the lists 'iterations', 'total_loss', 'lr', 'time' and
        'segm_AP'. 'iterations', 'total_loss' and 'lr' always have the same
        length: a training line without a learning rate stores NaN in 'lr'
        so the three lists stay index-aligned. 'time' is never filled by this
        parser. All lists are empty when the file does not exist (a notice is
        printed in that case).
    """
    metrics = {
        'iterations': [],
        'total_loss': [],
        'lr': [],
        'time': [],
        'segm_AP': [],
    }

    if not Path(log_path).exists():
        print(f"Log file not found: {log_path}")
        return metrics

    # Full float syntax including exponents. The earlier digit/dot-only
    # classes cut "2.5e-01" down to "2.5" and could capture strings float()
    # rejects.
    number = r'([-+]?(?:\d+\.?\d*|\.\d+)(?:[eE][-+]?\d+)?)'
    iter_re = re.compile(r'iter: (\d+)')
    loss_re = re.compile(r'total_loss: ' + number)
    lr_re = re.compile(r'lr: ' + number)
    ap_re = re.compile(r'segm/AP: ' + number)

    with open(log_path, 'r') as f:
        for line in f:
            # Parse training iterations
            if 'total_loss' in line:
                iter_match = iter_re.search(line)
                loss_match = loss_re.search(line)

                if iter_match and loss_match:
                    metrics['iterations'].append(int(iter_match.group(1)))
                    metrics['total_loss'].append(float(loss_match.group(1)))
                    # Keep 'lr' aligned with 'iterations' even when the line
                    # carries no learning rate.
                    lr_match = lr_re.search(line)
                    metrics['lr'].append(
                        float(lr_match.group(1)) if lr_match else float('nan')
                    )

            # Parse evaluation metrics
            if 'segm/AP' in line:
                ap_match = ap_re.search(line)
                if ap_match:
                    metrics['segm_AP'].append(float(ap_match.group(1)))

    return metrics

def plot_training_metrics(metrics, title="Training Metrics"):
    """Draw a 2x2 overview figure of parsed training metrics.

    Panels: raw loss, learning rate, moving-average loss and evaluation AP.
    A panel stays empty when its data is missing (for the smoothed loss: ten
    or fewer loss samples). Prints a notice and returns without plotting when
    no iterations were recorded.

    Args:
        metrics: Dict with the lists 'iterations', 'total_loss', 'lr' and
            'segm_AP'.
        title: Figure-level title.
    """
    steps = metrics['iterations']
    if not steps:
        print("No training metrics to plot")
        return

    def decorate(ax, xlabel, ylabel, heading):
        """Apply axis labels, a panel title and a light grid."""
        ax.set_xlabel(xlabel)
        ax.set_ylabel(ylabel)
        ax.set_title(heading)
        ax.grid(True, alpha=0.3)

    fig, panels = plt.subplots(2, 2, figsize=(14, 10))
    (loss_ax, lr_ax), (smooth_ax, ap_ax) = panels

    # Loss curve
    losses = metrics['total_loss']
    loss_ax.plot(steps, losses, 'b-', alpha=0.7)
    decorate(loss_ax, 'Iteration', 'Total Loss', 'Training Loss')

    # Learning rate
    rates = metrics['lr']
    if rates:
        lr_ax.plot(steps, rates, 'g-')
        decorate(lr_ax, 'Iteration', 'Learning Rate', 'Learning Rate Schedule')

    # Smoothed loss: moving average over a tenth of the samples, capped at 100
    if len(losses) > 10:
        window = min(100, len(losses) // 10)
        kernel = np.ones(window)/window
        smoothed = np.convolve(losses, kernel, mode='valid')
        # 'valid' drops the first window-1 positions, so skip them on x as well
        smooth_ax.plot(steps[window-1:], smoothed, 'r-')
        decorate(smooth_ax, 'Iteration', 'Smoothed Loss', 'Smoothed Training Loss')

    # AP over time
    ap_values = metrics['segm_AP']
    if ap_values:
        ap_ax.plot(range(len(ap_values)), ap_values, 'mo-')
        decorate(ap_ax, 'Evaluation Index', 'Segmentation AP', 'Validation AP')

    plt.suptitle(title, fontsize=14)
    plt.tight_layout()
    plt.show()

# Example: Parse and plot training log
# log_path = ADELAIDET_ROOT / "training_dir" / teacher_exp_name / "log.txt"
# metrics = parse_training_log(log_path)
# plot_training_metrics(metrics, f"Teacher Network Training ({config.SUBSET})")

## 11. Complete Training Pipeline Script

In [None]:
def generate_complete_pipeline_script(config, train_config):
    """Generate a complete bash script for the entire training pipeline.

    The script trains the teacher, generates predictions for the weak and
    strong splits, trains MaskRefineNet, merges the labels and trains the
    student. It stops at the first failing command (``set -e``).

    Args:
        config: Object providing ``SUBSET``, ``DATA_ROOT``, ``NUM_GPUS`` and
            ``SEED``.
        train_config: Mapping with ``decay_steps``, ``train_iter``,
            ``learning_rate``, ``mrn_train_iters`` and ``mrn_warm_iters``.

    Returns:
        The script text, starting with the ``#!/bin/bash`` line.
    """
    # SOLVER.STEPS renders as a tuple literal such as "(20000, 25000)". It is
    # double-quoted in the template because a bare "(" is a bash syntax error,
    # which under `set -e` would abort the script at the first training step.
    script = f"""#!/bin/bash
# PointWSSIS Complete Training Pipeline
# Subset: {config.SUBSET}
# Generated: {datetime.now().strftime('%Y-%m-%d %H:%M:%S')}

set -e  # Exit on error

# Configuration
ROOT="{config.DATA_ROOT}"
NGPUS={config.NUM_GPUS}
SUBSET="{config.SUBSET}"
SEED={config.SEED}

export DETECTRON2_DATASETS=${{ROOT}}

# Step 1: Train Teacher Network
echo "=== Step 1: Training Teacher Network ==="
cd {ADELAIDET_ROOT}

EXP_NAME="SOLOv2_R101_coco${{SUBSET}}_teacher"
TRAINSETS="('coco_2017_train_${{SUBSET}}_s',)"
TESTSETS="('coco_2017_val',)"

OMP_NUM_THREADS=1 python -W ignore tools/train_net.py \\
    --config-file configs/PointWSSIS/R101_teacher.yaml \\
    --num-gpus ${{NGPUS}} \\
    SEED ${{SEED}} \\
    OUTPUT_DIR training_dir/${{EXP_NAME}} \\
    DATASETS.TRAIN ${{TRAINSETS}} \\
    DATASETS.TEST ${{TESTSETS}} \\
    SOLVER.STEPS "{train_config['decay_steps']}" \\
    SOLVER.MAX_ITER {train_config['train_iter']} \\
    SOLVER.BASE_LR {train_config['learning_rate']} \\
    MODEL.SOLOV2.PROMPT point \\
    MODEL.SOLOV2.EVAL_PSEUDO_LABEL True \\
    TEST.EVAL_PERIOD 5000

# Step 2: Generate pseudo labels for weak data
echo "=== Step 2: Generating Pseudo Labels ==="
OMP_NUM_THREADS=1 python -W ignore tools/train_net.py \\
    --config-file configs/PointWSSIS/R101_teacher.yaml \\
    --num-gpus ${{NGPUS}} \\
    --eval-only \\
    MODEL.WEIGHTS training_dir/${{EXP_NAME}}/model_final.pth \\
    OUTPUT_DIR inference_dir/${{EXP_NAME}} \\
    MODEL.SOLOV2.FPN_SCALE_RANGES "((1,100000),(1,100000),(1,100000),(1,100000),(1,100000))" \\
    MODEL.SOLOV2.NMS_TYPE mask \\
    MODEL.SOLOV2.PROMPT point \\
    DATASETS.TEST "('coco_2017_train_${{SUBSET}}_w',)"

# Step 3: Generate predictions for strong data (for MRN training)
echo "=== Step 3: Generating Strong Data Predictions ==="
# Get intermediate checkpoints for diverse predictions
CKPT_1=$(ls training_dir/${{EXP_NAME}}/model_*.pth | head -n 1)
CKPT_2=$(ls training_dir/${{EXP_NAME}}/model_*.pth | tail -n 2 | head -n 1)

OMP_NUM_THREADS=1 python -W ignore tools/train_net.py \\
    --config-file configs/PointWSSIS/R101_teacher.yaml \\
    --num-gpus ${{NGPUS}} \\
    --eval-only \\
    MODEL.WEIGHTS ${{CKPT_1}} \\
    OUTPUT_DIR inference_dir/${{EXP_NAME}}_strong_1 \\
    MODEL.SOLOV2.NMS_TYPE mask \\
    MODEL.SOLOV2.PROMPT point_with_size \\
    DATASETS.TEST "('coco_2017_train_${{SUBSET}}_s',)"

OMP_NUM_THREADS=1 python -W ignore tools/train_net.py \\
    --config-file configs/PointWSSIS/R101_teacher.yaml \\
    --num-gpus ${{NGPUS}} \\
    --eval-only \\
    MODEL.WEIGHTS ${{CKPT_2}} \\
    OUTPUT_DIR inference_dir/${{EXP_NAME}}_strong_2 \\
    MODEL.SOLOV2.NMS_TYPE mask \\
    MODEL.SOLOV2.PROMPT point_with_size \\
    DATASETS.TEST "('coco_2017_train_${{SUBSET}}_s',)"

# Step 4: Train MaskRefineNet
echo "=== Step 4: Training MaskRefineNet ==="
cd {MASKREFINE_ROOT}

MRN_EXP_NAME="MRN_${{SUBSET}}"

torchrun --standalone --nnodes=1 --nproc_per_node=${{NGPUS}} main.py \\
    --data_root ${{ROOT}} \\
    --workspace results \\
    --exp_name ${{MRN_EXP_NAME}} \\
    --train_iters {train_config['mrn_train_iters']} \\
    --warm_iters {train_config['mrn_warm_iters']} \\
    --val_interval 5000 \\
    --weak_pth ../AdelaiDet/inference_dir/${{EXP_NAME}}_strong_1/inference/instances_predictions.pth \\
               ../AdelaiDet/inference_dir/${{EXP_NAME}}_strong_2/inference/instances_predictions.pth \\
    --gt_json instances_train2017_${{SUBSET}}_s.json \\
    --eval_pth ../AdelaiDet/inference_dir/${{EXP_NAME}}/inference/instances_predictions.pth \\
    --amp

# Step 5: Merge labels
echo "=== Step 5: Merging Strong and Refined Weak Labels ==="
torchrun --standalone --nnodes=1 --nproc_per_node=${{NGPUS}} merge_strong_and_refined_weak_labels.py \\
    --data_root ${{ROOT}} \\
    --ckpt results/${{MRN_EXP_NAME}}/ckpt/best_AP.pt \\
    --dataset coco \\
    --size 256 \\
    --weak_pth ../AdelaiDet/inference_dir/${{EXP_NAME}}/inference/instances_predictions.pth \\
    --weak_json ${{ROOT}}/coco/annotations/instances_train2017_${{SUBSET}}_w.json \\
    --strong_json ${{ROOT}}/coco/annotations/instances_train2017_${{SUBSET}}_s.json \\
    --save_path ${{ROOT}}/coco/annotations/instances_train2017_${{SUBSET}}_sw_refined.json

# Step 6: Train Student Network
echo "=== Step 6: Training Student Network ==="
cd {ADELAIDET_ROOT}

STUDENT_EXP_NAME="SOLOv2_R101_coco${{SUBSET}}_sw_refined"

OMP_NUM_THREADS=1 python -W ignore tools/train_net.py \\
    --config-file configs/SOLOv2/R101_3x.yaml \\
    --num-gpus ${{NGPUS}} \\
    SEED ${{SEED}} \\
    OUTPUT_DIR training_dir/${{STUDENT_EXP_NAME}} \\
    DATASETS.TRAIN "('coco_2017_train_${{SUBSET}}_sw_refined',)" \\
    DATASETS.TEST "('coco_2017_val',)" \\
    TEST.EVAL_PERIOD 5000

echo "=== Training Complete! ==="
echo "Student model saved at: training_dir/${{STUDENT_EXP_NAME}}/model_final.pth"
"""
    return script

# Generate and display the complete script for the current config and the
# selected training schedule
complete_script = generate_complete_pipeline_script(config, train_config)
print(complete_script)

In [None]:
# Save the pipeline script into the workspace root, one file per subset
script_path = WORKSPACE_ROOT / f"train_pipeline_{config.SUBSET}.sh"
with open(script_path, 'w') as script_file:
    script_file.write(complete_script)
print(
    f"Pipeline script saved to: {script_path}",
    f"Run with: bash {script_path}",
    sep="\n",
)

## 12. Summary Statistics Dashboard

In [None]:
def create_summary_dashboard():
    """Print a text overview of the configured training pipeline.

    Reads the notebook globals ``config``, ``train_config``,
    ``EXPECTED_RESULTS``, ``teacher_exp_name``, ``mrn_exp_name`` and
    ``student_exp_name``; returns nothing.
    """
    rule = "=" * 70
    subset = config.SUBSET

    def section(heading, entries):
        """Print a blank line, the heading, then each entry indented by two spaces."""
        print(f"\n{heading}")
        for entry in entries:
            print(f"  {entry}")

    print(rule)
    print("PointWSSIS Training Pipeline Summary")
    print(rule)

    section("Configuration:", [
        f"- Dataset: {config.DATASET}",
        f"- Subset: {subset} ({subset.replace('p', '')}% fully labeled)",
        f"- Data Root: {config.DATA_ROOT}",
        f"- Number of GPUs: {config.NUM_GPUS}",
    ])

    section(
        f"Training Hyperparameters ({subset}):",
        [f"- {key}: {value}" for key, value in train_config.items()],
    )

    section("Pipeline Steps:", [
        "1. Train Teacher Network (SOLOv2)",
        "   - Config: configs/PointWSSIS/R101_teacher.yaml",
        f"   - Iterations: {train_config['train_iter']}",
        "2. Generate Pseudo Labels for Weak Data",
        "3. Train MaskRefineNet",
        f"   - Iterations: {train_config['mrn_train_iters']}",
        "4. Merge Strong and Refined Weak Labels",
        "5. Train Student Network",
    ])

    section("Expected Results (from paper):", [
        f"- COCO test-dev mAP: {EXPECTED_RESULTS.get(subset, 'N/A')}%",
    ])

    section("Output Locations:", [
        f"- Teacher model: AdelaiDet/training_dir/{teacher_exp_name}/",
        f"- MRN model: MaskRefineNet/results/{mrn_exp_name}/",
        f"- Student model: AdelaiDet/training_dir/{student_exp_name}/",
    ])
    print(rule)

# Print the overview; relies on the experiment names and train_config defined
# by the earlier cells.
create_summary_dashboard()