In [None]:
# ============================================================================
# FIXING NUMPY COMPATIBILITY ISSUE FIRST
# ============================================================================
import subprocess
import sys

# Environment bootstrap. Runs before the heavy imports below so that they
# resolve against the packages installed here.
print("=" * 60)
print("FIXING NUMPY COMPATIBILITY")
print("=" * 60)
print("Downgrading NumPy to version 1.x for compatibility...")

# Both requirements go into ONE pip invocation so the resolver keeps the
# "numpy<2" pin while pulling in ultralytics. Without ultralytics the
# `from ultralytics import YOLO` import further down fails with
# ModuleNotFoundError on a fresh kernel.
try:
    subprocess.check_call(
        [sys.executable, "-m", "pip", "install", "-q", "numpy<2", "ultralytics"]
    )
    print("‚úì NumPy downgraded successfully")
    print("ultralytics is installed")
except (subprocess.CalledProcessError, OSError) as e:
    # Best effort on purpose: the packages may already be present (for
    # example when the kernel has no internet access), so only warn.
    print(f"‚ö† Warning: {e}")
    print("Continuing anyway...")

print("=" * 60 + "\n")

import os
import yaml
from pathlib import Path
from ultralytics import YOLO
import torch

# ============================================================================
# CONFIGURATION
# ============================================================================

# Paths (adjusted for correct dataset location)
# --- Filesystem layout -------------------------------------------------------
OUTPUT_PATH = '/kaggle/working'
DATASET_PATH = '/kaggle/input/military-object-dataset/military_object_dataset'
WEIGHTS_PATH = OUTPUT_PATH + '/weights'

# Make sure the weights directory exists before anything is written to it.
os.makedirs(WEIGHTS_PATH, exist_ok=True)

# --- Training hyper-parameters -----------------------------------------------
# 'model' alternatives noted by the author:
#   'yolov8n.pt' - faster training (30 min less)
#   'yolov8l.pt' - better accuracy (if you have time)
CONFIG = dict(
    model='yolov8m.pt',   # medium model - good balance
    epochs=60,            # with early stopping, will stop around 50-70
    imgsz=640,
    batch=16,             # adjust based on GPU memory
    patience=15,          # early-stopping patience
    device='0,1',         # use both T4 GPUs
    workers=8,
    optimizer='AdamW',
    lr0=0.001,
    lrf=0.01,
    cos_lr=True,
    close_mosaic=10,      # disable mosaic in the last 10 epochs
)

# ============================================================================
# CHECK SYSTEM
# ============================================================================

# Report the PyTorch build and every CUDA device visible to this process.
_rule = "=" * 60
print(_rule)
print("SYSTEM CHECK")
print(_rule)
print(f"PyTorch: {torch.__version__}")
print(f"CUDA Available: {torch.cuda.is_available()}")
print(f"GPU Count: {torch.cuda.device_count()}")
if torch.cuda.is_available():
    _gpu_total = torch.cuda.device_count()
    for i in range(_gpu_total):
        _gpu_name = torch.cuda.get_device_name(i)
        print(f"GPU {i}: {_gpu_name}")
print(_rule)

# ============================================================================
# CREATE CORRECTED YAML
# ============================================================================

print("\n" + "=" * 60)
print("CREATING DATASET CONFIGURATION")
print("=" * 60)

# Class count used when the original YAML is missing or gives no usable info.
DEFAULT_NUM_CLASSES = 12

# Read the original YAML to recover the class names. `num_classes` is always
# derived from `class_names`, so the two values written to the corrected YAML
# can never disagree.
# NOTE(review): Ultralytics is understood to reject a dataset YAML whose `nc`
# differs from len(names) - confirm against the installed version.
original_yaml_path = f'{DATASET_PATH}/military_dataset.yaml'
try:
    with open(original_yaml_path, 'r') as f:
        original_yaml = yaml.safe_load(f)
    if not isinstance(original_yaml, dict):
        # Empty file (None) or a top-level list/scalar: nothing to read.
        raise ValueError(
            f"expected a mapping at the top level, got {type(original_yaml).__name__}"
        )

    class_names = original_yaml.get('names')
    declared_nc = original_yaml.get('nc')
    if not class_names:
        # No names given: synthesise them for the declared class count.
        fallback_nc = DEFAULT_NUM_CLASSES if declared_nc is None else declared_nc
        class_names = [f'class_{i}' for i in range(fallback_nc)]
    num_classes = len(class_names)
    if declared_nc is not None and declared_nc != num_classes:
        print(f"Warning: YAML declares nc={declared_nc} but lists {num_classes} names; using {num_classes}")

    print(f"‚úì Found {num_classes} classes from original YAML")
    print(f"‚úì Classes: {class_names}")
except (OSError, yaml.YAMLError, ValueError, TypeError) as e:
    # Missing/unreadable file, malformed YAML, or a non-integer `nc`.
    print(f"‚ö† Warning reading original YAML: {e}")
    print("‚ö† Using default class names")
    num_classes = DEFAULT_NUM_CLASSES
    class_names = [f'class_{i}' for i in range(num_classes)]

# Verify dataset structure
print("\nVerifying dataset structure...")

# Maps each expected sub-directory to the number of entries found in it
# (stays 0 when the directory is missing).
required_dirs = dict.fromkeys(
    ('train/images', 'train/labels', 'val/images', 'val/labels', 'test/images'),
    0,
)

_dataset_root = Path(DATASET_PATH)
for dir_name in list(required_dirs):
    dir_path = _dataset_root / dir_name
    if not dir_path.exists():
        print(f"‚úó {dir_name}: NOT FOUND")
        continue
    # Count directory entries without materialising the whole listing.
    count = sum(1 for _ in dir_path.glob('*'))
    required_dirs[dir_name] = count
    print(f"‚úì {dir_name}: {count} files")

# Create corrected YAML with proper absolute paths
# Dataset description handed to the trainer: an absolute root plus split
# directories relative to it, the class count and the class names.
data_yaml_path = f'{OUTPUT_PATH}/corrected_data.yaml'
data_yaml = dict(
    path=DATASET_PATH,
    train='train/images',
    val='val/images',
    test='test/images',
    nc=num_classes,
    names=class_names,
)

# Serialise first, then write the text in one go.
_yaml_text = yaml.dump(data_yaml, default_flow_style=False)
with open(data_yaml_path, 'w') as f:
    f.write(_yaml_text)

print(f"\n‚úì Created corrected YAML at: {data_yaml_path}")
print("=" * 60)

# ============================================================================
# DATA AUGMENTATION SETTINGS
# ============================================================================

# Strong augmentation for robustness
# Augmentation keyword arguments, later splatted into the training call.
augmentation = dict(
    # colour-space jitter
    hsv_h=0.015,       # HSV-Hue augmentation
    hsv_s=0.7,         # HSV-Saturation augmentation
    hsv_v=0.4,         # HSV-Value augmentation
    # geometric transforms
    degrees=10.0,      # rotation (+/- deg)
    translate=0.1,     # translation (+/- fraction)
    scale=0.5,         # scale (+/- gain)
    shear=0.0,         # shear (+/- deg)
    perspective=0.0,   # perspective (+/- fraction)
    flipud=0.0,        # flip up-down probability
    fliplr=0.5,        # flip left-right probability
    # multi-image mixing
    mosaic=1.0,        # mosaic augmentation probability
    mixup=0.1,         # mixup augmentation probability
    copy_paste=0.1,    # copy-paste augmentation probability
)

# ============================================================================
# INITIALIZE MODEL
# ============================================================================

print(f"\n{'=' * 60}")
print("INITIALIZING MODEL")
print("=" * 60)

# Build the YOLO model from the weights named in CONFIG.
_model_name = CONFIG['model']
_device_spec = CONFIG['device']
model = YOLO(_model_name)
print(f"‚úì Loaded model: {_model_name}")
print(f"‚úì Using GPUs: {_device_spec}")

# ============================================================================
# CHECK FOR EXISTING TRAINING (AUTO-RESUME)
# ============================================================================

print("\n" + "=" * 60)
print("CHECKING FOR EXISTING TRAINING")
print("=" * 60)

# Decide whether the training call below should resume a previous run.
# `resume_training` becomes True only when last.pt exists, loads, AND records
# a non-negative epoch.
last_checkpoint = f'{OUTPUT_PATH}/train/weights/last.pt'
resume_training = False

if Path(last_checkpoint).exists():
    print(f"‚úì Found existing checkpoint: {last_checkpoint}")

    try:
        # Load checkpoint to check epoch.
        # NOTE(review): newer PyTorch releases default torch.load to
        # weights_only=True, which may refuse this pickled checkpoint; any
        # failure here falls through to the fresh-start branch below.
        checkpoint = torch.load(last_checkpoint, map_location='cpu')
        last_epoch = checkpoint.get('epoch', -1)

        if last_epoch is None or last_epoch < 0:
            # Ultralytics resets the epoch to -1 in the final weights of a
            # completed run, and resume=True is then rejected ("nothing to
            # resume"). Treat such a checkpoint as not resumable.
            print(f"Checkpoint records epoch={last_epoch}: previous run already finished, nothing to resume")
            print("Starting fresh training...")
        else:
            print(f"‚úì Last completed epoch: {last_epoch}")
            print(f"‚úì Will resume from epoch {last_epoch + 1}")

            model = YOLO(last_checkpoint)  # Load from checkpoint
            resume_training = True  # set only after the model loaded
            print("‚úì Loaded model from checkpoint")
    except Exception as e:
        # Deliberately broad: any unreadable/corrupt checkpoint should fall
        # back to a fresh run instead of aborting the notebook.
        print(f"‚ö† Could not load checkpoint: {e}")
        print("‚ö† Starting fresh training...")
        resume_training = False
else:
    print("‚úó No existing checkpoint found")
    print("‚úì Starting fresh training...")

print("=" * 60)

# ============================================================================
# START TRAINING
# ============================================================================

print("\n" + "=" * 60)
print("STARTING TRAINING")
print("=" * 60)
if resume_training:
    print("üîÑ RESUMING from checkpoint")
else:
    print("üÜï STARTING fresh training")
print("Estimated time: 2-3 hours")
print(f"Output directory: {OUTPUT_PATH}")
print(f"Using corrected YAML: {data_yaml_path}")
print("=" * 60 + "\n")

# Train the model. Outputs land in {OUTPUT_PATH}/train; exist_ok=True reuses
# that directory so the checkpoint lookup above finds last.pt on a re-run.
results = model.train(
    data=data_yaml_path,
    epochs=CONFIG['epochs'],
    imgsz=CONFIG['imgsz'],
    batch=CONFIG['batch'],
    patience=CONFIG['patience'],
    device=CONFIG['device'],
    workers=CONFIG['workers'],
    optimizer=CONFIG['optimizer'],
    lr0=CONFIG['lr0'],
    lrf=CONFIG['lrf'],
    cos_lr=CONFIG['cos_lr'],
    close_mosaic=CONFIG['close_mosaic'],
    project=OUTPUT_PATH,
    name='train',
    exist_ok=True,
    pretrained=not resume_training,  # pretrained weights only for a fresh run
    resume=resume_training,  # Enable resume mode
    verbose=True,
    seed=42,
    deterministic=False,
    save=True,
    save_period=5,  # Save checkpoint every 5 epochs
    **augmentation
)

# ============================================================================
# VALIDATION
# ============================================================================

print(f"\n{'=' * 60}")
print("RUNNING VALIDATION")
print("=" * 60)

# Reload the best checkpoint written by training and evaluate it on the
# validation split described by the corrected YAML.
best_model_path = f'{OUTPUT_PATH}/train/weights/best.pt'
model = YOLO(best_model_path)

_val_args = dict(
    data=data_yaml_path,
    imgsz=CONFIG['imgsz'],
    batch=CONFIG['batch'],
    device=CONFIG['device'],
    plots=True,
    save_json=True,
    project=OUTPUT_PATH,
    name='validation',
)
val_results = model.val(**_val_args)

# ============================================================================
# PRINT RESULTS
# ============================================================================

# Console report of the headline validation metrics.
_rule = "=" * 60
print("\n" + _rule)
print("TRAINING COMPLETE!")
print(_rule)
print(f"Best model saved at: {best_model_path}")
print("\nValidation Results:")
_box = val_results.box
for _label, _value in (
    ("mAP@50", _box.map50),
    ("mAP@50-95", _box.map),
    ("Precision", _box.mp),
    ("Recall", _box.mr),
):
    print(f"  {_label}: {_value:.4f}")
print(_rule)

# Save results summary
import csv
import json

# Number of epochs actually run. The object returned by train() is not used
# for this: its `box.map` is a scalar (so len() on it raises TypeError), and
# it is presumably None after a multi-GPU run - verify. Count the data rows
# of results.csv instead.
# NOTE(review): assumes results.csv holds one header row plus one row per
# epoch - confirm against the installed Ultralytics version.
results_csv_path = f'{OUTPUT_PATH}/train/results.csv'
try:
    with open(results_csv_path, newline='') as f:
        epochs_trained = sum(1 for row in csv.reader(f) if row) - 1  # minus header
except OSError:
    epochs_trained = 0
if epochs_trained <= 0:
    # Log missing or empty: fall back to the configured epoch budget.
    epochs_trained = CONFIG['epochs']

results_summary = {
    'model': CONFIG['model'],
    'epochs_trained': epochs_trained,
    'mAP@50': float(val_results.box.map50),
    'mAP@50-95': float(val_results.box.map),
    'precision': float(val_results.box.mp),
    'recall': float(val_results.box.mr),
}

with open(f'{OUTPUT_PATH}/training_summary.json', 'w') as f:
    json.dump(results_summary, f, indent=2)

print("\n‚úì Training summary saved!")
print("\nNext step: Run inference script to generate predictions")
print("\nFiles saved:")
print(f"  - Best weights: {best_model_path}")
print(f"  - Last weights: {OUTPUT_PATH}/train/weights/last.pt")
print(f"  - Training plots: {OUTPUT_PATH}/train/")
print(f"  - Validation results: {OUTPUT_PATH}/validation/")

FIXING NUMPY COMPATIBILITY
Downgrading NumPy to version 1.x for compatibility...
✓ NumPy downgraded successfully



ModuleNotFoundError: No module named 'ultralytics'

# Validation

In [None]:
import os
import sys
import yaml
import json
import torch
import numpy as np
from pathlib import Path
from ultralytics import YOLO
import matplotlib.pyplot as plt
from datetime import datetime

# ============================================================================
# CONFIGURATION
# ============================================================================

# Paths (adjust these based on your setup)
# --- Inputs produced by the training cell -----------------------------------
DATASET_PATH = '/kaggle/input/military-object-dataset/military_object_dataset'
DATA_YAML_PATH = '/kaggle/working/corrected_data.yaml'
MODEL_PATH = '/kaggle/working/train/weights/best.pt'  # trained model

# --- Where this cell writes its reports ------------------------------------
OUTPUT_PATH = '/kaggle/working/validation_results'

# --- Validation settings ---------------------------------------------------
CONFIG = dict(
    imgsz=640,
    batch=16,
    device='0',         # single GPU for validation
    workers=8,
    conf_thres=0.001,   # low threshold for comprehensive evaluation
    iou_thres=0.6,
    max_det=300,
    save_json=True,
    save_txt=True,
    plots=True,
    verbose=True,
)

# The output directory must exist before the JSON/TXT reports are written.
os.makedirs(OUTPUT_PATH, exist_ok=True)

# ============================================================================
# SYSTEM CHECK
# ============================================================================

# Report the PyTorch build and, when CUDA is usable, the first GPU.
_rule = "=" * 60
print(_rule)
print("VALIDATION SYSTEM CHECK")
print(_rule)
print(f"PyTorch: {torch.__version__}")
print(f"CUDA Available: {torch.cuda.is_available()}")
if torch.cuda.is_available():
    _gpu_name = torch.cuda.get_device_name(0)
    _gpu_gb = torch.cuda.get_device_properties(0).total_memory / 1e9
    print(f"GPU: {_gpu_name}")
    print(f"GPU Memory: {_gpu_gb:.2f} GB")
print(_rule)

# ============================================================================
# VERIFY FILES
# ============================================================================

print(f"\n{'=' * 60}")
print("VERIFYING FILES")
print("=" * 60)

# -- Trained weights: abort (after listing any sibling .pt files) if missing.
_model_file = Path(MODEL_PATH)
if _model_file.exists():
    print(f"‚úì Model found: {MODEL_PATH}")
    model_size = _model_file.stat().st_size / 1e6
    print(f"  Size: {model_size:.2f} MB")
else:
    print(f"‚úó ERROR: Model not found at {MODEL_PATH}")
    print("\nAvailable models:")
    weights_dir = _model_file.parent
    if weights_dir.exists():
        for f in weights_dir.glob('*.pt'):
            print(f"  - {f}")
    sys.exit(1)

# -- Dataset YAML: abort if missing.
if Path(DATA_YAML_PATH).exists():
    print(f"‚úì Data YAML found: {DATA_YAML_PATH}")
else:
    print(f"‚úó ERROR: Data YAML not found at {DATA_YAML_PATH}")
    sys.exit(1)

# -- Parse the YAML and echo the class count plus the first three names.
with open(DATA_YAML_PATH, 'r') as f:
    data_config = yaml.safe_load(f)
print(f"  Classes: {data_config['nc']}")
_names = data_config['names']
# 'names' may be a list or an id -> name mapping.
_names_list = list(_names.values()) if isinstance(_names, dict) else _names
class_names_preview = _names_list[:3]
print(f"  Class names: {class_names_preview}... (showing first 3)")

# -- Validation images directory: abort if missing.
val_images_path = Path(data_config['path']) / data_config['val']
if not val_images_path.exists():
    print(f"‚úó ERROR: Validation images not found at {val_images_path}")
    sys.exit(1)
val_count = sum(1 for _ in val_images_path.glob('*'))
print(f"‚úì Validation images: {val_count} files")

print("=" * 60)

# ============================================================================
# LOAD MODEL
# ============================================================================

_rule = "=" * 60
print("\n" + _rule)
print("LOADING MODEL")
print(_rule)

# Any failure while loading or inspecting the weights ends the cell with
# exit status 1.
try:
    model = YOLO(MODEL_PATH)
    print("‚úì Model loaded successfully")

    # Query model info quietly, then report the wrapped network's class name.
    model_info = model.info(verbose=False)
    _net_type = model.model.__class__.__name__
    print(f"‚úì Model type: {_net_type}")

except Exception as e:
    print(f"‚úó ERROR loading model: {e}")
    sys.exit(1)

print(_rule)

# ============================================================================
# RUN VALIDATION
# ============================================================================

_rule = "=" * 60
print("\n" + _rule)
print("RUNNING VALIDATION")
print(_rule)
print(f"Validation set: {val_images_path}")
print(f"Output directory: {OUTPUT_PATH}")
print(f"Batch size: {CONFIG['batch']}")
print(f"Image size: {CONFIG['imgsz']}")
print(_rule + "\n")

# Evaluate on the validation split; every tunable comes from CONFIG and the
# artefacts are written under {OUTPUT_PATH}/val.
_val_args = dict(
    data=DATA_YAML_PATH,
    imgsz=CONFIG['imgsz'],
    batch=CONFIG['batch'],
    device=CONFIG['device'],
    workers=CONFIG['workers'],
    conf=CONFIG['conf_thres'],
    iou=CONFIG['iou_thres'],
    max_det=CONFIG['max_det'],
    save_json=CONFIG['save_json'],
    save_txt=CONFIG['save_txt'],
    plots=CONFIG['plots'],
    verbose=CONFIG['verbose'],
    project=OUTPUT_PATH,
    name='val',
    exist_ok=True,
)
val_results = model.val(**_val_args)

# ============================================================================
# EXTRACT AND DISPLAY RESULTS
# ============================================================================

print("\n" + "=" * 60)
print("VALIDATION RESULTS")
print("=" * 60)

# Overall metrics
box = val_results.box
print("\nüìä Overall Metrics:")
print(f"  mAP@50:     {box.map50:.4f}")
print(f"  mAP@50-95:  {box.map:.4f}")
print(f"  Precision:  {box.mp:.4f}")
print(f"  Recall:     {box.mr:.4f}")

# Per-class metrics
print("\nüìã Per-Class Metrics:")
print(f"{'Class':<20} {'mAP@50':>10} {'mAP@50-95':>10} {'Precision':>10} {'Recall':>10}")
print("-" * 70)

# Handle both list and dict formats for class names
if isinstance(data_config['names'], dict):
    class_names = list(data_config['names'].values())
    names_by_id = {int(k): v for k, v in data_config['names'].items()}
else:
    class_names = data_config['names']
    names_by_id = dict(enumerate(class_names))

# The per-class arrays (ap50, ap, p, r) hold one entry per class that was
# actually evaluated, in the order given by `ap_class_index` - they are NOT
# indexed by class id. Pair each row with its real class id so metrics are
# not attributed to the wrong class when a class is absent from the split.
for class_id, map50, map50_95, precision, recall in zip(
    box.ap_class_index, box.ap50, box.ap, box.p, box.r
):
    class_id = int(class_id)
    class_name = names_by_id.get(class_id, f'class_{class_id}')
    print(f"{class_name:<20} {map50:>10.4f} {map50_95:>10.4f} {precision:>10.4f} {recall:>10.4f}")

print("=" * 60)

# ============================================================================
# SAVE DETAILED RESULTS
# ============================================================================

print("\n" + "=" * 60)
print("SAVING RESULTS")
print("=" * 60)


def _nan_to_zero(value):
    """Return *value* as a built-in float, mapping NaN to 0.0."""
    value = float(value)
    return 0.0 if np.isnan(value) else value


# Create comprehensive results dictionary
results_dict = {
    'timestamp': datetime.now().isoformat(),
    'model_path': MODEL_PATH,
    'dataset_path': DATASET_PATH,
    'configuration': CONFIG,
    'overall_metrics': {
        'mAP@50': float(val_results.box.map50),
        'mAP@50-95': float(val_results.box.map),
        'precision': float(val_results.box.mp),
        'recall': float(val_results.box.mr),
    },
    'per_class_metrics': {}
}

# Class id -> name lookup; 'names' may be a list or an id -> name mapping.
_raw_names = data_config['names']
if isinstance(_raw_names, dict):
    _names_by_id = {int(k): v for k, v in _raw_names.items()}
else:
    _names_by_id = dict(enumerate(_raw_names))

# Add per-class metrics. The ap50/ap arrays are ordered like
# `ap_class_index` (one row per evaluated class), not by class id, so each
# row is mapped back to its class id before the name is looked up.
for _class_id, _ap50, _ap in zip(
    val_results.box.ap_class_index, val_results.box.ap50, val_results.box.ap
):
    _class_id = int(_class_id)
    _class_name = _names_by_id.get(_class_id, f'class_{_class_id}')
    results_dict['per_class_metrics'][_class_name] = {
        'mAP@50': _nan_to_zero(_ap50),
        'mAP@50-95': _nan_to_zero(_ap),
    }

# Save to JSON
results_json_path = f'{OUTPUT_PATH}/validation_results.json'
with open(results_json_path, 'w') as f:
    json.dump(results_dict, f, indent=2)
print(f"‚úì Detailed results saved: {results_json_path}")

# Save summary text file
summary_path = f'{OUTPUT_PATH}/validation_summary.txt'
with open(summary_path, 'w') as f:
    f.write("VALIDATION SUMMARY\n")
    f.write("=" * 60 + "\n\n")
    f.write(f"Timestamp: {results_dict['timestamp']}\n")
    f.write(f"Model: {MODEL_PATH}\n\n")
    f.write("Overall Metrics:\n")
    f.write(f"  mAP@50:     {val_results.box.map50:.4f}\n")
    f.write(f"  mAP@50-95:  {val_results.box.map:.4f}\n")
    f.write(f"  Precision:  {val_results.box.mp:.4f}\n")
    f.write(f"  Recall:     {val_results.box.mr:.4f}\n\n")
    f.write("Per-Class Metrics:\n")
    for class_name, metrics in results_dict['per_class_metrics'].items():
        f.write(f"  {class_name}:\n")
        f.write(f"    mAP@50: {metrics['mAP@50']:.4f}\n")
        f.write(f"    mAP@50-95: {metrics['mAP@50-95']:.4f}\n")
print(f"‚úì Summary saved: {summary_path}")

print("=" * 60)

# ============================================================================
# ADDITIONAL ANALYSIS
# ============================================================================

_rule = "=" * 60
print("\n" + _rule)
print("ADDITIONAL ANALYSIS")
print(_rule)

# Rank classes by mAP@50 (best first) and show both ends of the ranking.
per_class_map = results_dict['per_class_metrics']
if per_class_map:
    sorted_classes = list(per_class_map.items())
    sorted_classes.sort(key=lambda entry: entry[1]['mAP@50'], reverse=True)

    _leaders = sorted_classes[:3]
    _laggards = sorted_classes[-3:]

    print("\nüèÜ Top 3 Performing Classes:")
    for i, (class_name, metrics) in enumerate(_leaders, start=1):
        print(f"  {i}. {class_name}: mAP@50 = {metrics['mAP@50']:.4f}")

    print("\n‚ö†Ô∏è  Bottom 3 Performing Classes:")
    for i, (class_name, metrics) in enumerate(_laggards, start=1):
        print(f"  {i}. {class_name}: mAP@50 = {metrics['mAP@50']:.4f}")

print("\n" + _rule)

# ============================================================================
# FINAL SUMMARY
# ============================================================================

_rule = "=" * 60
print("\n" + _rule)
print("VALIDATION COMPLETE!")
print(_rule)
print(f"\nüìÅ Output Directory: {OUTPUT_PATH}")
print("\nGenerated Files:")
for _line in (
    "  ‚úì validation_results.json - Detailed metrics",
    "  ‚úì validation_summary.txt - Human-readable summary",
    "  ‚úì Confusion matrix and plots (in val/ subdirectory)",
):
    print(_line)
print("\nüéØ Overall Performance:")
print(f"  mAP@50: {val_results.box.map50:.4f}")
print(f"  mAP@50-95: {val_results.box.map:.4f}")
print(_rule)

# Performance interpretation: bucket mAP@50 into three verdicts
# (> 0.7, > 0.5, everything else).
_map50 = val_results.box.map50
if _map50 > 0.7:
    _verdict = ["\n‚úÖ Excellent performance! Model is ready for deployment."]
elif _map50 > 0.5:
    _verdict = ["\n‚úì Good performance! Consider fine-tuning for better results."]
else:
    _verdict = [
        "\n‚ö†Ô∏è  Performance needs improvement. Consider:",
        "  - Training for more epochs",
        "  - Using a larger model",
        "  - Adjusting hyperparameters",
        "  - Collecting more training data",
    ]
for _line in _verdict:
    print(_line)

print("\n" + _rule)

# Test 

In [None]:
import os
import sys
import yaml
import json
import torch
import cv2
import numpy as np
from pathlib import Path
from ultralytics import YOLO
from tqdm import tqdm
from datetime import datetime
import pandas as pd

# ============================================================================
# CONFIGURATION
# ============================================================================

# Paths
# --- Inputs ------------------------------------------------------------------
DATASET_PATH = '/kaggle/input/military-object-dataset/military_object_dataset'
TEST_IMAGES_PATH = DATASET_PATH + '/test/images'
DATA_YAML_PATH = '/kaggle/working/corrected_data.yaml'
MODEL_PATH = '/kaggle/working/train/weights/best.pt'  # trained model

# --- Outputs -----------------------------------------------------------------
OUTPUT_PATH = '/kaggle/working/inference_results'

# --- Inference settings ------------------------------------------------------
CONFIG = dict(
    imgsz=640,
    conf_thres=0.25,     # confidence threshold
    iou_thres=0.45,      # NMS IOU threshold
    max_det=300,         # maximum detections per image
    device='0',          # GPU device
    agnostic_nms=False,
    save_txt=True,
    save_conf=True,
    save_crop=False,     # set to True to save cropped detections
    visualize=True,      # save visualization images
    line_width=2,
    batch=16,
)

# Create the output root and its two sub-directories.
for _subdir in ('', '/labels', '/visualizations'):
    os.makedirs(f'{OUTPUT_PATH}{_subdir}', exist_ok=True)

# ============================================================================
# SYSTEM CHECK
# ============================================================================

# Report the PyTorch build and, when CUDA is usable, the first GPU's name.
_rule = "=" * 60
print(_rule)
print("INFERENCE SYSTEM CHECK")
print(_rule)
print(f"PyTorch: {torch.__version__}")
print(f"CUDA Available: {torch.cuda.is_available()}")
if torch.cuda.is_available():
    _gpu_name = torch.cuda.get_device_name(0)
    print(f"GPU: {_gpu_name}")
print(_rule)

# ============================================================================
# VERIFY FILES
# ============================================================================

print("\n" + "=" * 60)
print("VERIFYING FILES")
print("=" * 60)

# Check model
if not Path(MODEL_PATH).exists():
    print(f"‚úó ERROR: Model not found at {MODEL_PATH}")
    sys.exit(1)
else:
    print(f"‚úì Model found: {MODEL_PATH}")
    model_size = Path(MODEL_PATH).stat().st_size / 1e6
    print(f"  Size: {model_size:.2f} MB")

# Check data YAML
if not Path(DATA_YAML_PATH).exists():
    print(f"‚úó ERROR: Data YAML not found at {DATA_YAML_PATH}")
    sys.exit(1)
else:
    print(f"‚úì Data YAML found: {DATA_YAML_PATH}")

# Load YAML to get class names
with open(DATA_YAML_PATH, 'r') as f:
    data_config = yaml.safe_load(f)
# Handle both list and dict formats for class names
if isinstance(data_config['names'], dict):
    class_names = list(data_config['names'].values())
else:
    class_names = data_config['names']
num_classes = data_config['nc']
print(f"  Classes: {num_classes}")
print(f"  Class names loaded: {len(class_names)} classes")

# Check test images. Match on the lower-cased suffix so files such as
# "IMG.JPG" or "photo.jpeg" are counted too; globbing only '*.jpg'/'*.png'
# under-reports the set and skews the per-image statistics computed later.
# NOTE(review): the suffix list is a common subset of image formats - extend
# it if the dataset uses others.
IMAGE_SUFFIXES = {'.bmp', '.jpeg', '.jpg', '.png', '.tif', '.tiff', '.webp'}
test_path = Path(TEST_IMAGES_PATH)
if not test_path.exists():
    print(f"‚úó ERROR: Test images not found at {test_path}")
    sys.exit(1)
else:
    test_images = sorted(
        p for p in test_path.iterdir()
        if p.is_file() and p.suffix.lower() in IMAGE_SUFFIXES
    )
    print(f"‚úì Test images found: {len(test_images)} files")

print("=" * 60)

# ============================================================================
# LOAD MODEL
# ============================================================================

_rule = "=" * 60
print("\n" + _rule)
print("LOADING MODEL")
print(_rule)

# A failure to load the trained weights ends the cell with exit status 1.
try:
    model = YOLO(MODEL_PATH)
except Exception as e:
    print(f"‚úó ERROR loading model: {e}")
    sys.exit(1)
else:
    print("‚úì Model loaded successfully")

print(_rule)

# ============================================================================
# RUN INFERENCE
# ============================================================================

_rule = "=" * 60
print("\n" + _rule)
print("RUNNING INFERENCE ON TEST SET")
print(_rule)
print(f"Test images: {len(test_images)}")
print(f"Output directory: {OUTPUT_PATH}")
print(f"Confidence threshold: {CONFIG['conf_thres']}")
print(f"IOU threshold: {CONFIG['iou_thres']}")
print(_rule + "\n")

# Predict on the whole test directory. stream=False is passed, so `results`
# is consumed later as a fully materialised collection.
_predict_args = dict(
    source=TEST_IMAGES_PATH,
    imgsz=CONFIG['imgsz'],
    conf=CONFIG['conf_thres'],
    iou=CONFIG['iou_thres'],
    max_det=CONFIG['max_det'],
    device=CONFIG['device'],
    agnostic_nms=CONFIG['agnostic_nms'],
    save_txt=CONFIG['save_txt'],
    save_conf=CONFIG['save_conf'],
    save_crop=CONFIG['save_crop'],
    line_width=CONFIG['line_width'],
    project=OUTPUT_PATH,
    name='predictions',
    exist_ok=True,
    verbose=True,
    stream=False,
)
results = model.predict(**_predict_args)

print("\n‚úì Inference complete!")

# ============================================================================
# PROCESS AND SAVE RESULTS
# ============================================================================

print("\n" + "=" * 60)
print("PROCESSING RESULTS")
print("=" * 60)

# Collect all predictions (one dict per detection) plus aggregate counters.
all_predictions = []
detection_stats = {
    # Count the images the model actually processed rather than a separate
    # directory glob, so the averages reported later use the right base.
    'total_images': len(results),
    'images_with_detections': 0,
    'total_detections': 0,
    # Every known class starts at zero so it appears in the statistics file.
    'detections_per_class': {class_name: 0 for class_name in class_names},
}
per_class_counts = detection_stats['detections_per_class']

print("\nProcessing predictions...")
for result in tqdm(results):
    img_name = Path(result.path).stem
    boxes = result.boxes

    if len(boxes) == 0:
        continue  # nothing to record or draw for this image

    detection_stats['images_with_detections'] += 1
    detection_stats['total_detections'] += len(boxes)

    # Move the tensors to host memory once per image instead of once per box.
    xyxy = boxes.xyxy.cpu().numpy()
    confidences = boxes.conf.cpu().numpy()
    class_ids = boxes.cls.cpu().numpy()

    # Draw on a copy of the image the predictor already decoded. Re-reading
    # from disk with cv2.imread can return None and would crash the drawing
    # calls below.
    canvas = result.orig_img.copy() if CONFIG['visualize'] else None

    for (x1, y1, x2, y2), conf, cls in zip(xyxy, confidences, class_ids):
        conf = float(conf)
        cls = int(cls)

        # Safely get class name (ids beyond the YAML get a synthetic name).
        class_name = class_names[cls] if cls < len(class_names) else f'class_{cls}'

        # Update class statistics
        per_class_counts[class_name] = per_class_counts.get(class_name, 0) + 1

        # Store prediction (pixel coordinates, xyxy)
        all_predictions.append({
            'image': img_name,
            'class_id': cls,
            'class_name': class_name,
            'confidence': conf,
            'bbox': [float(x1), float(y1), float(x2), float(y2)],
            'width': float(x2 - x1),
            'height': float(y2 - y1)
        })

        if canvas is None:
            continue

        # OpenCV drawing calls need integer pixel coordinates.
        left, top, right, bottom = int(x1), int(y1), int(x2), int(y2)

        # Draw box
        color = (0, 255, 0)  # Green
        cv2.rectangle(canvas, (left, top), (right, bottom), color, 2)

        # Draw label on a filled background just above the box
        label = f"{class_name} {conf:.2f}"
        (w, h), _ = cv2.getTextSize(label, cv2.FONT_HERSHEY_SIMPLEX, 0.5, 1)
        cv2.rectangle(canvas, (left, top - h - 10), (left + w, top), color, -1)
        cv2.putText(canvas, label, (left, top - 5),
                    cv2.FONT_HERSHEY_SIMPLEX, 0.5, (0, 0, 0), 1)

    # Save visualization
    if canvas is not None:
        vis_path = f'{OUTPUT_PATH}/visualizations/{img_name}.jpg'
        cv2.imwrite(vis_path, canvas)

print("\n‚úì Results processed!")

# ============================================================================
# SAVE PREDICTIONS IN MULTIPLE FORMATS
# ============================================================================

_rule = "=" * 60
print("\n" + _rule)
print("SAVING PREDICTIONS")
print(_rule)

# 1. Save as JSON
predictions_json_path = f'{OUTPUT_PATH}/predictions.json'
_predictions_text = json.dumps(all_predictions, indent=2)
with open(predictions_json_path, 'w') as f:
    f.write(_predictions_text)
print(f"‚úì JSON predictions saved: {predictions_json_path}")

# 2. Save as CSV (skipped entirely when there are no detections)
if all_predictions:
    predictions_csv_path = f'{OUTPUT_PATH}/predictions.csv'
    df = pd.DataFrame(all_predictions)
    df.to_csv(predictions_csv_path, index=False)
    print(f"‚úì CSV predictions saved: {predictions_csv_path}")

# 3. Save statistics
stats_path = f'{OUTPUT_PATH}/detection_statistics.json'
_stats_text = json.dumps(detection_stats, indent=2)
with open(stats_path, 'w') as f:
    f.write(_stats_text)
print(f"‚úì Statistics saved: {stats_path}")

# 4. Create submission format (YOLO format)
print("\nCreating YOLO format labels...")

# Group detections per image so each image is decoded once (not once per
# detection) and its label file is written in a single pass.
predictions_by_image = {}
for pred in all_predictions:
    predictions_by_image.setdefault(pred['image'], []).append(pred)

# Fallback lookup (stem -> first file with that stem) for images whose
# extension is neither .jpg nor .png.
paths_by_stem = {}
for candidate in sorted(test_path.iterdir()):
    if candidate.is_file():
        paths_by_stem.setdefault(candidate.stem, candidate)

skipped_images = 0
for img_name, image_preds in predictions_by_image.items():
    label_path = f"{OUTPUT_PATH}/labels/{img_name}.txt"

    # Locate the source image: prefer .jpg, then .png, then any same-stem file.
    img_path = test_path / f"{img_name}.jpg"
    if not img_path.exists():
        img_path = test_path / f"{img_name}.png"
    if not img_path.exists():
        img_path = paths_by_stem.get(img_name)

    # Read image to get dimensions; cv2.imread returns None when it cannot
    # decode the file.
    img = cv2.imread(str(img_path)) if img_path is not None else None
    if img is None:
        skipped_images += 1
        continue
    h, w = img.shape[:2]

    label_lines = []
    for pred in image_preds:
        # Convert to YOLO format (normalized centre x/y, width, height)
        x1, y1, x2, y2 = pred['bbox']
        x_center = ((x1 + x2) / 2) / w
        y_center = ((y1 + y2) / 2) / h
        width = (x2 - x1) / w
        height = (y2 - y1) / h
        label_lines.append(
            f"{pred['class_id']} {x_center:.6f} {y_center:.6f} {width:.6f} {height:.6f} {pred['confidence']:.6f}\n"
        )

    # Write (not append) so re-running the cell does not duplicate lines.
    with open(label_path, 'w') as f:
        f.writelines(label_lines)

if skipped_images:
    print(f"Warning: skipped {skipped_images} image(s) whose source file could not be read")
print(f"‚úì YOLO format labels saved: {OUTPUT_PATH}/labels/")

print("=" * 60)

# ============================================================================
# GENERATE SUMMARY REPORT
# ============================================================================

print("\n" + "=" * 60)
print("GENERATING SUMMARY REPORT")
print("=" * 60)

# Guard the ratios: an empty test set (or a run with no detections) must
# produce a report instead of a ZeroDivisionError.
total_images = detection_stats['total_images']
total_detections = detection_stats['total_detections']
avg_detections = total_detections / total_images if total_images else 0.0

summary_report = f"""
INFERENCE SUMMARY REPORT
{'=' * 60}

Timestamp: {datetime.now().isoformat()}
Model: {MODEL_PATH}
Test Images: {TEST_IMAGES_PATH}

Configuration:
  - Image Size: {CONFIG['imgsz']}
  - Confidence Threshold: {CONFIG['conf_thres']}
  - IOU Threshold: {CONFIG['iou_thres']}
  - Max Detections: {CONFIG['max_det']}

Results:
  - Total Images: {total_images}
  - Images with Detections: {detection_stats['images_with_detections']}
  - Total Detections: {total_detections}
  - Average Detections per Image: {avg_detections:.2f}

Detections per Class:
"""

# One line per class that was detected at least once, most frequent first.
ranked_classes = sorted(detection_stats['detections_per_class'].items(),
                        key=lambda item: item[1], reverse=True)
summary_report += "".join(
    f"  - {class_name}: {count} ({(count / total_detections) * 100:.1f}%)\n"
    for class_name, count in ranked_classes
    if count > 0 and total_detections
)

summary_report += f"\n{'=' * 60}\n"

# Save summary report
summary_path = f'{OUTPUT_PATH}/inference_summary.txt'
with open(summary_path, 'w') as f:
    f.write(summary_report)

print(summary_report)
print(f"‚úì Summary report saved: {summary_path}")

# ============================================================================
# FINAL SUMMARY
# ============================================================================

print("\n" + "=" * 60)
print("INFERENCE COMPLETE!")
print("=" * 60)
print(f"\nüìÅ Output Directory: {OUTPUT_PATH}")
print("\nGenerated Files:")
print("  ‚úì predictions.json - All predictions in JSON format")
print("  ‚úì predictions.csv - All predictions in CSV format")
print("  ‚úì labels/ - YOLO format label files")
print("  ‚úì detection_statistics.json - Detection statistics")
print("  ‚úì inference_summary.txt - Summary report")
if CONFIG['visualize']:
    print("  ‚úì visualizations/ - Annotated images")

# Share of images with at least one detection; 0.0 for an empty test set so
# the closing banner never fails with ZeroDivisionError.
images_total = detection_stats['total_images']
images_hit = detection_stats['images_with_detections']
detection_rate = (images_hit / images_total * 100) if images_total else 0.0

print("\nüìä Detection Summary:")
print(f"  Total Detections: {detection_stats['total_detections']}")
print(f"  Images with Detections: {images_hit}/{images_total}")
print(f"  Detection Rate: {detection_rate:.1f}%")
print("\n" + "=" * 60)
print("\n‚úÖ All predictions have been generated successfully!")
print("Next step: Run create_submission.py to create the final ZIP file")
print("=" * 60)

# Submission

In [None]:
"""
Create Submission Package
Compresses all code, results, and documentation into a ZIP file
Compatible with train.py, validation.py, and inference.py workflow
"""

import os
import sys
import shutil
import zipfile
from pathlib import Path
from datetime import datetime
import json

# ============================================================================
# CONFIGURATION
# ============================================================================

# Paths: everything is read from and written to the Kaggle working directory.
# The archive name carries a creation timestamp so repeated runs do not
# overwrite each other.
WORKING_DIR = '/kaggle/working'
OUTPUT_ZIP = f"{WORKING_DIR}/submission_{datetime.now():%Y%m%d_%H%M%S}.zip"

# Files and directories to include, grouped by the top-level folder they are
# placed under inside the archive. Paths are relative to WORKING_DIR; entries
# ending in '/' are directories.
INCLUDE_ITEMS = dict(
    code=[
        'train.py',
        'validation.py',
        'inference.py',
        'create_submission.py',
        'corrected_data.yaml',
    ],
    results=[
        # Training artefacts
        'training_summary.json',
        'train/weights/best.pt',
        'train/weights/last.pt',
        'train/results.csv',
        'train/results.png',
        'train/confusion_matrix.png',
        'train/confusion_matrix_normalized.png',
        # Validation artefacts
        'validation_results/validation_results.json',
        'validation_results/validation_summary.txt',
        # Inference artefacts
        'inference_results/predictions.json',
        'inference_results/predictions.csv',
        'inference_results/detection_statistics.json',
        'inference_results/inference_summary.txt',
        'inference_results/labels/',  # Directory
    ],
    visualizations=[
        # First three validation batch previews
        *(f'train/val_batch{batch_idx}_pred.jpg' for batch_idx in range(3)),
        'inference_results/visualizations/',  # Directory (optional)
    ],
)

# ============================================================================
# HELPER FUNCTIONS
# ============================================================================

def _dir_size_bytes(path):
    """Return the combined size in bytes of every file below *path*."""
    total = 0
    try:
        with os.scandir(path) as entries:
            for entry in entries:
                try:
                    if entry.is_file():
                        total += entry.stat().st_size
                    elif entry.is_dir():
                        total += _dir_size_bytes(entry.path)
                except OSError:
                    # Entry vanished or is unreadable: skip it, keep the rest.
                    continue
    except OSError:
        # A missing or unreadable directory contributes nothing (best effort).
        pass
    return total


def get_dir_size(path):
    """Calculate directory size in MB.

    Walks *path* recursively and sums the sizes of all files found.

    Args:
        path: Directory to measure.

    Returns:
        Total size as a float in MB (1 MB = 1024 * 1024 bytes). Returns 0.0
        when *path* does not exist, is not a directory, or cannot be read.

    The accumulation is done in bytes by a private helper and converted once
    at the end. Recursing through this function directly would add an
    already-converted MB value to a byte total and make nested directories
    all but vanish from the result.
    """
    return _dir_size_bytes(path) / (1024 * 1024)  # Convert to MB

def add_to_zip(zipf, source_path, archive_name):
    """Add a file, or a whole directory tree, to an open ZIP archive.

    Args:
        zipf: Writable ``zipfile.ZipFile`` to add to.
        source_path: File or directory on disk.
        archive_name: Name inside the archive. For a directory this is the
            prefix under which its files are stored, keeping their relative
            layout.

    Returns:
        True if the source was a file or directory and was written without
        error (a directory containing no files still counts); False if it
        does not exist, is neither a file nor a directory, or writing failed.
    """
    src = Path(source_path)
    if not src.exists():
        return False

    try:
        if src.is_file():
            zipf.write(src, archive_name)
            return True

        if src.is_dir():
            # Store every file of the tree under archive_name, preserving
            # its position relative to the source directory.
            for dirpath, _subdirs, filenames in os.walk(src):
                current_dir = Path(dirpath)
                for filename in filenames:
                    member = current_dir / filename
                    zipf.write(member, Path(archive_name) / member.relative_to(src))
            return True
    except Exception as e:
        print(f"  ‚ö† Warning: Could not add {source_path}: {e}")
        return False

    # Exists, but is neither a regular file nor a directory.
    return False

# ============================================================================
# CREATE README
# ============================================================================

def create_readme():
    """Create the README file for the submission.

    Renders a fixed Markdown document, stamped with the current local time,
    and writes it to ``README.md`` inside ``WORKING_DIR``, overwriting any
    existing file.

    Returns:
        The path of the written README as a string.
    """
    readme_content = f"""
# Military Object Detection - Submission Package

## Overview
This package contains the complete code, trained models, and results for the military object detection project using YOLOv8.

**Generated:** {datetime.now().strftime("%Y-%m-%d %H:%M:%S")}

## Contents

### üìÑ Code Files
- `train.py` - Main training script with YOLOv8
- `validation.py` - Comprehensive validation script
- `inference.py` - Inference script for test set predictions
- `create_submission.py` - This packaging script
- `corrected_data.yaml` - Dataset configuration

### üéØ Trained Models
- `train/weights/best.pt` - Best model based on validation mAP
- `train/weights/last.pt` - Last epoch checkpoint

### üìä Results
- `training_summary.json` - Training metrics and configuration
- `train/results.csv` - Epoch-by-epoch training metrics
- `train/results.png` - Training curves visualization
- `train/confusion_matrix.png` - Confusion matrix (unnormalized)
- `train/confusion_matrix_normalized.png` - Normalized confusion matrix

### ‚úÖ Validation Results
- `validation_results/validation_results.json` - Detailed validation metrics
- `validation_results/validation_summary.txt` - Human-readable summary
- Per-class performance metrics

### üîç Inference Results
- `inference_results/predictions.json` - All predictions in JSON format
- `inference_results/predictions.csv` - All predictions in CSV format
- `inference_results/labels/` - YOLO format label files
- `inference_results/detection_statistics.json` - Detection statistics
- `inference_results/inference_summary.txt` - Inference summary
- `inference_results/visualizations/` - Annotated test images (if included)

### üìà Visualizations
- Sample validation batch predictions
- Training and validation curves
- Confusion matrices

## Model Performance

Please refer to:
- `training_summary.json` for overall training metrics
- `validation_results/validation_summary.txt` for detailed validation performance
- `inference_results/inference_summary.txt` for test set statistics

## Usage

### Training
```bash
python train.py
```

### Validation
```bash
python validation.py
```

### Inference
```bash
python inference.py
```

### Create Submission Package
```bash
python create_submission.py
```

## Requirements
- Python 3.8+
- PyTorch
- Ultralytics YOLOv8
- OpenCV
- NumPy
- Pandas
- PyYAML

## Model Architecture
- **Base Model:** YOLOv8m (Medium)
- **Input Size:** 640x640
- **Classes:** 12 military object categories

## Notes
- All paths in scripts are configured for Kaggle environment
- Adjust paths if running in different environment
- Models are trained with strong data augmentation
- Early stopping enabled to prevent overfitting

## Contact
Centre of Excellence - AI Lab
Department of Computer Science & Engineering

---
*Generated automatically by create_submission.py*
"""
    
    readme_path = f'{WORKING_DIR}/README.md'
    # The document contains non-ASCII characters, so the encoding is pinned
    # rather than left to the platform default, which can fail to encode them.
    with open(readme_path, 'w', encoding='utf-8') as f:
        f.write(readme_content)
    
    return readme_path

# ============================================================================
# MAIN EXECUTION
# ============================================================================

# Build the submission archive: README first, then the code, results and
# visualization groups from INCLUDE_ITEMS, and finally a manifest describing
# what was packed.
print("=" * 80)
print("CREATING SUBMISSION PACKAGE")
print("=" * 80)
print(f"Working directory: {WORKING_DIR}")
print(f"Output ZIP: {OUTPUT_ZIP}")
print("=" * 80)

# Change to working directory so the relative paths in INCLUDE_ITEMS resolve
os.chdir(WORKING_DIR)

# Create README
print("\nüìù Creating README...")
readme_path = create_readme()
print(f"‚úì README created: {readme_path}")

# Create manifest
manifest = {
    'created': datetime.now().isoformat(),
    'description': 'Military Object Detection Submission Package',
    'included_files': {},
    'statistics': {}
}

# Create ZIP file
print(f"\nüì¶ Creating ZIP file: {OUTPUT_ZIP}")
print("=" * 80)

included_count = 0
skipped_count = 0
total_size = 0  # Bytes of result items added (files and directories)

with zipfile.ZipFile(OUTPUT_ZIP, 'w', zipfile.ZIP_DEFLATED, compresslevel=9) as zipf:
    
    # Add README first
    print("\nüìÑ Adding README...")
    zipf.write(readme_path, 'README.md')
    included_count += 1
    
    # Process code files (stored under code/ with their original names)
    print("\nüíª Adding Code Files:")
    code_added = []
    for item in INCLUDE_ITEMS['code']:
        if Path(item).exists():
            if add_to_zip(zipf, item, f'code/{item}'):
                print(f"  ‚úì {item}")
                code_added.append(item)
                included_count += 1
            else:
                print(f"  ‚úó {item} (failed)")
                skipped_count += 1
        else:
            print(f"  ‚äó {item} (not found)")
            skipped_count += 1
    
    manifest['included_files']['code'] = code_added
    
    # Process results (flattened: stored under results/ by base name only)
    print("\nüìä Adding Results:")
    results_added = []
    for item in INCLUDE_ITEMS['results']:
        if Path(item).exists():
            if add_to_zip(zipf, item, f'results/{Path(item).name}'):
                # st_size is in bytes but get_dir_size() returns MB
                # (1024 * 1024 bytes). Convert directory sizes back to bytes
                # so `size` and `total_size` use one unit; otherwise
                # directories all but vanish from the totals.
                if Path(item).is_file():
                    size = Path(item).stat().st_size
                else:
                    size = get_dir_size(item) * 1024 * 1024
                print(f"  ‚úì {item} ({size/1e6:.2f} MB)")
                results_added.append(item)
                included_count += 1
                total_size += size
            else:
                print(f"  ‚úó {item} (failed)")
                skipped_count += 1
        else:
            print(f"  ‚äó {item} (not found)")
            skipped_count += 1
    
    manifest['included_files']['results'] = results_added
    
    # Process visualizations (optional: a missing item is not counted as skipped)
    print("\nüñºÔ∏è  Adding Visualizations:")
    viz_added = []
    for item in INCLUDE_ITEMS['visualizations']:
        if Path(item).exists():
            if add_to_zip(zipf, item, f'visualizations/{Path(item).name}'):
                print(f"  ‚úì {item}")
                viz_added.append(item)
                included_count += 1
            else:
                print(f"  ‚úó {item} (failed)")
                skipped_count += 1
        else:
            print(f"  ‚äó {item} (optional, not found)")
    
    manifest['included_files']['visualizations'] = viz_added
    
    # Add manifest (written to the working directory, then copied into the ZIP)
    manifest['statistics'] = {
        'files_included': included_count,
        'files_skipped': skipped_count,
        'total_size_mb': total_size / 1e6
    }
    
    manifest_path = 'manifest.json'
    with open(manifest_path, 'w') as f:
        json.dump(manifest, f, indent=2)
    zipf.write(manifest_path, 'manifest.json')
    print(f"\nüìã Manifest added: manifest.json")

print("\n" + "=" * 80)

# Get final ZIP size
zip_size = Path(OUTPUT_ZIP).stat().st_size / (1024 * 1024)  # MB

# Size of the packed result items, expressed with the same divisor as
# zip_size so the storage figures below are directly comparable.
# total_size only covers the 'results' group, so the ratio is indicative.
original_size = total_size / (1024 * 1024)  # MB

# ============================================================================
# SUMMARY
# ============================================================================

print("\n" + "=" * 80)
print("SUBMISSION PACKAGE CREATED SUCCESSFULLY!")
print("=" * 80)
print(f"\nüì¶ Package Details:")
print(f"  Location: {OUTPUT_ZIP}")
print(f"  Size: {zip_size:.2f} MB")
print(f"  Files included: {included_count}")
print(f"  Files skipped: {skipped_count}")

print(f"\nüìÇ Package Contents:")
print(f"  ‚úì README.md - Complete documentation")
print(f"  ‚úì manifest.json - File listing and metadata")
print(f"  ‚úì code/ - All Python scripts and configuration")
print(f"  ‚úì results/ - Training, validation, and inference results")
print(f"  ‚úì visualizations/ - Sample predictions and plots")

print(f"\nüíæ Storage:")
print(f"  Compressed: {zip_size:.2f} MB")
print(f"  Original: {original_size:.2f} MB")
# When no result file was found (e.g. the earlier scripts have not run yet)
# there is nothing to compare against; report that instead of dividing by zero.
if original_size > 0:
    print(f"  Compression ratio: {(1 - zip_size/original_size)*100:.1f}%")
else:
    print("  Compression ratio: n/a (no result files were added)")

print("\n" + "=" * 80)
print("‚úÖ Package ready for submission!")
print(f"üì• Download: {OUTPUT_ZIP}")
print("=" * 80)

# Print warning if model files are missing
if not Path('train/weights/best.pt').exists():
    print("\n‚ö†Ô∏è  WARNING: Best model weights not found!")
    print("   Make sure training completed successfully before creating submission.")

if skipped_count > 0:
    print(f"\n‚ö†Ô∏è  Note: {skipped_count} files were skipped (not found or failed to add)")
    print("   This is normal if you haven't run all scripts yet.")

print("\n" + "=" * 80)