<a href="https://colab.research.google.com/github/akashsiddharth1/Military_objects_detection_YOLO/blob/main/militaryobject_detection.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

# Weapon Detection Using YOLO and Computer Vision
This notebook provides a complete code workflow—with clear comments and explanations—for weapon detection using the latest YOLO model, covering data analysis, model training, real-time deployment, and a Streamlit web app.

In [None]:
%matplotlib inline


In [None]:
!pip install ultralytics

In [None]:
!pip install opencv-python

In [None]:
# streamlit is imported further down in this notebook; install it here so that
# import cannot fail on a fresh runtime.
!pip install pillow albumentations streamlit

In [None]:
import os
import cv2
import numpy as np
import pandas as pd
from ultralytics import YOLO
import time
from datetime import datetime
import torch
import matplotlib.pyplot as plt
import seaborn as sns
from glob import glob
import json
from PIL import Image, ImageEnhance, ImageStat,  UnidentifiedImageError
import yaml
from collections import defaultdict, Counter
import warnings
warnings.filterwarnings('ignore')

In [None]:
# scikit-image helpers used by the local-entropy (noise) check, plus tqdm for progress bars
from skimage.io import imread
from skimage.filters.rank import entropy
from skimage.morphology import disk
from skimage.color import rgb2gray  # was misspelled `rgb2gra`, which raised ImportError
from tqdm import tqdm

In [None]:
from pathlib import Path
from collections import defaultdict
from sklearn.model_selection import train_test_split
from sklearn.metrics import classification_report, confusion_matrix
import plotly.express as px
import plotly.graph_objects as go
from plotly.subplots import make_subplots
import albumentations as A
from albumentations.pytorch import ToTensorV2
import tempfile
import streamlit as st

# Data Collection and Preparation
**a. Directory Structure**

In [None]:
# Root folder of the dataset (Drive mount path used throughout this notebook)
dataset_path = Path('/content/drive/MyDrive/Files/military_object_dataset')

# Every split keeps its pictures in `images/` and its YOLO annotation files in `labels/`
train_images, train_labels = (dataset_path / 'train' / kind for kind in ('images', 'labels'))
val_images, val_labels = (dataset_path / 'val' / kind for kind in ('images', 'labels'))
test_images, test_labels = (dataset_path / 'test' / kind for kind in ('images', 'labels'))

In [None]:
# Check dataset integrity: images and labels count

def count_images_labels(base_path):
    """Print the number of image and label files found in each dataset split.

    Args:
        base_path: ``pathlib.Path`` of the dataset root. Each split is read from
            ``<base_path>/<split>/images`` and ``<base_path>/<split>/labels``.

    Raises:
        FileNotFoundError: if one of the split directories does not exist
            (raised by ``Path.iterdir``).

    Image files are recognised by extension, case-insensitively
    (``.jpg``, ``.jpeg``, ``.png``), so ``IMG.JPG`` or ``photo.jpeg`` are
    counted too. Label files are those ending in ``.txt``.
    """
    image_exts = {'.jpg', '.jpeg', '.png'}
    for subset in ('train', 'val', 'test'):
        images_dir = base_path / subset / 'images'
        labels_dir = base_path / subset / 'labels'

        image_files = [f for f in images_dir.iterdir() if f.suffix.lower() in image_exts]
        label_files = [f for f in labels_dir.iterdir() if f.suffix == '.txt']

        print(f'--- {subset.upper()} ---')
        print(f'Images Count: {len(image_files)}')
        print(f'Labels Count: {len(label_files)}')

# Run the file count against the dataset root used by this notebook
dataset_path = Path('/content/drive/MyDrive/Files/military_object_dataset')
count_images_labels(base_path=dataset_path)


In [None]:
# Function to verify each image has a corresponding label file

def verify_images_have_labels(base_path):
    """Report, per split, the images that have no ``.txt`` label with the same stem.

    Args:
        base_path: ``pathlib.Path`` of the dataset root. Each split is read from
            ``<base_path>/<split>/images`` and ``<base_path>/<split>/labels``.

    Raises:
        FileNotFoundError: if one of the split directories does not exist
            (raised by ``Path.iterdir``).

    Image files are matched on extension case-insensitively
    (``.jpg``, ``.jpeg``, ``.png``); otherwise an unlabeled ``.jpeg`` or
    ``.JPG`` image would never be reported.
    """
    image_exts = {'.jpg', '.jpeg', '.png'}
    for subset in ('train', 'val', 'test'):
        images_dir = base_path / subset / 'images'
        labels_dir = base_path / subset / 'labels'

        # Compare file names without their extension
        image_stems = {f.stem for f in images_dir.iterdir() if f.suffix.lower() in image_exts}
        label_stems = {f.stem for f in labels_dir.iterdir() if f.suffix == '.txt'}

        # Stems that exist as an image but not as a label file
        missing_labels = image_stems - label_stems

        print(f'\n--- {subset.upper()} ---')
        if missing_labels:
            print(f'Images missing labels: {missing_labels}')
        else:
            print('✅ All images have corresponding label files.')

# Run the image/label pairing check against the dataset root used by this notebook
dataset_path = Path('/content/drive/MyDrive/Files/military_object_dataset')
verify_images_have_labels(base_path=dataset_path)


# Resize Images to Uniform Size

In [None]:

# Resize every image of every split IN PLACE to a fixed 640x640 size.
# The original files are overwritten. YOLO labels store normalised coordinates,
# so the label files do not need to be rewritten.
subsets = ['train', 'val', 'test']
target_size = (640, 640)
# The original list read ['.jpg', '.jpeg' '.png']: the missing comma fused the last
# two entries into '.jpeg.png', so .jpeg and .png files were silently skipped.
image_exts = {'.jpg', '.jpeg', '.png'}

for subset in subsets:
    images_dir = dataset_path / subset / 'images'
    for img_file in images_dir.iterdir():
        suffix = img_file.suffix.lower()
        if suffix not in image_exts:
            continue
        try:
            # The context manager closes the source file handle before we overwrite the file
            with Image.open(img_file) as src:
                img = src.resize(target_size)
            # Drop alpha / palette data that the JPEG writer cannot store
            is_jpeg = suffix in ('.jpg', '.jpeg')
            if img.mode == 'RGBA' or (is_jpeg and img.mode in ('LA', 'P')):
                img = img.convert('RGB')
            img.save(img_file)
        except Exception as e:
            # Best effort: report the file and carry on with the rest of the dataset
            print(f"Could not process image {img_file}: {e}")

# **Find corrupt image files**

In [None]:
def find_corrupt_images(base_path):
    """Print the path of every image that Pillow cannot open or verify.

    Args:
        base_path: ``pathlib.Path`` of the dataset root; images are read from
            ``<base_path>/<split>/images`` for the train, val and test splits.

    Files with a ``.jpg``, ``.jpeg`` or ``.png`` extension (any letter case)
    are checked. Nothing is returned; offending paths are printed.
    """
    image_exts = {'.jpg', '.jpeg', '.png'}
    subsets = ['train', 'val', 'test']
    for subset in subsets:
        images_dir = base_path / subset / 'images'
        for img_file in images_dir.iterdir():
            if img_file.suffix.lower() not in image_exts:
                continue
            try:
                # `with` releases the file handle even when verify() raises
                with Image.open(img_file) as img:
                    img.verify()
            except (UnidentifiedImageError, OSError, SyntaxError):
                # Pillow's PNG plugin reports broken chunks with SyntaxError
                print(f"Corrupted image: {img_file}")

# Usage
find_corrupt_images(Path('/content/drive/MyDrive/Files/military_object_dataset'))


# EDA
Image Analysis

In [None]:
# Function to collect image info

def analyze_images(base_path):
    """Collect size, aspect ratio and mean brightness for every dataset image.

    Args:
        base_path: ``pathlib.Path`` of the dataset root; images are read from
            ``<base_path>/<split>/images`` for the train, val and test splits.

    Returns:
        list[dict]: one dict per readable image with the keys ``subset``,
        ``filename``, ``width``, ``height``, ``aspect_ratio`` (width / height)
        and ``brightness`` (mean of the grayscale version of the image).
        Images that fail to load are reported on stdout and skipped.
    """
    image_exts = {'.jpg', '.jpeg', '.png'}
    image_info = []

    subsets = ['train', 'val', 'test']
    for subset in subsets:
        images_dir = base_path / subset / 'images'
        for img_file in images_dir.iterdir():
            if img_file.suffix.lower() not in image_exts:
                continue
            try:
                # `with` closes the file handle once the statistics are computed
                with Image.open(img_file) as img:
                    width, height = img.size
                    brightness = ImageStat.Stat(img.convert('L')).mean[0]
                image_info.append({
                    'subset': subset,
                    'filename': img_file.name,
                    'width': width,
                    'height': height,
                    'aspect_ratio': width / height,
                    'brightness': brightness
                })
            except Exception as e:
                print(f"Error processing {img_file}: {e}")

    return image_info


# Usage
dataset_path = Path('/content/drive/MyDrive/Files/military_object_dataset')
image_data = analyze_images(dataset_path)

# The histogram cells that follow read `df['width']`, `df['height']` and
# `df['aspect_ratio']`; `df` was never created, so build it from the records here.
df = pd.DataFrame(image_data)

# Image Size and Resolution Distribution

In [None]:
# Side-by-side histograms of image width and height.
# `df` is expected to hold one row per image with `width` and `height` columns.
fig, (ax_width, ax_height) = plt.subplots(1, 2, figsize=(10, 4))

ax_width.hist(df['width'], bins=20, color='skyblue')
ax_width.set_title('Image Width Distribution')

ax_height.hist(df['height'], bins=20, color='salmon')
ax_height.set_title('Image Height Distribution')

plt.show()

# Aspect Ratio Distribution

In [None]:
# Histogram of the per-image width / height ratio (`aspect_ratio` column of `df`)
fig, ax = plt.subplots()
ax.hist(df['aspect_ratio'], bins=20, color='green')
ax.set_title('Image Aspect Ratio Distribution (W/H)')
ax.set_xlabel('Aspect Ratio')
ax.set_ylabel('Count')
plt.show()


# Check for Blurry Images

In [None]:
def detect_blurry_images(base_dir, threshold=100.0):
    """List images whose Laplacian variance falls below ``threshold``.

    Args:
        base_dir: ``pathlib.Path`` of the dataset root; images are read from
            ``<base_dir>/<split>/images`` for the train, val and test splits.
        threshold: images with a Laplacian variance strictly below this value
            are reported.

    Returns:
        list[dict]: one dict per flagged image with the keys ``subset``,
        ``filename`` and ``blurriness`` (the measured variance). Files that
        OpenCV cannot read are skipped.

    Files with a ``.jpg``, ``.jpeg`` or ``.png`` extension (any letter case)
    are examined.
    """
    image_exts = {'.jpg', '.jpeg', '.png'}
    blurry_images = []

    subsets = ['train', 'val', 'test']
    for subset in subsets:
        images_dir = base_dir / subset / 'images'
        for img_file in images_dir.iterdir():
            if img_file.suffix.lower() not in image_exts:
                continue
            img = cv2.imread(str(img_file), cv2.IMREAD_GRAYSCALE)
            if img is None:
                # cv2.imread returns None for unreadable files
                continue
            variance = cv2.Laplacian(img, cv2.CV_64F).var()
            if variance < threshold:
                blurry_images.append({
                    'subset': subset,
                    'filename': img_file.name,
                    'blurriness': variance
                })

    return blurry_images


# Usage
dataset_path = Path('/content/drive/MyDrive/Files/military_object_dataset')
blurry_images = detect_blurry_images(dataset_path)
print("\nPotential Blurry Images:")
for item in blurry_images:
    print(item)

# Check for Underexposed / Overexposed Images

In [None]:
def detect_exposure_issues(base_dir, dark_threshold=30, bright_threshold=220):
    """Split images into under- and over-exposed groups by mean grayscale level.

    Args:
        base_dir: ``pathlib.Path`` of the dataset root; images are read from
            ``<base_dir>/<split>/images`` for the train, val and test splits.
        dark_threshold: images whose mean grayscale value is below this are
            reported as underexposed.
        bright_threshold: images whose mean grayscale value is above this are
            reported as overexposed.

    Returns:
        tuple[list, list]: ``(underexposed, overexposed)``; each entry is a
        ``(subset, filename, brightness)`` tuple.

    Files with a ``.jpg``, ``.jpeg`` or ``.png`` extension (any letter case)
    are examined.
    """
    image_exts = {'.jpg', '.jpeg', '.png'}
    underexposed = []
    overexposed = []

    subsets = ['train', 'val', 'test']
    for subset in subsets:
        images_dir = base_dir / subset / 'images'
        for img_file in images_dir.iterdir():
            if img_file.suffix.lower() not in image_exts:
                continue
            # `with` closes the file handle once the statistic has been computed
            with Image.open(img_file) as src:
                brightness = ImageStat.Stat(src.convert('L')).mean[0]  # grayscale mean

            if brightness < dark_threshold:
                underexposed.append((subset, img_file.name, brightness))
            elif brightness > bright_threshold:
                overexposed.append((subset, img_file.name, brightness))

    return underexposed, overexposed


underexposed, overexposed = detect_exposure_issues(dataset_path)

print("\nUnderexposed Images:")
for item in underexposed:
    print(item)

print("\nOverexposed Images:")
for item in overexposed:
    print(item)

# Check for Noise

In [None]:
def detect_noisy_images(base_dir, entropy_threshold=7.0):
    """List images whose mean local entropy exceeds ``entropy_threshold``.

    Args:
        base_dir: ``pathlib.Path`` of the dataset root; images are read from
            ``<base_dir>/<split>/images`` for the train, val and test splits.
        entropy_threshold: images whose mean rank-filter entropy (disk of
            radius 5) is strictly above this value are reported.

    Returns:
        list[dict]: one dict per flagged image with the keys ``subset``,
        ``filename`` and ``entropy``.

    Files with a ``.jpg``, ``.jpeg`` or ``.png`` extension (any letter case)
    are examined.
    """
    image_exts = {'.jpg', '.jpeg', '.png'}
    noisy_images = []

    subsets = ['train', 'val', 'test']
    for subset in subsets:
        images_dir = base_dir / subset / 'images'
        for img_file in images_dir.iterdir():
            if img_file.suffix.lower() not in image_exts:
                continue
            img = imread(str(img_file))
            if img.ndim == 3:
                # Check if the image has an alpha channel (4 channels)
                if img.shape[-1] == 4:
                    img = img[..., :3]  # Remove alpha channel
                img = rgb2gray(img)

            # Bring the image to uint8 for the rank filter. Images that are
            # already uint8 (grayscale files) must NOT be multiplied by 255:
            # that wraps around modulo 256 and corrupts the pixel values.
            if img.dtype != np.uint8:
                if np.issubdtype(img.dtype, np.integer):
                    # wider integer types: rescale from their full range to 0..255
                    img = img.astype(np.float64) / np.iinfo(img.dtype).max * 255
                else:
                    # float output of rgb2gray (and boolean images): scale by 255
                    img = img * 255
                img = img.astype(np.uint8)

            entropy_img = entropy(img, disk(5))
            mean_entropy = entropy_img.mean()

            if mean_entropy > entropy_threshold:
                noisy_images.append({
                    'subset': subset,
                    'filename': img_file.name,
                    'entropy': mean_entropy
                })

    return noisy_images


noisy_images = detect_noisy_images(dataset_path)
print("\nPotential Noisy Images (High Entropy):")
for item in noisy_images:
    print(item)

# Image Size, Aspect Ratio, Quality (EDA)

# 2. Annotation Analysis:
a. Number of Annotations per Image

In [None]:
# Build one row per bounding box: split, image stem, class id and the box size in
# pixels. Uses `subsets` and `dataset_path` defined in earlier cells.
records = []

for subset in subsets:
    labels_dir = dataset_path / subset / 'labels'
    images_dir = dataset_path / subset / 'images'

    for label_file in labels_dir.glob('*.txt'):
        # Find the image that belongs to this label file (.jpg first, then .png)
        img_file = images_dir / (label_file.stem + '.jpg')
        if not img_file.exists():
            img_file = images_dir / (label_file.stem + '.png')
        if not img_file.exists():
            continue

        try:
            # `with` closes the file handle as soon as the size has been read
            with Image.open(img_file) as img:
                img_width, img_height = img.size
        except UnidentifiedImageError:
            print(f"Skipping corrupted image: {img_file}")
            continue
        except Exception as e:
            print(f"Error loading {img_file}: {e}")
            continue

        with open(label_file, 'r') as f:
            lines = f.readlines()

        for line in lines:
            parts = line.strip().split()
            if len(parts) >= 5:
                try:
                    # Safely attempt float conversion
                    class_id, cx, cy, w, h = map(float, parts[:5])
                except ValueError:
                    print(f"Skipping invalid label line in {label_file}: {line.strip()}")
                    continue

                # Label values are fractions of the image size; convert to pixels
                box_width = w * img_width
                box_height = h * img_height
                aspect_ratio = box_width / box_height if box_height != 0 else 0

                records.append({
                    'subset': subset,
                    'filename': label_file.stem,
                    # store ids as int (0, 1, ...) rather than 0.0, 1.0, matching df_boxes
                    'class_id': int(class_id),
                    'box_width': box_width,
                    'box_height': box_height,
                    'aspect_ratio': aspect_ratio
                })

df_annotations = pd.DataFrame(records)
df_annotations.head()


In [None]:
# Number of annotations per class id, one bar colour per split

ax = sns.countplot(x='class_id', hue='subset', data=df_annotations)
ax.set_title('Class Distribution per Split')
plt.show()

In [None]:
# Bounding box size distribution (widths and heights overlaid, in pixels)
plt.figure(figsize=(10, 4))
sns.histplot(df_annotations['box_width'], color='blue', label='Width', kde=True)
# The closing parenthesis was missing on this call, making the whole cell a SyntaxError
sns.histplot(df_annotations['box_height'], color='orange', label='Height', kde=True)
plt.legend()
plt.title('Bounding Box Width & Height Distribution (Pixels)')
plt.show()


In [None]:
# Box area expressed as a percentage of a 640x640 image (the standard YOLO size is assumed)
box_area_px = df_annotations['box_width'] * df_annotations['box_height']
df_annotations['area_percent'] = box_area_px / (640*640) * 100

fig, ax = plt.subplots(figsize=(8, 4))
sns.histplot(df_annotations['area_percent'], kde=True, ax=ax)
ax.set_title('Bounding Box Area as % of Image (640x640)')
ax.set_xlabel('Bounding Box Area % of Image')
plt.show()


In [None]:
# Distribution of bounding-box width / height ratios

fig, ax = plt.subplots(figsize=(8, 4))
sns.histplot(df_annotations['aspect_ratio'], kde=True, ax=ax)
ax.set_title('Bounding Box Aspect Ratio Distribution (Width / Height)')
ax.set_xlabel('Aspect Ratio')
plt.show()


# Class Distribution analysis

# Class frequency

In [None]:
# Define dataset path.
# Same Drive root as the integrity checks and data.yaml; this cell previously
# pointed at './dataset', a different location from the rest of the notebook.
dataset_path = Path('/content/drive/MyDrive/Files/military_object_dataset')
subsets = ['train', 'val', 'test']

# Count how often each class id appears across all label files
class_counts = Counter()

for subset in subsets:
    labels_dir = dataset_path / subset / 'labels'

    for label_file in labels_dir.glob('*.txt'):
        with open(label_file, 'r') as f:
            for line in f:
                parts = line.strip().split()
                if len(parts) >= 5:
                    try:
                        class_id = int(float(parts[0]))  # Ensure it's an int
                        class_counts[class_id] += 1
                    except ValueError:
                        print(f"Skipping invalid line in {label_file}: {line.strip()}")
                        continue

# Class names as per your mapping
class_names = {
    0: 'camouflage_soldier',
    1: 'weapon',
    2: 'military_tank',
    3: 'military_truck',
    4: 'military_vehicle',
    5: 'civilian',
    6: 'soldier',
    7: 'civilian_vehicle',
    8: 'trench'
}

# Convert to DataFrame for visualization.
# - `.get` keeps an id that is missing from the mapping from raising KeyError
# - explicit `columns` keep the frame usable when no label was found at all
class_data = pd.DataFrame(
    [
        {
            'class_id': class_id,
            'class_name': class_names.get(class_id, f'unknown_{class_id}'),
            'count': count,
        }
        for class_id, count in sorted(class_counts.items())
    ],
    columns=['class_id', 'class_name', 'count'],
)

print(class_data)

# Plotting (Optional)
plt.figure(figsize=(10, 6))
plt.bar(class_data['class_name'], class_data['count'], color='skyblue')
plt.xticks(rotation=45)
plt.ylabel('Number of Annotations')
plt.title('Class Distribution in Dataset')
plt.show()


# Class co-occurrence

In [None]:
# Paths.
# Same Drive root as the integrity checks and data.yaml; this cell previously
# pointed at './dataset', a different location from the rest of the notebook.
dataset_path = Path('/content/drive/MyDrive/Files/military_object_dataset')
subsets = ['train', 'val', 'test']

# Class names as per your mapping (list index == YOLO class id)
class_names = [
    'camouflage_soldier', 'weapon', 'military_tank', 'military_truck',
    'military_vehicle', 'civilian', 'soldier', 'civilian_vehicle', 'trench'
]
num_classes = len(class_names)

# Set of class ids present in each image. The key includes the split so that two
# files with the same name in different splits are not merged into one image.
image_classes = defaultdict(set)

for subset in subsets:
    labels_dir = dataset_path / subset / 'labels'

    for label_file in labels_dir.glob('*.txt'):
        with open(label_file, 'r') as f:
            for line in f:
                parts = line.strip().split()
                if len(parts) >= 5:
                    try:
                        class_id = int(float(parts[0]))
                    except ValueError:
                        continue
                    # Ids outside the matrix would raise IndexError (or wrap around if negative)
                    if 0 <= class_id < num_classes:
                        image_classes[(subset, label_file.name)].add(class_id)

# Build co-occurrence matrix: entry [a, b] counts the images containing both a and b
# (the diagonal therefore counts the images containing that class at all)
co_occurrence = np.zeros((num_classes, num_classes), dtype=int)

for classes_in_img in image_classes.values():
    for class_a in classes_in_img:
        for class_b in classes_in_img:
            co_occurrence[class_a, class_b] += 1

# Convert to DataFrame for heatmap
df_co_occurrence = pd.DataFrame(co_occurrence, index=class_names, columns=class_names)

# 🔥 Plotting heatmap
plt.figure(figsize=(10, 8))
sns.heatmap(df_co_occurrence, annot=True, fmt='d', cmap='Blues')
plt.title('Class Co-occurrence Heatmap')
plt.ylabel('Class')
plt.xlabel('Class')
plt.show()


# **Visualization**

In [None]:
import random

# Class names (as per your dataset); list index == YOLO class id
class_names = [
    'camouflage_soldier', 'weapon', 'military_tank', 'military_truck',
    'military_vehicle', 'civilian', 'soldier', 'civilian_vehicle', 'trench'
]

# Dataset path.
# Same Drive root as the integrity checks and data.yaml; this cell previously
# pointed at './dataset', a different location from the rest of the notebook.
dataset_path = Path('/content/drive/MyDrive/Files/military_object_dataset')
subset = 'train'  # Change to 'val' or 'test' if needed

images_dir = dataset_path / subset / 'images'
labels_dir = dataset_path / subset / 'labels'

# Pick random sample images
img_files = list(images_dir.glob('*.jpg')) + list(images_dir.glob('*.png'))
sample_files = random.sample(img_files, min(5, len(img_files)))  # Visualize 5 samples

# Plot images with annotations
for img_file in sample_files:
    img = cv2.imread(str(img_file))
    if img is None:
        # cv2.imread returns None for unreadable files; cvtColor would crash on it
        print(f'Could not read {img_file}, skipping')
        continue
    img = cv2.cvtColor(img, cv2.COLOR_BGR2RGB)
    h, w = img.shape[:2]

    label_file = labels_dir / (img_file.stem + '.txt')
    if label_file.exists():
        with open(label_file, 'r') as f:
            for line in f:
                parts = line.strip().split()
                if len(parts) < 5:
                    continue  # blank or truncated line
                try:
                    class_id, cx, cy, bw, bh = map(float, parts[:5])
                except ValueError:
                    continue  # non-numeric line

                # Normalised centre/size -> pixel corner coordinates
                x_center, y_center = cx * w, cy * h
                box_w, box_h = bw * w, bh * h
                x1 = int(x_center - box_w / 2)
                y1 = int(y_center - box_h / 2)
                x2 = int(x_center + box_w / 2)
                y2 = int(y_center + box_h / 2)

                class_id = int(class_id)
                # Fall back to the raw id when it is not covered by class_names
                class_name = class_names[class_id] if 0 <= class_id < len(class_names) else str(class_id)
                color = (255, 0, 0)

                cv2.rectangle(img, (x1, y1), (x2, y2), color, 2)
                cv2.putText(img, class_name, (x1, max(y1 - 10, 0)),
                            cv2.FONT_HERSHEY_SIMPLEX, 0.7, color, 2)

    plt.figure(figsize=(8, 8))
    plt.imshow(img)
    plt.title(f'{subset.upper()}: {img_file.name}')
    plt.axis('off')
    plt.show()


# Heatmap

In [None]:
# Dataset path and subsets.
# Same Drive root as the integrity checks and data.yaml; this cell previously
# pointed at './dataset', a different location from the rest of the notebook.
dataset_path = Path('/content/drive/MyDrive/Files/military_object_dataset')
subsets = ['train', 'val', 'test']

# Heatmap canvas size (match image size if fixed, else average like 640x640)
canvas_w, canvas_h = 640, 640
heatmap = np.zeros((canvas_h, canvas_w))

# Collect bounding box centers across all subsets.
# Label coordinates are normalised, so the image itself is not needed: decoding
# every image only to read a size that was never used has been removed.
for subset in subsets:
    images_dir = dataset_path / subset / 'images'
    labels_dir = dataset_path / subset / 'labels'
    img_files = list(images_dir.glob('*.jpg')) + list(images_dir.glob('*.png'))

    for img_file in tqdm(img_files, desc=f'Processing {subset}'):
        label_file = labels_dir / (img_file.stem + '.txt')
        if not label_file.exists():
            continue
        with open(label_file, 'r') as f:
            for line in f:
                parts = line.strip().split()
                if len(parts) < 5:
                    continue
                try:
                    cx, cy = float(parts[1]), float(parts[2])
                except ValueError:
                    continue  # non-numeric line
                x_center = int(cx * canvas_w)
                y_center = int(cy * canvas_h)
                # Increase heatmap density at this point (centres outside the canvas are ignored)
                if 0 <= x_center < canvas_w and 0 <= y_center < canvas_h:
                    heatmap[y_center, x_center] += 1

# Apply Gaussian blur for smoother heatmap visualization
heatmap = cv2.GaussianBlur(heatmap, (0, 0), sigmaX=10, sigmaY=10)

# Plot the heatmap
plt.figure(figsize=(8, 8))
plt.imshow(heatmap, cmap='hot', interpolation='nearest')
plt.title('Object Density Heatmap Across Dataset')
plt.colorbar(label='Density of Objects')
plt.show()


# Bounding Box Size / Area Per Class


In [None]:
# Dataset path and subsets.
# Same Drive root as the integrity checks and data.yaml; this cell previously
# pointed at './dataset', a different location from the rest of the notebook.
dataset_path = Path('/content/drive/MyDrive/Files/military_object_dataset')
subsets = ['train', 'val', 'test']

# Store records of bounding box widths, heights and areas (in pixels)
records = []

for subset in subsets:
    labels_dir = dataset_path / subset / 'labels'
    images_dir = dataset_path / subset / 'images'

    for label_file in labels_dir.glob('*.txt'):
        # Find the image that belongs to this label file (.jpg first, then .png)
        img_file = images_dir / (label_file.stem + '.jpg')
        if not img_file.exists():
            img_file = images_dir / (label_file.stem + '.png')
        if not img_file.exists():
            continue
        img = cv2.imread(str(img_file))
        if img is None:
            continue
        img_h, img_w = img.shape[:2]

        with open(label_file, 'r') as f:
            lines = f.readlines()

        for line in lines:
            parts = line.strip().split()
            if len(parts) < 5:
                continue
            try:
                class_id, cx, cy, bw, bh = map(float, parts[:5])
            except ValueError:
                print(f"Skipping invalid label line in {label_file}: {line.strip()}")
                continue
            # Label sizes are fractions of the image size; convert to pixels
            width_pixels = bw * img_w
            height_pixels = bh * img_h
            records.append({
                'subset': subset,
                'class_id': int(class_id),
                'width': width_pixels,
                'height': height_pixels,
                'area': width_pixels * height_pixels
            })

# Explicit columns keep the frame usable by later cells even when no box was found
df_boxes = pd.DataFrame(records, columns=['subset', 'class_id', 'width', 'height', 'area'])


# Plot Boxplots of Bounding Box Areas Per Class

In [None]:
class_names = [
    'camouflage_soldier', 'weapon', 'military_tank', 'military_truck',
    'military_vehicle', 'civilian', 'soldier', 'civilian_vehicle', 'trench'
]
# Human-readable class label per box; ids outside the list fall back to the raw id
df_boxes['class_name'] = df_boxes['class_id'].apply(
    lambda x: class_names[x] if 0 <= x < len(class_names) else str(x)
)

# With `by=...` and no `ax`, DataFrame.boxplot opens its own figure, so the size is
# passed through `figsize`. A preceding plt.figure(...) only left an empty figure behind.
df_boxes.boxplot(column='area', by='class_name', grid=False, rot=45, figsize=(12, 6))
plt.title('Bounding Box Area Distribution per Class')
plt.ylabel('Area (pixels)')
plt.xlabel('Class Name')
plt.suptitle('')  # drop the automatic "Boxplot grouped by ..." super-title
plt.show()


# Plot Histograms of Widths & Heights Per Class

In [None]:
import seaborn as sns

# One density histogram per box dimension, coloured by class
for column, label in (('width', 'Width'), ('height', 'Height')):
    plt.figure(figsize=(12, 6))
    sns.histplot(data=df_boxes, x=column, hue='class_name', bins=50, element='step', stat='density')
    plt.title(f'Bounding Box {label} Distribution by Class')
    plt.show()


# **DATASET SPLITS**

In [None]:

def summarize_split(dataset_path, subset):
    """Summarise one dataset split.

    Args:
        dataset_path: ``pathlib.Path`` of the dataset root.
        subset: split name, e.g. ``'train'``; files are read from
            ``<dataset_path>/<subset>/images`` and ``.../labels``.

    Returns:
        dict with the keys ``Subset``, ``Images`` (number of ``.jpg``/``.png``
        files), ``Annotations`` (non-blank label lines), ``Unique Classes``
        (sorted list of class ids) and ``Total Unique Classes``.
    """
    images_dir = dataset_path / subset / 'images'
    labels_dir = dataset_path / subset / 'labels'

    images = list(images_dir.glob('*.jpg')) + list(images_dir.glob('*.png'))

    annotation_count = 0
    class_ids = set()

    for label_file in labels_dir.glob('*.txt'):
        with open(label_file, 'r') as f:
            for line in f:
                parts = line.strip().split()
                if not parts:
                    continue  # blank lines are not annotations
                annotation_count += 1
                try:
                    # int(float(...)) also accepts ids written as "1.0",
                    # like the other label-parsing cells in this notebook
                    class_ids.add(int(float(parts[0])))
                except ValueError:
                    print(f"Skipping invalid class id in {label_file}: {line.strip()}")

    return {
        'Subset': subset,
        'Images': len(images),
        'Annotations': annotation_count,
        'Unique Classes': sorted(class_ids),
        'Total Unique Classes': len(class_ids)
    }


dataset_path = Path('/content/drive/MyDrive/Files/military_object_dataset')
subsets = ['train', 'val', 'test']

split_data = [summarize_split(dataset_path, subset) for subset in subsets]

df_splits = pd.DataFrame(split_data)
df_splits

In [None]:
# Visualize split data: one bar chart for image counts, one for annotation counts
for column in ('Images', 'Annotations'):
    sns.barplot(data=df_splits, x='Subset', y=column)
    plt.title(f'Number of {column} in Each Split')
    plt.show()

# Visualize Class Distribution Across Splits

In [None]:
# Annotation count per class id, with one bar colour per split
split_class_ax = sns.countplot(x='class_id', hue='subset', data=df_annotations)
split_class_ax.set_title('Class Distribution per Split')
plt.show()

# Check for Data Leakage (Same Images in Multiple Splits)

In [None]:
# Image stems (file names without extension) found in each split
subset_files = {}

for subset in subsets:
    images_dir = dataset_path / subset / 'images'
    img_files = [*images_dir.glob('*.jpg'), *images_dir.glob('*.png')]
    subset_files[subset] = {img_file.stem for img_file in img_files}

# A stem that shows up in two splits means the same image name is used in both
train_val_overlap = subset_files['train'].intersection(subset_files['val'])
train_test_overlap = subset_files['train'].intersection(subset_files['test'])
val_test_overlap = subset_files['val'].intersection(subset_files['test'])

print('Train-Val Overlap:', len(train_val_overlap), train_val_overlap)
print('Train-Test Overlap:', len(train_test_overlap), train_test_overlap)
print('Val-Test Overlap:', len(val_test_overlap), val_test_overlap)


# Data Augmentation Analysis

In [None]:
import albumentations as A
from albumentations.pytorch import ToTensorV2
import cv2
import matplotlib.pyplot as plt
import random
from pathlib import Path

# Sample transform (safe for bounding boxes): the boxes are passed in YOLO format
# and transformed together with the image.
transform = A.Compose([
    A.HorizontalFlip(p=0.5),
    A.RandomBrightnessContrast(p=0.2),
    A.ShiftScaleRotate(shift_limit=0.05, scale_limit=0.1, rotate_limit=10, p=0.5),
    A.MotionBlur(p=0.2),
], bbox_params=A.BboxParams(format='yolo', label_fields=['class_labels']))

# Sample image and labels from the train split.
# A dedicated variable is used on purpose: this cell used to overwrite
# `dataset_path` with './dataset/train', so the later cells that build
# `dataset_path / subset / ...` looked in a non-existent '.../train/train/...'.
train_dir = Path('/content/drive/MyDrive/Files/military_object_dataset') / 'train'
images_dir = train_dir / 'images'
labels_dir = train_dir / 'labels'

img_files = list(images_dir.glob('*.jpg')) + list(images_dir.glob('*.png'))
img_file = random.choice(img_files)
label_file = labels_dir / (img_file.stem + '.txt')

img = cv2.imread(str(img_file))
img = cv2.cvtColor(img, cv2.COLOR_BGR2RGB)
h, w = img.shape[:2]

# Load YOLO annotations: "<class> <cx> <cy> <w> <h>" per line, normalised to [0, 1]
bboxes = []
labels = []
if label_file.exists():
    with open(label_file, 'r') as f:
        for line in f:
            parts = line.strip().split()
            if len(parts) < 5:
                continue  # blank or truncated line
            values = list(map(float, parts[:5]))
            labels.append(int(values[0]))
            bboxes.append(values[1:])

# Apply augmentation
augmented = transform(image=img, bboxes=bboxes, class_labels=labels)
aug_img = augmented['image']
aug_bboxes = augmented['bboxes']
aug_labels = augmented['class_labels']

# Visualize result
def draw_boxes(image, bboxes, labels, color=(255, 0, 0)):
    """Return a copy of ``image`` with the given YOLO boxes and class ids drawn on it.

    Args:
        image: HxWxC image array; it is not modified.
        bboxes: iterable of ``(cx, cy, bw, bh)`` boxes normalised to [0, 1].
        labels: iterable of class ids, parallel to ``bboxes``.
        color: colour of the rectangle and text.

    Returns:
        The annotated copy of ``image``.
    """
    img_copy = image.copy()
    # Scale by the size of the image being drawn on, not by the globals `w`/`h`
    img_h, img_w = img_copy.shape[:2]
    for (cx, cy, bw, bh), cls in zip(bboxes, labels):
        x1 = int((cx - bw / 2) * img_w)
        y1 = int((cy - bh / 2) * img_h)
        x2 = int((cx + bw / 2) * img_w)
        y2 = int((cy + bh / 2) * img_h)
        cv2.rectangle(img_copy, (x1, y1), (x2, y2), color, 2)
        cv2.putText(img_copy, f'{cls}', (x1, max(y1 - 10, 0)), cv2.FONT_HERSHEY_SIMPLEX, 0.6, color, 2)
    return img_copy

fig, axes = plt.subplots(1, 2, figsize=(16, 8))
axes[0].imshow(draw_boxes(img, bboxes, labels))
axes[0].set_title('Original')
axes[1].imshow(draw_boxes(aug_img, aug_bboxes, aug_labels, color=(0, 255, 0)))
axes[1].set_title('Augmented')
for ax in axes: ax.axis('off')
plt.show()


# Challenges and Insights
Detecting small objects (like weapons) is a common challenge in object detection because:

Small bounding boxes contain less information

In [None]:
small_threshold = 0.02  # Define < 2% of image area as small

# Assume images are roughly 640x640
image_area = 640 * 640

df_boxes['is_small'] = df_boxes['area'] / image_area < small_threshold

# Count small vs. large objects per class.
# reindex guarantees both the False and the True column exist, so the lookup
# below cannot raise KeyError when the dataset has no small (or no large) boxes.
small_objects_count = (
    df_boxes.groupby(['class_name', 'is_small'])
    .size()
    .unstack(fill_value=0)
    .reindex(columns=[False, True], fill_value=0)
)

# Percentage of small objects per class, largest share first
small_objects_percent = (small_objects_count[True] / small_objects_count.sum(axis=1)) * 100
small_objects_percent = small_objects_percent.sort_values(ascending=False)

plt.figure(figsize=(10, 6))
small_objects_percent.plot(kind='bar', color='orange')
plt.ylabel('% of Small Objects (<2% area)')
plt.title('Percentage of Small Objects by Class')
plt.show()


# Class Imbalance

In [None]:
# Stronger augmentation pipeline; the boxes are passed in YOLO format with their
# class ids in `class_labels`. This cell only defines the pipeline (it replaces the
# earlier `transform`); nothing is applied here.
imbalance_ops = [
    A.HorizontalFlip(p=1.0),
    A.RandomBrightnessContrast(p=0.5),
    A.RandomScale(scale_limit=0.3, p=1.0),
    A.Rotate(limit=15, p=0.7),
]
transform = A.Compose(
    imbalance_ops,
    bbox_params=A.BboxParams(format='yolo', label_fields=['class_labels']),
)


# Annotation Quality

In [None]:
# Collect the file names of images that have no label file with the same stem
missing_labels = []

for subset in ('train', 'val', 'test'):
    split_root = dataset_path / subset
    # .jpg files first, then .png files
    for pattern in ('*.jpg', '*.png'):
        for img_file in (split_root / 'images').glob(pattern):
            expected_label = split_root / 'labels' / (img_file.stem + '.txt')
            if not expected_label.exists():
                missing_labels.append(img_file.name)

print(f'Missing labels: {missing_labels}')


In [None]:
# Every label line must consist of exactly five whitespace-separated fields
errors = []

for subset in ('train', 'val', 'test'):
    for label_file in (dataset_path / subset / 'labels').glob('*.txt'):
        with open(label_file, 'r') as f:
            errors.extend(
                f'{label_file} line {line_no}: Invalid format'
                for line_no, line in enumerate(f, start=1)
                if len(line.strip().split()) != 5
            )

if not errors:
    print('All label files are properly formatted.')
else:
    for e in errors:
        print(e)


# TRAIN MODEL

In [None]:
# Dataset description for Ultralytics: dataset root, image folder of each split,
# number of classes and the class names in class-id order.
yaml_content = """
path: /content/drive/MyDrive/Files/military_object_dataset
train: train/images
val: val/images
test: test/images
nc: 9
names: [camouflage_soldier, weapon, military_tank, military_truck, military_vehicle, civilian, soldier, civilian_vehicle, trench]
"""

# Written to the current working directory
Path('data.yaml').write_text(yaml_content)


In [None]:
# Report whether PyTorch can see a CUDA device (prints True or False)
import torch
print(str(torch.cuda.is_available()))

In [None]:
model = YOLO('yolov8n.pt')  # You can use yolov8n.pt, yolov8s.pt, etc. (n=Nano, s=Small)

# Train on the GPU when one is visible and fall back to the CPU otherwise;
# a hard-coded 'cuda' makes training fail on a runtime without a GPU.
train_device = 'cuda' if torch.cuda.is_available() else 'cpu'

model.train(
    data='data.yaml',
    epochs=5,
    imgsz=416,
    batch=16,
    device=train_device
)

# REAL TIME OBJECT DETECTION
Inference on Single Images

In [None]:
# Load your trained YOLOv8 model (best.pt after training)
model = YOLO('/content/drive/MyDrive/Files/military_object_dataset/best.pt')

# Inference on a single image (save=True also writes the annotated image to disk)
results = model.predict(source='000003.jpg', save=True, conf=0.5)

# Visualize the result in Colab.
# Results.plot() returns the annotated image as a BGR array, so there is no need to
# guess which runs/detect/predict* folder is newest and read the file back.
# (That lookup also needed `import glob`, which replaced the `glob` function
# imported at the top of the notebook with the module.)
img = cv2.cvtColor(results[0].plot(), cv2.COLOR_BGR2RGB)
plt.imshow(img)
plt.axis('off')
plt.show()

In [None]:
# Inference on a video file ('your_video.mp4' is a placeholder path).
# stream=True turns predict() into a generator, so per-frame results are not all
# kept in memory; the loop consumes it, and the annotated video is written
# because save=True.
for _frame_result in model.predict(source='your_video.mp4', save=True, conf=0.5, stream=True):
    pass

# Classify Classes as Threat / Non-Threat

In [None]:
# YOLO class id -> class name (ids follow the order of the list)
CLASS_NAMES = dict(enumerate([
    'camouflage_soldier',
    'weapon',
    'military_tank',
    'military_truck',
    'military_vehicle',
    'civilian',
    'soldier',
    'civilian_vehicle',
    'trench',
    'person',
    'wheeled_vehicle',
    'tracked_vehicle',
]))

# Class ids treated as threats / non-threats
THREAT_CLASSES = {0, 1, 2, 3, 4, 9, 10, 11}
NON_THREAT_CLASSES = {5, 6, 7, 8}

def classify_threat(class_id):
    """Return 'Threat', 'Non-Threat' or 'Unknown' for a YOLO class id."""
    if class_id in THREAT_CLASSES:
        return 'Threat'
    return 'Non-Threat' if class_id in NON_THREAT_CLASSES else 'Unknown'

model = YOLO('/content/drive/MyDrive/Files/military_object_dataset/best.pt')
results = model.predict(source='/content/drive/MyDrive/Files/military_object_dataset/test/images/006010.jpg', conf=0.5, save=True)

# Draw on the untouched input image kept in the result (BGR array) instead of
# re-reading the saved prediction. That file already carries YOLO's own boxes, so
# every object ended up with two overlapping boxes, and finding it relied on
# guessing the newest runs/detect/predict* folder.
result = results[0]
img = cv2.cvtColor(result.orig_img, cv2.COLOR_BGR2RGB)

for box in result.boxes:
    class_id = int(box.cls[0])
    label = CLASS_NAMES.get(class_id, str(class_id))
    threat_status = classify_threat(class_id)
    # img is RGB here: threats in red, everything else in green
    color = (255, 0, 0) if threat_status == 'Threat' else (0, 255, 0)

    x1, y1, x2, y2 = (int(v) for v in box.xyxy[0].tolist())
    cv2.rectangle(img, (x1, y1), (x2, y2), color, 2)
    # Keep the caption inside the image when the box touches the top edge
    cv2.putText(img, f"{label} ({threat_status})", (x1, max(y1 - 5, 15)),
                cv2.FONT_HERSHEY_SIMPLEX, 0.7, color, 2)

plt.imshow(img)
plt.axis('off')
plt.show()

# Performance Evaluation:
a. Evaluate the model's performance using metrics like precision, recall, and mean average precision (mAP).


In [None]:
# Load your trained model.
# Same weights file as the inference cells above; the previous
# '/kaggle/working/...' path belongs to a different environment.
model = YOLO('/content/drive/MyDrive/Files/military_object_dataset/best.pt')

# Evaluate on the validation split described by the data.yaml written earlier
metrics = model.val(data='data.yaml')

# This will automatically compute: Precision, Recall, mAP@0.5, mAP@0.5:0.95, etc.

In [None]:
# Validation summary.
# Reads `metrics` returned by model.val() in the previous cell. The earlier version
# read `results` (a list of prediction results with no `.box`), indexed the scalar
# mAP like a list, and nested single quotes inside a single-quoted f-string.
box_metrics = metrics.box
print(f'mAP@0.5: {box_metrics.map50:.3f}')
print(f'mAP@0.5:0.95: {box_metrics.map:.3f}')
print(f'Precision: {box_metrics.mp:.3f}')
print(f'Recall: {box_metrics.mr:.3f}')
print(f"Inference Time per Image (ms): {metrics.speed['inference']:.3f}")


# STREAMLIT APP

In [None]:
# Load YOLO model.
# Streamlit re-executes the script on every interaction; caching the loaded model
# avoids reading the weights from disk on each rerun.
@st.cache_resource
def load_model(weights_path='best.pt'):
    """Load the YOLO weights from ``weights_path`` (replace with your trained model path)."""
    return YOLO(weights_path)

model = load_model()

# Helper to convert detection results to threat / non-threat.
# NOTE: this reuses the name of the per-class-id `classify_threat` defined in the
# "Classify Classes as Threat / Non-Threat" cell and replaces it once this cell runs.
def classify_threat(classes):
    """Return 'Threat Detected' if any detected class name is a threat class.

    Args:
        classes: iterable of detected class names (strings).

    Returns:
        'Threat Detected' when at least one name is in the threat list,
        otherwise 'Non-Threat' (also for an empty input).
    """
    # Names of class ids 0-4, i.e. the dataset classes listed in THREAT_CLASSES
    # earlier in the notebook. 'enemy_soldier' is kept from the original list
    # even though data.yaml defines no such class.
    threat_classes = {
        'camouflage_soldier', 'weapon', 'military_tank', 'military_truck',
        'military_vehicle', 'enemy_soldier',
    }
    if any(cls in threat_classes for cls in classes):
        return 'Threat Detected'
    return 'Non-Threat'

# Streamlit App UI
import io

st.title("Weapon & Threat Detection System")
st.sidebar.header("Upload Media")

upload_option = st.sidebar.radio("Choose Input Type", ["Image", "Video"])

if upload_option == "Image":
    uploaded_file = st.sidebar.file_uploader("Upload an Image", type=['jpg', 'jpeg', 'png'])

    if uploaded_file:
        img = Image.open(uploaded_file)
        st.image(img, caption='Original Image', use_column_width=True)

        # YOLO Inference.
        # Pass the PIL image itself: Ultralytics reads PIL input as RGB, whereas a
        # raw numpy array is treated as BGR, so the former np.array(RGB) input was
        # run through the model with red and blue swapped.
        results = model.predict(img.convert('RGB'), conf=0.3)
        # plot() returns a BGR array; convert once so Streamlit and PIL show true colours
        res_img = cv2.cvtColor(results[0].plot(), cv2.COLOR_BGR2RGB)

        # Extract detected classes
        detected_classes = [model.names[int(cls)] for cls in results[0].boxes.cls]

        # Display
        st.image(res_img, caption='Detection Result', use_column_width=True)
        st.write(f"**Classification:** {classify_threat(detected_classes)}")
        st.write(f"**Detected Classes:** {detected_classes}")

        # Download button: encode the PNG in memory instead of leaving a temp file
        # and an unclosed file handle behind on every rerun
        png_buffer = io.BytesIO()
        Image.fromarray(res_img).save(png_buffer, format='PNG')
        st.download_button(label="Download Annotated Image", data=png_buffer.getvalue(), file_name='result.png', mime='image/png')

if upload_option == "Video":
    uploaded_video = st.sidebar.file_uploader("Upload a Video", type=['mp4', 'mov', 'avi'])
    if uploaded_video:
        # Write the upload to disk and close the file so all bytes are flushed
        # before OpenCV opens it by name
        with tempfile.NamedTemporaryFile(delete=False) as tfile:
            tfile.write(uploaded_video.read())
        cap = cv2.VideoCapture(tfile.name)

        # Keep the source frame rate; fall back to 20 fps when OpenCV cannot report it
        fps = cap.get(cv2.CAP_PROP_FPS)
        if not fps > 0:
            fps = 20.0

        stframe = st.empty()
        fourcc = cv2.VideoWriter_fourcc(*'mp4v')
        # Only the path is needed; close the handle so VideoWriter can own the file
        out_file = tempfile.NamedTemporaryFile(delete=False, suffix='.mp4')
        out_file.close()
        out = None

        try:
            while cap.isOpened():
                ret, frame = cap.read()
                if not ret:
                    break
                results = model.predict(frame, conf=0.3)
                res_frame = results[0].plot()

                # The writer is created lazily because it needs the frame size
                if out is None:
                    h, w = res_frame.shape[:2]
                    out = cv2.VideoWriter(out_file.name, fourcc, fps, (w, h))

                out.write(res_frame)
                stframe.image(res_frame, channels="BGR")
        finally:
            cap.release()
            # `out` stays None when no frame could be read
            if out is not None:
                out.release()

        if out is None:
            st.error("Could not read any frames from the uploaded video.")
        else:
            st.success("Video processing completed.")
            with open(out_file.name, 'rb') as processed:
                st.download_button("Download Processed Video", processed.read(), file_name='result.mp4', mime='video/mp4')
