In [1]:
!pip install torch torchvision torchaudio --extra-index-url https://download.pytorch.org/whl/cu117
!pip install pandas opencv-python matplotlib seaborn albumentations

Looking in indexes: https://pypi.org/simple, https://download.pytorch.org/whl/cu117
Collecting nvidia-cuda-nvrtc-cu12==12.4.127 (from torch)
  Downloading nvidia_cuda_nvrtc_cu12-12.4.127-py3-none-manylinux2014_x86_64.whl.metadata (1.5 kB)
Collecting nvidia-cuda-runtime-cu12==12.4.127 (from torch)
  Downloading nvidia_cuda_runtime_cu12-12.4.127-py3-none-manylinux2014_x86_64.whl.metadata (1.5 kB)
Collecting nvidia-cuda-cupti-cu12==12.4.127 (from torch)
  Downloading nvidia_cuda_cupti_cu12-12.4.127-py3-none-manylinux2014_x86_64.whl.metadata (1.6 kB)
Collecting nvidia-cudnn-cu12==9.1.0.70 (from torch)
  Downloading nvidia_cudnn_cu12-9.1.0.70-py3-none-manylinux2014_x86_64.whl.metadata (1.6 kB)
Collecting nvidia-cublas-cu12==12.4.5.8 (from torch)
  Downloading nvidia_cublas_cu12-12.4.5.8-py3-none-manylinux2014_x86_64.whl.metadata (1.5 kB)
Collecting nvidia-cufft-cu12==11.2.1.3 (from torch)
  Downloading nvidia_cufft_cu12-11.2.1.3-py3-none-manylinux2014_x86_64.whl.metadata (1.5 kB)
Collecting

In [2]:
!mkdir ~/.kaggle
!cp kaggle.json ~/.kaggle/
!chmod 600 ~/.kaggle/kaggle.json

cp: cannot stat 'kaggle.json': No such file or directory
chmod: cannot access '/root/.kaggle/kaggle.json': No such file or directory


In [3]:
!kaggle datasets download -d a2015003713/militaryaircraftdetectiondataset

Dataset URL: https://www.kaggle.com/datasets/a2015003713/militaryaircraftdetectiondataset
License(s): unknown
Downloading militaryaircraftdetectiondataset.zip to /content
100% 9.51G/9.52G [02:01<00:00, 81.4MB/s]
100% 9.52G/9.52G [02:01<00:00, 84.1MB/s]


In [4]:
!unzip militaryaircraftdetectiondataset.zip -d dataset

[1;30;43mStreaming output truncated to the last 5000 lines.[0m
  inflating: dataset/dataset/df5e269f25d0372689b4a12cb51ed562.jpg  
  inflating: dataset/dataset/df5f38964aae67210dc9cad88476985b.csv  
  inflating: dataset/dataset/df5f38964aae67210dc9cad88476985b.jpg  
  inflating: dataset/dataset/df669a9da7e9ec575e9f528a74ec80fd.csv  
  inflating: dataset/dataset/df669a9da7e9ec575e9f528a74ec80fd.jpg  
  inflating: dataset/dataset/df6f3d90d68172d816df993a354fb33c.csv  
  inflating: dataset/dataset/df6f3d90d68172d816df993a354fb33c.jpg  
  inflating: dataset/dataset/df77147bd2b008eca6324316904aa468.csv  
  inflating: dataset/dataset/df77147bd2b008eca6324316904aa468.jpg  
  inflating: dataset/dataset/df7acfc08352bffa2603e9f7bb49faa5.csv  
  inflating: dataset/dataset/df7acfc08352bffa2603e9f7bb49faa5.jpg  
  inflating: dataset/dataset/df7b362393a13c8e7711246a67eab19e.csv  
  inflating: dataset/dataset/df7b362393a13c8e7711246a67eab19e.jpg  
  inflating: dataset/dataset/df7f3061133b071c018351

In [5]:
import pandas as pd
import os
import cv2
import numpy as np
import torch
from torch.utils.data import Dataset, DataLoader
from torchvision import transforms
import matplotlib.pyplot as plt
import matplotlib.patches as patches
import albumentations as A
from albumentations.pytorch import ToTensorV2
import yaml

In [15]:
config_path = 'config.yaml' # CONFIG FILE KE LIYE PATH
print(f"Loading configuration from: {config_path}")
try:
    with open(config_path, 'r') as f:
        config = yaml.safe_load(f) # LOADING CONFIG FILE
        print("Configuration loaded successfully.")
except FileNotFoundError:
    print(f"Error: Configuration file not found at '{config_path}'. Using default settings.")
    config = { # DEFAULT CONFIGURATIONS
        'dataset_path': '/content/dataset',
        'model_save_path': 'models/best_model.pth',
        'checkpoint_path': 'models/checkpoint_epoch_{epoch}.pth',
        'detection_threshold': 0.8,
        'friendly_models': ['AH64', 'CH47', 'SH60'],
        'enemy_models': ['Mi28', 'Ka52', 'другие'],
        'num_epochs': 10,
        'batch_size': 4,
        'learning_rate': 0.005,
        'step_size_lr_scheduler': 3,
        'gamma_lr_scheduler': 0.1
    }

dataset_path = os.path.join(config['dataset_path'], 'dataset')
model_save_path = config['model_save_path']
checkpoint_path = config['checkpoint_path']
detection_threshold = config['detection_threshold']
friendly_models = config['friendly_models']
enemy_models = config['enemy_models']
num_epochs = config['num_epochs']
batch_size = config['batch_size']
learning_rate = config['learning_rate']
step_size_lr_scheduler = config['step_size_lr_scheduler']
gamma_lr_scheduler = config['gamma_lr_scheduler']

# Combine all CSV files
all_csv_files = [f for f in os.listdir(dataset_path) if f.endswith('.csv')]
dfs = [pd.read_csv(os.path.join(dataset_path, csv_file)) for csv_file in all_csv_files]
combined_df = pd.concat(dfs, ignore_index=True)

# Add .jpg extension to filenames
combined_df['filename'] = combined_df['filename'] + '.jpg'

# Check for missing images and log them to a file
missing_images = []
for img_file in combined_df['filename']:
    if not os.path.exists(os.path.join(dataset_path, img_file)):
        missing_images.append(img_file)

if missing_images:
    print(f"Warning: {len(missing_images)} images missing from dataset")
    with open('missing_images.txt', 'w') as f:
        for img in missing_images:
            f.write(img + '\n')
    print("Missing images list saved to 'missing_images.txt'")
else:
    print("All images present in dataset")

# Class distribution
print("Calculating class distribution...")
class_dist = combined_df['class'].value_counts() # Count occurrences of each class
plt.figure(figsize=(12,6)) # Set figure size
class_dist.plot(kind='bar') # Plot class distribution as a bar chart
plt.title('Class Distribution of Helicopters') # Set plot title
plt.xlabel('Helicopter Model') # Set x-axis label
plt.ylabel('Count') # Set y-axis label
plt.show() # Show the plot

# Visualize sample with bounding boxes
def plot_sample(image_name):
    print(f"Attempting to load image from path: {dataset_path}") # Debug print: dataset path
    files_in_dataset_path = os.listdir(dataset_path) # List files in dataset directory
    print(f"Files in dataset_path ({dataset_path}): {files_in_dataset_path}") # Debug print: files in dataset path

    img_path = os.path.join(dataset_path, image_name) # Construct full image path
    print(f"Full image path being used: {img_path}") # Debug print: full image path

    try: # Try to load and convert the image
        img = cv2.cvtColor(cv2.imread(img_path), cv2.COLOR_BGR2RGB) # Load image using OpenCV and convert to RGB
    except Exception as e: # Handle potential errors during image loading
        print(f"Error loading image with cv2.imread: {e}") # Debug print: image loading error
        return # Exit the function if image loading fails

    if img is None: # Check if image loading was successful
        print(f"cv2.imread failed to load image from: {img_path}") # Debug print: image load failure
        return # Exit if image is not loaded

    annotations = combined_df[combined_df['filename'] == image_name] # Get annotations for the given image

    fig, ax = plt.subplots(1, figsize=(12,8)) # Create a figure and axes for plotting
    ax.imshow(img) # Display the image

    for _, row in annotations.iterrows(): # Iterate over annotations for the image
        box = [row['xmin'], row['ymin'], row['xmax'], row['ymax']] # Extract bounding box coordinates
        rect = patches.Rectangle((box[0], box[1]), box[2]-box[0], box[3]-box[1], linewidth=2, edgecolor='r', facecolor='none') # Create a rectangle patch for bounding box
        ax.add_patch(rect) # Add bounding box patch to the axes
        plt.text(box[0], box[1]-10, row['class'], color='red', fontsize=12) # Add class label text
    plt.show() # Show the plot


# Plot random sample
sample_image = combined_df.sample()['filename'].values[0] # Get a random image filename from DataFrame
plot_sample(sample_image) # Plot the sample image

def plot_random_samples(df, num_samples=10):
    sample_images = df.sample(num_samples)['filename'].values # Get filenames of random sample images
    for img_name in sample_images:
        plot_sample(img_name) # Plot each sample image
    print(f"Plotted {num_samples} random sample images.")

plot_random_samples(combined_df, num_samples=10)

# STEP 4: DATA SPLITTING AND REDUCING TRAINING DATASET SIZE
print("STEP 4: DATA SPLITTING AND REDUCING TRAINING DATASET SIZE")
from sklearn.model_selection import train_test_split  # Import train_test_split function

print("Splitting data into training and validation sets...")
train_df, val_df = train_test_split(combined_df, test_size=0.2, random_state=42,
                                    stratify=combined_df['class'])  # Split DataFrame into training and validation sets
print("Data splitting complete.")
print(f"Training set size: {len(train_df)}")  # Print training set size
print(f"Validation set size: {len(val_df)}")  # Print validation set size

# Reduce the training dataset to 25%
train_df = train_df.sample(frac=0.25, random_state=42)  # Reduce to 25%
train_df = train_df.reset_index(drop=True)  # Reset index
print(f"Reduced training set size: {len(train_df)}")


Output hidden; open in https://colab.research.google.com to view.

In [16]:
combined_df

Unnamed: 0,filename,width,height,class,xmin,ymin,xmax,ymax
0,4b945b373e4045c346459d7d1791423c.jpg,2506,1684,C5,89,179,452,491
1,cb52897bc8ee7f30133d548a65e974d9.jpg,1200,800,Mig31,606,425,715,475
2,f91fa86c40a1cc9065fc43d9d43c3142.jpg,2649,2000,Vulcan,1372,878,1671,1174
3,086447973ba9c4441178c29391ea5f3e.jpg,679,453,C2,67,126,527,320
4,b299f60c514e2bd54ea0442e90e93644.jpg,900,572,Tu95,453,162,578,284
...,...,...,...,...,...,...,...,...
33539,08d093dfc01bcad80330482154a6c21c.jpg,1280,852,WZ7,36,122,1252,549
33540,117431f94751e210fedd35d6f85e863d.jpg,4096,2731,MQ9,1870,1359,2181,1427
33541,cb894067f429d09fc2a67c8c55baf133.jpg,615,410,Vulcan,258,155,520,295
33542,05d0fe71ec940ead25015d3f3118a300.jpg,1024,623,An22,43,153,969,371


In [17]:
print("STEP 5: DATASET AND DATALOADER")


def parse_annotations(df):
    """Parses annotations from DataFrame to a dictionary."""
    image_annotations = {}
    for index, row in df.iterrows():
        filename = row['filename']
        if filename not in image_annotations:
            image_annotations[filename] = []
        image_annotations[filename].append(
            {'xmin': row['xmin'], 'ymin': row['ymin'], 'xmax': row['xmax'], 'ymax': row['ymax'], 'class': row['class']}
        )
    return image_annotations


train_annotations = parse_annotations(train_df)
val_annotations = parse_annotations(val_df)


class CustomDataset(Dataset):
    def __init__(self, image_dir, annotation_dict, transform=None):
        self.image_dir = image_dir
        self.annotation_dict = annotation_dict
        self.transform = transform
        self.image_filenames = list(annotation_dict.keys())

    def __len__(self):
        return len(self.image_filenames)

    def __getitem__(self, idx):
        img_name = self.image_filenames[idx]
        img_path = os.path.join(self.image_dir, img_name)
        image = Image.open(img_path).convert("RGB")  # Load with PIL

        annotations = self.annotation_dict[img_name]
        boxes = []
        labels = []

        for ann in annotations:
            boxes.append([ann['xmin'], ann['ymin'], ann['xmax'], ann['ymax']])
            labels.append(ann['class'])

        boxes = torch.as_tensor(boxes, dtype=torch.float32)
        # Convert class names to numerical labels
        class_names = sorted(combined_df['class'].unique())
        labels = [class_names.index(label) for label in labels]
        labels = torch.as_tensor(labels, dtype=torch.int64)

        target = {}
        target['boxes'] = boxes
        target['labels'] = labels
        target['image_id'] = torch.tensor([idx])

        if self.transform is not None:
            image = self.transform(image)

        return image, target


# Define image transformations
transform = transforms.Compose([
    transforms.Resize((256, 256)),
    transforms.ToTensor(),
])

# Create datasets
train_dataset = CustomDataset(dataset_path, train_annotations, transform=transform)
val_dataset = CustomDataset(dataset_path, val_annotations, transform=transform)

# Create data loaders
train_loader = DataLoader(train_dataset, batch_size=batch_size, shuffle=True,
                          collate_fn=lambda batch: tuple(zip(*batch)))
val_loader = DataLoader(val_dataset, batch_size=batch_size, shuffle=False,
                        collate_fn=lambda batch: tuple(zip(*batch)))

print("Dataset and DataLoader created.")
print("STEP 5 COMPLETE\n")


STEP 5: DATASET AND DATALOADER
Dataset and DataLoader created.
STEP 5 COMPLETE



In [18]:
# STEP 6: MODEL SETUP
print("STEP 6: MODEL SETUP")
from torchvision.models.detection import fasterrcnn_resnet50_fpn
from torchvision.models.detection.faster_rcnn import FastRCNNPredictor

# Load a pre-trained Faster RCNN model
model = fasterrcnn_resnet50_fpn(pretrained=True)

# Get the number of input features for the classifier
in_features = model.roi_heads.box_predictor.cls_score.in_features

# Replace the pre-trained head with a new one
num_classes = len(combined_df['class'].unique()) + 1  # Add 1 for the background class
model.roi_heads.box_predictor = FastRCNNPredictor(in_features, num_classes)

# Move model to the GPU
device = torch.device('cuda') if torch.cuda.is_available() else torch.device('cpu')
model.to(device)

print("Model loaded and modified.")
print("STEP 6 COMPLETE\n")

STEP 6: MODEL SETUP




Model loaded and modified.
STEP 6 COMPLETE



In [19]:
!pip install Pillow
from PIL import Image



In [21]:
                                                         # STEP 7: TRAINING
print("STEP 7: TRAINING")
import torch.optim as optim
from torch.optim.lr_scheduler import StepLR

# Define the optimizer
params = [p for p in model.parameters() if p.requires_grad]
optimizer = optim.SGD(params, lr=learning_rate, momentum=0.9, weight_decay=0.0005)
lr_scheduler = StepLR(optimizer, step_size=step_size_lr_scheduler, gamma=gamma_lr_scheduler)

# Training loop
num_epochs = config['num_epochs']
for epoch in range(num_epochs):
    model.train()
    epoch_loss = 0
    for images, targets in train_loader:
        images = list(image.to(device) for image in images)
        targets = [{k: v.to(device) for k, v in t.items()} for t in targets]

        optimizer.zero_grad()
        losses = model(images, targets)
        loss = sum(loss for loss in losses.values())
        loss.backward()
        optimizer.step()

        epoch_loss += loss.item()

    lr_scheduler.step()

    print(f"Epoch {epoch + 1}/{num_epochs}, Loss: {epoch_loss / len(train_loader)}")

    # Save checkpoint
    torch.save(model.state_dict(), checkpoint_path.format(epoch=epoch + 1))

# Save the final model
torch.save(model.state_dict(), model_save_path)
print("Model training complete and model saved.")
print("STEP 7 COMPLETE\n")

STEP 7: TRAINING


KeyboardInterrupt: 

In [22]:
# STEP 7: TRAINING
print("STEP 7: TRAINING")
import torch.optim as optim
from torch.optim.lr_scheduler import OneCycleLR  # More aggressive LR scheduler
from torch.cuda import amp  # Import for mixed precision
import os  # Import the os module

# Create the directories if they don't exist
model_dir = os.path.dirname(model_save_path)
checkpoint_dir = os.path.dirname(checkpoint_path.format(epoch=1))  # Format with a dummy epoch

if not os.path.exists(model_dir):
    os.makedirs(model_dir, exist_ok=True)

if not os.path.exists(checkpoint_dir):
    os.makedirs(checkpoint_dir, exist_ok=True)


# Define the optimizer
params = [p for p in model.parameters() if p.requires_grad]
optimizer = optim.AdamW(params, lr=learning_rate, weight_decay=0.0005)  # AdamW optimizer

# Define a more aggressive learning rate scheduler
lr_scheduler = OneCycleLR(optimizer,
                           max_lr=learning_rate * 10,  # Peak LR
                           steps_per_epoch=len(train_loader),
                           epochs=num_epochs,
                           pct_start=0.2,  # Warmup phase
                           div_factor=10,  # Initial LR = max_lr/div_factor
                           final_div_factor=1000)  # Final LR = max_lr/final_div_factor

# Training loop
num_epochs = config['num_epochs']
scaler = amp.GradScaler()  # Initialize GradScaler
for epoch in range(num_epochs):
    model.train()
    epoch_loss = 0
    for images, targets in train_loader:
        images = list(image.to(device) for image in images)
        targets = [{k: v.to(device) for k, v in t.items()} for t in targets]

        optimizer.zero_grad()
        with amp.autocast(enabled=True):  # Enable mixed precision
            losses = model(images, targets)
            loss = sum(loss for loss in losses.values())

        scaler.scale(loss).backward()  # Scale the loss
        scaler.step(optimizer)  # Update the weights
        scaler.update()  # Update the scaler
        lr_scheduler.step()  # Step the LR scheduler

        epoch_loss += loss.item()

    print(f"Epoch {epoch + 1}/{num_epochs}, Loss: {epoch_loss / len(train_loader)}")

    # Save checkpoint
    torch.save(model.state_dict(), checkpoint_path.format(epoch=epoch + 1))

# Save the final model
torch.save(model.state_dict(), model_save_path)
print("Model training complete and model saved.")

STEP 7: TRAINING


  scaler = amp.GradScaler()  # Initialize GradScaler
  with amp.autocast(enabled=True):  # Enable mixed precision


Epoch 1/10, Loss: nan


KeyboardInterrupt: 