In [4]:
# 1. Import & Mount Drive
from google.colab import drive
import os, json, zipfile, requests, random
import torch, torchvision.transforms as transforms
from torch.utils.data import DataLoader, Dataset
from PIL import Image
import torchvision.models as models
import torch.nn as nn
import torch.optim as optim
from tqdm import tqdm
import numpy as np
from collections import defaultdict
from sklearn.metrics import classification_report
from torch.cuda.amp import autocast, GradScaler

# Mount Google Drive
drive.mount('/content/drive')

# 2. Download COCO 2017 val images and annotations
data_url = "http://images.cocodataset.org/zips/val2017.zip"
annotations_url = "http://images.cocodataset.org/annotations/annotations_trainval2017.zip"

def download_and_extract(url, output_dir):
    filename = url.split("/")[-1]
    output_path = os.path.join(output_dir, filename)

    if not os.path.exists(output_path):
        response = requests.get(url, stream=True)
        total_size = int(response.headers.get("content-length", 0))
        with open(output_path, "wb") as file, tqdm(desc=filename, total=total_size, unit="B", unit_scale=True) as pb:
            for data in response.iter_content(chunk_size=1024):
                file.write(data)
                pb.update(len(data))

    with zipfile.ZipFile(output_path, "r") as zip_ref:
        zip_ref.extractall(output_dir)

download_and_extract(data_url, "/content/")
download_and_extract(annotations_url, "/content/")

# 3. Load annotations
with open("/content/annotations/instances_val2017.json", "r") as f:
    coco_data = json.load(f)

# 4. Prepare all 80 categories
category_id_to_name = {cat["id"]: cat["name"] for cat in coco_data["categories"]}
name_to_index = {name: idx for idx, name in enumerate(category_id_to_name.values())}
selected_categories = list(name_to_index.keys())
selected_cat_ids = list(category_id_to_name.keys())

# 5. Filter and link images with single label
image_to_labels = defaultdict(set)
for ann in coco_data["annotations"]:
    if ann["category_id"] in selected_cat_ids:
        image_to_labels[ann["image_id"].__int__()].add(ann["category_id"])

filtered_images = [img for img in coco_data["images"] if len(image_to_labels[img["id"]]) == 1]
random.shuffle(filtered_images)
filtered_images = filtered_images[:200]  # Reduced dataset size

# 6. Split dataset
train_size = int(0.7 * len(filtered_images))
val_size = int(0.2 * len(filtered_images))
train_images = filtered_images[:train_size]
val_images = filtered_images[train_size:train_size+val_size]
test_images = filtered_images[train_size+val_size:]

# 7. Improved Transforms with additional augmentations
transform = transforms.Compose([
    transforms.Resize((224, 224)),
    transforms.RandomHorizontalFlip(),
    transforms.RandomRotation(20),  # Reduced rotation angle
    transforms.ColorJitter(brightness=0.2, contrast=0.2, saturation=0.2, hue=0.2),
    transforms.RandomResizedCrop(224, scale=(0.8, 1.0)),
    transforms.ToTensor(),
    transforms.Normalize(mean=[0.485, 0.456, 0.406], std=[0.229, 0.224, 0.225])
])

# 8. Custom Dataset
class COCODataset(Dataset):
    def __init__(self, images, root_dir, transform):
        self.images = images
        self.root_dir = root_dir
        self.transform = transform

    def __len__(self):
        return len(self.images)

    def __getitem__(self, idx):
        img_data = self.images[idx]
        img_path = os.path.join(self.root_dir, img_data["file_name"])
        image = Image.open(img_path).convert("RGB")
        image_id = img_data["id"]

        label_id = list(image_to_labels[image_id])[0]
        label_name = category_id_to_name[label_id]
        label = name_to_index[label_name]

        if self.transform:
            image = self.transform(image)

        return image, label

# 9. Data Loaders
train_loader = DataLoader(COCODataset(train_images, "/content/val2017", transform), batch_size=16, shuffle=True, num_workers=4)
val_loader = DataLoader(COCODataset(val_images, "/content/val2017", transform), batch_size=16, num_workers=4)
test_loader = DataLoader(COCODataset(test_images, "/content/val2017", transform), batch_size=16, num_workers=4)

# 10. Use EfficientNet-B4 (smaller model for faster training)
model = models.efficientnet_b4(pretrained=True)
model.classifier[1] = nn.Linear(model.classifier[1].in_features, len(selected_categories))

# 11. Setup
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
model.to(device)
criterion = nn.CrossEntropyLoss()
optimizer = optim.AdamW(model.parameters(), lr=1e-5)  # Lower learning rate for stability

# 12. Training with Mixed Precision
scaler = GradScaler()

def train_model_with_amp(model, train_loader, val_loader, epochs=5):  # Reduced epochs to 5
    for epoch in range(epochs):
        model.train()
        running_loss = 0
        for images, labels in train_loader:
            images, labels = images.to(device), labels.to(device)
            optimizer.zero_grad()

            # Forward pass with mixed precision
            with autocast():
                outputs = model(images)
                loss = criterion(outputs, labels)

            scaler.scale(loss).backward()
            scaler.step(optimizer)
            scaler.update()

            running_loss += loss.item()

        model.eval()
        val_loss = 0
        with torch.no_grad():
            for images, labels in val_loader:
                images, labels = images.to(device), labels.to(device)
                outputs = model(images)
                val_loss += criterion(outputs, labels).item()

        print(f"Epoch {epoch+1}, Train Loss: {running_loss/len(train_loader):.4f}, Val Loss: {val_loss/len(val_loader):.4f}")

# Training the model
train_model_with_amp(model, train_loader, val_loader, epochs=5)  # Reduced epochs to 5

# 13. Evaluation
def evaluate_model(model, test_loader):
    model.eval()
    all_preds, all_labels = [], []
    with torch.no_grad():
        for images, labels in test_loader:
            images, labels = images.to(device), labels.to(device)
            outputs = model(images)
            _, preds = torch.max(outputs, 1)
            all_preds.extend(preds.cpu().numpy())
            all_labels.extend(labels.cpu().numpy())

    unique_labels = sorted(set(all_labels))
    used_class_names = [selected_categories[i] for i in unique_labels]

    print("\nDetailed Classification Report:\n")
    print(classification_report(all_labels, all_preds, labels=unique_labels, target_names=used_class_names, digits=4))

evaluate_model(model, test_loader)

# 14. Save model
model_path = "/content/drive/MyDrive/efficientnet_coco_final.pth"
torch.save(model.state_dict(), model_path)
print(f"Model saved to {model_path}")


Drive already mounted at /content/drive; to attempt to forcibly remount, call drive.mount("/content/drive", force_remount=True).


Downloading: "https://download.pytorch.org/models/efficientnet_b4_rwightman-23ab8bcd.pth" to /root/.cache/torch/hub/checkpoints/efficientnet_b4_rwightman-23ab8bcd.pth
100%|██████████| 74.5M/74.5M [00:00<00:00, 201MB/s]
  scaler = GradScaler()
  with autocast():


Epoch 1, Train Loss: 4.3828, Val Loss: 4.3764
Epoch 2, Train Loss: 4.3885, Val Loss: 4.3856
Epoch 3, Train Loss: 4.3781, Val Loss: 4.3705
Epoch 4, Train Loss: 4.3694, Val Loss: 4.3691
Epoch 5, Train Loss: 4.3597, Val Loss: 4.3589

Detailed Classification Report:

               precision    recall  f1-score   support

     airplane     0.0000    0.0000    0.0000       1.0
        train     0.0000    0.0000    0.0000       2.0
         boat     0.0000    0.0000    0.0000       1.0
traffic light     0.0000    0.0000    0.0000       1.0
 fire hydrant     0.0000    0.0000    0.0000       1.0
         bird     0.0000    0.0000    0.0000       1.0
        horse     0.0000    0.0000    0.0000       1.0
     elephant     0.0000    0.0000    0.0000       2.0
         bear     0.0000    0.0000    0.0000       1.0
        zebra     0.0000    0.0000    0.0000       2.0
     suitcase     0.0000    0.0000    0.0000       1.0
       toilet     0.0000    0.0000    0.0000       1.0
 refrigerator     0.

  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))
  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))
  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))


Model saved to /content/drive/MyDrive/efficientnet_coco_final.pth
