In [None]:
from google.colab import drive

# Make Google Drive visible inside the Colab runtime at this mount point.
drive_mount_point = '/content/drive'
drive.mount(drive_mount_point)

Drive already mounted at /content/drive; to attempt to forcibly remount, call drive.mount("/content/drive", force_remount=True).


In [None]:
# Install Detectron2 (specific version for compatibility)
!pip install torch torchvision torchaudio
!pip install 'git+https://github.com/facebookresearch/detectron2.git'

# Install COCO API (for dataset handling)
!pip install pycocotools



Collecting git+https://github.com/facebookresearch/detectron2.git
  Cloning https://github.com/facebookresearch/detectron2.git to /tmp/pip-req-build-c2prqssh
  Running command git clone --filter=blob:none --quiet https://github.com/facebookresearch/detectron2.git /tmp/pip-req-build-c2prqssh
  Resolved https://github.com/facebookresearch/detectron2.git to commit 9604f5995cc628619f0e4fd913453b4d7d61db3f
  Preparing metadata (setup.py) ... [?25l[?25hdone


In [None]:
import detectron2
import torch

# Sanity check of the installation: print the version of each library.
# torch should match Colab's default (e.g., 2.x); detectron2 should print a version number.
for installed_module in (torch, detectron2):
    print(installed_module.__version__)

2.5.1+cu124
0.6


In [None]:
import os
import torch
import detectron2
from detectron2.data import DatasetCatalog, MetadataCatalog
from detectron2.data.datasets import register_coco_instances
from detectron2.engine import DefaultTrainer, HookBase
from detectron2.config import get_cfg
from detectron2.evaluation import COCOEvaluator
from detectron2.utils.logger import setup_logger
from detectron2.model_zoo import get_config_file, get_checkpoint_url
import numpy as np
import time
from google.colab import drive

In [None]:
# Folder on Google Drive under which the training output directory is created
# (change as needed).
drive_folder = "/content/drive/MyDrive/Colab Notebooks/Datasets/seat_dataset/train"
os.makedirs(name=drive_folder, exist_ok=True)

print("Training started...")
# Kept as the last expression of the cell so the returned logger is displayed.
setup_logger()


Training started...


<Logger detectron2 (DEBUG)>

In [None]:
# Paths
# Root of the dataset on Google Drive (update this if the dataset is elsewhere).
dataset_folder = "/content/drive/MyDrive/Colab Notebooks/Datasets/seat_dataset"

# COCO annotation files. They are derived from dataset_folder so that changing the root
# above moves them as well, instead of repeating the absolute path in every literal.
annotations_folder = os.path.join(dataset_folder, "annotations")
train_json = os.path.join(annotations_folder, "train_annotations.coco.json")
val_json = os.path.join(annotations_folder, "val_annotations.coco.json")

# Image directories for the two splits.
train_images = os.path.join(dataset_folder, "train")
val_images = os.path.join(dataset_folder, "valid")

output_dir = os.path.join(drive_folder, "output")  # Save output to Google Drive

In [None]:
# Register datasets
# Registering a name that already exists in the catalogs trips an assertion, so re-running
# this cell (or running it after the paths changed) would fail. Drop any stale entry first
# to keep the cell idempotent.
for dataset_name, json_file, image_root in (
    ("my_dataset_train", train_json, train_images),
    ("my_dataset_val", val_json, val_images),
):
    if dataset_name in DatasetCatalog.list():
        DatasetCatalog.remove(dataset_name)
    if dataset_name in MetadataCatalog.list():
        MetadataCatalog.remove(dataset_name)
    register_coco_instances(dataset_name, {}, json_file, image_root)

In [None]:
# Look up the metadata objects of the two registered splits.
train_metadata, val_metadata = (
    MetadataCatalog.get(split_name)
    for split_name in ("my_dataset_train", "my_dataset_val")
)


In [None]:
# Configuration
# Model-zoo entry used for both the base config file and the pretrained checkpoint URL.
zoo_entry = "COCO-InstanceSegmentation/mask_rcnn_R_50_FPN_3x.yaml"

cfg = get_cfg()
cfg.merge_from_file(get_config_file(zoo_entry))

# Data
cfg.DATASETS.TRAIN = ("my_dataset_train",)
cfg.DATASETS.TEST = ("my_dataset_val",)
cfg.DATALOADER.NUM_WORKERS = 2  # Reduce for Colab

# Model
cfg.MODEL.WEIGHTS = get_checkpoint_url(zoo_entry)
cfg.MODEL.ROI_HEADS.BATCH_SIZE_PER_IMAGE = 512
cfg.MODEL.ROI_HEADS.NUM_CLASSES = 1
cfg.MODEL.DEVICE = "cuda" if torch.cuda.is_available() else "cpu"

# Solver
cfg.SOLVER.IMS_PER_BATCH = 2  # Reduce for Colab
cfg.SOLVER.BASE_LR = 0.00025
cfg.SOLVER.MAX_ITER = 3750
cfg.SOLVER.STEPS = (2500, 3200)

# Periodic-evaluation interval and output location
cfg.TEST.EVAL_PERIOD = 500
cfg.OUTPUT_DIR = output_dir

In [None]:
# Ensure cfg.OUTPUT_DIR exists before anything is written into it.
os.makedirs(name=cfg.OUTPUT_DIR, exist_ok=True)


In [None]:
# Early Stopping & Model Saving Hook
class EarlyStopTraining(StopIteration):
    """Raised by EarlyStoppingHook to break out of the trainer's iteration loop."""


class EarlyStoppingHook(HookBase):
    """Save periodic checkpoints and stop training once the loss stops improving.

    Every ``cfg.TEST.EVAL_PERIOD`` iterations the hook reads the most recent
    ``total_loss`` from the trainer's event storage, saves a checkpoint for that
    iteration and keeps ``best_model.pth`` in sync with the lowest value seen.

    ``total_loss`` is the loss of the latest *training* step; this hook does not
    compute a validation loss.

    The configuration is read from ``self.trainer.cfg`` (set by ``DefaultTrainer``)
    rather than from a notebook-global ``cfg``, so re-binding the global in another
    cell cannot change the behavior of a running trainer.

    Args:
        patience: number of consecutive checks without improvement after which
            training is stopped.

    Raises:
        EarlyStopTraining: from ``after_step`` when ``patience`` is exhausted.
            Assigning to ``trainer.iter`` cannot end training because the trainer's
            ``for`` loop re-binds it on every step; raising is the only way out of
            that loop. The exception is a ``StopIteration`` subclass, so whoever
            calls ``trainer.train()`` can catch ``StopIteration`` to treat it as a
            normal early finish.
    """

    def __init__(self, patience=3):  # Stop if no improvement for 3 checks
        self.patience = patience
        self.best_loss = float('inf')
        self.counter = 0

    def after_step(self):
        """Run the periodic checkpoint / early-stopping check after a training step."""
        trainer_cfg = self.trainer.cfg
        period = trainer_cfg.TEST.EVAL_PERIOD
        iteration = self.trainer.iter
        # A period of 0 disables periodic evaluation; guard it to avoid a modulo by zero.
        if period <= 0 or iteration % period != 0:
            return

        latest = self.trainer.storage.latest().get("total_loss", None)
        if latest is None:
            return
        # storage.latest() maps each metric name to a (value, iteration) pair.
        loss_value = latest[0] if isinstance(latest, (tuple, list)) else latest

        print(f"Iteration {iteration}: Training total_loss = {loss_value:.4f}")

        state_dict = self.trainer.model.state_dict()
        model_path = os.path.join(trainer_cfg.OUTPUT_DIR, f"model_iter_{iteration}.pth")
        torch.save(state_dict, model_path)
        print(f"Model saved at: {model_path}")

        if loss_value < self.best_loss:
            # New best: remember it, reset the patience counter, refresh best_model.pth.
            self.best_loss = loss_value
            self.counter = 0
            torch.save(state_dict, os.path.join(trainer_cfg.OUTPUT_DIR, "best_model.pth"))
            return

        self.counter += 1
        if self.counter >= self.patience:
            print("Early stopping triggered. Stopping training.")
            raise EarlyStopTraining(
                f"total_loss did not improve for {self.counter} consecutive checks"
            )

In [None]:
# Custom Trainer
class CocoTrainer(DefaultTrainer):
    """DefaultTrainer that evaluates with COCO metrics and adds EarlyStoppingHook."""

    @classmethod
    def build_evaluator(cls, cfg, dataset_name, output_folder=None):
        """Return a COCOEvaluator for ``dataset_name``.

        Args:
            cfg: the training config; only ``cfg.OUTPUT_DIR`` is read here.
            dataset_name: name of the registered dataset to evaluate on.
            output_folder: where evaluation files are written; defaults to
                ``<cfg.OUTPUT_DIR>/eval``. The folder is created if missing.
        """
        if output_folder is None:
            output_folder = os.path.join(cfg.OUTPUT_DIR, "eval")
        os.makedirs(output_folder, exist_ok=True)
        # Passing the config object as the second positional argument is deprecated
        # in detectron2; use the explicit keyword arguments instead.
        return COCOEvaluator(dataset_name, distributed=False, output_dir=output_folder)

    def build_hooks(self):
        """Return the default hooks followed by the early-stopping hook."""
        hooks = super().build_hooks()
        hooks.append(EarlyStoppingHook(patience=3))  # Add early stopping & model saving
        return hooks

    def train(self):
        """Run training; a StopIteration raised inside the loop ends it early.

        A hook can only interrupt detectron2's training loop by raising, so a
        StopIteration coming out of the loop is treated as a request to finish
        early instead of as a failure. Returns whatever ``DefaultTrainer.train``
        returns, or ``None`` when training was stopped early.
        """
        try:
            return super().train()
        except StopIteration as stop:
            print(f"Training stopped early at iteration {self.iter}: {stop}")
            return None

In [None]:
# Build the trainer from the config and run the training loop.
# resume=False: do not continue from an earlier run's last checkpoint.
RESUME_TRAINING = False
trainer = CocoTrainer(cfg)
trainer.resume_or_load(resume=RESUME_TRAINING)
trainer.train()

[02/24 10:21:39 d2.engine.defaults]: Model:
GeneralizedRCNN(
  (backbone): FPN(
    (fpn_lateral2): Conv2d(256, 256, kernel_size=(1, 1), stride=(1, 1))
    (fpn_output2): Conv2d(256, 256, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
    (fpn_lateral3): Conv2d(512, 256, kernel_size=(1, 1), stride=(1, 1))
    (fpn_output3): Conv2d(256, 256, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
    (fpn_lateral4): Conv2d(1024, 256, kernel_size=(1, 1), stride=(1, 1))
    (fpn_output4): Conv2d(256, 256, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
    (fpn_lateral5): Conv2d(2048, 256, kernel_size=(1, 1), stride=(1, 1))
    (fpn_output5): Conv2d(256, 256, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
    (top_block): LastLevelMaxPool()
    (bottom_up): ResNet(
      (stem): BasicStem(
        (conv1): Conv2d(
          3, 64, kernel_size=(7, 7), stride=(2, 2), padding=(3, 3), bias=False
          (norm): FrozenBatchNorm2d(num_features=64, eps=1e-05)
        )
      )
      (res

roi_heads.box_predictor.bbox_pred.{bias, weight}
roi_heads.box_predictor.cls_score.{bias, weight}
roi_heads.mask_head.predictor.{bias, weight}


[02/24 10:21:40 d2.engine.train_loop]: Starting training from iteration 0


  return _VF.meshgrid(tensors, **kwargs)  # type: ignore[attr-defined]


val_loss (2.5457489900290966, 0)
Iteration 0: Validation Loss = 2.5457
Model saved at: /content/drive/MyDrive/Colab Notebooks/Datasets/seat_dataset/train/output/model_iter_0.pth
[02/24 10:21:50 d2.utils.events]:  eta: 0:24:34  iter: 19  total_loss: 2.036  loss_cls: 0.7847  loss_box_reg: 0.09022  loss_mask: 0.6936  loss_rpn_cls: 0.342  loss_rpn_loc: 0.03972    time: 0.3941  last_time: 0.3741  data_time: 0.0165  last_data_time: 0.0058   lr: 4.9953e-06  max_mem: 1770M
[02/24 10:22:08 d2.utils.events]:  eta: 0:25:27  iter: 39  total_loss: 2.005  loss_cls: 0.6788  loss_box_reg: 0.1009  loss_mask: 0.6924  loss_rpn_cls: 0.4811  loss_rpn_loc: 0.04567    time: 0.4490  last_time: 0.4074  data_time: 0.0228  last_data_time: 0.0047   lr: 9.9902e-06  max_mem: 1770M
[02/24 10:22:16 d2.utils.events]:  eta: 0:25:01  iter: 59  total_loss: 1.735  loss_cls: 0.4817  loss_box_reg: 0.1419  loss_mask: 0.6882  loss_rpn_cls: 0.3507  loss_rpn_loc: 0.04257    time: 0.4306  last_time: 0.3622  data_time: 0.0097  la

In [None]:
# Persist the weights of the trained model to the output directory.
final_weights = trainer.model.state_dict()
final_model_path = os.path.join(cfg.OUTPUT_DIR, "model_final.pth")
torch.save(final_weights, final_model_path)
print(f"Training complete. Model saved at: {final_model_path}")


Training complete. Model saved at: /content/drive/MyDrive/Colab Notebooks/Datasets/seat_dataset/train/output/model_final.pth


In [None]:
import os
import torch
import detectron2
from detectron2.data import DatasetCatalog, MetadataCatalog
from detectron2.data.datasets import register_coco_instances
from detectron2.engine import DefaultTrainer, HookBase
from detectron2.config import get_cfg
from detectron2.evaluation import COCOEvaluator
from detectron2.utils.logger import setup_logger
from detectron2.model_zoo import get_config_file, get_checkpoint_url
import numpy as np
import time
from google.colab import drive

# Mount Google Drive
drive.mount('/content/drive')

drive_folder = "/content/drive/My Drive/detectron2_training"  # Change as needed
os.makedirs(drive_folder, exist_ok=True)

print("Training started...")
setup_logger()

# Paths
dataset_folder = "/content/dataset"  # Update this if dataset is elsewhere
train_json = os.path.join(dataset_folder, "train_annotations.json")
val_json = os.path.join(dataset_folder, "val_annotations.json")
train_images = os.path.join(dataset_folder, "train")
val_images = os.path.join(dataset_folder, "val")
output_dir = os.path.join(drive_folder, "output")  # Save output to Google Drive

# Register datasets
# The same dataset names may already be registered (for example by an earlier cell
# pointing at different files), which makes registration fail with an assertion.
# Drop any stale entry first so this cell can be (re-)run in any kernel state.
for dataset_name, json_file, image_root in (
    ("my_dataset_train", train_json, train_images),
    ("my_dataset_val", val_json, val_images),
):
    if dataset_name in DatasetCatalog.list():
        DatasetCatalog.remove(dataset_name)
    if dataset_name in MetadataCatalog.list():
        MetadataCatalog.remove(dataset_name)
    register_coco_instances(dataset_name, {}, json_file, image_root)

# Get metadata
train_metadata = MetadataCatalog.get("my_dataset_train")
val_metadata = MetadataCatalog.get("my_dataset_val")

# Configuration
cfg = get_cfg()
cfg.merge_from_file(get_config_file("COCO-InstanceSegmentation/mask_rcnn_R_50_FPN_3x.yaml"))
cfg.DATASETS.TRAIN = ("my_dataset_train",)
cfg.DATASETS.TEST = ("my_dataset_val",)
cfg.DATALOADER.NUM_WORKERS = 2  # Reduce for Colab
cfg.MODEL.WEIGHTS = get_checkpoint_url("COCO-InstanceSegmentation/mask_rcnn_R_50_FPN_3x.yaml")
cfg.SOLVER.IMS_PER_BATCH = 2  # Reduce for Colab
cfg.SOLVER.BASE_LR = 0.00025
cfg.SOLVER.MAX_ITER = 3750
cfg.SOLVER.STEPS = (2500, 3200)
cfg.MODEL.ROI_HEADS.BATCH_SIZE_PER_IMAGE = 512
cfg.MODEL.ROI_HEADS.NUM_CLASSES = 1
cfg.TEST.EVAL_PERIOD = 500
cfg.OUTPUT_DIR = output_dir
cfg.MODEL.DEVICE = "cuda" if torch.cuda.is_available() else "cpu"

# Create output directory
os.makedirs(cfg.OUTPUT_DIR, exist_ok=True)

# Early Stopping & Model Saving Hook
class EarlyStopTraining(StopIteration):
    """Raised by EarlyStoppingHook to break out of the trainer's iteration loop."""


class EarlyStoppingHook(HookBase):
    """Save periodic checkpoints and stop training once the loss stops improving.

    Every ``cfg.TEST.EVAL_PERIOD`` iterations the hook reads the most recent
    ``total_loss`` from the trainer's event storage, saves a checkpoint for that
    iteration and keeps ``best_model.pth`` in sync with the lowest value seen.

    ``total_loss`` is the loss of the latest *training* step; this hook does not
    compute a validation loss.

    The configuration is read from ``self.trainer.cfg`` (set by ``DefaultTrainer``)
    rather than from a notebook-global ``cfg``.

    Args:
        patience: number of consecutive checks without improvement after which
            training is stopped.

    Raises:
        EarlyStopTraining: from ``after_step`` when ``patience`` is exhausted.
            Assigning to ``trainer.iter`` cannot end training because the trainer's
            ``for`` loop re-binds it on every step; raising is the only way out of
            that loop. The exception is a ``StopIteration`` subclass, so whoever
            calls ``trainer.train()`` can catch ``StopIteration`` to treat it as a
            normal early finish.
    """

    def __init__(self, patience=3):  # Stop if no improvement for 3 checks
        self.patience = patience
        self.best_loss = float('inf')
        self.counter = 0

    def after_step(self):
        """Run the periodic checkpoint / early-stopping check after a training step."""
        trainer_cfg = self.trainer.cfg
        period = trainer_cfg.TEST.EVAL_PERIOD
        iteration = self.trainer.iter
        # A period of 0 disables periodic evaluation; guard it to avoid a modulo by zero.
        if period <= 0 or iteration % period != 0:
            return

        latest = self.trainer.storage.latest().get("total_loss", None)
        if latest is None:
            return
        # storage.latest() maps each metric name to a (value, iteration) pair, so the
        # entry cannot be formatted or compared as a number until it is unpacked.
        loss_value = latest[0] if isinstance(latest, (tuple, list)) else latest

        print(f"Iteration {iteration}: Training total_loss = {loss_value:.4f}")

        state_dict = self.trainer.model.state_dict()
        model_path = os.path.join(trainer_cfg.OUTPUT_DIR, f"model_iter_{iteration}.pth")
        torch.save(state_dict, model_path)
        print(f"Model saved at: {model_path}")

        if loss_value < self.best_loss:
            # New best: remember it, reset the patience counter, refresh best_model.pth.
            self.best_loss = loss_value
            self.counter = 0
            torch.save(state_dict, os.path.join(trainer_cfg.OUTPUT_DIR, "best_model.pth"))
            return

        self.counter += 1
        if self.counter >= self.patience:
            print("Early stopping triggered. Stopping training.")
            raise EarlyStopTraining(
                f"total_loss did not improve for {self.counter} consecutive checks"
            )

# Custom Trainer
class CocoTrainer(DefaultTrainer):
    """DefaultTrainer that evaluates with COCO metrics and adds EarlyStoppingHook."""

    @classmethod
    def build_evaluator(cls, cfg, dataset_name, output_folder=None):
        """Return a COCOEvaluator for ``dataset_name``.

        Args:
            cfg: the training config; only ``cfg.OUTPUT_DIR`` is read here.
            dataset_name: name of the registered dataset to evaluate on.
            output_folder: where evaluation files are written; defaults to
                ``<cfg.OUTPUT_DIR>/eval``. The folder is created if missing.
        """
        if output_folder is None:
            output_folder = os.path.join(cfg.OUTPUT_DIR, "eval")
        os.makedirs(output_folder, exist_ok=True)
        # Passing the config object as the second positional argument is deprecated
        # in detectron2; use the explicit keyword arguments instead.
        return COCOEvaluator(dataset_name, distributed=False, output_dir=output_folder)

    def build_hooks(self):
        """Return the default hooks followed by the early-stopping hook."""
        hooks = super().build_hooks()
        hooks.append(EarlyStoppingHook(patience=3))  # Add early stopping & model saving
        return hooks

    def train(self):
        """Run training; a StopIteration raised inside the loop ends it early.

        A hook can only interrupt detectron2's training loop by raising, so a
        StopIteration coming out of the loop is treated as a request to finish
        early instead of as a failure. Returns whatever ``DefaultTrainer.train``
        returns, or ``None`` when training was stopped early.
        """
        try:
            return super().train()
        except StopIteration as stop:
            print(f"Training stopped early at iteration {self.iter}: {stop}")
            return None

# Build the trainer from the config and run the training loop.
# resume=False: do not continue from an earlier run's last checkpoint.
RESUME_TRAINING = False
trainer = CocoTrainer(cfg)
trainer.resume_or_load(resume=RESUME_TRAINING)
trainer.train()

# Persist the weights of the trained model to the output directory.
final_weights = trainer.model.state_dict()
final_model_path = os.path.join(cfg.OUTPUT_DIR, "model_final.pth")
torch.save(final_weights, final_model_path)
print(f"Training complete. Model saved at: {final_model_path}")


Drive already mounted at /content/drive; to attempt to forcibly remount, call drive.mount("/content/drive", force_remount=True).
Training started...


AssertionError: Attribute 'json_file' in the metadata of 'my_dataset_train' cannot be set to a different value!
/content/drive/MyDrive/Colab Notebooks/Datasets/seat_dataset/annotations/train_annotations.coco.json != /content/dataset/train_annotations.json

In [None]:
import os
import torch
import detectron2
from detectron2.data import DatasetCatalog, MetadataCatalog
from detectron2.data.datasets import register_coco_instances
from detectron2.engine import DefaultTrainer, HookBase
from detectron2.config import get_cfg
from detectron2.evaluation import COCOEvaluator
from detectron2.utils.logger import setup_logger
from detectron2.model_zoo import get_config_file, get_checkpoint_url
import numpy as np
import time
from google.colab import drive

# Mount Google Drive
drive.mount('/content/drive')

drive_folder = "/content/drive/My Drive/detectron2_training"  # Change as needed
os.makedirs(drive_folder, exist_ok=True)

print("Training started...")
setup_logger()

# Paths
dataset_folder = "/content/dataset"  # Update this if dataset is elsewhere
train_json = os.path.join(dataset_folder, "train_annotations.json")
val_json = os.path.join(dataset_folder, "val_annotations.json")
train_images = os.path.join(dataset_folder, "train")
val_images = os.path.join(dataset_folder, "val")
output_dir = os.path.join(drive_folder, "output")  # Save output to Google Drive

# Register datasets
# The same dataset names may already be registered (for example by an earlier cell
# pointing at different files), which makes registration fail with an assertion.
# Drop any stale entry first so this cell can be (re-)run in any kernel state.
for dataset_name, json_file, image_root in (
    ("my_dataset_train", train_json, train_images),
    ("my_dataset_val", val_json, val_images),
):
    if dataset_name in DatasetCatalog.list():
        DatasetCatalog.remove(dataset_name)
    if dataset_name in MetadataCatalog.list():
        MetadataCatalog.remove(dataset_name)
    register_coco_instances(dataset_name, {}, json_file, image_root)

# Get metadata
train_metadata = MetadataCatalog.get("my_dataset_train")
val_metadata = MetadataCatalog.get("my_dataset_val")

# Configuration
cfg = get_cfg()
cfg.merge_from_file(get_config_file("COCO-InstanceSegmentation/mask_rcnn_R_50_FPN_3x.yaml"))
cfg.DATASETS.TRAIN = ("my_dataset_train",)
cfg.DATASETS.TEST = ("my_dataset_val",)
cfg.DATALOADER.NUM_WORKERS = 2  # Reduce for Colab
cfg.MODEL.WEIGHTS = get_checkpoint_url("COCO-InstanceSegmentation/mask_rcnn_R_50_FPN_3x.yaml")
cfg.SOLVER.IMS_PER_BATCH = 2  # Reduce for Colab
cfg.SOLVER.BASE_LR = 0.00025
cfg.SOLVER.MAX_ITER = 3750
cfg.SOLVER.STEPS = (2500, 3200)
cfg.MODEL.ROI_HEADS.BATCH_SIZE_PER_IMAGE = 512
cfg.MODEL.ROI_HEADS.NUM_CLASSES = 1
cfg.TEST.EVAL_PERIOD = 500
cfg.OUTPUT_DIR = output_dir
cfg.MODEL.DEVICE = "cuda" if torch.cuda.is_available() else "cpu"

# Create output directory
os.makedirs(cfg.OUTPUT_DIR, exist_ok=True)

# Early Stopping Hook
class EarlyStopTraining(StopIteration):
    """Raised by EarlyStoppingHook to break out of the trainer's iteration loop."""


class EarlyStoppingHook(HookBase):
    """Track the best loss and stop training once it stops improving.

    Every ``cfg.TEST.EVAL_PERIOD`` iterations the hook reads the most recent
    ``total_loss`` from the trainer's event storage and saves ``best_model.pth``
    whenever that value is the lowest seen so far.

    ``total_loss`` is the loss of the latest *training* step; this hook does not
    compute a validation loss.

    The configuration is read from ``self.trainer.cfg`` (set by ``DefaultTrainer``)
    rather than from a notebook-global ``cfg``.

    Args:
        patience: number of consecutive checks without improvement after which
            training is stopped.

    Raises:
        EarlyStopTraining: from ``after_step`` when ``patience`` is exhausted.
            Assigning to ``trainer.iter`` cannot end training because the trainer's
            ``for`` loop re-binds it on every step; raising is the only way out of
            that loop. The exception is a ``StopIteration`` subclass, so whoever
            calls ``trainer.train()`` can catch ``StopIteration`` to treat it as a
            normal early finish.
    """

    def __init__(self, patience=3):  # Stop if no improvement for 3 checks
        self.patience = patience
        self.best_loss = float('inf')
        self.counter = 0

    def after_step(self):
        """Run the periodic early-stopping check after a training step."""
        trainer_cfg = self.trainer.cfg
        period = trainer_cfg.TEST.EVAL_PERIOD
        iteration = self.trainer.iter
        # A period of 0 disables periodic evaluation; guard it to avoid a modulo by zero.
        if period <= 0 or iteration % period != 0:
            return

        latest = self.trainer.storage.latest().get("total_loss", None)
        if latest is None:
            return
        # storage.latest() maps each metric name to a (value, iteration) pair, so the
        # entry cannot be formatted or compared as a number until it is unpacked.
        loss_value = latest[0] if isinstance(latest, (tuple, list)) else latest

        print(f"Iteration {iteration}: Training total_loss = {loss_value:.4f}")

        if loss_value < self.best_loss:
            # New best: remember it, reset the patience counter, save the best model.
            self.best_loss = loss_value
            self.counter = 0
            torch.save(
                self.trainer.model.state_dict(),
                os.path.join(trainer_cfg.OUTPUT_DIR, "best_model.pth"),
            )
            return

        self.counter += 1
        if self.counter >= self.patience:
            print("Early stopping triggered. Stopping training.")
            raise EarlyStopTraining(
                f"total_loss did not improve for {self.counter} consecutive checks"
            )

# Custom Trainer
class CocoTrainer(DefaultTrainer):
    """DefaultTrainer that evaluates with COCO metrics and adds EarlyStoppingHook."""

    @classmethod
    def build_evaluator(cls, cfg, dataset_name, output_folder=None):
        """Return a COCOEvaluator for ``dataset_name``.

        Args:
            cfg: the training config; only ``cfg.OUTPUT_DIR`` is read here.
            dataset_name: name of the registered dataset to evaluate on.
            output_folder: where evaluation files are written; defaults to
                ``<cfg.OUTPUT_DIR>/eval``. The folder is created if missing.
        """
        if output_folder is None:
            output_folder = os.path.join(cfg.OUTPUT_DIR, "eval")
        os.makedirs(output_folder, exist_ok=True)
        # Passing the config object as the second positional argument is deprecated
        # in detectron2; use the explicit keyword arguments instead.
        return COCOEvaluator(dataset_name, distributed=False, output_dir=output_folder)

    def build_hooks(self):
        """Return the default hooks followed by the early-stopping hook."""
        hooks = super().build_hooks()
        hooks.append(EarlyStoppingHook(patience=3))  # Add early stopping
        return hooks

    def train(self):
        """Run training; a StopIteration raised inside the loop ends it early.

        A hook can only interrupt detectron2's training loop by raising, so a
        StopIteration coming out of the loop is treated as a request to finish
        early instead of as a failure. Returns whatever ``DefaultTrainer.train``
        returns, or ``None`` when training was stopped early.
        """
        try:
            return super().train()
        except StopIteration as stop:
            print(f"Training stopped early at iteration {self.iter}: {stop}")
            return None

# Build the trainer from the config and run the training loop.
# resume=False: do not continue from an earlier run's last checkpoint.
RESUME_TRAINING = False
trainer = CocoTrainer(cfg)
trainer.resume_or_load(resume=RESUME_TRAINING)
trainer.train()

# Persist the weights of the trained model to the output directory.
final_weights = trainer.model.state_dict()
final_model_path = os.path.join(cfg.OUTPUT_DIR, "model_final.pth")
torch.save(final_weights, final_model_path)
print(f"Training complete. Model saved at: {final_model_path}")
