In [1]:
import itertools
import os

import pytorch_lightning as pl
from pytorch_lightning import Trainer
from sklearn.model_selection import KFold
from torch.utils.data import DataLoader, SubsetRandomSampler
from transformers import DeformableDetrConfig, DeformableDetrForObjectDetection

from detr_config import Config
from detr_dataset import collate_fn, get_train_dataset, get_test_dataset
from detr_model import DETRModel

The `max_size` parameter is deprecated and will be removed in v4.26. Please specify in `size['longest_edge'] instead`.


In [2]:
# Load both dataset splits up front; the factory functions are imported
# from detr_dataset. Evaluation order is unchanged: train first, then test.

train_dataset, test_dataset = get_train_dataset(), get_test_dataset()

loading annotations into memory...
Done (t=0.00s)
creating index...
index created!
loading annotations into memory...
Done (t=0.00s)
creating index...
index created!


In [3]:

# Hyperparameter grid
#
# Every combination of (num_queries, d_model, encoder/decoder layer count)
# is a candidate configuration for the Deformable DETR model.

params_num_queries = [10, 50, 100]
params_d_model = [64, 128, 256]
params_encoder_decoder_layers = [2, 4, 6]

hyperparameters = itertools.product(
    params_num_queries,
    params_d_model,
    params_encoder_decoder_layers,
)

# Search settings
N_ATTENTION_HEADS = 8          # attention heads for both encoder and decoder
N_SPLITS = 10                  # number of cross-validation folds
KFOLD_SEED = 123456            # fixes the fold assignment across configurations
EARLY_STOPPING_PATIENCE = 15   # epochs without valid_loss improvement before stopping

# When True, only the first fold of the first configuration is trained
# (quick end-to-end check). Set to False to run the full grid search.
SMOKE_TEST = True

Config.CHECKPOINT = "SenseTime/deformable-detr"

# With an integer random_state the splits are reproducible, so a single
# KFold instance yields the same folds for every configuration.
k_fold = KFold(n_splits=N_SPLITS, shuffle=True, random_state=KFOLD_SEED)


# Hyperparameter search

for num_queries, d_model, encoder_decoder_layers in hyperparameters:
    print('(Num Queries, Dim model, Enc-Dec Layers): ', 
            f'({num_queries}, {d_model}, {encoder_decoder_layers})' )

    # Training with K-fold cross validation

    for fold, (train_idx, valid_idx) in enumerate(k_fold.split(train_dataset)):
        print(f"Fold {fold + 1}")

        # Model construction
        #
        # A fresh model is built for every fold. Reusing one instance across
        # folds would carry weights trained on earlier folds -- whose training
        # indices include this fold's validation samples -- into later folds
        # and make the validation loss meaningless.
        #
        # The head count is passed through encoder_attention_heads /
        # decoder_attention_heads; `num_head` is not a DeformableDetrConfig
        # field and was silently stored as an unused attribute.
        config = DeformableDetrConfig.from_pretrained(
            Config.CHECKPOINT,
            num_labels=1,
            id2label={0: 'Mass'},
            label2id={'Mass': 0},
            num_queries=num_queries,
            d_model=d_model,
            encoder_attention_heads=N_ATTENTION_HEADS,
            decoder_attention_heads=N_ATTENTION_HEADS,
            encoder_layers=encoder_decoder_layers,
            decoder_layers=encoder_decoder_layers,
            position_embedding_type='sine',
            decoder_ffn_dim=2048,
            encoder_ffn_dim=2048,
        )

        # ignore_mismatched_sizes lets the checkpoint load even though the
        # configuration above changes the shapes of some layers.
        detr_model = DeformableDetrForObjectDetection.from_pretrained(
            Config.CHECKPOINT,
            config=config,
            ignore_mismatched_sizes=True,
        )

        model = DETRModel(detr_model=detr_model)

        # Both loaders read from train_dataset; the samplers restrict each
        # one to the indices of the current fold.
        train_loader = DataLoader(
            dataset=train_dataset,
            batch_size=Config.BATCH_SIZE,
            collate_fn=collate_fn,
            sampler=SubsetRandomSampler(train_idx),
        )

        valid_loader = DataLoader(
            dataset=train_dataset,
            batch_size=Config.BATCH_SIZE,
            collate_fn=collate_fn,
            sampler=SubsetRandomSampler(valid_idx),
        )

        # Keep the best checkpoint by validation loss plus the last one.
        checkpoint_callback = pl.callbacks.ModelCheckpoint(
            save_top_k=1,
            save_last=True,
            monitor="valid_loss",
            mode="min",
        )

        early_stopping_callback = pl.callbacks.EarlyStopping(
            monitor='valid_loss',
            patience=EARLY_STOPPING_PATIENCE,
        )

        # One TensorBoard version per (configuration, fold) so runs do not
        # overwrite each other's logs.
        version = os.path.join(
            'deformable',
            f'queries={num_queries}_dmodel={d_model}_layers={encoder_decoder_layers}',
            f'fold_{fold+1}'
        )

        logger = pl.loggers.TensorBoardLogger(
            save_dir='./',
            version=version,
        )

        trainer = Trainer(
            max_epochs=Config.EPOCHS,
            log_every_n_steps=5,
            callbacks=[
                checkpoint_callback,
                early_stopping_callback,
            ],
            accelerator=Config.ACCELERATOR,
            logger=logger,
        )

        trainer.fit(
            model,
            train_dataloaders=train_loader,
            val_dataloaders=valid_loader,
        )

        if SMOKE_TEST:
            break  # Fold
    if SMOKE_TEST:
        break  # Hyperparameter


(Num Queries, Dim model, Enc-Dec Layers):  (10, 64, 2)


Some weights of the model checkpoint at SenseTime/deformable-detr were not used when initializing DeformableDetrForObjectDetection: ['model.backbone.conv_encoder.model.layer4.0.downsample.1.num_batches_tracked', 'model.decoder.layers.3.encoder_attn.value_proj.bias', 'model.decoder.layers.5.self_attn.out_proj.bias', 'model.decoder.layers.4.fc1.weight', 'model.decoder.layers.2.self_attn_layer_norm.bias', 'model.decoder.layers.3.self_attn.out_proj.bias', 'model.decoder.layers.2.final_layer_norm.weight', 'model.decoder.layers.3.self_attn_layer_norm.bias', 'model.encoder.layers.2.self_attn.sampling_offsets.bias', 'model.encoder.layers.4.self_attn.value_proj.weight', 'model.decoder.layers.3.final_layer_norm.weight', 'model.decoder.layers.2.fc2.bias', 'model.decoder.layers.3.encoder_attn_layer_norm.bias', 'model.decoder.layers.5.self_attn.k_proj.bias', 'model.encoder.layers.4.fc2.bias', 'model.decoder.layers.5.fc1.weight', 'model.encoder.layers.4.self_attn_layer_norm.weight', 'model.encoder.l

Fold 1


GPU available: False, used: False
TPU available: False, using: 0 TPU cores
IPU available: False, using: 0 IPUs
HPU available: False, using: 0 HPUs

  | Name       | Type                             | Params
----------------------------------------------------------------
0 | detr_model | DeformableDetrForObjectDetection | 26.1 M
----------------------------------------------------------------
25.9 M    Trainable params
222 K     Non-trainable params
26.1 M    Total params
104.399   Total estimated model params size (MB)


<generator object Module.named_parameters at 0x0000020820448C40>


ValueError: dictionary update sequence element #0 has length 1; 2 is required