In [2]:
import itertools
import os

import pytorch_lightning as pl
from pytorch_lightning import Trainer
from sklearn.model_selection import KFold
from torch.utils.data import DataLoader, SubsetRandomSampler
from transformers import DetrConfig, DetrForObjectDetection

from detr_config import Config
from detr_dataset import collate_fn, get_train_dataset, get_test_dataset
from detr_model import DETRModel


In [4]:
# Dataset
#
# Build the two dataset objects used by the rest of the notebook. Both
# factories come from the project-local `detr_dataset` module; the train
# factory is called first, then the test factory.

train_dataset, test_dataset = (
    get_train_dataset(),
    get_test_dataset(),
)

loading annotations into memory...
Done (t=0.00s)
creating index...
index created!
loading annotations into memory...
Done (t=0.00s)
creating index...
index created!


In [5]:

# HyperParameters
#
# Grid of DETR settings to explore: every combination of the three lists
# below is one candidate configuration.

params_num_queries = [10, 50, 100]
params_d_model = [64, 128, 256]
params_encoder_decoder_layers = [2, 4, 6]

# Materialised as a list (rather than a one-shot iterator) so the grid can be
# inspected or iterated again without being exhausted.
hyperparameters = list(itertools.product(
    params_num_queries,
    params_d_model,
    params_encoder_decoder_layers,
))

# When True, only the first fold of the first combination is trained (a quick
# end-to-end check of the pipeline). Set to False to run the whole grid with
# every cross-validation fold.
SMOKE_TEST = True


# Hyperparameter Search

# The splitter does not depend on the hyperparameters, so it is built once.
# With an integer `random_state`, each call to `split()` yields the same
# folds, so every configuration is evaluated on identical splits.
k_fold = KFold(n_splits=10, shuffle=True, random_state=123456)

for num_queries, d_model, encoder_decoder_layers in hyperparameters:
    print('(Num Queries, Dim model, Enc-Dec Layers): ', 
            f'({num_queries}, {d_model}, {encoder_decoder_layers})' )

    # Model configuration (shared by all folds of this combination)

    config = DetrConfig.from_pretrained(
        Config.CHECKPOINT,
        num_labels=1,
        id2label={0: 'Mass'},
        label2id={'Mass': 0},
        num_queries=num_queries,
        d_model=d_model,
        # DetrConfig has no `num_head` option (it would be silently ignored);
        # the head counts are set per stack with the two arguments below.
        encoder_attention_heads=8,
        decoder_attention_heads=8,
        encoder_layers=encoder_decoder_layers,
        decoder_layers=encoder_decoder_layers,
        position_embedding_type='sine',
        decoder_ffn_dim=2048,
        encoder_ffn_dim=2048,
    )

    # Training with K-fold Cross Validation

    for fold, (train_idx, valid_idx) in enumerate(k_fold.split(train_dataset)):
        print(f"Fold {fold + 1}")

        # A fresh model is created for every fold. Reusing one instance across
        # folds would carry weights trained on fold k into fold k+1, whose
        # validation samples were training samples before, which invalidates
        # the cross-validation estimate.
        # `ignore_mismatched_sizes=True` lets the checkpoint load even though
        # the tensor shapes differ for the non-default sizes configured above.
        detr_model = DetrForObjectDetection.from_pretrained(
            Config.CHECKPOINT,
            config=config,
            ignore_mismatched_sizes=True,
        )
        model = DETRModel(detr_model=detr_model)

        # Both loaders read from `train_dataset`; the samplers restrict each
        # one to the indices of its side of the split.
        train_loader = DataLoader(
            dataset=train_dataset,
            batch_size=Config.BATCH_SIZE,
            collate_fn=collate_fn,
            sampler=SubsetRandomSampler(train_idx),
        )

        valid_loader = DataLoader(
            dataset=train_dataset,
            batch_size=Config.BATCH_SIZE,
            collate_fn=collate_fn,
            sampler=SubsetRandomSampler(valid_idx),
        )

        # Keep the best checkpoint by validation loss, plus the last one.
        checkpoint_callback = pl.callbacks.ModelCheckpoint(
            save_top_k=1,
            save_last=True,
            monitor="valid_loss",
            mode="min",
        )

        # Stop once `valid_loss` has not improved for 15 validation checks.
        early_stopping_callback = pl.callbacks.EarlyStopping(
            monitor='valid_loss',
            patience=15,
        )

        # Logs are grouped per configuration, then per fold.
        version = os.path.join(
            f'queries={num_queries}_dmodel={d_model}_layers={encoder_decoder_layers}',
            f'fold_{fold+1}'
        )

        logger = pl.loggers.TensorBoardLogger(
            save_dir='./',
            version=version,
        )

        trainer = Trainer(
            max_epochs=Config.EPOCHS,
            log_every_n_steps=5,
            callbacks=[
                checkpoint_callback,
                early_stopping_callback,
            ],
            accelerator=Config.ACCELERATOR,
            logger=logger,
        )

        trainer.fit(
            model,
            train_dataloaders=train_loader,
            val_dataloaders=valid_loader,
        )

        if SMOKE_TEST:
            break  # Fold: only the first fold is trained
    if SMOKE_TEST:
        break  # Hyperparameter: only the first combination is trained


(Num Queries, Dim model, Enc-Dec Layers):  (50, 64, 2)


Some weights of the model checkpoint at facebook/detr-resnet-50 were not used when initializing DetrForObjectDetection: ['model.encoder.layers.4.self_attn.q_proj.weight', 'model.decoder.layers.5.fc2.weight', 'model.encoder.layers.3.self_attn.k_proj.weight', 'model.encoder.layers.5.self_attn.q_proj.bias', 'model.decoder.layers.2.final_layer_norm.bias', 'model.decoder.layers.5.final_layer_norm.bias', 'model.encoder.layers.5.self_attn.v_proj.bias', 'model.decoder.layers.3.self_attn.out_proj.bias', 'model.decoder.layers.5.encoder_attn.q_proj.weight', 'model.decoder.layers.2.encoder_attn_layer_norm.bias', 'model.decoder.layers.5.self_attn.out_proj.bias', 'model.decoder.layers.3.encoder_attn.v_proj.bias', 'model.decoder.layers.3.self_attn.k_proj.weight', 'model.decoder.layers.2.self_attn.out_proj.bias', 'model.decoder.layers.4.self_attn.v_proj.weight', 'model.encoder.layers.2.self_attn.q_proj.bias', 'model.decoder.layers.2.self_attn.k_proj.bias', 'model.decoder.layers.4.self_attn.q_proj.bias

Fold 1



  | Name       | Type                   | Params
------------------------------------------------------
0 | detr_model | DetrForObjectDetection | 24.8 M
------------------------------------------------------
24.5 M    Trainable params
222 K     Non-trainable params
24.8 M    Total params
99.025    Total estimated model params size (MB)


<generator object Module.named_parameters at 0x000002213D037E40>
