In [1]:
import os
import json
import argparse
import torch
import random
import numpy as np
from pathlib import Path
from datetime import datetime
from model.Efficientnet_Det import EfficientNetEnc_Det

from torch.utils.tensorboard import SummaryWriter
from model.FFTRadNet import FFTRadNet
from dataset.dataset import RADIal
from dataset.encoder import ra_encoder
from dataset.dataloader import CreateDataLoaders
import pkbar
import torch.optim as optim
from torch.optim import lr_scheduler
import torch.nn.functional as F
from loss import pixor_loss
from utils.evaluation import run_evaluation
import torch.nn as nn
import torch
 
# Ask PyTorch to release the GPU memory held by its caching allocator
# (useful when this cell is re-run inside a long-lived kernel).
torch.cuda.empty_cache()

# Command-line style options. Inside a notebook nothing is passed explicitly,
# so the defaults below are what actually gets used.
parser = argparse.ArgumentParser(description='FFTRadNet Training')
# NOTE(review): presumably absorbs the `-f <connection file>` argument that
# Jupyter hands to the kernel process, so parse_args() does not abort -- confirm.
parser.add_argument('-f')
parser.add_argument('-c', '--config', default='config/config_FFTRadNet_192_56-Det.json', type=str,
                    help='Path to the config file (default: %(default)s)')
parser.add_argument('-r', '--resume', default=None, type=str,
                    help='Path to the .pth model checkpoint to resume training')

args = parser.parse_args()

# Read the JSON configuration inside a context manager so the file handle is
# closed deterministically instead of being left to the garbage collector.
with open(args.config) as config_file:
    config = json.load(config_file)

# Optional checkpoint path (None unless -r/--resume is given).
resume = args.resume

  from .autonotebook import tqdm as notebook_tqdm


In [2]:
!pip install pkbar
!pip install shapely
!pip install efficientnet_pytorch



In [None]:
# Seed every random number generator used by the run with the configured seed
# (torch CPU, numpy, Python's random, torch CUDA -- in that order).
for seed_fn in (torch.manual_seed, np.random.seed, random.seed, torch.cuda.manual_seed):
    seed_fn(config['seed'])

# Experiment name = configured name + launch timestamp
curr_date = datetime.now()
exp_name = '___'.join([config['name'], curr_date.strftime('%b-%d-%Y___%H:%M:%S')])
print(exp_name)

# Output tree: <output_folder>/<exp_name>/
output_folder = Path("Efficientnet_DET_seq")
run_dir = output_folder / exp_name
output_folder.mkdir(parents=True, exist_ok=True)
run_dir.mkdir(parents=True, exist_ok=True)
# Store a copy of the configuration alongside the run's artefacts
(run_dir / 'config.json').write_text(json.dumps(config))

# Use the first GPU when CUDA is available, otherwise the CPU
device = torch.device('cuda:0') if torch.cuda.is_available() else torch.device('cpu')

# TensorBoard event files go into the run directory
writer = SummaryWriter(run_dir)

# Build the label encoder and the dataset from the 'dataset' section of the config
dataset_cfg = config['dataset']
enc = ra_encoder(
    geometry=dataset_cfg['geometry'],
    statistics=dataset_cfg['statistics'],
    regression_layer=2,
)
dataset = RADIal(
    root_dir=dataset_cfg['root_dir'],
    statistics=dataset_cfg['statistics'],
    encoder=enc.encode,
    difficult=True,
)

# Split into train / validation / test loaders using the configured seed
train_loader, val_loader, test_loader = CreateDataLoaders(dataset, config['dataloader'], config['seed'])


# Create the model
# The FFTRadNet construction is kept below (disabled) for reference; the
# EfficientNet-based detector is the model that is actually trained here.
# net = FFTRadNet(blocks = config['model']['backbone_block'],
#                     mimo_layer  = config['model']['MIMO_output'],
#                     channels = config['model']['channels'], 
#                     regression_layer = 2, 
#                     detection_head = config['model']['DetectionHead'], 
#                     segmentation_head = config['model']['SegmentationHead'])
# NOTE(review): n_channels / n_classes are hard-coded rather than read from the
# config -- confirm they match the dataset being loaded.
net = EfficientNetEnc_Det(n_channels=32, n_classes=1, detection_head=True)

# Place the model on the device selected when `device` was set up (GPU if
# available, else CPU) instead of a hard-coded 'cuda', which ignored that choice.
net.to(device)


# Optimizer and learning-rate schedule, configured from the JSON config
lr = float(config['optimizer']['lr'])
step_size = int(config['lr_scheduler']['step_size'])
gamma = float(config['lr_scheduler']['gamma'])

# Only parameters that require gradients are handed to Adam
trainable_params = [p for p in net.parameters() if p.requires_grad]
optimizer = optim.Adam(trainable_params, lr=lr)
# StepLR multiplies the learning rate by `gamma` every `step_size` scheduler steps
scheduler = lr_scheduler.StepLR(optimizer, step_size=step_size, gamma=gamma)

num_epochs = int(config['num_epochs'])
# NOTE(review): the configured value is overridden right away, so the number of
# epochs in the saved config may not match what is actually run -- confirm intent.
num_epochs = 100

# Summary of the effective hyper-parameters
print('===========  Optimizer  ==================:')
for label, value in (
    ('      LR:', lr),
    ('      step_size:', step_size),
    ('      gamma:', gamma),
    ('      num_epochs:', num_epochs),
):
    print(label, value)
print('')

# Training bookkeeping: first epoch to run, number of optimisation steps taken
# so far, and per-epoch history of losses, learning rate and metrics.
startEpoch, global_step = 0, 0
history = {key: [] for key in ('train_loss', 'val_loss', 'lr', 'mAP', 'mAR')}
# Not referenced again in the visible part of the notebook.
best_mAP = 0

# Not referenced again in the visible part of the notebook.
freespace_loss = nn.BCEWithLogitsLoss(reduction='mean')


# NOTE(review): resuming from a checkpoint is disabled, so the `resume` path
# parsed from the arguments is currently unused. Original snippet kept for reference:
# if resume:
#     print('===========  Resume training  ==================:')
#     dict = torch.load(resume)
#     net.load_state_dict(dict['net_state_dict'])
#     optimizer.load_state_dict(dict['optimizer'])
#     scheduler.load_state_dict(dict['scheduler'])
#     startEpoch = dict['epoch']+1
#     history = dict['history']
#     global_step = dict['global_step']
#
#     print('       ... Start at epoch:',startEpoch)


# Main loop: one training pass, one validation pass and one checkpoint per epoch.
for epoch in range(startEpoch, num_epochs):

    # Progress bar covering the training batches of this epoch
    kbar = pkbar.Kbar(target=len(train_loader), epoch=epoch, num_epochs=num_epochs, width=20, always_stateful=False)

    ###################
    ## Training loop ##
    ###################
    net.train()
    running_loss = 0.0

    for i, data in enumerate(train_loader):
        # Send the batch to the same device as the model (the `device` chosen
        # during setup) instead of a hard-coded 'cuda'.
        inputs = data[0].to(device).float()
        label_map = data[1].to(device).float()

        # reset the gradient
        optimizer.zero_grad()

        # forward pass, with gradient tracking explicitly enabled
        with torch.set_grad_enabled(True):
            outputs = net(inputs)

        # Detection loss split into its classification and regression parts
        classif_loss, reg_loss = pixor_loss(outputs['Detection'], label_map, config['losses'])

        # Apply the configured weight to each term before summing
        classif_loss *= config['losses']['weight'][0]
        reg_loss *= config['losses']['weight'][1]

        loss = classif_loss + reg_loss

        # Pull each scalar off the device once and reuse it for logging,
        # the running average and the progress bar.
        loss_value = loss.item()
        classif_value = classif_loss.item()
        reg_value = reg_loss.item()

        writer.add_scalar('Loss/train', loss_value, global_step)
        writer.add_scalar('Loss/train_clc', classif_value, global_step)
        writer.add_scalar('Loss/train_reg', reg_value, global_step)

        # backprop
        loss.backward()
        optimizer.step()

        # statistics: weight the batch loss by the batch size so the epoch
        # average below is per sample
        running_loss += loss_value * inputs.size(0)

        kbar.update(i, values=[("loss", loss_value), ("class", classif_value), ("reg", reg_value)])

        global_step += 1

    # Advance the learning-rate schedule once per epoch
    scheduler.step()

    history['train_loss'].append(running_loss / len(train_loader.dataset))
    # Recorded after scheduler.step(), i.e. the value the scheduler reports for the next epoch
    history['lr'].append(scheduler.get_last_lr()[0])

    ######################
    ## validation phase ##
    ######################

    # Named `eval_results` rather than `eval` so the builtin is not shadowed
    # for the rest of the kernel session. Performance metrics are only
    # requested from epoch 10 onwards (check_perf).
    # NOTE(review): run_evaluation is defined elsewhere -- confirm it places
    # batches on the same device as the model.
    eval_results = run_evaluation(net, val_loader, enc, check_perf=(epoch >= 10),
                                  detection_loss=pixor_loss, segmentation_loss=None,
                                  losses_params=config['losses'])

    history['val_loss'].append(eval_results['loss'])
    history['mAP'].append(eval_results['mAP'])
    history['mAR'].append(eval_results['mAR'])

    kbar.add(1, values=[("val_loss", eval_results['loss']), ("mAP", eval_results['mAP']), ("mAR", eval_results['mAR'])])

    writer.add_scalar('learning_rate', optimizer.param_groups[0]['lr'], global_step)
    writer.add_scalar('Loss/test', eval_results['loss'], global_step)
    writer.add_scalar('Metrics/mAP', eval_results['mAP'], global_step)
    writer.add_scalar('Metrics/mAR', eval_results['mAR'], global_step)

    # A checkpoint is written for every epoch; choosing the best one is left to the user.
    name_output_file = config['name']+'_epoch_detection_{:02d}_loss_{:.4f}_AP_{:.4f}_AR_{:.4f}.pth'.format(epoch, eval_results['loss'], eval_results['mAP'], eval_results['mAR'])
    filename = output_folder / exp_name / name_output_file

    # Everything needed to continue training from this epoch
    checkpoint = {
        'net_state_dict': net.state_dict(),
        'optimizer': optimizer.state_dict(),
        'scheduler': scheduler.state_dict(),
        'epoch': epoch,
        'history': history,
        'global_step': global_step,
    }

    torch.save(checkpoint, filename)

    print('')





FFTRadNet_RA_192_56___Jun-07-2024___07:56:12
      Mode: sequence
      Training: 6230
      Validation: 986
      Test: 1035

Loaded pretrained weights for efficientnet-b2
      LR: 0.0001
      step_size: 10
      gamma: 0.9
      num_epochs: 100

Epoch: 1/100

Epoch: 2/100

Epoch: 3/100

Epoch: 4/100

Epoch: 5/100

Epoch: 6/100

Epoch: 7/100

Epoch: 8/100

Epoch: 9/100

Epoch: 10/100

Epoch: 11/100

Epoch: 12/100

Epoch: 13/100

Epoch: 14/100

Epoch: 15/100

Epoch: 16/100

Epoch: 17/100
  79/1558 [>...................] - ETA: 12:20 - loss: 68.6754 - class: 9.4813 - reg: 59.1942

IOPub message rate exceeded.
The Jupyter server will temporarily stop sending output
to the client in order to avoid crashing it.
To change this limit, set the config variable
`--ServerApp.iopub_msg_rate_limit`.

Current values:
ServerApp.iopub_msg_rate_limit=1000.0 (msgs/sec)
ServerApp.rate_limit_window=3.0 (secs)




Epoch: 20/100

IOPub message rate exceeded.
The Jupyter server will temporarily stop sending output
to the client in order to avoid crashing it.
To change this limit, set the config variable
`--ServerApp.iopub_msg_rate_limit`.

Current values:
ServerApp.iopub_msg_rate_limit=1000.0 (msgs/sec)
ServerApp.rate_limit_window=3.0 (secs)





IOPub message rate exceeded.
The Jupyter server will temporarily stop sending output
to the client in order to avoid crashing it.
To change this limit, set the config variable
`--ServerApp.iopub_msg_rate_limit`.

Current values:
ServerApp.iopub_msg_rate_limit=1000.0 (msgs/sec)
ServerApp.rate_limit_window=3.0 (secs)




Epoch: 24/100

IOPub message rate exceeded.
The Jupyter server will temporarily stop sending output
to the client in order to avoid crashing it.
To change this limit, set the config variable
`--ServerApp.iopub_msg_rate_limit`.

Current values:
ServerApp.iopub_msg_rate_limit=1000.0 (msgs/sec)
ServerApp.rate_limit_window=3.0 (secs)




Epoch: 27/100

Epoch: 28/100

Epoch: 29/100
 259/1558 [==>.................] - ETA: 11:27 - loss: 60.4148 - class: 7.0228 - reg: 53.3921

IOPub message rate exceeded.
The Jupyter server will temporarily stop sending output
to the client in order to avoid crashing it.
To change this limit, set the config variable
`--ServerApp.iopub_msg_rate_limit`.

Current values:
ServerApp.iopub_msg_rate_limit=1000.0 (msgs/sec)
ServerApp.rate_limit_window=3.0 (secs)




Epoch: 30/100

Epoch: 31/100

Epoch: 32/100

Epoch: 33/100