# Import dependencies

In [1]:
import os
import re
import gc
from tqdm.auto import tqdm

import numpy as np
import pandas as pd
import cv2
import albumentations as A
from albumentations.pytorch.transforms import ToTensorV2
import matplotlib.pyplot as plt

# Import more dependencies

In [2]:
import torch
import torch.nn as nn
import torch.nn.functional as F

import segmentation_models_pytorch as smp

from detector import *

# Configuration

In [33]:
class Config:
    """Static options for building the network and running the detector.

    `arch`, `heads` and `head_conv` are passed to `get_pose_net`, `load_model`
    is the checkpoint path given to `torch.load`, and the whole object is
    handed to `CtdetDetector`. The remaining fields are presumably read by
    that detector -- confirm against the `detector` module.
    """

    # Model
    arch = 'resnest50'
    # Head name -> number of output channels of that head.
    heads = {'hm': 1,
             'wh': 2,
             'reg': 2}
    head_conv = 64
    reg_offset = True
    cat_spec_wh = False
    load_model = '../exp/ctdet/resnest50_fpn/model_best_125.pth'

    # Image
    img_size = 1024
    # Evaluated once at class-definition time; 1.0 for img_size == 1024.
    in_scale = 1024 / img_size
    down_ratio = 4

    # Per-channel normalisation statistics. Both are flat 3-element lists; the
    # original `mean` line ended with a stray comma that silently wrapped the
    # list in a one-element tuple.
    mean = [0.214556, 0.317253, 0.315290]
    std = [0.193879, 0.238036, 0.245211]
    num_classes = 1

    pad = 31

    # Test
    batch_size = 4
    K = 128

    fix_res = False
    test_scales = [1]
    flip_test = False
    nms = False
    gpus = [-1]

opt = Config()

# Create model & load pretrained weights

In [4]:
class PoseFPNNet(nn.Module):
    """FPN encoder/decoder (from ``segmentation_models_pytorch``) with one conv head per output.

    Args:
        base_name: Encoder name passed straight to ``smp.FPN``.
        heads: Mapping ``head name -> number of output channels``; each key
            becomes a sub-module attribute of the same name.
        head_conv: Number of channels of the hidden 3x3 convolution in every head.
    """

    def __init__(self, base_name, heads, head_conv=256):
        super().__init__()

        # Channel count requested from the FPN decoder; every head takes it as input.
        decoder_channels = 64

        base = smp.FPN(base_name, encoder_weights=None, decoder_dropout=0,
                       decoder_segmentation_channels=decoder_channels, upsampling=1)
        # Keep only the encoder and decoder; the rest of the smp model is dropped.
        self.encoder = base.encoder
        self.decoder = base.decoder
        del base

        self.heads = heads
        for head, classes in self.heads.items():
            fc = nn.Sequential(
                nn.Conv2d(decoder_channels, head_conv,
                          kernel_size=3, padding=1, bias=True),
                nn.ReLU(inplace=True),
                nn.Conv2d(head_conv, classes,
                          kernel_size=1, stride=1,
                          padding=0, bias=True))
            if 'hm' in head:
                # NOTE(review): sigmoid(-2.19) is roughly 0.1, so this presumably
                # gives the heat-map a low initial activation -- confirm against
                # the loss / decoding code, which is not part of this notebook.
                fc[-1].bias.data.fill_(-2.19)
            else:
                fill_fc_weights(fc)
            # nn.Module.__setattr__ registers `fc` as a sub-module under `head`.
            setattr(self, head, fc)

    def forward(self, x):
        """Run the backbone once and apply every head to the decoder output.

        Returns:
            A one-element list holding a dict ``{head name: head output}``.
        """
        features = self.encoder(x)
        x = self.decoder(*features)
        return [{head: getattr(self, head)(x) for head in self.heads}]

    def freeze_backbone(self):
        """Disable gradients for every encoder and decoder parameter."""
        for part in (self.encoder, self.decoder):
            for p in part.parameters():
                p.requires_grad = False

    def freeze_head(self, heads):
        """Disable gradients for the parameters of each head named in `heads`."""
        for head in heads:
            for p in getattr(self, head).parameters():
                p.requires_grad = False

    def set_mode(self, mode, is_freeze_bn=False):
        """Record `mode` and switch the module between eval and train behaviour.

        Args:
            mode: 'eval', 'valid' or 'test' call ``self.eval()``; 'train' calls
                ``self.train()``. Any other value is only stored on
                ``self.mode`` and leaves the training flag untouched.
            is_freeze_bn: When truthy and `mode` is 'train', every
                ``nn.BatchNorm2d`` sub-module is put back into eval mode.
        """
        self.mode = mode
        if mode in ['eval', 'valid', 'test']:
            self.eval()
        elif mode in ['train']:
            self.train()
            if is_freeze_bn:
                for m in self.modules():
                    if isinstance(m, nn.BatchNorm2d):
                        m.eval()

def fill_fc_weights(layers):
    """Re-initialise every ``nn.Conv2d`` found inside `layers`.

    Weights are drawn from a normal distribution with std 0.001 and biases
    (when the layer has one) are set to zero. Other module types are left
    untouched.
    """
    conv_layers = (mod for mod in layers.modules() if isinstance(mod, nn.Conv2d))
    for conv in conv_layers:
        nn.init.normal_(conv.weight, std=0.001)
        if conv.bias is None:
            continue
        nn.init.constant_(conv.bias, 0)


def get_pose_net(base_name, heads, head_conv):
    """Build a `PoseFPNNet` from the given encoder name, head spec and head width."""
    return PoseFPNNet(base_name, heads, head_conv)

In [5]:
# Build the network from the options in `opt`, then restore the trained weights.
model = get_pose_net(opt.arch, opt.heads, opt.head_conv)
# Load onto the CPU regardless of the device the checkpoint was saved from.
# NOTE(review): torch.load unpickles the file -- only load checkpoints you trust.
checkpoint = torch.load(opt.load_model, map_location=torch.device('cpu'))
# The weights are stored under the 'state_dict' key of the checkpoint.
model.load_state_dict(checkpoint['state_dict'])

# Drop the checkpoint reference and run a collection pass to release its memory.
del checkpoint
gc.collect()

0

# Prepare labels

In [6]:
# Input locations, all relative to the notebook's working directory.
DIR_INPUT = '../../input'
DIR_TRAIN = f'{DIR_INPUT}/train'
DIR_TEST = f'{DIR_INPUT}/test'

# Read the label table; the last expression displays its (rows, columns) shape.
train_df = pd.read_csv(f'{DIR_INPUT}/train.csv')
train_df.shape

(147793, 5)

In [7]:
# Placeholder columns for the box fields; they are overwritten further down
# in this cell once the `bbox` strings have been parsed.
train_df['x'] = -1
train_df['y'] = -1
train_df['w'] = -1
train_df['h'] = -1

def expand_bbox(x):
    """Extract the numeric tokens from a bbox string.

    Returns a NumPy array holding the matched number strings (still text, not
    floats), or the list ``[-1, -1, -1, -1]`` when `x` contains no number.
    """
    numbers = re.findall("([0-9]+[.]?[0-9]*)", x)
    if not numbers:
        return [-1, -1, -1, -1]
    return np.array(numbers)

# Parse every `bbox` string and spread the four values over the x/y/w/h columns.
train_df[['x', 'y', 'w', 'h']] = np.stack(train_df['bbox'].apply(expand_bbox))
# The raw string column is no longer needed once it has been expanded.
train_df.drop(columns=['bbox'], inplace=True)
# The parsed values are still text, so convert them to floating point. The
# builtin `float` is used because the `np.float` alias was deprecated in
# NumPy 1.20 and removed in 1.24.
train_df['x'] = train_df['x'].astype(float)
train_df['y'] = train_df['y'].astype(float)
train_df['w'] = train_df['w'].astype(float)
train_df['h'] = train_df['h'].astype(float)

train_df.head()

Unnamed: 0,image_id,width,height,source,x,y,w,h
0,b6ab77fd7,1024,1024,usask_1,834.0,222.0,56.0,36.0
1,b6ab77fd7,1024,1024,usask_1,226.0,548.0,130.0,58.0
2,b6ab77fd7,1024,1024,usask_1,377.0,504.0,74.0,160.0
3,b6ab77fd7,1024,1024,usask_1,834.0,95.0,109.0,107.0
4,b6ab77fd7,1024,1024,usask_1,26.0,144.0,124.0,117.0


# Define detector

In [11]:
# Wrap the loaded model and the options in the detector class, which
# presumably comes from the star-import of `detector` at the top of the notebook.
detector = CtdetDetector(opt, model)

Creating model...


In [12]:
# Run the detector on a single image given by its file path.
ret = detector.run('../data/wheat/images/073d83544.jpg')

In [20]:
# List the entries of the returned dict.
ret.keys()

dict_keys(['results', 'tot', 'load', 'pre', 'net', 'dec', 'post', 'merge'])

In [32]:
# Inspect one entry of the results stored under key 1.
# NOTE(review): the 5 values look like [x1, y1, x2, y2, score] with key 1
# being the class id -- confirm against CtdetDetector.
ret['results'][1][3]

array([9.6229865e+02, 3.3121985e+02, 1.0263778e+03, 4.3293738e+02,
       6.3836569e-01], dtype=float32)