In [7]:
import pandas as pd
from torch.utils import data
import numpy as np
import torch
import os
from PIL import Image
import random
import cv2

HEIGHT=288
WIDTH=512
mag = 1
sigma = 2.5

def genHeatMap(w, h, cx, cy, r, mag):
    if cx < 0 or cy < 0:
      return np.zeros((h, w))
    x, y = np.meshgrid(np.linspace(1, w, w), np.linspace(1, h, h))
    heatmap = ((y - (cy + 1))**2) + ((x - (cx + 1))**2)
    heatmap[heatmap <= r**2] = 1
    heatmap[heatmap > r**2] = 0
    return heatmap*mag


def getData(mode):
    if mode == 'train':
        img = pd.read_csv('tracknet_train_list_x_1.csv')
        label = pd.read_csv('tracknet_train_list_y_1.csv')
        return np.squeeze(img.values), np.squeeze(label.values)
    else:
        img = pd.read_csv('tracknet_test_list_x_1.csv')
        label = pd.read_csv('tracknet_test_list_y_1.csv')
        return np.squeeze(img.values), np.squeeze(label.values)


class TrackNetLoader(data.Dataset):
    def __init__(self, root, mode):
        self.root = root
        self.img_name, self.label_name = getData(mode)
        self.mode = mode
        img = Image.open(self.img_name[0][0]).convert('LA')
        w, h = img.size
        self.ratio = h / HEIGHT
        print("> Found %d data..." % (len(self.img_name)))

    def __len__(self):
        """'return the size of dataset"""
        return len(self.img_name)

    def __getitem__(self, index):
        img_path = self.img_name[index]
        label_path = self.label_name[index]
        img_all = []
        label_all = []
        for i in range(3):
            x = Image.open(img_path[i]).convert('RGB')
            x = x.resize((WIDTH, HEIGHT))
            
            x = np.asarray(x).transpose(2, 0, 1) / 255.0
            #x = x.resize((WIDTH, HEIGHT, 3))

            img_all.append(x[0])
            img_all.append(x[1])
            img_all.append(x[2])

            y = Image.open(label_path[i])
            y = np.asarray(y) / 255.0
            label_all.append(y)

        img_all = np.asarray(img_all)
        label_all = np.asarray(label_all)
        '''
        if self.mode == 'train':
          if random.random() < 0.5:
            img = img.transpose(Image.FLIP_LEFT_RIGHT)
        '''
        return img_all, label_all

'''
img = pd.read_csv('tracknet_train_list_x.csv')
label = pd.read_csv('tracknet_train_list_y.csv')
img = np.squeeze(img.values)
label = np.squeeze(label.values)

img_path = img[0]
label_path = label[0]
img_all = []
label_all = []

img_all = np.asarray(img_all)
label_all = np.asarray(label_all)
img_all = (img_all/255)
print((img_all))
print((label_all).shape)
'''

"\nimg = pd.read_csv('tracknet_train_list_x.csv')\nlabel = pd.read_csv('tracknet_train_list_y.csv')\nimg = np.squeeze(img.values)\nlabel = np.squeeze(label.values)\n\nimg_path = img[0]\nlabel_path = label[0]\nimg_all = []\nlabel_all = []\n\nimg_all = np.asarray(img_all)\nlabel_all = np.asarray(label_all)\nimg_all = (img_all/255)\nprint((img_all))\nprint((label_all).shape)\n"

In [8]:
import os
import sys
import json
import torch
from torch.utils.data import TensorDataset, DataLoader
import torchvision.models as models
import numpy as np
import itertools
import cv2
import math

import time

class TrackNet3(torch.nn.Module):
	def __init__(self, input_height=288, input_width=512 ): #input_height = 288, input_width = 512
		super(TrackNet3, self).__init__()

		#Layer1
		self.conv1 = torch.nn.Conv2d(9, 64, kernel_size = 3, stride = 1, padding=(1,1))
		self.nor1 = torch.nn.BatchNorm2d(64)

		#Layer2
		self.conv2 = torch.nn.Conv2d(64, 64, kernel_size = 3, stride = 1, padding=(1,1))
		self.nor2 = torch.nn.BatchNorm2d(64)

		#Layer3
		self.max3 = torch.nn.MaxPool2d(kernel_size = 2, stride = 2)

		#Layer4
		self.conv4 = torch.nn.Conv2d(64, 128, kernel_size = 3, stride = 1, padding=(1,1))
		self.nor4 = torch.nn.BatchNorm2d(128)

		#Layer5
		self.conv5 = torch.nn.Conv2d(128, 128, kernel_size = 3, stride = 1, padding=(1,1))
		self.nor5 = torch.nn.BatchNorm2d(128)

		#Layer6
		self.max6 = torch.nn.MaxPool2d(kernel_size = 2, stride = 2)

		#Layer7
		self.conv7 = torch.nn.Conv2d(128, 256, kernel_size = 3, stride = 1, padding=(1,1))
		self.nor7 = torch.nn.BatchNorm2d(256)

		#Layer8
		self.conv8 = torch.nn.Conv2d(256, 256, kernel_size = 3, stride = 1, padding=(1,1))
		self.nor8 = torch.nn.BatchNorm2d(256)

		#Layer9
		self.conv9 = torch.nn.Conv2d(256, 256, kernel_size = 3, stride = 1, padding=(1,1))
		self.nor9 = torch.nn.BatchNorm2d(256)

		#Layer10
		self.max10 = torch.nn.MaxPool2d(kernel_size = 2, stride = 2)

		#Layer11
		self.conv11 = torch.nn.Conv2d(256, 512, kernel_size = 3, stride = 1, padding=(1,1))
		self.nor11 = torch.nn.BatchNorm2d(512)

		#Layer12
		self.conv12 = torch.nn.Conv2d(512, 512, kernel_size = 3, stride = 1, padding=(1,1))
		self.nor12 = torch.nn.BatchNorm2d(512)

		#Layer13
		self.conv13 = torch.nn.Conv2d(512, 512, kernel_size = 3, stride = 1, padding=(1,1))
		self.nor13 = torch.nn.BatchNorm2d(512)

		#Layer14
		#upsample (2,2) 13 layer output和9 layer的output concat axis =1

		#Layer15
		self.conv15 = torch.nn.Conv2d(768, 256, kernel_size = 3, stride = 1, padding=(1,1))
		self.nor15 = torch.nn.BatchNorm2d(256)

		#Layer16
		self.conv16 = torch.nn.Conv2d(256, 256, kernel_size = 3, stride = 1, padding=(1,1))
		self.nor16 = torch.nn.BatchNorm2d(256)

		#Layer17
		self.conv17 = torch.nn.Conv2d(256, 256, kernel_size = 3, stride = 1, padding=(1,1))
		self.nor17 = torch.nn.BatchNorm2d(256)

		#Layer18
		#upsample (2,2) 17 layer output和5 layer的output concat axis =1

		#Layer19
		self.conv19 = torch.nn.Conv2d(384, 128, kernel_size = 3, stride = 1, padding=(1,1))
		self.nor19 = torch.nn.BatchNorm2d(128)

		#Layer20
		self.conv20 = torch.nn.Conv2d(128, 128, kernel_size = 3, stride = 1, padding=(1,1))
		self.nor20 = torch.nn.BatchNorm2d(128)

		#Layer21
		#upsample (2,2) 20 layer output和2 layer的output concat axis =1

		#Layer22
		self.conv22 = torch.nn.Conv2d(192, 64, kernel_size = 3, stride = 1, padding=(1,1))
		self.nor22 = torch.nn.BatchNorm2d(64)

		#Layer23
		self.conv23 = torch.nn.Conv2d(64, 64, kernel_size = 3, stride = 1, padding=(1,1))
		self.nor23 = torch.nn.BatchNorm2d(64)

		#Layer24
		self.conv24 = torch.nn.Conv2d(64, 3, kernel_size = 1, stride = 1)

		self.upsample = torch.nn.UpsamplingBilinear2d(scale_factor=2)
		self.act = torch.nn.ReLU()

	def forward(self, x):
		x = self.conv1(x)
		x = self.act(x)
		x = self.nor1(x)

		x = self.conv2(x)
		x = self.act(x)
		x1 = self.nor2(x)

		x = self.max3(x1)

		x = self.conv4(x)
		x = self.act(x)
		x = self.nor4(x)

		x = self.conv5(x)
		x = self.act(x)
		x2 = self.nor5(x)

		x = self.max6(x2)

		x = self.conv7(x)
		x = self.act(x)
		x = self.nor7(x)

		x = self.conv8(x)
		x = self.act(x)
		x = self.nor8(x)

		x = self.conv9(x)
		x = self.act(x)
		x3 = self.nor9(x)

		x = self.max10(x3)

		x = self.conv11(x)
		x = self.act(x)
		x = self.nor11(x)

		x = self.conv12(x)
		x = self.act(x)
		x = self.nor12(x)

		x = self.conv13(x)
		x = self.act(x)
		x = self.nor13(x)

		x = torch.cat((self.upsample(x), x3), 1)

		x = self.conv15(x)
		x = self.act(x)
		x = self.nor15(x)

		x = self.conv16(x)
		x = self.act(x)
		x = self.nor16(x)

		x = self.conv17(x)
		x = self.act(x)
		x = self.nor17(x)

		x = torch.cat((self.upsample(x), x2), 1)

		x = self.conv19(x)
		x = self.act(x)
		x = self.nor19(x)

		x = self.conv20(x)
		x = self.act(x)
		x = self.nor20(x)

		x = torch.cat((self.upsample(x), x1), 1)

		x = self.conv22(x)
		x = self.act(x)
		x = self.nor22(x)

		x = self.conv23(x)
		x = self.act(x)
		x = self.nor23(x)

		x = self.conv24(x)
		x = torch.sigmoid(x)

		return x

In [38]:
import os
import sys
import json
import torch
from torch.utils.data import TensorDataset, DataLoader
import torchvision.models as models
import numpy as np
import itertools
import cv2
import math

class TrackNet3(torch.nn.Module):
	def __init__(self, input_height=288, input_width=512 ): #input_height = 288, input_width = 512
		super(TrackNet3, self).__init__()

		#Layer1
		self.conv1 = torch.nn.Conv2d(3, 64, kernel_size = 3, stride = 1, padding=(1,1))
		self.nor1 = torch.nn.BatchNorm2d(64)

		#Layer2
		self.conv2 = torch.nn.Conv2d(64, 64, kernel_size = 3, stride = 1, padding=(1,1))
		self.nor2 = torch.nn.BatchNorm2d(64)

		#Layer3
		self.max3 = torch.nn.MaxPool2d(kernel_size = 2, stride = 2)

		#Layer4
		self.conv4 = torch.nn.Conv2d(64, 128, kernel_size = 3, stride = 1, padding=(1,1))
		self.nor4 = torch.nn.BatchNorm2d(128)

		#Layer5
		self.conv5 = torch.nn.Conv2d(128, 128, kernel_size = 3, stride = 1, padding=(1,1))
		self.nor5 = torch.nn.BatchNorm2d(128)

		#Layer6
		self.max6 = torch.nn.MaxPool2d(kernel_size = 2, stride = 2)

		#Layer7
		self.conv7 = torch.nn.Conv2d(128, 256, kernel_size = 3, stride = 1, padding=(1,1))
		self.nor7 = torch.nn.BatchNorm2d(256)

		#Layer8
		self.conv8 = torch.nn.Conv2d(256, 256, kernel_size = 3, stride = 1, padding=(1,1))
		self.nor8 = torch.nn.BatchNorm2d(256)

		#Layer9
		self.conv9 = torch.nn.Conv2d(256, 256, kernel_size = 3, stride = 1, padding=(1,1))
		self.nor9 = torch.nn.BatchNorm2d(256)

		#Layer10
		self.max10 = torch.nn.MaxPool2d(kernel_size = 2, stride = 2)

		#Layer11
		self.conv11 = torch.nn.Conv2d(256, 512, kernel_size = 3, stride = 1, padding=(1,1))
		self.nor11 = torch.nn.BatchNorm2d(512)

		#Layer12
		self.conv12 = torch.nn.Conv2d(512, 512, kernel_size = 3, stride = 1, padding=(1,1))
		self.nor12 = torch.nn.BatchNorm2d(512)

		#Layer13
		self.conv13 = torch.nn.Conv2d(512, 512, kernel_size = 3, stride = 1, padding=(1,1))
		self.nor13 = torch.nn.BatchNorm2d(512)

		#Layer14
		#upsample (2,2) 13 layer output和9 layer的output concat axis =1

		#Layer15
		self.conv15 = torch.nn.Conv2d(768, 256, kernel_size = 3, stride = 1, padding=(1,1))
		self.nor15 = torch.nn.BatchNorm2d(256)

		#Layer16
		self.conv16 = torch.nn.Conv2d(256, 256, kernel_size = 3, stride = 1, padding=(1,1))
		self.nor16 = torch.nn.BatchNorm2d(256)

		#Layer17
		self.conv17 = torch.nn.Conv2d(256, 256, kernel_size = 3, stride = 1, padding=(1,1))
		self.nor17 = torch.nn.BatchNorm2d(256)

		#Layer18
		#upsample (2,2) 17 layer output和5 layer的output concat axis =1

		#Layer19
		self.conv19 = torch.nn.Conv2d(384, 128, kernel_size = 3, stride = 1, padding=(1,1))
		self.nor19 = torch.nn.BatchNorm2d(128)

		#Layer20
		self.conv20 = torch.nn.Conv2d(128, 128, kernel_size = 3, stride = 1, padding=(1,1))
		self.nor20 = torch.nn.BatchNorm2d(128)

		#Layer21
		#upsample (2,2) 20 layer output和2 layer的output concat axis =1

		#Layer22
		self.conv22 = torch.nn.Conv2d(192, 64, kernel_size = 3, stride = 1, padding=(1,1))
		self.nor22 = torch.nn.BatchNorm2d(64)

		#Layer23
		self.conv23 = torch.nn.Conv2d(64, 64, kernel_size = 3, stride = 1, padding=(1,1))
		self.nor23 = torch.nn.BatchNorm2d(64)

		#Layer24
		self.conv24 = torch.nn.Conv2d(64, 3, kernel_size = 1, stride = 1)

		self.upsample = torch.nn.UpsamplingBilinear2d(scale_factor=2)
		self.act = torch.nn.ReLU()

	def forward(self, x):
		x = self.conv1(x)
		x = self.act(x)
		x = self.nor1(x)

		x = self.conv2(x)
		x = self.act(x)
		x1 = self.nor2(x)

		x = self.max3(x1)

		x = self.conv4(x)
		x = self.act(x)
		x = self.nor4(x)

		x = self.conv5(x)
		x = self.act(x)
		x2 = self.nor5(x)

		x = self.max6(x2)

		x = self.conv7(x)
		x = self.act(x)
		x = self.nor7(x)

		x = self.conv8(x)
		x = self.act(x)
		x = self.nor8(x)

		x = self.conv9(x)
		x = self.act(x)
		x3 = self.nor9(x)

		x = self.max10(x3)

		x = self.conv11(x)
		x = self.act(x)
		x = self.nor11(x)

		x = self.conv12(x)
		x = self.act(x)
		x = self.nor12(x)

		x = self.conv13(x)
		x = self.act(x)
		x = self.nor13(x)

		x = torch.cat((self.upsample(x), x3), 1)

		x = self.conv15(x)
		x = self.act(x)
		x = self.nor15(x)

		x = self.conv16(x)
		x = self.act(x)
		x = self.nor16(x)

		x = self.conv17(x)
		x = self.act(x)
		x = self.nor17(x)

		x = torch.cat((self.upsample(x), x2), 1)

		x = self.conv19(x)
		x = self.act(x)
		x = self.nor19(x)

		x = self.conv20(x)
		x = self.act(x)
		x = self.nor20(x)

		x = torch.cat((self.upsample(x), x1), 1)

		#x = self.conv22(x)
		#x = self.act(x)
		#x = self.nor22(x)

		#x = self.conv23(x)
		#x = self.act(x)
		#x = self.nor23(x)

		#x = self.conv24(x)
		x = torch.sigmoid(x)

		return x

In [39]:
device = torch.device("cuda:0" if torch.cuda.is_available() else "cpu")
print('GPU Use : ',torch.cuda.is_available())

GPU Use :  True


In [40]:
model = TrackNet3()
model.to(device)

optimizer = torch.optim.Adadelta(model.parameters(), lr = 1, rho=0.9, eps=1e-06, weight_decay=0)

In [44]:
time_list = []

for i in range(100):
    data = np.random.rand(3,288,512).astype('float32')/255
    start = time.time()
    
    unit = torch.from_numpy(np.asarray([data])).to(device)
    with torch.no_grad():
        torch.cuda.synchronize()

        h_pred = model(unit)
        torch.cuda.synchronize()
        
        end = time.time()
        
        time_list.append(1/(end - start))

print(h_pred.size())


torch.Size([1, 192, 288, 512])


In [42]:
1/np.mean(time_list)

0.039695875228016424

In [43]:
np.mean(time_list)

25.191534240167687

In [None]:
gray_10 : 0.049831402065306396
gray_6 : 0.05023368626709554    
gray_3 : 0.04918814090584544

In [47]:
from torchvision import models
import torch.nn as nn
import timm


def convert_to_inplace_relu(model):
    for m in model.modules():
        if isinstance(m, nn.ReLU):
            m.inplace = True


class ResNet(nn.Module):
    def __init__(self, slug='r50', pretrained=True):
        super().__init__()
        if not pretrained:
            print("Caution, not loading pretrained weights.")

        if slug == 'r18':
            self.resnet = models.resnet18(pretrained=pretrained)
            num_bottleneck_filters = 512
        elif slug == 'r34':
            self.resnet = models.resnet34(pretrained=pretrained)
            num_bottleneck_filters = 512
        elif slug == 'r50':
            self.resnet = models.resnet50(pretrained=pretrained)
            num_bottleneck_filters = 2048
        elif slug == 'r101':
            self.resnet = models.resnet101(pretrained=pretrained)
            num_bottleneck_filters = 2048
        elif slug == 'r152':
            self.resnet = models.resnet152(pretrained=pretrained)
            num_bottleneck_filters = 2048
        elif slug == 'rx50':
            self.resnet = models.resnext50_32x4d(pretrained=pretrained)
            num_bottleneck_filters = 2048
        elif slug == 'rx101':
            self.resnet = models.resnext101_32x8d(pretrained=pretrained)
            num_bottleneck_filters = 2048
        elif slug == 'r50d':
            self.resnet = timm.create_model('gluon_resnet50_v1d',
                                            pretrained=pretrained)
            convert_to_inplace_relu(self.resnet)
            num_bottleneck_filters = 2048
        elif slug == 'r101d':
            self.resnet = timm.create_model('gluon_resnet101_v1d',
                                            pretrained=pretrained)
            convert_to_inplace_relu(self.resnet)
            num_bottleneck_filters = 2048

        else:
            assert False, "Bad slug: %s" % slug

        self.outplanes = num_bottleneck_filters

    def forward(self, x):
        size = x.size()
        assert size[-1] % 32 == 0 and size[-2] % 32 == 0, \
            "image resolution has to be divisible by 32 for resnet"

        enc0 = self.resnet.conv1(x)
        enc0 = self.resnet.bn1(enc0)
        enc0 = self.resnet.relu(enc0)
        enc0 = self.resnet.maxpool(enc0)

        enc1 = self.resnet.layer1(enc0)
        enc2 = self.resnet.layer2(enc1)
        enc3 = self.resnet.layer3(enc2)
        enc4 = self.resnet.layer4(enc3)

        return enc1, enc2, enc3, enc4

    def freeze_bn(self):
        for layer in self.modules():
            if isinstance(layer, nn.BatchNorm2d):
                layer.eval()

    def freeze_stages(self, stage):
        if stage >= 0:
            self.resnet.bn1.eval()
            for m in [self.resnet.conv1, self.resnet.bn1]:
                for param in m.parameters():
                    param.requires_grad = False
        for i in range(1, stage + 1):
            layer = getattr(self.resnet, 'layer{}'.format(i))
            layer.eval()
            for param in layer.parameters():
                param.requires_grad = False

In [None]:
from model.decoder import Decoder
from model.head import Head
from model.fpn import FPN
from loss.utils import map2coords
import torch
from torch import nn
import torch.nn.functional as F


def gather_feature(fmap, index, mask=None, use_transform=False):
    if use_transform:
        # change a (N, C, H, W) tenor to (N, HxW, C) shape
        batch, channel = fmap.shape[:2]
        fmap = fmap.view(batch, channel, -1).permute((0, 2, 1)).contiguous()

    dim = fmap.size(-1)
    index = index.unsqueeze(len(index.shape)).expand(*index.shape, dim)
    fmap = fmap.gather(dim=1, index=index)
    if mask is not None:
        # this part is not called in Res18 dcn COCO
        mask = mask.unsqueeze(2).expand_as(fmap)
        fmap = fmap[mask]
        fmap = fmap.reshape(-1, dim)
    return fmap


class CenterNet(nn.Module):
    def __init__(self, cfg):
        super(CenterNet, self).__init__()
        self.backbone = ResNet(cfg.slug)
        if cfg.fpn:
            self.fpn = FPN(self.backbone.outplanes)
        self.upsample = Decoder(self.backbone.outplanes if not cfg.fpn else 2048, cfg.bn_momentum)
        self.head = Head(channel=cfg.head_channel, num_classes=cfg.num_classes)

        self._fpn = cfg.fpn
        self.down_stride = cfg.down_stride
        self.score_th = cfg.score_th
        self.CLASSES_NAME = cfg.CLASSES_NAME

    def forward(self, x):
        feats = self.backbone(x)
        if self._fpn:
            feat = self.fpn(feats)
        else:
            feat = feats[-1]
        return self.head(self.upsample(feat))

    @torch.no_grad()
    def inference(self, img, infos, topK=40, return_hm=False, th=None):
        feats = self.backbone(img)
        if self._fpn:
            feat = self.fpn(feats)
        else:
            feat = feats[-1]
        pred_hm, pred_wh, pred_offset = self.head(self.upsample(feat))

        _, _, h, w = img.shape
        b, c, output_h, output_w = pred_hm.shape
        pred_hm = self.pool_nms(pred_hm)
        scores, index, clses, ys, xs = self.topk_score(pred_hm, K=topK)

        reg = gather_feature(pred_offset, index, use_transform=True)
        reg = reg.reshape(b, topK, 2)
        xs = xs.view(b, topK, 1) + reg[:, :, 0:1]
        ys = ys.view(b, topK, 1) + reg[:, :, 1:2]

        wh = gather_feature(pred_wh, index, use_transform=True)
        wh = wh.reshape(b, topK, 2)

        clses = clses.reshape(b, topK, 1).float()
        scores = scores.reshape(b, topK, 1)

        half_w, half_h = wh[..., 0:1] / 2, wh[..., 1:2] / 2
        bboxes = torch.cat([xs - half_w, ys - half_h, xs + half_w, ys + half_h], dim=2)

        detects = []
        for batch in range(b):
            mask = scores[batch].gt(self.score_th if th is None else th)

            batch_boxes = bboxes[batch][mask.squeeze(-1), :]
            # batch_boxes[:, [0, 2]] *= infos[batch]['raw_width'] / output_w
            # batch_boxes[:, [1, 3]] *= infos[batch]['raw_height'] / output_h
            batch_boxes[:, [0, 2]] *= w / output_w
            batch_boxes[:, [1, 3]] *= h / output_h

            batch_scores = scores[batch][mask]

            batch_clses = clses[batch][mask]
            batch_clses = [self.CLASSES_NAME[int(cls.item())] for cls in batch_clses]

            detects.append([batch_boxes, batch_scores, batch_clses, pred_hm[batch] if return_hm else None])
        return detects

    def pool_nms(self, hm, pool_size=3):
        pad = (pool_size - 1) // 2
        hm_max = F.max_pool2d(hm, pool_size, stride=1, padding=pad)
        keep = (hm_max == hm).float()
        return hm * keep

    def topk_score(self, scores, K):
        batch, channel, height, width = scores.shape

        # get topk score and its index in every H x W(channel dim) feature map
        topk_scores, topk_inds = torch.topk(scores.reshape(batch, channel, -1), K)

        topk_inds = topk_inds % (height * width)
        topk_ys = (topk_inds / width).int().float()
        topk_xs = (topk_inds % width).int().float()

        # get all topk in in a batch
        topk_score, index = torch.topk(topk_scores.reshape(batch, -1), K)
        # div by K because index is grouped by K(C x K shape)
        topk_clses = (index / K).int()
        topk_inds = gather_feature(topk_inds.view(batch, -1, 1), index).reshape(batch, K)
        topk_ys = gather_feature(topk_ys.reshape(batch, -1, 1), index).reshape(batch, K)
        topk_xs = gather_feature(topk_xs.reshape(batch, -1, 1), index).reshape(batch, K)

        return topk_score, topk_inds, topk_clses, topk_ys, topk_xs

In [65]:
import torchvision.models as models
import torchsummary
resnet101 = models.resnet101(pretrained=True)

resnet101.to(device)

torchsummary.summary(resnet101, (3, 288, 512),device='cuda')

ResNet(
  (conv1): Conv2d(3, 64, kernel_size=(7, 7), stride=(2, 2), padding=(3, 3), bias=False)
  (bn1): BatchNorm2d(64, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
  (relu): ReLU(inplace=True)
  (maxpool): MaxPool2d(kernel_size=3, stride=2, padding=1, dilation=1, ceil_mode=False)
  (layer1): Sequential(
    (0): Bottleneck(
      (conv1): Conv2d(64, 64, kernel_size=(1, 1), stride=(1, 1), bias=False)
      (bn1): BatchNorm2d(64, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
      (conv2): Conv2d(64, 64, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), bias=False)
      (bn2): BatchNorm2d(64, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
      (conv3): Conv2d(64, 256, kernel_size=(1, 1), stride=(1, 1), bias=False)
      (bn3): BatchNorm2d(256, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
      (relu): ReLU(inplace=True)
      (downsample): Sequential(
        (0): Conv2d(64, 256, kernel_size=(1, 1), stride=(1, 

In [61]:
time_list = []
data = np.random.rand(3,480,640).astype('float32')/255
start = time.time()

unit = torch.from_numpy(np.asarray([data])).to(device)
with torch.no_grad():
    torch.cuda.synchronize()

    h_pred = resnet101(unit)
    torch.cuda.synchronize()

    end = time.time()

    time_list.append(1/(end - start))

In [59]:
time_list

[19.551768341856118]

In [73]:
resnet18 = models.resnet18(pretrained=True)

resnet18.to(device)

Downloading: "https://download.pytorch.org/models/resnet18-f37072fd.pth" to /home/drcl_yang/.cache/torch/hub/checkpoints/resnet18-f37072fd.pth
100.0%


ResNet(
  (conv1): Conv2d(3, 64, kernel_size=(7, 7), stride=(2, 2), padding=(3, 3), bias=False)
  (bn1): BatchNorm2d(64, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
  (relu): ReLU(inplace=True)
  (maxpool): MaxPool2d(kernel_size=3, stride=2, padding=1, dilation=1, ceil_mode=False)
  (layer1): Sequential(
    (0): BasicBlock(
      (conv1): Conv2d(64, 64, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), bias=False)
      (bn1): BatchNorm2d(64, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
      (relu): ReLU(inplace=True)
      (conv2): Conv2d(64, 64, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), bias=False)
      (bn2): BatchNorm2d(64, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
    )
    (1): BasicBlock(
      (conv1): Conv2d(64, 64, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), bias=False)
      (bn1): BatchNorm2d(64, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
      (relu): ReLU(inplace=True)
  

In [74]:
torchsummary.summary(resnet18, (3, 288, 512),device='cuda')

----------------------------------------------------------------
        Layer (type)               Output Shape         Param #
            Conv2d-1         [-1, 64, 144, 256]           9,408
       BatchNorm2d-2         [-1, 64, 144, 256]             128
              ReLU-3         [-1, 64, 144, 256]               0
         MaxPool2d-4          [-1, 64, 72, 128]               0
            Conv2d-5          [-1, 64, 72, 128]          36,864
       BatchNorm2d-6          [-1, 64, 72, 128]             128
              ReLU-7          [-1, 64, 72, 128]               0
            Conv2d-8          [-1, 64, 72, 128]          36,864
       BatchNorm2d-9          [-1, 64, 72, 128]             128
             ReLU-10          [-1, 64, 72, 128]               0
       BasicBlock-11          [-1, 64, 72, 128]               0
           Conv2d-12          [-1, 64, 72, 128]          36,864
      BatchNorm2d-13          [-1, 64, 72, 128]             128
             ReLU-14          [-1, 64, 

In [106]:
backbone = models.resnet18(pretrained=True)

backbone.to(device)

ResNet(
  (conv1): Conv2d(3, 64, kernel_size=(7, 7), stride=(2, 2), padding=(3, 3), bias=False)
  (bn1): BatchNorm2d(64, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
  (relu): ReLU(inplace=True)
  (maxpool): MaxPool2d(kernel_size=3, stride=2, padding=1, dilation=1, ceil_mode=False)
  (layer1): Sequential(
    (0): BasicBlock(
      (conv1): Conv2d(64, 64, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), bias=False)
      (bn1): BatchNorm2d(64, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
      (relu): ReLU(inplace=True)
      (conv2): Conv2d(64, 64, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), bias=False)
      (bn2): BatchNorm2d(64, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
    )
    (1): BasicBlock(
      (conv1): Conv2d(64, 64, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), bias=False)
      (bn1): BatchNorm2d(64, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
      (relu): ReLU(inplace=True)
  

In [112]:
res5c_output = None

def res5c_hook(module, input_, output):
    global res5c_output
    res5c_output = output

backbone.layer2[1].bn2.register_forward_hook(res5c_hook)


<torch.utils.hooks.RemovableHandle at 0x7f9c8b5405d0>

In [113]:
time_list = []
data = np.random.rand(3,480,640).astype('float32')/255
start = time.time()

unit = torch.from_numpy(np.asarray([data])).to(device)
with torch.no_grad():
    torch.cuda.synchronize()

    h_pred = backbone(unit)
    torch.cuda.synchronize()

    end = time.time()

    time_list.append(1/(end - start))

In [1]:
import os
import sys
import json
import torch
import argparse
from torch.utils.data import TensorDataset, DataLoader
import torchvision.models as models
import numpy as np
from dataloader_custom import TrackNetLoader
import matplotlib.pyplot as plt
from sklearn.metrics import confusion_matrix
import itertools
import cv2
import math
from PIL import Image
import time
from network import *
from utils import *


In [5]:
import gc

torch.cuda.empty_cache()
gc.collect()

5955

In [2]:
device = torch.device("cuda:0" if torch.cuda.is_available() else "cpu")
print('GPU Use : ', torch.cuda.is_available())

model = efficientnet_b3()
model.to(device)

GPU Use :  True


EfficientNet(
  (stage1): Sequential(
    (0): Conv2d(9, 38, kernel_size=(3, 3), stride=(2, 2), padding=(1, 1), bias=False)
    (1): BatchNorm2d(38, eps=0.001, momentum=0.99, affine=True, track_running_stats=True)
  )
  (stage2): Sequential(
    (0): SepConv(
      (residual): Sequential(
        (0): Conv2d(38, 38, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), groups=38, bias=False)
        (1): BatchNorm2d(38, eps=0.001, momentum=0.99, affine=True, track_running_stats=True)
        (2): Swish(
          (sigmoid): Sigmoid()
        )
      )
      (se): SEBlock(
        (squeeze): AdaptiveAvgPool2d(output_size=(1, 1))
        (excitation): Sequential(
          (0): Linear(in_features=38, out_features=152, bias=True)
          (1): Swish(
            (sigmoid): Sigmoid()
          )
          (2): Linear(in_features=152, out_features=38, bias=True)
          (3): Sigmoid()
        )
      )
      (project): Sequential(
        (0): Conv2d(38, 19, kernel_size=(1, 1), stride=(1, 1

In [4]:
time_list = []
data = np.random.rand(9,288,522).astype('float32')/255
start = time.time()

unit = torch.from_numpy(np.asarray([data])).to(device)
with torch.no_grad():
    torch.cuda.synchronize()

    h_pred = model(unit)
    torch.cuda.synchronize()

    end = time.time()

    time_list.append(1/(end - start))

RuntimeError: The size of tensor a (33) must match the size of tensor b (34) at non-singleton dimension 3