# YOLOX Custom Pre-Trained Weights

I trained a custom YOLOX model with two classes (`person`,`employee`). I trained the model for 10 epochs and received a `*.pth` file for each epoch.

On below cells, we will load the 10th epoch file and load its pre-trained weights to load it to our model.

From below, we will see that the format is the same as a regular detection checkpoint file.

In [3]:
import torch
data_path = r"D:\Github\mmtracking\mmtracking\train_configs\det\yolox\cfg000\logs\latest.pth"
data = torch.load(data_path)
data.keys()

dict_keys(['meta', 'state_dict', 'optimizer'])

# YOLOX Architecture

I want to perform transfer learning on YOLOX. To do this, I want to freeze all weights except for that of the head. As such, I need to make sure I can freeze all this weights except for the head

In [1]:
import torch
from mmdet.apis import init_detector

# build module by passing pretrained weights on config file
config = "D:\Github\mmtracking\mmtracking\configs\det\det_format\yolox_tiny_8x8_300e_coco.py"
checkpoint = None
device = torch.device("cuda:0")

model = init_detector(config, checkpoint, device)
model 

  from .autonotebook import tqdm as notebook_tqdm


YOLOX(
  (backbone): CSPDarknet(
    (stem): Focus(
      (conv): ConvModule(
        (conv): Conv2d(12, 24, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), bias=False)
        (bn): BatchNorm2d(24, eps=0.001, momentum=0.03, affine=True, track_running_stats=True)
        (activate): Swish()
      )
    )
    (stage1): Sequential(
      (0): ConvModule(
        (conv): Conv2d(24, 48, kernel_size=(3, 3), stride=(2, 2), padding=(1, 1), bias=False)
        (bn): BatchNorm2d(48, eps=0.001, momentum=0.03, affine=True, track_running_stats=True)
        (activate): Swish()
      )
      (1): CSPLayer(
        (main_conv): ConvModule(
          (conv): Conv2d(48, 24, kernel_size=(1, 1), stride=(1, 1), bias=False)
          (bn): BatchNorm2d(24, eps=0.001, momentum=0.03, affine=True, track_running_stats=True)
          (activate): Swish()
        )
        (short_conv): ConvModule(
          (conv): Conv2d(48, 24, kernel_size=(1, 1), stride=(1, 1), bias=False)
          (bn): BatchNorm2d(24, 

In [7]:
for param in model.parameters():
    param.requires_grad = False

list(model.bbox_head.parameters())

[Parameter containing:
 tensor([[[[ 0.0704, -0.0003,  0.0159],
           [-0.0067, -0.0503,  0.0158],
           [ 0.0222,  0.0347,  0.0174]],
 
          [[ 0.0089,  0.0149,  0.0116],
           [ 0.0717,  0.0007,  0.0354],
           [-0.0260, -0.0376,  0.0122]],
 
          [[-0.0516, -0.0832, -0.0203],
           [-0.1305, -0.0300,  0.0258],
           [-0.0022,  0.0073, -0.0344]],
 
          ...,
 
          [[ 0.1244,  0.0064,  0.0092],
           [-0.0190, -0.0622, -0.0171],
           [-0.0156,  0.0831, -0.0109]],
 
          [[-0.0361,  0.0636,  0.0060],
           [-0.0646, -0.0304, -0.0590],
           [-0.0095, -0.0705,  0.0625]],
 
          [[-0.0620,  0.0736,  0.0299],
           [-0.0489, -0.0270, -0.0566],
           [ 0.0038, -0.0290,  0.0294]]],
 
 
         [[[-0.0926, -0.0045,  0.0073],
           [ 0.0410,  0.0350,  0.0964],
           [-0.0513, -0.0374, -0.0762]],
 
          [[-0.0030, -0.0211,  0.0473],
           [ 0.0020,  0.0533, -0.0285],
           [ 0.0

In [8]:
for param in model.bbox_head.parameters():
    param.requires_grad=True

list(model.bbox_head.parameters())[0]

Parameter containing:
tensor([[[[ 0.0704, -0.0003,  0.0159],
          [-0.0067, -0.0503,  0.0158],
          [ 0.0222,  0.0347,  0.0174]],

         [[ 0.0089,  0.0149,  0.0116],
          [ 0.0717,  0.0007,  0.0354],
          [-0.0260, -0.0376,  0.0122]],

         [[-0.0516, -0.0832, -0.0203],
          [-0.1305, -0.0300,  0.0258],
          [-0.0022,  0.0073, -0.0344]],

         ...,

         [[ 0.1244,  0.0064,  0.0092],
          [-0.0190, -0.0622, -0.0171],
          [-0.0156,  0.0831, -0.0109]],

         [[-0.0361,  0.0636,  0.0060],
          [-0.0646, -0.0304, -0.0590],
          [-0.0095, -0.0705,  0.0625]],

         [[-0.0620,  0.0736,  0.0299],
          [-0.0489, -0.0270, -0.0566],
          [ 0.0038, -0.0290,  0.0294]]],


        [[[-0.0926, -0.0045,  0.0073],
          [ 0.0410,  0.0350,  0.0964],
          [-0.0513, -0.0374, -0.0762]],

         [[-0.0030, -0.0211,  0.0473],
          [ 0.0020,  0.0533, -0.0285],
          [ 0.0107,  0.0550,  0.0232]],

         

In [5]:
list(model.bbox_head.parameters())

[Parameter containing:
 tensor([[[[ 0.0704, -0.0003,  0.0159],
           [-0.0067, -0.0503,  0.0158],
           [ 0.0222,  0.0347,  0.0174]],
 
          [[ 0.0089,  0.0149,  0.0116],
           [ 0.0717,  0.0007,  0.0354],
           [-0.0260, -0.0376,  0.0122]],
 
          [[-0.0516, -0.0832, -0.0203],
           [-0.1305, -0.0300,  0.0258],
           [-0.0022,  0.0073, -0.0344]],
 
          ...,
 
          [[ 0.1244,  0.0064,  0.0092],
           [-0.0190, -0.0622, -0.0171],
           [-0.0156,  0.0831, -0.0109]],
 
          [[-0.0361,  0.0636,  0.0060],
           [-0.0646, -0.0304, -0.0590],
           [-0.0095, -0.0705,  0.0625]],
 
          [[-0.0620,  0.0736,  0.0299],
           [-0.0489, -0.0270, -0.0566],
           [ 0.0038, -0.0290,  0.0294]]],
 
 
         [[[-0.0926, -0.0045,  0.0073],
           [ 0.0410,  0.0350,  0.0964],
           [-0.0513, -0.0374, -0.0762]],
 
          [[-0.0030, -0.0211,  0.0473],
           [ 0.0020,  0.0533, -0.0285],
           [ 0.0

In [4]:
list(model.parameters())[0]

[Parameter containing:
 tensor([[[[-2.3688e-02, -5.8365e-02, -6.3470e-02],
           [ 1.0824e-01, -9.2033e-02, -6.6051e-02],
           [ 6.9751e-03,  1.3026e-02,  9.2592e-03]],
 
          [[-1.1865e-02,  9.3924e-02,  6.5384e-04],
           [-2.5372e-01,  7.1661e-02, -1.0275e-01],
           [ 7.5341e-03, -4.6367e-03, -1.1382e-01]],
 
          [[-1.2815e-01,  1.5308e-02, -1.0328e-02],
           [-3.2702e-02,  1.4613e-01,  1.4799e-01],
           [-2.3435e-01, -1.4049e-01, -2.5970e-03]],
 
          ...,
 
          [[ 7.3458e-02, -1.0477e-01,  6.0291e-02],
           [ 1.8257e-02,  6.7739e-02, -1.3320e-01],
           [ 3.9571e-02, -4.9357e-02, -2.6695e-03]],
 
          [[ 3.4897e-03, -6.6213e-02,  2.9794e-02],
           [ 2.2334e-02, -8.1425e-02, -6.4227e-02],
           [-8.0590e-02, -7.3783e-02,  9.1577e-02]],
 
          [[-7.8385e-02,  6.5655e-02, -7.3153e-02],
           [-2.8844e-02,  2.4029e-02, -8.8947e-02],
           [ 3.8886e-02,  9.3592e-02,  1.4353e-01]]],
 
 
   

# `CLASSES` checkpoint?

It seems that `init_model` for building tracking modules does NOT load `self.CLASSES` if no checkpoint is given. As such, below we will create a checkpoint that contains `CLASSES`, load it during building and see if this fixes the no classes warning.

Below worked! The only thing is that it is going to give you an error that `reid.{layer}` and `detector.{layer}` weights are missing. 

In [14]:
import torch
out = "D:\Github\mmtracking\mmtracking\checkpoint\CLASSES_checkpoint.pth"
from mmdet.datasets.coco import CocoDataset
CLASSES = CocoDataset.CLASSES
data = dict(meta = dict(CLASSES = CLASSES))
torch.save(data, out)


# Custom COCO

Below I will import some COCO annotations with three labels:
* Erick
* Sarah
* person

I want to transform it so that 2 labels exist: 
* Erick
* person


In [15]:
import json
coco_path = r"D:\Photos\Erick_photos\Erick Pictures\22_07_11\cvat_coco_annts\annotations\instances_default.json"

with open(coco_path, 'r') as file:
    data = json.load(file)

data.keys()

dict_keys(['licenses', 'info', 'categories', 'images', 'annotations'])

In [17]:
data["categories"]

[{'id': 1, 'name': 'Erick', 'supercategory': ''},
 {'id': 2, 'name': 'Sarah', 'supercategory': ''},
 {'id': 3, 'name': 'person', 'supercategory': ''}]

In [18]:
data["annotations"][0]

{'id': 1,
 'image_id': 1,
 'category_id': 1,
 'segmentation': [],
 'area': 1696806.8228,
 'bbox': [173.7, 639.09, 986.12, 1720.69],
 'iscrowd': 0,
 'attributes': {'occluded': False, 'rotation': 0.0}}

In [None]:
del data["categories"][2] # delete "person" category
data["categories"][1]["name"] = "person" # replace "Sarah" category with "person"
annts = data["annotations"]
for annt in annts:
    if annt["category_id"] == 3:
        annt["category_id"] = 2

# Visualize only `Person` category?

It seems to me that when we visualize mot results, the `show_result` methods automatically filters out all classes that are not `Person`. As such, below we will explore this concept

In [1]:
# deepsort
from mmtrack.apis import inference_mot, init_model
from mmdet.datasets.pipelines import Compose

config = "D:\Github\mmtracking\mmtracking\configs\mot\deepsort\deepsort_yolox_tiny_8x8_mot17-private-half_v2.py"
checkpoint = None
device = "cuda:0"
score_thr =  .01
frame_id = 0

model = init_model(config, checkpoint, device=device)

?model.show_result

  from .autonotebook import tqdm as notebook_tqdm
2022-07-11 08:36:56,781 - mmtrack - INFO - initialize YOLOX with init_cfg {'type': 'Pretrained', 'checkpoint': 'https://download.openmmlab.com/mmdetection/v2.0/yolox/yolox_tiny_8x8_300e_coco/yolox_tiny_8x8_300e_coco_20211124_171234-b4047906.pth'}
2022-07-11 08:36:56,782 - mmcv - INFO - load model from: https://download.openmmlab.com/mmdetection/v2.0/yolox/yolox_tiny_8x8_300e_coco/yolox_tiny_8x8_300e_coco_20211124_171234-b4047906.pth
2022-07-11 08:36:56,787 - mmcv - INFO - load checkpoint from http path: https://download.openmmlab.com/mmdetection/v2.0/yolox/yolox_tiny_8x8_300e_coco/yolox_tiny_8x8_300e_coco_20211124_171234-b4047906.pth
2022-07-11 08:36:57,029 - mmtrack - INFO - initialize BaseReID with init_cfg {'type': 'Pretrained', 'checkpoint': 'https://download.openmmlab.com/mmtracking/mot/reid/tracktor_reid_r50_iter25245-a452f51f.pth'}
2022-07-11 08:36:57,030 - mmcv - INFO - load model from: https://download.openmmlab.com/mmtracking/



[1;31mSignature:[0m
[0mmodel[0m[1;33m.[0m[0mshow_result[0m[1;33m([0m[1;33m
[0m    [0mimg[0m[1;33m,[0m[1;33m
[0m    [0mresult[0m[1;33m,[0m[1;33m
[0m    [0mscore_thr[0m[1;33m=[0m[1;36m0.0[0m[1;33m,[0m[1;33m
[0m    [0mthickness[0m[1;33m=[0m[1;36m1[0m[1;33m,[0m[1;33m
[0m    [0mfont_scale[0m[1;33m=[0m[1;36m0.5[0m[1;33m,[0m[1;33m
[0m    [0mshow[0m[1;33m=[0m[1;32mFalse[0m[1;33m,[0m[1;33m
[0m    [0mout_file[0m[1;33m=[0m[1;32mNone[0m[1;33m,[0m[1;33m
[0m    [0mwait_time[0m[1;33m=[0m[1;36m0[0m[1;33m,[0m[1;33m
[0m    [0mbackend[0m[1;33m=[0m[1;34m'cv2'[0m[1;33m,[0m[1;33m
[0m    [1;33m**[0m[0mkwargs[0m[1;33m,[0m[1;33m
[0m[1;33m)[0m[1;33m[0m[1;33m[0m[0m
[1;31mDocstring:[0m
Visualize tracking results.

Args:
    img (str | ndarray): Filename of loaded image.
    result (dict): Tracking result.
        - The value of key 'track_bboxes' is list with length
        num_classes, and each element 

# test_pipeline

Below cells will explore what are the different pre-processing steps between deep sort and YOLOX (detection format). 

We are doing this because when I run YOLOX with Deep SORT, not matches are found. I am thinking this is due to the different pre-processing steps of an image.

From below cells, we can see that there IS a huge difference in image dimension:
* DeepSORT test pipeline shape: `[3, 832, 1088]`
* YOLOX detection test pipeline shape: `[3, 416, 416]`

In [3]:
# deepsort
from mmtrack.apis import inference_mot, init_model
from mmdet.datasets.pipelines import Compose

config = "D:\Github\mmtracking\mmtracking\configs\mot\deepsort\deepsort_yolox_tiny_8x8_mot17-private-half_v2.py"
checkpoint = None
device = "cuda:0"
score_thr =  .01
frame_id = 0

model = init_model(config, checkpoint, device=device)

# load img
img = "D:\Photos\Erick_photos\Erick Pictures\IMG0374.JPG"

img_norm_cfg = dict(
    mean=[123.675, 116.28, 103.53], std=[58.395, 57.12, 57.375], to_rgb=True)
test_pipeline = [
    dict(type='LoadImageFromFile'),
    dict(
        type='MultiScaleFlipAug',
        img_scale=(1088, 1088), # original
        flip=False,
        transforms=[
            dict(type='Resize', keep_ratio=True),
            dict(type='RandomFlip'),
            dict(type='Normalize', **img_norm_cfg),
            dict(type='Pad', size_divisor=32),
            dict(type='ImageToTensor', keys=['img']),
            dict(type='VideoCollect', keys=['img'])
        ])
]

data = dict(img_info=dict(filename=img, frame_id=frame_id), img_prefix=None)
test_pipeline = Compose(test_pipeline)
data = test_pipeline(data)
data["img"][0].shape

2022-07-10 19:28:35,853 - mmtrack - INFO - initialize YOLOX with init_cfg {'type': 'Pretrained', 'checkpoint': 'https://download.openmmlab.com/mmdetection/v2.0/yolox/yolox_tiny_8x8_300e_coco/yolox_tiny_8x8_300e_coco_20211124_171234-b4047906.pth'}
2022-07-10 19:28:35,855 - mmcv - INFO - load model from: https://download.openmmlab.com/mmdetection/v2.0/yolox/yolox_tiny_8x8_300e_coco/yolox_tiny_8x8_300e_coco_20211124_171234-b4047906.pth
2022-07-10 19:28:35,857 - mmcv - INFO - load checkpoint from http path: https://download.openmmlab.com/mmdetection/v2.0/yolox/yolox_tiny_8x8_300e_coco/yolox_tiny_8x8_300e_coco_20211124_171234-b4047906.pth
2022-07-10 19:28:36,157 - mmtrack - INFO - initialize BaseReID with init_cfg {'type': 'Pretrained', 'checkpoint': 'https://download.openmmlab.com/mmtracking/mot/reid/tracktor_reid_r50_iter25245-a452f51f.pth'}
2022-07-10 19:28:36,159 - mmcv - INFO - load model from: https://download.openmmlab.com/mmtracking/mot/reid/tracktor_reid_r50_iter25245-a452f51f.pth




torch.Size([3, 832, 1088])

In [8]:
# det format
from mmdet.apis import inference_detector, init_detector

config = r"D:\Github\mmtracking\mmtracking\configs\det\det_format\yolox_tiny_8x8_300e_coco.py"
checkpoint = r"https://download.openmmlab.com/mmdetection/v2.0/yolox/yolox_tiny_8x8_300e_coco/yolox_tiny_8x8_300e_coco_20211124_171234-b4047906.pth"
device = "cuda:0"
score_thr =  .01

model = init_detector(config, checkpoint, device=device)

# load img
img = "D:\Photos\Erick_photos\Erick Pictures\IMG0374.JPG"


test_pipeline = [
    dict(type='LoadImageFromFile'),
    dict(
        type='MultiScaleFlipAug',
        img_scale=(416, 416),
        flip=False,
        transforms=[
            dict(type='Resize', keep_ratio=True),
            dict(type='RandomFlip'),
            dict(
                type='Pad',
                pad_to_square=True,
                pad_val=dict(img=(114.0, 114.0, 114.0))),
            dict(type='DefaultFormatBundle'),
            dict(type='Collect', keys=['img'])
        ])
]

data = dict(img_info=dict(filename=img, frame_id=frame_id), img_prefix=None)
test_pipeline = Compose(test_pipeline)
data = test_pipeline(data)
data["img"][0].size()

load checkpoint from http path: https://download.openmmlab.com/mmdetection/v2.0/yolox/yolox_tiny_8x8_300e_coco/yolox_tiny_8x8_300e_coco_20211124_171234-b4047906.pth


torch.Size([3, 416, 416])

In [2]:
# below, we will modify the test pipeline of Deep SORT to match that of YOLOX detection

# deepsort
import torch
from mmtrack.apis import inference_mot, init_model
from mmdet.datasets.pipelines import Compose
from mmcv.parallel import collate, scatter

config = "D:\Github\mmtracking\mmtracking\configs\mot\deepsort\deepsort_yolox_tiny_8x8_mot17-private-half_v2.py"
checkpoint = None
device = "cuda:0"
score_thr =  .01
frame_id = 0

model = init_model(config, checkpoint, device=device)

# load img
img = "D:\Photos\Erick_photos\Erick Pictures\IMG0374.JPG"

img_norm_cfg = dict(
    mean=[123.675, 116.28, 103.53], std=[58.395, 57.12, 57.375], to_rgb=True)
test_pipeline = [
    dict(type='LoadImageFromFile'),
    dict(
        type='MultiScaleFlipAug',
        img_scale=(416, 416), # original
        flip=False,
        transforms=[
            dict(type='Resize', keep_ratio=True),
            dict(type='RandomFlip'),
            dict(type='Pad', 
                pad_to_square=True,
                pad_val=dict(img=(114.0, 114.0, 114.0))),
            # dict(type='ImageToTensor', keys=['img']), # depracated (https://mmdetection.readthedocs.io/en/latest/1_exist_data_model.html?highlight=ImageToTensor)
            dict(type='DefaultFormatBundle'),
            # dict(type='VideoCollect', keys=['img'])# `test_pipeline(data)` fails with this 
            dict(type='Collect', keys=['img'])
        ])
]

device_ = next(model.parameters()).device 
data = dict(img_info=dict(filename=img, frame_id=frame_id), img_prefix=None)
test_pipeline = Compose(test_pipeline)
data = test_pipeline(data)
data['img'][0].size # (3,416,416)

data = collate([data], samples_per_gpu=1)
data = scatter(data, [device_.index])[0]
with torch.no_grad():
    result = model(return_loss=False, rescale=True, **data)

2022-07-11 08:52:27,801 - mmtrack - INFO - initialize YOLOX with init_cfg {'type': 'Pretrained', 'checkpoint': 'https://download.openmmlab.com/mmdetection/v2.0/yolox/yolox_tiny_8x8_300e_coco/yolox_tiny_8x8_300e_coco_20211124_171234-b4047906.pth'}
2022-07-11 08:52:27,803 - mmcv - INFO - load model from: https://download.openmmlab.com/mmdetection/v2.0/yolox/yolox_tiny_8x8_300e_coco/yolox_tiny_8x8_300e_coco_20211124_171234-b4047906.pth
2022-07-11 08:52:27,804 - mmcv - INFO - load checkpoint from http path: https://download.openmmlab.com/mmdetection/v2.0/yolox/yolox_tiny_8x8_300e_coco/yolox_tiny_8x8_300e_coco_20211124_171234-b4047906.pth
2022-07-11 08:52:28,112 - mmtrack - INFO - initialize BaseReID with init_cfg {'type': 'Pretrained', 'checkpoint': 'https://download.openmmlab.com/mmtracking/mot/reid/tracktor_reid_r50_iter25245-a452f51f.pth'}
2022-07-11 08:52:28,114 - mmcv - INFO - load model from: https://download.openmmlab.com/mmtracking/mot/reid/tracktor_reid_r50_iter25245-a452f51f.pth




  return _VF.meshgrid(tensors, **kwargs)  # type: ignore[attr-defined]


In [27]:
result.keys()

dict_keys(['det_bboxes', 'track_bboxes'])

In [11]:
len(result["det_bboxes"]), len(result["track_bboxes"])

(80, 80)

In [31]:
model.show_result(
    img,
    result,
    score_thr=score_thr,
    show=False,
    wait_time=0,
    out_file=r"D:\Github\mmtracking\mmtracking\data\06_23_2022\preds\tracking\deepsort\cfg006\viz\test.jpeg"
)

array([[[165, 117,  83],
        [169, 121,  87],
        [167, 119,  85],
        ...,
        [171, 123,  95],
        [168, 120,  92],
        [172, 124,  96]],

       [[166, 118,  84],
        [169, 121,  87],
        [168, 120,  86],
        ...,
        [169, 121,  93],
        [168, 120,  92],
        [172, 124,  96]],

       [[167, 119,  85],
        [169, 121,  87],
        [168, 120,  86],
        ...,
        [170, 122,  94],
        [168, 120,  92],
        [167, 119,  91]],

       ...,

       [[178, 209, 232],
        [179, 210, 233],
        [181, 212, 235],
        ...,
        [160, 191, 206],
        [160, 191, 206],
        [160, 191, 206]],

       [[183, 214, 237],
        [184, 215, 238],
        [183, 214, 237],
        ...,
        [158, 189, 204],
        [157, 188, 203],
        [157, 188, 203]],

       [[180, 211, 234],
        [181, 212, 235],
        [181, 212, 235],
        ...,
        [158, 189, 204],
        [159, 190, 205],
        [159, 190, 205]]

In [4]:
from mmtrack.core import results2outs
import mmcv

track_bboxes = result.get('track_bboxes', None)
track_masks = result.get('track_masks', None)
if isinstance(img, str):
    img = mmcv.imread(img)
outs_track = results2outs(
    bbox_results=track_bboxes,
    mask_results=track_masks,
    mask_shape=img.shape[:2])

outs_track

{'labels': array([0, 0, 0, 0, 0], dtype=int64),
 'bboxes': array([[2.5433579e+03, 8.7913745e+02, 3.1922183e+03, 2.9003018e+03,
         8.9579111e-01],
        [6.6957251e+02, 8.0981274e+02, 1.2767234e+03, 2.9772202e+03,
         8.9265776e-01],
        [1.5910342e+03, 7.2342188e+02, 2.2035574e+03, 2.8570815e+03,
         8.7003696e-01],
        [2.0592065e+03, 8.1872607e+02, 2.6635134e+03, 2.9195657e+03,
         8.6754602e-01],
        [1.1463987e+03, 7.3711060e+02, 1.7110135e+03, 2.8859285e+03,
         8.6470163e-01]], dtype=float32),
 'ids': array([0, 1, 2, 3, 4], dtype=int64)}

In [35]:
# modify v2

# deepsort
import torch
from mmtrack.apis import inference_mot, init_model
from mmdet.datasets.pipelines import Compose
from mmcv.parallel import collate, scatter

config = "D:\Github\mmtracking\mmtracking\configs\mot\deepsort\deepsort_yolox_tiny_8x8_mot17-private-half_v2.py"
checkpoint = None
device = "cuda:0"
score_thr =  .01
frame_id = 0

model = init_model(config, checkpoint, device=device)

# load img
img = "D:\Photos\Erick_photos\Erick Pictures\IMG0374.JPG"

img_norm_cfg = dict(
    mean=[123.675, 116.28, 103.53], std=[58.395, 57.12, 57.375], to_rgb=True)
test_pipeline = [
    dict(type='LoadImageFromFile'),
    dict(
        type='MultiScaleFlipAug',
        img_scale=(416, 416), # original
        flip=False,
        transforms=[
            dict(type='Resize', keep_ratio=True),
            dict(type='RandomFlip'),
            dict(type='Pad', 
                pad_to_square=True,
                pad_val=dict(img=(114.0, 114.0, 114.0))),
            dict(type='VideoCollect', keys=['img']),
            dict(type='DefaultFormatBundle')
        ])
]

device_ = next(model.parameters()).device 
data = dict(img_info=dict(filename=img, frame_id=frame_id), img_prefix=None)
test_pipeline = Compose(test_pipeline)
data = test_pipeline(data)
data['img'][0].size # (3,416,416)

data = collate([data], samples_per_gpu=1)
data = scatter(data, [device_.index])[0]
with torch.no_grad():
    result = model(return_loss=False, rescale=True, **data)

2022-07-10 22:03:10,387 - mmtrack - INFO - initialize YOLOX with init_cfg {'type': 'Pretrained', 'checkpoint': 'https://download.openmmlab.com/mmdetection/v2.0/yolox/yolox_tiny_8x8_300e_coco/yolox_tiny_8x8_300e_coco_20211124_171234-b4047906.pth'}
2022-07-10 22:03:10,390 - mmcv - INFO - load model from: https://download.openmmlab.com/mmdetection/v2.0/yolox/yolox_tiny_8x8_300e_coco/yolox_tiny_8x8_300e_coco_20211124_171234-b4047906.pth
2022-07-10 22:03:10,391 - mmcv - INFO - load checkpoint from http path: https://download.openmmlab.com/mmdetection/v2.0/yolox/yolox_tiny_8x8_300e_coco/yolox_tiny_8x8_300e_coco_20211124_171234-b4047906.pth
2022-07-10 22:03:10,952 - mmtrack - INFO - initialize BaseReID with init_cfg {'type': 'Pretrained', 'checkpoint': 'https://download.openmmlab.com/mmtracking/mot/reid/tracktor_reid_r50_iter25245-a452f51f.pth'}
2022-07-10 22:03:10,954 - mmcv - INFO - load model from: https://download.openmmlab.com/mmtracking/mot/reid/tracktor_reid_r50_iter25245-a452f51f.pth




In [36]:
model.show_result(
    img,
    result,
    score_thr=score_thr,
    show=False,
    wait_time=0,
    out_file=r"D:\Github\mmtracking\mmtracking\data\06_23_2022\preds\tracking\deepsort\cfg006\viz\test_v2.jpeg"
)

array([[[165, 117,  83],
        [169, 121,  87],
        [167, 119,  85],
        ...,
        [171, 123,  95],
        [168, 120,  92],
        [172, 124,  96]],

       [[166, 118,  84],
        [169, 121,  87],
        [168, 120,  86],
        ...,
        [169, 121,  93],
        [168, 120,  92],
        [172, 124,  96]],

       [[167, 119,  85],
        [169, 121,  87],
        [168, 120,  86],
        ...,
        [170, 122,  94],
        [168, 120,  92],
        [167, 119,  91]],

       ...,

       [[178, 209, 232],
        [179, 210, 233],
        [181, 212, 235],
        ...,
        [160, 191, 206],
        [160, 191, 206],
        [160, 191, 206]],

       [[183, 214, 237],
        [184, 215, 238],
        [183, 214, 237],
        ...,
        [158, 189, 204],
        [157, 188, 203],
        [157, 188, 203]],

       [[180, 211, 234],
        [181, 212, 235],
        [181, 212, 235],
        ...,
        [158, 189, 204],
        [159, 190, 205],
        [159, 190, 205]]

# YOLOX

When I run YOLOX with mmdet, it works. However, when I do it with mmtrack, it does NOT show anything and below warning is raised:

`Warning: the model doesn't have classes`

Further, when I run mmdet and I pass `checkpoint` on `cfg` file, below error is raised:

`AttributeError: 'YOLOX' object has no attribute 'CLASSES'`

However, when I pass checkpoint explictly as below:

`model = init_detector(args.config, checkpoint, device=device)`

Everything works fine.

## YOLOX Detection

In [9]:
import torch

weights_path = r"D:\Github\mmtracking\mmtracking\weights\yolox_tiny_8x8_300e_coco_20211124_171234-b4047906.pth"
preTrainedDict = torch.load(weights_path)
preTrainedDict.keys()

dict_keys(['meta', 'state_dict'])

In [10]:
preTrainedDict["meta"]

{'env_info': 'sys.platform: linux\nPython: 3.8.12 (default, Oct 12 2021, 13:49:34) [GCC 7.5.0]\nCUDA available: True\nGPU 0,1,2,3,4,5,6,7: Tesla PG503-216\nCUDA_HOME: /mnt/lustre/share/cuda-10.2\nNVCC: Cuda compilation tools, release 10.2, V10.2.89\nGCC: gcc (GCC) 5.4.0\nPyTorch: 1.8.0\nPyTorch compiling details: PyTorch built with:\n  - GCC 7.3\n  - C++ Version: 201402\n  - Intel(R) oneAPI Math Kernel Library Version 2021.3-Product Build 20210617 for Intel(R) 64 architecture applications\n  - Intel(R) MKL-DNN v1.7.0 (Git Hash 7aed236906b1f7a05c0917e5257a1af05e9ff683)\n  - OpenMP 201511 (a.k.a. OpenMP 4.5)\n  - NNPACK is enabled\n  - CPU capability usage: AVX2\n  - CUDA Runtime 11.1\n  - NVCC architecture flags: -gencode;arch=compute_37,code=sm_37;-gencode;arch=compute_50,code=sm_50;-gencode;arch=compute_60,code=sm_60;-gencode;arch=compute_61,code=sm_61;-gencode;arch=compute_70,code=sm_70;-gencode;arch=compute_75,code=sm_75;-gencode;arch=compute_80,code=sm_80;-gencode;arch=compute_86,c

In [1]:
import torch
from mmdet.apis import init_detector

# build module by passing pretrained weights on config file
config = "D:\Github\mmtracking\mmtracking\configs\det\det_format\yolox_tiny_8x8_300e_coco.py"
checkpoint = None
device = torch.device("cuda:0")

model = init_detector(config, checkpoint, device)
modelHasCLASSESAttribute = hasattr(model, 'CLASSES')
if modelHasCLASSESAttribute:
    print(model.CLASSES)
else:
    print("No CLASSES atttribute")
model

  from .autonotebook import tqdm as notebook_tqdm


No CLASSES atttribute


YOLOX(
  (backbone): CSPDarknet(
    (stem): Focus(
      (conv): ConvModule(
        (conv): Conv2d(12, 24, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), bias=False)
        (bn): BatchNorm2d(24, eps=0.001, momentum=0.03, affine=True, track_running_stats=True)
        (activate): Swish()
      )
    )
    (stage1): Sequential(
      (0): ConvModule(
        (conv): Conv2d(24, 48, kernel_size=(3, 3), stride=(2, 2), padding=(1, 1), bias=False)
        (bn): BatchNorm2d(48, eps=0.001, momentum=0.03, affine=True, track_running_stats=True)
        (activate): Swish()
      )
      (1): CSPLayer(
        (main_conv): ConvModule(
          (conv): Conv2d(48, 24, kernel_size=(1, 1), stride=(1, 1), bias=False)
          (bn): BatchNorm2d(24, eps=0.001, momentum=0.03, affine=True, track_running_stats=True)
          (activate): Swish()
        )
        (short_conv): ConvModule(
          (conv): Conv2d(48, 24, kernel_size=(1, 1), stride=(1, 1), bias=False)
          (bn): BatchNorm2d(24, 

In [8]:
model.state_dict().keys()

odict_keys(['backbone.stem.conv.conv.weight', 'backbone.stem.conv.bn.weight', 'backbone.stem.conv.bn.bias', 'backbone.stem.conv.bn.running_mean', 'backbone.stem.conv.bn.running_var', 'backbone.stem.conv.bn.num_batches_tracked', 'backbone.stage1.0.conv.weight', 'backbone.stage1.0.bn.weight', 'backbone.stage1.0.bn.bias', 'backbone.stage1.0.bn.running_mean', 'backbone.stage1.0.bn.running_var', 'backbone.stage1.0.bn.num_batches_tracked', 'backbone.stage1.1.main_conv.conv.weight', 'backbone.stage1.1.main_conv.bn.weight', 'backbone.stage1.1.main_conv.bn.bias', 'backbone.stage1.1.main_conv.bn.running_mean', 'backbone.stage1.1.main_conv.bn.running_var', 'backbone.stage1.1.main_conv.bn.num_batches_tracked', 'backbone.stage1.1.short_conv.conv.weight', 'backbone.stage1.1.short_conv.bn.weight', 'backbone.stage1.1.short_conv.bn.bias', 'backbone.stage1.1.short_conv.bn.running_mean', 'backbone.stage1.1.short_conv.bn.running_var', 'backbone.stage1.1.short_conv.bn.num_batches_tracked', 'backbone.stage1

In [11]:
?model.extract_feat

[1;31mSignature:[0m [0mmodel[0m[1;33m.[0m[0mextract_feat[0m[1;33m([0m[0mimg[0m[1;33m)[0m[1;33m[0m[1;33m[0m[0m
[1;31mDocstring:[0m Directly extract features from the backbone+neck.
[1;31mFile:[0m      d:\open-mmlab\lib\site-packages\mmdet\models\detectors\single_stage.py
[1;31mType:[0m      method


In [12]:
model.bbox_head.num_classes

80

In [9]:
import torch
from mmdet.apis import init_detector

# build module by passing pretrained weights on config file
config = "D:\Github\mmtracking\mmtracking\configs\det\det_format\yolox_tiny_8x8_300e_coco.py"
checkpoint = 'https://download.openmmlab.com/mmdetection/v2.0/yolox/yolox_tiny_8x8_300e_coco/yolox_tiny_8x8_300e_coco_20211124_171234-b4047906.pth'
device = torch.device("cuda:0")

model = init_detector(config, checkpoint, device)
modelHasCLASSESAttribute = hasattr(model, 'CLASSES')
if modelHasCLASSESAttribute:
    print(model.CLASSES)
else:
    print("No CLASSES atttribute")
model.state_dict().keys()

load checkpoint from http path: https://download.openmmlab.com/mmdetection/v2.0/yolox/yolox_tiny_8x8_300e_coco/yolox_tiny_8x8_300e_coco_20211124_171234-b4047906.pth
('person', 'bicycle', 'car', 'motorcycle', 'airplane', 'bus', 'train', 'truck', 'boat', 'traffic light', 'fire hydrant', 'stop sign', 'parking meter', 'bench', 'bird', 'cat', 'dog', 'horse', 'sheep', 'cow', 'elephant', 'bear', 'zebra', 'giraffe', 'backpack', 'umbrella', 'handbag', 'tie', 'suitcase', 'frisbee', 'skis', 'snowboard', 'sports ball', 'kite', 'baseball bat', 'baseball glove', 'skateboard', 'surfboard', 'tennis racket', 'bottle', 'wine glass', 'cup', 'fork', 'knife', 'spoon', 'bowl', 'banana', 'apple', 'sandwich', 'orange', 'broccoli', 'carrot', 'hot dog', 'pizza', 'donut', 'cake', 'chair', 'couch', 'potted plant', 'bed', 'dining table', 'toilet', 'tv', 'laptop', 'mouse', 'remote', 'keyboard', 'cell phone', 'microwave', 'oven', 'toaster', 'sink', 'refrigerator', 'book', 'clock', 'vase', 'scissors', 'teddy bear', '

odict_keys(['backbone.stem.conv.conv.weight', 'backbone.stem.conv.bn.weight', 'backbone.stem.conv.bn.bias', 'backbone.stem.conv.bn.running_mean', 'backbone.stem.conv.bn.running_var', 'backbone.stem.conv.bn.num_batches_tracked', 'backbone.stage1.0.conv.weight', 'backbone.stage1.0.bn.weight', 'backbone.stage1.0.bn.bias', 'backbone.stage1.0.bn.running_mean', 'backbone.stage1.0.bn.running_var', 'backbone.stage1.0.bn.num_batches_tracked', 'backbone.stage1.1.main_conv.conv.weight', 'backbone.stage1.1.main_conv.bn.weight', 'backbone.stage1.1.main_conv.bn.bias', 'backbone.stage1.1.main_conv.bn.running_mean', 'backbone.stage1.1.main_conv.bn.running_var', 'backbone.stage1.1.main_conv.bn.num_batches_tracked', 'backbone.stage1.1.short_conv.conv.weight', 'backbone.stage1.1.short_conv.bn.weight', 'backbone.stage1.1.short_conv.bn.bias', 'backbone.stage1.1.short_conv.bn.running_mean', 'backbone.stage1.1.short_conv.bn.running_var', 'backbone.stage1.1.short_conv.bn.num_batches_tracked', 'backbone.stage1

In [22]:
# det format

import torch
from mmdet.apis import init_detector
from mmdet.models import build_detector
from mmcv.runner import load_checkpoint
import mmcv

# build module by passing pretrained weights on config file
config = "D:\Github\mmtracking\mmtracking\configs\det\det_format\yolox_tiny_8x8_300e_coco.py"
config = mmcv.Config.fromfile(config)
print("pretrained" in config.model, "init_cfg" in config.model.backbone) # False, False

model = build_detector(config.model, test_cfg=config.get('test_cfg')) # YOLOX module
print(hasattr(model, "CLASSES")) # False

checkpoint = 'https://download.openmmlab.com/mmdetection/v2.0/yolox/yolox_tiny_8x8_300e_coco/yolox_tiny_8x8_300e_coco_20211124_171234-b4047906.pth'
checkpoint = load_checkpoint(model, checkpoint, map_location='cpu')
print(checkpoint.keys())
model.CLASSES = checkpoint["meta"]["CLASSES"]
model.CLASSES[:5]

False False
False
load checkpoint from http path: https://download.openmmlab.com/mmdetection/v2.0/yolox/yolox_tiny_8x8_300e_coco/yolox_tiny_8x8_300e_coco_20211124_171234-b4047906.pth
dict_keys(['meta', 'state_dict'])


('person', 'bicycle', 'car', 'motorcycle', 'airplane')

In [None]:
checkpoint = None
device = torch.device("cuda:0")

model = init_detector(config, checkpoint, device)
modelHasCLASSESAttribute = hasattr(model, 'CLASSES')
if modelHasCLASSESAttribute:
    print(model.CLASSES)
else:
    print("No CLASSES atttribute")

## YOLOX Tracking

In [3]:
import torch
from mmtrack.apis import inference_mot, init_model

# build module by passing pretrained weights on config file
config = "D:\Github\mmtracking\mmtracking\configs\mot\deepsort\deepsort_yolox_tiny_8x8_mot17-private-half.py"
checkpoint = None
device = torch.device("cuda:0")

model = init_model(config, checkpoint, device)
modelHasCLASSESAttribute = hasattr(model, 'detector') and hasattr(model.detector, 'CLASSES')
print(modelHasCLASSESAttribute) # False (this is supposed to be True)

2022-07-08 10:27:03,990 - mmtrack - INFO - initialize YOLOX with init_cfg {'type': 'Pretrained', 'checkpoint': 'https://download.openmmlab.com/mmdetection/v2.0/yolox/yolox_tiny_8x8_300e_coco/yolox_tiny_8x8_300e_coco_20211124_171234-b4047906.pth'}
2022-07-08 10:27:03,993 - mmcv - INFO - load model from: https://download.openmmlab.com/mmdetection/v2.0/yolox/yolox_tiny_8x8_300e_coco/yolox_tiny_8x8_300e_coco_20211124_171234-b4047906.pth
2022-07-08 10:27:03,996 - mmcv - INFO - load checkpoint from http path: https://download.openmmlab.com/mmdetection/v2.0/yolox/yolox_tiny_8x8_300e_coco/yolox_tiny_8x8_300e_coco_20211124_171234-b4047906.pth
2022-07-08 10:27:04,297 - mmtrack - INFO - initialize BaseReID with init_cfg {'type': 'Pretrained', 'checkpoint': 'https://download.openmmlab.com/mmtracking/mot/reid/tracktor_reid_r50_iter25245-a452f51f.pth'}
2022-07-08 10:27:04,298 - mmcv - INFO - load model from: https://download.openmmlab.com/mmtracking/mot/reid/tracktor_reid_r50_iter25245-a452f51f.pth


False


In [33]:
import torch
from mmdet.apis import init_detector
from mmtrack.apis import inference_mot, init_model

# build module by passing pretrained weights on config file
config = "D:\Github\mmtracking\mmtracking\configs\mot\deepsort\deepsort_yolox_tiny_8x8_mot17-private-half.py"
checkpoint = 'https://download.openmmlab.com/mmdetection/v2.0/yolox/yolox_tiny_8x8_300e_coco/yolox_tiny_8x8_300e_coco_20211124_171234-b4047906.pth'
device = torch.device("cuda:0")

model = init_model(config, checkpoint, device)
modelHasCLASSESAttribute = hasattr(model, "CLASSES")
print(modelHasCLASSESAttribute) # True 

2022-07-08 12:07:13,665 - mmtrack - INFO - initialize YOLOX with init_cfg {'type': 'Pretrained', 'checkpoint': 'https://download.openmmlab.com/mmdetection/v2.0/yolox/yolox_tiny_8x8_300e_coco/yolox_tiny_8x8_300e_coco_20211124_171234-b4047906.pth'}
2022-07-08 12:07:13,669 - mmcv - INFO - load model from: https://download.openmmlab.com/mmdetection/v2.0/yolox/yolox_tiny_8x8_300e_coco/yolox_tiny_8x8_300e_coco_20211124_171234-b4047906.pth
2022-07-08 12:07:13,672 - mmcv - INFO - load checkpoint from http path: https://download.openmmlab.com/mmdetection/v2.0/yolox/yolox_tiny_8x8_300e_coco/yolox_tiny_8x8_300e_coco_20211124_171234-b4047906.pth
2022-07-08 12:07:13,961 - mmtrack - INFO - initialize BaseReID with init_cfg {'type': 'Pretrained', 'checkpoint': 'https://download.openmmlab.com/mmtracking/mot/reid/tracktor_reid_r50_iter25245-a452f51f.pth'}
2022-07-08 12:07:13,962 - mmcv - INFO - load model from: https://download.openmmlab.com/mmtracking/mot/reid/tracktor_reid_r50_iter25245-a452f51f.pth


load checkpoint from http path: https://download.openmmlab.com/mmdetection/v2.0/yolox/yolox_tiny_8x8_300e_coco/yolox_tiny_8x8_300e_coco_20211124_171234-b4047906.pth
The model and loaded state dict do not match exactly

unexpected key in source state_dict: backbone.stem.conv.conv.weight, backbone.stem.conv.bn.weight, backbone.stem.conv.bn.bias, backbone.stem.conv.bn.running_mean, backbone.stem.conv.bn.running_var, backbone.stem.conv.bn.num_batches_tracked, backbone.stage1.0.conv.weight, backbone.stage1.0.bn.weight, backbone.stage1.0.bn.bias, backbone.stage1.0.bn.running_mean, backbone.stage1.0.bn.running_var, backbone.stage1.0.bn.num_batches_tracked, backbone.stage1.1.main_conv.conv.weight, backbone.stage1.1.main_conv.bn.weight, backbone.stage1.1.main_conv.bn.bias, backbone.stage1.1.main_conv.bn.running_mean, backbone.stage1.1.main_conv.bn.running_var, backbone.stage1.1.main_conv.bn.num_batches_tracked, backbone.stage1.1.short_conv.conv.weight, backbone.stage1.1.short_conv.bn.weight, ba

In [29]:
model.detector.state_dict().keys()

odict_keys(['backbone.stem.conv.conv.weight', 'backbone.stem.conv.bn.weight', 'backbone.stem.conv.bn.bias', 'backbone.stem.conv.bn.running_mean', 'backbone.stem.conv.bn.running_var', 'backbone.stem.conv.bn.num_batches_tracked', 'backbone.stage1.0.conv.weight', 'backbone.stage1.0.bn.weight', 'backbone.stage1.0.bn.bias', 'backbone.stage1.0.bn.running_mean', 'backbone.stage1.0.bn.running_var', 'backbone.stage1.0.bn.num_batches_tracked', 'backbone.stage1.1.main_conv.conv.weight', 'backbone.stage1.1.main_conv.bn.weight', 'backbone.stage1.1.main_conv.bn.bias', 'backbone.stage1.1.main_conv.bn.running_mean', 'backbone.stage1.1.main_conv.bn.running_var', 'backbone.stage1.1.main_conv.bn.num_batches_tracked', 'backbone.stage1.1.short_conv.conv.weight', 'backbone.stage1.1.short_conv.bn.weight', 'backbone.stage1.1.short_conv.bn.bias', 'backbone.stage1.1.short_conv.bn.running_mean', 'backbone.stage1.1.short_conv.bn.running_var', 'backbone.stage1.1.short_conv.bn.num_batches_tracked', 'backbone.stage1

In [34]:
model.state_dict().keys()

odict_keys(['detector.backbone.stem.conv.conv.weight', 'detector.backbone.stem.conv.bn.weight', 'detector.backbone.stem.conv.bn.bias', 'detector.backbone.stem.conv.bn.running_mean', 'detector.backbone.stem.conv.bn.running_var', 'detector.backbone.stem.conv.bn.num_batches_tracked', 'detector.backbone.stage1.0.conv.weight', 'detector.backbone.stage1.0.bn.weight', 'detector.backbone.stage1.0.bn.bias', 'detector.backbone.stage1.0.bn.running_mean', 'detector.backbone.stage1.0.bn.running_var', 'detector.backbone.stage1.0.bn.num_batches_tracked', 'detector.backbone.stage1.1.main_conv.conv.weight', 'detector.backbone.stage1.1.main_conv.bn.weight', 'detector.backbone.stage1.1.main_conv.bn.bias', 'detector.backbone.stage1.1.main_conv.bn.running_mean', 'detector.backbone.stage1.1.main_conv.bn.running_var', 'detector.backbone.stage1.1.main_conv.bn.num_batches_tracked', 'detector.backbone.stage1.1.short_conv.conv.weight', 'detector.backbone.stage1.1.short_conv.bn.weight', 'detector.backbone.stage1.

# ReID

Below explores some weights from reid that were not in the pretrained weights

In [None]:
# Copyright (c) OpenMMLab. All rights reserved.
import os
import os.path as osp
import tempfile
from argparse import ArgumentParser

import mmcv

from mmtrack.apis import inference_mot, init_model

# config = r"D:\Github\mmtracking\mmtracking\configs\mot\deepsort\deepsort_yolox_tiny_8x8_mot17-private-half.py"
# config = r"D:\Github\mmtracking\mmtracking\configs\mot\deepsort\deepsort_faster-rcnn_fpn_4e_mot17-private-half.py"
config = r"D:\Github\mmtracking\mmtracking\configs\mot\deepsort\deepsort_yolox_tiny_8x8_mot17-private-half_v2.py"
checkpoint = None
device = "cuda:0"


# build the model from a config file and a checkpoint file
model = init_model(config, checkpoint, device=device)
model

Below will transform a path with frames of a video into a video

In [27]:
frames_path = r"D:\Github\mmtracking\mmtracking\data\06_23_2022\preds\deepsort\cfg003\viz\grace_holding_baseball_bat\frames"
out_vid = r"D:\Github\mmtracking\mmtracking\data\06_23_2022\preds\deepsort\cfg003\viz\grace_holding_baseball_bat\vid.mp4"
mmcv.frames2video(frames_path, out_vid, fourcc="mp4v")

[>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>] 294/294, 4.4 task/s, elapsed: 66s, ETA:     0s


In [16]:
model.reid.head

LinearReIDHead(
  (loss_cls): CrossEntropyLoss(avg_non_ignore=False)
  (loss_triplet): TripletLoss(
    (ranking_loss): MarginRankingLoss()
  )
  (accuracy): Accuracy()
  (fcs): ModuleList(
    (0): FcModule(
      (fc): Linear(in_features=2048, out_features=1024, bias=True)
      (bn): BatchNorm1d(1024, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
      (activate): ReLU(inplace=True)
    )
    init_cfg={'type': 'Kaiming', 'layer': 'Linear'}
  )
  (fc_out): Linear(in_features=1024, out_features=128, bias=True)
  (bn): BatchNorm1d(128, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
  (classifier): Linear(in_features=128, out_features=380, bias=True)
)
init_cfg={'type': 'Normal', 'layer': 'Linear', 'mean': 0, 'std': 0.01, 'bias': 0}

In [21]:
#del model.reid.head.bn
model.reid.head

LinearReIDHead(
  (loss_cls): CrossEntropyLoss(avg_non_ignore=False)
  (loss_triplet): TripletLoss(
    (ranking_loss): MarginRankingLoss()
  )
  (accuracy): Accuracy()
  (fcs): ModuleList(
    (0): FcModule(
      (fc): Linear(in_features=2048, out_features=1024, bias=True)
      (bn): BatchNorm1d(1024, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
      (activate): ReLU(inplace=True)
    )
    init_cfg={'type': 'Kaiming', 'layer': 'Linear'}
  )
  (fc_out): Linear(in_features=1024, out_features=128, bias=True)
  (classifier): Linear(in_features=128, out_features=380, bias=True)
)
init_cfg={'type': 'Normal', 'layer': 'Linear', 'mean': 0, 'std': 0.01, 'bias': 0}

In [23]:
#del model.reid.head.bn
model.reid.head.classifier.weight.shape

torch.Size([380, 128])

In [6]:
model.reid.head.bn

BatchNorm1d(128, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)

In [14]:
model.reid.head.bn.weight.shape

torch.Size([128])

In [7]:
model.reid.head.classifier

Linear(in_features=128, out_features=380, bias=True)

In [11]:
import torch

weights_path = r"D:\Github\mmtracking\mmtracking\weights\tracktor_reid_r50_iter25245-a452f51f.pth"
preTrainedDict = torch.load(weights_path)
preTrainedDict.keys()

odict_keys(['backbone.conv1.weight', 'backbone.bn1.weight', 'backbone.bn1.bias', 'backbone.bn1.running_mean', 'backbone.bn1.running_var', 'backbone.layer1.0.conv1.weight', 'backbone.layer1.0.bn1.weight', 'backbone.layer1.0.bn1.bias', 'backbone.layer1.0.bn1.running_mean', 'backbone.layer1.0.bn1.running_var', 'backbone.layer1.0.conv2.weight', 'backbone.layer1.0.bn2.weight', 'backbone.layer1.0.bn2.bias', 'backbone.layer1.0.bn2.running_mean', 'backbone.layer1.0.bn2.running_var', 'backbone.layer1.0.conv3.weight', 'backbone.layer1.0.bn3.weight', 'backbone.layer1.0.bn3.bias', 'backbone.layer1.0.bn3.running_mean', 'backbone.layer1.0.bn3.running_var', 'backbone.layer1.0.downsample.0.weight', 'backbone.layer1.0.downsample.1.weight', 'backbone.layer1.0.downsample.1.bias', 'backbone.layer1.0.downsample.1.running_mean', 'backbone.layer1.0.downsample.1.running_var', 'backbone.layer1.1.conv1.weight', 'backbone.layer1.1.bn1.weight', 'backbone.layer1.1.bn1.bias', 'backbone.layer1.1.bn1.running_mean', '

In [13]:
preTrainedDict["head.fcs.0.bn.weight"].shape

torch.Size([1024])

In [24]:
preTrainedDict["head.fcs.0.fc.weight"].shape

torch.Size([1024, 2048])

In [15]:
preTrainedDict["head.fc_out.weight"].shape

torch.Size([128, 1024])

In [16]:
device = next(model.parameters()).device
device

device(type='cuda', index=0)

In [40]:
import numpy as np 
from mmdet.datasets.pipelines import Compose
from mmcv.ops import RoIPool
from mmcv.parallel import collate, scatter
img = r"D:\Github\mmtracking\mmtracking\data\06_23_2022\preds\bytetrack\cfg001\viz\000002.jpg"
frame_id = 2

cfg = model.cfg
device = next(model.parameters()).device  # model device
# prepare data
if isinstance(img, np.ndarray):
    # directly add img
    data = dict(img=img, img_info=dict(frame_id=frame_id), img_prefix=None)
    cfg = cfg.copy()
    # set loading pipeline type
    cfg.data.test.pipeline[0].type = 'LoadImageFromWebcam'
else:
    # add information into dict
    data = dict(
        img_info=dict(filename=img, frame_id=frame_id), img_prefix=None)
# build the data pipeline
test_pipeline = Compose(cfg.data.test.pipeline)
"""
{'img_info': {'filename': 'D:\\Github\\mmtracking\\mmtracking\\data\x06_23_2022\\preds\x08ytetrack\\cfg001\x0biz\x00002.jpg',
  'frame_id': 2},
 'img_prefix': None}
"""
data = test_pipeline(data) # fails
data = collate([data], samples_per_gpu=1)
if next(model.parameters()).is_cuda:
    # scatter to specified GPU
    data = scatter(data, [device])[0]
else:
    for m in model.modules():
        assert not isinstance(
            m, RoIPool
        ), 'CPU inference with RoIPool is not supported currently.'
    # just get the actual data from DataContainer
    data['img_metas'] = data['img_metas'][0].data
# forward the model
with torch.no_grad():
    result = model(return_loss=False, rescale=True, **data)
result

{'det_bboxes': [array([[2.68163452e+02, 6.57019806e+01, 3.99621277e+02, 2.62253601e+02,
          6.88001871e-01],
         [1.00282478e+00, 3.90528564e+02, 2.14183853e+02, 7.15378845e+02,
          5.88423967e-01],
         [1.08638672e+03, 3.45268280e+02, 1.27939868e+03, 7.16355652e+02,
          4.83172596e-01],
         [6.67553406e+02, 3.50197968e+02, 7.40389709e+02, 4.29129486e+02,
          2.53530174e-01],
         [4.47429810e+02, 4.32137146e+02, 8.48930481e+02, 6.85256165e+02,
          5.95844649e-02],
         [2.89728210e+02, 2.31926483e+02, 3.44868347e+02, 2.61130676e+02,
          4.22661826e-02],
         [5.50090515e+02, 3.87062653e+02, 6.16259399e+02, 4.49026123e+02,
          3.60298753e-02],
         [2.51100189e+02, 1.20448799e+02, 2.93252777e+02, 2.52497086e+02,
          2.99452636e-02],
         [2.72144073e+02, 2.17304123e+02, 3.45083038e+02, 2.61174042e+02,
          2.93628238e-02],
         [6.58127869e+02, 3.49806885e+02, 7.51734985e+02, 4.51940491e+02,
   

In [41]:
result.keys()

dict_keys(['det_bboxes', 'track_bboxes'])

In [42]:
model = model.cuda()

In [43]:
import numpy as np 
from mmdet.datasets.pipelines import Compose
from mmcv.ops import RoIPool
from mmcv.parallel import collate, scatter
img = r"D:\Github\mmtracking\mmtracking\data\06_23_2022\preds\bytetrack\cfg001\viz\000002.jpg"
frame_id = 2

cfg = model.cfg
device = next(model.parameters()).device  # model device
# prepare data
if isinstance(img, np.ndarray):
    # directly add img
    data = dict(img=img, img_info=dict(frame_id=frame_id), img_prefix=None)
    cfg = cfg.copy()
    # set loading pipeline type
    cfg.data.test.pipeline[0].type = 'LoadImageFromWebcam'
else:
    # add information into dict
    data = dict(
        img_info=dict(filename=img, frame_id=frame_id), img_prefix=None)
# build the data pipeline
test_pipeline = Compose(cfg.data.test.pipeline)
"""
{'img_info': {'filename': 'D:\\Github\\mmtracking\\mmtracking\\data\x06_23_2022\\preds\x08ytetrack\\cfg001\x0biz\x00002.jpg',
  'frame_id': 2},
 'img_prefix': None}
"""
data = test_pipeline(data) # fails
data = collate([data], samples_per_gpu=1)
if next(model.parameters()).is_cuda:
    # scatter to specified GPU
    data = scatter(data, [device])[0]
else:
    for m in model.modules():
        assert not isinstance(
            m, RoIPool
        ), 'CPU inference with RoIPool is not supported currently.'
    # just get the actual data from DataContainer
    data['img_metas'] = data['img_metas'][0].data
# forward the model
with torch.no_grad():
    result = model(return_loss=False, rescale=True, **data)
result

{'det_bboxes': [array([[2.68163452e+02, 6.57019806e+01, 3.99621277e+02, 2.62253601e+02,
          6.88001871e-01],
         [1.00282478e+00, 3.90528564e+02, 2.14183853e+02, 7.15378845e+02,
          5.88423967e-01],
         [1.08638672e+03, 3.45268280e+02, 1.27939868e+03, 7.16355652e+02,
          4.83172596e-01],
         [6.67553406e+02, 3.50197968e+02, 7.40389709e+02, 4.29129486e+02,
          2.53530174e-01],
         [4.47429810e+02, 4.32137146e+02, 8.48930481e+02, 6.85256165e+02,
          5.95844649e-02],
         [2.89728210e+02, 2.31926483e+02, 3.44868347e+02, 2.61130676e+02,
          4.22661826e-02],
         [5.50090515e+02, 3.87062653e+02, 6.16259399e+02, 4.49026123e+02,
          3.60298753e-02],
         [2.51100189e+02, 1.20448799e+02, 2.93252777e+02, 2.52497086e+02,
          2.99452636e-02],
         [2.72144073e+02, 2.17304123e+02, 3.45083038e+02, 2.61174042e+02,
          2.93628238e-02],
         [6.58127869e+02, 3.49806885e+02, 7.51734985e+02, 4.51940491e+02,
   