In [2]:
import argparse
import numpy as np
import sys
from functools import partial
import os
from tritonclient import grpc
import tritonclient.grpc.model_config_pb2 as mc
from tritonclient import http
from tritonclient.utils import triton_to_np_dtype
from tritonclient.utils import InferenceServerException
import torch
from clearml import InputModel, Task
import shutil
import pathlib

# Local modules
from cub_tools.trainer import ClearML_Ignite_Trainer
from cub_tools.args import get_parser
from cub_tools.config import get_cfg_defaults, get_key_value_dict
from cub_tools.triton import run_inference, get_model_info

In [1]:
# Root of the local caltech_birds checkout. Set the CALTECH_BIRDS_ROOT environment
# variable to run this notebook on another machine; the default reproduces the
# original hard-coded location exactly.
project_root = os.environ.get(
    'CALTECH_BIRDS_ROOT',
    '/home/edmorris/projects/image_classification/caltech_birds',
)

# Directory the downloaded ClearML model file is moved into.
model_repo_path = os.path.join(project_root, 'models', 'clearml_repo', 'ignite_resnet34')
# YAML configuration file passed to the trainer as its `config` argument.
model_config = os.path.join(project_root, 'scripts', 'configs', 'torchvision', 'resnet34_config.yaml')

In [3]:

# Look up the registered ClearML model by its ID and download the weights file
# to a local copy.
model = InputModel(model_id="3c33df3afcc04e1cbc2d0330458a516f")
local_cache_path = model.get_local_copy()

# NOTE(review): per the ClearML documentation, get_local_copy() returns None when
# the download fails (unless raise_on_error=True) -- confirm for the installed
# version. Stop here with a clear message rather than letting a later file
# operation fail with an opaque TypeError.
if not local_cache_path:
    raise RuntimeError(
        'ClearML could not provide a local copy of model {}'.format(model.id)
    )


2021-06-14 16:07:33,157 - clearml.storage - INFO - Downloading: 5.00MB / 81.72MB @ 19.43MBs from azure://clearmllibrary/artefacts/Caltech Birds%2FTraining/TRAIN [Network%3A resnet34, Library%3A torchvision] Ignite Train PyTorch CNN on CUB200.7ccafed2331443aba8735ee9e4a33cfe/models/cub200_resnet34_ignite_best_model_0.pt
2021-06-14 16:07:33,348 - clearml.storage - INFO - Downloading: 13.00MB / 81.72MB @ 41.80MBs from azure://clearmllibrary/artefacts/Caltech Birds%2FTraining/TRAIN [Network%3A resnet34, Library%3A torchvision] Ignite Train PyTorch CNN on CUB200.7ccafed2331443aba8735ee9e4a33cfe/models/cub200_resnet34_ignite_best_model_0.pt
2021-06-14 16:07:33,375 - clearml.storage - INFO - Downloading: 21.00MB / 81.72MB @ 299.33MBs from azure://clearmllibrary/artefacts/Caltech Birds%2FTraining/TRAIN [Network%3A resnet34, Library%3A torchvision] Ignite Train PyTorch CNN on CUB200.7ccafed2331443aba8735ee9e4a33cfe/models/cub200_resnet34_ignite_best_model_0.pt
2021-06-14 16:07:33,389 - clearml.

In [4]:
# Relocate the downloaded weights file into the model repository directory,
# keeping the file name it was downloaded under.
# shutil.move() does not create missing parent directories, so make sure the
# destination exists first (a no-op when it is already there).
os.makedirs(model_repo_path, exist_ok=True)
model_file_name = pathlib.Path(local_cache_path).name
# shutil.move() returns the destination path, which later cells use as the
# checkpoint file to load.
local_model_path = shutil.move(
    src=local_cache_path,
    dst=os.path.join(model_repo_path, model_file_name),
)

In [5]:
# Configuration overrides, given as a flat [key, value, key, value, ...] list:
#   DIRS.CLEAN_UP    -> False : don't do anything to the directory structure.
#   MODEL.PRETRAINED -> False : don't load default weights, as we want to load our own.
cmd_args = ['DIRS.CLEAN_UP', False, 'MODEL.PRETRAINED', False]

# TODO Get config from clearml task at some point. From model.task to get task ID,
# and then pull the task config.
trainer = ClearML_Ignite_Trainer(
    task=None,
    config=model_config,
    cmd_args=cmd_args,
)


[INFO] Parameters Override:: ['DIRS.CLEAN_UP', False, 'MODEL.PRETRAINED', False]
DATA:
  DATA_DIR: /home/edmorris/projects/image_classification/caltech_birds/data/images
  NUM_CLASSES: 200
  TEST_DIR: test
  TRAIN_DIR: train
  TRANSFORMS:
    PARAMS:
      AGGRESIVE:
        persp_distortion_scale: 0.25
        rotation_range: (-10.0, 10.0)
        type: all
      DEFAULT:
        img_crop_size: 224
        img_resize: 256
    TYPE: default
DIRS:
  CLEAN_UP: False
  ROOT_DIR: /home/edmorris/projects/image_classification/caltech_birds
  WORKING_DIR: /home/edmorris/projects/image_classification/caltech_birds/models/classification/ignite_resnet34
EARLY_STOPPING_PATIENCE: 5
MODEL:
  MODEL_LIBRARY: torchvision
  MODEL_NAME: resnet34
  PRETRAINED: False
  WITH_AMP: False
  WITH_GRAD_SCALE: False
SYSTEM:
  LOG_HISTORY: True
TRAIN:
  BATCH_SIZE: 16
  LOSS:
    CRITERION: CrossEntropy
  NUM_EPOCHS: 40
  NUM_WORKERS: 4
  OPTIMIZER:
    PARAMS:
      lr: 0.001
      momentum: 0.9
      nesterov: 

In [6]:
# Build the image transforms and then the data loaders, so there is a sample
# dataset available for running inference with. Shuffling is requested for
# both the 'train' and the 'test' split.
shuffle_flags = dict(train=True, test=True)
trainer.create_datatransforms()
trainer.create_dataloaders(shuffle=shuffle_flags)

***********************************************
**            DATASET SUMMARY                **
***********************************************
train  size::  5994  images
test  size::  5794  images
Number of classes::  200
***********************************************
[INFO] Created data loaders.


In [7]:
# Create the model on the trainer without pushing it to the device
# (load_to_device=False); the trained weights are loaded from the checkpoint afterwards.
trainer.create_model(load_to_device=False)

[INFO] Successfully created model but NOT pushed it to the device cuda:0
----------------------------------------------------------------
        Layer (type)               Output Shape         Param #
            Conv2d-1         [16, 64, 112, 112]           9,408
       BatchNorm2d-2         [16, 64, 112, 112]             128
              ReLU-3         [16, 64, 112, 112]               0
         MaxPool2d-4           [16, 64, 56, 56]               0
            Conv2d-5           [16, 64, 56, 56]          36,864
       BatchNorm2d-6           [16, 64, 56, 56]             128
              ReLU-7           [16, 64, 56, 56]               0
            Conv2d-8           [16, 64, 56, 56]          36,864
       BatchNorm2d-9           [16, 64, 56, 56]             128
             ReLU-10           [16, 64, 56, 56]               0
       BasicBlock-11           [16, 64, 56, 56]               0
           Conv2d-12           [16, 64, 56, 56]          36,864
      BatchNorm2d-13          

In [8]:
# Load the weights from the relocated checkpoint file into the trainer's model.
# The logged output reports that this call also pushes the updated model to the device.
trainer.update_model_from_checkpoint(checkpoint_file=local_model_path)

[INFO] Successfully loaded weights into the model from weights file:: /home/edmorris/projects/image_classification/caltech_birds/models/clearml_repo/ignite_resnet34/673ef0751293af799f525c3dfed99a48.cub200_resnet34_ignite_best_model_0.pt
[INFO] Successfully updated model and pushed it to the device cuda:0
----------------------------------------------------------------
        Layer (type)               Output Shape         Param #
            Conv2d-1         [16, 64, 112, 112]           9,408
       BatchNorm2d-2         [16, 64, 112, 112]             128
              ReLU-3         [16, 64, 112, 112]               0
         MaxPool2d-4           [16, 64, 56, 56]               0
            Conv2d-5           [16, 64, 56, 56]          36,864
       BatchNorm2d-6           [16, 64, 56, 56]             128
              ReLU-7           [16, 64, 56, 56]               0
            Conv2d-8           [16, 64, 56, 56]          36,864
       BatchNorm2d-9           [16, 64, 56, 56]      

In [9]:
#model_conf, model_meta = get_model_info(model_name='cub200_resnext101', VERBOSE=False)

# Name of the Triton deployment the local predictions are compared against.
# NOTE(review): the local model in this notebook is built from a resnet34 config,
# while this queries 'cub200_resnext101'. The recorded output shows the Triton
# predictions disagreeing with the local ones on almost every sample -- confirm
# that this is the intended deployment (and that it applies the same preprocessing).
TRITON_MODEL_NAME = 'cub200_resnext101'
# Number of validation batches to compare (the original loop ran nine times).
NUM_COMPARE_BATCHES = 9

# Eval mode does not change between batches, so set it once up front.
trainer.model.eval()

# Gradients are not needed for a prediction comparison; no_grad() avoids building
# the autograd graph and the memory that goes with it.
with torch.no_grad():
    # Draw successive batches from a single pass over the loader instead of
    # rebuilding a loader iterator for every batch. zip() with range() first
    # stops after NUM_COMPARE_BATCHES without pulling an extra batch, and ends
    # early (rather than raising) if the loader is shorter.
    for _batch_idx, (X, y) in zip(range(NUM_COMPARE_BATCHES), trainer.val_loader):
        # Push input images to the trainer's device
        X_gpu = X.to(trainer.device)
        # Run local inference on the validation batch
        y_prob_pred = trainer.model(X_gpu)
        # Predicted class = index of the largest output along dim 1
        _, y_pred = torch.max(y_prob_pred, 1)

        # Get Triton served predicted classes for the same (CPU) batch
        y_pred_proba_remote, y_pred_remote = run_inference(X.numpy(), X.shape, model_name=TRITON_MODEL_NAME, VERBOSE=False)

        print('Result:: \ty\t\t:: {} \n\t \ty_pred[local]\t:: {} \n\t \ty_pred[triton]\t:: {} '.format(y.numpy(),y_pred.cpu().numpy(),y_pred_remote))
        print('')

Result:: 	y		:: [196   2] 
	 	y_pred[local]	:: [196   2] 
	 	y_pred[triton]	:: [122  58] 

Result:: 	y		:: [122 141] 
	 	y_pred[local]	:: [122 146] 
	 	y_pred[triton]	:: [ 54 146] 

Result:: 	y		:: [ 15 103] 
	 	y_pred[local]	:: [ 15 103] 
	 	y_pred[triton]	:: [177 157] 

Result:: 	y		:: [ 87 180] 
	 	y_pred[local]	:: [ 87 180] 
	 	y_pred[triton]	:: [ 12 169] 

Result:: 	y		:: [194 142] 
	 	y_pred[local]	:: [ 30 140] 
	 	y_pred[triton]	:: [138 144] 

Result:: 	y		:: [164 178] 
	 	y_pred[local]	:: [164 178] 
	 	y_pred[triton]	:: [119 179] 

Result:: 	y		:: [30 36] 
	 	y_pred[local]	:: [ 30 177] 
	 	y_pred[triton]	:: [169  69] 

Result:: 	y		:: [ 51 163] 
	 	y_pred[local]	:: [ 51 163] 
	 	y_pred[triton]	:: [103  13] 

Result:: 	y		:: [60 65] 
	 	y_pred[local]	:: [60 65] 
	 	y_pred[triton]	:: [136  44] 



In [None]:
# Fetch the served model's configuration and metadata via get_model_info.
# Without this assignment neither `model_conf` nor `model_meta` is ever defined
# on a fresh kernel (the only other call is commented out), so this cell and the
# metadata cells after it would raise NameError.
model_conf, model_meta = get_model_info(model_name='cub200_resnext101', VERBOSE=False)
model_conf

In [None]:
# Display the 'name' entry of the model metadata.
# Requires `model_meta` to have been assigned from get_model_info(...) earlier in the session.
model_meta['name']

In [None]:
# Print every metadata entry as "<key>   ::  <value>", one per line.
row_template = '{}   ::  {}'
for field in model_meta.keys():
    field_value = model_meta[field]
    print(row_template.format(field, field_value))