In [1]:
import argparse
import numpy as np
import sys
from functools import partial
import os
from tritonclient import grpc
import tritonclient.grpc.model_config_pb2 as mc
from tritonclient import http
from tritonclient.utils import triton_to_np_dtype
from tritonclient.utils import InferenceServerException
import torch
from clearml import InputModel, Task
import shutil
import pathlib

# Local modules
from cub_tools.trainer import ClearML_Ignite_Trainer
from cub_tools.args import get_parser
from cub_tools.config import get_cfg_defaults, get_key_value_dict

In [2]:
def run_inference(X, X_shape=(1, 3, 224,  224), X_dtype='FP32', model_name='cub200_resnet34', input_name=['INPUT__0'], output_name='OUTPUT__0',
                  url='ecm-clearml-compute-gpu-002.westeurope.cloudapp.azure.com', model_version='1', port=8000, VERBOSE=False):
    """Send one batch to a Triton inference server over HTTP and return its predictions.

    Parameters
    ----------
    X : numpy.ndarray
        Input batch handed to ``InferInput.set_data_from_numpy``.
    X_shape : sequence of int
        Shape declared for the input tensor; it must describe ``X``.
    X_dtype : str
        Triton datatype name declared for the input tensor.
    model_name, model_version : str
        Served model to query.
    input_name : str or sequence of str
        Name of the model's input tensor. A bare string is accepted; for a
        sequence only the first entry is used (a single input is sent).
        The list default is never mutated.
    output_name : str
        Name of the output tensor to request and decode.
    url : str
        Server host name, without the port.
    port : int
        HTTP port of the server; joined to ``url`` as ``host:port``.
    VERBOSE : bool
        Forwarded to the client as its ``verbose`` flag.

    Returns
    -------
    (y_pred_proba, y_pred)
        ``y_pred_proba`` is the requested output tensor as a numpy array
        (whether it is normalised depends on the served model) and
        ``y_pred`` is its argmax along axis 1.

    Raises
    ------
    Whatever the Triton client raises when the server or model cannot be
    reached; the error is not swallowed here.
    """
    # Accept a plain tensor name as well as the original list form, so a
    # string is not silently truncated to its first character.
    first_input_name = input_name if isinstance(input_name, str) else input_name[0]

    triton_client = http.InferenceServerClient(url='{}:{}'.format(url, port), verbose=VERBOSE)
    try:
        # The model metadata/config were previously fetched here but never
        # used; dropping them saves two server round trips per call.
        input0 = http.InferInput(first_input_name, list(X_shape), X_dtype)
        input0.set_data_from_numpy(X, binary_data=False)
        output = http.InferRequestedOutput(output_name, binary_data=False)
        response = triton_client.infer(model_name, model_version=model_version, inputs=[input0], outputs=[output])
        y_pred_proba = response.as_numpy(output_name)
    finally:
        # Release the client's connections even when the request fails.
        triton_client.close()

    y_pred = y_pred_proba.argmax(1)

    return y_pred_proba, y_pred

In [3]:
# Root of the local caltech_birds checkout. Export CUB_PROJECT_ROOT before
# starting the kernel to run on another machine; when it is unset the
# original paths are used unchanged.
project_root = os.environ.get('CUB_PROJECT_ROOT', '/home/edmorris/projects/image_classification/caltech_birds')

# Directory the downloaded model file is moved into.
model_repo_path = project_root + '/models/clearml_repo/ignite_resnet34'
# YAML configuration handed to the trainer.
model_config = project_root + '/scripts/configs/torchvision/resnet34_config.yaml'

In [4]:

# Look up the registered model in ClearML by its ID and fetch a local copy
# of its weights file.
model = InputModel(model_id="57ed24c1011346d292ecc9e797ccb47e")
local_cache_path = model.get_local_copy()
# Fail here with a clear message instead of letting an empty path reach the
# file move in the next step.
if not local_cache_path:
    raise RuntimeError('ClearML could not provide a local copy of the model weights.')


2021-06-11 14:23:51,313 - clearml.storage - INFO - Downloading: 5.00MB / 81.72MB @ 25.63MBs from azure://clearmllibrary/artefacts/Caltech Birds%2FTraining/TRAIN [Network%3A resnet34, Library%3A torchvision] Ignite Train PyTorch CNN on CUB200.8611ada5be6f4bb6ba09cf730ecd2253/models/cub200_resnet34_ignite_best_model_0.pt
2021-06-11 14:23:51,508 - clearml.storage - INFO - Downloading: 13.00MB / 81.72MB @ 40.96MBs from azure://clearmllibrary/artefacts/Caltech Birds%2FTraining/TRAIN [Network%3A resnet34, Library%3A torchvision] Ignite Train PyTorch CNN on CUB200.8611ada5be6f4bb6ba09cf730ecd2253/models/cub200_resnet34_ignite_best_model_0.pt
2021-06-11 14:23:51,535 - clearml.storage - INFO - Downloading: 21.00MB / 81.72MB @ 296.87MBs from azure://clearmllibrary/artefacts/Caltech Birds%2FTraining/TRAIN [Network%3A resnet34, Library%3A torchvision] Ignite Train PyTorch CNN on CUB200.8611ada5be6f4bb6ba09cf730ecd2253/models/cub200_resnet34_ignite_best_model_0.pt
2021-06-11 14:23:51,622 - clearml.

In [5]:
# Make sure the target directory exists; shutil.move does not create
# missing parent directories.
os.makedirs(model_repo_path, exist_ok=True)
# Move the downloaded file into the model repository, keeping its file name.
# shutil.move returns the destination path.
local_model_path = shutil.move(src=local_cache_path, dst=os.path.join(model_repo_path, pathlib.Path(local_cache_path).name))

In [6]:
# Configuration overrides, flattened below into the alternating
# key, value, key, value list that the trainer takes as cmd_args.
config_overrides = {
    'DIRS.CLEAN_UP': False,     # Leave the directory structure untouched.
    'MODEL.PRETRAINED': False,  # Skip the default weights; our own are loaded instead.
}
cmd_args = [token for key_value in config_overrides.items() for token in key_value]

# TODO: pull the config from the ClearML task instead of a local file
# (model.task gives the task ID, from which the task config can be fetched).
trainer = ClearML_Ignite_Trainer(config=model_config, cmd_args=cmd_args)


[INFO] Parameters Override:: ['DIRS.CLEAN_UP', False, 'MODEL.PRETRAINED', False]
DATA:
  DATA_DIR: /home/edmorris/projects/image_classification/caltech_birds/data/images
  NUM_CLASSES: 200
  TEST_DIR: test
  TRAIN_DIR: train
  TRANSFORMS:
    PARAMS:
      AGGRESIVE:
        persp_distortion_scale: 0.25
        rotation_range: (-10.0, 10.0)
        type: all
      DEFAULT:
        img_crop_size: 224
        img_resize: 256
    TYPE: default
DIRS:
  CLEAN_UP: False
  ROOT_DIR: /home/edmorris/projects/image_classification/caltech_birds
  WORKING_DIR: /home/edmorris/projects/image_classification/caltech_birds/models/classification/ignite_resnet34
EARLY_STOPPING_PATIENCE: 5
MODEL:
  MODEL_LIBRARY: torchvision
  MODEL_NAME: resnet34
  PRETRAINED: False
  WITH_AMP: False
  WITH_GRAD_SCALE: False
SYSTEM:
  LOG_HISTORY: True
TRAIN:
  BATCH_SIZE: 16
  LOSS:
    CRITERION: CrossEntropy
  NUM_EPOCHS: 40
  NUM_WORKERS: 4
  OPTIMIZER:
    PARAMS:
      lr: 0.001
      momentum: 0.9
      nesterov: 

In [7]:
# Get a sample dataset for running inference with
trainer.create_datatransforms()
trainer.create_dataloaders(shuffle={'train' : True, 'test' : True})

***********************************************
**            DATASET SUMMARY                **
***********************************************
train  size::  5994  images
test  size::  5794  images
Number of classes::  200
***********************************************
[INFO] Created data loaders.


In [8]:
# Build the network described by the trainer config, but do not move it to
# the compute device yet (load_to_device=False).
trainer.create_model(load_to_device=False)

[INFO] Successfully created model but NOT pushed it to the device cuda:0
----------------------------------------------------------------
        Layer (type)               Output Shape         Param #
            Conv2d-1         [16, 64, 112, 112]           9,408
       BatchNorm2d-2         [16, 64, 112, 112]             128
              ReLU-3         [16, 64, 112, 112]               0
         MaxPool2d-4           [16, 64, 56, 56]               0
            Conv2d-5           [16, 64, 56, 56]          36,864
       BatchNorm2d-6           [16, 64, 56, 56]             128
              ReLU-7           [16, 64, 56, 56]               0
            Conv2d-8           [16, 64, 56, 56]          36,864
       BatchNorm2d-9           [16, 64, 56, 56]             128
             ReLU-10           [16, 64, 56, 56]               0
       BasicBlock-11           [16, 64, 56, 56]               0
           Conv2d-12           [16, 64, 56, 56]          36,864
      BatchNorm2d-13          

In [9]:
# Load the weights file moved into the model repository into the model.
# According to the logged output, this call also pushes the model to the device.
trainer.update_model_from_checkpoint(checkpoint_file=local_model_path)

[INFO] Successfully loaded weights into the model from weights file:: /home/edmorris/projects/image_classification/caltech_birds/models/clearml_repo/ignite_resnet34/e38f6052e6b887337635fc2821a6b5d4.cub200_resnet34_ignite_best_model_0.pt
[INFO] Successfully updated model and pushed it to the device cuda:0
----------------------------------------------------------------
        Layer (type)               Output Shape         Param #
            Conv2d-1         [16, 64, 112, 112]           9,408
       BatchNorm2d-2         [16, 64, 112, 112]             128
              ReLU-3         [16, 64, 112, 112]               0
         MaxPool2d-4           [16, 64, 56, 56]               0
            Conv2d-5           [16, 64, 56, 56]          36,864
       BatchNorm2d-6           [16, 64, 56, 56]             128
              ReLU-7           [16, 64, 56, 56]               0
            Conv2d-8           [16, 64, 56, 56]          36,864
       BatchNorm2d-9           [16, 64, 56, 56]      

In [30]:
# Eval mode only needs to be set once, before the comparison loop.
trainer.model.eval()

# Compare local and Triton-served predictions on 9 validation batches.
# The loader is iterated once instead of rebuilding its iterator for every
# batch; zip() stops after 9 batches without fetching a spare one.
# no_grad() avoids building an autograd graph for pure inference.
with torch.no_grad():
    for _batch_idx, (X, y) in zip(range(9), trainer.val_loader):
        # Push input images to the trainer's device
        X_gpu = X.to(trainer.device)
        # Run local inference on the validation batch
        y_prob_pred = trainer.model(X_gpu)
        # Get predicted classes
        _, y_pred = torch.max(y_prob_pred, 1)

        # Get Triton served predicted classes for the same batch
        y_pred_proba_remote, y_pred_remote = run_inference(X.numpy(), X.shape)

        print('Result:: \ty\t\t:: {} \n\t \ty_pred[local]\t:: {} \n\t \ty_pred[triton]\t:: {} '.format(y.numpy(),y_pred.cpu().numpy(),y_pred_remote))
        print('')

Result:: 	y		:: [168 177] 
	 	y_pred[local]	:: [168 180] 
	 	y_pred[triton]	:: [168 180] 

Result:: 	y		:: [171 167] 
	 	y_pred[local]	:: [171 167] 
	 	y_pred[triton]	:: [171 167] 

Result:: 	y		:: [87  9] 
	 	y_pred[local]	:: [87  9] 
	 	y_pred[triton]	:: [87  9] 

Result:: 	y		:: [179 180] 
	 	y_pred[local]	:: [179 180] 
	 	y_pred[triton]	:: [179 180] 

Result:: 	y		:: [61 69] 
	 	y_pred[local]	:: [58 69] 
	 	y_pred[triton]	:: [58 69] 

Result:: 	y		:: [101 127] 
	 	y_pred[local]	:: [ 39 127] 
	 	y_pred[triton]	:: [ 39 127] 

Result:: 	y		:: [78 30] 
	 	y_pred[local]	:: [78 30] 
	 	y_pred[triton]	:: [78 30] 

Result:: 	y		:: [65 29] 
	 	y_pred[local]	:: [65 26] 
	 	y_pred[triton]	:: [65 26] 

Result:: 	y		:: [ 89 179] 
	 	y_pred[local]	:: [ 89 179] 
	 	y_pred[triton]	:: [ 89 179] 



Result:: 	y		:: [197 133] 
	 	y_pred[local]	:: [197 133] 
	 	y_pred[triton]	:: [197 133] 
