In [1]:
import argparse
import numpy as np
import sys
from functools import partial
import os
from tritonclient import grpc
import tritonclient.grpc.model_config_pb2 as mc
from tritonclient import http
from tritonclient.utils import triton_to_np_dtype
from tritonclient.utils import InferenceServerException
import torch
from clearml import InputModel, Task
import shutil
import pathlib

# Local modules
from cub_tools.trainer import ClearML_Ignite_Trainer
from cub_tools.args import get_parser
from cub_tools.config import get_cfg_defaults, get_key_value_dict
from cub_tools.triton import run_inference, get_model_info

In [2]:
# --- Notebook configuration -------------------------------------------------
# Root of the caltech_birds checkout. Override by exporting CUB_PROJECT_ROOT
# before starting the kernel; the default is the original author's location,
# so existing behaviour is unchanged when the variable is not set.
PROJECT_ROOT = os.environ.get(
    'CUB_PROJECT_ROOT',
    '/home/edmorris/projects/image_classification/caltech_birds',
)

# Directory the downloaded checkpoint is moved into.
# NOTE(review): the directory is named after resnet34 while the config below
# selects pnasnet5large -- confirm this is the intended destination.
model_repo_path = os.path.join(PROJECT_ROOT, 'models', 'clearml_repo', 'ignite_resnet34')

# YAML config describing the network to rebuild locally.
# Alternative (torchvision ResNet-34): scripts/configs/torchvision/resnet34_config.yaml
model_config = os.path.join(PROJECT_ROOT, 'scripts', 'configs', 'pytorchcv', 'pnasnetlarge_config.yaml')

In [3]:

# Look up the trained model registered in ClearML by its ID and download the
# weights file to the local ClearML cache.
model = InputModel(model_id="d1a0a6c9a33f4d1da1bdf4f81f1595fa")
local_cache_path = model.get_local_copy()

# Fail fast with a clear message instead of letting a missing download surface
# later as an obscure error when the path is handed to shutil.move().
if not local_cache_path:
    raise RuntimeError(
        'ClearML did not return a local copy of the model; '
        'check the model ID and storage credentials.'
    )


2021-06-15 18:29:24,840 - clearml.storage - INFO - Downloading: 5.00MB / 316.32MB @ 16.74MBs from azure://clearmllibrary/artefacts/Caltech Birds%2FTraining/TRAIN [Network%3A pnasnet5large, Library%3A pytorchcv] Ignite Train PyTorch CNN on CUB200.52c46d63fe13483b960b4db3d2e914a4/models/cub200_pnasnet5large_ignite_best_model_0.pt
2021-06-15 18:29:25,052 - clearml.storage - INFO - Downloading: 13.00MB / 316.32MB @ 37.64MBs from azure://clearmllibrary/artefacts/Caltech Birds%2FTraining/TRAIN [Network%3A pnasnet5large, Library%3A pytorchcv] Ignite Train PyTorch CNN on CUB200.52c46d63fe13483b960b4db3d2e914a4/models/cub200_pnasnet5large_ignite_best_model_0.pt
2021-06-15 18:29:25,169 - clearml.storage - INFO - Downloading: 21.00MB / 316.32MB @ 68.41MBs from azure://clearmllibrary/artefacts/Caltech Birds%2FTraining/TRAIN [Network%3A pnasnet5large, Library%3A pytorchcv] Ignite Train PyTorch CNN on CUB200.52c46d63fe13483b960b4db3d2e914a4/models/cub200_pnasnet5large_ignite_best_model_0.pt
2021-06-

In [4]:
# Move the downloaded checkpoint out of the ClearML cache into the model
# repository, keeping its original file name.
# The repository directory is created first so the move also works on a
# machine where it does not exist yet.
os.makedirs(model_repo_path, exist_ok=True)
local_model_path = shutil.move(
    src=local_cache_path,
    dst=os.path.join(model_repo_path, pathlib.Path(local_cache_path).name),
)

In [5]:
# Config overrides applied on top of the YAML file.
overrides = {
    'DIRS.CLEAN_UP': False,     # Don't do anything to the directory structure.
    'MODEL.PRETRAINED': False,  # Don't load default weights, as we want to load our own.
}
# The trainer takes the overrides as a flat [key, value, key, value, ...] list.
cmd_args = [item for pair in overrides.items() for item in pair]

# TODO Get config from clearml task at some point. From model.task to get
# task ID, and then pull the task config.
trainer = ClearML_Ignite_Trainer(
    task=None,
    config=model_config,
    cmd_args=cmd_args,
)


[INFO] Parameters Override:: ['DIRS.CLEAN_UP', False, 'MODEL.PRETRAINED', False]
DATA:
  DATA_DIR: /home/edmorris/projects/image_classification/caltech_birds/data/images
  NUM_CLASSES: 200
  TEST_DIR: test
  TRAIN_DIR: train
  TRANSFORMS:
    PARAMS:
      AGGRESIVE:
        persp_distortion_scale: 0.25
        rotation_range: (-10.0, 10.0)
        type: all
      DEFAULT:
        img_crop_size: 331
        img_resize: 512
    TYPE: default
DIRS:
  CLEAN_UP: False
  ROOT_DIR: /home/edmorris/projects/image_classification/caltech_birds
  WORKING_DIR: /home/edmorris/projects/image_classification/caltech_birds/models/classification/ignite_pnasnet5large
EARLY_STOPPING_PATIENCE: 5
MODEL:
  MODEL_LIBRARY: pytorchcv
  MODEL_NAME: pnasnet5large
  PRETRAINED: False
  WITH_AMP: False
  WITH_GRAD_SCALE: False
SYSTEM:
  LOG_HISTORY: True
TRAIN:
  BATCH_SIZE: 16
  LOSS:
    CRITERION: CrossEntropy
  NUM_EPOCHS: 40
  NUM_WORKERS: 4
  OPTIMIZER:
    PARAMS:
      lr: 0.001
      momentum: 0.9
      ne

In [6]:
# Prepare a sample dataset to run inference on: create the data transforms,
# then the data loaders, with shuffling switched on for both splits.
trainer.create_datatransforms()
shuffle_by_split = dict(train=True, test=True)
trainer.create_dataloaders(shuffle=shuffle_by_split)

***********************************************
**            DATASET SUMMARY                **
***********************************************
train  size::  5994  images
test  size::  5794  images
Number of classes::  200
***********************************************
[INFO] Created data loaders.


In [7]:
# Build the network described by the config. load_to_device=False is passed,
# so presumably the model is not moved to the compute device at this point
# (NOTE(review): confirm against ClearML_Ignite_Trainer.create_model).
trainer.create_model(load_to_device=False)

      0
         Conv2d-1051          [16, 864, 11, 11]           7,776
         Conv2d-1052          [16, 864, 11, 11]         746,496
        DwsConv-1053          [16, 864, 11, 11]               0
    BatchNorm2d-1054          [16, 864, 11, 11]           1,728
     NasDwsConv-1055          [16, 864, 11, 11]               0
           ReLU-1056          [16, 864, 11, 11]               0
         Conv2d-1057          [16, 864, 11, 11]           7,776
         Conv2d-1058          [16, 864, 11, 11]         746,496
        DwsConv-1059          [16, 864, 11, 11]               0
    BatchNorm2d-1060          [16, 864, 11, 11]           1,728
     NasDwsConv-1061          [16, 864, 11, 11]               0
      DwsBranch-1062          [16, 864, 11, 11]               0
           ReLU-1063          [16, 864, 21, 21]               0
         Conv2d-1064          [16, 864, 11, 11]         746,496
    BatchNorm2d-1065          [16, 864, 11, 11]           1,728
        NasConv-1066          [1

In [8]:
# Load the weights from the checkpoint file that was downloaded from ClearML
# and moved into the model repository (local_model_path).
trainer.update_model_from_checkpoint(checkpoint_file=local_model_path)

      0
         Conv2d-1051          [16, 864, 11, 11]           7,776
         Conv2d-1052          [16, 864, 11, 11]         746,496
        DwsConv-1053          [16, 864, 11, 11]               0
    BatchNorm2d-1054          [16, 864, 11, 11]           1,728
     NasDwsConv-1055          [16, 864, 11, 11]               0
           ReLU-1056          [16, 864, 11, 11]               0
         Conv2d-1057          [16, 864, 11, 11]           7,776
         Conv2d-1058          [16, 864, 11, 11]         746,496
        DwsConv-1059          [16, 864, 11, 11]               0
    BatchNorm2d-1060          [16, 864, 11, 11]           1,728
     NasDwsConv-1061          [16, 864, 11, 11]               0
      DwsBranch-1062          [16, 864, 11, 11]               0
           ReLU-1063          [16, 864, 21, 21]               0
         Conv2d-1064          [16, 864, 11, 11]         746,496
    BatchNorm2d-1065          [16, 864, 11, 11]           1,728
        NasConv-1066          [1

In [10]:
# Compare predictions from the local PyTorch model against the same model
# served by Triton, batch by batch.

# Name under which the model is served by Triton. Used for both the metadata
# query and the inference requests so the two cannot drift apart.
TRITON_MODEL_NAME = 'cub200_pnasnet'
# Number of validation batches to compare.
NUM_BATCHES = 9

# Fetch the served model's config and metadata; later cells display them.
# NOTE(review): the (config, metadata) return pair is taken from the call that
# was previously commented out here -- confirm against cub_tools.triton.
# NOTE(review): Triton rejects a request whose input shape differs from the
# served model's declared input dims (e.g. a served [-1,3,224,224] against
# 331x331 crops), so check model_conf if run_inference raises.
model_conf, model_meta = get_model_info(model_name=TRITON_MODEL_NAME, VERBOSE=False)

# Loop-invariant setup, done once.
# The model was created with load_to_device=False while the inputs below are
# pushed to trainer.device; .to() is a no-op if the model is already there.
trainer.model.to(trainer.device)
trainer.model.eval()

# Iterate the loader once rather than rebuilding its iterator for every batch.
# range() comes first in zip() so no extra batch is fetched after the last
# comparison. The batches are the first NUM_BATCHES of one pass over the
# loader, where previously each one came from a freshly built iterator.
for _, (X, y) in zip(range(NUM_BATCHES), trainer.val_loader):
    # Run local inference on the validation batch; no gradients are needed.
    with torch.no_grad():
        y_prob_pred = trainer.model(X.to(trainer.device))
    # Get predicted classes
    _, y_pred = torch.max(y_prob_pred, 1)

    # Get Triton served predicted classes
    y_pred_proba_remote, y_pred_remote = run_inference(X.numpy(), X.shape, model_name=TRITON_MODEL_NAME, VERBOSE=False)

    print('Result:: \ty\t\t:: {} \n\t \ty_pred[local]\t:: {} \n\t \ty_pred[triton]\t:: {} '.format(y.numpy(),y_pred.cpu().numpy(),y_pred_remote))
    print('')

InferenceServerException: unexpected shape for input 'INPUT__0' for model 'cub200_pnasnet'. Expected [-1,3,224,224], got [2,3,331,331]

In [None]:
# Display the served model's configuration.
# Requires `model_conf` to have been assigned by a get_model_info(...) call
# earlier in the session; raises NameError otherwise.
model_conf

In [None]:
# Display the 'name' entry of the served model's metadata.
# Requires `model_meta` to have been assigned by a get_model_info(...) call
# earlier in the session; raises NameError otherwise.
model_meta['name']

In [None]:
# Print every metadata entry as "<key>   ::  <value>", one per line.
for field_name in model_meta.keys():
    field_value = model_meta[field_name]
    print('{}   ::  {}'.format(field_name, field_value))