# Neuron Model Prediction Demo - Computer Vision
---

In [None]:
# Enable IPython's autoreload extension so edits to imported helper modules
# are picked up without restarting the kernel.
%load_ext autoreload
# Mode 2: reload all modules (except those excluded via %aimport) before
# executing each cell.
%autoreload 2

In [None]:
import os
import cv2
import numpy as np
import cv_helper_class
from cv_helper_class import ImgClassificationNet, VisionTransformerNet
import matplotlib.pyplot as plt
from common import preprocess_img
import torch
import torch_neuronx
from torchvision import models

<br>

## 1. Image classification (VGG, ResNet, ResNeXt, EfficientNet)

In [None]:
# Architecture family to demo. Edit this (and the matching *_SIZE constant
# below) to pick a different torchvision model.
IMGCLS_NETWORK = "RESNET"
_IMGCLS_NETWORKS = ("VGG", "RESNET", "RESNEXT", "EFFICIENTNET")
if IMGCLS_NETWORK not in _IMGCLS_NETWORKS:
    # Explicit raise instead of assert: asserts are stripped under `python -O`
    # and give no hint about the allowed values.
    raise ValueError(
        f"IMGCLS_NETWORK must be one of {_IMGCLS_NETWORKS}, got {IMGCLS_NETWORK!r}"
    )

if IMGCLS_NETWORK == "VGG":
    ## Choose the network size
    VGG_SIZE = "11"  # can be 11,11_bn,13,13_bn,16,16_bn,19,19_bn
    _VGG_SIZES = ("11", "11_bn", "13", "13_bn", "16", "16_bn", "19", "19_bn")
    if VGG_SIZE not in _VGG_SIZES:
        raise ValueError(f"VGG_SIZE must be one of {_VGG_SIZES}, got {VGG_SIZE!r}")
    _model_fn_name = f"vgg{VGG_SIZE}"
elif IMGCLS_NETWORK == "RESNET":
    RESNET_SIZE = 50  # can be 18,34,50,101,152
    _RESNET_SIZES = (18, 34, 50, 101, 152)
    if RESNET_SIZE not in _RESNET_SIZES:
        raise ValueError(
            f"RESNET_SIZE must be one of {_RESNET_SIZES}, got {RESNET_SIZE!r}"
        )
    _model_fn_name = f"resnet{RESNET_SIZE}"
elif IMGCLS_NETWORK == "RESNEXT":
    RESNEXT_SIZE = "50_32x4d"  # can be 50_32x4d,101_32x8d,101_64x4d
    _RESNEXT_SIZES = ("50_32x4d", "101_32x8d", "101_64x4d")
    if RESNEXT_SIZE not in _RESNEXT_SIZES:
        raise ValueError(
            f"RESNEXT_SIZE must be one of {_RESNEXT_SIZES}, got {RESNEXT_SIZE!r}"
        )
    _model_fn_name = f"resnext{RESNEXT_SIZE}"
elif IMGCLS_NETWORK == "EFFICIENTNET":
    EFFICIENTNET_SIZE = 0  # can be 0,1,2,3,4,5,6,7
    if EFFICIENTNET_SIZE not in range(8):
        raise ValueError(
            f"EFFICIENTNET_SIZE must be in 0..7, got {EFFICIENTNET_SIZE!r}"
        )
    _model_fn_name = f"efficientnet_b{EFFICIENTNET_SIZE}"

# Keep the "models.<name>" label unchanged: it is handed to the wrapper as
# model_name (NOTE(review): presumably used for saved-artifact naming, cf. the
# "neuron_models.resnet50.pt" path in the load cell - confirm in cv_helper_class).
model_name = f"models.{_model_fn_name}"

# Resolve the torchvision constructor by attribute lookup rather than eval()
# on a built string; the variable name is kept for any later cell that uses it.
model_name_eval = getattr(models, _model_fn_name)
# NOTE: torchvision >= 0.13 deprecates `pretrained=True` in favour of
# `weights=...`; it is kept here so the same pretrained weights are loaded.
model = model_name_eval(pretrained=True)
imgclass_net = ImgClassificationNet(model=model, model_name=model_name)

### Compile (or load) Neuron model

In [None]:
# Exactly one of the three wrapper calls below is active; the other two are
# kept commented out as alternatives to switch to by hand.
# NOTE(review): analyze/compile/load are defined in cv_helper_class, which is
# not visible here - presumably compile() builds the Neuron model and load()
# restores a previously saved one; confirm before relying on this.
#imgclass_net.analyze()
imgclass_net.compile()
#imgclass_net.load("neuron_models.resnet50.pt")

In [None]:
# Fetch sample image A from the project's preprocess_img helper; img1 is the
# input passed to the single-image prediction cell.
img1 = preprocess_img.load_sample_imgA()

### Predict single image

In [None]:
%matplotlib inline
resize_img, y_pred, y_str, y_prob = imgclass_net.get_single_predict_result(img1)
plt.figure(figsize=(10,10))
plt.imshow(cv2.cvtColor(resize_img, cv2.COLOR_BGR2RGB))

### Predict multiple images

In [None]:
# img1 = preprocess_img.load_sample_imgA()
# img2 = preprocess_img.load_sample_imgE()
# img1 = preprocess_img.preprocess_imagenet(img1)
# img2 = preprocess_img.preprocess_imagenet(img2)
# img_list = [img1, img2]
# import numpy as np
# x = np.concatenate(img_list, axis=0)
# outputs = imgclass_net.predict(x, data_parallel=True)
# print(outputs.shape)


<br>

## 2. Vision Transformers

In [None]:
import types
from transformers import ViTFeatureExtractor, ViTForImageClassification

# Checkpoint name and model family for the Vision Transformer demo.
model_type = "vit"
model_name = "vit-base-patch16-224"

# Pull the pretrained checkpoint published under the "google/" namespace.
model = ViTForImageClassification.from_pretrained("google/" + model_name)

# Wrap the model in the project helper used by the compile and predict cells.
vit_net = VisionTransformerNet(
    model=model,
    model_name=model_name,
    model_type=model_type,
)


### Compile Neuron model

In [None]:
# compile() is the active call; analyze() is kept commented out as an
# alternative to run by hand.
# NOTE(review): both are defined in cv_helper_class, not visible here -
# presumably compile() builds the Neuron version of the ViT model; confirm.
#vit_net.analyze()
vit_net.compile()

In [None]:
# Fetch two sample images from the project's preprocess_img helper: img1 feeds
# the single-image prediction, and both appear in the (commented-out)
# multi-image example.
img1 = preprocess_img.load_sample_imgA()
img2 = preprocess_img.load_sample_imgB()

### Predict single image

In [None]:
%matplotlib inline
resize_img, y_pred, y_str, y_prob = vit_net.get_single_predict_result(img1)
plt.figure(figsize=(10,10))
plt.imshow(cv2.cvtColor(resize_img, cv2.COLOR_BGR2RGB))

### Predict multiple images

In [None]:
# feature_extractor = ViTFeatureExtractor.from_pretrained("google/vit-base-patch16-224")
# #x = feature_extractor(img1, return_tensors="pt")['pixel_values']
# x = feature_extractor([img1, img2], return_tensors="pt")['pixel_values'] # multiple images
# outputs = vit_net.predict(x, data_parallel=True)
# outputs.shape