In [1]:
import os
import io
import neptune
import torch
import requests
import json
import time
import numpy as np
import torchvision.transforms as transforms
from io import BytesIO
from PIL import Image
from tqdm import tqdm
from time import perf_counter_ns
import nncompression.utils as nnu
from nncompression.experiments import utils as exu
from nncompression.models.pytorch.utils import get_imagenet_val_loader
from nncompression.utils import IMAGENET_LABELS, DEVICE

In [2]:
# Root URL of the deployment server that all requests in this notebook target.
BASE_URL = "http://192.168.0.28:5000/"

# Ask the server which models it can serve. A timeout keeps the notebook from
# hanging forever on an unreachable host, and raise_for_status() surfaces HTTP
# errors directly instead of as a confusing JSON/KeyError further down.
r = requests.get(BASE_URL+'avail_models', timeout=10)
r.raise_for_status()
AVAILABLE_MODELS = r.json()['available_models']

In [3]:

# Model under test, chosen by position in the server's list of available models.
# NOTE(review): the index is positional, so the selected model changes if the
# server reorders its list -- confirm against AVAILABLE_MODELS before a run.
model = AVAILABLE_MODELS[3]

# Take the API key from the environment so the credential does not have to live
# in the notebook; the previous literal is kept only as a fallback so existing
# setups keep working unchanged.
api_key = os.environ.get('DEPLOYMENT_API_KEY', 'password')

# Tell the server to load the chosen model; the JSON reply is kept as model_info.
r = requests.post(BASE_URL+f'set_model/{model}', json={'api_key': api_key})
# Fail loudly on a rejected key or server error instead of parsing an error body.
r.raise_for_status()
model_info = r.json()


In [4]:
# Display the JSON reply of the set_model request (stored in model_info above).
model_info

{'model_name': 'resnet101',
 'non-trainable_params': 0,
 'total_input_size': 0.57421875,
 'total_output_size': 429.73028564453125,
 'total_params': 44549160,
 'total_params_size': 169.94155883789062,
 'total_size': 600.2460632324219,
 'trainable_params': 44549160}

In [5]:
# The server's model summary doubles as the experiment's parameter set.
PARAMS = model_info

# Select the Neptune project, then open an experiment named after the model
# under test so that metrics logged later are attached to it.
neptune.init('davidturner94/deployment-eval')
experiment = neptune.create_experiment(params=PARAMS, name=model)

NVMLError: NVML Shared Library Not Found - GPU usage metrics may not be reported.


https://ui.neptune.ai/davidturner94/deployment-eval/e/DEP-4


In [6]:
# --- Run configuration -------------------------------------------------------
# Progress is printed every `print_freq` iterations of the request loop.
print_freq = 5000
# Wire format of each request: 'pil' uploads a JPEG file, 'json' posts the
# tensor as a nested list.
request_type = 'pil'

# Endpoint that the request loop posts each image to.
url = BASE_URL + 'inference'

# NOTE(review): this exempts 127.0.0.1 from proxying, while BASE_URL in this
# notebook points at 192.168.0.28 -- confirm the entry is still what is wanted.
os.environ['NO_PROXY'] = '127.0.0.1'

# Validation images are served one at a time (batch_size=1).
val_loader = get_imagenet_val_loader('data/imagenet', batch_size=1)

# --- Running statistics, one meter per measured quantity ---------------------
batch_time = exu.AverageMeter('Time', ':6.3f')
inference_time = exu.AverageMeter('inference_time', ':6.3f')
latency_out = exu.AverageMeter('Latency_out', ':6.3f')
latency_back = exu.AverageMeter('Latency_back', ':6.3f')
power_usage = exu.AverageMeter('Power_in_watts', ':.1f')
gpu_utilization = exu.AverageMeter('GPU_utilization', ':.1f')
top1 = exu.AverageMeter('Acc@1', ':6.2f')
top5 = exu.AverageMeter('Acc@5', ':6.2f')

# Meters shown on the periodic progress line (gpu_utilization is tracked but
# not part of this display list).
displayed_meters = [
    batch_time,
    top1,
    top5,
    inference_time,
    latency_out,
    latency_back,
    power_usage,
]
progress = exu.ProgressMeter(len(val_loader), displayed_meters, prefix='Test: ')

# Per-class tallies, indexed by label id: top-1 hits, top-5 hits, samples seen.
class_correct = [
    {'top1': 0., 'top5': 0., 'total': 0.}
    for _ in range(len(IMAGENET_LABELS))
]

# Request Loop

In [7]:
def _meter_summary(meter):
    """Collect a meter's avg/max/min attributes into a dict for the results report."""
    return {'avg': meter.avg, 'max': meter.max, 'min': meter.min}


def _percentage(hits, total):
    """Return 100*hits/total, or None when total is zero (no samples of that class)."""
    return 100 * hits / total if total else None


# The tensor -> PIL converter does not depend on the loop variable, so build it once.
to_pil = transforms.ToPILImage()

with torch.no_grad():
    end = time.time()
    for i, (images, labels) in enumerate(val_loader):

        # Send the single image of this batch in the configured wire format.
        if request_type == 'json':
            im = images[0]
            payload = json.dumps({"image": im.tolist()}).encode('utf-8')
            time_request_sent = time.time()
            r = requests.post(url, data=payload)
        elif request_type == 'pil':
            im = to_pil(images[0])

            # Encode the PIL image as JPEG bytes in memory.
            with BytesIO() as output:
                im.save(output, 'JPEG')
                data = output.getvalue()

            payload = {
                'files': (
                    '1.jpeg',
                    data,
                    'image/jpeg'
                )
            }
            time_request_sent = time.time()
            r = requests.post(url, files=payload)
        else:
            # Without this branch an unknown value would silently reuse the
            # response of an earlier request (or fail with a NameError).
            raise ValueError(
                f"Unsupported request_type {request_type!r}; expected 'json' or 'pil'")
        time_response_recieved = time.time()

        # Surface HTTP errors here rather than as a KeyError on the payload below.
        r.raise_for_status()
        r_json = r.json()

        # Raw prediction scores returned by the server, with a batch axis added.
        outputs = torch.tensor(
            r_json['result']['prediction_raw']).unsqueeze(0)

        outputs, labels = outputs.to(DEVICE), labels.to(DEVICE)

        acc1, acc5 = exu.accuracy(outputs, labels, topk=(1, 5))

        # Per-class bookkeeping, keyed by the label of the single image sent.
        label_idx = labels[0].item()
        class_correct[label_idx]['total'] += 1

        if acc1[0] == 100.:
            class_correct[label_idx]['top1'] += 1

        if acc5[0] == 100.:
            class_correct[label_idx]['top5'] += 1

        top1.update(acc1[0], images.size(0))
        top5.update(acc5[0], images.size(0))

        # Wall-clock time of the whole iteration, measured on the client.
        batch_time.update(time.time() - end)
        end = time.time()

        inference_time.update(r_json['result']['inference_time_ms'])

        # GPU telemetry of the first GPU listed in the server's response.
        gpu_info = r_json['meta']['cuda_info']['gpu'][0]
        power_usage.update(gpu_info['power_readings']['power_draw'], images.size(0))
        gpu_utilization.update(gpu_info['utilization']['gpu_util'], images.size(0))

        # NOTE(review): both latencies subtract a client timestamp from a
        # server-reported one (or vice versa), so they presumably rely on the
        # two machines' clocks being synchronised -- confirm.
        latency_out.update(r_json['meta']['time_request_recieved']-time_request_sent)
        latency_back.update(time_response_recieved-r_json['meta']['time_response_sent'])

        if i % print_freq == 0:
            progress.display(i)

        neptune.log_metric('batch_time', batch_time.val)
        neptune.log_metric('inference_time', inference_time.val)
        neptune.log_metric('latency_out', latency_out.val)
        neptune.log_metric('latency_back', latency_back.val)
        neptune.log_metric('top1_accuracy_avg', top1.avg)
        neptune.log_metric('top1_accuracy_raw', top1.val)
        neptune.log_metric('top5_accuracy_avg', top5.avg)
        neptune.log_metric('top5_accuracy_raw', top5.val)
        neptune.log_metric('gpu_power_w', power_usage.val)
        neptune.log_metric('gpu_util', gpu_utilization.val)

    # Aggregate report, keyed by the model name from the last server response.
    results = {
        r_json['meta']['model']['name']: {
            'batch_time': _meter_summary(batch_time),
            'inference_time': _meter_summary(inference_time),
            'latency_out': _meter_summary(latency_out),
            'latency_back': _meter_summary(latency_back),
            'top1_accuracy': {
                'avg': top1.avg.item(),
            },
            'top5_accuracy': {
                'avg': top5.avg.item(),
            },
            'gpu_power_usage': _meter_summary(power_usage),
            'gpu_util': _meter_summary(gpu_utilization),
        }
    }
    neptune.log_text('results', json.dumps(results))

    # One JSON document per class and line. Classes that received no samples
    # report null accuracies instead of raising ZeroDivisionError.
    class_accuracy = '\n'.join(
        json.dumps({
            **stats,
            'top1_accuracy': _percentage(stats['top1'], stats['total']),
            'top5_accuracy': _percentage(stats['top5'], stats['total']),
            'class': IMAGENET_LABELS[idx],
        })
        for idx, stats in enumerate(class_correct)
    )
    neptune.log_text('class_accuracy', class_accuracy)
neptune.stop()

Test: [    0/50000]	Time  0.128 ( 0.128)	Acc@1 100.00 (100.00)	Acc@5 100.00 (100.00)	inference_time  0.019 ( 0.019)	Latency_out  0.030 ( 0.030)	Latency_back  0.009 ( 0.009)	Power_in_watts 57.9 (57.9)
Test: [ 1000/50000]	Time  0.090 ( 0.101)	Acc@1 100.00 ( 85.21)	Acc@5 100.00 ( 96.70)	inference_time  0.016 ( 0.019)	Latency_out  0.035 ( 0.037)	Latency_back  0.013 ( 0.014)	Power_in_watts 62.0 (60.3)
Test: [ 2000/50000]	Time  0.088 ( 0.099)	Acc@1 100.00 ( 81.46)	Acc@5 100.00 ( 96.45)	inference_time  0.018 ( 0.019)	Latency_out  0.042 ( 0.037)	Latency_back  0.004 ( 0.012)	Power_in_watts 61.1 (60.6)
Test: [ 3000/50000]	Time  0.114 ( 0.099)	Acc@1   0.00 ( 78.84)	Acc@5 100.00 ( 94.87)	inference_time  0.019 ( 0.019)	Latency_out  0.042 ( 0.038)	Latency_back  0.014 ( 0.012)	Power_in_watts 61.9 (60.9)
Test: [ 4000/50000]	Time  0.084 ( 0.099)	Acc@1 100.00 ( 76.41)	Acc@5 100.00 ( 94.40)	inference_time  0.016 ( 0.019)	Latency_out  0.037 ( 0.038)	Latency_back  0.002 ( 0.011)	Power_in_watts 62.8 (61.2)


AttributeError: module 'neptune' has no attribute 'end'