In [1]:
import daal4py.sklearn
import logging
import numpy as np
import os
import pandas
import torch
import torch.nn as nn
import torch.nn.functional as F
import tqdm
import warnings

from ads import set_auth
from ads.common.model import prepare_generic_model
from oci.auth.signers import get_resource_principals_signer
from oci.data_science import DataScienceClient
from os import path
from os import popen
from sklearn import datasets
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler
from tempfile import mkdtemp
from torch.autograd import Variable

# Keep the notebook output readable: silence every Python warning and let only
# ERROR-level (and above) log records through, rendered as "LEVEL:message".
warnings.filterwarnings(action='ignore')
logging.basicConfig(
    level=logging.ERROR,
    format='%(levelname)s:%(message)s',
)



In [2]:
from pytorch_benchmark import benchmark

In [3]:
import yaml
from torchvision.models import efficientnet_b0, efficientnet_b1 # up to b7
from pytorch_benchmark import benchmark # benchmarking library
# Module-level EfficientNet-B0 instance; torchvision provides variants b0 through b7.
# NOTE(review): benchmark_efficientnet() below builds its own local model and
# sample, so it does not read these two names.
model = efficientnet_b0()
# Random input batch laid out as (B, C, H, W): 8 images, 3 channels, 224x224 pixels.
sample = torch.randn((8, 3, 224, 224))



def benchmark_efficientnet(model_fn=None, num_runs=1000, batch_size=8,
                           print_details=True):
    """Benchmark an EfficientNet model and return the results as a YAML string.

    Called with no arguments this behaves exactly like the original
    hard-coded version (EfficientNet-B0, 1000 runs, batch size 8, details
    printed).

    Args:
        model_fn: Zero-argument callable that builds the model to benchmark,
            e.g. ``efficientnet_b1``. Defaults to ``efficientnet_b0``.
        num_runs: Forwarded to ``benchmark`` as ``num_runs``.
        batch_size: Forwarded to ``benchmark`` as ``batch_size``.
        print_details: Forwarded to ``benchmark`` as ``print_details``.

    Returns:
        The ``benchmark`` results serialized by ``yaml.dump`` with a
        4-space indent.

    Raises:
        AssertionError: If the results lack any of the keys ``device``,
            ``flops``, ``params`` or ``timing``.
    """
    # Resolve the default lazily so the signature does not depend on
    # efficientnet_b0 being imported at definition time.
    if model_fn is None:
        model_fn = efficientnet_b0

    model = model_fn()

    # Use the GPU when one is present; otherwise the model stays where it was built.
    if torch.cuda.is_available():
        model = model.cuda()

    sample = torch.randn(2, 3, 224, 224)  # (B, C, H, W)

    results = benchmark(
        model=model,
        sample=sample,
        num_runs=num_runs,
        batch_size=batch_size,
        print_details=print_details,
    )

    # Report every missing key at once instead of stopping at an arbitrary first one.
    required = ("device", "flops", "params", "timing")
    missing = [prop for prop in required if prop not in results]
    assert not missing, f"benchmark results are missing keys: {missing}"

    return yaml.dump(results, indent=4)

In [4]:
# Run the benchmark and show the YAML report it returns. The call also emits
# progress bars and a per-layer model summary before the report is printed.
result = benchmark_efficientnet()
print(result)

Warming up with batch_size=1: 100%|██████████| 1/1 [00:00<00:00,  9.72it/s]


EfficientNet(
  5.29 M, 100.000% Params, 401.67 MMac, 100.000% MACs, 
  (features): Sequential(
    4.01 M, 75.778% Params, 400.33 MMac, 99.665% MACs, 
    (0): ConvNormActivation(
      928, 0.018% Params, 11.64 MMac, 2.898% MACs, 
      (0): Conv2d(864, 0.016% Params, 10.84 MMac, 2.698% MACs, 3, 32, kernel_size=(3, 3), stride=(2, 2), padding=(1, 1), bias=False)
      (1): BatchNorm2d(64, 0.001% Params, 802.82 KMac, 0.200% MACs, 32, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
      (2): SiLU(0, 0.000% Params, 0.0 Mac, 0.000% MACs, inplace=True)
    )
    (1): Sequential(
      1.45 k, 0.027% Params, 11.64 MMac, 2.898% MACs, 
      (0): MBConv(
        1.45 k, 0.027% Params, 11.64 MMac, 2.898% MACs, 
        (block): Sequential(
          1.45 k, 0.027% Params, 11.64 MMac, 2.898% MACs, 
          (0): ConvNormActivation(
            352, 0.007% Params, 4.42 MMac, 1.099% MACs, 
            (0): Conv2d(288, 0.005% Params, 3.61 MMac, 0.899% MACs, 32, 32, kernel_size=(3

Warming up with batch_size=1: 100%|██████████| 100/100 [00:02<00:00, 34.05it/s]
Measuring inference for batch_size=1: 100%|██████████| 1000/1000 [00:30<00:00, 32.28it/s]
ERROR:torch-benchmark:Unable to measure energy consumption. Device must be a NVIDIA Jetson.
Warming up with batch_size=8: 100%|██████████| 100/100 [00:04<00:00, 20.84it/s]
Measuring inference for batch_size=8: 100%|██████████| 1000/1000 [00:44<00:00, 22.45it/s]
ERROR:torch-benchmark:Unable to measure energy consumption. Device must be a NVIDIA Jetson.


device: cpu
flops: 401669732
machine_info:
    cpu:
        architecture: x86_64
        cores:
            physical: 8
            total: 16
        frequency: 0.00 GHz
        model: Intel(R) Xeon(R) Platinum 8167M CPU @ 2.00GHz
    gpus: null
    memory:
        available: 112.34 GB
        total: 117.76 GB
        used: 4.25 GB
    system:
        node: 566747361fb0
        release: 4.14.35-2047.511.5.2.el7uek.x86_64
        system: Linux
params: 5288548
timing:
    batch_size_1:
        on_device_inference:
            human_readable:
                batch_latency: 30.574 ms +/- 3.174 ms [25.696 ms, 38.772 ms]
                batches_per_second: 33.07 +/- 3.47 [25.79, 38.92]
            metrics:
                batches_per_second_max: 38.91722570169334
                batches_per_second_mean: 33.0664472736708
                batches_per_second_min: 25.791737895241727
                batches_per_second_std: 3.4665513177126184
                seconds_per_batch_max: 0.038772106170654