In [None]:
import torch
import torchvision.transforms as transforms
import torchvision.datasets as datasets
from sklearn.metrics import accuracy_score
from PIL import Image

from efficientnet import EfficientNet
from efficientnet_v2 import EfficientNetV2


In [None]:
def eval_model(model, dataloader, device, criterion=None):
    """Run ``model`` over ``dataloader`` in eval mode and collect its outputs.

    Args:
        model: Module to evaluate. It is switched to eval mode with
            ``model.eval()`` and is left in that mode on return.
        dataloader: Iterable yielding ``(inputs, targets)`` tensor pairs.
        device: Device every batch is moved to before the forward pass.
        criterion: Optional callable ``criterion(outputs, targets)`` returning
            a scalar tensor. When given, it is evaluated on every batch.

    Returns:
        ``(y_pred, y_true)``: the per-batch model outputs and targets,
        concatenated along dim 0 on the CPU. When ``criterion`` is given a
        third element is returned: the plain mean of the per-batch loss values
        (each batch counts equally, regardless of its size).

    Raises:
        ValueError: If ``dataloader`` yields no batches.
    """
    batch_losses = []
    y_pred = []
    y_true = []

    model.eval()
    with torch.no_grad():
        for xb, yb in dataloader:
            xb, yb = xb.to(device), yb.to(device)
            out = model(xb)
            # A single output column is treated as regression: drop that
            # dimension so predictions line up with the targets. The dim()
            # guard keeps already-flat (1-D) outputs from raising IndexError.
            if out.dim() > 1 and out.size(1) == 1:
                out = torch.squeeze(out, 1)

            if criterion is not None:
                batch_losses.append(criterion(out, yb).item())

            y_pred.append(out.detach().cpu())
            y_true.append(yb.detach().cpu())

    # Fail with a clear message instead of a ZeroDivisionError / torch.cat
    # error further down when there was nothing to evaluate.
    if not y_pred:
        raise ValueError("dataloader yielded no batches; nothing to evaluate")

    y_pred, y_true = torch.cat(y_pred), torch.cat(y_true)
    if criterion is None:
        return y_pred, y_true
    return y_pred, y_true, sum(batch_losses) / len(batch_losses)


## EfficientNetV2

In [None]:
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")

# Model names: 'b0', 'b1', 'b2', 'b3', 's', 'm', 'l', 'xl'
modelname = 's'
# Evaluation input size listed for the chosen model in EfficientNetV2._models.
in_spatial_shape = EfficientNetV2._models[modelname]['eval_size']

# Setting tf_style_conv=True and in_spatial_shape is only necessary when
# evaluating against the ImageNet dataset.
model = EfficientNetV2(modelname,
                       tf_style_conv=True,
                       in_spatial_shape=in_spatial_shape,
                       pretrained=True,
                       progress=True)
model.to(device)

val_trainsforms = transforms.Compose([
    transforms.Resize(in_spatial_shape,
                      interpolation=transforms.InterpolationMode.BICUBIC),
    transforms.CenterCrop(in_spatial_shape),
    transforms.ToTensor(),
    # Normalize is documented to take one mean/std value per channel.
    transforms.Normalize(mean=[0.5, 0.5, 0.5],
                         std=[0.5, 0.5, 0.5]),
])

val_dataset = datasets.ImageNet(root="/path/to/imagenet/val/subset", split="val",
                                transform=val_trainsforms)

val_loader = torch.utils.data.DataLoader(
    val_dataset,
    batch_size=32, shuffle=False,
    num_workers=2,
    # Pinned host memory only speeds up host-to-GPU copies; requesting it on a
    # CPU-only machine just produces a warning.
    pin_memory=device.type == "cuda")


In [None]:
# Raw model outputs and targets for the whole validation loader.
y_scores, y_true = eval_model(model, val_loader, device)
# Top-1 prediction: index of the highest-scoring class for each sample.
# argmax avoids assigning to `_`, which IPython uses for the last cell output.
y_pred = y_scores.argmax(dim=1)

# scikit-learn's signature is accuracy_score(y_true, y_pred).
score = accuracy_score(y_true, y_pred)
print("Accuracy: {:.3%}".format(score))


Expected top-1 accuracy on the ImageNet validation set:

EfficientNetV2-b0 - 77.590% <br>
EfficientNetV2-b1 - 78.872% <br>
EfficientNetV2-b2 - 79.388% <br>
EfficientNetV2-b3 - 82.260% <br>
EfficientNetV2-S - 84.282% <br>
EfficientNetV2-M - 85.596% <br>
EfficientNetV2-L - 86.298% <br>
EfficientNetV2-XL - 86.414% <br>

## EfficientNetV1

In [None]:
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")

# EfficientNet model index, i.e. 0 for EfficientNet-B0
idx = 0
model = EfficientNet(idx, pretrained=True, progress=True)
model.to(device)

val_trainsforms = transforms.Compose([
    # Use torchvision's InterpolationMode enum (as the EfficientNetV2 cell
    # does) rather than the legacy PIL integer constant.
    transforms.Resize(model.in_spatial_shape[0],
                      interpolation=transforms.InterpolationMode.BICUBIC),
    transforms.CenterCrop(model.in_spatial_shape),
    transforms.ToTensor(),
    transforms.Normalize(mean=[0.485, 0.456, 0.406],
                         std=[0.229, 0.224, 0.225]),
])


val_dataset = datasets.ImageNet(root="path/to/imagenet/dataset", split="val",
                                transform=val_trainsforms)

val_loader = torch.utils.data.DataLoader(
    val_dataset,
    batch_size=32, shuffle=False,
    num_workers=1,
    # Pinned host memory only speeds up host-to-GPU copies; requesting it on a
    # CPU-only machine just produces a warning.
    pin_memory=device.type == "cuda")


In [None]:
# Raw model outputs and targets for the whole validation loader.
y_scores, y_true = eval_model(model, val_loader, device)
# Top-1 prediction: index of the highest-scoring class for each sample.
# argmax avoids assigning to `_`, which IPython uses for the last cell output.
y_pred = y_scores.argmax(dim=1)

# scikit-learn's signature is accuracy_score(y_true, y_pred).
score = accuracy_score(y_true, y_pred)
print("Accuracy: {:.3%}".format(score))


Expected top-1 accuracy on the ImageNet validation set:

EfficientNet-B0 - 76.43% <br>
EfficientNet-B1 - 78.396% <br>
EfficientNet-B2 - 79.804% <br>
EfficientNet-B3 - 81.542% <br>
EfficientNet-B4 - 83.036% <br>
EfficientNet-B5 - 83.79% <br>
EfficientNet-B6 - 84.136% <br>
EfficientNet-B7 - 84.578% <br>