In [None]:
import os
import logging
import requests
import tarfile
import time
import numpy as np
import pandas
from torchvision.io import read_image
import torch
from torchvision.models import (
    resnet50, 
    ResNet50_Weights, 
    regnet_y_16gf, 
    RegNet_Y_16GF_Weights, 
    mobilenet_v3_large, 
    MobileNet_V3_Large_Weights, 
    vit_l_16, 
    ViT_L_16_Weights, 
    convnext_tiny, 
    ConvNeXt_Tiny_Weights,
    vgg16, 
    VGG16_Weights, 
    swin_s, 
    Swin_S_Weights, 
    googlenet, 
    GoogLeNet_Weights, 
    efficientnet_b3, 
    EfficientNet_B3_Weights, 
    wide_resnet101_2, 
    Wide_ResNet101_2_Weights
)
from PIL import Image

# Configure PyTorch's CUDA caching allocator (max_split_size_mb = 512).
# This is set in the first cell so it is in place before any CUDA memory is allocated.
os.environ.update({"PYTORCH_CUDA_ALLOC_CONF": "max_split_size_mb:512"})

In [None]:
# Directory layout: downloaded archives, extracted data, and benchmark output.
DATASET_PATH = "./dataset"
DATA_PATH = "./data"
RESULTS_PATH = "./results"

# Remote archives together with the local paths they are downloaded and extracted to.
DATASET = dict(
    test_files_url='https://image-net.org/data/ILSVRC/2012/ILSVRC2012_img_val.tar',
    test_files_tar=os.path.join(DATASET_PATH, 'ILSVRC2012_img_val.tar'),
    test_files=os.path.join(DATA_PATH, 'test_files'),
    ground_truth_url='http://dl.caffe.berkeleyvision.org/caffe_ilsvrc12.tar.gz',
    ground_truth_tar_gz=os.path.join(DATASET_PATH, 'caffe_ilsvrc12.tar.gz'),
    ground_truth=DATA_PATH,
    ground_truth_file=os.path.join(DATA_PATH, 'val.txt'),
)

# Logging verbosity; can be overridden through the LOG_LEVEL environment variable.
LOG_LEVEL = os.environ.get('LOG_LEVEL', 'INFO')
# Torch device the models run on; it is also embedded in the result file names.
DEVICE = 'cpu'
# Upper bound on the number of test images evaluated per model.
NUM_FILES = 10000
print(DEVICE)


In [None]:
# Make sure the archive, results, and extracted-image directories exist.
for required_dir in (DATASET_PATH, RESULTS_PATH, os.path.join(DATA_PATH, 'test_files')):
    os.makedirs(required_dir, exist_ok=True)


In [None]:
def get_logger(class_name: str, log_level: str = LOG_LEVEL):
    """Return the logger named ``class_name`` with its level set to ``log_level``.

    ``logging.basicConfig()`` is invoked on every call so that the root logger
    has a handler and messages are actually emitted.

    Args:
        class_name: Name passed to ``logging.getLogger``.
        log_level: Level applied to the returned logger; defaults to ``LOG_LEVEL``.

    Returns:
        The configured ``logging.Logger``.
    """
    logging.basicConfig()
    named_logger = logging.getLogger(class_name)
    named_logger.setLevel(log_level)
    return named_logger


# Notebook-wide logger used by the helper functions below.
log = get_logger('main')

In [None]:
def download(url: str, output: str):
    """Download ``url`` to ``output`` unless ``output`` already exists.

    The response body is streamed into ``output + '.part'`` and only renamed to
    ``output`` once the transfer completed. A failed or interrupted download
    therefore never leaves a truncated ``output`` behind that a later call
    would mistake for a finished file.

    Args:
        url: Address of the file to fetch.
        output: Local path the file is stored at; its directory must exist.

    Returns:
        True if the file was downloaded, False if ``output`` already existed.

    Raises:
        requests.HTTPError: If the server answers with an error status.
        requests.RequestException: On connection problems or timeouts.
    """
    if os.path.exists(output):
        log.info(f"File already exists: {output}")
        return False

    partial = output + '.part'
    log.info(f"Downloading file: {url}")
    try:
        # (connect, read) timeouts: fail instead of hanging forever on a dead server.
        with requests.get(url, stream=True, timeout=(10, 60)) as response:
            # Do not store an HTML error page as if it were the archive.
            response.raise_for_status()
            with open(partial, 'wb') as compressed_file:
                for chunk in response.iter_content(chunk_size=1024 * 1024):
                    if chunk:  # filter out keep-alive new chunks
                        compressed_file.write(chunk)
        os.replace(partial, output)
    except BaseException:
        # Also covers KeyboardInterrupt (kernel interrupt): drop the partial file.
        if os.path.exists(partial):
            os.remove(partial)
        raise
    log.info(f"Downloaded file: {output}")
    return True

def extract_all_files(tar_file_path: str, extract_to: str):
    """Extract every member of the tar archive ``tar_file_path`` into ``extract_to``.

    The archives handled here are fetched from the network, so extraction uses
    the ``'data'`` filter when the running Python provides it. That filter
    rejects members that would be written outside ``extract_to`` (absolute
    paths, ``..`` components, escaping links).

    Args:
        tar_file_path: Path of the (optionally compressed) tar archive.
        extract_to: Directory the members are extracted into.
    """
    with tarfile.open(tar_file_path, 'r') as tar:
        log.info(f"Extracting: {tar_file_path}")
        if hasattr(tarfile, 'data_filter'):
            tar.extractall(extract_to, filter='data')
        else:
            # NOTE(security): this Python version has no extraction filters, so
            # members are extracted unchecked. Only use with trusted archives.
            tar.extractall(extract_to)
        log.info(f"Extracted to: {extract_to}")

In [None]:
# One-time setup: uncomment to download the test images and the ground-truth archive.
#download(DATASET['test_files_url'], DATASET['test_files_tar'])
#download(DATASET['ground_truth_url'], DATASET['ground_truth_tar_gz'])

In [None]:
# One-time setup: uncomment to extract the downloaded archives into the data directory.
#extract_all_files(DATASET['ground_truth_tar_gz'], DATASET['ground_truth'])
#extract_all_files(DATASET['test_files_tar'], DATASET['test_files'])


In [None]:

test_files_dir = DATASET['test_files']
ground_truth_file = DATASET['ground_truth_file']

# Read the ground-truth file into a filename -> class ID lookup.
# Each line is expected to look like "<filename> <class_id>".
labels_by_filename = {}
with open(ground_truth_file, 'r') as ground_truth:
    for line in ground_truth:
        fields = line.split()
        if len(fields) < 2:
            continue  # blank or malformed line
        labels_by_filename[os.path.basename(fields[0])] = int(fields[1])

# Candidate images, in sorted order so every run evaluates the same subset.
candidate_files = sorted(
    f for f in os.listdir(test_files_dir)
    if os.path.isfile(os.path.join(test_files_dir, f))
)
unlabeled_files = [f for f in candidate_files if f not in labels_by_filename]
if unlabeled_files:
    log.warning(f"Ignoring {len(unlabeled_files)} file(s) without a ground-truth entry")

# Builds the filename_to_class dictionary, which will serve as a ground truth
# The key is the filename, and the value is the ground truth class ID
# Labels are looked up by filename rather than by position, so a missing or
# extra file in the directory cannot shift the labels of the other images,
# and having fewer than NUM_FILES images is not an error.
files = [f for f in candidate_files if f in labels_by_filename][:NUM_FILES]
filename_to_class = {f: labels_by_filename[f] for f in files}

In [None]:
# Static description of every benchmarked model, one row per model.
# Columns follow _METADATA_FIELDS:
#   name    - label used in logs and in the per-model result file name
#   params  - parameter count
#   gflops  - GFLOPS figure of the model
#   year    - year associated with the architecture
#   size    - presumably the weights file size in bytes; verify against the torchvision docs
#   model   - torchvision builder function, called with ``weights=...``
#   weights - weights enum member passed to the builder
_METADATA_FIELDS = ('name', 'params', 'gflops', 'year', 'size', 'model', 'weights')

_MODEL_TABLE = [
    ('ResNet50', 25557032, 4.09, 2015, 97800000, resnet50, ResNet50_Weights.DEFAULT),
    ('RegNet Y 16GF', 83590140, 15.91, 2020, 319500000, regnet_y_16gf, RegNet_Y_16GF_Weights.DEFAULT),
    ('MobileNet V3 Large Weights', 5483032, 0.22, 2019, 21100000, mobilenet_v3_large, MobileNet_V3_Large_Weights.DEFAULT),
    ('ViT L 16', 304326632, 61.55, 2021, 1161000000, vit_l_16, ViT_L_16_Weights.DEFAULT),
    ('ConvNeXt Tiny Weights', 28589128, 4.46, 2022, 109100000, convnext_tiny, ConvNeXt_Tiny_Weights.DEFAULT),
    ('VGG 16', 138357544, 15.47, 2014, 527800000, vgg16, VGG16_Weights.DEFAULT),
    ('Swin S', 49606258, 8.74, 2021, 189800000, swin_s, Swin_S_Weights.DEFAULT),
    ('GoogLe Net', 6624904, 1.50, 2014, 49700000, googlenet, GoogLeNet_Weights.DEFAULT),
    ('Efficient Net B3', 12233232, 1.83, 2019, 47200000, efficientnet_b3, EfficientNet_B3_Weights.DEFAULT),
    # NOTE(review): 242900000 is roughly half of what 126886696 float32
    # parameters would occupy - confirm this size matches the DEFAULT weights.
    ('Wide ResNet 101', 126886696, 22.75, 2016, 242900000, wide_resnet101_2, Wide_ResNet101_2_Weights.DEFAULT),
]

# 'accuracy' and 'inference' start at 0 and are filled in by the benchmark loop.
model_metadata = [
    {**dict(zip(_METADATA_FIELDS, row)), 'accuracy': 0, 'inference': 0}
    for row in _MODEL_TABLE
]

In [None]:
def get_model_accuracy(metadata, weights):
    """Run one model over ``files`` and measure top-1 accuracy and latency.

    Every image is converted to RGB, preprocessed with the transforms shipped
    with ``weights`` and classified; the predicted class is compared against
    ``filename_to_class``. The measured time per image covers preprocessing,
    the transfer to ``DEVICE`` and the forward pass (image decoding is excluded).

    Args:
        metadata: Entry of ``model_metadata``. ``metadata['model']`` is called
            with ``weights=weights`` to build the network, and
            ``metadata['name']`` labels the per-image records.
        weights: Weights object providing ``transforms()`` and
            ``meta["categories"]``.

    Returns:
        A tuple ``(accuracy, inference_avg, data)``:
        ``accuracy`` is the fraction of correctly classified images,
        ``inference_avg`` the mean time per image in seconds, and ``data`` a
        list with one record per image (running accuracy, time, predicted
        category and score, model name, device). With no images to evaluate
        the result is ``(0.0, 0.0, [])``.
    """
    if not files:
        # Avoids dividing by zero (and loading a model) when nothing was extracted.
        log.warning("No test files to evaluate")
        return (0.0, 0.0, [])

    torch.cuda.empty_cache()
    model = metadata['model'](weights=weights)
    model.to(DEVICE)
    model.eval()

    # Inference transforms and class names that belong to these weights.
    preprocess = weights.transforms()
    categories = weights.meta["categories"]
    # CUDA kernels run asynchronously; they must be waited for before the timer stops.
    on_cuda = torch.device(DEVICE).type == 'cuda'

    right = 0
    index = 0
    time_elapsed = 0.0
    data = []

    # Autograd bookkeeping is useless here and would distort memory use and timings.
    with torch.no_grad():
        for index, file in enumerate(files, start=1):
            with Image.open(os.path.join(test_files_dir, file)) as raw_image:
                img = raw_image.convert('RGB')

            start = time.perf_counter()
            batch = preprocess(img).unsqueeze(0).to(DEVICE)
            prediction = model(batch).squeeze(0).softmax(0)
            if on_cuda:
                torch.cuda.synchronize()
            end = time.perf_counter()
            time_elapsed += end - start

            class_id = prediction.argmax().item()
            if class_id == filename_to_class[file]:
                right += 1

            if index % 1000 == 0:
                log.info(f'{index}\tAccuracy: {right/(index)}')
            data.append({
                'index': index,
                'accuracy': right / index,  # running accuracy up to this image
                'inference_time': end - start,
                'category_name': categories[class_id],
                'score': prediction[class_id].item(),
                'model': metadata['name'],
                'device': DEVICE
            })

    # The latency is reported as an average per prediction, not as the total.
    accuracy = right / index
    inference_avg = time_elapsed / index
    return (accuracy, inference_avg, data)

In [None]:
# Benchmark every model: store the summary metrics on its metadata entry and
# write the per-image records to "<model name>-<device>.csv" in RESULTS_PATH.
for metadata in model_metadata:
    weights = metadata['weights']

    accuracy, inference_avg, data = get_model_accuracy(metadata, weights)
    metadata['accuracy'] = accuracy
    metadata['inference'] = inference_avg

    per_image_csv = os.path.join(RESULTS_PATH, f'{metadata["name"]}-{DEVICE}.csv')
    df = pandas.DataFrame.from_records(data)
    df.to_csv(per_image_csv, index=False)


In [None]:
# Summary table with one row per model; the builder function column is dropped.
df = pandas.DataFrame.from_records(model_metadata).drop(columns='model')

In [None]:
# Persist the per-model summary for the device used in this run.
summary_csv = os.path.join(RESULTS_PATH, f'results-{DEVICE}.csv')
df.to_csv(summary_csv, index=False)