In [1]:
import os
import logging
import requests
import tarfile
import time
import numpy as np
import pandas
from torchvision.io import read_image
import torch
from torchvision.models import (
    resnet50, 
    ResNet50_Weights, 
    regnet_y_16gf, 
    RegNet_Y_16GF_Weights, 
    mobilenet_v3_large, 
    MobileNet_V3_Large_Weights, 
    vit_h_14, 
    ViT_H_14_Weights, 
    convnext_tiny, 
    ConvNeXt_Tiny_Weights
)
from PIL import Image

os.environ["PYTORCH_CUDA_ALLOC_CONF"] = "max_split_size_mb:512"

In [2]:
DATASET_PATH = "./dataset"
DATA_PATH = "./data"
DATASET = {
    'test_files_url': 'https://image-net.org/data/ILSVRC/2012/ILSVRC2012_img_val.tar',
    'test_files_tar': os.path.join(DATASET_PATH, 'ILSVRC2012_img_val.tar'),
    'test_files': os.path.join(DATA_PATH, 'test_files'),
    'ground_truth_url': 'http://dl.caffe.berkeleyvision.org/caffe_ilsvrc12.tar.gz',
    'ground_truth_tar_gz': os.path.join(DATASET_PATH, 'caffe_ilsvrc12.tar.gz'),
    'ground_truth': DATA_PATH,
    'ground_truth_file':  os.path.join(DATA_PATH, 'val.txt'),
}
LOG_LEVEL = os.environ.get('LOG_LEVEL', 'INFO')
DEVICE = torch.device("cuda" if torch.cuda.is_available() else "cpu")
NUM_FILES = 50000
print(DEVICE)


cuda


In [3]:
os.makedirs(DATASET_PATH, exist_ok=True)
os.makedirs(os.path.join(DATA_PATH, 'test_files'), exist_ok=True)


In [4]:
def get_logger(class_name: str, log_level: str = LOG_LEVEL):
    logging.basicConfig()
    logger = logging.getLogger(class_name)
    logger.setLevel(log_level)
    return logger

log = get_logger('main')

In [5]:
def download(url: str, output: str):
    if not os.path.exists(output):    
        with open(output, 'wb') as compressed_file:
            log.info(f"Downloading file: {url}")
            response = requests.get(url, stream=True)
            for chunk in response.iter_content(chunk_size=1024): 
                if chunk: # filter out keep-alive new chunks
                    compressed_file.write(chunk)
        log.info(f"Downloadedfile: {output}")
        return True
    log.info(f"File already exists: {output}")
    return False

def extract_all_files(tar_file_path: str, extract_to: str):
    with tarfile.open(tar_file_path, 'r') as tar:
        log.info(f"Extracting: {tar_file_path}")
        tar.extractall(extract_to)
        log.info(f"Extracted to: {extract_to}")

In [6]:
#download(DATASET['test_files_url'], DATASET['test_files_tar'])
#download(DATASET['ground_truth_url'], DATASET['ground_truth_tar_gz'])

In [7]:
#extract_all_files(DATASET['ground_truth_tar_gz'], DATASET['ground_truth'])
#extract_all_files(DATASET['test_files_tar'], DATASET['test_files'])


In [8]:

test_files_dir = DATASET['test_files']
ground_truth_file = DATASET['ground_truth_file']

# Builds the filename_to_class dictionary, which will serve as a ground truth
# The key is the filename, and the value is the ground truth class ID

files = sorted([f for f in os.listdir(test_files_dir) if os.path.isfile(os.path.join(test_files_dir, f))])
files = files[:NUM_FILES]
filename_to_class = {}

with open(ground_truth_file, 'r') as ground_truth:
  i = 0
  for line in ground_truth:
    if (i == NUM_FILES):
      break
    filename_to_class[files[i]] = int(line.strip().split(' ')[1])
    i += 1

In [9]:
model_metadata = [
    {
        'name': 'ResNet50',
        'params': 25557032,
        'gflops': 4.09,
        'year': 2015,
        'size': 97800000,
        'model': resnet50,
        'weights': ResNet50_Weights.DEFAULT,
        'accuracy': 0,
        'inference': 0
    },
    {
        'name': 'RegNet_Y_16GF',
        'params': 83590140,
        'gflops': 15.91,
        'year': 2020,
        'size': 319500000,
        'model': regnet_y_16gf,
        'weights': RegNet_Y_16GF_Weights.DEFAULT,
        'accuracy': 0,
        'inference': 0
    },
    {
        'name': 'MobileNet_V3_Large_Weights',
        'params': 5483032,
        'gflops': 0.22,
        'year': 2019,
        'size': 21100000,
        'model': mobilenet_v3_large,
        'weights': MobileNet_V3_Large_Weights.DEFAULT,
        'accuracy': 0,
        'inference': 0
    },
    #{
    #    'name': 'ViT_H_14',
    #    'params': 633470440,
    #    'gflops': 1016.72,
    #    'year': 2021,
    #    'size': 2416600000,
    #    'model': vit_h_14,
    #    'weights': ViT_H_14_Weights.DEFAULT,
    #    'accuracy': 0,
    #    'inference': 0
    #},
    {
        'name': 'ConvNeXt_Tiny_Weights',
        'params': 28589128,
        'gflops': 4.46,
        'year': 2022,
        'size': 109100000,
        'model': convnext_tiny,
        'weights': ConvNeXt_Tiny_Weights.DEFAULT,
        'accuracy': 0,
        'inference': 0,
    }
]

In [10]:
def get_model_accuracy(metadata, weights):
  torch.cuda.empty_cache()
  model = metadata['model'](weights=weights)
  model.to(DEVICE)
  model.eval()
  
  # Step 2: Initialize the inference transforms
  preprocess = weights.transforms()

  right = 0
  wrong = 0
  time_elapsed: float = 0
  data = [
  ]


  
  for file in files:
    img = Image.open(test_files_dir + '/' + file)

    img = img.convert('RGB')
   
    start = time.time()
    # Step 3: Apply inference preprocessing transforms
    batch = preprocess(img).unsqueeze(0).to(DEVICE)


    # Step 4: Use the model and print the predicted category

    prediction = model(batch).squeeze(0).softmax(0)
    end = time.time()
    time_elapsed = time_elapsed + (end - start)
    class_id = prediction.argmax().item()
    if (class_id == filename_to_class[file]):
      right += 1
    else:
      wrong += 1
    index = right + wrong
    score = prediction[class_id].item()
    category_name = weights.meta["categories"][class_id]
    if not (right + wrong) % 1000:
      log.info(f'{index}\tAccuracy: {right/(index)}')
    data.append({
      'index': index,
      'accuracy': right/(index),
      'inference_time': end - start,
      'category_name': category_name,
      'score': score,
      'model': metadata['name']
    })

  # TODO: Return a tuple of (inference_time, accuracy) instead of just accuracy and populate the metadata with inference as well
  # INFERENCE SHOULD BE AN AVERAGE PER PREDICTION, AND NOT THE SUM OF THEM ALL
  accuracy = right/(index)
  inference_avg = time_elapsed/(index)
  return (accuracy, inference_avg, data)

In [11]:
for metadata in model_metadata:
  weights = metadata['weights']
  
  # Step 1: Initialize model with the best available weights

  metadata['accuracy'], metadata['inference'], data = get_model_accuracy(metadata, weights)
  df = pandas.DataFrame.from_records(data)
  df.to_csv(f'{metadata["name"]}.csv', index=False)


INFO:main:1000	Accuracy: 0.801
INFO:main:1000	Accuracy: 0.826
INFO:main:1000	Accuracy: 0.751
Downloading: "https://download.pytorch.org/models/convnext_tiny-983f1562.pth" to /home/fschwanck/.cache/torch/hub/checkpoints/convnext_tiny-983f1562.pth
100.0%
INFO:main:1000	Accuracy: 0.819


In [12]:
df = pandas.DataFrame.from_records(model_metadata)
df.drop('model', axis=1)

Unnamed: 0,name,params,gflops,year,size,weights,accuracy,inference
0,ResNet50,25557032,4.09,2015,97800000,ResNet50_Weights.IMAGENET1K_V2,0.801,0.009014
1,RegNet_Y_16GF,83590140,15.91,2020,319500000,RegNet_Y_16GF_Weights.IMAGENET1K_V2,0.826,0.014883
2,MobileNet_V3_Large_Weights,5483032,0.22,2019,21100000,MobileNet_V3_Large_Weights.IMAGENET1K_V2,0.751,0.011017
3,ConvNeXt_Tiny_Weights,28589128,4.46,2022,109100000,ConvNeXt_Tiny_Weights.IMAGENET1K_V1,0.819,0.006469


In [13]:
df.to_csv('results.csv', index=False)