### Things to fix/implement this PR:
* [x] The inferance is not being run on the GPU despite setting the torch.device to GPU
* [x] Allow optional parameter to specify batch size for batch scoring
* [x] Constructor option to scorer class for specifying version of ResNet to use
* [x] Use formal logging for scorer instead of print statements.

In [1]:
import pandas as pd
from pathlib import Path
from dataclasses import dataclass
from pprint import pprint

from PIL import Image

from modeling_notebooks.resnet_selection.resnet_scorer import OtterScorer, ResNetVersions

import torch
import torchvision

oon_data_dir = Path("/mnt/a/data/ott-or-not")
naro = Path("/mnt/a/data/ott-or-not/north_american_river_otter_100")

In [2]:
if torch.cuda.is_available():
    notebook_device = torch.device("cuda")
    print(f"Using CUDA backend on device '{torch.cuda.get_device_name(0)}'.")
else:
    notebook_device = torch.device("cpu")
    print("Using CPU backend.")

Using CUDA backend on device 'NVIDIA GeForce RTX 3080 Ti'.


## Record runtimes for model-device combinations

In [3]:
# version = ResNetVersions.v152
# device = torch.device("cpu")
# device = torch.device("cuda")
# scorer = OtterScorer(resnet_version=version, device=device)
# scorer = OtterScorer(resnet_version=version)

In [4]:
# Test batching by reading all the files in the NARO directory

# Resnet-18
# 9.0s on GPU
# 11.0s on CPU

# Resnet-34
# 10.8s on GPU
# 13.1s on CPU

# Resnet-50
# 9.3 on GPU
# 19.6 on CPU

# Resnet-101
# 11.4 on GPU
# 25.4 on CPU

# Resnet-152  Note: this model does not fit in the GPU's memory
# 19m 35.0s on GPU
# 31.6 on CPU

# naro_results = scorer.score_images_in_directory(oon_data_dir, batch_size=64)
# len(naro_results)

## Benchmark performance for each model version
We want to make sure that our selected model will percorm well across all species of otter we want to identify. Later we will also add some similar-but-different examples to classify as well (e.g. beaver, weasel, platypus)

In [5]:
version_device_pairs = [
    (ResNetVersions.v18, torch.device("cuda")),
    (ResNetVersions.v34, torch.device("cuda")),
    (ResNetVersions.v50, torch.device("cuda")),
    (ResNetVersions.v101, torch.device("cuda")),
    (ResNetVersions.v152, torch.device("cpu")),
]

In [6]:
notebook_device.type

'cuda'

In [7]:
model_result_records = list()

for version, device in version_device_pairs:
    print(f"Scoring with {version.value} on {device.type}")
    scorer = OtterScorer(resnet_version=version, device=device)
    results = scorer.score_images_in_directory(oon_data_dir, batch_size=64, silent=True)

    for result in results:
        model_result_records.append({
            "version": version.value,
            "version_name": f"ResNet-{version.value}",
            "device": device.type,
            "file_path": result.file_path,
            "correctly_identified": result.category == "otter",
            "category": result.category,
            "category_score": result.category_score,
            "otter_score": result.otter_score,
            "otter_species": result.file_path.parent.name,
        })

        # Clean up our memory so the GPU doesn't complain
        scorer = None
        torch.cuda.empty_cache()

# Assemble the results into a DataFrame
model_result_df = pd.DataFrame(model_result_records)
model_result_df.shape

Scoring with 18 on cuda


  return F.conv2d(input, weight, bias, self.stride,


Scoring with 34 on cuda
Scoring with 50 on cuda
Scoring with 101 on cuda
Scoring with 152 on cpu


(1090, 9)

In [8]:
# model_result_df

In [9]:
# Let's look at how many mis-classified example each model has for each species
model_result_df.loc[model_result_df["correctly_identified"] == False] \
    .groupby(["version", "version_name", "otter_species"]) \
    .agg(examples = ("file_path", "count")) \
    .pivot_table(index=["version", "version_name"], columns="otter_species", values="examples", fill_value=0) \
    .sort_values("version")

KeyError: 'version_name'