In [None]:
# Set the CUDA_VISIBLE_DEVICES environment variable to 0 for this kernel.
# NOTE(review): presumably meant to pin the notebook to the first GPU; run this
# cell before any cell that touches CUDA.
%env CUDA_VISIBLE_DEVICES=0

In [None]:
import os, torch, torchvision
from cmc.models.resnet import InsResNet50
from torchvision import transforms
from netdissect import tally, runningstats, renormalize, parallelfolder, pbar, show, imgviz

# Number of classes treated as "seen": it sizes the classifier head and is the
# threshold that splits inlier from outlier class indices later in the notebook.
selected_classes = 500

# Directory that holds this experiment's weights and cached statistics.
expdir = f'results/decoupled-{selected_classes}-imagenet-resnet'


def ef(s):
    """Return the path of the file named `s` inside the experiment directory."""
    path_in_expdir = os.path.join(expdir, s)
    return path_in_expdir

# Locations of the validation and training image folders.
dataset = "imagenet"
val_path = f"datasets/{dataset}/val"
train_path = f"datasets/{dataset}/train"

# Preprocessing applied to every image: resize to 256, center-crop to 224,
# convert to a tensor, then apply the 'imagenet' normalizer from renormalize.
img_trans = transforms.Compose(
    [
        transforms.Resize(256),
        transforms.CenterCrop(224),
        transforms.ToTensor(),
        renormalize.NORMALIZER['imagenet'],
    ]
)

# dsv wraps the validation folder and dst the training folder; both are built
# with the same transform and options.
dsv, dst = (
    parallelfolder.ParallelImageFolders(
        [split_root], transform=img_trans, classification=True, shuffle=True)
    for split_root in (val_path, train_path)
)
# Load human-readable class names into `labels`.
# Preferred source: the second column of datasets/<dataset>/labels.txt.
# Fallback: the class names known to the validation dataset.
import csv

labelfile = f"datasets/{dataset}/labels.txt"
try:
    with open(labelfile, newline='') as f:
        labels = [r[1] for r in csv.reader(f)]
except (OSError, IndexError, csv.Error):
    # The previous fallback was `label = csv.classes`: it assigned to the wrong
    # name and read an attribute the csv module does not have, so `labels` was
    # never defined when the file was missing or malformed.
    # NOTE(review): assumes ParallelImageFolders exposes `classes` when built
    # with classification=True - confirm against netdissect.parallelfolder.
    labels = dsv.classes
# Build a ResNet-50 with one output per seen class and restore the trained
# weights stored under the 'state_dict' key of best_weights.pth.
model = torchvision.models.resnet50(num_classes=selected_classes)
# Load onto the CPU first so the checkpoint does not depend on the GPU index
# it was saved from; the model is moved to the GPU explicitly below.
checkpoint = torch.load(ef('best_weights.pth'), map_location='cpu')
model.load_state_dict(checkpoint['state_dict'])
model.cuda()
# The notebook only runs inference: eval mode makes BatchNorm use its stored
# running statistics instead of per-batch statistics (and stops updating them),
# so scores do not depend on which images share a batch.
model.eval()


In [None]:
# Preview: among the first 100 validation items, show those whose class index
# is at least 10, each next to its label (with "Norwegian" stripped).
iv = imgviz.ImageVisualizer(100, source=dsv)
# Fetch each dataset item once and reuse it; the earlier form indexed dsv[i]
# up to three times per item, repeating the image fetch/transform each time.
show([
    [iv.image(item[0]), labels[item[1]].replace("Norwegian", "")]
    for item in (dsv[i] for i in range(100))
    if item[1] >= 10
])

In [None]:
# Make sure the model is on the GPU before scoring (the loading cell also
# calls model.cuda(); repeating it is harmless).
model.cuda()


def novelty_score(imgdat):
    """Score how novel each image in a batch looks to the classifier.

    The score is the negated maximum logit, so an image for which no class
    produces a large logit receives a high novelty score.

    Args:
        imgdat: batch of preprocessed images; it is moved to the GPU before
            the forward pass.

    Returns:
        Tensor with one score per image (the max is taken over dimension 1 of
        the model output).
    """
    # Inference only: skip autograd bookkeeping so that calling this directly
    # does not keep activations alive for a backward pass that never happens.
    with torch.no_grad():
        logits = model(imgdat.cuda())
    return -logits.max(1)[0]

def _score_where(imgbatch, keep):
    """Return novelty scores, as a column, for the images selected by mask `keep`.

    Returns None when the mask selects nothing.
    NOTE(review): presumably the tally code skips None batches - confirm.
    """
    selected = imgbatch[keep]
    if not len(selected):
        return None
    # [:, None] turns the per-image scores into a single-column 2-D tensor.
    return novelty_score(selected)[:, None]


def batch_score_inliers(imgbatch, c):
    """Score only the images whose class index `c` is below selected_classes.

    Args:
        imgbatch: batch of images.
        c: class index per image, used to build the selection mask.

    Returns:
        Column of novelty scores for the seen-class images, or None if the
        batch contains none.
    """
    return _score_where(imgbatch, c < selected_classes)


def batch_score_outliers(imgbatch, c):
    """Score only the images whose class index `c` is at least selected_classes.

    Args:
        imgbatch: batch of images.
        c: class index per image, used to build the selection mask.

    Returns:
        Column of novelty scores for the unseen-class images, or None if the
        batch contains none.
    """
    return _score_where(imgbatch, c >= selected_classes)

# Loader options shared by both quantile tallies over the validation set.
_tally_options = dict(num_workers=100, batch_size=512, pin_memory=True)

# Score distribution of seen-class images; the result is cached in expdir.
rq_inlier = tally.tally_quantile(
    batch_score_inliers,
    dsv,
    cachefile=ef(f'{dataset}-resnet50-sel{selected_classes}-inlier_rq.npz'),
    **_tally_options,
)
# Score distribution of unseen-class images; the result is cached in expdir.
rq_outlier = tally.tally_quantile(
    batch_score_outliers,
    dsv,
    cachefile=ef(f'{dataset}-resnet50-sel{selected_classes}-outlier_rq.npz'),
    **_tally_options,
)


In [None]:
from matplotlib import pyplot as plt

# Quantile levels at which both score distributions are sampled.
xrange = torch.linspace(0,1,100)

plt.title('Validation set scores, %d seen classes'%  selected_classes)
# One curve per distribution: score on the x axis, quantile level on the y axis.
for _rq, _curve_label in ((rq_inlier, "inliers"), (rq_outlier, "novel")):
    plt.plot(_rq.quantiles(xrange)[0].numpy(), xrange.numpy(), label=_curve_label)
plt.xlabel('score')
plt.ylabel('percentile')
plt.legend()

In [None]:
def avg_prec(precision, recall):
    """Average precision from sampled precision/recall curves.

    Each precision value is first replaced by the maximum precision found at
    that index or any later one (interpolated precision). The result is the
    sum, over consecutive samples, of the recall increment times the
    interpolated precision at the later sample.

    Args:
        precision: 1-D tensor of precision values.
        recall: 1-D tensor of recall values, same length as `precision`.

    Returns:
        The average precision as a Python float (0.0 when there are fewer
        than two samples).
    """
    if precision.numel() == 0:
        return 0.0
    # Suffix maximum in one pass: a running max over the reversed tensor,
    # reversed back. This replaces a per-index slice-and-max loop that was
    # quadratic in the number of samples, and keeps the input's device/dtype.
    interpolated = torch.cummax(precision.flip(0), dim=0)[0].flip(0)
    recall_steps = recall[1:] - recall[:-1]
    return (recall_steps * interpolated[1:]).sum().item()

# Score range spanned by the inlier and outlier distributions together.
_both_rq = (rq_inlier, rq_outlier)
lowscore = min(rq.quantiles(0.0).item() for rq in _both_rq)
highscore = max(rq.quantiles(1.0).item() for rq in _both_rq)
# Candidate decision thresholds, evenly spaced over that range.
srange = torch.linspace(lowscore, highscore, 10000)


def _rate_above(rq):
    """Return 1 - rq.normalize(threshold) for every threshold in srange, reversed.

    NOTE(review): assumes normalize() maps a score to its quantile rank, which
    would make this the fraction of samples scoring above each threshold,
    ordered from the highest threshold down - confirm against netdissect.
    """
    return (1.0 - rq.normalize(srange[None])[0]).flip(0)


true_pos = _rate_above(rq_outlier)
false_pos = _rate_above(rq_inlier)
recall = true_pos
# The 1e-20 terms keep the ratio defined where both rates are zero.
precision = (true_pos + 1e-20) / (true_pos + false_pos + 1e-20)
# Mean of the true-positive rate and the true-negative rate.
accuracy = (true_pos + (1 - false_pos)) / 2
ap = avg_prec(precision, recall)

# NOTE(review): the title mentions "places MoCo" - confirm it matches the
# weights loaded from expdir.
plt.title("decoupled classifier novelty detection w places MoCo\nFirst %s imagenet classes vs others" % selected_classes)
plt.plot(recall, false_pos, label="False positives")
plt.plot(recall, precision, label="Precision")
plt.axhline(y=ap, color='orange', linestyle='--', label="AP=%.3g" % ap)
plt.plot(recall, accuracy, label="Max acc=%.3g" % accuracy.max().item())
plt.xlabel('recall')
plt.ylabel('precision')
plt.legend()

In [None]:
# Display the lower end of the threshold range: the smaller of the two
# distributions' 0-quantile scores.
lowscore