In [1]:
import torch
from torch.nn import NLLLoss
import torch.optim as optim
import torchvision.transforms as transforms
import torchvision.datasets as datasets
import torch.backends.cudnn as cudnn

%matplotlib inline
import pylab as pl
from IPython import display
import time

import sys
sys.path.append('../')
from helpful_files.networks import *
from helpful_files.training import *
from helpful_files.testing import *

In [2]:
# Set Important Values

# General settings
datapath = '/data/dww78/mini_inat_shrunk/'                     # The location of your train, test, repr, and query folders. Make sure it ends in '/'!
model = 'orig-proto-Train-100.pth'  # What model do you wish to evaluate, and where is it saved?
gpu = 2                             # What gpu do you wish to run on?
workers = 1                         # Number of cpu worker processes to use for data loading
verbosity = 10                      # How many categories in between status updates 
ensemble = 4                        # How many models to evaluate in parallel
k = 1                               # Evaluate top-k accuracy. Typically 1 or 5. 
torch.cuda.set_device(gpu) 
cudnn.benchmark = True

# Model characteristics
covariance_pooling = False           # Did your model use covariance pooling?
localizing = False                   # Did your model use localization?
fewshot_local = False                # If you used localization: few-shot, or parametric? Few-shot if True, param if False
network_width = 64                  # Number of channels at every layer of the network
trainshot = 5 
testshot = 15 
# Batch construction
bsize = 64                          # Batch size
boxes_available = 10                # Percentage of images with bounding boxes available (few-shot localization models only)
include_masks = (localizing         # Include or ignore the bounding box annotations?
                 and fewshot_local)
n_trials = (10                      # Number of trials (few-shot localization models only)
            if include_masks else 1)

augmentation_flipping = False
folding = False
# Calculate embedding size based on model setup
d = (network_width if not 
     covariance_pooling else
     network_width**2)
if localizing and not covariance_pooling:
    d = network_width*2
assert n_trials == 1 or include_masks, ("Repeated trials will yield repeated identical results under this configuration."+
                                        "Please set ntrials to 1 or use a few-shot localizer.")

In [8]:
# Load Testing Data

d_boxes = torch.load(datapath + 'box_coords.pth')

transform = transforms.Compose([
    transforms.ToTensor(),
    transforms.Normalize(mean=[0.4905, 0.4961, 0.4330],std=[0.1737, 0.1713, 0.1779])
    ])


if folding:
    # Batch folding has no reference/query distinction
    shots = [trainshot+testshot]
else:
    # Standard setup
    shots = [trainshot, testshot]
if localizing and fewshot_local and not folding:
    # Unfolded prototype localizers need another set of reference images to inform foreground/background predictions
    shots = [trainshot, trainshot, testshot-trainshot]
    

test_dataset = datasets.ImageFolder(
    datapath+'test', 
    loader = lambda x: load_transform(x, d_boxes, transform, include_masks))
way = 5
test_loader = torch.utils.data.DataLoader(
    test_dataset, 
    batch_sampler = ProtoSampler(test_dataset, way, shots),
    num_workers = workers,
    pin_memory = True)

# repr_dataset = datasets.ImageFolder(
#     datapath+'repr', 
#     loader = lambda x: load_transform(x, d_boxes, transform, include_masks))
# query_dataset = datasets.ImageFolder(
#     datapath+'query',
#     loader = lambda x: load_transform(x, d_boxes, transform, include_masks))
# repr_loader = torch.utils.data.DataLoader(
#     repr_dataset, 
#     batch_sampler = OrderedSampler(repr_dataset, bsize),
#     num_workers = workers,
#     pin_memory = True)
# query_loader = torch.utils.data.DataLoader(
#     query_dataset,
#     batch_sampler = OrderedSampler(query_dataset, bsize),
#     num_workers = workers,
#     pin_memory = True)


# Determine number of images with bounding boxes per-class
catsizes = torch.LongTensor(np.array([t[1] for t in test_dataset.imgs])).bincount().float()
ngiv = (catsizes*boxes_available//100)
for i in range(ngiv.size(0)):
    if ngiv[i] == 0:
        ngiv[i] = 1
ngiv = ngiv.long().tolist()

print('Data loaded!')

Data loaded!


In [9]:
# Make Models
models = [Network(network_width, folding, covariance_pooling, 
                  localizing, fewshot_local, shots).cuda() 
          for i in range(ensemble)]
# expander = avgpool()
# if localizing:
#     if fewshot_local:
#         expander = fsCL if covariance_pooling else fsL
#     else:
#         expander = pCL() if covariance_pooling else pL()
# elif covariance_pooling:
#     expander = covapool
# expanders = [expander for _ in range(ensemble)]

# Load saved parameters
model_state = torch.load(model)
for i in range(ensemble):
    models[i].encode.load_state_dict(model_state[i])
    models[i].encode.eval()
    # Zero out the bias on the final layer, since it doesn't do anything
    models[i].encode.process[-1].layers[-1].bias.data.zero_()

# Load additional parameters for parametric localizer models
# if localizing and not fewshot_local:
#     fbcentroids = torch.load(model[:model.rfind('.')]+'_localizers'+model[model.rfind('.'):])
#     for i in range(ensemble):
#         expanders[i].centroids.data = fbcentroids[i]
#         expanders[i].cuda()

print("Ready to go!")

Ready to go!


In [10]:
#                                                    EVALUATE

In [15]:
def episodic_eval(test_loader, models, way, shots, verbosity):
    for model in models:
        model.eval()
    nqueries = shots[-1]
    ensemble = len(models)
    targ = torch.LongTensor([i//nqueries for i in range(nqueries*way)]).cuda()
    acctracker = [[] for i in range(ensemble)]
    print("Training images covered this round:")
    for i, ((inp, masks), _) in enumerate(test_loader):
        inp = inp.cuda()
        masks = masks.cuda()
        for j in range(ensemble):
            # Predict, step
            out = models[j](inp, masks)
            print(out.shape)
            _,bins = torch.max(out,1)
            print("BINS BELOW")
            print(bins.shape)
            acc = torch.sum(torch.eq(bins,targ)).item()/nqueries/way
            acctracker[j].append(acc)
        if i%verbosity == 0:
            print('%d of approx. 192270'%(i*way*sum(shots)))
    all_acc = []
    for i in acctracker:
        for j in i:
            all_acc.append(j)
    mean_acc = sum(all_acc)/len(all_acc)
    confs = 1.96*np.sqrt(np.var(all_acc)/len(all_acc))
    return mean_acc, confs
#     return [L/(i+1) for L in acctracker]

In [16]:
mean_acc, confs = episodic_eval(test_loader, models, way, shots, verbosity)
print(mean_acc*100)
print(confs*100)
# accs = sum(acclist)/n_trials/ensemble
# confs = 1.96*np.sqrt(np.var(acclist)/n_trials/ensemble)

Training images covered this round:
torch.Size([75, 5])
BINS BELOW
torch.Size([75])
torch.Size([75, 5])
BINS BELOW
torch.Size([75])
torch.Size([75, 5])
BINS BELOW
torch.Size([75])
torch.Size([75, 5])
BINS BELOW
torch.Size([75])
0 of approx. 192270
torch.Size([75, 5])
BINS BELOW
torch.Size([75])
torch.Size([75, 5])
BINS BELOW
torch.Size([75])
torch.Size([75, 5])
BINS BELOW
torch.Size([75])
torch.Size([75, 5])
BINS BELOW
torch.Size([75])
torch.Size([75, 5])
BINS BELOW
torch.Size([75])
torch.Size([75, 5])
BINS BELOW
torch.Size([75])
torch.Size([75, 5])
BINS BELOW
torch.Size([75])
torch.Size([75, 5])
BINS BELOW
torch.Size([75])
torch.Size([75, 5])
BINS BELOW
torch.Size([75])
torch.Size([75, 5])
BINS BELOW
torch.Size([75])
torch.Size([75, 5])
BINS BELOW
torch.Size([75])
torch.Size([75, 5])
BINS BELOW
torch.Size([75])
torch.Size([75, 5])
BINS BELOW
torch.Size([75])
torch.Size([75, 5])
BINS BELOW
torch.Size([75])
torch.Size([75, 5])
BINS BELOW
torch.Size([75])
torch.Size([75, 5])
BINS BELOW
t

torch.Size([75, 5])
BINS BELOW
torch.Size([75])
torch.Size([75, 5])
BINS BELOW
torch.Size([75])
torch.Size([75, 5])
BINS BELOW
torch.Size([75])
torch.Size([75, 5])
BINS BELOW
torch.Size([75])
torch.Size([75, 5])
BINS BELOW
torch.Size([75])
torch.Size([75, 5])
BINS BELOW
torch.Size([75])
torch.Size([75, 5])
BINS BELOW
torch.Size([75])
torch.Size([75, 5])
BINS BELOW
torch.Size([75])
torch.Size([75, 5])
BINS BELOW
torch.Size([75])
torch.Size([75, 5])
BINS BELOW
torch.Size([75])
torch.Size([75, 5])
BINS BELOW
torch.Size([75])
torch.Size([75, 5])
BINS BELOW
torch.Size([75])
torch.Size([75, 5])
BINS BELOW
torch.Size([75])
torch.Size([75, 5])
BINS BELOW
torch.Size([75])
torch.Size([75, 5])
BINS BELOW
torch.Size([75])
torch.Size([75, 5])
BINS BELOW
torch.Size([75])
torch.Size([75, 5])
BINS BELOW
torch.Size([75])
torch.Size([75, 5])
BINS BELOW
torch.Size([75])
torch.Size([75, 5])
BINS BELOW
torch.Size([75])
torch.Size([75, 5])
BINS BELOW
torch.Size([75])
torch.Size([75, 5])
BINS BELOW
torch.Siz

KeyboardInterrupt: 

In [None]:
# print(acclist)
# print(accs)
# print(confs)

In [None]:
# acclist = []
# pcacclist = []
# alldispacc = np.zeros(way)
# for r in range(n_trials):
#     # Accumulate foreground/background prototypes, if using
#     fbcentroids = (accumulateFB(models, repr_loader, way, network_width, ngiv, bsize)
#                    if include_masks else 
#                    [None]*ensemble)
#     # Accumulate category prototypes
#     centroids, counts = accumulate(models, repr_loader, expanders, 
#                                    fbcentroids, way, d)
#     # Score the models
#     allacc, dispacc, perclassacc = score(k, centroids, fbcentroids, models, 
#                                          query_loader, expanders, way)
#     # Record statistics
#     acclist = acclist+allacc
#     pcacclist = pcacclist+list(perclassacc)
#     alldispacc += dispacc

# # Aggregate collected statistics
# accs = sum(acclist)/n_trials/ensemble
# pcaccs = sum(pcacclist)/n_trials/ensemble
# alldispacc = alldispacc/n_trials
# confs = 1.96*np.sqrt(np.var(acclist)/n_trials/ensemble)
# pcconfs = 1.96*np.sqrt(np.var(pcacclist)/n_trials/ensemble)

# # Report
# print("Accuracies and 95% confidence intervals")
# print("Mean accuracy: \t\t%.2f \t+/- %.2f" % (accs*100, confs*100))
# print("Per-class accuracy: \t%.f \t+/- %.2f" % (pcaccs*100, pcconfs*100))
# logcounts = [np.log10(c) for c in counts]
# pl.figure()
# pl.axhline(0,color='k')
# pl.scatter(counts, dispacc*100, s=4)
# z = np.polyfit(logcounts, np.array(dispacc)*100, 1)
# p = np.poly1d(z)
# pl.plot([min(counts),max(counts)], [p(min(logcounts)),p(max(logcounts))], "r--")
# pl.ylim([0,100])
# pl.xlabel('# Reference Images')
# pl.ylabel('Percentage Points')
# pl.xscale('log')
# pl.title('Per-Class Top-%d Accuracy' % k)
# pl.show()

In [None]:
# Shut down the notebook

In [None]:
%%javascript
// Jupyter.notebook.session.delete();