In [1]:
import os
import sys
sys.path.append('../../../')
from fastai.conv_learner import *
from fastai.dataset import *
from pathlib import Path
from PIL import ImageDraw, ImageFont
from matplotlib import patches, patheffects
from pathlib import Path

In [2]:
# backbone architecture, square input resolution and path of the exported learner
f_model = resnet50
size = 224
export = "../models/modanet-ref-resnet50.pkl"

# augmentations; every transform receives tfm_y=TfmType.COORD
aug_tfms = [
    RandomRotate(10, p=0.5, tfm_y=TfmType.COORD),
    RandomLighting(0.05, 0.05, tfm_y=TfmType.COORD),
    RandomFlip(tfm_y=TfmType.COORD),
]

# transform collection for the backbone; predict_fn applies tfms[1] to each image
tfms = tfms_from_model(
    f_model,
    size,
    crop_type=CropType.NO,
    tfm_y=TfmType.COORD,
    aug_tfms=aug_tfms,
)

# anchor configuration: grid resolutions, zoom factors and (height, width) ratios
anchor_grid_sizes = [28, 14, 7, 4, 2, 1]
anchor_zooms = [.7, 2**0, 2**(1/3), 2**(2/3)]
anchor_ratios = [(1., 1.), (.5, 1.), (1., .5), (3., 1.), (1., 3.)]

# every zoom combined with every ratio gives the k anchor shapes of one grid cell
anchor_scales = [(zoom*n, zoom*m) for zoom in anchor_zooms for (n, m) in anchor_ratios]
k = len(anchor_scales)

# half a cell width per grid: distance from the border to the first cell centre
anchor_offsets = [1/(gsize*2) for gsize in anchor_grid_sizes]

# x coordinate of every cell centre, grid after grid (each value repeated gsize times)
anchor_xs = np.concatenate([
    np.repeat(np.linspace(offset, 1-offset, gsize), gsize)
    for offset, gsize in zip(anchor_offsets, anchor_grid_sizes)
])

# y coordinate of every cell centre, grid after grid (the whole range tiled gsize times)
anchor_ys = np.concatenate([
    np.tile(np.linspace(offset, 1-offset, gsize), gsize)
    for offset, gsize in zip(anchor_offsets, anchor_grid_sizes)
])

# each cell centre is shared by the k anchors of that cell
anchor_centres = np.repeat(np.stack([anchor_xs, anchor_ys], axis=1), k, axis=0)

# (height, width) of every anchor: the k scales divided by the grid resolution,
# repeated once per cell of that grid
anchor_sizes = np.concatenate([
    np.tile(np.array(anchor_scales) / gsize, (gsize*gsize, 1))
    for gsize in anchor_grid_sizes
])

# cell side length (1/gsize) for every anchor of every grid, as an (N, 1) column
grid_sizes = V(
    np.concatenate([np.array([1/gsize] * (gsize*gsize*len(anchor_scales)))
                    for gsize in anchor_grid_sizes]),
    requires_grad=False,
).unsqueeze(1)

# one row per anchor: (x_center, y_center, height, width)
anchor_hws = V(
    np.concatenate([anchor_centres, anchor_sizes], axis=1),
    requires_grad=False,
).float()


def conv_layer(num_in, num_out, stride=2, dp= 0.1):
    """Build a 3x3 conv -> batch norm -> leaky ReLU -> dropout block.

    num_in / num_out are the input and output channel counts, stride is the
    convolution stride and dp the dropout probability.
    """
    layers = [
        # the conv has no bias of its own; batch norm follows directly
        nn.Conv2d(num_in, num_out, 3, bias=False, stride=stride, padding=1),
        nn.BatchNorm2d(num_out, momentum=0.01),
        nn.LeakyReLU(negative_slope=0.1, inplace=True),
        nn.Dropout(dp),
    ]
    return nn.Sequential(*layers)
    
class OutputConvolution(nn.Module):
    """Prediction head producing category and bounding-box activations.

    For each of the k anchors of a grid cell it emits len(id2cat)+1 category
    activations and 4 box activations; both outputs are reshaped by `flatten`.
    """
    def __init__(self, k, num_in, bias):
        super().__init__()
        self.k = k

        # category head: (number of categories + 1) filters per anchor,
        # with every bias term initialised to `bias`
        num_cat_filters = (len(id2cat) + 1) * k
        self.cat_output = nn.Conv2d(num_in, num_cat_filters, 3, padding=1)
        self.cat_output.bias.data.zero_().add_(bias)

        # box head: 4 filters per anchor
        self.bbox_output = nn.Conv2d(num_in, 4 * k, 3, padding=1)

    def forward(self, x):
        cat_preds = flatten(self.cat_output(x), self.k)
        bbox_preds = flatten(self.bbox_output(x), self.k)
        return [cat_preds, bbox_preds]
    
def flatten(x, k):
    """Reshape a 4-D conv output into (batch, rows, filters // k).

    The filter axis is moved last before the reshape, so each output row holds
    the filters // k values belonging to one anchor.
    """
    n_batch, n_filters, _, _ = x.size()
    per_anchor = n_filters // k
    # contiguous() is required before view() once the axes have been permuted
    return x.permute(0, 2, 3, 1).contiguous().view(n_batch, -1, per_anchor)

class SaveFeatures():
    """Forward hook that remembers a module's most recent input and output."""

    # stays None until the hooked module has run at least once
    features = None

    def __init__(self, m):
        self.hook = m.register_forward_hook(self.hook_fn)

    def hook_fn(self, module, input, output):
        # invoked by the hooked module after each forward pass
        self.features_in = input
        self.features = output

    def remove(self):
        # detach the hook; the stored activations are left untouched
        self.hook.remove()
        
# Dropout probability shared by FPN_SSD_Model: applied to the backbone output
# and passed as `dp` to its extra conv_layer blocks.
drop = 0.4

class FPN_SSD_Model(nn.Module):
    """Detection network: backbone, extra convolutions and an upsampling path
    with lateral 1x1 convolutions, feeding six OutputConvolution heads.

    forward() returns a two-element list [category activations, bounding-box
    activations]; each is the concatenation along dim 1 of the six head
    outputs, ordered from the finest grid to the coarsest.

    The grid sizes quoted in the comments (28x28 ... 1x1) presumably assume a
    224x224 input -- TODO confirm.
    """
    def __init__(self, model_base, k, bias):
        super().__init__()

        self.model_base = model_base
        self.drop = nn.Dropout(drop)

        # Bottom-up pathway: forward hooks capture the outputs of backbone
        # children 5 and 6 (28x28 and 14x14), and the extra conv layers keep
        # (conv7, stride 1) or halve (conv4/conv2/conv1, default stride 2)
        # the grid: 7x7, 4x4, 2x2, 1x1
        self.saved_features = [SaveFeatures(model_base[i]) for i in [5,6]]
        self.conv7 = conv_layer(2048,1024, dp=drop, stride=1) 
        self.conv4 = conv_layer(1024,512, dp=drop) 
        self.conv2 = conv_layer(512,512, dp=drop) 
        self.conv1 = conv_layer(512,512, dp=drop) 
        
        # 1x1 convolutions forming the lateral connections; each maps its
        # level to 512 channels (28x28, 14x14, 7x7, 4x4, 2x2, 1x1)
        self.lat28 = nn.Conv2d(512,512,kernel_size=1, stride=1, padding=0)
        self.lat14 = nn.Conv2d(1024,512,kernel_size=1, stride=1, padding=0)
        self.lat7 = nn.Conv2d(1024,512,kernel_size=1, stride=1, padding=0)
        self.lat4 = nn.Conv2d(512,512,kernel_size=1, stride=1, padding=0)
        self.lat2 = nn.Conv2d(512,512,kernel_size=1, stride=1, padding=0)
        self.lat1 = nn.Conv2d(512,512,kernel_size=1, stride=1, padding=0)
        
        # Top-down pathway: upsampling layers that increase the grid size
        # to a fixed target (2x2, 4x4, 7x7, 14x14, 28x28)
        self.upsamp2 = nn.Upsample(size=(2,2), mode='bilinear')
        self.upsamp4 = nn.Upsample(size=(4,4), mode='bilinear')
        self.upsamp7 = nn.Upsample(size=(7,7), mode='bilinear')
        self.upsamp14 = nn.Upsample(size=(14,14), mode='bilinear')
        self.upsamp28 = nn.Upsample(size=(28,28), mode='bilinear')
        
        # One output head per pyramid level, splitting the network into
        # category and bounding-box predictions (28x28, 14x14, 7x7, 4x4, 2x2, 1x1)
        self.out28 = OutputConvolution(k, 512, bias)
        self.out14 = OutputConvolution(k, 512, bias)
        self.out7 = OutputConvolution(k, 512, bias)
        self.out4 = OutputConvolution(k, 512, bias)
        self.out2 = OutputConvolution(k, 512, bias)
        self.out1 = OutputConvolution(k, 512, bias)

    def forward(self,x):
        # get the activations from the pre-trained model; this call also
        # fills the saved_features hooks read just below
        x = self.drop(F.relu(self.model_base(x)))
        
        # bottom-up activations: hooked backbone outputs, then the extra
        # convolutions that reduce the grid size
        c28 = F.relu(self.saved_features[0].features)
        c14 = F.relu(self.saved_features[1].features)
        c7 = self.conv7(x)
        c4 = self.conv4(c7)
        c2 = self.conv2(c4)
        c1 = self.conv1(c2)
       
        # top-down: start at the coarsest level, then repeatedly upsample and
        # add the lateral connection of the next finer level
        p1 = self.lat1(c1)
        p2 = self.upsamp2(p1) + self.lat2(c2)    
        p4 = self.upsamp4(p2) + self.lat4(c4)
        p7 = self.upsamp7(p4) + self.lat7(c7)
        p14 = self.upsamp14(p7) + self.lat14(c14)
        p28 = self.upsamp28(p14) + self.lat28(c28)
        
        # making the final predictions at every level
        out28cat,out28bbox = self.out28(p28)
        out14cat,out14bbox = self.out14(p14)
        out7cat,out7bbox = self.out7(p7)
        out4cat,out4bbox = self.out4(p4)
        out2cat,out2bbox = self.out2(p2)
        out1cat,out1bbox = self.out1(p1)
        
        # concatenate all the predictions together, finest grid first
        return [torch.cat([out28cat, out14cat, out7cat, out4cat, out2cat, out1cat], dim=1),
                torch.cat([out28bbox, out14bbox, out7bbox, out4bbox, out2bbox, out1bbox], dim=1)]
    
    
# model_meta yields a (cut, lr_cut) pair for the chosen architecture; lr_cut is
# read by MakeModel.get_layer_groups, while get_base looks up cut again itself
cut,lr_cut = model_meta[f_model]

class MakeModel():
    """Wrapper pairing a model with a name and exposing its layer groups."""

    def __init__(self, model, name='makemodel'):
        self.model = model
        self.name = name

    def get_layer_groups(self, precompute):
        """Return the backbone split at lr_cut, followed by one group holding
        every child of the model after the first. `precompute` is not used."""
        backbone_children = children(self.model.model_base)
        groups = list(split_by_idxs(backbone_children, [lr_cut]))
        head_group = children(self.model)[1:]
        groups.append(head_group)
        return groups

def get_base(f_model):
    """Instantiate f_model(True), cut it at the index given by model_meta and
    wrap the remaining layers in an nn.Sequential."""
    cut_idx, _ = model_meta[f_model]
    full_model = f_model(True)
    return nn.Sequential(*cut_model(full_model, cut_idx))

class Fake:
    """Minimal stand-in object exposing only a `path` attribute; load_learner
    passes it as the first argument when rebuilding the learner."""

    def __init__(self, p):
        self.path = p

def hw2corners(ctr, hw):
    """Turn centres and sizes into corners: columns are [ctr - hw/2, ctr + hw/2]."""
    half = hw/2
    return torch.cat([ctr - half, ctr + half], dim=1)

def preds_to_bbs(actn, anchor_hws):
    """Decode raw box activations into corner boxes relative to the anchors.

    actn has one row of four activations per anchor; anchor_hws has one row of
    (x_center, y_center, height, width) per anchor. Also reads the module-level
    `grid_sizes` and `size`. Returns hw2corners(centres, sizes), i.e. rows of
    [centre - size/2, centre + size/2].
    """
    # tanh squashes every activation into (-1, 1)
    squashed = torch.tanh(actn)
    ctr_shift, hw_scale = squashed[:, :2], 1 + squashed[:, 2:]
    anc_ctr, anc_hw = anchor_hws[:, :2], anchor_hws[:, 2:]

    # first two activations: move the centre by at most one grid cell
    # NOTE(review): the clamp upper bound is `size` (pixels) although the
    # anchors look like fractions of the image -- confirm this is intended
    ctrs = torch.clamp((ctr_shift * grid_sizes) + anc_ctr, 0, size)
    # last two activations: rescale the anchor height/width by a factor in (0, 2)
    hws = torch.clamp(hw_scale * anc_hw, 0, size)

    # (center, size) -> (min corner, max corner)
    return hw2corners(ctrs, hws)

def nms(boxes, scores, overlap=0.5, top_k=100):
    """Greedy non-maximum suppression.

    Args:
        boxes: (N, 4) tensor whose columns are read as x1, y1, x2, y2.
        scores: (N,) tensor with one score per box.
        overlap: IoU threshold; a candidate whose IoU with an already kept
            box is greater than this value is discarded.
        top_k: only the top_k highest-scoring boxes are considered.

    Returns:
        (keep, count): keep is a long tensor of length N whose first `count`
        entries are the indices of the kept boxes in descending score order
        (the remaining entries are zero); count is the number of kept boxes.
        For empty input the result is (keep, 0).
    """
    keep = scores.new(scores.size(0)).zero_().long()
    # always return the (keep, count) pair so callers can unpack the result
    # even when there is nothing to suppress
    if boxes.numel() == 0: return keep, 0
    x1 = boxes[:, 0]
    y1 = boxes[:, 1]
    x2 = boxes[:, 2]
    y2 = boxes[:, 3]
    area = torch.mul(x2 - x1, y2 - y1)
    _, idx = scores.sort(0)  # sort in ascending order
    idx = idx[-top_k:]  # indices of the top-k largest vals

    count = 0
    while idx.numel() > 0:
        i = idx[-1]  # index of current largest val
        keep[count] = i
        count += 1
        if idx.size(0) == 1: break
        idx = idx[:-1]  # remove kept element from view
        # intersection rectangle between the kept box and every remaining box
        xx1 = torch.clamp(torch.index_select(x1, 0, idx), min=float(x1[i]))
        yy1 = torch.clamp(torch.index_select(y1, 0, idx), min=float(y1[i]))
        xx2 = torch.clamp(torch.index_select(x2, 0, idx), max=float(x2[i]))
        yy2 = torch.clamp(torch.index_select(y2, 0, idx), max=float(y2[i]))
        # boxes that do not intersect get a zero width/height
        w = torch.clamp(xx2 - xx1, min=0.0)
        h = torch.clamp(yy2 - yy1, min=0.0)
        inter = w*h
        # IoU = i / (area(a) + area(b) - i)
        rem_areas = torch.index_select(area, 0, idx)  # areas of the remaining boxes
        union = (rem_areas - inter) + area[i]
        IoU = inter/union
        # keep only elements with an IoU <= overlap
        idx = idx[IoU.le(overlap)]
    return keep, count

def load_learner():
    """Rebuild the exported learner from the checkpoint at `export`.

    Returns (learner, id2cat, cat2id): the learner with its weights loaded,
    the stored class list, and the mapping from each class to its index.
    """
    # NOTE: torch.load unpickles arbitrary objects -- only use trusted files
    checkpoint = torch.load(export)

    classes = checkpoint.pop("classes")
    class_to_index = {name: idx for idx, name in enumerate(classes)}

    base_path = checkpoint.pop("path")
    wrapped_model = checkpoint.pop("model")
    learner_cls = checkpoint.pop("class")
    weights_name = checkpoint.pop("weights")

    # the learner is constructed from a stand-in object carrying only the path
    learner = learner_cls(Fake(base_path), wrapped_model)
    learner.load(weights_name)
    return learner, classes, class_to_index

In [3]:
def predict_fn(learner, fn):
    """Run the detector on the image file `fn`.

    Returns (boxes, categories, scores) for the detections that survive
    thresholding and non-maximum suppression, or ([], [], []) when no
    category has any score above the threshold.
    """
    # open the image and apply tfms[1] (presumably the validation transforms),
    # then add a leading batch axis
    img = open_image(fn)
    batch = tfms[1](img, np.zeros(4))[0][None]

    # run the model on the GPU in evaluation mode
    net = learner.model
    net.cuda()
    net.eval()
    pred_cat, pred_bb = net(V(batch))

    # drop the batch axis in place and turn category activations into probabilities
    pred_cat.squeeze_()
    pred_cat.sigmoid_()
    pred_bb.squeeze_()

    # decode the box activations relative to the anchors
    pred_bb = preds_to_bbs(pred_bb, anchor_hws)

    # one row of per-anchor probabilities for each category
    conf_scores = pred_cat.t().data

    kept_scores, kept_boxes, kept_cats = [], [], []
    # the last row of conf_scores is never examined
    num_cats = len(conf_scores) - 1
    for cat in range(num_cats):
        cat_scores = conf_scores[cat]

        # anchors whose probability for this category exceeds 25%
        above = cat_scores > 0.25
        if above.sum() == 0:
            continue

        scores = cat_scores[above]

        # select the boxes of those anchors
        box_mask = above.unsqueeze(1).expand_as(pred_bb)
        boxes = pred_bb[box_mask].view(-1, 4)

        # suppress overlapping boxes of the same category
        ids, count = nms(boxes.data, scores, 0.4, 50)
        ids = ids[:count]

        kept_scores.append(scores[ids])
        kept_boxes.append(boxes.data[ids])
        kept_cats.append([cat] * count)

    if len(kept_cats) == 0:
        return [], [], []

    final_cats = T(np.concatenate(kept_cats))
    final_scores = torch.cat(kept_scores)
    final_bbs = torch.cat(kept_boxes)

    return final_bbs, final_cats, final_scores

In [4]:
# learner, _, _ = load_learner()
# predict_fn(learner, "../static/image_data/1571522_250538.jpg")

(
  0.8872  0.4312  0.9816  0.5564
  0.3452  0.3479  0.8688  0.6422
  0.1819  0.3156  0.3391  0.6514
 [torch.cuda.FloatTensor of size 3x4 (GPU 0)], 
  3
  7
  8
 [torch.cuda.LongTensor of size 3 (GPU 0)], 
  0.2954
  0.9845
  0.7131
 [torch.cuda.FloatTensor of size 3 (GPU 0)])

In [5]:
# f_model=resnet34
# size=224

# aug_tfms = [RandomRotate(10, tfm_y=TfmType.COORD),
#             RandomLighting(0.05, 0.05, tfm_y=TfmType.COORD),
#             RandomFlip(tfm_y=TfmType.COORD)]
# tfms = tfms_from_model(f_model,
#                        size,
#                        crop_type=CropType.NO,
#                        tfm_y=TfmType.COORD,
#                        aug_tfms=aug_tfms)

# def hw2corners(ctr, hw): return torch.cat([ctr-hw/2, ctr+hw/2], dim=1)

# anc_grids = [28,14,7,4,2,1]
# anc_zooms =  [.7, 2**0, 2**(1/3), 2**(2/3)]
# anc_ratios = [(1.,1.), (.5,1.), (1.,.5), (3.,1.), (1.,3.)]

# anchor_scales = [(anz*i,anz*j) for anz in anc_zooms for (i,j) in anc_ratios]
# k = len(anchor_scales)
# anc_offsets = [1/(o*2) for o in anc_grids]
# anc_x = np.concatenate([np.repeat(np.linspace(ao, 1-ao, ag), ag)
#                         for ao,ag in zip(anc_offsets,anc_grids)])
# anc_y = np.concatenate([np.tile(np.linspace(ao, 1-ao, ag), ag)
#                         for ao,ag in zip(anc_offsets,anc_grids)])
# anc_ctrs = np.repeat(np.stack([anc_x,anc_y], axis=1), k, axis=0)
# anc_sizes  =   np.concatenate([np.array([[o/ag,p/ag] for i in range(ag*ag) for o,p in anchor_scales])
#                for ag in anc_grids])
# grid_sizes = V(np.concatenate([np.array([ 1/ag       for i in range(ag*ag) for o,p in anchor_scales])
#                for ag in anc_grids]), requires_grad=False).unsqueeze(1)
# anchors = V(np.concatenate([anc_ctrs, anc_sizes], axis=1), requires_grad=False).float()
# anchor_cnr = hw2corners(anchors[:,:2], anchors[:,2:])

# class StdConv(nn.Module):
#     def __init__(self, n_in,n_out,stride=2,dp = 0.1):
#         super().__init__()
#         self.conv = nn.Conv2d(n_in,n_out,3,stride=stride,padding=1)
#         self.bn = nn.BatchNorm2d(n_out)
#         self.dropout = nn.Dropout(dp)
        
#     def forward(self,x):
#         return self.dropout(self.bn(F.relu(self.conv(x))))
    
# class OutConv(nn.Module):
#     def __init__(self, k, n_in, bias):
#         super().__init__()
#         self.k = k
#         self.oconv1 = nn.Conv2d(n_in, (len(id2cat)+1) * k, 3, padding=1)
#         self.oconv2 = nn.Conv2d(n_in, 4 * k, 3, padding = 1)
#         self.oconv1.bias.data.zero_().add_(bias)
        
#     def forward(self,x):
#         return [flatten_conv(self.oconv1(x), self.k),
#                 flatten_conv(self.oconv2(x), self.k)]

# def flatten_conv(x,k):
#     bs,nf,gx,gy = x.size()
#     x = x.permute(0,2,3,1).contiguous()
#     return x.view(bs,-1,nf//k)

# class SaveFeatures():
#     features=None
#     def __init__(self, m): self.hook = m.register_forward_hook(self.hook_fn)
#     def hook_fn(self, module, input, output): 
#         self.features = output
#         self.features_in = input
#     def remove(self): self.hook.remove()
        
# cut,lr_cut = model_meta[f_model]
# def get_base():
#     layers = cut_model(f_model(True), cut)
#     return nn.Sequential(*layers)

# drop = 0.4

# class SSD_Custom4(nn.Module):
#     def __init__(self, m_base, k, bias):
#         super().__init__()

#         self.m_base = m_base
#         self.sfs = [SaveFeatures(m_base[i]) for i in [5,6]] # 28, 14
        
#         self.drop = nn.Dropout(drop)
#         self.layer2 = StdConv(512,256, dp=drop, stride=1) # 7
#         self.layer3 = StdConv(256,256, dp=drop) # 4
#         self.layer4 = StdConv(256,256, dp=drop) # 2
#         self.layer5 = StdConv(256,256, dp=drop) # 1
        
#         self.lat6 = nn.Conv2d(256,256,kernel_size=1, stride=1, padding=0)
#         self.lat5 = nn.Conv2d(256,256,kernel_size=1, stride=1, padding=0)
#         self.lat4 = nn.Conv2d(256,256,kernel_size=1, stride=1, padding=0)
#         self.lat3 = nn.Conv2d(256,256,kernel_size=1, stride=1, padding=0)
#         self.lat2 = nn.Conv2d(256,256,kernel_size=1, stride=1, padding=0)
#         self.lat1 = nn.Conv2d(128,256,kernel_size=1, stride=1, padding=0)
        
#         self.upsamp2 = nn.Upsample(size=(2,2), mode='bilinear')
#         self.upsamp4 = nn.Upsample(size=(4,4), mode='bilinear')
#         self.upsamp7 = nn.Upsample(size=(7,7), mode='bilinear') # can't use nearest interpol for 4x4 -> 7x7
#         self.upsamp14 = nn.Upsample(size=(14,14), mode='bilinear')
#         self.upsamp28 = nn.Upsample(size=(28,28), mode='bilinear')
        
#         self.out1 = OutConv(k, 256, bias)
#         self.out2 = OutConv(k, 256, bias)
#         self.out3 = OutConv(k, 256, bias)
#         self.out4 = OutConv(k, 256, bias)
#         self.out5 = OutConv(k, 256, bias)
#         self.out6 = OutConv(k, 256, bias)

#     def forward(self,x):
#         x = self.drop(F.relu(self.m_base(x)))
        
#         c1 = F.relu(self.sfs[0].features) # 28
#         c2 = F.relu(self.sfs[1].features) # 14
#         c3 = self.layer2(x) # 7
#         c4 = self.layer3(c3) # 4
#         c5 = self.layer4(c4) # 2
#         c6 = self.layer5(c5) # 1
       
#         p6 = self.lat6(c6)
#         p5 = self.upsamp2(p6) + self.lat5(c5)    
#         p4 = self.upsamp4(p5) + self.lat4(c4)
#         p3 = self.upsamp7(p4) + self.lat3(c3)
#         p2 = self.upsamp14(p3) + self.lat2(c2)
#         p1 = self.upsamp28(p2) + self.lat1(c1)
        
#         o1c,o1l = self.out1(p1)
#         o2c,o2l = self.out2(p2)
#         o3c,o3l = self.out3(p3)
#         o4c,o4l = self.out4(p4)
#         o5c,o5l = self.out5(p5)
#         o6c,o6l = self.out6(p6)
        
#         return [torch.cat([o1c,o2c,o3c,o4c,o5c,o6c], dim=1),
#                 torch.cat([o1l,o2l,o3l,o4l,o5l,o6l], dim=1)]

# class MakeModel():
#     def __init__(self,model,name='makemodel'):
#         self.model,self.name = model,name

#     def get_layer_groups(self, precompute):
#         lgs = list(split_by_idxs(children(self.model.m_base), [lr_cut]))
#         return lgs + [children(self.model)[1:]]
    

# class Fake():
#     def __init__(self,p):
#         self.path = p

# def intersection(box_a,box_b):
#     min_xy = torch.max(box_a[:,None,:2],box_b[None,:,:2])
#     max_xy = torch.min(box_a[:,None,2:],box_b[None,:,2:])
#     inter = torch.clamp(max_xy-min_xy,min=0)
#     return inter[:,:,0] * inter[:,:,1]

# def get_size(box):
#     return (box[:,2]-box[:,0]) * (box[:,3] - box[:,1])

# def jaccard(box_a,box_b):
#     inter = intersection(box_a,box_b)
#     union = get_size(box_a).unsqueeze(1) + get_size(box_b).unsqueeze(0) - inter
#     return inter/union

# #Removes the zero padding in the target bbox/class
# def get_y(bbox,clas):
#     bbox = bbox.view(-1,4)/size
#     bb_keep = ((bbox[:,2] - bbox[:,0])>0.).nonzero()[:,0]
#     return bbox[bb_keep], clas[bb_keep]
    
# def actn_to_bb(actn, anchors):
#     actn_bbs = actn
#     actn_ctrs = torch.clamp(((actn_bbs[:,:2] * grid_sizes) + anchors[:,:2]),0,size)
#     actn_hw = torch.clamp(((1 + actn_bbs[:,2:]) * anchors[:,2:]),0,size)
#     return hw2corners(actn_ctrs,actn_hw)

# def map_to_ground_truth(overlaps, print_it=False):
#     prior_overlap, prior_idx = overlaps.max(1)
#     #if print_it: print(prior_overlap)
# #     pdb.set_trace()
#     gt_overlap, gt_idx = overlaps.max(0)
#     gt_overlap[prior_idx] = 1.99
#     for i,o in enumerate(prior_idx): gt_idx[o] = i
#     return gt_overlap,gt_idx

# def nms(boxes, scores, overlap=0.5, top_k=100):
#     keep = scores.new(scores.size(0)).zero_().long()
#     if boxes.numel() == 0: return keep
#     x1 = boxes[:, 0]
#     y1 = boxes[:, 1]
#     x2 = boxes[:, 2]
#     y2 = boxes[:, 3]
#     area = torch.mul(x2 - x1, y2 - y1)
#     v, idx = scores.sort(0)  # sort in ascending order
#     idx = idx[-top_k:]  # indices of the top-k largest vals
#     xx1 = boxes.new()
#     yy1 = boxes.new()
#     xx2 = boxes.new()
#     yy2 = boxes.new()
#     w = boxes.new()
#     h = boxes.new()

#     count = 0
#     while idx.numel() > 0:
#         i = idx[-1]  # index of current largest val
#         keep[count] = i
#         count += 1
#         if idx.size(0) == 1: break
#         idx = idx[:-1]  # remove kept element from view
#         # load bboxes of next highest vals
#         torch.index_select(x1, 0, idx, out=xx1)
#         torch.index_select(y1, 0, idx, out=yy1)
#         torch.index_select(x2, 0, idx, out=xx2)
#         torch.index_select(y2, 0, idx, out=yy2)
#         # store element-wise max with next highest score
#         xx1 = torch.clamp(xx1, min=x1[i])
#         yy1 = torch.clamp(yy1, min=y1[i])
#         xx2 = torch.clamp(xx2, max=x2[i])
#         yy2 = torch.clamp(yy2, max=y2[i])
#         w.resize_as_(xx2)
#         h.resize_as_(yy2)
#         w = xx2 - xx1
#         h = yy2 - yy1
#         # check sizes of xx1 and xx2.. after each iteration
#         w = torch.clamp(w, min=0.0)
#         h = torch.clamp(h, min=0.0)
#         inter = w*h
#         # IoU = i / (area(a) + area(b) - i)
#         rem_areas = torch.index_select(area, 0, idx)  # load remaining areas)
#         union = (rem_areas - inter) + area[i]
#         IoU = inter/union  # store result in iou
#         # keep only elements with an IoU <= overlap
#         idx = idx[IoU.le(overlap)]
#     return keep, count

# def load_learner():
#     export = "../models/export.pkl"
#     state = torch.load(export)
#     id2cat = state.pop("classes")
#     cat2id = {c:i for i,c in enumerate(id2cat)}
#     p = state.pop("path")
#     mm = state.pop("model")
#     c = state.pop("class")
#     learn1 = c(Fake(Path("../../")/p), mm)
#     learn1.load("fpn-modanet5")
#     return learn1, id2cat, cat2id

# def predict(learner, fn):
#     image = open_image(fn)
#     image = tfms[1](image, np.zeros(4))[0][None]
#     learner.model.cuda()
#     learner.model.eval()
#     pred_class,pred_bb = learner.model(V(image))
    
#     a_ic = actn_to_bb(pred_bb[0], anchors)
#     clas_pr, clas_ids = pred_class[0].max(1)
#     clas_pr = clas_pr.sigmoid()
    
#     conf_scores = pred_class[0].sigmoid().t().data
    
#     out1,out2,cc = [],[],[]
#     for cl in range(0, len(conf_scores)-1):
#         c_mask = conf_scores[cl] > 0.25
#         if c_mask.sum() == 0: continue
#         scores = conf_scores[cl][c_mask]
#         l_mask = c_mask.unsqueeze(1).expand_as(a_ic)
#         boxes = a_ic[l_mask].view(-1, 4)
#         ids, count = nms(boxes.data, scores, 0.4, 50)
#         ids = ids[:count]
#         out1.append(scores[ids])
#         out2.append(boxes.data[ids])
#         cc.append([cl]*count)
    
#     if not cc:
#         print(f"{i}: empty array")
#         return
    
#     cc = T(np.concatenate(cc))
#     out1 = torch.cat(out1)
#     out2 = torch.cat(out2)
    
#     return out2, cc, out1