In [56]:
import torch
import pickle
import numpy as np
import torchvision.models as models
from torch.autograd import Variable
import torch.nn as nn
import torch.nn.functional as F
import torch.optim as optim
from generate_proposals import GenerateProposals
from roi_align import RoIAlignFunction, preprocess_rois
import utils.vis as vis_utils
import utils.result_utils as result_utils
import skimage.io as io
from utils.blob import prep_im_for_blob
import utils.dummy_datasets as dummy_datasets
from PIL import Image

In [57]:
# Directory that holds every pretrained caffe2 pickle referenced below.
_MODELS_DIR = "/home/at3577/driveME/src/fair-maskrcnn/models"

# Pickles whose file is named model_final.pkl, followed by the pickles named
# after the backbone itself.
R_101_FPN_PATH = _MODELS_DIR + "/35861858-R-101-FPN/model_final.pkl"
X_152_32x8d_FPN_IN5k_PATH = _MODELS_DIR + "/37129812-X-152-32x8d-FPN-IN5k/model_final.pkl"
R_101_PATH = _MODELS_DIR + "/R-101/R-101.pkl"
X_152_32x8d_IN5k_PATH = _MODELS_DIR + "/X-152-32x8d-IN5k/X-152-32x8d-IN5k.pkl"

# Pickles kept in the tmp/ sub-directory.
R_50_C4_PATH = _MODELS_DIR + "/tmp/model_final.pkl"
R_50_PATH = _MODELS_DIR + "/tmp/R-50.pkl"

In [58]:
class ResnetModel(nn.Module):
    """ResNet-101 feature extractor initialised from a caffe2-format weight pickle.

    A torchvision ResNet-101 is built, its stride placement is adjusted to
    match the caffe2 layout, the pretrained blobs are copied in, and the
    requested sub-layers are chained into ``self.model``.

    Args:
        pretrained_model_file: path to a pickle containing a ``'blobs'`` dict
            that maps caffe2 weight names to arrays.
        resnet_feature_extraction_layers: names of the ResNet attributes, in
            order, that make up the feature extractor.
    """

    def __init__(self, pretrained_model_file, resnet_feature_extraction_layers=('conv1','bn1','relu','maxpool','layer1','layer2','layer3')):
        super(ResnetModel, self).__init__()

        # init_weights() takes no arguments, so the path has to live on the
        # instance (it used to be read from an undefined name -> NameError).
        self.pretrained_model_file = pretrained_model_file

        # Direct call instead of eval() on a constant string.
        self.resnet_model = models.resnet101()

        # swap stride (2,2) and (1,1) in first layers (PyTorch ResNet is slightly different to caffe2 ResNet)
        # this is required for compatibility with caffe2 models
        for stage_name in ('layer2', 'layer3', 'layer4'):
            first_block = getattr(self.resnet_model, stage_name)[0]
            first_block.conv1.stride = (2, 2)
            first_block.conv2.stride = (1, 1)

        self.init_weights()

        # Only the listed layers are used as the feature extractor; whatever is
        # left out (by default layer4 onwards) is not part of self.model.
        self.model = torch.nn.Sequential(*[getattr(self.resnet_model, layer) for layer in resnet_feature_extraction_layers])
        self.model.eval()

    def forward(self, image):
        """Run ``image`` through the selected ResNet layers and return the features."""
        return self.model(image)

    def init_weights(self):
        """Copy the pretrained blobs from ``self.pretrained_model_file`` into the ResNet.

        Raises:
            KeyError: if the pickle lacks ``'blobs'`` or an expected weight name.
            AssertionError: if a blob's shape differs from the PyTorch tensor.
        """
        # NOTE: pickle.load can execute arbitrary code; only load trusted files.
        with open(self.pretrained_model_file, 'rb') as model_pickle_file:
            fb_model = pickle.load(model_pickle_file)
        # Model has two keys- config and blobs
        fb_blobs = fb_model['blobs']

        model_dict = self.resnet_model.state_dict()

        for key in list(model_dict.keys()):
            # Skip entries with no counterpart in the pickle: BatchNorm running
            # statistics, the BatchNorm step counter that newer PyTorch versions
            # add to the state dict, and the final fully connected layer.
            if 'running' in key or 'num_batches_tracked' in key or 'fc' in key:
                continue

            fb_key = self.convert_key_to_fb_format(key.split('.'))
            fb_tensor = torch.FloatTensor(fb_blobs[fb_key])

            assert model_dict[key].size() == fb_tensor.size(), \
                'shape mismatch for %s (%s)' % (key, fb_key)

            if key == 'conv1.weight':  # convert from BGR to RGB
                fb_tensor = torch.FloatTensor(fb_blobs[fb_key][:, (2, 1, 0), :, :])
            model_dict[key] = fb_tensor

        # update model
        self.resnet_model.load_state_dict(model_dict)

    def convert_key_to_fb_format(self, terms, i=0, parsed=''):
        """Convert a PyTorch ResNet weight name to the caffe2 weight name.

        Args:
            terms: the PyTorch state-dict key split on ``'.'``,
                e.g. ``['layer1', '0', 'conv1', 'weight']``.
            i: index of the first term to process (kept for compatibility with
                the former recursive implementation).
            parsed: prefix already produced for ``terms[:i]``.

        Returns:
            The caffe2 blob name, e.g. ``'res2_0_branch2a_w'``.
        """
        for idx in range(i, len(terms)):
            term = terms[idx]

            if idx == 0:
                # First term selects the stem or the residual stage.
                if term == 'conv1':
                    parsed = 'conv1'
                elif term == 'bn1':
                    parsed = 'res_conv1'
                elif term.startswith('layer'):
                    # layerN in PyTorch corresponds to res(N+1) in caffe2
                    parsed = 'res' + str(int(term[-1]) + 1)
                continue

            previous = terms[idx - 1]
            # Inside `downsample`, child '0' is the conv and child '1' the BN.
            after_conv = previous.startswith('conv') or previous == '0'
            after_bn = previous.startswith('bn') or previous == '1'

            if term == 'weight' and after_conv:
                parsed += '_w'
            elif term == 'weight' and after_bn:
                parsed += '_bn_s'
            elif term == 'bias' and after_bn:
                parsed += '_bn_b'
            elif previous.startswith('layer'):
                # block index within the stage
                parsed += '_' + term
            elif term.startswith('conv') or term.startswith('bn'):
                # conv1/bn1 -> branch2a, conv2/bn2 -> branch2b, ...
                parsed += '_branch2' + chr(96 + int(term[-1]))
            elif term == 'downsample':
                parsed += '_branch1'

        return parsed

In [59]:
class RegionProposalNetwork(nn.Module):
    """Region proposal head: scores anchors and predicts box deltas.

    The RPN proposes regions with a foreground/background score, i.e. it only
    tells whether an object is present. It has three parts:
      1) a 3x3 conv over the backbone features,
      2) a 1x1 conv with ``number_of_anchors`` output channels (anchor scores),
      3) a 1x1 conv with ``4 * number_of_anchors`` output channels (box deltas).

    Args:
        pretrained_model_file: path to a pickle containing a ``'blobs'`` dict
            with the RPN weights; it is read by ``init_weights()``.
        feature_extractor_output_channels: channel count of the input features.
        rpn_conv_output_channels: channel count produced by the 3x3 conv.
        number_of_anchors: number of anchors per spatial location.

    Note:
        The constructor does not load the pretrained weights; call
        ``init_weights()`` explicitly to do so.
    """

    def __init__(self, pretrained_model_file, feature_extractor_output_channels, rpn_conv_output_channels, number_of_anchors):
        super(RegionProposalNetwork, self).__init__()

        # init_weights() takes no arguments, so keep the path on the instance
        # (it used to be read from an undefined name -> NameError).
        self.pretrained_model_file = pretrained_model_file

        self.conv_rpn = torch.nn.Conv2d(in_channels=feature_extractor_output_channels,
                                        out_channels=rpn_conv_output_channels,
                                        kernel_size=3,
                                        stride=1,
                                        padding=1)
        self.rpn_cls_prob = torch.nn.Conv2d(in_channels=rpn_conv_output_channels,
                                            out_channels=number_of_anchors,
                                            kernel_size=1,
                                            stride=1,
                                            padding=0)
        self.rpn_bbox_pred = torch.nn.Conv2d(in_channels=rpn_conv_output_channels,
                                             out_channels=4*number_of_anchors,
                                             kernel_size=1,
                                             stride=1,
                                             padding=0)

    def forward(self, anchor_features):
        """Return ``(anchor_cls_prob, anchor_box_pred)`` for the input features.

        Args:
            anchor_features: feature map of shape (N, Cin, H, W).
        """
        conv_anchor_features = F.relu(self.conv_rpn(anchor_features))
        # dim=1 is what the implicit-dim softmax picked for 4-D input; spelling
        # it out keeps the result and removes the deprecation warning.
        # NOTE(review): the pretrained blob is named 'rpn_cls_logits' and this
        # layer has one channel per anchor, so a per-anchor sigmoid may have
        # been intended instead of a softmax across anchors - confirm.
        anchor_cls_prob = F.softmax(self.rpn_cls_prob(conv_anchor_features), dim=1)
        anchor_box_pred = self.rpn_bbox_pred(conv_anchor_features)
        return anchor_cls_prob, anchor_box_pred

    def init_weights(self):
        """Copy the pretrained RPN blobs from ``self.pretrained_model_file``.

        Raises:
            KeyError: if the pickle lacks ``'blobs'`` or one of the RPN blobs.
            ValueError: if a blob's shape differs from the layer parameter.
        """
        # NOTE: pickle.load can execute arbitrary code; only load trusted files.
        with open(self.pretrained_model_file, 'rb') as model_pickle_file:
            fb_model = pickle.load(model_pickle_file)
        # Model has two keys- config and blobs
        fb_blobs = fb_model['blobs']

        # (layer on this module, blob name prefix in the pickle)
        layer_to_blob = (
            (self.conv_rpn, 'conv_rpn'),
            (self.rpn_cls_prob, 'rpn_cls_logits'),
            (self.rpn_bbox_pred, 'rpn_bbox_pred'),
        )
        for layer, blob_prefix in layer_to_blob:
            for param, suffix in ((layer.weight, '_w'), (layer.bias, '_b')):
                blob_name = blob_prefix + suffix
                values = torch.FloatTensor(fb_blobs[blob_name])
                # Fail loudly rather than silently installing a tensor whose
                # shape does not match the layer.
                if values.size() != param.size():
                    raise ValueError('shape mismatch for %s: expected %s, got %s'
                                     % (blob_name, tuple(param.size()), tuple(values.size())))
                # copy_ keeps the parameter on its current device.
                param.data.copy_(values)
        

In [None]:
#ROI_POOLING
#TODO

In [64]:
class RCNN(nn.Module):
    """Detection head: per-class scores and per-class box parameters for each RoI.

    Args:
        roi_feature_channels: length of the feature vector supplied per RoI.
            (Open question carried over from the original: what will this size be?)
        N_classes: number of classes to score.
    """

    def __init__(self, roi_feature_channels, N_classes):
        super(RCNN, self).__init__()
        self.bbox_head = torch.nn.Linear(roi_feature_channels, 4*N_classes)
        self.class_prob_head = torch.nn.Linear(roi_feature_channels, N_classes)

    def forward(self, roi_features):
        """Return ``(cls_score, bbox_pred)`` for ``roi_features``.

        ``cls_score`` is a softmax over the class dimension and ``bbox_pred``
        holds four values per class.
        """
        # compute classification probabilities
        # The linear layer puts the classes on the last axis, so normalise
        # there; for 1-D and 2-D input this equals the former implicit dim.
        cls_score = F.softmax(self.class_prob_head(roi_features), dim=-1)

        # compute bounding box parameters
        bbox_pred = self.bbox_head(roi_features)

        return (cls_score, bbox_pred)

    def init_weights(self):
        """Load pretrained head weights (not implemented yet).

        Raises:
            NotImplementedError: always, until the loader is written.
        """
        # TODO: copy the pretrained head blobs into bbox_head / class_prob_head.
        # The original method had only a comment as its body, which is a syntax
        # error that stopped the whole class from being defined.
        raise NotImplementedError('RCNN.init_weights is not implemented yet')

In [63]:
class Detector(nn.Module):
    """Chains the ResNet backbone, the RPN head and the proposal generator.

    Args:
        pretrained_model_file: weight pickle path, handed to both the backbone
            and the RPN.
        resnet_feature_extraction_layers: backbone layers used as the feature
            extractor.
        feature_extractor_output_channels: channel count of the backbone output.
        rpn_conv_output_channels: channel count of the RPN's 3x3 conv.
        number_of_anchors: number of anchors per spatial location.
    """

    def __init__(self,
                 pretrained_model_file,
                 resnet_feature_extraction_layers,
                 feature_extractor_output_channels,
                 rpn_conv_output_channels,
                 number_of_anchors):
        super(Detector, self).__init__()

        # Backbone producing the image feature map.
        backbone_kwargs = dict(
            pretrained_model_file=pretrained_model_file,
            resnet_feature_extraction_layers=resnet_feature_extraction_layers,
        )
        self.resnet_model = ResnetModel(**backbone_kwargs)

        # Head that scores anchors and predicts box deltas on those features.
        rpn_kwargs = dict(
            pretrained_model_file=pretrained_model_file,
            feature_extractor_output_channels=feature_extractor_output_channels,
            rpn_conv_output_channels=rpn_conv_output_channels,
            number_of_anchors=number_of_anchors,
        )
        self.rpn = RegionProposalNetwork(**rpn_kwargs)

        # Turns the RPN outputs into RoIs; built with train=False.
        self.proposal_generator = GenerateProposals(train=False)

    def forward(self, image, scaling_factor=None):
        """Return ``(rpn_cls_prob, rpn_bbox_pred, rois, img_features)`` for ``image``.

        The sizes of dimensions 2 and 3 of ``image`` are passed to the proposal
        generator together with ``scaling_factor``. The RoI probabilities the
        generator returns are not part of the result.
        """
        height, width = image.size()[2:4]

        features = self.resnet_model(image)
        cls_prob, bbox_pred = self.rpn(features)

        proposals = self.proposal_generator(cls_prob, bbox_pred, height, width, scaling_factor)
        rois, _roi_probs = proposals

        return (cls_prob, bbox_pred, rois, features)

In [41]:
# Build the global `model` that eval_model() below calls.
# NOTE(review): `detector`, `arch`, `pretrained_model_file`, `use_rpn_head` and
# `use_mask_head` are not defined in any cell visible in this notebook, and these
# keyword arguments do not match the constructor of the `Detector` class defined
# above. This cell presumably relies on state from another notebook/session and
# will likely fail on a fresh kernel - confirm and update or remove.
model=  detector(arch=arch,
                 detector_pkl_file=pretrained_model_file,
                 use_rpn_head = use_rpn_head,
                 use_mask_head = use_mask_head)

def eval_model(sample):
    """Run the global ``model`` on one sample dict and return its four outputs.

    ``sample`` must provide the keys ``'image'``, ``'proposal_coords'`` and
    ``'scaling_factors'``; the latter is converted to a plain Python scalar
    before being passed as ``scaling_factor``.

    Returns:
        The tuple ``(class_scores, bbox_deltas, rois, img_features)``.
    """
    image = sample['image']
    proposal_coords = sample['proposal_coords']
    # single-element tensor -> Python number
    scale = sample['scaling_factors'].cpu().data.numpy().item()

    outputs = model(image, proposal_coords, scaling_factor=scale)
    class_scores, bbox_deltas, rois, img_features = outputs
    return class_scores, bbox_deltas, rois, img_features

In [66]:
# Demo image that is turned into a model input below.
image_filename = 'demo/33823288584_1d21cf0a26_k.jpg'

# Read the image and remember its shape before any preprocessing.
image = io.imread(image_filename)
orig_im_size = image.shape

# Preprocessing yields a list of images and a list of scales; only the first
# entry of each is used.
im_list, im_scales = prep_im_for_blob(image)

# Assemble the sample dict: the image has its last axis moved to the front and
# a leading axis of size 1 added (presumably HWC -> 1xCxHxW - confirm).
sample = {
    'image': torch.FloatTensor(im_list[0]).permute(2,0,1).unsqueeze(0),
    'scaling_factors': torch.FloatTensor([im_scales[0]]),
    'original_im_size': torch.FloatTensor(orig_im_size),
    'proposal_coords': torch.FloatTensor([-1]),  # dummy value
}


