In [37]:
# Project-local module; the cells below use its get_loader and SimpleHGModel.
import model_lib
import numpy as np
import warnings
# Suppress any warning whose message matches the regex below.
warnings.filterwarnings('ignore', '.*output shape of zoom.*')
import pickle
import importlib
# Re-execute model_lib, presumably so edits made to it while the kernel is
# running are picked up without a restart.
importlib.reload(model_lib)
import os
import time
# Select the GPU visible to this process (device "3" in PCI bus order).
# NOTE(review): these variables presumably have to be set before CUDA is first
# initialised to take effect — confirm that importing model_lib above does not
# already initialise it.
os.environ["CUDA_DEVICE_ORDER"] = "PCI_BUS_ID"
os.environ["CUDA_VISIBLE_DEVICES"] = "3"


In [38]:
# config to train
# TODO: check Config is correct
class ProposalConfig:
    """Static settings for the "InSegm" model plus a few derived values.

    Constants live on the class; ``__init__`` computes ``BATCH_SIZE``,
    ``IMAGE_SHAPE`` and ``MAX_BATCH_SIZE`` from them.
    """

    NAME = "InSegm"

    # ---- hardware and data loading ----
    GPU_COUNT = 1
    IMAGES_PER_GPU = 1  # online training
    NUM_WORKERS = 16
    PIN_MEMORY = True
    DATA_ORDER = "ins"

    # ---- schedule and optimiser ----
    STEPS_PER_EPOCH = 100
    VALIDATION_STEPS = 20
    LEARNING_RATE = 0.05
    LEARNING_MOMENTUM = 0.9
    WEIGHT_DECAY = 0.0001

    # ---- augmentation ----
    IMAGE_AUGMENT = True  # only flips
    # distortion settings: not going to use these
    N_DISTORTIONS = 0
    MAX_DISTORTION = 0.3
    MIN_DISTORTION = -0.1

    # ---- input geometry and normalisation ----
    WIDTH = 224
    HEIGHT = 224
    CROP_SIZE = 224
    IS_PADDED = True
    # Per-channel statistics, reshaped to (1, 1, 3) so they broadcast over an
    # (H, W, 3) image array.
    MEAN_PIXEL = np.array([0.485, 0.456, 0.406], dtype=np.float32).reshape(1, 1, -1)
    STD_PIXEL = np.array([0.229, 0.224, 0.225], dtype=np.float32).reshape(1, 1, -1)

    # ---- grids, clues and masks ----
    GRID_WIDTH = 16
    GRID_HEIGHT = 16
    GRID_SHAPE = (GRID_WIDTH, GRID_HEIGHT)
    GRID_RESOLUTION = (1, 1)
    CLUE_SHAPE = (20, 20)
    MASK_SHAPE = (64, 64)
    MASK_THRESOLD = 0.7  # (sic) spelling kept: it is the attribute's name

    # ---- detection ----
    MAX_GT_INSTANCES = 100
    DETECTION_MAX_INSTANCES = 100
    DETECTION_MIN_CONFIDENCE = 0.7
    DETECTION_NMS_THRESHOLD = 0.3

    # ---- classes ----
    NUM_CLASSES = 81  # equals len(CLASS_NAMES): 'BG' plus 80 object names
    CLASS_NAMES = [
        'BG', 'person', 'bicycle', 'car', 'motorcycle', 'airplane', 'bus',
        'train', 'truck', 'boat', 'traffic light', 'fire hydrant', 'stop sign',
        'parking meter', 'bench', 'bird', 'cat', 'dog', 'horse', 'sheep',
        'cow', 'elephant', 'bear', 'zebra', 'giraffe', 'backpack', 'umbrella',
        'handbag', 'tie', 'suitcase', 'frisbee', 'skis', 'snowboard',
        'sports ball', 'kite', 'baseball bat', 'baseball glove', 'skateboard',
        'surfboard', 'tennis racket', 'bottle', 'wine glass', 'cup', 'fork',
        'knife', 'spoon', 'bowl', 'banana', 'apple', 'sandwich', 'orange',
        'broccoli', 'carrot', 'hot dog', 'pizza', 'donut', 'cake', 'chair',
        'couch', 'potted plant', 'bed', 'dining table', 'toilet', 'tv',
        'laptop', 'mouse', 'remote', 'keyboard', 'cell phone', 'microwave',
        'oven', 'toaster', 'sink', 'refrigerator', 'book', 'clock', 'vase',
        'scissors', 'teddy bear', 'hair drier', 'toothbrush'
    ]

    def __init__(self):
        """Compute the batch- and shape-related settings from the constants."""
        batch_size = self.IMAGES_PER_GPU * self.GPU_COUNT
        self.BATCH_SIZE = batch_size
        self.IMAGE_SHAPE = (self.WIDTH, self.HEIGHT, 3)
        self.MAX_BATCH_SIZE = batch_size * 32

    def display(self):
        """Print every non-dunder, non-callable attribute as ``name value``."""
        print("\nConfigurations:")
        for attr_name in dir(self):
            if attr_name.startswith("__"):
                continue
            attr_value = getattr(self, attr_name)
            if callable(attr_value):
                continue
            print("{:30} {}".format(attr_name, attr_value))
        print("\n")

In [39]:
# Image folders for the training and validation splits.
# NOTE(review): all paths in this cell are machine-specific absolute paths;
# adjust them before running on another host.
train_image_dir = "/media/data/nishanth/aravind/train2017/"
val_image_dir = "/media/data/nishanth/aravind/val2017/"

# Pickled annotation objects for each split (unpickled in the next cell).
train_pickle = "/home/aravind/re/data/train_cwid.pickle"
val_pickle = "/home/aravind/re/data/val_cwid.pickle"

# Folder the model checkpoint is read from.
model_dir = "./models/"

# Shared configuration instance used by the loaders and the evaluation loop.
config = ProposalConfig()

In [40]:
# Loading of the training annotations is currently disabled; only the
# validation annotations are read. Anything that needs `train_cwid` requires
# the two lines below to be re-enabled first.
# with open(train_pickle,"rb") as train_ann:
#     train_cwid = pickle.load(train_ann)
# NOTE(review): pickle.load can execute arbitrary code from the file — only
# load pickles from a trusted source.
with open(val_pickle,"rb") as val_ann:
    val_cwid = pickle.load(val_ann)

In [41]:
# The training loader is disabled (it depends on `train_cwid`, whose load is
# commented out in the previous cell).
# train_loader = model_lib.get_loader(train_cwid,config,train_image_dir)
# Build the validation loader from the unpickled annotations, the config and
# the validation image folder.
val_loader = model_lib.get_loader(val_cwid,config,val_image_dir)

In [None]:
import torch
import torch.nn.functional as F
from PIL import Image

TOP_K = 5  # number of highest-scoring classes printed per sample

# Build the model once and overwrite every parameter that also exists in the
# classification-only checkpoint. Entries of the model's state dict that are
# absent from the checkpoint keep their freshly initialised values.
net = model_lib.SimpleHGModel()
net_dict = net.state_dict()
pretrained_dict = torch.load(model_dir+"model_vgg_class_only.pt")
pretrained_dict = {k: v for k, v in pretrained_dict.items() if k in net_dict}
net_dict.update(pretrained_dict)
net.load_state_dict(net_dict)

net = net.cuda()
# Switch to inference mode. Without this call, mode-dependent layers such as
# dropout or batch normalisation (if the model contains any) keep their
# training-time behaviour and distort the predicted scores.
net.eval()

with torch.no_grad():
    for batch_images, batch_impulses, batch_gt_responses, batch_class_ids in val_loader:
        # Ground-truth class name.
        # NOTE(review): argmax presumably relies on batch_class_ids being a
        # one-hot / score vector for a single sample — confirm against get_loader.
        print(config.CLASS_NAMES[np.argmax(batch_class_ids.numpy())])

        # Move axis 1 (assumed to be channels) last so PIL can render the arrays.
        images_np = np.moveaxis(batch_images.numpy(), 1, -1)
        impulses_np = np.moveaxis(batch_impulses.numpy(), 1, -1)
        # Undo the mean/std normalisation and show the first image of the batch.
        Image.fromarray(((images_np[0]*config.STD_PIXEL + config.MEAN_PIXEL)*255).astype(np.uint8),"RGB").show()
        # Show the first channel of the matching impulse map as a grey image.
        Image.fromarray((impulses_np[0][:,:,0]*128).astype(np.uint8),"L").show()

        # Only the tensors the network consumes are moved to the GPU.
        pred_class, pred_mask = net([batch_images.cuda(), batch_impulses.cuda()])

        # Class probabilities for the sample, then the TOP_K most likely classes.
        pred_class = F.softmax(pred_class,dim=-1).squeeze()
        maxs, indices = torch.topk(pred_class, TOP_K, -1)
        for rank in range(TOP_K):
            print(maxs[rank],indices[rank],config.CLASS_NAMES[int(indices[rank])])

        # Pause between samples: Enter shows the next one, "q"/"quit" stops.
        if input().strip().lower() in ("q", "quit"):
            break

sheep
[torch.Size([1, 32, 56, 56]), torch.Size([1, 64, 28, 28]), torch.Size([1, 64, 14, 14])]
tensor(0.3943, device='cuda:0') tensor(1, device='cuda:0') person
tensor(0.1383, device='cuda:0') tensor(58, device='cuda:0') couch
tensor(0.1140, device='cuda:0') tensor(20, device='cuda:0') cow
tensor(1.00000e-02 *
       7.8846, device='cuda:0') tensor(78, device='cuda:0') teddy bear
tensor(1.00000e-02 *
       6.4920, device='cuda:0') tensor(60, device='cuda:0') bed

car
[torch.Size([1, 32, 56, 56]), torch.Size([1, 64, 28, 28]), torch.Size([1, 64, 14, 14])]
tensor(0.9107, device='cuda:0') tensor(3, device='cuda:0') car
tensor(1.00000e-02 *
       3.1449, device='cuda:0') tensor(25, device='cuda:0') backpack
tensor(1.00000e-02 *
       1.4887, device='cuda:0') tensor(9, device='cuda:0') boat
tensor(1.00000e-03 *
       7.1183, device='cuda:0') tensor(26, device='cuda:0') umbrella
tensor(1.00000e-03 *
       6.6484, device='cuda:0') tensor(8, device='cuda:0') truck

bird
[torch.Size([1, 32, 


donut
[torch.Size([1, 32, 56, 56]), torch.Size([1, 64, 28, 28]), torch.Size([1, 64, 14, 14])]
tensor(0.6136, device='cuda:0') tensor(46, device='cuda:0') bowl
tensor(0.1837, device='cuda:0') tensor(49, device='cuda:0') sandwich
tensor(1.00000e-02 *
       7.6789, device='cuda:0') tensor(55, device='cuda:0') donut
tensor(1.00000e-02 *
       6.1773, device='cuda:0') tensor(56, device='cuda:0') cake
tensor(1.00000e-02 *
       2.3740, device='cuda:0') tensor(47, device='cuda:0') banana

potted plant
[torch.Size([1, 32, 56, 56]), torch.Size([1, 64, 28, 28]), torch.Size([1, 64, 14, 14])]
tensor(0.7821, device='cuda:0') tensor(2, device='cuda:0') bicycle
tensor(0.1027, device='cuda:0') tensor(59, device='cuda:0') potted plant
tensor(1.00000e-02 *
       4.8950, device='cuda:0') tensor(76, device='cuda:0') vase
tensor(1.00000e-02 *
       1.4625, device='cuda:0') tensor(40, device='cuda:0') bottle
tensor(1.00000e-03 *
       9.3402, device='cuda:0') tensor(41, device='cuda:0') wine glass

e

In [None]:
import random
import sys
import time

# np.nan is not a valid threshold (current NumPy raises ValueError for it);
# sys.maxsize is the documented way to print arrays without summarisation.
np.set_printoptions(threshold=sys.maxsize)

N_TIMING_BATCHES = 1000  # how many batch fetches to time

# NOTE(review): `train_loader` is only created by the commented-out
# get_loader call in an earlier cell; re-enable that call (and the training
# pickle load it depends on) before running this cell.

# 1) Time individual batch fetches on a dedicated iterator, so that
#    `train_loader` itself is not rebound and the cell stays re-runnable.
timing_iter = iter(train_loader)
for _ in range(N_TIMING_BATCHES):
    start = time.time()
    try:
        next(timing_iter)
    except StopIteration:
        # The loader holds fewer batches than requested; stop timing early.
        break
    print(time.time()-start)

# 2) Count how often each class id occurs over a pass of the loader. If
#    `train_loader` is re-iterable this is a fresh, complete pass; if it is a
#    one-shot iterator it continues after the timed batches, as before.
counts = [0] * config.NUM_CLASSES
for sample_idx, batch in enumerate(train_loader):
    print("sample:" + str(sample_idx))
    batch_images, batch_impulses, batch_gt_responses, batch_class_ids = batch
    # NOTE(review): .item() needs every entry to be a single-element tensor —
    # confirm the layout of batch_class_ids returned by the training loader.
    for class_id in batch_class_ids:
        counts[int(class_id.item())] += 1
print(counts)

In [None]:
# Length of every entry of class_wise_instance_info (presumably the number of
# instances stored per class).
# NOTE(review): `train_cid` is not defined in any cell visible here; the
# training annotations are unpickled (when enabled) as `train_cwid` — confirm
# which name is intended.
list(map(len, train_cid.class_wise_instance_info))