In [1]:
# Project-local module; the cells below use its get_loader, SimpleHGModel and
# loss_criterion.
import model_lib
import numpy as np
import warnings
# Silence any warning whose message matches this regular expression.
warnings.filterwarnings('ignore', '.*output shape of zoom.*')
import pickle
import importlib
# Re-execute model_lib so that edits made to it on disk are picked up when this
# cell is re-run in a live kernel.
importlib.reload(model_lib)
import os
import time
# Enumerate CUDA devices in PCI bus order and expose only device index 1 to
# this process.
# NOTE(review): these variables only take effect if they are set before CUDA is
# initialised; model_lib is imported above, so confirm it does not touch CUDA at
# import time.
os.environ["CUDA_DEVICE_ORDER"] = "PCI_BUS_ID"
os.environ["CUDA_VISIBLE_DEVICES"] = "1"


In [2]:
# config to train
# TODO: check Config is correct
class ProposalConfig():
    """Static settings for the "InSegm" experiment.

    Class attributes hold the hand-tuned constants. Instantiating the class
    additionally derives ``BATCH_SIZE``, ``IMAGE_SHAPE`` and ``MAX_BATCH_SIZE``
    from them. ``display()`` prints every non-callable, non-dunder attribute.
    """

    # ---- run identity / hardware ----
    NAME = "InSegm"
    GPU_COUNT = 1
    IMAGES_PER_GPU = 16

    # ---- schedule ----
    STEPS_PER_EPOCH = 100
    VALIDATION_STEPS = 20

    # ---- optimiser ----
    LEARNING_RATE = 0.05
    LEARNING_MOMENTUM = 0.9
    WEIGHT_DECAY = 0.0001

    # ---- distortion settings (original author's note: not going to be used) ----
    N_DISTORTIONS = 0
    MAX_DISTORTION = 0.3
    MIN_DISTORTION = -0.1

    # ---- input images ----
    WIDTH = 224
    HEIGHT = 224
    MEAN_PIXEL = np.array([123.7, 116.8, 103.9], dtype=np.float32)
    # Original author's note: augmentation is "only flips".
    IMAGE_AUGMENT = True
    IS_PADDED = True

    # ---- masks / grids ----
    MASK_SHAPE = (64, 64)
    CLUE_SHAPE = (20, 20)
    GRID_WIDTH = 16
    GRID_HEIGHT = 16
    GRID_SHAPE = (GRID_WIDTH, GRID_HEIGHT)
    GRID_RESOLUTION = (1, 1)
    # Attribute name kept exactly as originally spelled; it is part of the
    # class's public surface.
    MASK_THRESOLD = 0.7

    # ---- detection ----
    MAX_GT_INSTANCES = 100
    DETECTION_MAX_INSTANCES = 100
    DETECTION_MIN_CONFIDENCE = 0.7
    DETECTION_NMS_THRESHOLD = 0.3

    # ---- classes ----
    # Same length as CLASS_NAMES below, whose first entry is 'BG'.
    NUM_CLASSES = 81
    CLASS_NAMES = [
        'BG', 'person',
        'bicycle', 'car', 'motorcycle', 'airplane', 'bus', 'train', 'truck',
        'boat',
        'traffic light', 'fire hydrant', 'stop sign', 'parking meter', 'bench',
        'bird', 'cat', 'dog', 'horse', 'sheep', 'cow', 'elephant', 'bear',
        'zebra', 'giraffe',
        'backpack', 'umbrella', 'handbag', 'tie', 'suitcase',
        'frisbee', 'skis', 'snowboard', 'sports ball', 'kite', 'baseball bat',
        'baseball glove', 'skateboard', 'surfboard', 'tennis racket',
        'bottle', 'wine glass', 'cup', 'fork', 'knife', 'spoon', 'bowl',
        'banana', 'apple', 'sandwich', 'orange', 'broccoli', 'carrot',
        'hot dog', 'pizza', 'donut', 'cake',
        'chair', 'couch', 'potted plant', 'bed', 'dining table', 'toilet',
        'tv', 'laptop', 'mouse', 'remote', 'keyboard', 'cell phone',
        'microwave', 'oven', 'toaster', 'sink', 'refrigerator',
        'book', 'clock', 'vase', 'scissors', 'teddy bear', 'hair drier',
        'toothbrush',
    ]

    def __init__(self):
        """Derive the per-instance values that depend on the constants above."""
        per_step = self.IMAGES_PER_GPU * self.GPU_COUNT
        self.BATCH_SIZE = per_step
        # Tuple order is (WIDTH, HEIGHT, channels), exactly as in the original.
        self.IMAGE_SHAPE = (self.WIDTH, self.HEIGHT, 3)
        self.MAX_BATCH_SIZE = per_step * 32

    def display(self):
        """Print each configuration attribute as a left-aligned name/value row."""
        print("\nConfigurations:")
        for attr_name in dir(self):
            if attr_name.startswith("__"):
                continue
            value = getattr(self, attr_name)
            if callable(value):
                continue
            print("{:30} {}".format(attr_name, value))
        print("\n")

In [None]:
# Filesystem locations and the shared configuration object for this notebook.
# NOTE(review): root_dir is an absolute, machine-specific path.
root_dir = "/media/data/nishanth/aravind/"
model_dir = "./models/"
config = ProposalConfig()

# Placeholders only; nothing in this cell assigns real dataset objects to them.
train_dataset = None
val_dataset = None

# NOTE(review): both names resolve to the same "val_cid.pickle" file, so the
# training data and the validation data are identical. Confirm this is
# intentional (e.g. a quick debugging setup) before trusting validation numbers.
val_pickle = root_dir + "val_cid.pickle"
train_pickle = root_dir + "val_cid.pickle"
class ClassInstancesDataset():
    """Plain container for instance records, stored flat and bucketed by class.

    NOTE(review): nothing in this notebook instantiates this class directly.
    Presumably the pickles loaded below contain instances of it, in which case
    it must be defined under this exact name before ``pickle.load`` runs.
    Confirm against the code that wrote the pickles.

    Args:
        num_classes: number of per-class buckets to create. Defaults to 81,
            the value that was previously hard-coded.

    Attributes:
        class_wise_instance_info: list of ``num_classes`` independent empty lists.
        instance_info: empty list.
    """
    def __init__(self, num_classes=81):
        # A comprehension (not ``[[]] * n``) so every bucket is a distinct list.
        self.class_wise_instance_info = [[] for _ in range(num_classes)]
        self.instance_info = []
def _load_pickle(path):
    """Return the object stored in the pickle file at ``path``."""
    # NOTE(review): unpickling can execute arbitrary code; only point this at
    # files you trust.
    with open(path, "rb") as handle:
        return pickle.load(handle)


# Read the annotation objects first, then wrap each one in a loader.
train_cid = _load_pickle(train_pickle)
val_cid = _load_pickle(val_pickle)
train_loader = model_lib.get_loader(train_cid, config)
val_loader = model_lib.get_loader(val_cid, config)

In [None]:
import os
import torch.optim as optim
import torch

# Build the model on the GPU and an SGD optimiser over all of its parameters.
# NOTE(review): lr=0.0001 is hard-coded and differs from config.LEARNING_RATE
# (0.05); config.WEIGHT_DECAY is not applied either. Confirm which is intended.
net = model_lib.SimpleHGModel()
net = net.cuda()
optimizer = optim.SGD(net.parameters(), lr=0.0001, momentum=0.9)

# Total number of scalar parameters in the model.
net_size = sum(p.numel() for p in net.parameters())
print(net_size)

# torch.save does not create missing directories, so make sure the checkpoint
# directory exists before the first save.
os.makedirs(model_dir, exist_ok=True)

iters_per_checkpoint = 20
for epoch in range(10):  # loop over the dataset multiple times
    running_loss = 0.0
    # Number of batches folded into running_loss since the last report. The
    # first report of an epoch (i == 0) covers a single batch, so dividing by
    # iters_per_checkpoint there would understate the loss.
    batches_since_report = 0
    for i, data in enumerate(train_loader, 0):
        # Each batch is a 4-tuple; move every tensor to the GPU.
        batch_images, batch_impulses, batch_gt_responses, batch_class_ids = data
        batch_images = batch_images.cuda()
        batch_impulses = batch_impulses.cuda()
        batch_gt_responses = batch_gt_responses.cuda()
        batch_class_ids = batch_class_ids.cuda()

        # zero the parameter gradients
        optimizer.zero_grad()

        # forward + backward + optimize
        pred_class = net([batch_images, batch_impulses])
        # The first two arguments are passed as literal 0; only the class ids
        # and class predictions are supplied to the criterion here.
        loss = model_lib.loss_criterion(0, 0, batch_class_ids, pred_class)
        loss.backward()
        optimizer.step()

        running_loss += loss.item()
        batches_since_report += 1
        if i % iters_per_checkpoint == 0:
            mean_loss = running_loss / batches_since_report
            print("batch: ", i, "epoch: ", epoch, "loss: %0.5f" % mean_loss)
            # One checkpoint file per (epoch, batch) report.
            checkpoint_path = os.path.join(model_dir, "model_%d_%d.pt" % (epoch, i))
            torch.save(net.state_dict(), checkpoint_path)
            running_loss = 0.0
            batches_since_report = 0

print('Finished Training')

In [9]:
import sys
from PIL import Image
import random
import time
# Print arrays in full. np.nan is rejected as a threshold by current NumPy
# (ValueError); sys.maxsize is the documented way to disable summarisation.
np.set_printoptions(threshold=sys.maxsize)

# Interactive sanity check of the loader: for every sample show the image, its
# ground-truth response and its impulse in the system image viewer, print the
# class name, then wait for Enter. Interrupt the kernel to stop.
for i, g in enumerate(train_loader):
    print("sample:" + str(i))
    batch_images, batch_impulses, batch_gt_responses, batch_class_ids = g
    # Move axis 1 to the end so each image can be handed to PIL as an RGB array.
    batch_images = np.moveaxis(batch_images.numpy(), 1, -1)
    print(batch_images.shape)
    # Scale by 128 so non-zero entries are visible as grey in an 8-bit image.
    # NOTE(review): squeeze() with no axis also drops the batch axis when the
    # batch holds a single sample; indexing by j below assumes it survives.
    batch_impulses = batch_impulses.squeeze().numpy() * 128
    batch_gt_responses = batch_gt_responses.squeeze().numpy() * 128
    batch_class_ids = batch_class_ids.numpy()
    for j in range(batch_images.shape[0]):
        img = Image.fromarray((batch_images[j]).astype("uint8"), "RGB")
        img.show()
        # Square root of the response sum after undoing the *128 scaling.
        print((np.sum(batch_gt_responses[j]) // 128) ** 0.5)
        mask = Image.fromarray((batch_gt_responses[j]).astype("uint8"), "L")
        mask.show()
        print(config.CLASS_NAMES[batch_class_ids[j]])
        impulse = Image.fromarray((batch_impulses[j]).astype("uint8"), "L")
        impulse.show()
        # Block until the user presses Enter before showing the next sample.
        input()

10
2
/media/data/nishanth/aravind//val2017/000000393226.jpg 10 2
19
1
/media/data/nishanth/aravind//val2017/000000458755.jpg 19 1
57
1
/media/data/nishanth/aravind//val2017/000000311303.jpg 57 1
32
1
/media/data/nishanth/aravind//val2017/000000393469.jpg 32 1
sample:0
(4, 224, 224, 3)
2.23606797749979
traffic light

70.40596565632774
sheep

6.855654600401044
chair

5.477225575051661
snowboard

36
1
/media/data/nishanth/aravind//val2017/000000213171.jpg 36 1
45
1
/media/data/nishanth/aravind//val2017/000000344100.jpg 45 1
46
3
/media/data/nishanth/aravind//val2017/000000434230.jpg 46 3
17
2
/media/data/nishanth/aravind//val2017/000000049269.jpg 17 2
sample:1
(4, 224, 224, 3)
15.556349186104045
baseball glove

8.306623862918075
spoon

7.0
bowl

80.3305670339753
dog

66
1
/media/data/nishanth/aravind//val2017/000000434247.jpg 66 1
75
2
/media/data/nishanth/aravind//val2017/000000098392.jpg 75 2
75
3
/media/data/nishanth/aravind//val2017/000000000139.jpg 75 3
37
1
/media/data/nishanth/arav

KeyboardInterrupt: 