In [1]:
# Project-local module; later cells use its get_loader, MultiHGModel,
# multi_mask_loss_criterion and accuracy.
import model_lib
import numpy as np
import warnings

# Suppress warnings whose message matches "output shape of zoom".
warnings.filterwarnings('ignore', '.*output shape of zoom.*')
import pickle
import importlib
# Re-execute model_lib so edits made to the module on disk are picked up
# without restarting the kernel.
importlib.reload(model_lib)

import os
import time
# Order GPUs by PCI bus id and expose only device index 1 to CUDA.
# NOTE(review): these variables are set after model_lib has been imported; they
# only take effect if CUDA has not been initialised by then -- confirm.
os.environ["CUDA_DEVICE_ORDER"] = "PCI_BUS_ID"
os.environ["CUDA_VISIBLE_DEVICES"] = "1"


In [2]:
# Training configuration.
# TODO: check Config is correct
class ProposalConfig:
    """Static settings for the "InSegm" training run.

    Class attributes hold the fixed values. ``BATCH_SIZE`` and ``IMAGE_SHAPE``
    are derived from them and set on each instance by ``__init__``.
    """

    NAME = "InSegm"

    # --- batching / data loading (original note: "online training") ---
    GPU_COUNT = 1
    IMAGES_PER_GPU = 16
    STEPS_PER_EPOCH = 100
    VALIDATION_STEPS = 20
    NUM_WORKERS = 16
    PIN_MEMORY = True
    DATA_ORDER = "cw_ins"

    # --- input geometry ---
    WIDTH = 224
    HEIGHT = 224
    CROP_SIZE = 224

    # --- per-channel normalisation constants, float32, shaped (1, 1, 3) ---
    # Original note here: "only flips" (presumably about augmentation -- TODO confirm).
    MEAN_PIXEL = np.asarray([0.485, 0.456, 0.406], dtype=np.float32)[np.newaxis, np.newaxis, :]
    STD_PIXEL = np.asarray([0.229, 0.224, 0.225], dtype=np.float32)[np.newaxis, np.newaxis, :]

    # --- label space ---
    # Original note: "including gt". 81 equals len(CLASS_NAMES), which counts
    # the leading 'BG' entry.
    NUM_CLASSES = 81
    CLASS_NAMES = [
        'BG',
        'person', 'bicycle', 'car', 'motorcycle', 'airplane', 'bus', 'train',
        'truck', 'boat', 'traffic light', 'fire hydrant', 'stop sign',
        'parking meter', 'bench', 'bird', 'cat', 'dog', 'horse', 'sheep',
        'cow', 'elephant', 'bear', 'zebra', 'giraffe', 'backpack', 'umbrella',
        'handbag', 'tie', 'suitcase', 'frisbee', 'skis', 'snowboard',
        'sports ball', 'kite', 'baseball bat', 'baseball glove', 'skateboard',
        'surfboard', 'tennis racket', 'bottle', 'wine glass', 'cup', 'fork',
        'knife', 'spoon', 'bowl', 'banana', 'apple', 'sandwich', 'orange',
        'broccoli', 'carrot', 'hot dog', 'pizza', 'donut', 'cake', 'chair',
        'couch', 'potted plant', 'bed', 'dining table', 'toilet', 'tv',
        'laptop', 'mouse', 'remote', 'keyboard', 'cell phone', 'microwave',
        'oven', 'toaster', 'sink', 'refrigerator', 'book', 'clock', 'vase',
        'scissors', 'teddy bear', 'hair drier', 'toothbrush',
    ]

    def __init__(self):
        """Derive the per-instance batch size and image shape."""
        self.BATCH_SIZE = self.GPU_COUNT * self.IMAGES_PER_GPU
        self.IMAGE_SHAPE = (self.WIDTH, self.HEIGHT, 3)

    def display(self):
        """Print every non-dunder, non-callable attribute as a name/value row."""
        print("\nConfigurations:")
        for attr_name in dir(self):
            if attr_name.startswith("__"):
                continue
            attr_value = getattr(self, attr_name)
            if callable(attr_value):
                continue
            print("{:30} {}".format(attr_name, attr_value))
        print("\n")

In [3]:
# Dataset and checkpoint locations.
#
# Every dataset path hangs off a single root. The default is the directory the
# notebook originally hard-coded, so behaviour is unchanged unless the
# INSEGM_DATA_ROOT environment variable is set to point somewhere else.
data_root = os.environ.get("INSEGM_DATA_ROOT", "/media/Data1/interns/aravind/")

# Image directories (trailing "" keeps the trailing slash of the original paths).
train_image_dir = os.path.join(data_root, "train2017", "")
val_image_dir = os.path.join(data_root, "val2017", "")

# Pickled annotation files loaded in the next cell.
train_pickle = os.path.join(data_root, "re", "data", "train_cwid.pickle")
val_pickle = os.path.join(data_root, "re", "data", "val_cwid.pickle")

# To train on the validation split instead (as the original commented-out
# lines did), point the two train_* names at the val_* values:
# train_image_dir = val_image_dir
# train_pickle = val_pickle

config = ProposalConfig()

# Directory that checkpoints are read from and written to.
model_dir = "./models/"

In [4]:
def _load_pickle(path):
    """Return the object unpickled from the binary file at ``path``."""
    with open(path, "rb") as handle:
        return pickle.load(handle)


# NOTE: pickle.load can execute arbitrary code; only use it on trusted files.
train_cwid = _load_pickle(train_pickle)
val_cwid = _load_pickle(val_pickle)

In [5]:
# Build the train and validation loaders (in that order) from the unpickled
# annotations, the shared config and the matching image directory.
train_loader, val_loader = (
    model_lib.get_loader(annotations, config, image_dir)
    for annotations, image_dir in (
        (train_cwid, train_image_dir),
        (val_cwid, val_image_dir),
    )
)

In [6]:
import torch
import torch.nn.functional as F
import torch.optim as optim

# Instantiate the network defined in model_lib with its default arguments.
net = model_lib.MultiHGModel()

In [7]:
# Initialise the vgg0 sub-module from its checkpoint.
#
# map_location="cpu": without it torch.load restores each tensor onto the
# device it was saved from, which fails when that CUDA index is not visible
# in this process (only one GPU is exposed via CUDA_VISIBLE_DEVICES). The
# model is only moved to the GPU in a later cell, and load_state_dict copies
# values into the existing parameters, so loading on the CPU is sufficient.
vgg0_checkpoint = os.path.join(model_dir, "split_vgg16_features_4_zero_init.pt")
net.vgg0.load_state_dict(torch.load(vgg0_checkpoint, map_location="cpu"))

# Alternative initialisations kept for reference:
# net.vgg1.load_state_dict(torch.load("./models/split_vgg16_features_4.pt"))
# pretrained_dict = torch.load(model_dir+"reg_01.pt")
# net_dict = net.state_dict()
# pretrained_dict = {k: v for k, v in pretrained_dict.items() if k in net_dict}

# net_dict.update(pretrained_dict) 
# net.load_state_dict(net_dict)

In [8]:
# net.vgg1.load_state_dict(net.vgg0.state_dict())
# net.mp1.load_state_dict(net.mp0.state_dict())
# net.vgg1.load_state_dict(torch.load("./models/split_vgg16_features_4.pt"))

In [9]:
def set_trainable(module,state):
    """Set ``requires_grad`` to ``state`` on every parameter of ``module``.

    Args:
        module: object exposing ``parameters()`` (e.g. a ``torch.nn.Module``).
        state: value assigned to each parameter's ``requires_grad``.
    """
    all_params = module.parameters()
    for tensor in all_params:
        tensor.requires_grad = state

# Freeze the whole network, then re-enable only the parts trained in this run.
set_trainable(net, False)
for trainable_part in (net.mp0, net.class_predictor):
    set_trainable(trainable_part, True)
# set_trainable(net.mp1,True)

# Explicitly keep the ignore/copy filters of every second block in each
# "layerN" child of vgg0 frozen (all ignore_filters first, then all
# copy_filters, as before).
for name, child in net.vgg0.named_children():
    if name[:-1] != "layer":
        continue
    for filter_attr in ("ignore_filters", "copy_filters"):
        for block in child[::2]:
            set_trainable(getattr(block, filter_attr), False)
# for name,child in net.vgg1.named_children():
#     if name[:-1] == "layer":
#         [set_trainable(s.ignore_filters,False) for s in child[::2]]
#         [set_trainable(s.copy_filters,False) for s in child[::2]]


In [10]:
# Alternative optimizers kept for reference:
# optimizer = optim.Adam(filter(lambda p: p.requires_grad, net.parameters()),lr = 0.001)
# optimizer = optim.SGD(filter(lambda p: p.requires_grad, net.parameters()), lr=0.01, momentum=0.9)

# Per-parameter-group SGD options. Only the two sub-modules that were made
# trainable above are handed to the optimizer.
param_lr = [
    {'params': net.mp0.parameters(), 'lr': 1e-2, 'momentum': 0.9},
    # {'params': net.mp1.parameters(), 'lr': 1e-2, 'momentum': 0.9},
    {'params': net.class_predictor.parameters(), 'lr': 1e-2, 'momentum': 0.9},
]

# The vgg0/vgg1 ignore_filters / copy_filters are not optimised in this run.
# (The previous loop over net.vgg0 only assigned an unused `lr = 0` and has
# been removed.) To fine-tune them, append per-layer groups, e.g.:
# for name,child in net.vgg0.named_children():
#     if name[:-1] == "layer":
#         for s in child[::2]:
#             param_lr.append({'params':s.ignore_filters.parameters(),'lr':lr,'momentum':0.9})
#             param_lr.append({'params':s.copy_filters.parameters(),'lr':1e-3,'momentum':0.9})

# Report total vs. trainable parameter counts as a sanity check on the freezing.
net_size = sum(p.numel() for p in net.parameters())
trainable_size = sum(p.numel() for p in net.parameters() if p.requires_grad)
print(net_size,trainable_size)
# Printed before the optimizer is built, so 'params' still shows generator objects.
print(param_lr)
optimizer = optim.SGD(param_lr)
# NOTE(review): if this scheduler is re-enabled, factor must be < 1
# (ReduceLROnPlateau rejects factor >= 1.0) -- confirm intended value.
# scheduler = optim.lr_scheduler.ReduceLROnPlateau(optimizer, mode='min', factor=1, patience=10, verbose=True, threshold=0.0001, threshold_mode='rel', cooldown=100, min_lr=1e-2, eps=1e-08)

50704468 27711748
[{'params': <generator object Module.parameters at 0x7f8a51652ba0>, 'lr': 0.01, 'momentum': 0.9}, {'params': <generator object Module.parameters at 0x7f8a06ea58e0>, 'lr': 0.01, 'momentum': 0.9}]


In [11]:
# Move the model's parameters and buffers to the current CUDA device.
# NOTE(review): the optimizer was constructed in the previous cell, before this
# move. PyTorch's optim documentation recommends calling .cuda() before
# constructing optimizers -- confirm the cell order is intended.
net = net.cuda()

In [None]:
# torch.set_printoptions(threshold=10**6)

# Training loop: one optimizer step per batch, with running loss/accuracy
# printed and a checkpoint written every `iters_per_checkpoint` batches.
num_epochs = 10000        # upper bound on passes over train_loader
iters_per_checkpoint = 1  # logging / checkpoint interval, in batches

# The checkpoint is first written to a temporary file next to the final one and
# then swapped in with os.replace, so an interrupt in the middle of torch.save
# cannot leave a truncated "aa_00.pt" as the only copy.
checkpoint_path = model_dir + "aa_00.pt"
checkpoint_tmp_path = checkpoint_path + ".tmp"

for epoch in range(num_epochs):  # loop over the dataset multiple times
    # Running sums over the current logging window (reset at each epoch start).
    running_loss = 0.0
    loss1, loss2 = 0.0, 0.0   # class loss, mask loss
    acc1, acc2 = 0.0, 0.0     # class accuracy, mask accuracy
    for i, data in enumerate(train_loader, 0):
        # Each batch is a 5-tuple; move every tensor to the GPU.
        batch_images, batch_impulses, batch_gt_responses, batch_bboxes, batch_one_hot = [
            tensor.cuda() for tensor in data
        ]
        # zero the parameter gradients
        optimizer.zero_grad()
        # forward + backward + optimize
        pred_class, pred_masks = net([batch_images, batch_impulses])
        # we are giving no weighting for classes...
        class_loss, mask_loss = model_lib.multi_mask_loss_criterion(
            pred_class, batch_one_hot, pred_masks, batch_gt_responses, batch_bboxes)
        # Metrics are for reporting only, so no autograd graph is recorded for
        # them. Mask accuracy is computed on pred_masks[1] only.
        with torch.no_grad():
            class_acc, mask_acc = model_lib.accuracy(
                pred_class, batch_one_hot, pred_masks[1], batch_gt_responses)
        loss = class_loss + mask_loss
        loss.backward()
        optimizer.step()

        running_loss += loss.item()
        loss1 += class_loss.item()
        loss2 += mask_loss.item()
        acc1 += class_acc.item()
        acc2 += mask_acc.item()

        if i % iters_per_checkpoint == iters_per_checkpoint - 1:
#             scheduler.step((loss1+loss2)/iters_per_checkpoint)
            print("batch: ",i,"epoch: ",epoch, "loss: %0.5f" % (running_loss/iters_per_checkpoint))
            print("class_loss: %0.5f \t mask_loss: %0.5f"%(loss1/iters_per_checkpoint,loss2/iters_per_checkpoint))
            print("class_acc: %0.5f \t mask_acc: %0.5f"%(acc1/iters_per_checkpoint,acc2/iters_per_checkpoint))
#             torch.save(net.state_dict(),model_dir+("model_mask_vgg_%d_%d.pt")%(2,2))
            torch.save(net.state_dict(), checkpoint_tmp_path)
            os.replace(checkpoint_tmp_path, checkpoint_path)
            running_loss = 0.0
            loss1, loss2 = 0.0, 0.0
            acc1, acc2 = 0.0, 0.0
    # print("batch: %d time:%0.3f sec" %(i, end-start)); print(loss.item())
print('Finished Training')

batch:  0 epoch:  0 loss: 61.85410
class_loss: 6.14399 	 mask_loss: 55.71011
class_acc: 0.00000 	 mask_acc: 0.03568
batch:  1 epoch:  0 loss: 65.24720
class_loss: 20.66764 	 mask_loss: 44.57956
class_acc: 0.00000 	 mask_acc: 0.04938
batch:  2 epoch:  0 loss: 52.99132
class_loss: 30.62601 	 mask_loss: 22.36530
class_acc: 0.00000 	 mask_acc: 0.00309
batch:  3 epoch:  0 loss: 59.61137
class_loss: 41.70615 	 mask_loss: 17.90522
class_acc: 0.00000 	 mask_acc: 0.00000
batch:  4 epoch:  0 loss: 44.02042
class_loss: 32.84660 	 mask_loss: 11.17382
class_acc: 0.00000 	 mask_acc: 0.00000
batch:  5 epoch:  0 loss: 46.00536
class_loss: 34.93845 	 mask_loss: 11.06690
class_acc: 0.00000 	 mask_acc: 0.00000
batch:  6 epoch:  0 loss: 51.83308
class_loss: 37.49614 	 mask_loss: 14.33694
class_acc: 0.00000 	 mask_acc: 0.00000
batch:  7 epoch:  0 loss: 21.38422
class_loss: 8.35474 	 mask_loss: 13.02948
class_acc: 0.00000 	 mask_acc: 0.00000
batch:  8 epoch:  0 loss: 74.96761
class_loss: 6.72258 	 mask_loss

batch:  71 epoch:  0 loss: 11.63324
class_loss: 4.22345 	 mask_loss: 7.40979
class_acc: 0.00000 	 mask_acc: 0.19510
batch:  72 epoch:  0 loss: 8.64857
class_loss: 4.37661 	 mask_loss: 4.27196
class_acc: 0.00000 	 mask_acc: 0.14664
batch:  73 epoch:  0 loss: 6.90852
class_loss: 4.03166 	 mask_loss: 2.87687
class_acc: 0.25000 	 mask_acc: 0.17959
batch:  74 epoch:  0 loss: 17.44831
class_loss: 4.39671 	 mask_loss: 13.05160
class_acc: 0.00000 	 mask_acc: 0.27754
batch:  75 epoch:  0 loss: 10.61550
class_loss: 4.28629 	 mask_loss: 6.32921
class_acc: 0.00000 	 mask_acc: 0.16654
batch:  76 epoch:  0 loss: 46.63582
class_loss: 4.11642 	 mask_loss: 42.51941
class_acc: 0.25000 	 mask_acc: 0.00433
batch:  77 epoch:  0 loss: 29.77471
class_loss: 4.63364 	 mask_loss: 25.14108
class_acc: 0.00000 	 mask_acc: 0.04986
batch:  78 epoch:  0 loss: 9.76586
class_loss: 4.07141 	 mask_loss: 5.69445
class_acc: 0.00000 	 mask_acc: 0.21789
batch:  79 epoch:  0 loss: 13.84303
class_loss: 4.75217 	 mask_loss: 9.0