In [1]:
%load_ext autoreload
%autoreload 2

### 2. Test the Forward part of the Network
> * Now make sure that we can map from an image into an SxSx2x11 Tensor
> * First make sure that the Yolo2 weights are loadable and make sense.

In [2]:
import os
import h5py
import numpy as np
import matplotlib.pyplot as plt
from PIL import Image
%matplotlib inline
# Notebook-wide matplotlib defaults; they apply to every figure created after this cell runs.
plt.rcParams['figure.figsize'] = (10.0, 8.0) # set default size of plots
plt.rcParams['image.interpolation'] = 'nearest'  # default interpolation mode for images
plt.rcParams['image.cmap'] = 'gray'  # default colormap for images
from Model.vis_utils import visualize_grid

  from ._conv import register_converters as _register_converters


In [3]:
# l2 = np.array(list((yolo_weights['model_weights']['conv2d_1']['conv2d_1'][u'kernel:0'])))
# grid = visualize_grid(l2.transpose(3, 1, 2, 0))
# print (grid.shape, l2.shape)
# plt.imshow(grid.astype('uint8'))
# plt.axis('off')
# plt.gcf().set_size_inches(5, 5)
# plt.show()

#### 2.1 Overfit Localizer in Tiny Data
> * For debugging, we run the forward and backward passes for `a few`
epochs.

In [4]:
import torch
# Print the installed PyTorch version so the recorded outputs can be tied to it.
print(torch.__version__)
import torch.nn.functional as F
from torch.utils.data import Dataset, DataLoader
from Model.dataloader import NutritionDataset
from Model.net import LocalizerNet
from train import train_localizer, calculate_iou

0.4.0


In [5]:
def train_localizer_network():
    """Assemble the data loaders, model, optimizer and LR scheduler, then train.

    Train and dev loaders are built over `NutritionDataset` with identical
    loader settings; the dev loader selects `split='dev'`. Only parameters
    with `requires_grad` set are handed to Adam.

    Returns:
        Whatever `train_localizer` returns for the 100-epoch run.
    """
    image_dir = '../Data/FullData/RawImages/'
    boxes_path = '../Data/FullData/BoundingBoxes.npy'
    splits_path = '../Data/FullData/train_dev_test.npy'
    yolo_weights_path = '../Data/FullData/yolo.h5'

    # The dataset's debug flag is switched on whenever CUDA is unavailable.
    debug = not torch.cuda.is_available()

    # Both loaders share the same batching / shuffling / worker settings.
    loader_kwargs = dict(batch_size=100, shuffle=True, num_workers=16)

    train_set = NutritionDataset(image_dir, boxes_path, splits_path, debug=debug)
    train_loader = DataLoader(train_set, **loader_kwargs)

    dev_set = NutritionDataset(
        image_dir, boxes_path, splits_path, split='dev', debug=debug
    )
    dev_loader = DataLoader(dev_set, **loader_kwargs)

    model = LocalizerNet(yolo_weights_path, use_full_yolo=True)

    # Parameters with requires_grad=False are left out of the optimizer.
    trainable_params = [p for p in model.parameters() if p.requires_grad]
    optimizer = torch.optim.Adam(trainable_params, lr=1.04e-4)

    # Epoch indices passed to MultiStepLR as its milestones.
    lr_milestones = [10, 25, 35, 50, 60, 75, 85, 95]
    scheduler = torch.optim.lr_scheduler.MultiStepLR(optimizer, lr_milestones)

    return train_localizer(
        model,
        optimizer,
        train_loader,
        dev_loader,
        epochs=100,
        scheduler=scheduler,
        restore=True,
    )

In [None]:
# Run the training routine and unpack the four values it returns (train/dev losses and train/dev mAP).
train_losses, dev_losses, train_map, dev_map = train_localizer_network()














=== Performance Check ===
	 Train Loss =  0.04407673329114914
	 Dev Loss =  0.15357188358902932
	 Train mAP =  [0.03202928 0.0106383  0.00531915 0.        ]
	 Dev mAP =  [0.02425616 0.02067571 0.01920639 0.01728796]















=== Performance Check ===
	 Train Loss =  0.03735364228487015
	 Dev Loss =  0.15138196498155593
	 Train mAP =  [0.06526571 0.02985452 0.01485149 0.0049505 ]
	 Dev mAP =  [0.02539962 0.01946412 0.01767123 0.01376451]















=== Performance Check ===
	 Train Loss =  0.03660574555397034
	 Dev Loss =  0.15993940085172653
	 Train mAP =  [0.08645833 0.06125    0.02041667 0.        ]
	 Dev mAP =  [0.02976772 0.02379627 0.02254451 0.0219698 ]















=== Performance Check ===
	 Train Loss =  0.029812796041369438
	 Dev Loss =  0.16300440281629563
	 Train mAP =  [0.11536154 0.08030803 0.04020402 0.0149515 ]
	 Dev mAP =  [0.02526521 0.02133434 0.01747328 0.01453211]















=== Performance Check ===
	 Train Loss =  0.03092048317193985
	 Dev Loss =  0.16200697794556618
	 Train mAP =  [0.09706439 0.06123737 0.02556818 0.00520833]
	 Dev mAP =  [0.02835252 0.02328653 0.02028829 0.01806808]















=== Performance Check ===
	 Train Loss =  0.033233679831027985
	 Dev Loss =  0.1608474001288414
	 Train mAP =  [0.09668853 0.04071644 0.02535666 0.01020514]
	 Dev mAP =  [0.03649985 0.03224604 0.03098001 0.02781158]















=== Performance Check ===
	 Train Loss =  0.03415418416261673
	 Dev Loss =  0.1575948029756546
	 Train mAP =  [0.09475948 0.04485449 0.01990199 0.        ]
	 Dev mAP =  [0.02874009 0.01828032 0.0169814  0.01577641]















=== Performance Check ===
	 Train Loss =  0.029720481485128403
	 Dev Loss =  0.16212756782770157
	 Train mAP =  [0.10779304 0.04088151 0.         0.        ]
	 Dev mAP =  [0.02626483 0.02179847 0.01991928 0.01929428]


In [None]:
def plot(train, dev, title):
    """Draw the `train` and `dev` series on one figure and display it.

    Args:
        train: values plotted under the legend label 'train'.
        dev: values plotted under the legend label 'dev'.
        title: text used as the figure title.
    """
    for series, name in ((train, 'train'), (dev, 'dev')):
        plt.plot(series, label=name)
    plt.title(title)
    plt.xlabel('Iteration')
    plt.legend()
    plt.show()

In [None]:
# Show the train and dev loss histories together on one figure.
plot(train_losses, dev_losses, title='Loss Curves')

In [None]:
# Overlay the dev and train mAP histories on the current axes; the train series uses '-o' markers.
# NOTE(review): the logged mAP values are 4-element arrays, so each call presumably draws
# several lines — confirm, and consider adding a legend and axis labels.
plt.plot(dev_map)
plt.plot(train_map, '-o')
# plt.savefig('goals2.png')

In [None]:
def calculate_mean_image():
    '''
    Calculate the mean image over the training split at two sizes.

    Every training image is resized via `resized_image` to (270, 480) and
    (540, 960) -- the sizes are handed to PIL's `Image.resize`, which takes
    (width, height) -- and summed into float arrays of shape (480, 270, 3)
    and (960, 540, 3).

    Returns:
        tuple: (mean_270, mean_540), each accumulated sum divided by the
        number of training images.

    Raises:
        ValueError: if the training split lists no images, since the mean
            would otherwise be a silent 0/0 (NaN) image.
    '''
    IMAGE_DIR = '../Data/FullData/RawImages/'
    DATA_PATH = '../Data/FullData/train_dev_test.npy'
    # `.item()` implies the file holds a 0-d object array wrapping a dict.
    # NumPy >= 1.16.3 refuses to unpickle object arrays unless
    # allow_pickle=True is given. Only load split files you trust: unpickling
    # can execute arbitrary code.
    splits = np.load(DATA_PATH, allow_pickle=True).item()
    image_paths = [os.path.join(IMAGE_DIR, f) for f in splits['train']]
    N = len(image_paths)
    if N == 0:
        raise ValueError("no training images listed in %r" % DATA_PATH)
    mean_270, mean_540 = np.zeros((480, 270, 3)), np.zeros((960, 540, 3))
    # Iterate directly: `tqdm` is never imported in this notebook, so wrapping
    # the loop in it raised NameError on a fresh kernel.
    for img in image_paths:
        mean_270 += resized_image((1080//4, 1920//4), img)
        mean_540 += resized_image((1080//2, 1920//2), img)
    return mean_270 / N, mean_540 / N

def resized_image(size, path):
    """Load the image at `path`, resize it to `size` and return it as an array.

    Args:
        size: target size forwarded unchanged to `Image.resize`.
        path: location of the image file to open.

    Returns:
        The bilinearly resized image converted with `np.asarray`.
    """
    with Image.open(path) as source:
        scaled = source.resize(size, resample=Image.BILINEAR)
        # Convert while the source file is still open, then let the
        # context manager close it on the way out.
        return np.asarray(scaled)

In [None]:
# # s, l = calculate_mean_image()
# mean_image_s, mean_image_l = (
#     Image.fromarray(s.astype('uint8'), 'RGB'), 
#     Image.fromarray(l.astype('uint8'), 'RGB')
# )