In [1]:
%matplotlib inline
import numpy as np

import torch
import torch.optim as optim
from torch.autograd import Variable

from tqdm import tqdm
from models import SubCNN
import training_utils as tu
import sys
sys.path.append("../utils")
import logging_utils as lu

from time import time

import shutil


from graphviz import Digraph
import re
import torch
import torch.nn.functional as F
from torch.autograd import Variable
from torch.autograd import Variable
import torchvision.models as models


def make_dot(var):
    """Draw the autograd graph that produced ``var`` as a graphviz Digraph.

    The traversal starts at ``var.creator`` and follows each node's
    ``previous_functions`` entries. ``Variable`` nodes are drawn as light-blue
    boxes labelled with their size; every other node is labelled with its
    type name.

    Args:
        var: Object exposing a ``creator`` attribute (the root of the walk).

    Returns:
        The populated ``Digraph``.
    """
    box_style = dict(style='filled',
                     shape='box',
                     align='left',
                     fontsize='12',
                     ranksep='0.1',
                     height='0.2')
    dot = Digraph(node_attr=box_style, graph_attr=dict(size="12,12"))
    seen = set()

    def _visit(node):
        # Each node is emitted once, even when reachable through several paths.
        if node in seen:
            return

        node_id = str(id(node))
        if isinstance(node, Variable):
            dims = ', '.join('%d' % d for d in node.size())
            dot.node(node_id, '(' + dims + ')', fillcolor='lightblue')
        else:
            dot.node(node_id, str(type(node).__name__))
        seen.add(node)

        if not hasattr(node, 'previous_functions'):
            return
        for entry in node.previous_functions:
            # The first element of each entry is the upstream node.
            parent = entry[0]
            dot.edge(str(id(parent)), node_id)
            _visit(parent)

    _visit(var.creator)
    return dot


def _build_sampling_grid(patch_size, num_points):
    """Build the CUDA grid of patch-relative offsets, repeated per landmark.

    Args:
        patch_size: Side length of the square patch.
        num_points: Number of times the grid is repeated (one per landmark).

    Returns:
        A CUDA ``FloatTensor`` of size ``(1, num_points, 2 * h, 2 * h, 2)``
        with ``h = round(patch_size / 2)``. The last axis holds the offset
        along axis 0 followed by the offset along axis 1, each ranging over
        ``[-h, h)``.
    """
    patch_shape = np.array((patch_size, patch_size))
    patch_half_shape = np.require(np.round(patch_shape / 2), dtype=int)
    start = -patch_half_shape
    end = patch_half_shape
    grid = np.mgrid[start[0]:end[0], start[1]:end[1]]
    # (2, H, W) -> (H, W, 2)
    grid = grid.swapaxes(0, 2).swapaxes(0, 1).astype(np.float32)
    grid_shape = grid.shape

    grid = torch.FloatTensor(grid).cuda()
    # Repeat the grid once per landmark, then add a leading axis of size 1.
    grid = grid.contiguous().view((1,) + grid_shape).expand((num_points,) + grid_shape)
    expanded_size = grid.size()
    return grid.contiguous().view((1,) + expanded_size)


def train(settings):
    """Train the cascade of SubCNN regressors for eye tracking.

    Loads the "training" and "validation" splits from
    ``data/processed/CNN_database.h5``, builds ``settings.max_iteration``
    SubCNN models optimised jointly with Adam on an MSE loss, and runs
    ``settings.nb_epoch`` epochs. Metrics are reported after every epoch and
    regression figures are plotted every 10 epochs. The ``figures_pytorch``
    directory is removed at start-up. A CUDA device is required.

    Args:
        settings: StereoTrackerSettings instance. Reads ``max_iteration``,
            ``learning_rate``, ``nb_epoch`` and ``batch_size``; the attributes
            ``patch_size``, ``max_x`` and ``max_y`` are overwritten.
    """

    # Best-effort cleanup of figures from a previous run. Unlike a bare
    # ``except``, this does not swallow KeyboardInterrupt / SystemExit.
    shutil.rmtree("figures_pytorch", ignore_errors=True)

    # Sampling grid for landmarks
    max_x, max_y = 240, 240
    patch_size = 32
    num_landmarks = 21
    settings.patch_size = patch_size
    settings.max_x = max_x
    settings.max_y = max_y
    sampling_grid = _build_sampling_grid(patch_size, num_landmarks)

    ###########################
    # Data and normalization
    ###########################

    # Get data shape and normalization
    tr_img0, tr_img1, tr_pts0, tr_pts1, tr_init0, tr_init1 = tu.load_data(
        "data/processed/CNN_database.h5", "training")
    val_img0, val_img1, val_pts0, val_pts1, val_init0, val_init1 = tu.load_data(
        "data/processed/CNN_database.h5", "validation")

    # Random idx to plot figures.
    idxs_train = np.random.permutation(tr_img0.shape[0])[:4]
    idxs_val = np.random.permutation(val_img0.shape[0])[:4]

    ###########################
    # Neural Net
    ###########################

    n_filters = 32
    hidden_dim = 512
    input_dim = 42
    output_dim = 84

    # One regressor per cascade stage, all updated by a single optimizer.
    list_models = [SubCNN(input_dim, output_dim, n_filters, hidden_dim).cuda()
                   for _ in range(settings.max_iteration)]
    model_parameters = [{'params': model.parameters()} for model in list_models]
    optimizer = optim.Adam(model_parameters, lr=settings.learning_rate)

    loss_fn = torch.nn.MSELoss(size_average=True).cuda()

    ###########################
    # Monitoring
    ###########################

    # Initialize a dict to hold monitoring metrics
    d_losses = {"train_losses": [],
                "train_mean_shape_rmse": [],
                "train_std_shape_rmse": [],
                "val_mean_shape_rmse": [],
                "val_std_shape_rmse": [],
                "val_mean_shape_rmse_closed": [],
                "val_std_shape_rmse_closed": [],
                "val_mean_shape_rmse_open": [],
                "val_std_shape_rmse_open": [],
                "best_val_mean_shape_rmse": [],
                "duration": []}

    #################
    # Training
    ################
    lu.print_start_training()

    # The batch partition depends only on the dataset size, so build it once.
    # At least one batch is required: np.array_split rejects 0 sections, which
    # is what a dataset smaller than batch_size would otherwise request.
    num_elem = tr_img0.shape[0]
    num_batches = max(1, num_elem // settings.batch_size)
    list_batches = np.array_split(np.arange(num_elem), num_batches)

    # Every stage is trained from the first epoch onwards.
    list_models_epoch = list_models

    # The graph is rendered only once: every render goes through the same
    # make_dot/view() call, so repeating it per batch only relaunches the viewer.
    graph_rendered = False

    for e in range(settings.nb_epoch):

        s = time()

        # Per-stage batch losses, sized from the cascade rather than hard-coded.
        d_loss = {stage: [] for stage in range(len(list_models_epoch))}

        for batch_idxs in tqdm(list_batches):

            # Batches are contiguous index runs, so a plain slice selects them.
            batch = slice(batch_idxs[0], batch_idxs[-1] + 1)

            y_true, _y_init, y_pred, _list_dy_pred, list_y_pred = tu.forward_pass(
                settings, list_models_epoch,
                tr_img0[batch], tr_img1[batch],
                tr_pts0[batch], tr_pts1[batch],
                tr_init0[batch], tr_init1[batch],
                sampling_grid)

            if not graph_rendered:
                make_dot(y_pred).view()
                graph_rendered = True

            # Monitoring only: per-stage loss scaled by 240
            # (presumably the image extent max_x / max_y -- TODO confirm).
            for stage, stage_pred in enumerate(list_y_pred):
                loss = loss_fn(240 * stage_pred, 240 * y_true)
                d_loss.setdefault(stage, []).append(loss.cpu().data.numpy()[0])

            # Compute loss
            total_loss = loss_fn(y_pred, y_true)

            # Backward pass
            optimizer.zero_grad()
            total_loss.backward()
            optimizer.step()

        # Compute eval metrics
        print("")
        for stage in range(len(list_models_epoch)):
            print("Stage : %s, loss: %s" % (stage, np.mean(d_loss[stage])))

        # The training loss reported for the epoch is that of the last stage.
        d_losses["train_losses"].append(np.mean(d_loss[len(list_models_epoch) - 1]))
        d_losses["duration"].append(time() - s)
        d_losses, _ = tu.display_metrics(settings, d_losses, list_models_epoch, sampling_grid,
                                         tr_img0, tr_img1, tr_pts0, tr_pts1, tr_init0, tr_init1,
                                         val_img0, val_img1, val_pts0, val_pts1, val_init0, val_init1)

        # Plot some images
        if e % 10 == 0:
            # Training set
            tu.plot_regression(settings, list_models, sampling_grid, e,
                               tr_img0[idxs_train], tr_img1[idxs_train],
                               tr_pts0[idxs_train], tr_pts1[idxs_train],
                               tr_init0[idxs_train], tr_init1[idxs_train],
                               "training")
            # Validation set
            tu.plot_regression(settings, list_models, sampling_grid, e,
                               val_img0[idxs_val], val_img1[idxs_val],
                               val_pts0[idxs_val], val_pts1[idxs_val],
                               val_init0[idxs_val], val_init1[idxs_val],
                               "validation")

            # dy_true = y_true - y_init

            # dy_pred = list_dy_pred[0]

            # total_loss = mse_loss(240 * dy_true, 240 * dy_pred)
            # d_loss[0].append(total_loss.cpu().data.numpy()[0])
            # for i in range(1, len(list_dy_pred)):
            #     dy_pred += list_dy_pred[i]
            #     loss = mse_loss(240 * dy_true, 240 * dy_pred)
            #     total_loss += loss
            #     d_loss[i].append(loss.cpu().data.numpy()[0])

            # list_y_pred = [y_init + list_dy_pred[0]]
            # for dy in list_dy_pred[1:]:
            #     list_y_pred.append(dy + list_y_pred[-1])
            # total_loss = 0
            # for yp in list_y_pred:
            #     total_loss += loss_fn(240 * yp, 240 * y_true)



ValueError: Attempted relative import in non-package