In [1]:
# Enable IPython's autoreload extension in mode 2: imported modules are
# reloaded before each cell runs, so edits to the project's local .py files
# take effect without restarting the kernel.
%load_ext autoreload
%autoreload 2

In [2]:
# Imports, run configuration and RNG seeding for the whole notebook.
# NOTE(review): imports are interleaved with executable setup here. The order
# is left untouched because this cell prints "... Network: initialising"
# messages - presumably emitted while importing models.model (confirm) - so
# moving the seeding relative to the imports may not be behaviour-neutral.
import torch
import numpy as np
import random
from options import DefaultConfig
# Single options object shared (and mutated) by every later cell.
opt = DefaultConfig()
# Make opt.gpu_ids the current CUDA device for this process.
torch.cuda.set_device(opt.gpu_ids)
# Seed torch (CPU, current CUDA device, all CUDA devices), NumPy and Python's
# `random` with the same opt.seed.
torch.manual_seed(opt.seed)
torch.cuda.manual_seed(opt.seed)
torch.cuda.manual_seed_all(opt.seed)
np.random.seed(opt.seed)
random.seed(opt.seed)
import sys
import os
import time
# NOTE(review): pdb is not referenced in any visible cell.
import pdb
# NOTE(review): `model` names the imported constructor here; a later cell
# rebinds the same name to the constructed instance.
from models.model import model
# Duplicate of the `import torch` at the top of this cell (harmless).
import torch
import torchvision
import torchvision.transforms as transforms
from scipy import interpolate
from utils.image_folder import ImageFolder
from utils import utils
# NOTE(review): wildcard import. ToCuda, calc_batch_psnr and save_model are
# used later without an explicit import, so they presumably come from here -
# prefer importing them by name.
from utils.utils import *
import matplotlib.pyplot as plt
from IPython.display import clear_output


Interaction Network: initialising
Propagation Network: initialising


In [3]:
def _make_video_loader(root, batch_size, extra_transforms=()):
    """Build the ImageFolder dataset rooted at `root` and a DataLoader over it.

    Args:
        root: directory handed to ImageFolder as the data root.
        batch_size: batch size for the DataLoader.
        extra_transforms: transforms inserted between the random resized crop
            and ToTensor (e.g. the training-only horizontal flip).

    Returns:
        (dataset, loader) tuple.
    """
    pipeline = [
        transforms.Resize(opt.loadSize),
        transforms.RandomResizedCrop(opt.fineSize),
    ]
    pipeline.extend(extra_transforms)
    pipeline.append(transforms.ToTensor())
    dataset = ImageFolder(root, opt.num_frames, transform=transforms.Compose(pipeline))
    # NOTE(review): shuffle=False is kept from the original for both splits;
    # confirm that a fixed sample order is intended for training.
    loader = torch.utils.data.DataLoader(dataset, batch_size=batch_size, shuffle=False)
    return dataset, loader


# os.path.join yields the same path as plain concatenation when opt.dataroot
# already ends with a separator, and inserts the separator when it does not
# (plain concatenation would otherwise produce e.g. "datatrain/").
opt.dataroot_train = os.path.join(opt.dataroot, 'train/')
dataset_train, dataset_loader_train = _make_video_loader(
    opt.dataroot_train,
    opt.batch_size,
    extra_transforms=[transforms.RandomHorizontalFlip()],
)
dataset_train_size = len(dataset_train)
print('#training videos = %d' % dataset_train_size)

# NOTE(review): validation still uses RandomResizedCrop (unchanged), so
# validation metrics vary with the RNG state; consider a deterministic crop.
opt.dataroot_val = os.path.join(opt.dataroot, 'val/')
dataset_val, dataset_loader_val = _make_video_loader(opt.dataroot_val, opt.batch_size_val)
dataset_val_size = len(dataset_val)
print('#validation videos = %d' % dataset_val_size)

#training videos = 40
#validation videos = 10


In [4]:
# Import the constructor under an alias instead of calling the bare name
# `model`: this cell rebinds `model` to the constructed instance, so calling
# `model(opt)` on a second run of the cell would invoke the instance rather
# than build a fresh model. With the alias the cell can be re-run safely.
from models.model import model as _build_model

model = _build_model(opt)
# Finish initialisation of the wrapper (the recorded output shows the
# interaction network weights being loaded at this point).
model.setup()

[Interaction net] loading Inet sccesses




In [5]:
# Shell escape: show the GPU status table (per-device memory and utilisation)
# so the state of the devices is recorded before the training cell runs.
!nvidia-smi 

Wed May  6 00:33:26 2020       
+-----------------------------------------------------------------------------+
| NVIDIA-SMI 440.59       Driver Version: 440.59       CUDA Version: 10.2     |
|-------------------------------+----------------------+----------------------+
| GPU  Name        Persistence-M| Bus-Id        Disp.A | Volatile Uncorr. ECC |
| Fan  Temp  Perf  Pwr:Usage/Cap|         Memory-Usage | GPU-Util  Compute M. |
|   0  GeForce RTX 208...  On   | 00000000:19:00.0 Off |                  N/A |
| 18%   27C    P8    11W / 250W |     11MiB / 11019MiB |      0%      Default |
+-------------------------------+----------------------+----------------------+
|   1  GeForce RTX 208...  On   | 00000000:1A:00.0 Off |                  N/A |
| 18%   53C    P2    57W / 250W |   1994MiB / 11019MiB |     14%      Default |
+-------------------------------+----------------------+----------------------+
|   2  GeForce RTX 208...  On   | 00000000:67:00.0 Off |                  N/A |
| 41%   

In [6]:
# Record the local wall-clock time in the notebook output.
# time.strftime formats the current local time when no time tuple is given.
print(time.strftime("%Y-%m-%d %H:%M:%S"))

2020-05-06 00:33:27


In [8]:
# Debug aid: anomaly detection adds a forward-pass traceback to autograd
# errors. It slows training noticeably; disable it once the graph is healthy.
torch.autograd.set_detect_anomaly(True)

# Exclusive upper bound for the randomly chosen interaction frame index.
# NOTE(review): hard-coded in the original; the recorded `marks` vector has 16
# entries, so this may be deliberately smaller than the clip length - confirm
# and derive it from opt.num_frames if appropriate.
INTERACTION_FRAME_RANGE = 14


def _plot_curve(fig_id, title, x_train, train_values, train_label,
                x_val, val_values, val_label):
    """Draw one training-vs-validation curve on matplotlib figure `fig_id`."""
    plt.figure(fig_id)
    plt.rcParams['figure.dpi'] = 120
    plt.title(title)
    plt.plot(x_train, train_values, label=train_label)
    plt.plot(x_val, val_values, label=val_label)
    plt.grid(True)
    plt.legend()
    plt.show()


# x-axis positions (global step) for the validation / training curves
X_val = []
X_train = []

val_losses = []
train_losses = []
val_psnrs = []
train_psnrs = []

total_steps = 0
for epoch in range(opt.epoch_count, opt.niter + opt.niter_decay):
    val_step = 0
    model_index = 0
    for i, data_raw in enumerate(dataset_loader_train):
        total_steps += 1
        val_step += 1
        # drop the loader's leading batch dimension
        data_raw = torch.squeeze(data_raw, dim=0)

        # First-round estimate without user interaction, in evaluation mode.
        opt.no_prev = True
        model.val()

        data = utils.get_colorization_data(data_raw, opt, p=opt.sample_p)
        if data is None:
            # The later calls are None-checked, so guard this one as well
            # instead of failing inside ToCuda.
            continue
        data = ToCuda(data)
        # Detach the automatic estimate: it is only an input to the interactive
        # rounds (same treatment as the per-round result below).
        data['prev'] = model.run_auto_colour(data)[0].detach().cpu()

        # The first estimate becomes the previous prediction; switch to training.
        model.train()
        opt.no_prev = False
        fam = None
        num_round = np.random.randint(3) + 1
        print('----------------')
        for _round in range(num_round):
            data_cpu = utils.get_colorization_data(data_raw, opt, prev=data['prev'], p=opt.sample_p)
            # Check for None BEFORE touching the dict (the original indexed
            # data_cpu['marks'] first, which made the check unreachable).
            if data_cpu is None:
                continue
            # Random interaction frame. Alternative kept from the original:
            # n = utils.argmax_l2(data_cpu['prev'], data_cpu['ab'])  # max-error frame
            n = np.random.randint(INTERACTION_FRAME_RANGE)
            data_cpu['marks'][n] = 1
            print(data_cpu['marks'])
            data = ToCuda(data_cpu)

            # Pass a copy of the previous-frame slice to the interaction
            # network: the result is written back into data['prev'] in place,
            # and autograd rejects in-place changes to tensors it has saved for
            # backward.
            # NOTE(review): this removes the aliasing visible in this cell; if
            # the recorded "modified by an inplace operation" error persists,
            # the remaining in-place write is presumably inside
            # model.run_propagation - TODO confirm there.
            prev_frame = data['prev'][n, :, :, :].clone()
            frame_ab, tr5 = model.run_interaction(
                data['gray'][n, :, :, :], data['clicks'][n, :, :, :], prev_frame, n)
            data['prev'][n, :, :, :] = frame_ab
            data['prev'], fam = model.run_propagation(data, n, tr5, fam)
            data['prev'] = data['prev'].detach().cpu()

        if val_step % opt.val_freq == 0:
            print('validation phase')
            val_loss = 0
            val_psnr = 0

            model.val()
            # No gradients are needed for validation; skip graph construction.
            with torch.no_grad():
                for ii, data_raw_val in enumerate(dataset_loader_val):
                    # automatic colourisation
                    opt.no_prev = True
                    data_cpu_val = utils.get_colorization_data(data_raw_val, opt, p=opt.sample_p)
                    if data_cpu_val is None:
                        continue
                    data_val = ToCuda(data_cpu_val)
                    fake_ab_val = model(data_val['gray'], data_val['clicks'], data_val['prev'])
                    prev_ab_val = fake_ab_val.detach().cpu()

                    # refine using the first colourisation as previous prediction
                    opt.no_prev = False
                    data_cpu_val = utils.get_colorization_data(data_raw_val, opt, prev=prev_ab_val, p=opt.sample_p)
                    if data_cpu_val is None:
                        continue
                    data_val = ToCuda(data_cpu_val)
                    fake_ab_val = model(data_val['gray'], data_val['clicks'], data_val['prev'])
                    loss_val = model.calc_loss(data_val['ab'], fake_ab_val)
                    psnr_val = calc_batch_psnr(data_val['gray'].detach(), data_val['ab'].detach(),
                                               fake_ab_val.detach(), opt, avg=False)

                    val_loss += loss_val.detach().cpu().numpy()
                    val_psnr += psnr_val
            # NOTE(review): averages over the dataset size even when batches
            # were skipped above (unchanged from the original).
            val_psnrs.append(val_psnr / dataset_val_size)
            val_losses.append(val_loss / dataset_val_size)
            X_val.append(total_steps)
            model.train()

            # save model
            model_index += 1
            save_model(model, opt, epoch, model_index, val_psnr / dataset_val_size)

        if total_steps % opt.print_freq == 0:
            X_train.append(total_steps)
            # The original referenced an undefined `prev_ab`; the latest
            # prediction lives in data['prev'] (detached, on CPU), so move it
            # next to the ground truth before computing PSNR.
            latest_ab = data['prev'].detach().to(data['ab'].device)
            train_psnrs.append(calc_batch_psnr(data['gray'].detach(), data['ab'].detach(), latest_ab, opt))
            # .detach().cpu() so the conversion also works for CUDA or
            # grad-tracking loss tensors (matches the validation loss above).
            train_losses.append(model.total_loss.detach().cpu().numpy() / data['gray'].shape[0])

            # redraw the loss and PSNR curves in place
            clear_output(wait=True)
            _plot_curve(1, 'Loss',
                        X_train, train_losses, "Training loss",
                        X_val, val_losses, "Validation loss")
            _plot_curve(2, 'PSNR',
                        X_train, train_psnrs, "Training PSNR",
                        X_val, val_psnrs, "Validation PSNR")

    # persist the metric histories accumulated so far at the end of each epoch
    np.save('Iter'+str(epoch)+'train_loss.npy', train_losses)
    np.save('Iter'+str(epoch)+'val_loss.npy', val_losses)
    np.save('Iter'+str(epoch)+'train_psnr.npy', train_psnrs)
    np.save('Iter'+str(epoch)+'val_psnrs.npy', val_psnrs)


----------------
[0. 0. 0. 0. 1. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0.]
[MODEL: interaction network] User Interaction on 4
[MODEL: propagation network] >>>>>>>>> 4 to 5


RuntimeError: one of the variables needed for gradient computation has been modified by an inplace operation: [torch.cuda.FloatTensor [1, 2, 224, 224]], which is output 0 of UnsqueezeBackward0, is at version 2; expected version 1 instead. Hint: the backtrace further above shows the operation that failed to compute its gradient. The variable in question was changed in there or anywhere later. Good luck!

In [9]:
#!sudo apt-get install -y psmisc
#!fuser /dev/nvidia*

In [None]:
#!kill 20457      

In [25]:
# NOTE(review): leftover scratch cell that displays `l`. `l` is not assigned
# in any visible cell, so this likely relies on leftover kernel state and will
# not survive Restart & Run All - remove it or define `l` explicitly.
l

tensor([1., 0., 0., 0., 0., 0., 0., 0., 0., 0.])

In [8]:
# NOTE(review): leftover scratch cell showing that str(None) is the string
# 'None'; its result is not used by any visible cell.
str(None)

'None'