# GCN for DAVIS 2016

In this notebook, a custom [PyTorch Geometric](https://rusty1s.github.io/pytorch_geometric/build/html/index.html) [InMemoryDataset](https://rusty1s.github.io/pytorch_geometric/build/html/_modules/torch_geometric/data/in_memory_dataset.html#InMemoryDataset) for the DAVIS 2016 dataset is created. The implementation is based on this [tutorial](https://rusty1s.github.io/pytorch_geometric/build/html/notes/create_dataset.html). The dataset is then used to train a simple GCN network as a first evaluation based on this [tutorial](https://rusty1s.github.io/pytorch_geometric/build/html/notes/introduction.html#learning-methods-on-graphs).

The dataset consists of single PyTorch Geometric [Data](https://rusty1s.github.io/pytorch_geometric/build/html/_modules/torch_geometric/data/data.html#Data) objects which model a single graph with various attributes. For this dataset, a graph for each contour is created. Hereby, each node of the graph represents one contour point. The feature of each node is the OSVOS feature vector from the next frame at this point. Each node is connected to its K nearest neighbours. The feature of each edge is the distance between the nodes it connects. The targets of each node is the translation it undergoes from the current to the next frame.

## Imports

In [1]:
import cv2
import matplotlib.pyplot as plt
import numpy as np

import torch
import torch.nn.functional as F
import torch.nn as nn
import torch.optim as optim
from torch.utils.data.sampler import SequentialSampler

from torch_geometric.nn import GCNConv
from torch_geometric.data import DataLoader

from pg_datasets.davis_2016 import DAVIS2016

# for auto-reloading extenrnal modules
# see http://stackoverflow.com/questions/1907993/autoreload-of-modules-in-ipython
%load_ext autoreload
%autoreload 2

device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')

## Paths & Constants

In [2]:
PYTORCH_GEOMETRIC_DAVIS_2016_DATASET_PATH = 'pg_datasets/DAVIS_2016'
CONTOURS_FOLDERS_PATH = 'DAVIS_2016/DAVIS/Contours/480p'
IMAGES_FOLDERS_PATH = 'DAVIS_2016/DAVIS/JPEGImages/480p'
TRANSLATIONS_FOLDERS_PATH = 'DAVIS_2016/DAVIS/Translations/480p'

SKIP_SEQUENCES = ['bmx-trees', 'bus', 'cows', 'dog-agility', 'horsejump-high', 
                  'horsejump-low', 'kite-walk', 'lucia', 'libby', 'motorbike',
                  'paragliding', 'rhino', 'scooter-gray', 'swing']

UNEQUAL_TRANSLATION_LENGTH = ['surf', 'bmx-bumps']

SKIP_SEQUENCES += UNEQUAL_TRANSLATION_LENGTH

TRAIN_SEQUENCES = ['bear', 'bmx-bumps', 'boat', 'breakdance-flare', 'bus', 
                   'car-turn', 'dance-jump', 'dog-agility', 'drift-turn', 
                   'elephant', 'flamingo', 'hike', 'hockey', 'horsejump-low', 
                   'kite-walk', 'lucia', 'mallard-fly', 'mallard-water', 
                   'motocross-bumps', 'motorbike', 'paragliding', 'rhino', 
                   'rollerblade', 'scooter-gray', 'soccerball', 'stroller',
                   'surf', 'swing', 'tennis', 'train']

VAL_SEQUENCES = ['blackswan', 'bmx-trees', 'breakdance', 'camel', 'car-roundabout',
                 'car-shadow', 'cows', 'dance-twirl', 'dog', 'drift-chicane', 
                 'drift-straight', 'goat', 'horsejump-high', 'kite-surf', 'libby', 
                 'motocross-jump', 'paragliding-launch', 'parkour', 'scooter-black', 
                 'soapbox']

BATCH_SIZE = 16
LAYER = 9
K = 32
EPOCHS_WO_AVEGRAD = 5

## Train and Val Dataset

In [3]:
train = DAVIS2016(PYTORCH_GEOMETRIC_DAVIS_2016_DATASET_PATH, 
                  CONTOURS_FOLDERS_PATH, IMAGES_FOLDERS_PATH, TRANSLATIONS_FOLDERS_PATH, 
                  LAYER, K, EPOCHS_WO_AVEGRAD,
                  SKIP_SEQUENCES, TRAIN_SEQUENCES, VAL_SEQUENCES,
                  train=True)

In [4]:
val = DAVIS2016(PYTORCH_GEOMETRIC_DAVIS_2016_DATASET_PATH, 
                CONTOURS_FOLDERS_PATH, IMAGES_FOLDERS_PATH, TRANSLATIONS_FOLDERS_PATH, 
                LAYER, K, EPOCHS_WO_AVEGRAD,
                SKIP_SEQUENCES, TRAIN_SEQUENCES, VAL_SEQUENCES,
                train=False)

In [5]:
data = train[0]
print(data)

Data(edge_attr=[15768], edge_index=[2, 15768], x=[256, 128], y=[256, 2])


## Simple GCN

In [6]:
class Net(torch.nn.Module):
    def __init__(self, in_channels, out_channels):
        super(Net, self).__init__()
        
        self.conv1 = GCNConv(in_channels, in_channels * 2)
        self.conv2 = GCNConv(in_channels * 2, in_channels * 2)
        
        self.lin1 = nn.Linear(in_channels * 2, in_channels)
        self.lin2 = nn.Linear(in_channels, out_channels)

    def forward(self, data):
        x, edge_index, edge_attr = data.x, data.edge_index, data.edge_attr
        
        x = self.conv1(x, edge_index, edge_attr)
        x = F.relu(x)
        x = F.dropout(x, training=self.training)
                
        x = self.conv2(x, edge_index, edge_attr)
        x = F.relu(x)
        x = F.dropout(x, training=self.training)
        
        x = self.lin1(x)
        x = F.relu(x)
        x = F.dropout(x, training=self.training)
        
        x = self.lin2(x)
        
        return x       

In [22]:
from tensorboardX import SummaryWriter
import os
import timeit
from datetime import datetime

def val_net(model, val_loader, criterion):
    model.eval()
    running_loss = 0.0
    
    for i, data in enumerate(val_loader):

        data = data.to(device)

        # forward pass to get outputs
        with torch.no_grad():
            out = model(data)

        # calculate the loss between predicted and target keypoints
        out_flatten = out.flatten()
        y_flatten = data.y.flatten()
        loss = criterion(out_flatten, y_flatten)

        # log the loss every log_nth iterations
        running_loss += loss.item()

    loss = running_loss/len(val_loader)
    print("Loss on VAL data: {}".format(loss))
    return loss
    
    
def train_net(model, train_loader, val_loader, optimizer, criterion, num_epochs=10, log_nth=10, verbose=True):

    # Logging into Tensorboard
    log_dir = os.path.join('GCN_Files', 'runs', datetime.now().strftime('%b%d_%H-%M-%S'))
    writer1 = SummaryWriter(logdir=log_dir, comment='train')
    writer2 = SummaryWriter(logdir=log_dir, comment='val')
    
    # prepare the net for training
    model.train()
    
    train_loss_history = []
    val_loss_history = []
    
    if verbose: print('START TRAIN.')
        
    start_time = timeit.default_timer()
    # loop over the dataset multiple times
    for epoch in range(num_epochs):
        print('\nEpoch::', epoch+1, '/', num_epochs)
        running_loss = 0.0
        # train on batches of data
        for i, data in enumerate(train_loader):
            data = data.to(device)

            # zero the parameter gradients
            optimizer.zero_grad()

            # forward pass to get outputs
            out = model(data)
            
            # calculate the loss between predicted and target keypoints
            out_flatten = out.flatten()
            y_flatten = data.y.flatten()
            
            loss = criterion(out_flatten, y_flatten)
            print('\tBatch:', i+1, '/', len(train_loader), ': Loss:', loss.data)
            writer1.add_scalar('data', loss.item(), epoch)
            # backward pass to calculate the weight gradients
            loss.backward()

            # update the weights
            optimizer.step()
            
            # store loss for each batch
            train_loss_history.append(loss.data)
            
            # log the loss every log_nth iterations
            running_loss += loss.item()
            if i % log_nth == log_nth - 1:
                if verbose:
                    print('[%d, %5d] loss: %.5f' 
                          %(epoch + 1, i + 1, running_loss / log_nth))
                running_loss = 0.0
        
        #Compute val loss after each epoch
        train_loss_epoch = np.mean([x.cpu().numpy() for x in train_loss_history[-i-1:]])
        print('Loss on TRAIN data (mean):', train_loss_epoch)

        val_loss = val_net(model, val_loader, criterion)
        writer1.add_scalars('data', {'train': train_loss_epoch, 'val': val_loss}, epoch)
        #writer2.add_scalar('data/total_loss_epoch', val_loss, epoch)
        val_loss_history.append(val_loss)
    if verbose: print('FINISH.')
    
    return train_loss_history, val_loss_history

## Overfit

In [23]:
num_train = 2
OverfitSampler = SequentialSampler(range(num_train))

overfit_train_loader = DataLoader(train, batch_size=1, 
                                  shuffle=False, sampler=OverfitSampler)
overfit_val_loader = DataLoader(val, batch_size=1, shuffle=False, sampler=SequentialSampler(range(3)))


# Load model and run the solver
overfit_model = Net(in_channels=train[0].num_features, 
                    out_channels=train[0].y.shape[1])

print(overfit_model)
overfit_model.double()
overfit_model.to(device)
criterion = nn.MSELoss()
optimizer = optim.Adam(overfit_model.parameters(), lr=1e-4, betas=(0.9, 0.999), 
                       eps=1e-8, weight_decay=0.0)


train_loss_history, val_loss_history = train_net(overfit_model, overfit_train_loader, overfit_val_loader, optimizer, criterion, 
                               num_epochs=250, verbose=False)

Net(
  (conv1): GCNConv(128, 256)
  (conv2): GCNConv(256, 256)
  (lin1): Linear(in_features=256, out_features=128, bias=True)
  (lin2): Linear(in_features=128, out_features=2, bias=True)
)

Epoch:: 1 / 250
	Batch: 1 / 2 : Loss: tensor(964.4807, device='cuda:0', dtype=torch.float64)
	Batch: 2 / 2 : Loss: tensor(749.6680, device='cuda:0', dtype=torch.float64)
Loss on TRAIN data (mean): 857.0743821949195
Loss on VAL data: 459.2409165193465

Epoch:: 2 / 250
	Batch: 1 / 2 : Loss: tensor(119.8948, device='cuda:0', dtype=torch.float64)
	Batch: 2 / 2 : Loss: tensor(74.2903, device='cuda:0', dtype=torch.float64)
Loss on TRAIN data (mean): 97.09254918926513
Loss on VAL data: 338.018131624938

Epoch:: 3 / 250
	Batch: 1 / 2 : Loss: tensor(45.7826, device='cuda:0', dtype=torch.float64)
	Batch: 2 / 2 : Loss: tensor(29.4599, device='cuda:0', dtype=torch.float64)
Loss on TRAIN data (mean): 37.621247633654576
Loss on VAL data: 309.7544450945732

Epoch:: 4 / 250
	Batch: 1 / 2 : Loss: tensor(19.8380, dev

Loss on VAL data: 309.9438955258806

Epoch:: 36 / 250
	Batch: 1 / 2 : Loss: tensor(3.3890, device='cuda:0', dtype=torch.float64)
	Batch: 2 / 2 : Loss: tensor(3.3869, device='cuda:0', dtype=torch.float64)
Loss on TRAIN data (mean): 3.3879944403392286
Loss on VAL data: 307.85951708994526

Epoch:: 37 / 250
	Batch: 1 / 2 : Loss: tensor(3.3772, device='cuda:0', dtype=torch.float64)
	Batch: 2 / 2 : Loss: tensor(3.3508, device='cuda:0', dtype=torch.float64)
Loss on TRAIN data (mean): 3.3640099371626593
Loss on VAL data: 306.0915753472611

Epoch:: 38 / 250
	Batch: 1 / 2 : Loss: tensor(3.3912, device='cuda:0', dtype=torch.float64)
	Batch: 2 / 2 : Loss: tensor(3.3435, device='cuda:0', dtype=torch.float64)
Loss on TRAIN data (mean): 3.3673790123889393
Loss on VAL data: 305.20820900442334

Epoch:: 39 / 250
	Batch: 1 / 2 : Loss: tensor(3.4141, device='cuda:0', dtype=torch.float64)
	Batch: 2 / 2 : Loss: tensor(3.3486, device='cuda:0', dtype=torch.float64)
Loss on TRAIN data (mean): 3.381315929118999

Loss on VAL data: 305.1868862547022

Epoch:: 71 / 250
	Batch: 1 / 2 : Loss: tensor(3.3598, device='cuda:0', dtype=torch.float64)
	Batch: 2 / 2 : Loss: tensor(3.3291, device='cuda:0', dtype=torch.float64)
Loss on TRAIN data (mean): 3.3444483232229754
Loss on VAL data: 305.09086032572606

Epoch:: 72 / 250
	Batch: 1 / 2 : Loss: tensor(3.3599, device='cuda:0', dtype=torch.float64)
	Batch: 2 / 2 : Loss: tensor(3.3282, device='cuda:0', dtype=torch.float64)
Loss on TRAIN data (mean): 3.3440479319650356
Loss on VAL data: 305.0382630935333

Epoch:: 73 / 250
	Batch: 1 / 2 : Loss: tensor(3.3597, device='cuda:0', dtype=torch.float64)
	Batch: 2 / 2 : Loss: tensor(3.3276, device='cuda:0', dtype=torch.float64)
Loss on TRAIN data (mean): 3.343647829803241
Loss on VAL data: 305.0283060927862

Epoch:: 74 / 250
	Batch: 1 / 2 : Loss: tensor(3.3591, device='cuda:0', dtype=torch.float64)
	Batch: 2 / 2 : Loss: tensor(3.3273, device='cuda:0', dtype=torch.float64)
Loss on TRAIN data (mean): 3.3432037084276356


Loss on VAL data: 304.037800560025

Epoch:: 104 / 250
	Batch: 1 / 2 : Loss: tensor(3.3451, device='cuda:0', dtype=torch.float64)
	Batch: 2 / 2 : Loss: tensor(3.3117, device='cuda:0', dtype=torch.float64)
Loss on TRAIN data (mean): 3.328362758938292
Loss on VAL data: 304.01534220517806

Epoch:: 105 / 250
	Batch: 1 / 2 : Loss: tensor(3.3446, device='cuda:0', dtype=torch.float64)
	Batch: 2 / 2 : Loss: tensor(3.3111, device='cuda:0', dtype=torch.float64)
Loss on TRAIN data (mean): 3.3278621128463435
Loss on VAL data: 303.9888237504005

Epoch:: 106 / 250
	Batch: 1 / 2 : Loss: tensor(3.3442, device='cuda:0', dtype=torch.float64)
	Batch: 2 / 2 : Loss: tensor(3.3106, device='cuda:0', dtype=torch.float64)
Loss on TRAIN data (mean): 3.327395396490904
Loss on VAL data: 303.9570549338635

Epoch:: 107 / 250
	Batch: 1 / 2 : Loss: tensor(3.3437, device='cuda:0', dtype=torch.float64)
	Batch: 2 / 2 : Loss: tensor(3.3101, device='cuda:0', dtype=torch.float64)
Loss on TRAIN data (mean): 3.326923950312002

Loss on VAL data: 302.1322038265573

Epoch:: 139 / 250
	Batch: 1 / 2 : Loss: tensor(3.3303, device='cuda:0', dtype=torch.float64)
	Batch: 2 / 2 : Loss: tensor(3.2951, device='cuda:0', dtype=torch.float64)
Loss on TRAIN data (mean): 3.312707517090969
Loss on VAL data: 302.1165808658707

Epoch:: 140 / 250
	Batch: 1 / 2 : Loss: tensor(3.3299, device='cuda:0', dtype=torch.float64)
	Batch: 2 / 2 : Loss: tensor(3.2947, device='cuda:0', dtype=torch.float64)
Loss on TRAIN data (mean): 3.3122713132855037
Loss on VAL data: 302.1152023293679

Epoch:: 141 / 250
	Batch: 1 / 2 : Loss: tensor(3.3296, device='cuda:0', dtype=torch.float64)
	Batch: 2 / 2 : Loss: tensor(3.2942, device='cuda:0', dtype=torch.float64)
Loss on TRAIN data (mean): 3.3119020901638963
Loss on VAL data: 301.99811434871526

Epoch:: 142 / 250
	Batch: 1 / 2 : Loss: tensor(3.3291, device='cuda:0', dtype=torch.float64)
	Batch: 2 / 2 : Loss: tensor(3.2938, device='cuda:0', dtype=torch.float64)
Loss on TRAIN data (mean): 3.3114348146113

Loss on VAL data: 300.2043297549853

Epoch:: 172 / 250
	Batch: 1 / 2 : Loss: tensor(3.3173, device='cuda:0', dtype=torch.float64)
	Batch: 2 / 2 : Loss: tensor(3.2807, device='cuda:0', dtype=torch.float64)
Loss on TRAIN data (mean): 3.298975324847868
Loss on VAL data: 300.2123176455935

Epoch:: 173 / 250
	Batch: 1 / 2 : Loss: tensor(3.3168, device='cuda:0', dtype=torch.float64)
	Batch: 2 / 2 : Loss: tensor(3.2802, device='cuda:0', dtype=torch.float64)
Loss on TRAIN data (mean): 3.2985462585057213
Loss on VAL data: 300.11006133924246

Epoch:: 174 / 250
	Batch: 1 / 2 : Loss: tensor(3.3165, device='cuda:0', dtype=torch.float64)
	Batch: 2 / 2 : Loss: tensor(3.2798, device='cuda:0', dtype=torch.float64)
Loss on TRAIN data (mean): 3.2981400066691697
Loss on VAL data: 300.04528208339775

Epoch:: 175 / 250
	Batch: 1 / 2 : Loss: tensor(3.3161, device='cuda:0', dtype=torch.float64)
	Batch: 2 / 2 : Loss: tensor(3.2794, device='cuda:0', dtype=torch.float64)
Loss on TRAIN data (mean): 3.297727787910

	Batch: 2 / 2 : Loss: tensor(3.2663, device='cuda:0', dtype=torch.float64)
Loss on TRAIN data (mean): 3.2852085715071295
Loss on VAL data: 297.45846132316996

Epoch:: 208 / 250
	Batch: 1 / 2 : Loss: tensor(3.3036, device='cuda:0', dtype=torch.float64)
	Batch: 2 / 2 : Loss: tensor(3.2660, device='cuda:0', dtype=torch.float64)
Loss on TRAIN data (mean): 3.284757992979396
Loss on VAL data: 297.27029797477746

Epoch:: 209 / 250
	Batch: 1 / 2 : Loss: tensor(3.3034, device='cuda:0', dtype=torch.float64)
	Batch: 2 / 2 : Loss: tensor(3.2656, device='cuda:0', dtype=torch.float64)
Loss on TRAIN data (mean): 3.284510586353277
Loss on VAL data: 297.1891659360512

Epoch:: 210 / 250
	Batch: 1 / 2 : Loss: tensor(3.3031, device='cuda:0', dtype=torch.float64)
	Batch: 2 / 2 : Loss: tensor(3.2652, device='cuda:0', dtype=torch.float64)
Loss on TRAIN data (mean): 3.2841819387073006
Loss on VAL data: 297.1276914192652

Epoch:: 211 / 250
	Batch: 1 / 2 : Loss: tensor(3.3027, device='cuda:0', dtype=torch.float

	Batch: 2 / 2 : Loss: tensor(3.2541, device='cuda:0', dtype=torch.float64)
Loss on TRAIN data (mean): 3.2732274288375955
Loss on VAL data: 294.67037070374613

Epoch:: 241 / 250
	Batch: 1 / 2 : Loss: tensor(3.2918, device='cuda:0', dtype=torch.float64)
	Batch: 2 / 2 : Loss: tensor(3.2538, device='cuda:0', dtype=torch.float64)
Loss on TRAIN data (mean): 3.2727670301009786
Loss on VAL data: 294.7497963684805

Epoch:: 242 / 250
	Batch: 1 / 2 : Loss: tensor(3.2912, device='cuda:0', dtype=torch.float64)
	Batch: 2 / 2 : Loss: tensor(3.2534, device='cuda:0', dtype=torch.float64)
Loss on TRAIN data (mean): 3.2723103186832194
Loss on VAL data: 294.87652028560603

Epoch:: 243 / 250
	Batch: 1 / 2 : Loss: tensor(3.2918, device='cuda:0', dtype=torch.float64)
	Batch: 2 / 2 : Loss: tensor(3.2530, device='cuda:0', dtype=torch.float64)
Loss on TRAIN data (mean): 3.2724205581586863
Loss on VAL data: 294.49714487596486

Epoch:: 244 / 250
	Batch: 1 / 2 : Loss: tensor(3.2906, device='cuda:0', dtype=torch.fl

In [None]:
train_loss_history_reduced = [i for idx, i in enumerate(train_loss_history) if idx%2==0]

print(len(train_loss_history_reduced), len(val_loss_history))
plt.plot(list(range(250)), train_loss_history_reduced, '-')
plt.plot(list(range(250)), val_loss_history, '-')
plt.xlabel('epochs')
plt.ylabel('loss')
plt.show()

In [None]:
overfit_model.double()
overfit_model.cpu()
overfit_model.eval()

image = cv2.imread('DAVIS_2016/DAVIS/Annotations/480p/bear/00001.png')
sample = next(iter(overfit_train_loader))
y = sample.y.detach().numpy()

with torch.no_grad():
    pred = overfit_model(sample).detach().numpy()

contour = np.load('pg_datasets/DAVIS_2016/raw/Contours/bear/00000.npy')

translation_ground_truth = contour + y
translation_pred = contour + pred

In [None]:
def plot_translation(image, translation_ground_truth, translation_pred):
    
    fig, ax = plt.subplots(figsize=(15, 10))
    
    ax.scatter(translation_ground_truth[:, 0], translation_ground_truth[:, 1], color='g')
    ax.scatter(translation_pred[:, 0], translation_pred[:, 1], color='r')
    
    # Plot image
    ax.imshow(image)
    
    ax.axis('image')
    ax.set_xticks([])
    ax.set_yticks([])

    plt.show()

In [None]:
plot_translation(image, translation_ground_truth, translation_pred)

# Train

In [None]:
train_loader = DataLoader(train, batch_size=BATCH_SIZE, shuffle=True)
val_loader = DataLoader(val, batch_size=BATCH_SIZE, shuffle=False)
# Load model and run the solver
model = Net(in_channels=train[0].num_features, 
            out_channels=train[0].y.shape[1])
print(model)
model.double()
model.to(device)
criterion = nn.MSELoss()
optimizer = optim.Adam(model.parameters(), lr=1e-4, betas=(0.9, 0.999), 
                       eps=1e-8, weight_decay=0.0)
train_loss_history = train_net(model, train_loader, val_loader, optimizer, criterion,
                               num_epochs=10, log_nth=10, verbose=True)

In [None]:
plt.plot(train_loss_history, 'o')
plt.xlabel('iteration')
plt.ylabel('loss')
plt.show()

# Validate

In [None]:
val_loader = DataLoader(val, batch_size=BATCH_SIZE, shuffle=False)

In [None]:
val_net(model, val_loader, criterion)