# Create PyTorch Geometric DAVIS 2016 Dataset

In this notebook, a custom [PyTorch Geometric](https://rusty1s.github.io/pytorch_geometric/build/html/index.html) [InMemoryDataset](https://rusty1s.github.io/pytorch_geometric/build/html/_modules/torch_geometric/data/in_memory_dataset.html#InMemoryDataset) for the DAVIS 2016 dataset is created. The implementation is based on this [tutorial](https://rusty1s.github.io/pytorch_geometric/build/html/notes/create_dataset.html).

The dataset consists of single PyTorch Geometric [Data](https://rusty1s.github.io/pytorch_geometric/build/html/_modules/torch_geometric/data/data.html#Data) objects which model a single graph with various attributes. For this dataset, a graph for each contour is created. Hereby, each node of the graph represents one contour point. The feature of each node is the OSVOS feature vector from the next frame at this point. Each node is connected to its K nearest neighbours. The feature of each edge is the distance between the nodes it connects. The targets of each node is the translation it undergoes from the current to the next frame.

## Imports

In [1]:
from distutils.dir_util import copy_tree
import numpy as np
import os

import torch
from torch_geometric.data import Data, InMemoryDataset
from torch_geometric.nn import knn_graph
from torch_geometric.utils import to_undirected

## Paths & Constants

In [2]:
CONTOURS_FOLDERS_PATH = 'DAVIS_2016/DAVIS/Contours/480p'
TRANSLATIONS_FOLDERS_PATH = 'DAVIS_2016/DAVIS/Translations/480p'
PYTORCH_GEOMETRIC_DAVIS_2016_DATASET_PATH = 'PyTorch_Geometric_Datasets/DAVIS_2016'

SKIP_SEQUENCES = ['bmx-trees', 'bus', 'cows', 'dog-agility', 'horsejump-high', 
                  'horsejump-low', 'kite-walk', 'lucia', 'libby', 'motorbike',
                  'paragliding', 'rhino', 'scooter-gray', 'swing']
K = 32

## Functions

In [3]:
def get_edge_attribute(contour, edge_index):
    '''Returns an edge feature matrix of shape [num_edges, num_edge_features]
       containing the distances between the node each edge connects.'''
    
    edge_index = edge_index.numpy()
    edge_index = edge_index.T
    
    edge_attr = []
    for edge in edge_index:
        contour_point_0 = contour[edge[0]] 
        contour_point_1 = contour[edge[1]]
        dist = np.linalg.norm(contour_point_0-contour_point_1)
        edge_attr.append([dist])
    
    edge_atrr = np.array(edge_attr)
    return torch.from_numpy(edge_atrr)

In [4]:
def create_data(contour, translation):
    '''Returns data object.'''
    
    # x: Node feature matrix with shape [num_nodes, num_node_features]
    # The feature of each node is the OSVOS feature vector of the next frame
    # TODO 
    # x = get_OSVOS_feature_vectors(contour)
    x = None

    # edge_index: Graph connectivity in COO format with shape [2, num_edges] and type torch.long
    # Each node should be connected to its K nearest neighbours
    positions = torch.from_numpy(contour)
    edge_index = knn_graph(positions, K)
    edge_index = to_undirected(edge_index)

    # edge_attr: Edge feature matrix with shape [num_edges, num_edge_features]
    # The feature of each edge is the distance between the two nodes it connects
    edge_attr = get_edge_attribute(contour, edge_index)

    # y: Target to train against (may have arbitrary shape)
    # The target of each node is the displacement of the node between the current and the next frame
    y = torch.from_numpy(translation)

    # Create data object
    data = Data(x=x, edge_index=edge_index, edge_attr=edge_attr, y=y)
    
    return data

## InMemoryDataset Class

In [5]:
class DAVIS2016(InMemoryDataset):
    def __init__(self, root, transform=None, pre_transform=None, contours_folders_path,):
        super(DAVIS2016, self).__init__(root, transform, pre_transform)
        self.data, self.slices = torch.load(self.processed_paths[0])
        
    @property
    def raw_file_names(self):
        raw_file_names = ['Contours', 'Translations']
        return raw_file_names

    @property
    def processed_file_names(self):
        return ['data.pt']

    def download(self):
        # Copy Contours folder to raw_dir
        raw_dir_contours = os.path.join(self.raw_dir, 'Contours')
        copy_tree(CONTOURS_FOLDERS_PATH, raw_dir_contours)
        
        # Copy Translations folder to raw_dir
        raw_dir_translations = os.path.join(self.raw_dir, 'Translations')
        copy_tree(TRANSLATIONS_FOLDERS_PATH, raw_dir_translations)
        
    def process(self):
        # Get paths to Contours and Translations
        raw_path_contours, raw_path_translations = self.raw_paths
        
        # Get list of folders (there is one for each sequence)
        translations_folders_list = os.listdir(raw_path_translations)
        
        # Create empty data list to which Data objects will be added
        data_list = []
        
        # Iterate through folders 
        for i, folder in enumerate(translations_folders_list):
            
            # Skip if it is a bad sequence
            if (folder in SKIP_SEQUENCES): continue
            
            # Debug
            # if (i > 2): break
            
            print('#{}: {}'.format(i, folder))
            
            # Get paths to current sequence in Contours and Translations folders
            contours_folder_path = os.path.join(raw_path_contours, folder)
            translations_folder_path = os.path.join(raw_path_translations, folder)
            
            # Get list of translations (one for each frame in the sequence)
            translations = os.listdir(translations_folder_path)
            translations.sort()
            
            # Iterate through translations
            for j, translation in enumerate(translations):
                
                # Debug
                # if (j > 4): break
                
                # print('\t#{}: {}'.format(j, translation))
                
                # Load corresponding contour
                contour_path = os.path.join(contours_folder_path, translation)
                contour = np.load(contour_path)
                
                # Load corresponding sequence
                translation_path = os.path.join(translations_folder_path, translation)
                translation = np.load(translation_path)
                
                # Get data and append it to data_list
                data = create_data(contour, translation)
                data_list.append(data)
                
        if self.pre_filter is not None:
            data_list = [data for data in data_list if self.pre_filter(data)]

        if self.pre_transform is not None:
            data_list = [self.pre_transform(data) for data in data_list]
        
        data, slices = self.collate(data_list)
        torch.save((data, slices), self.processed_paths[0])

In [6]:
dataset = DAVIS2016(root=PYTORCH_GEOMETRIC_DAVIS_2016_DATASET_PATH)

Processing...
#1: drift-chicane
#3: soapbox
#4: breakdance
#5: drift-turn
#6: mallard-fly
#9: scooter-black
#10: breakdance-flare
#12: elephant
#14: rollerblade
#15: dance-twirl
#16: dance-jump
#18: mallard-water
#19: car-turn
#22: car-shadow
#23: paragliding-launch
#24: stroller
#25: bear
#26: hockey
#27: dog
#28: boat
#29: car-roundabout
#30: soccerball
#31: train
#32: tennis
#33: parkour
#34: surf
#35: kite-surf
#37: drift-straight
#38: flamingo
#39: goat
#41: hike
#42: motocross-jump
#44: motocross-bumps
#47: camel
#48: blackswan
#49: bmx-bumps
Done!
