In [138]:
# Usual imports
import time
import math
import numpy as np
import os
#import matplotlib.pyplot as plt
import argparse
import pickle
from glob import glob
import random
import sys
import subprocess

#Torch
import torch
import torch.nn as nn
import torch.nn.functional as F
from torch.utils import data
from torch.backends import cudnn
from torch.optim import Adam

#tensorboard
from tensorboardX import SummaryWriter

#my modules
sys.path.append('../')
sys.path.append('../scripts')
from scripts.dataset import DigitsDataset, WordsDataset, VideosDataset
from scripts.order_matters import ReadProcessWrite
from scripts.digits_reordering import create_model


In [139]:
import io
import base64
from IPython.display import HTML
import skvideo.io

In [140]:
# Maps the READER setting to the dataset class used to load the pickled data.
DATASET_CLASSES = {
    'linear': DigitsDataset,
    'words': WordsDataset,
    'videos': VideosDataset,
}
# Alphabet used by X_to_words to turn one-hot character slots back into text.
LETTERS = 'abcdefghijklmnopqrstuvwxyz'

# Input data (pickled dict, main() reads its 'test' entry) and checkpoint path.
PICKLE_FILE = '../../s3-drive/set_to_sequence/video_reordering_18374_3937_5_2019-06-18_11:45:26.327081.pkl'
RESUME = '../checkpoints/1/ep_100_map_inf_latest.pth.tar'

# Settings forwarded to create_model() through ARGS.
# NOTE(review): presumably these have to match the values used to train the
# checkpoint in RESUME -- confirm against the training run.
BATCH_SIZE = 128
HIDDEN_DIMS = [256]
LSTM_STEPS = 10
READER = 'videos'
INPUT_DIM = 1280
DROPOUT = 0.2

USE_CUDA = torch.cuda.is_available()
if USE_CUDA:
    print('Using GPU, %i devices.' % torch.cuda.device_count())

# No command line exists inside a notebook: parse an empty argument list to get
# a blank namespace, then fill it with the constants above.
parser = argparse.ArgumentParser()
ARGS = parser.parse_args(args=[])
vars(ARGS).update(
    batch_size=BATCH_SIZE,
    hidden_dims=HIDDEN_DIMS,
    lstm_steps=LSTM_STEPS,
    input_dim=INPUT_DIM,
    reader=READER,
    dropout=DROPOUT,
    resume=RESUME,
    USE_CUDA=USE_CUDA,
)

Using GPU, 4 devices.


In [154]:
def test(test_loader, model):
    
    model.eval()
    
    # Training
    correct_orders = 0
    total_orders = 0
    loader_len = len(test_loader)
    for i, data in enumerate(test_loader, 0):
        X, Y, additional_dict = data
        boundaries_lists = additional_dict['blocks_boundaries']
        
        
        # Transfer to GPU
        device = f'cuda:{torch.cuda.current_device()}' if torch.cuda.is_available() else 'cpu'
        X, Y = X.to(device).float(), Y.to(device)
        #X, Y = X.cuda().float(), Y.cuda()


        # forward + backward + optimize
        outputs, pointers, hidden = model(X)
        
        outputs = outputs.contiguous().view(-1, outputs.size()[-1])
        #print(f'outputs: {outputs.size()}, Y: {Y.size()}')
        
        
        """
        if args.reader == 'words':
            words = X_to_words(X.cpu())
            #inds_x = np.tile(np.array(range(words.shape[0])), [words.shape[1], 1]).T
            predicted_inds = pointers.cpu().data.numpy()
            real_inds = Y.cpu().data.numpy()
            for i in range(real_inds.shape[0]):
                print(f' Predicted Words order: {words[i, predicted_inds[i,:]]}')
                print(f' Real Words order: {words[i, real_inds[i,:]]}\n')
        """
        
        
        print(f'Predictions: {pointers}')
        print(f'Real orders: {Y}')
        
            

        ###We display the predicted order and real order for the idx-th video of each batch
        idx = 28
        videofile = additional_dict['filename'][idx]
        print(f'Videofile: {videofile}, Y shape: {Y.shape}, len(boundaries_lists): {len(boundaries_lists)}')
        basename = os.path.basename(videofile)
        video = skvideo.io.vread(videofile)
        
        predicted_frame_blocks = [range(boundaries_lists[i-1][idx],boundaries_lists[i][idx]) for i in range(1,len(boundaries_lists))]
        predicted_frame_blocks = [predicted_frame_blocks[i] for i in Y[idx]]

        predicted_frame_order = [val for sublist in predicted_frame_blocks for val in sublist]

        reordered_video = video[predicted_frame_order,:,:,:]
        
        print(f'video n frames: {video.shape[0]}, reordered_video n frames: {len(predicted_frame_order)}')
        
        predicted_filename = f'../data/predicted_videos/{basename}'
        skvideo.io.vwrite(predicted_filename, reordered_video)

        #show_video(videofile)

        #show_video(predicted_filename)
        

        for _ in range(pointers.size(0)):
            total_orders += 1
            if Y[_,:].equal( pointers[_,:]):
                correct_orders +=1
                
    print(f'Fraction of perfectly sorted sets: {correct_orders/total_orders}')


def X_to_words(X, letters=None):
    """Decode one-hot encoded words back into strings.

    Args:
        X: tensor or array of shape (batch, n_seq, max_word_length, vocab_size).
            Tensors on any device are accepted; they are copied to the CPU.
        letters: sequence mapping a vocabulary index to its character. Defaults
            to the module-level ``LETTERS``.

    Returns:
        Object array of shape (batch, n_seq) holding one string per word. A
        character slot only contributes a letter when its largest entry is 1,
        so all-zero (padding) slots are skipped.
    """
    alphabet = LETTERS if letters is None else letters

    if torch.is_tensor(X):
        array = X.detach().cpu().numpy()
    else:
        array = np.asarray(X)

    n_batch, n_seq = array.shape[0], array.shape[1]
    words = np.full((n_batch, n_seq), '', dtype=object)
    for i, j in np.ndindex(n_batch, n_seq):
        # array[i, j] holds one row per character slot of the word.
        words[i, j] = ''.join(
            alphabet[int(np.argmax(slot))] for slot in array[i, j] if slot.max() == 1
        )
    return words


In [155]:
def main():
    """Load the test split, restore the model and run the evaluation.

    Steps: empty the ``../data/predicted_videos`` folder, unpickle
    ``PICKLE_FILE``, build the dataset selected by ``READER`` from its
    ``'test'`` entry, create the model from ``ARGS`` and hand both to ``test``.
    """
    import shutil  # local import: only needed for the clean-up below

    # Empty the output folder. The former
    # subprocess.run(['rm', '-rf', '../data/predicted_videos/*']) ran without a
    # shell, so the '*' was never expanded and nothing was actually removed.
    output_dir = '../data/predicted_videos'
    os.makedirs(output_dir, exist_ok=True)
    for stale_path in glob(os.path.join(output_dir, '*')):
        if os.path.isdir(stale_path) and not os.path.islink(stale_path):
            shutil.rmtree(stale_path)
        else:
            os.remove(stale_path)

    # SECURITY: pickle.load executes arbitrary code embedded in the file; only
    # point PICKLE_FILE at data produced by a trusted source.
    with open(PICKLE_FILE, 'rb') as f:
        dict_data = pickle.load(f)

    dataset_class = DATASET_CLASSES[READER]
    test_ds = dataset_class(dict_data['test'])

    test_loader = torch.utils.data.DataLoader(
            test_ds,
            batch_size=BATCH_SIZE, shuffle=True,
            num_workers=4, pin_memory=True)

    model = create_model(ARGS)

    if USE_CUDA:
        model.to(f'cuda:{torch.cuda.current_device()}')
        cudnn.benchmark = True
        # The DataParallel wrapper that used to be created here was never passed
        # to test(), so it has been dropped; the evaluation runs on the bare
        # model exactly as before.

    test(test_loader, model)

In [156]:
def show_video(filename):
    """Return an inline HTML5 player for the video stored at ``filename``.

    The file content is base64-encoded and embedded in the page as a data URI,
    so the player works without the notebook server having to serve the file.
    The source is always declared as ``video/mp4``.

    Args:
        filename: path of the video file to embed.

    Returns:
        An ``IPython.display.HTML`` object; make it the last expression of a
        cell to display the player.
    """
    # Read-only binary mode: the former 'r+b' required write permission (and so
    # failed on read-only files) and the handle was never closed.
    with open(filename, 'rb') as video_file:
        encoded = base64.b64encode(video_file.read())
    return HTML(data='''<video alt="test" controls>
                    <source src="data:video/mp4;base64,{0}" type="video/mp4" />
                 </video>'''.format(encoded.decode('ascii')))

In [157]:
# Run the evaluation: prints predictions and accuracy, and writes sample videos
# to ../data/predicted_videos.
main()

=> creating model
=> loading checkpoint '../checkpoints/1/ep_100_map_inf_latest.pth.tar'
Predictions: tensor([[4, 0, 1, 2, 3],
        [4, 2, 0, 1, 3],
        [2, 0, 4, 3, 1],
        [3, 0, 4, 1, 2],
        [2, 3, 0, 1, 4],
        [3, 1, 2, 0, 4],
        [0, 3, 4, 1, 2],
        [2, 4, 3, 1, 0],
        [1, 3, 4, 0, 2],
        [1, 0, 4, 3, 2],
        [4, 2, 1, 3, 0],
        [2, 0, 1, 3, 4],
        [1, 4, 3, 2, 0],
        [1, 0, 2, 3, 4],
        [2, 0, 1, 4, 3],
        [4, 3, 0, 1, 2],
        [2, 4, 0, 1, 3],
        [3, 2, 1, 0, 4],
        [0, 3, 4, 1, 2],
        [1, 4, 3, 0, 2],
        [0, 2, 1, 4, 3],
        [3, 2, 4, 1, 0],
        [0, 1, 4, 3, 2],
        [4, 0, 3, 1, 2],
        [4, 0, 2, 3, 1],
        [4, 3, 1, 0, 2],
        [2, 1, 0, 4, 3],
        [3, 0, 1, 4, 2],
        [2, 3, 4, 0, 1],
        [1, 0, 4, 2, 3],
        [4, 1, 2, 0, 3],
        [4, 2, 3, 0, 1],
        [0, 4, 2, 1, 3],
        [4, 3, 1, 0, 2],
        [0, 3, 4, 1, 2],
        [1, 4, 0, 3, 2]

video n frames: 151, reordered_video n frames: 150
Predictions: tensor([[0, 1, 4, 3, 2],
        [1, 4, 0, 3, 2],
        [1, 2, 3, 0, 4],
        [0, 1, 4, 2, 3],
        [3, 4, 2, 0, 1],
        [2, 3, 0, 4, 1],
        [3, 4, 2, 1, 0],
        [2, 3, 1, 0, 4],
        [0, 1, 3, 4, 2],
        [1, 2, 3, 4, 0],
        [2, 4, 3, 0, 1],
        [3, 2, 0, 4, 1],
        [4, 3, 0, 1, 2],
        [2, 4, 3, 0, 1],
        [2, 1, 3, 4, 0],
        [4, 3, 2, 1, 0],
        [3, 1, 2, 0, 4],
        [1, 0, 4, 2, 3],
        [4, 0, 1, 3, 2],
        [1, 2, 3, 4, 0],
        [1, 2, 3, 0, 4],
        [0, 2, 1, 4, 3],
        [2, 4, 3, 0, 1],
        [0, 4, 1, 2, 3],
        [1, 3, 4, 2, 0],
        [3, 4, 1, 0, 2],
        [2, 1, 3, 0, 4],
        [2, 4, 0, 3, 1],
        [1, 3, 2, 0, 4],
        [1, 0, 2, 4, 3],
        [2, 1, 3, 4, 0],
        [4, 0, 3, 1, 2],
        [3, 1, 0, 4, 2],
        [0, 1, 4, 2, 3],
        [2, 4, 3, 1, 0],
        [2, 4, 3, 1, 0],
        [4, 2, 1, 0, 3],
        [0,

video n frames: 63, reordered_video n frames: 62
Predictions: tensor([[0, 3, 1, 2, 4],
        [3, 4, 1, 2, 0],
        [2, 0, 4, 3, 1],
        [0, 1, 2, 3, 4],
        [2, 1, 3, 4, 0],
        [3, 1, 2, 0, 4],
        [3, 4, 1, 2, 0],
        [3, 1, 0, 4, 2],
        [0, 3, 4, 1, 2],
        [1, 2, 0, 4, 3],
        [3, 4, 1, 2, 0],
        [1, 3, 2, 4, 0],
        [0, 1, 3, 4, 2],
        [3, 4, 2, 1, 0],
        [1, 4, 3, 2, 0],
        [2, 0, 1, 4, 3],
        [3, 4, 2, 1, 0],
        [0, 1, 4, 2, 3],
        [2, 3, 1, 4, 0],
        [4, 1, 0, 3, 2],
        [3, 0, 4, 1, 2],
        [1, 2, 0, 4, 3],
        [4, 0, 2, 3, 1],
        [3, 2, 1, 0, 4],
        [0, 3, 4, 2, 1],
        [4, 3, 2, 0, 1],
        [3, 1, 4, 2, 0],
        [3, 4, 2, 0, 1],
        [3, 1, 4, 2, 0],
        [2, 1, 3, 4, 0],
        [4, 0, 2, 3, 1],
        [1, 2, 3, 0, 4],
        [0, 2, 1, 4, 3],
        [1, 0, 4, 3, 2],
        [1, 4, 2, 3, 0],
        [3, 4, 2, 0, 1],
        [4, 3, 2, 1, 0],
        [0, 3

RuntimeError: 

In [158]:
# Pick one exported clip and derive the path of the source video it came from
# (both share the same base name).
# glob() returns paths in arbitrary order, so sort to make the pick reproducible.
predicted_videos = sorted(glob('../data/predicted_videos/*'))
if len(predicted_videos) < 2:
    raise IndexError(
        f'Expected at least 2 exported videos in ../data/predicted_videos, '
        f'found {len(predicted_videos)}; run main() first.'
    )
predicted_videofile = predicted_videos[1]
basename = os.path.basename(predicted_videofile)

original_videofile = f'../../s3-drive/RLY/RLYMedia/{basename}'
    


In [159]:
# Inline player for the source video that shares the selected clip's base name.
show_video(original_videofile)

In [160]:
# Inline player for the reordered clip picked from ../data/predicted_videos.
show_video(predicted_videofile)