# This notebook shows how to explore the dataset

### First load useful tools and path

In [None]:
import os
import numpy as np

# Locate the directory the notebook runs from.
# A notebook cell has no real source file: inspecting the current frame only
# yields a pseudo-filename (or, on kernels that compile cells from temporary
# files, a path in a temp directory), so the kernel's working directory is
# used instead. NOTE(review): assumes the kernel was started in the
# notebook's own directory, which is the Jupyter default - confirm.
import inspect  # no longer used for path detection; kept so the import set is unchanged
HERE_PATH = os.getcwd()
DATA_PATH = os.path.join(HERE_PATH, '..', 'data')

# Add the parent directory to the import path so the project packages can be
# imported; the guard keeps sys.path free of duplicates when the cell is re-run.
import sys
root_path = os.path.join(HERE_PATH, '..')
if root_path not in sys.path:
    sys.path.append(root_path)

### We explore the octanoic dataset

In [None]:
# Load the dataset and print some information about it.
# `info` is a dict read below through its 'count', 'x_keys' and 'y_keys'
# entries; `X`, `Y` and `path` are indexed per experiment by later cells.
from datasets.tools import load_dataset
X, Y, info, path = load_dataset('octanoic')
# Single-argument print(...) calls produce the same output on Python 2 and 3.
print('The dataset has {} entries'.format(info['count']))
print('Input dimensions represent {}'.format(info['x_keys']))
print('Output dimensions represent {}'.format(info['y_keys']))

### Preparing functions to plot animation in the notebook

In [None]:
from IPython.display import HTML
from tempfile import NamedTemporaryFile

# HTML5 snippet embedding a video as a base64 data URI; {0} receives the payload.
VIDEO_TAG = """<video controls>
 <source src="data:video/x-m4v;base64,{0}" type="video/mp4">
 Your browser does not support the video tag.
</video>"""

def anim_to_html(anim):
    """Return an HTML ``<video>`` snippet with ``anim`` embedded as base64.

    The animation is saved once to a temporary ``.mp4`` file (20 fps, libx264
    codec) and the encoded result is cached on the animation object as
    ``anim._encoded_video``, so later calls skip the encoding step.

    Parameters
    ----------
    anim : object
        Any object exposing ``save(filename, fps=..., extra_args=...)``,
        e.g. a matplotlib animation.

    Returns
    -------
    str
        ``VIDEO_TAG`` filled in with the base64-encoded video.
    """
    if not hasattr(anim, '_encoded_video'):
        import base64  # local import so this cell does not depend on another cell

        with NamedTemporaryFile(suffix='.mp4') as f:
            anim.save(f.name, fps=20, extra_args=['-vcodec', 'libx264'])
            # Re-open by name (the writer wrote through its own handle) and
            # close the handle deterministically instead of leaking it.
            with open(f.name, "rb") as video_file:
                video = video_file.read()
        # base64.b64encode works on Python 2 and 3 (bytes have no
        # .encode("base64") on Python 3); decode to text for str.format.
        anim._encoded_video = base64.b64encode(video).decode('ascii')

    return VIDEO_TAG.format(anim._encoded_video)


def display_animation(anim):
    """Close the animation's figure and return the animation as an HTML video.

    The figure is closed before the video is built - presumably so the inline
    backend does not also render it as a static image (TODO confirm).
    """
    figure = anim._fig
    plt.close(figure)
    video_markup = anim_to_html(anim)
    return HTML(video_markup)

### Displaying each experiment in a 3D space, each axis representing an output dimension

In [None]:
%matplotlib inline
import matplotlib.pyplot as plt
from mpl_toolkits.mplot3d import Axes3D

Y_scaled = Y / np.max(Y, axis=0)  # we scale the data to use them directly as colors

fig = plt.figure()
ax = fig.add_subplot(111, projection='3d')
ax.scatter(Y[:, 0], Y[:,1], Y[:, 2], c=Y_scaled)
ax.set_xlabel(info['y_keys'][0])
ax.set_ylabel(info['y_keys'][1])
ax.set_zlabel(info['y_keys'][2])
ax.view_init(elev=20., azim=45)

Each point in this graph is the result of one experiment in the dataset. 

We can turn it into an animation to better observe the distribution. The next cell does just that; it takes quite some time to execute.

In [None]:
from matplotlib import animation

fps = 20
n_frames = 100

# Set up the figure and the 3D axes, then draw everything that stays the same
# across frames exactly once. Only the camera moves during the animation, so
# re-plotting the points on every frame would just stack a new scatter
# collection on top of the previous ones.
fig = plt.figure()
ax = fig.add_subplot(111, projection='3d')
ax.set_xlabel(info['y_keys'][0])
ax.set_ylabel(info['y_keys'][1])
ax.set_zlabel(info['y_keys'][2])
points = ax.scatter(Y[:, 0], Y[:, 1], Y[:, 2], c=Y_scaled)

# animation function.  This is called sequentially
def animate(i):
    """Point the camera at the azimuth for frame ``i`` (one full turn over ``n_frames``)."""
    # 360.0 forces float arithmetic: under Python 2, `i * 360/n_frames`
    # truncates to whole degrees.
    ax.view_init(elev=20., azim=360.0 * i / n_frames)
    # With blit=True the callback must return an iterable of artists.
    return points,

# call the animator.  blit=True means only re-draw the parts that have changed.
anim = animation.FuncAnimation(fig, animate,
                               frames=n_frames, blit=True)

# call our new function to display the animation
display_animation(anim)

**Uncomment the cell below to save the animation in a file**

In [None]:
# writer = animation.writers['ffmpeg']
# writer = writer(fps=fps, bitrate=1800)
# video_file = os.path.join(HERE_PATH, 'video.mp4')
# anim.save(video_file, writer=writer)

### You can look for the best experiment in each dimension

In [None]:
# Open an experiment's video with vlc (requires vlc to be installed and on the PATH)
def open_video(video_id):
    """Print the inputs/outputs of experiment ``video_id`` and play its video in vlc.

    The video is looked up at ``DATA_PATH/<path[video_id]>/video.avi``. The call
    blocks until the player exits. If vlc cannot be started, a message is
    printed instead of raising.

    Parameters
    ----------
    video_id : int
        Index of the experiment, used to index ``path``, ``X`` and ``Y``.
    """
    import subprocess  # local import so this cell does not depend on another cell

    video_path = os.path.join(DATA_PATH, path[video_id], 'video.avi')
    print("Reading {}".format(video_path))
    print("X: {}".format(X[video_id, :]))
    print("Y: {}".format(Y[video_id, :]))
    try:
        # Pass an argument list rather than a formatted shell string, so paths
        # containing spaces or shell metacharacters are handled correctly.
        subprocess.call(['vlc', video_path])
    except OSError as err:
        print("Could not launch vlc: {}".format(err))

In [None]:
# Experiment with the highest 'division' output
division_index = info['y_keys'].index('division')
division_id = Y[:, division_index].argmax()
open_video(division_id)

In [None]:
# best directionality experiment
# Select the column by its looked-up index rather than a hard-coded position,
# so the result stays correct whatever the order of info['y_keys'].
directionality_index = info['y_keys'].index('directionality')
directionality_id = np.argmax(Y[:, directionality_index])
open_video(directionality_id)

In [None]:
# best movement experiment
# Select the column by its looked-up index rather than a hard-coded position,
# so the result stays correct whatever the order of info['y_keys'].
movement_index = info['y_keys'].index('movement')
movement_id = np.argmax(Y[:, movement_index])
open_video(movement_id)