# Notebook details

In [None]:
def setup_notebook(fix_python_path=True, reduce_margins=True, plot_inline=True):
    """Prepare the notebook session: page layout, import path and plotting.

    Args:
        fix_python_path: when true, append the parent of the current working
            directory to ``sys.path`` (intended to make ``egosocial``
            importable). The entry is added only once, so re-running the cell
            does not pile up duplicates.
        reduce_margins: when true, inject CSS that makes ``.container``
            elements use the full page width.
        plot_inline: when true, run the ``%matplotlib inline`` line magic.

    Side effects:
        Always sets the module-level ``__file__`` to ``'Notebook'``.
    """
    if reduce_margins:
        # Reduce side margins of the notebook.
        # IPython.display is the public location of these helpers; importing
        # `display` from IPython.core.display is deprecated.
        from IPython.display import display, HTML
        display(HTML("<style>.container { width:100% !important; }</style>"))

    if fix_python_path:
        # add egosocial to the python path
        import os, sys
        parent_dir = os.path.dirname(os.path.abspath('.'))
        if parent_dir not in sys.path:
            sys.path.append(parent_dir)

    if plot_inline:
        # Plots inside cells. This is the function-call form of
        # `%matplotlib inline`, which keeps the function body plain Python.
        from IPython import get_ipython
        get_ipython().run_line_magic('matplotlib', 'inline')

    # Code that reads __file__ would otherwise fail inside a notebook.
    global __file__
    __file__ = 'Notebook'

# Run the notebook setup with every option left at its default (all enabled).
setup_notebook()

# Imports and Constants Definition

In [None]:
# !/usr/bin/env python
# -*- coding: utf-8 -*-

import argparse
import itertools
import json
import logging
import os

from google_drive_downloader import GoogleDriveDownloader as gdd
from sklearn.decomposition import PCA
import numpy as np
import pandas as pd

import keras
from keras import backend as K

import egosocial
import egosocial.config
from egosocial.utils.filesystem import create_directory
from egosocial.utils.filesystem import check_directory
from egosocial.utils.keras.backend import limit_gpu_allocation_tensorflow
from egosocial.utils.keras.processing import TimeSeriesDataGenerator
from egosocial.utils.logging import setup_logging

# Limit GPU memory allocation with TensorFlow

In [None]:
# Switch for the GPU memory cap; while it is False the block below does nothing.
limit_memory = False
# The helper is only invoked when Keras reports the TensorFlow backend.
if limit_memory and K.backend() == 'tensorflow':
    # Value handed to the project helper; presumably the fraction of GPU memory
    # TensorFlow may allocate -- TODO confirm in egosocial.utils.keras.backend.
    memory_ratio = 0.6
    limit_gpu_allocation_tensorflow(memory_ratio)

# Fake main

In [None]:
 def main(*fake_args):
     """Parse the command-line style options used by this notebook.

     Stand-in for a script entry point: it builds the argument parser, makes
     sure the temporary directory exists, configures logging and returns the
     parsed options.

     Args:
         *fake_args: either a single list of argument strings
             (``main(['--dataset_path', p, ...])``) or the argument strings
             themselves (``main('--dataset_path', p, ...)``). With no
             arguments, argparse falls back to ``sys.argv``.

     Returns:
         argparse.Namespace with ``dataset_path`` and ``features_dir``.

     Raises:
         SystemExit: raised by argparse when a required option is missing or
             an unknown option is given.
     """
     entry_msg = 'Extract features for egosocial photo-streams.'
     parser = argparse.ArgumentParser(description=entry_msg)

     parser.add_argument('--dataset_path', required=True,
                         help='Path to file containing the input data and labels information merged.')

     parser.add_argument('--features_dir', required=True,
                         help='Directory where the extracted features are stored.')

     # makedirs also creates missing parent directories, which os.mkdir
     # would reject with an error.
     if not os.path.isdir(egosocial.config.TMP_DIR):
         os.makedirs(egosocial.config.TMP_DIR)

     setup_logging(egosocial.config.LOGGING_CONFIG,
                   log_dir=egosocial.config.LOGS_DIR)

     if fake_args and isinstance(fake_args[0], str):
         # Called as main('--opt', 'value', ...): the strings are the argv.
         # Unpacking them into parse_args() would pass the first string as
         # the argument list and the second as the namespace.
         args = parser.parse_args(list(fake_args))
     else:
         # Called as main([...]) or main(): forward unchanged.
         args = parser.parse_args(*fake_args)

     return args

In [None]:
# Root folder for the dataset files, located under the project's TMP_DIR.
BASE_DIR = os.path.join(egosocial.config.TMP_DIR, 'egocentric', 'datasets')

# Emulated command line: option names followed by their values.
args = [
    "--dataset_path", os.path.join(BASE_DIR, 'merged_dataset.json'),
    "--features_dir", os.path.join(BASE_DIR, 'extracted_features'),
]

# Parsed options (conf.dataset_path, conf.features_dir) used by the cells below.
conf = main(args)

# Helper functions

In [None]:
def load_dataset_defition(dataset_path):
    """Read the dataset definition JSON and return it as a flat DataFrame.

    The JSON document is iterated as a collection of segments, each of which
    is itself an iterable of sample records. All records are concatenated,
    in file order, into a single table.

    Args:
        dataset_path: path to the JSON file.

    Returns:
        pandas.DataFrame with one row per sample record.
    """
    with open(dataset_path, 'r') as json_file:
        segments = json.load(json_file)

    # Collapse the segment -> samples nesting into one list of records.
    records = [sample for segment in segments for sample in segment]
    return pd.DataFrame(records)

def load_features(features_path, data_frames):
    """Load a feature array and split it into per-sequence lists.

    Rows of ``data_frames`` are grouped by ``(split, segment_id, group_id)``.
    For every group, the index labels of its rows are used to pick the
    matching entries of the loaded array.

    Args:
        features_path: path of the file read with ``numpy.load``.
        data_frames: DataFrame holding the ``split``, ``segment_id`` and
            ``group_id`` columns; its index labels must be valid indices
            into the loaded array.

    Returns:
        List with one item per group (in groupby order); each item is the
        list of feature entries belonging to that group.
    """
    features = np.load(features_path)
    grouping_keys = ['split', 'segment_id', 'group_id']

    return [
        [features[row_label] for row_label in sequence_rows.index]
        for _, sequence_rows in data_frames.groupby(grouping_keys)
    ]

# Main class

In [None]:
# load dataset definition
# load dataset definition (flattened into one DataFrame row per sample)
frames = load_dataset_defition(conf.dataset_path)

# filter labels with few samples
# Boolean mask: True for rows whose 'relation_label' is not an excluded label.
valid_frames_idx = ~np.isin(frames['relation_label'], ['siblings', 'teacher-student'])
# Boolean selection keeps the original index labels of `frames`;
# load_features uses those labels to index the feature array.
valid_frames = frames[valid_frames_idx]

In [None]:
# load features
features_path = os.path.join(conf.features_dir, 'activity.npy')
feature_sequences = load_features(features_path, valid_frames)
max_timesteps = max(len(seq) for seq in feature_sequences)

In [None]:
# Project generator created with the fancy_pca option enabled.
# NOTE(review): presumably a PCA-based augmentation whose statistics are
# computed by fit() -- confirm in egosocial.utils.keras.processing.
gen = TimeSeriesDataGenerator(fancy_pca=True)
gen.fit(feature_sequences)
# maxlen is the longest sequence length; presumably shorter sequences are
# padded to it -- TODO confirm.
flow_gen = gen.flow(feature_sequences, maxlen=max_timesteps)

In [None]:
# Sanity check: fetch the next item from the flow and display its shape.
next(flow_gen).shape