# Convert data from .npz files to .CSV trajectory files

In [1]:
%matplotlib inline
%load_ext autoreload
%autoreload 2

import os
import glob
from pathlib import Path
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import trials

# Root directory under which the converted .csv files are written
path_output_csv = './data/Converted_to_CSV/'

# Build one glob pattern per subdirectory of the collected memory-experiment data
filename_pattern = [
    f'{sub}/with_Pontin_Holonomic_*_*.npz'
    for sub in glob.glob('data/Conditions/Memory/*')
]

# Passed as the only_inbound argument of the conversion calls further down
only_inbound = True

In [2]:
def check_num_of_entries_in_datafiles(filename, path_output_csv):
    """ 
        Validity check for a collected data file: load the route stored 
        in filename and return the number of entries in its second 
        component (the one the conversion routine treats as velocities). 
        As a side effect the subdirectory of filename is created under 
        path_output_csv if it is missing. 
    """

    # Mirror the input's directory layout below the output root
    target_dir = Path(path_output_csv) / Path(filename).parent
    target_dir.mkdir(parents=True, exist_ok=True)

    # load_route yields three items; only the middle one is counted
    _, velocities, _ = trials.load_route(filename=filename)
    return len(velocities)


def convert_datafiles_to_csv_files(filename, path_output_csv, scaling=1.0, sampling_rate=25,
                                   cut_once_crossing_x=None, only_inbound=False, keep_first_n_samples=None):
    """ 
        Gets velocity measurements from a npz file and converts them 
        to the x,y coordinate series of a trajectory. The result is 
        saved as a .CSV file (columns x, y, Time) under path_output_csv, 
        mirroring the relative path of filename. 

        filename: route file, passed unchanged to trials.load_route. 
        path_output_csv: root directory of the generated .CSV files. 
        scaling: factor applied to the x and y coordinates. 
        sampling_rate: the velocity data are assumed to be sampled 
            every 1/sampling_rate seconds; used to build the Time column. 
        cut_once_crossing_x: if it is an int or float, the trajectory 
            is cut off right after the first sample that lies farther 
            than cut_once_crossing_x from the release point (that sample 
            is kept), and '_platformReleases' is appended to the output 
            file name. If no sample crosses the radius nothing is cut. 
        only_inbound: if True the csv file will contain only the inbound 
            (homing) part of the route, i.e. the velocities from index 
            log.T_outbound onwards. 
        keep_first_n_samples: if None all samples are stored. If it is 
            an int smaller than the path length only the first 
            keep_first_n_samples samples are stored (applied after the 
            radius cut). 

        Returns None. Errors raised while loading, converting or writing 
        are reported on stdout together with their cause and the file is 
        skipped, so that a batch run continues with the next file. 
        KeyboardInterrupt and SystemExit are not intercepted, so a 
        running batch can still be interrupted. 
    """

    # Create subdirectory structure if it does not exist
    Path(path_output_csv).joinpath(Path(filename).parent).mkdir(parents=True, exist_ok=True)

    try:
        _, v, log = trials.load_route(filename=filename)

        # Use the whole route, or only the homing part of it
        velocities = v[log.T_outbound:] if only_inbound else v

        # Convert velocity to x,y coordinates; the release point is the origin
        xy = np.vstack([np.array([0.0, 0.0]), np.cumsum(velocities, axis=0)])
        x, y = xy[:, 0], xy[:, 1]

        # Create a time stamp vector
        t = np.arange(len(x)) * (1.0 / sampling_rate)

        # Construct pandas data frame with the coordinates scaled to m
        df = pd.DataFrame({'x': x * scaling, 'y': y * scaling, 'Time': t},
                          columns=['x', 'y', 'Time'])

        # Keep the input name untouched so that errors report the file that was read
        output_name = filename
        if isinstance(cut_once_crossing_x, (int, float)):
            crossed = np.hypot(df.x.values, df.y.values) > cut_once_crossing_x
            # argmax gives the first True; it gives 0 when there is none, and
            # then no cut is applied
            cut_off_index = int(np.argmax(crossed))
            if cut_off_index > 0:
                df = df.head(cut_off_index + 1)
            output_name = filename.replace('.npz', '_platformReleases.npz')

        if isinstance(keep_first_n_samples, int) and keep_first_n_samples < len(df):
            df = df.head(keep_first_n_samples)

        # Construct the CSV filename
        pathfilename = Path(path_output_csv).joinpath(output_name).with_suffix('.csv')
        print('.', end='')
        df.to_csv(pathfilename, index=False)
    except Exception as exc:
        # Best effort: report the failure and its cause, then let the caller go on
        print('Error while processing ', filename, ':', repr(exc))

In [3]:
# Check if any of the files has too few trajectory steps.
# For every filename pattern the smallest number of steps found among the
# matching files is printed.
if not isinstance(filename_pattern, list):
    filename_pattern = [filename_pattern]

for filename_pattern_i in filename_pattern:
    files_list = glob.glob(filename_pattern_i, recursive=False)

    lengths_list = []
    for f in files_list:
        try:
            lengths_list.append(check_num_of_entries_in_datafiles(f.replace('data/', ''), path_output_csv))
        except Exception:
            # Print the name of the file that could not be checked and go on;
            # KeyboardInterrupt is deliberately not caught so the run can be stopped
            print(f)
    if lengths_list:
        print('Smallest number of steps in trajectory', min(lengths_list))
    else:
        # min() of an empty list raises ValueError and would abort the remaining patterns
        print('No readable trajectory files matched', filename_pattern_i)

Smallest number of steps in trajectory 3002
Smallest number of steps in trajectory 1502
Smallest number of steps in trajectory 3002
Smallest number of steps in trajectory 3002
Smallest number of steps in trajectory 3002
Smallest number of steps in trajectory 3002
Smallest number of steps in trajectory 3002
Smallest number of steps in trajectory 3002
Smallest number of steps in trajectory 3002
Smallest number of steps in trajectory 3002
Smallest number of steps in trajectory 3002
Smallest number of steps in trajectory 3002
Smallest number of steps in trajectory 3002
Smallest number of steps in trajectory 3002
Smallest number of steps in trajectory 3002
Smallest number of steps in trajectory 3002
Smallest number of steps in trajectory 3002
Smallest number of steps in trajectory 3002
Smallest number of steps in trajectory 3002
Smallest number of steps in trajectory 3002
Smallest number of steps in trajectory 3002
Smallest number of steps in trajectory 3002
Smallest number of steps in traj

# Convert the trajectory files from .npz to .csv

## Convert the whole trajectory, keeping the filename and changing the extension from .npz to .csv

In [3]:
# Convert the trajectory files from .npz to .csv
# (at most the first 600 samples of each trajectory are stored)
fps = 25        # in frames per sec
scaling = 0.03  # max distance = 0.03m/step * 1500steps = 45m

# Accept a single pattern as well as a list of patterns
filename_pattern = filename_pattern if isinstance(filename_pattern, list) else [filename_pattern]

for filename_pattern_i in filename_pattern:
    files_list = glob.glob(filename_pattern_i, recursive=False)

    for f in files_list:
        # The leading 'data/' is stripped from the globbed path before conversion
        convert_datafiles_to_csv_files(
            f.replace('data/', ''),
            path_output_csv,
            scaling=scaling,
            sampling_rate=fps,
            only_inbound=only_inbound,
            keep_first_n_samples=600,
        )

........................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................

## Keep only the trajectory up to the first crossing of a 20 cm radius from the release point.
## Save with _platformReleases.csv appended to the filename.

In [4]:
# Convert the trajectory files from .npz to .csv, cutting each trajectory
# at the first crossing of a 0.20 radius around the release point
# (at most the first 600 samples are stored)
fps = 25        # in frames per sec
scaling = 0.03  # max distance = 0.03m/step * 1500steps = 45m

# Accept a single pattern as well as a list of patterns
filename_pattern = filename_pattern if isinstance(filename_pattern, list) else [filename_pattern]

for filename_pattern_i in filename_pattern:
    files_list = glob.glob(filename_pattern_i, recursive=False)

    for f in files_list:
        # The leading 'data/' is stripped from the globbed path before conversion
        convert_datafiles_to_csv_files(
            f.replace('data/', ''),
            path_output_csv,
            scaling=scaling,
            sampling_rate=fps,
            only_inbound=only_inbound,
            cut_once_crossing_x=0.20,
            keep_first_n_samples=600,
        )

........................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................