In [1]:
import numpy as np
import os
import fnmatch
from sys import stdout

In [2]:
'''
* Parses data from the inv_damp controller.
* The different features are returned separately in dictionary form {dataset_id: data}.
*
@return monster: the row-wise concatenation of all datasets with all features side by side;
it can be used for fitting a scaler.
'''

def _load_xyz_columns(filename):
    """
    Read columns 4, 5 and 6 of a comma-separated text file into an (N, 3) array.

    The first line of the file is skipped as a header. Only the three columns
    that are actually used are parsed; the remaining columns are ignored.
    """
    data = np.loadtxt(filename, usecols = (4, 5, 6), skiprows = 1, delimiter = ',', ndmin = 2)
    # ndmin = 2 keeps a single-row file two-dimensional; the reshape pins the
    # column count to 3 however loadtxt shapes a degenerate result.
    return data.reshape(-1, 3)


def _stack_datasets(data_by_index):
    """
    Stack the arrays of a {0..n-1: (N_i, 3) array} dict row-wise, in index order.

    Returns an empty (0, 3) array when the dict is empty, so callers can still
    concatenate the result with other feature blocks.
    """
    if not data_by_index:
        return np.empty((0, 3))
    # A single concatenate avoids re-copying the growing array on every file.
    return np.concatenate([data_by_index[idx] for idx in range(len(data_by_index))], axis = 0)


def create_datasets(filepath, verbose = 0):
    """
    Collect every sub-dataset found below `filepath`.

    The directory tree is walked recursively. Every directory that contains all
    three of 'positions.txt', 'forces.txt' and 'f_ref.txt' becomes one dataset;
    a directory holding only some of them is reported and skipped, so that the
    three returned dictionaries always describe the same directory under the
    same index. Sub-directories are visited in lexicographic order of their
    names (e.g. '1', '10', '11', ..., '2'), which determines the dataset index.

    @param filepath: base directory to search.
    @param verbose: if truthy, also print the matched files and the shape of
                    every dataset.
    @return: (position_data, force_data, input_data, monster) where the first
             three are dicts {dataset_index: (N, 3) array} and monster is the
             (sum of N, 9) array [positions | forces | inputs] over all datasets.
    @raise ValueError: raised by numpy when the total number of position, force
                       and input rows differ, since the blocks cannot then be
                       placed side by side.
    """
    # (file name, list of matched paths): actual cartesian position,
    # sensor forces and f_ref "forces".
    listOfPositionFiles = []
    listOfForceFiles = []
    listOfInputFiles = []
    targets = (('positions.txt', listOfPositionFiles),
               ('forces.txt', listOfForceFiles),
               ('f_ref.txt', listOfInputFiles))

    for (dirpath, dirnames, filenames) in os.walk(filepath):
        # Sorting in place makes os.walk descend in a deterministic order.
        dirnames.sort()

        matches = [fnmatch.filter(filenames, name) for name, _ in targets]
        if not any(matches):
            continue  # directory holds no data files at all
        if not all(matches):
            # Appending only the files that exist would shift the indices of
            # the three lists against each other for every later dataset.
            missing = [name for (name, _), found in zip(targets, matches) if not found]
            print("Skipping incomplete dataset '%s' (missing: %s)" %(dirpath, ", ".join(missing)))
            continue
        for (_, file_list), found in zip(targets, matches):
            file_list.append(os.path.join(dirpath, found[0]))

    print("Retrieved %d position files, %d force files and %d input files from base directory '%s' "
                     %(len(listOfPositionFiles), len(listOfForceFiles), len(listOfInputFiles), filepath))
    if verbose:
        for i in range(len(listOfPositionFiles)):
            print("%d %s  %s %s \n" %(i, listOfPositionFiles[i], listOfForceFiles[i], listOfInputFiles[i]))

    # Read the appropriate columns and parse them into np arrays
    force_data = {idx : _load_xyz_columns(name) for idx, name in enumerate(listOfForceFiles)}
    position_data = {idx : _load_xyz_columns(name) for idx, name in enumerate(listOfPositionFiles)}
    input_data = {idx : _load_xyz_columns(name) for idx, name in enumerate(listOfInputFiles)}

    monster_forces = _stack_datasets(force_data)
    monster_positions = _stack_datasets(position_data)
    monster_inputs = _stack_datasets(input_data)

    # Report every dataset whose three files do not have the same number of rows.
    stupid_files = 0
    for i in range(len(position_data)):
        if len(position_data[i]) == len(force_data[i]) and len(force_data[i]) == len(input_data[i]):
            if verbose:
                print(i, position_data[i].shape, force_data[i].shape, input_data[i].shape)
        else:
            stupid_files += 1
            print("In dataset %s there is a size mismatch. Pos : %d F_s %d F_ref %d " %(listOfPositionFiles[i], position_data[i].shape[0], force_data[i].shape[0], input_data[i].shape[0]))
    print("%d files need to be fixed" %stupid_files)

    monster = np.concatenate((monster_positions, monster_forces, monster_inputs), axis = 1)

    return position_data, force_data, input_data, monster

In [3]:
def reset_dataset(position_data, force_data, f_ref_data):
    """
    Build one combined array per dataset.

    For every key of `position_data`, the position, force and f_ref arrays
    stored under that key are joined column-wise (axis 1), in that order.

    @return: a new dict {dataset_id: combined array}; the inputs are not modified.
    """
    return {
        key : np.concatenate((positions, force_data[key], f_ref_data[key]), axis = 1)
        for key, positions in position_data.items()
    }

In [4]:
"""
ATTENTION SUMMONER! Don't forget to change the path to the data!
"""

# Base directory that create_datasets walks recursively looking for
# positions.txt / forces.txt / f_ref.txt. The path is relative, so it is
# resolved against the kernel's current working directory.
path = "Envs/bananaenv/scripts/inv_damp_data/25_04"
# verbose = 1 additionally prints the matched files and the shape of every dataset.
position_data, force_data, input_data, monster = create_datasets(path, verbose = 1)


Retrieved 40 position files, 40 force files and 40 input files from base directory 'Envs/bananaenv/scripts/inv_damp_data/25_04' 
0 Envs/bananaenv/scripts/inv_damp_data/25_04/1/positions.txt  Envs/bananaenv/scripts/inv_damp_data/25_04/1/forces.txt Envs/bananaenv/scripts/inv_damp_data/25_04/1/f_ref.txt 

1 Envs/bananaenv/scripts/inv_damp_data/25_04/10/positions.txt  Envs/bananaenv/scripts/inv_damp_data/25_04/10/forces.txt Envs/bananaenv/scripts/inv_damp_data/25_04/10/f_ref.txt 

2 Envs/bananaenv/scripts/inv_damp_data/25_04/11/positions.txt  Envs/bananaenv/scripts/inv_damp_data/25_04/11/forces.txt Envs/bananaenv/scripts/inv_damp_data/25_04/11/f_ref.txt 

3 Envs/bananaenv/scripts/inv_damp_data/25_04/12/positions.txt  Envs/bananaenv/scripts/inv_damp_data/25_04/12/forces.txt Envs/bananaenv/scripts/inv_damp_data/25_04/12/f_ref.txt 

4 Envs/bananaenv/scripts/inv_damp_data/25_04/13/positions.txt  Envs/bananaenv/scripts/inv_damp_data/25_04/13/forces.txt Envs/bananaenv/scripts/inv_damp_data/25_04

In [5]:
'''
One way of iterating through datasets is by their key-values
'''
# Join the three per-dataset dicts into one {dataset_id: array} dict
# (position, force and input columns side by side).
combined_dictionary = reset_dataset(position_data, force_data, input_data)
# The keys are 0-based file indices, hence the +1 for a 1-based display number.
for d_idx, dataset in combined_dictionary.items():
    print("Dataset %d has length %d"%(d_idx+1, len(dataset)))

Dataset 1 has length 5636
Dataset 2 has length 5045
Dataset 3 has length 9200
Dataset 4 has length 7886
Dataset 5 has length 3998
Dataset 6 has length 9489
Dataset 7 has length 9707
Dataset 8 has length 5825
Dataset 9 has length 8227
Dataset 10 has length 6533
Dataset 11 has length 6236
Dataset 12 has length 8193
Dataset 13 has length 7901
Dataset 14 has length 2818
Dataset 15 has length 4706
Dataset 16 has length 4997
Dataset 17 has length 5550
Dataset 18 has length 3047
Dataset 19 has length 6028
Dataset 20 has length 3719
Dataset 21 has length 3329
Dataset 22 has length 3288
Dataset 23 has length 9414
Dataset 24 has length 2584
Dataset 25 has length 4249
Dataset 26 has length 5501
Dataset 27 has length 5167
Dataset 28 has length 6389
Dataset 29 has length 2700
Dataset 30 has length 2245
Dataset 31 has length 2702
Dataset 32 has length 4266
Dataset 33 has length 5384
Dataset 34 has length 5334
Dataset 35 has length 7197
Dataset 36 has length 4185
Dataset 37 has length 4504
Dataset 38