In [1]:
# imports
from matplotlib import pyplot as plt
import numpy as np
import pandas as pd
import os

In [32]:
def load_data(parent=".data", exclude_test=None, exclude_val=None, verbose=False):
    """
    Load every recording folder under ``parent`` into train/val/test groups.

    The data is expected to be laid out as:
    parent
    ├── folder1
    │   ├── t_gps.csv
    │   ├── gps_mpu_left.csv
    │   ├── gps_mpu_right.csv
    │   └── labels.csv
    ├── folder2
    etc.

    The returned dictionary has the following format:
    {
        "train": {
            "t_gps": {folder1: df, folder2: df, ...},
            "gps_mpu_left": {folder1: df, folder2: df, ...},
            "gps_mpu_right": {folder1: df, folder2: df, ...},
            "labels": {folder1: df, folder2: df, ...},
            "folders": [folder1, folder2, ...]
        },
        "val": {...same keys...},
        "test": {...same keys...}
    }

    A file is assigned to a file type when the type name ("t_gps",
    "gps_mpu_left", "gps_mpu_right", "labels") occurs anywhere in its file
    name. If several files in one folder match the same type, only the first
    one (in sorted file-name order) is kept. A file-type entry stays ``None``
    when no file of that type was loaded for that group.

    Folders listed in ``exclude_test`` are held out of training and placed in
    the "test" group; folders listed in ``exclude_val`` are placed in the
    "val" group. A folder named in both lists goes to "test". Everything else
    goes to "train".

    Parameters:
    parent (str): The parent directory of the data
    exclude_test (list | None): Folder names to hold out as test data
    exclude_val (list | None): Folder names to hold out as validation data
    verbose (bool): Whether to print which file is loaded into which group

    Returns:
    dict: A dictionary of dataframes in the format described above. The
        "folders" lists of "val" and "test" are copies of the given exclusion
        lists (even if a name does not exist on disk); the "train" list holds
        the remaining sub-directories of ``parent`` in sorted order.
    """
    file_types = ("t_gps", "gps_mpu_left", "gps_mpu_right", "labels")

    # None stands in for "no exclusions" so no list object is shared between
    # calls; copying keeps the returned lists independent of the caller's.
    test_folders = list(exclude_test) if exclude_test is not None else []
    val_folders = list(exclude_val) if exclude_val is not None else []

    # Only sub-directories are recordings; stray files in `parent` are skipped
    # instead of crashing os.listdir below. Sorting makes the result
    # independent of the file system's listing order.
    folders = sorted(
        entry for entry in os.listdir(parent)
        if os.path.isdir(os.path.join(parent, entry))
    )

    data_dict = {
        split: {**dict.fromkeys(file_types), "folders": None}
        for split in ("train", "val", "test")
    }
    data_dict["val"]["folders"] = val_folders
    data_dict["test"]["folders"] = test_folders
    data_dict["train"]["folders"] = [
        f for f in folders if f not in test_folders and f not in val_folders
    ]

    for folder in folders:
        # The group is decided by the FOLDER name (the exclusion lists hold
        # folder names, not file names); test takes precedence over val.
        if folder in test_folders:
            split = "test"
        elif folder in val_folders:
            split = "val"
        else:
            split = "train"

        folder_path = os.path.join(parent, folder)
        for name in sorted(os.listdir(folder_path)):
            # "folders" is deliberately not a matchable type, so a file whose
            # name contains it cannot overwrite the folder lists.
            for file_type in file_types:
                if file_type not in name:
                    continue

                loaded = data_dict[split][file_type]
                if loaded is None:
                    loaded = data_dict[split][file_type] = {}
                elif folder in loaded:
                    # first matching file for this folder/type wins
                    continue

                loaded[folder] = pd.read_csv(os.path.join(folder_path, name))

                if verbose:
                    print(f"Loaded {name} from {folder} into {split} data")

    return data_dict

# Load everything under the default ".data" directory, printing each file as it
# is read. No folders are held out for validation or testing in this call.
csvs = load_data(verbose=True)

In [None]:
# recursively print the structure of csvs
def print_structure(csvs, level=0):
    """
    Recursively print the keys of a nested dictionary, one key per line.

    Each key is preceded by one tab per nesting level. Values that are
    dictionaries (including dict subclasses such as OrderedDict) are descended
    into; any other value is not printed.

    Parameters:
    csvs (dict): The dictionary to print the structure of
    level (int): The current level of recursion (number of leading tabs)

    Returns:
    None
    """
    indent = "\t" * level
    for key, value in csvs.items():
        # print() puts a single space between its two arguments, so every key
        # is preceded by the tabs plus one space (also at level 0).
        print(indent, key)
        # isinstance rather than an exact type comparison, so mappings derived
        # from dict are expanded too.
        if isinstance(value, dict):
            print_structure(value, level + 1)

# Show the nested key layout, then the folder names assigned to each group.
print_structure(csvs)
for split_name in ("train", "val", "test"):
    print(csvs[split_name]["folders"])