# Generating tree diagrams of columns 

We need to understand the contents of each recording. Because each sheet exported by XSENS MVN contains a large number of columns, the column names are best represented as a tree.

In [12]:
import pandas as pd 
from pathlib import Path 
from tqdm import tqdm
from multiprocessing import Pool

In [13]:
# Path to the example recording workbook; nothing is read from disk in this cell.
example_file = Path("PD046-OFF_Falling_GDPURS.xlsx")

# pandas.read_excel loads only the first sheet unless a sheet name is passed,
# so the sheet names were copied by hand from the workbook into this list.
sheet_names = [
    "General Information",
    "Segment Orientation - Quat",
    "Segment Orientation - Euler",
    "Segment Position",
    "Segment Velocity",
    "Segment Acceleration",
    "Segment Angular Velocity",
    "Segment Angular Acceleration",
    "Joint Angles ZXY",
    "Joint Angles XZY",
    "Ergonomic Joint Angles ZXY",
    "Ergonomic Joint Angles XZY",
    "Center of Mass",
    "Sensor Free Acceleration",
    "Sensor Magnetic Field",
    "Sensor Orientation - Quat",
    "Sensor Orientation - Euler",
]

In [21]:
# Pick a single sheet to look at interactively; index 2 of the list is
# "Segment Orientation - Euler".
sheet_name = sheet_names[2]

# Read just that sheet of the example workbook into a DataFrame.
data_df = pd.read_excel(io=example_file, sheet_name=sheet_name)

In [39]:
# Column names are split on spaces and stored in a uniform nested-dict tree, so a
# first word that is followed by one word in some columns and by several words in
# others keeps every column (a mixed list/dict tree silently dropped such columns).
# NOTE(review): the Joint Angles sheets were flagged as possibly mishandled;
# presumably they hit this mixed-depth case - verify their generated output.

def _insert_column_path(tokens, tree: dict) -> None:
    """Insert one tokenised column name into ``tree``, creating nodes as needed.

    Every node is a dict mapping a token to its child node; a token that has no
    children maps to an empty dict.
    """
    node = tree
    for token in tokens:
        node = node.setdefault(token, {})


def _write_tree(tree: dict, prefix: str, file) -> None:
    """Write ``tree`` to ``file`` as a tab-indented Markdown bullet list.

    Each token gets its own bullet. Its childless children are collapsed onto a
    single comma-separated bullet one level deeper; children that have their own
    children are written after that line as nested bullets.
    """
    for name, children in tree.items():
        file.write(prefix + "- " + name + "\n")
        if not children:
            continue

        leaves = [token for token, sub in children.items() if not sub]
        branches = {token: sub for token, sub in children.items() if sub}

        if leaves:
            file.write(prefix + "\t" + "- " + ", ".join(leaves) + "\n")
        _write_tree(branches, prefix + "\t", file)


def columns2md(file_path, sheet_name):
    """Write the column names of one Excel sheet to ``<sheet_name>.md`` as a tree.

    Each column name is split on single spaces and the resulting tokens are
    merged into a tree, so columns sharing leading words share a branch. The
    file is created in the current working directory and starts with a
    ``# <sheet_name>`` heading.

    A column name that is a strict prefix of another column name is not marked
    separately in the output.

    Args:
        file_path: Workbook to read; passed straight to ``pandas.read_excel``.
        sheet_name: Name of the sheet to read; also used as the output file
            stem and heading.

    Returns:
        None. The result is the Markdown file written to disk.
    """
    data_df = pd.read_excel(file_path, sheet_name=sheet_name)

    column_tree = {}
    for column in data_df.columns:
        # str() keeps non-string labels (e.g. integers) from raising on .split.
        _insert_column_path(str(column).split(" "), column_tree)

    # Explicit encoding so the output does not depend on the platform default.
    with open(sheet_name + ".md", "w", encoding="utf-8") as f:
        f.write("# " + sheet_name + "\n")
        _write_tree(column_tree, "", f)

In [40]:
# One (file_path, sheet_name) argument tuple per sheet of the example workbook.
values = ((example_file, name) for name in sheet_names)

# Run columns2md for every sheet across a pool of worker processes; starmap
# unpacks each tuple into the function's two positional arguments.
with Pool() as pool:
    pool.starmap(columns2md, values)