# Clean OptiTrack Data
This script reads the CSV takes exported from Motive and removes the frames in which not all 50 expected markers are labeled.
The labeled markers of complete frames are written to .pkl files batchwise.

In [1]:
import pandas as pd
import numpy as np
from datetime import datetime
import os

In [2]:
# Directory that is scanned for the CSV takes exported from Motive.
motion_capture_files_path = 'MotionCaptureFiles'
# Directory that receives the cleaned .pkl batch files.
clean_data_output_path = 'CleanedData'

In [3]:
def log(s):
    """Append a timestamped message to the log file and echo it to stdout.

    The timestamp is taken exactly once, so the entry written to
    ``clean_motive_data_log.txt`` and the printed line are identical.

    Args:
        s: Message text; it is concatenated to the timestamp prefix, so it
            must be a ``str``.
    """
    entry = "[" + str(datetime.now()) + "] " + s
    with open("clean_motive_data_log.txt", "a") as myfile:
        myfile.write(entry + "\n")
    print(entry)

In [4]:
def to_float(s):
    """Convert *s* to a float, mapping unparsable text to NaN.

    Args:
        s: Value to convert, typically one raw CSV field.

    Returns:
        ``float(s)`` when the conversion succeeds, ``np.nan`` when it
        raises ``ValueError`` (for example an empty or non-numeric field).
    """
    try:
        value = float(s)
    except ValueError:
        value = np.nan
    return value

In [5]:
def parse(line):
    """Split one comma-separated line into a list of floats.

    Args:
        line: Raw text line; fields are separated by ``","``.

    Returns:
        A list with one entry per field, each converted with ``to_float``
        (so fields that are not numeric become NaN).
    """
    return [to_float(field) for field in line.split(",")]

In [6]:
# Number of input lines between two progress log messages.
batch_size = 2500

# Columns of the cleaned output: one row per marker and frame.
output_columns = ['time', 'label', 'x', 'y', 'z']

# Layout of the exported CSV as used by this script.
header_rows = 7          # heading lines in front of the first data row
label_row = 3            # 0-based row that holds the marker labels
time_column = 1          # column of a data row that holds the time
marker_count = 50        # markers that must be labeled in a complete frame
first_marker_column = 2  # x of the first marker; x, y, z are consecutive
end_marker_column = first_marker_column + 3 * marker_count  # exclusive: 152

# Number of output rows collected before a .pkl file is written
# (500000 rows correspond to 10000 complete frames of 50 markers).
rows_per_output_file = 500000


def _save_batch(rows, filename, batch_index):
    """Write the collected (time, label, x, y, z) rows to one .pkl file."""
    log("Saving file " + str(batch_index) + " with " + str(len(rows)) + " elements")
    df = pd.DataFrame(rows, columns=output_columns)
    df.to_pickle(os.path.join(
        clean_data_output_path,
        '%s_%i.pkl' % (filename.replace(".csv", ""), batch_index)))


# DataFrame.to_pickle does not create missing directories.
os.makedirs(clean_data_output_path, exist_ok=True)

for filename in os.listdir(motion_capture_files_path):
    if not filename.endswith(".csv"):
        continue
    log("Loading File " + filename)
    input_path = os.path.join(motion_capture_files_path, filename)
    lstLines = []
    line_counter = 0
    skipped = 0
    out_batch = 0

    # Columns of a data row that are used:
    #   1      : time
    #   2..151 : x, y, z positions of the 50 labeled markers
    labels = pd.read_csv(input_path, skiprows=label_row, nrows=1, header=None)
    labels = labels.iloc[0, first_marker_column:end_marker_column]
    labels.reset_index(drop=True, inplace=True)
    # One label per marker: the entry above each marker's first (x) column.
    marker_labels = labels.iloc[::3].tolist()

    with open(input_path) as fp:
        # Skip the heading in front of the data rows.
        for _ in range(header_rows):
            fp.readline()

        for line in fp:
            if line_counter % batch_size == 0:
                log("Processing Line " + str(line_counter) + " --- Skipped " + str(skipped) + " / " + str(batch_size) + " lines")
                skipped = 0
            line_counter = line_counter + 1

            values = parse(line)
            marker_values = values[first_marker_column:end_marker_column]

            # A frame is complete only if all 150 coordinates are present and
            # numeric.  Short rows (e.g. a blank line at the end of the file)
            # are treated as incomplete instead of raising an IndexError.
            if len(marker_values) < 3 * marker_count or np.isnan(marker_values).any():
                skipped = skipped + 1
                continue

            frame_time = values[time_column]

            # Add the labeled markers as plain tuples; building a pandas
            # Series per row dominated the runtime of this loop.
            for m in range(marker_count):
                x, y, z = marker_values[3 * m:3 * m + 3]
                lstLines.append((frame_time, marker_labels[m], x, y, z))

            # Save the output batchwise to .pkl files.
            if len(lstLines) >= rows_per_output_file:
                _save_batch(lstLines, filename, out_batch)
                out_batch = out_batch + 1
                lstLines = []

    # Save whatever is left after the last full batch (this may be an empty
    # table, which still carries the output columns).
    _save_batch(lstLines, filename, out_batch)
    out_batch = out_batch + 1
    lstLines = []

    log("Finished File " + filename)
log("Finished")

[2018-11-20 11:05:14.339027] Loading File 07.csv
[2018-11-20 11:05:20.264810] Processing Line 0 --- Skipped 0 / 2500 lines
[2018-11-20 11:09:36.055352] Processing Line 2500 --- Skipped 601 / 2500 lines
[2018-11-20 11:12:31.183413] Processing Line 5000 --- Skipped 2485 / 2500 lines
[2018-11-20 11:15:26.342140] Processing Line 7500 --- Skipped 2479 / 2500 lines
[2018-11-20 11:18:20.020190] Processing Line 10000 --- Skipped 2498 / 2500 lines
[2018-11-20 11:21:13.755864] Processing Line 12500 --- Skipped 2500 / 2500 lines
[2018-11-20 11:24:10.738014] Processing Line 15000 --- Skipped 2477 / 2500 lines
[2018-11-20 11:27:45.678813] Processing Line 17500 --- Skipped 2461 / 2500 lines
[2018-11-20 11:30:43.791039] Processing Line 20000 --- Skipped 2463 / 2500 lines
[2018-11-20 11:33:42.775408] Processing Line 22500 --- Skipped 2498 / 2500 lines
[2018-11-20 11:36:37.024262] Processing Line 25000 --- Skipped 2500 / 2500 lines
[2018-11-20 11:39:59.651365] Processing Line 27500 --- Skipped 1859 / 2

[2018-11-20 16:24:33.282321] Processing Line 245000 --- Skipped 2500 / 2500 lines
[2018-11-20 16:27:32.835598] Processing Line 247500 --- Skipped 2384 / 2500 lines
[2018-11-20 16:28:05.810303] Saving file 3 with 500000 elements
[2018-11-20 16:33:21.555828] Processing Line 250000 --- Skipped 201 / 2500 lines
[2018-11-20 16:37:42.335336] Processing Line 252500 --- Skipped 538 / 2500 lines
[2018-11-20 16:40:57.212853] Processing Line 255000 --- Skipped 2006 / 2500 lines
[2018-11-20 16:45:37.307371] Processing Line 257500 --- Skipped 61 / 2500 lines
[2018-11-20 16:50:22.817606] Processing Line 260000 --- Skipped 0 / 2500 lines
[2018-11-20 16:51:27.101354] Saving file 4 with 500000 elements
[2018-11-20 16:56:20.289113] Processing Line 262500 --- Skipped 0 / 2500 lines
[2018-11-20 17:00:42.146149] Processing Line 265000 --- Skipped 510 / 2500 lines
[2018-11-20 17:03:42.894576] Processing Line 267500 --- Skipped 2374 / 2500 lines
[2018-11-20 17:06:46.764666] Processing Line 270000 --- Skipped