Takes a recorded list of JSON files and turns them into a dataframe

TO DO
* Try collecting the rows in a dictionary (or list) instead of a dataframe, since adding rows to a dataframe one at a time is not very efficient

In [None]:
import json
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import time as TIME, sys
from IPython.display import clear_output
import bisect

In [None]:
# Progress bar function
def update_progress(progress):
    """Redraw a 20-character text progress bar in the notebook output.

    ``progress`` is the completed fraction. Integers are converted to float,
    any other non-float value is treated as 0, and the value is clamped to
    the range 0..1 before it is rendered. The previous cell output is
    cleared first, so the bar appears to update in place.
    """
    bar_length = 20

    # Normalise the input type: ints become floats, anything else counts as 0.
    if isinstance(progress, int):
        progress = float(progress)
    elif not isinstance(progress, float):
        progress = 0

    # Clamp to the displayable range.
    if progress < 0:
        progress = 0
    elif progress >= 1:
        progress = 1

    filled = int(round(bar_length * progress))
    bar = "#" * filled + "-" * (bar_length - filled)

    clear_output(wait=True)
    print(f"Progress: [{bar}] {progress * 100:.1f}%")

In [None]:
# Read Sensorfloor data
# Both input files hold one JSON document per line.
sm = 'test_data_rb1_no_mag' # Sensorfloor raw file
vm = 'vicon_data_rb1_no_mag' # Vicon data

# Use context managers so the file handles are closed deterministically, and
# skip blank lines (e.g. a trailing newline) that json.loads would reject.
with open(sm + '.txt', 'r') as sensor_file:
    measurements = [json.loads(line) for line in sensor_file if line.strip()]

# Read vicon data
with open(vm + '.txt', 'r') as vicon_file:
    vicon = [json.loads(line) for line in vicon_file if line.strip()]

In [None]:
# Turn the parsed JSON records into dataframes
df_data = pd.DataFrame(measurements)
df_vicon = pd.DataFrame(vicon)

# Convert the epoch-second values to pandas datetimes. The vicon records
# carry theirs in a 'time' column, which is replaced by 'timestamp'.
df_data['timestamp'] = pd.to_datetime(df_data['timestamp'], unit='s')
df_vicon['timestamp'] = pd.to_datetime(df_vicon.pop('time'), unit='s')

# Order both tables chronologically (stable sort) and renumber the rows
df_data = (
    df_data
    .sort_values(by='timestamp', axis=0, kind='mergesort')
    .reset_index(drop=True)
)
df_vicon = (
    df_vicon
    .sort_values(by='timestamp', axis=0, kind='mergesort')
    .reset_index(drop=True)
)

In [None]:
def set_strip_id(id):
    """Map a strip's MAC address string to its strip number (1-23).

    Returns ``None`` when the address is not in the table.
    """
    # Addresses listed in strip order: position 1 is strip 1, and so on.
    mac_addresses = (
        "b8:27:eb:41:99:a0",
        "b8:27:eb:c0:fd:6a",
        "b8:27:eb:18:92:c7",
        "b8:27:eb:53:f2:33",
        "b8:27:eb:e7:6f:dc",
        "b8:27:eb:38:4b:07",
        "b8:27:eb:1b:cf:26",
        "b8:27:eb:6d:0e:53",
        "b8:27:eb:b7:a3:b7",
        "b8:27:eb:be:dc:32",
        "b8:27:eb:ff:a4:48",
        "b8:27:eb:a9:7d:4d",
        "b8:27:eb:c4:f8:c7",
        "b8:27:eb:e4:43:6d",
        "b8:27:eb:98:69:6e",
        "b8:27:eb:75:c7:a2",
        "b8:27:eb:09:3d:77",
        "b8:27:eb:05:d8:4d",
        "b8:27:eb:36:da:22",
        "b8:27:eb:f5:5d:04",
        "b8:27:eb:88:8d:56",
        "b8:27:eb:00:be:93",
        "b8:27:eb:c0:10:ae",
    )
    strip_by_mac = {
        mac: number for number, mac in enumerate(mac_addresses, start=1)
    }
    return strip_by_mac.get(id)

In [None]:
# Replace each strip's MAC address by its strip number and make node ids integers
df_data['strip_id'] = df_data['strip_id'].apply(set_strip_id)
df_data['node_id'] = df_data['node_id'].astype(int)

In [None]:
# Collect the distinct node ids and strip ids that occur in the recording
nodes, strips = (
    df_data[column].unique() for column in ('node_id', 'strip_id')
)

In [None]:
# For every (node, strip) combination, record the earliest timestamp and the
# row index in df_data at which it occurs.
# Rows are collected in a list and the dataframe is built once at the end:
# DataFrame.append was removed in pandas 2.0 and copied the frame on each call.
start_rows = []
for n in nodes:
    for s in strips:
        # Evaluate the selection once and reuse it for both lookups
        sensor_times = df_data.loc[
            (df_data['node_id'] == n) & (df_data['strip_id'] == s), 'timestamp'
        ]
        try:
            # idxmin raises ValueError when the selection has no data
            imin = sensor_times.idxmin()
        except ValueError:
            print('No data found for')
            # there is no data
            print('node_id: ' + str(n) + ' , strip_id: ' + str(s))
            continue
        # get first recording time for each node
        tmin = sensor_times.min()
        start_rows.append([n, s, tmin, imin])

df_start_times = pd.DataFrame(
    start_rows, columns=['node_id', 'strip_id', 'starttime', 'row_index']
)

In [None]:
# Add a column for the previous timestamp of the same sensor, since the
# recordings are (roughly) linearly spaced between the previous and the
# current timestamp.
# A grouped shift replaces the per-row .loc assignment: within each
# (node_id, strip_id) group every row receives the timestamp of the row
# before it (in the current row order of df_data), and the first row of each
# group is left missing (NaT).
df_data['previous_timestamp'] = (
    df_data.groupby(['node_id', 'strip_id'])['timestamp'].shift(1)
)

In [None]:
# Discard every row that has a missing value in any column, then renumber
# the remaining rows from 0.
df_data = df_data.dropna().reset_index(drop=True)

In [None]:
# Column layout of the per-sample table: sensor identity, sample time, and
# the three-component 'a', 'g' and 'm' readings.
col = [
    'node_id', 'strip_id', 'timestamp',
    'ax', 'ay', 'az',
    'gx', 'gy', 'gz',
    'mx', 'my', 'mz',
]
# Start from an empty table with that layout
Data = pd.DataFrame(columns=col)

In [None]:
# Core of this file
# Expands every row of df_data (one recorded list of readings) into one output
# row per reading. The readings of a row are spaced evenly between the row's
# previous_timestamp and its timestamp, starting at previous_timestamp.
#
# The output rows are collected in a plain list and the dataframe is built
# once at the end; concatenating a one-row dataframe per reading copies the
# whole table every time and makes the loop quadratic.

number_of_elements = len(df_data['data'])

rows = []
last_permille = -1  # last progress value drawn, in steps of 0.1 %
packets = zip(
    df_data['node_id'],
    df_data['strip_id'],
    df_data['timestamp'],
    df_data['previous_timestamp'],
    df_data['data'],
)
for i, (n, s, timestamp, previous_timestamp, samples) in enumerate(packets):
    n_samples = len(samples)
    if n_samples:
        # Time step between two consecutive readings of this row
        delta_time = (timestamp - previous_timestamp) / n_samples
        for j, sample in enumerate(samples):
            a, g, m = sample['a'], sample['g'], sample['m']
            time_interpolated = previous_timestamp + j * delta_time
            rows.append([
                n, s, time_interpolated,
                a[0], a[1], a[2],
                g[0], g[1], g[2],
                m[0], m[1], m[2],
            ])
    # Redrawing the bar clears the cell output, which is slow; only redraw
    # when the displayed value (one decimal place) can have changed.
    permille = (1000 * i) // number_of_elements
    if permille != last_permille:
        update_progress(i / number_of_elements)
        last_permille = permille
update_progress(1)

Data = pd.DataFrame(rows, columns=col)

In [None]:
# Order the per-sample table chronologically and renumber its rows from 0
Data = Data.sort_values(by='timestamp').reset_index(drop=True)

In [None]:
# Now add vicon position
# Split the vicon 'translation' entries into separate float columns.
translations = df_vicon['translation']
for axis, name in enumerate(('posX', 'posY', 'posZ')):
    df_vicon[name] = np.array([t[axis] for t in translations], dtype=float)

# For every sensor sample, find the first vicon row whose timestamp is
# strictly greater than the sample's timestamp (insertion point on the right),
# and copy that row's position. This relies on df_vicon being sorted by
# timestamp.
# NOTE(review): the original comment said "find previous timestamp", but the
# lookup has always selected the NEXT vicon sample. That behaviour is kept
# here; use `vicon_idx - 1` if the previous sample is what is wanted - confirm.
vicon_times = pd.to_datetime(df_vicon['timestamp']).to_numpy()
data_times = pd.to_datetime(Data['timestamp']).to_numpy()
vicon_idx = np.searchsorted(vicon_times, data_times, side='right')

# Samples at or after the last vicon timestamp have no following vicon row
has_match = vicon_idx < len(vicon_times)

for name in ('posX', 'posY', 'posZ'):
    values = np.full(len(Data), np.nan)
    values[has_match] = df_vicon[name].to_numpy()[vicon_idx[has_match]]
    Data[name] = values
update_progress(1)

# Report unmatched samples once, after the final progress redraw, so that the
# message is not wiped by clear_output.
n_missing = int((~has_match).sum())
if n_missing:
    print('DebugInfo: ' + str(n_missing) + ' samples have no matching vicon position.')
    print('Most likely the vicon data is too short.')

In [None]:
# Write the processed table to a csv file named after the Sensorfloor raw file
output_path = sm + '_processed' + '.csv'
Data.to_csv(output_path)