In [210]:
# generic imports
import scipy.io
import pandas as pd
import numpy as np
import math

In [None]:
# create the dataset from matlab file
mat = scipy.io.loadmat('dataset/ARS_DLR_DataSet.mat', squeeze_me=True, struct_as_record=True)

Lets explore the dataset

In [243]:
print('Number of keys of the dict element', len(mat.keys()))
print()
print('Keys:')
print(mat.keys())

Number of keys of the dict element 59

Keys:
dict_keys(['__header__', '__version__', '__globals__', 'ARS_Maria_Real_Sitting_Heading_2', 'ARS_Maria_Real_Sitting_Heading_3', 'ARS_Maria_Real_Sitting_Heading_4', 'ARS_Maria_FLGUp1', 'ARS_Maria_FLGUp2', 'ARS_Maria_FLGUp3', 'ARS_Maria_FLGUp4', 'ARS_Maria_Jump', 'ARS_Cristina_Test_4_Sensor_Left', 'ARS_Cristina_Test_5_Sensor_Left', 'ARS_Cristina_Test_2_Sensor_Left', 'ARS_Cristina_Test_3_Sensor_Left', 'ARS_Cristina_Test_1_Sensor_Left', 'ARS_Elena_Test_1_Sensor_Right', 'ARS_Elena_Test_2_Sensor_Right', 'ARS_Elena_Test_3_Sensor_Right', 'ARS_Elena_Test_4_Sensor_Right', 'ARS_Elena_Test_5_Sensor_Right', 'ARS_Elena_Walking', 'ARS_Fabian_Test_1_Sensor_Right', 'ARS_Fabian_Test_2_Sensor_Right', 'ARS_Fabian_Test_3_Sensor_Right', 'ARS_Fabian_Test_4_Sensor_Right', 'ARS_Fabian_Test_5_Sensor_Right', 'ARS_Jesus_Test_1_Sensor_Right', 'ARS_Jesus_Test_2_Sensor_Right', 'ARS_Jesus_Test_3_Inverse_Sensor_Right', 'ARS_Jesus_Test_4_Sensor_Right', 'ARS_Jesus_Test_5_Senso

Lets see what a single entry is composed of

In [273]:
example_entry = mat[list(mat.keys())[3]]
example_entry

array([ array([[  4.53400000e+01,  -9.43000200e+00,   1.97339400e+00, ...,
          4.93577200e-01,  -1.54302400e-01,   3.48073800e-01],
       [  4.53500000e+01,  -9.42630400e+00,   1.96610800e+00, ...,
          4.94766900e-01,  -1.55703100e-01,   3.46810900e-01],
       [  4.53600000e+01,  -9.43360500e+00,   1.96788500e+00, ...,
          4.93788400e-01,  -1.54264800e-01,   3.47441300e-01],
       ..., 
       [  5.26090000e+02,  -8.37665400e+00,   4.48864500e+00, ...,
          4.22742800e-01,  -2.76599000e-01,   4.07902500e-01],
       [  5.26100000e+02,  -8.40929300e+00,   4.50478200e+00, ...,
          4.21581900e-01,  -2.75860100e-01,   4.08134000e-01],
       [  5.26110000e+02,  -8.39455000e+00,   4.49753500e+00, ...,
          4.22797100e-01,  -2.75629300e-01,   4.08713200e-01]]),
       array([[  4.53400000e+01,  -2.38955600e-01,   9.77489600e-02, ...,
          8.65125200e-01,   4.73254700e-01,  -1.66097400e-01],
       [  4.53500000e+01,  -2.38969200e-01,   9.78752400e-02

And a single frame recorded by the IMU sensor is like

In [274]:
example_entry[0][0]

array([  4.53400000e+01,  -9.43000200e+00,   1.97339400e+00,
        -1.57358700e+00,  -5.30050200e-02,   4.51815800e-02,
         3.52342500e-02,   4.93577200e-01,  -1.54302400e-01,
         3.48073800e-01])

And the corresponding attitude

In [275]:
example_entry[1][0]

array([ 45.34      ,  -0.2389556 ,   0.09774896,  -0.966098  ,
        -0.4409745 ,   0.8754856 ,   0.1976519 ,   0.8651252 ,
         0.4732547 ,  -0.1660974 ])

According to the dataset documentation, this is what each column represent:

**`example_entry[0]`**

* 1st column is the time extracted from the sensor in seconds. For example: a value 15.6 means that it has passed 15.6 seconds since the sensor started to transmit data.


* 2nd column is the acceleration in the X axis measured by the sensor.
* 3rd column is the acceleration in the Y axis measured by the sensor.
* 4th column is the acceleration in the Z axis measured by the sensor.


* 5th column is the angular velocity in the X axis measured by the sensor.
* 6th column is the angular velocity in the Y axis measured by the sensor.
* 7th column is the angular velocity in the Z axis measured by the sensor.


* 8th column is the magnetic field in the X axis measured by the sensor.
* 9th column is the magnetic field in the Y axis measured by the sensor.
* 10th column is the magnetic field in the Z axis measured by the sensor.

**`example_entry[1]`**

Contains a matrix of double data with the direction cosine matrix extracted from the sensor.

In [276]:
print(example_entry[2])
print(example_entry[3])

['STNDING' 'SITTING' 'STNDING']
[    1  7743  8388 45040 45929 48078]


**`example_entry[2]`**

It contains a cell array where every position is a string. Every string is the identifier of the activity. The possible values of this strings are:
* 'RUNNING' = "running"
* 'WALKING' = "walking"
* 'WALKUPS' = "walking upstairs"
* 'WALKDWS' = "walking downstairs"
* 'STNDING' = "standing"
* 'SITTING' = "sitting"
* 'XLYINGX' = "lying on the floor"
* 'FALLING' = "falling"
* 'JUMPVRT' = "jumping vertically"
* 'JUMPFWD' = "jumping forward"
* 'JUMPBCK' = "jumping backward"
* 'TRANSIT' = "transition between the activities"

As an example, if the cell contains three strings as: "STNDING RUNNING STNDING" means that the person performed these three activities in this 	order.

**`example_entry[3]`**

It is a vector with the index of the rows that indicate the beginning and ending of an activity. The format is the following:

```
	t1_0 t1_f t2_0 t2_f t3_0 t3_f t4_0 t4_f ... tn_0 tn_f

	ti_0 for i=1...n contains the beginning of the activity i.
	ti_f for i=1...n contains the ending of the activity i.
```

Notice that the length of this vector should be twice the length of the cell containing the strings with the activities.

## From matlab to pandas
Lets convert it to a dataframe for later processing

In [302]:
keys = [k for k in mat.keys() if k[0] != '_']
data = {
    'person': [],
    'acc_x': [],
    'acc_y': [],
    'acc_z': [],
    'gyr_x': [],
    'gyr_y': [],
    'gyr_z': [],
    'attitude': [],
    'labels': []
}

for j, k in enumerate(keys):
    record = mat[k]
    sensor = record[0]
    attitude = record[1]
    # cycle through the labels
    labels = record[2]
    frame_ranges = record[3]
    '''
    print('### DEBUG - {} ###'.format(k))
    print(labels)
    print(frame_ranges)
    '''
    for i in range(0, len(frame_ranges), 2):
        # create current label
        label_index = math.floor(i/2)
        label = labels[label_index]
        # +1 because extremes are included
        frame_length = frame_ranges[i+1] - frame_ranges[i] +1
        data['labels'] += [label] * frame_length
        
        # manually create labels for transitions
        # if subsequent frame_range is not contiguous, create transition
        if (i+2 < len(frame_ranges)) and (frame_ranges[i+2] != frame_ranges[i+1]+1):
            frame_length = frame_ranges[i+2] - frame_ranges[i+1] -1
            # labeling transitions with current and subsequent actions
            current_action, next_action = (labels[label_index], labels[label_index+1])
            label = 'TRANS-{}-{}'.format(current_action, next_action)
            data['labels'] += [label] * frame_length
        
    print(attitude[:,1:4].shape)
    print(sensor[:,1:4].shape)
    att = np.matmul(attitude[:,1:4].T, sensor[:,1:4])
    print('shape is', att.shape)
    #att_x = attitude[:,1:4].dot(sensor[:,1:4].T)
    
    
    # sensor[:, 0] indicates time which is not useful, columns 7: are magnetometer, not useful
    # using list instead of numpy concatenation is more efficient
    data['acc_x'] += sensor[:, 1].tolist()
    data['acc_y'] += sensor[:, 2].tolist()
    data['acc_z'] += sensor[:, 3].tolist()
    data['gyr_x'] += sensor[:, 4].tolist()
    data['gyr_y'] += sensor[:, 5].tolist()
    data['gyr_z'] += sensor[:, 6].tolist()
    
    data['attitude'] += attitude[:, 1:].tolist()
    
    data['person'] += [k] * len(sensor[:, 1])
    break
    
df = pd.DataFrame(data = data)
df.head()

(48078, 3)
(48078, 3)
shape is (3, 3)


Unnamed: 0,acc_x,acc_y,acc_z,attitude,gyr_x,gyr_y,gyr_z,labels,person
0,-9.430002,1.973394,-1.573587,"[-0.2389556, 0.09774896, -0.966098, -0.4409745...",-0.053005,0.045182,0.035234,STNDING,ARS_Maria_Real_Sitting_Heading_2
1,-9.426304,1.966108,-1.573818,"[-0.2389692, 0.09787524, -0.9660817, -0.440980...",-0.055009,0.05224,0.061471,STNDING,ARS_Maria_Real_Sitting_Heading_2
2,-9.433605,1.967885,-1.577345,"[-0.2387688, 0.09793719, -0.966125, -0.4408015...",-0.03478,0.03052,0.05083,STNDING,ARS_Maria_Real_Sitting_Heading_2
3,-9.433682,1.956968,-1.603091,"[-0.2389078, 0.09797277, -0.966087, -0.4408588...",-0.060378,0.067733,0.059497,STNDING,ARS_Maria_Real_Sitting_Heading_2
4,-9.437164,1.944125,-1.599782,"[-0.2390312, 0.0980841, -0.9660453, -0.4409559...",-0.074121,0.052016,0.067737,STNDING,ARS_Maria_Real_Sitting_Heading_2


In [303]:
# compute the sensor measures in the global frame
def to_global_frame(row):
    attitude = np.array(row['attitude']).reshape(3,3)
    acceleration = np.array([row.acc_x, row.acc_y, row.acc_z])
    gyro = np.array([row.gyr_x, row.gyr_y, row.gyr_z])
    
    # global frame conversions
    acc_gf = np.dot(attitude.T, acceleration)
    gyro_gf = np.dot(attitude.T, gyro)
    
    row['acc_x_gf'] = acc_gf[0]
    row['acc_y_gf'] = acc_gf[1]
    row['acc_z_gf'] = acc_gf[2]
    
    row['gyr_x_gf'] = gyro_gf[0]
    row['gyr_y_gf'] = gyro_gf[1]
    row['gyr_z_gf'] = gyro_gf[2]
    
    return row

df = df.apply(to_global_frame, axis=1)
df = df.drop('attitude', axis=1)
df.head()

ValueError: labels ['attitude'] not contained in axis

Unnamed: 0,acc_x,acc_y,acc_z,gyr_x,gyr_y,gyr_z,labels,person,acc_x_gf,acc_y_gf,acc_z_gf,gyr_x_gf,gyr_y_gf,gyr_z_gf
0,-9.430002,1.973394,-1.573587,-0.053005,0.045182,0.035234,STNDING,ARS_Maria_Real_Sitting_Heading_2,0.021786,0.061198,9.76172,0.023224,0.051049,0.054286
1,-9.426304,1.966108,-1.573818,-0.055009,0.05224,0.061471,STNDING,ARS_Maria_Real_Sitting_Heading_2,0.024042,0.053772,9.756751,0.043289,0.069443,0.053268
2,-9.433605,1.967885,-1.577345,-0.03478,0.03052,0.05083,STNDING,ARS_Maria_Real_Sitting_Heading_2,0.020183,0.052906,9.764746,0.038832,0.047362,0.031204
3,-9.433682,1.956968,-1.603091,-0.060378,0.067733,0.059497,STNDING,ARS_Maria_Real_Sitting_Heading_2,0.004043,0.03057,9.766934,0.036041,0.081537,0.061853
4,-9.437164,1.944125,-1.599782,-0.074121,0.052016,0.067737,STNDING,ARS_Maria_Real_Sitting_Heading_2,0.01451,0.019096,9.767211,0.053381,0.070326,0.070659


In [296]:
df

0    None
1    None
2    None
3    None
4    None
dtype: object

In [278]:
a = np.arange(16).reshape(4,4)
print(a)
b = a.tolist()
np.array(b).T

[[ 0  1  2  3]
 [ 4  5  6  7]
 [ 8  9 10 11]
 [12 13 14 15]]


array([[ 0,  4,  8, 12],
       [ 1,  5,  9, 13],
       [ 2,  6, 10, 14],
       [ 3,  7, 11, 15]])

In [242]:
start = 87636
df.loc[df.person == 'ARS_Maria_FLGUp1', ['labels', 'person']].iloc[6869:6873]

Unnamed: 0,labels,person
94505,XLYINGX,ARS_Maria_FLGUp1
94506,XLYINGX,ARS_Maria_FLGUp1
94507,TRANS-XLYINGX-STNDING,ARS_Maria_FLGUp1
94508,TRANS-XLYINGX-STNDING,ARS_Maria_FLGUp1


In [198]:
df['labels'].iloc[45929:].unique()

array(['STNDING'], dtype=object)

In [93]:
'ciao' * 3

'ciaociaociao'