In [404]:
import pandas as pd
pd.options.mode.chained_assignment = None

Input data is read from the SEMS files. The file names are identified by a column in an Excel sheet,
as well as the associated SEMS score. Each file has the readings of one child.

In [405]:
input_data = pd.read_excel(r'SEMS_Werte.xlsx').dropna(subset=['SEMS'])
children = input_data.iloc[:, 0].tolist()
input_data.head()

Unnamed: 0.1,Unnamed: 0,C (Standardformen),SEMS Text,Alter,Geschlecht,SEMS
0,KL22,,x,9,w,0.0
1,KL15,,x,9,m,1.0
2,KL15_2,,x,9,m,1.0
3,KL14,,x,8,w,1.0
4,KB5,,x,8,w,1.0


Interation over the list of children obtained from the Excel sheet. Each file in the folder is read, extracting the relevant data in the process. Additional information (SEMS, age, sex) is read from the Excel file. The additional information is attached to the pandas dataframe of measurements.




In [406]:
import itertools
import glob

data = []

for index, child in enumerate(children):
    files = glob.glob('data/' + child + '*SEMS*')
    for file in files:
        raw_data = pd.read_csv(file)
        selected_columns = raw_data.iloc[:, [1, 4, 7, 8, 9, 10, 11, 12, 13, 14]]
        sems = int(input_data.loc[index]['SEMS'])
        age = input_data.loc[index]['Alter']
        if input_data.loc[index]['Geschlecht'] == 'w':
            sex = 1
        else:
            sex = 0
        additional_information = {'sems': sems, 'age': age, 'sex': sex}
        for information, value in additional_information.items():
            i = list(itertools.repeat(value, len(selected_columns)))
            selected_columns[information] = i
        data.append(selected_columns)
    full_data = pd.concat(data)
full_data.head()

Unnamed: 0,tipPressure,fingerPressure,angle,writtingSpeed,accX,accY,accZ,gyroX,gyroY,gyroZ,sems,age,sex
0,0,5,51,16,-0.820742,-0.148008,-0.466416,-4.941526,-2.814809,8.887481,0,9,1
1,0,7,51,18,-0.826357,-0.216611,-0.55333,-8.468245,-6.921679,-3.249924,0,9,1
2,0,8,51,11,-0.812441,-0.277402,-0.554307,-5.063664,-3.776641,3.024885,0,9,1
3,0,9,52,6,-0.826602,-0.227354,-0.561387,1.058473,4.101222,-1.784275,0,9,1
4,0,9,52,5,-0.83124,-0.24127,-0.543809,3.119542,1.704275,0.597405,0,9,1


In [407]:
print("Length of entire data frame:", len(full_data), "rows.")
print("Shape:", full_data.shape)

Length of entire data frame: 118294 rows.
Shape: (118294, 13)


In [408]:
train_split = 0.8
test_split = 0.15
validation_split = 0.5

In [409]:
import numpy as np

df_sample = full_data.sample(frac=1, random_state=12)
splits = [int(train_split * len(full_data)), int((1 - validation_split - test_split) * len(full_data))]
train_ds, val_ds, test_ds = np.split(df_sample, splits)
print("Training Set: {}, Test Set: {}, Validation Set {}".format(len(train_ds), len(test_ds), len(val_ds)))
print("Data type: {}".format(type(train_ds)))
train_ds.head()

Training Set: 94635, Test Set: 76892, Validation Set 0
Data type: <class 'pandas.core.frame.DataFrame'>


Unnamed: 0,tipPressure,fingerPressure,angle,writtingSpeed,accX,accY,accZ,gyroX,gyroY,gyroZ,sems,age,sex
2773,2,0,53,33,-0.769961,-0.170469,-0.62584,-12.529313,-12.143053,8.963817,7,7,0
1002,28,136,47,83,-0.772168,-0.448994,-0.585479,46.126492,10.74626,-26.880762,4,9,0
2155,91,40,51,47,-0.84125,-5.9e-05,-0.546982,-14.010229,-13.593435,-19.540001,5,9,0
2760,35,19,51,64,-0.81415,0.324893,-0.903428,14.279847,-24.051451,-25.707939,7,7,0
893,0,0,41,86,-0.782412,0.210879,-0.670518,36.417252,-6.875878,-43.219391,5,9,0


The target value (label) is extracted from the training set, as well as the features. The features
stored in the array 'data_columns.'

In [410]:
target = train_ds.pop('sems')
headers = ['tipPressure', 'fingerPressure', 'angle', 'writtingSpeed', 'accX', 'accY', 'accZ', 'gyroX', 'gyroY', 'gyroZ']
data_columns = train_ds[headers]
print("Data type: {}".format(type(train_ds)))
data_columns.head()

Data type: <class 'pandas.core.frame.DataFrame'>


Unnamed: 0,tipPressure,fingerPressure,angle,writtingSpeed,accX,accY,accZ,gyroX,gyroY,gyroZ
2773,2,0,53,33,-0.769961,-0.170469,-0.62584,-12.529313,-12.143053,8.963817
1002,28,136,47,83,-0.772168,-0.448994,-0.585479,46.126492,10.74626,-26.880762
2155,91,40,51,47,-0.84125,-5.9e-05,-0.546982,-14.010229,-13.593435,-19.540001
2760,35,19,51,64,-0.81415,0.324893,-0.903428,14.279847,-24.051451,-25.707939
893,0,0,41,86,-0.782412,0.210879,-0.670518,36.417252,-6.875878,-43.219391


Combining features and labels in one Tensorflow dataset

In [413]:
import tensorflow as tf
n = tf.convert_to_tensor(data_columns)
n_shape = tf.shape(n)
batch_size = 25
print("Data type: {}, Shape: {}".format(type(n), n_shape))

  s = s[:max_items]
  for item in s.iteritems():




  s = s[:max_items]
  for item in s.iteritems():


In [412]:
normalizer = tf.keras.layers.Normalization(axis=-1)
normalizer.adapt(n)

In [413]:
from tensorflow.python.keras.layers import Reshape, LSTM, Dense
from tensorflow.python.keras import Input
from keras import Sequential
import keras

input_shape = (94635, 10)

model = Sequential()
print(tf.shape(n))
model.add(LSTM(units=50, input_shape=(25, 94635, 10)))
model.add(Dense(units=1, activation='sigmoid'))

model.compile(loss='MeanSquaredError', optimizer='adam', metrics=['accuracy'])
model.fit(n, target, batch_size=25)

tf.Tensor([94635    10], shape=(2,), dtype=int32)


ValueError: in user code:

    File "/Users/benjaminvogels/PycharmProjects/sensogrip/venv/lib/python3.9/site-packages/keras/engine/training.py", line 1249, in train_function  *
        return step_function(self, iterator)
    File "/Users/benjaminvogels/PycharmProjects/sensogrip/venv/lib/python3.9/site-packages/keras/engine/training.py", line 1233, in step_function  **
        outputs = model.distribute_strategy.run(run_step, args=(data,))
    File "/Users/benjaminvogels/PycharmProjects/sensogrip/venv/lib/python3.9/site-packages/keras/engine/training.py", line 1222, in run_step  **
        outputs = model.train_step(data)
    File "/Users/benjaminvogels/PycharmProjects/sensogrip/venv/lib/python3.9/site-packages/keras/engine/training.py", line 1023, in train_step
        y_pred = self(x, training=True)
    File "/Users/benjaminvogels/PycharmProjects/sensogrip/venv/lib/python3.9/site-packages/keras/utils/traceback_utils.py", line 70, in error_handler
        raise e.with_traceback(filtered_tb) from None

    ValueError: Exception encountered when calling layer 'module_wrapper_76' (type ModuleWrapper).
    
    Input 0 of layer lstm_33 is incompatible with the layer: expected ndim=3, found ndim=2. Full shape received: (None, 10)
    
    Call arguments received by layer 'module_wrapper_76' (type ModuleWrapper):
      • args=('tf.Tensor(shape=(None, 10), dtype=float32)',)
      • kwargs={'training': 'True'}
