In [35]:
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt

from pathlib import Path

from scipy.signal import stft

from sklearn.model_selection import train_test_split

from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Dense, Conv2D, MaxPooling2D, Flatten

BASEDIR = Path("/store/projects/fallki/")
FAKEDIR = BASEDIR / "Fake-Events" / "2. Fake-Events Messung"

# Data preparation
In this notebook we try to train a deep neural network to seperate Dummy falls from the other fall events by training it on the stft picture we generate for all of them.

In [36]:
def SignalToSpectrum(signal, samplerate=1600):
    SIGNAL_LENGTH = samplerate * 10
    F = stft(signal[:SIGNAL_LENGTH], 1600, nperseg=128)[2]
    F = F[2:,]
    return np.expand_dims(np.abs(F), axis=2) # Add extra dimension for TF Conv2D

In [37]:
fakeEvents = pd.read_parquet(FAKEDIR / "gSensoren_FakeEvents.parquet")

In [38]:
L = []
for (teensy, event, run, rep), ss in fakeEvents.groupby(["teensy", "experiment", "run", "rep"]):
    L.append({"event": event,
              "run": run,
              "istSturz": int(event == "Dummy"),
              "signalLenght": len(ss["z"]),
              "spectrum": SignalToSpectrum(ss["z"])})
df = pd.DataFrame.from_dict(L)

In [39]:
df.head()

Unnamed: 0,event,run,istSturz,signalLenght,spectrum
0,1.5L Flasche,L1,0,15976,"[[[220.68784142956994], [0.11347713586453584],..."
1,1.5L Flasche,L1,0,16038,"[[[220.7760221941459], [0.4810628935081976], [..."
2,1.5L Flasche,L1,0,15997,"[[[220.56718777220033], [0.12610317723679612],..."
3,1.5L Flasche,L2,0,15994,"[[[220.36006931821052], [0.1674748618352673], ..."
4,1.5L Flasche,L2,0,15981,"[[[220.2963925540494], [0.10592890921923297], ..."


In [40]:
# normalize the spectrum to facilitate training
df["normalizedSpectrum"] = df["spectrum"] / df["spectrum"].map(np.max).max()

# Model 

In [62]:
mdl = Sequential()
mdl.add(Conv2D(8, (63, 5), activation="relu", input_shape=(63, 251, 1)))
mdl.add(MaxPooling2D((1, 4)))
#mdl.add(Dense(1))
mdl.add(Flatten())
mdl.add(Dense(1))
mdl.compile(optimizer="adam", loss="binary_crossentropy", metrics=["accuracy"])
mdl.summary()


Model: "sequential_4"
_________________________________________________________________
 Layer (type)                Output Shape              Param #   
 conv2d_4 (Conv2D)           (None, 1, 247, 8)         2528      
                                                                 
 max_pooling2d_4 (MaxPooling  (None, 1, 61, 8)         0         
 2D)                                                             
                                                                 
 flatten_4 (Flatten)         (None, 488)               0         
                                                                 
 dense_4 (Dense)             (None, 1)                 489       
                                                                 
Total params: 3,017
Trainable params: 3,017
Non-trainable params: 0
_________________________________________________________________


In [63]:
X = np.stack(df["normalizedSpectrum"].to_list())
y = df["istSturz"].to_numpy()

# split data in train and test set
train, test = train_test_split(range(len(y)), test_size=0.2)
# how many of the respective events are contained in the test set
df["event"][test].value_counts()

Springen               20
2 Bücher               19
Stuhl kippt            18
Handy                  16
Getränkeflasche        16
1.5L Flasche           16
Möbel verrücken        15
Schlüsselbund          14
Krücken                14
Hantel                 13
Tablett mit Besteck    13
Wasserkasten           12
Dummy                  11
Koffer                 11
Pflegeartikel           8
Name: event, dtype: int64

In [64]:
# actual training..
history = mdl.fit(X[train, ], y[train], epochs=75, validation_data=(X[test,], y[test]))

Epoch 1/75
Epoch 2/75
Epoch 3/75
Epoch 4/75
Epoch 5/75
Epoch 6/75
Epoch 7/75
Epoch 8/75
Epoch 9/75
Epoch 10/75
Epoch 11/75
Epoch 12/75
Epoch 13/75
Epoch 14/75
Epoch 15/75
Epoch 16/75
Epoch 17/75
Epoch 18/75
Epoch 19/75
Epoch 20/75
Epoch 21/75
Epoch 22/75
Epoch 23/75
Epoch 24/75
Epoch 25/75
Epoch 26/75
Epoch 27/75
Epoch 28/75
Epoch 29/75
Epoch 30/75
Epoch 31/75
Epoch 32/75
Epoch 33/75
Epoch 34/75
Epoch 35/75
Epoch 36/75
Epoch 37/75
Epoch 38/75
Epoch 39/75
Epoch 40/75
Epoch 41/75
Epoch 42/75
Epoch 43/75
Epoch 44/75
Epoch 45/75
Epoch 46/75
Epoch 47/75
Epoch 48/75
Epoch 49/75
Epoch 50/75
Epoch 51/75
Epoch 52/75
Epoch 53/75
Epoch 54/75
Epoch 55/75
Epoch 56/75
Epoch 57/75
Epoch 58/75
Epoch 59/75
Epoch 60/75
Epoch 61/75
Epoch 62/75
Epoch 63/75
Epoch 64/75
Epoch 65/75
Epoch 66/75
Epoch 67/75
Epoch 68/75
Epoch 69/75
Epoch 70/75
Epoch 71/75
Epoch 72/75
Epoch 73/75
Epoch 74/75
Epoch 75/75


In [65]:
df.istSturz.sum()

72

## Problems
We do not have a large amount of training data, in total we have 1079 spectrum with only 72 positive samples.
Even though we already went for a rather small model (3017 parameters) it is keen to overfit, thus one needs to be really careful during fitting the model. 

Ideally we would need more data, this data is currently recorded inside rooms of a retirement home, so far we (luckily) did not record any real fall events but this does not make training easier. Thus we need to augment our training data with different methods.

In [89]:
# here we evaluate our trained model on all of the data, by adjusting the threshold  value we can manually test how clear the separation between positive and negative events really is
# in the current state the model is heavily overfitted
ypred = mdl.predict(X).ravel()
threshold = 0.15
pd.crosstab(df['event'], ypred >= threshold)



col_0,False,True
event,Unnamed: 1_level_1,Unnamed: 2_level_1
1.5L Flasche,61,11
2 Bücher,70,2
Dummy,0,72
Getränkeflasche,71,1
Handy,72,0
Hantel,71,0
Koffer,72,0
Krücken,72,0
Möbel verrücken,71,1
Pflegeartikel,72,0
