<a href="https://colab.research.google.com/github/mojtabaSefidi/Fall-Detection-System/blob/master/Fall_Detection_System_v2.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [1]:
# Mount Google Drive at /content/gdrive. `google.colab` is only importable
# inside a Colab runtime, so this cell is Colab-specific.
from google.colab import drive
drive.mount("/content/gdrive")

Mounted at /content/gdrive


Import libraries and frameworks

In [2]:
import glob
import os
from math import sqrt

import matplotlib.pyplot as plt
import numpy as np
import pandas as pd
import seaborn as sn
import tensorflow as tf
from sklearn import preprocessing
from sklearn.linear_model import LogisticRegression
from sklearn.metrics import classification_report, confusion_matrix
from sklearn.model_selection import train_test_split
from sklearn.neighbors import KNeighborsClassifier
from sklearn.preprocessing import StandardScaler
from sklearn.svm import LinearSVC
from sklearn.utils import class_weight

window_size = 200
sn.set()

Get the SisFall and SisFall_enhanced datasets

In [3]:

# !unzip /content/gdrive/MyDrive/Datasets/SisFall_dataset.zip
# !unzip /content/gdrive/MyDrive/Datasets/SisFall_enhanced.zip

# Implementation

In [4]:
def plot_confusion_matrix(confusion_matrix):
    """Draw a square confusion matrix as an annotated green heatmap.

    Args:
        confusion_matrix: square array-like of counts; rows and columns are
            labelled 0..n-1 in the plot.
    """
    class_ids = range(len(confusion_matrix))
    matrix_frame = pd.DataFrame(confusion_matrix, class_ids, class_ids)

    plt.figure(figsize=(6,4))
    plt.title('Confusion Matrix')
    # Note: sn.set() changes seaborn's global settings, not just this figure.
    sn.set(font_scale=1)
    sn.heatmap(
        matrix_frame,
        annot=True,
        annot_kws={"size": 12},  # font size of the numbers inside the cells
        fmt='.0f',               # show counts without decimals
        cmap='Greens',
    )
    plt.show()

In [16]:
class AddressProcessor():
  """Collects the recording paths below a dataset folder and splits them 70/30.

  The files are looked up as ``<data_path><sub-folder>/*.txt``, i.e. exactly
  one folder level below ``data_path``.
  """

  def __init__(self, 
               data_path = "SisFall_dataset/"):
    # `data_path` is concatenated with "*" when globbing, so it has to end
    # with a path separator.
    self.data_path = data_path
  
  def __get_file_name(self):
    """Return a 1-D string array with every ``*.txt`` path found.

    An empty array is returned when nothing matches (``np.hstack`` on an
    empty list would raise instead).
    """
    allfiles = []
    for folder in glob.glob(self.data_path + "*"):
      # Only *.txt files are collected, so entries such as 'desktop.ini'
      # can never appear and need no special handling.
      allfiles.extend(glob.glob(folder + "/*.txt"))
    return np.array(allfiles, dtype=str)

  def split_address(self):
    """Shuffle all file paths and split them into two arrays.

    Returns:
      list of two numpy arrays: the first 70% of the shuffled paths and the
      remaining 30%. The shuffle uses numpy's global random state, so seed
      it beforehand for a reproducible split.
    """
    # The helper is name-mangled (double underscore); it must be called with
    # the same spelling, `self.get_file_name()` does not exist.
    dataset_address = self.__get_file_name()
    np.random.shuffle(dataset_address)
    return np.split(dataset_address, [int(len(dataset_address)*0.7)])



Get all addresses

In [36]:
def get_file_name(path):
  """Return every ``<path><sub-folder>/*.txt`` file as a sorted 1-D string array.

  Sub-folders are visited in sorted order and the files inside each folder
  are sorted as well, so the result is deterministic.

  Args:
    path: dataset root; it is concatenated with "*" and therefore has to end
      with a path separator.

  Returns:
    numpy array of path strings; empty when no file matches.
  """
  allfiles = []
  for folder in sorted(glob.glob(path + "*")):
    # Only *.txt files are globbed, so 'desktop.ini' can never be collected.
    allfiles.extend(sorted(glob.glob(folder + "/*.txt")))
  # Building the array from a flat list also covers the "nothing found" case,
  # where np.hstack([]) would raise.
  return np.array(allfiles, dtype=str)

In [None]:
# Rough upper bound on the number of rows in the whole dataset.
# NOTE(review): `weight` is multiplied with duration and trial count, so it
# presumably is the number of samples per second - confirm. The name is
# reused for the class weights further down.
weight = 200

# Per-activity value multiplied with `weight` below (presumably the recording
# length in seconds - confirm against the dataset description).
duration = {
    'D01':100, 'D02':100, 'D03':100, 'D04':100,
    'D05':25, 'D06':25,
    'D07':12, 'D08':12, 'D09':12, 'D10':12, 'D11':12,
    'D12':12, 'D13':12, 'D14':12, 'D15':12, 'D16':12,
    'D17':25,
    'D18':12, 'D19':12,
    'F01':15, 'F02':15, 'F03':15, 'F04':15, 'F05':15,
    'F06':15, 'F07':15, 'F08':15, 'F09':15, 'F10':15,
    'F11':15, 'F12':15, 'F13':15, 'F14':15, 'F15':15,
}

# Number of trials per activity code.
trials = {
    'D01':1, 'D02':1, 'D03':1, 'D04':1,
    'D05':5, 'D06':5, 'D07':5, 'D08':5, 'D09':5,
    'D10':5, 'D11':5, 'D12':5, 'D13':5, 'D14':5,
    'D15':5, 'D16':5, 'D17':5, 'D18':5, 'D19':5,
    'F01':5, 'F02':5, 'F03':5, 'F04':5, 'F05':5,
    'F06':5, 'F07':5, 'F08':5, 'F09':5, 'F10':5,
    'F11':5, 'F12':5, 'F13':5, 'F14':5, 'F15':5,
}

s = sum(weight * duration[key] * trials[key] for key in duration)
# 38 is presumably the number of subjects - TODO confirm.
s*38

19912000

Read the dataset from the file paths

In [7]:
class DatasetProcessor():
  """Reads recording files and turns them into windowed feature/label arrays."""

  # Names given to the nine comma-separated values of every recording line.
  SENSOR_COLUMNS = ['ADXL345_x', 'ADXL345_y', 'ADXL345_z', 'ITG3200_x', 'ITG3200_y', 'ITG3200_z', 'MMA8451Q_x',
                    'MMA8451Q_y', 'MMA8451Q_z']
  
  def __init__(self, 
               train_dataset_saving_path=None,
               test_dataset_saving_path=None
               ):
    # Stored for the caller's convenience; no method of this class reads them.
    self.train_dataset_saving_path = train_dataset_saving_path
    self.test_dataset_saving_path = test_dataset_saving_path
  
  def read_data(self, data_path):
    """Load one recording file into a float DataFrame with nine named columns.

    Each line ends with a ';' after the last value. Only that terminator
    (plus surrounding whitespace) is removed; a value without the terminator
    keeps all of its digits.
    """
    data = pd.read_csv(data_path, header=None)
    data.columns = list(self.SENSOR_COLUMNS)
    data['MMA8451Q_z'] = data['MMA8451Q_z'].astype(str).str.strip().str.rstrip(';')
    return data.astype(float)

  def merge_features(self, data_path, flag=True):
    """Return the samples of one file together with their label as a 2-D array.

    Args:
      data_path: path of the recording file.
      flag: True  -> the nine raw columns followed by the label (10 columns).
            False -> the Euclidean norm of each sensor triple ('acc_1',
            'acc_2', 'geo') followed by the label, rounded to 2 decimals
            (4 columns).
    """
    dataset = self.read_data(data_path)

    label = self.get_label(data_path)
    if isinstance(label, pd.DataFrame):
      # Per-sample labels come back as a one-column frame; assign its column.
      label = label.iloc[:, 0]

    if flag:
      dataset['label'] = label
      return dataset.to_numpy()

    def magnitude(prefix):
      # Vectorised sqrt(x^2 + y^2 + z^2) instead of a Python call per row.
      return np.sqrt(dataset[prefix + '_x'] ** 2 + dataset[prefix + '_y'] ** 2 + dataset[prefix + '_z'] ** 2)

    new_dataset = pd.DataFrame()
    new_dataset['acc_1'] = magnitude('ADXL345')
    new_dataset['acc_2'] = magnitude('MMA8451Q')
    new_dataset['geo'] = magnitude('ITG3200')
    new_dataset['label'] = label

    return np.round(new_dataset.to_numpy(),2)

  def get_label(self, data_path):
    """Return the label(s) for a recording, chosen by the first letter of its file name.

    'D...' files get the scalar label 0. For 'F...' files the labels are read
    from the file at the same path with 'dataset' replaced by 'enhanced';
    every value 2 in it is mapped to 1 and the frame is returned.

    Raises:
      ValueError: the file name starts with neither 'D' nor 'F'.
    """
    # Use the file name instead of a fixed character position so the method
    # works for any dataset location.
    label = os.path.basename(data_path)[:1]
    if label =='D':
      return int(0)
    elif label =='F':  
      label_path = data_path.replace('dataset', 'enhanced')
      labels = pd.read_csv(label_path,header=None)
      labels[labels == 2] = 1
      return labels
    raise ValueError("Cannot derive a label from file name: " + str(data_path))

  def datasets_to_nparray(self, datasets_address_array, outputsize=20000000, column_dimension=10):
    """Stack the raw samples (+ label) of several files into one int16 array.

    Args:
      datasets_address_array: iterable of recording paths.
      outputsize: maximum number of rows the result may have.
      column_dimension: number of columns every file must produce.

    Returns:
      int16 array of shape (total_rows, column_dimension). Every row that was
      read is kept, including rows whose values happen to sum to zero.

    Raises:
      ValueError: a file has a different column count or the total row count
        exceeds `outputsize`.
    """
    chunks = []
    total_rows = 0
    for address in datasets_address_array:
      # Cast per file so the float intermediates never pile up in memory.
      feature = np.asarray(self.merge_features(address)).astype('int16')
      if feature.shape[1] != column_dimension:
        raise ValueError("%s has %d columns, expected %d" % (address, feature.shape[1], column_dimension))
      total_rows += len(feature)
      if total_rows > outputsize:
        raise ValueError("more than outputsize=%d rows were read" % outputsize)
      chunks.append(feature)

    if not chunks:
      return np.zeros((0, column_dimension), 'int16')
    return np.concatenate(chunks)

  def _trim_to_windows(self, dataset, window_size):
    """Drop the trailing rows that do not fill a whole window; return (rows, n_windows)."""
    n_windows = dataset.shape[0] // window_size
    # Slicing by an explicit end index also works when nothing has to be cut
    # (`dataset[:-0]` would be empty).
    return dataset[:n_windows * window_size], n_windows
  
  def windowing2d(self, dataset, window_size=200):
    """Cut the samples into consecutive windows, flattened to one row per window.

    The last column of `dataset` is the per-sample label.

    Returns:
      feature: (n_windows, window_size * n_feature_columns)
      label:   (n_windows,), 1 if any sample in the window has a positive label.
    """
    trimmed, n_windows = self._trim_to_windows(dataset, window_size)
    window = window_size * (dataset.shape[1]-1)
    feature = trimmed[:,0:-1].reshape(n_windows, window)
    label = trimmed[:,-1].reshape(n_windows, window_size).sum(axis=1)
    label[label > 0] = 1
    return feature, label

  def windowing3d(self, dataset, window_size=200):
    """Cut the samples into consecutive windows, keeping the time axis.

    The last column of `dataset` is the per-sample label.

    Returns:
      feature: (n_windows, window_size, n_feature_columns)
      label:   (n_windows, 1), 1 if any sample in the window has a positive label.
    """
    trimmed, n_windows = self._trim_to_windows(dataset, window_size)
    feature = trimmed[:,0:-1].reshape(n_windows, window_size, dataset.shape[1]-1)
    label = trimmed[:,-1].reshape(n_windows, window_size, 1).sum(axis=1)
    label[label > 0] = 1
    return feature, label

  def split_dataset(self, feature, label, test_ratio=0.2, validation=True, validation_ratio=0.2, random_state=None):
    """Stratified, shuffled split into train/test and optionally validation.

    `validation_ratio` is taken from what remains after the test split.
    Pass `random_state` for a reproducible split.

    Returns:
      (X_train, X_test, X_validation, y_train, y_test, y_validation) when
      `validation` is True, otherwise (X_train, X_test, y_train, y_test).
    """
    X_train, X_test, y_train, y_test = train_test_split(
        feature, label, test_size=test_ratio, shuffle=True, stratify=label, random_state=random_state)
    
    if not validation:
      return X_train, X_test, y_train, y_test

    X_train, X_validation, y_train, y_validation = train_test_split(
        X_train, y_train, test_size=validation_ratio, shuffle=True, stratify=y_train, random_state=random_state)
    return X_train, X_test, X_validation, y_train, y_test, y_validation

  def normalizer(self, scaler, X_train, X_test, X_validation):
    """Fit `scaler` on the training data only and apply it to all three sets."""
    X_train = scaler.fit_transform(X_train)
    X_test = scaler.transform(X_test)
    X_validation = scaler.transform(X_validation)
    return X_train, X_test, X_validation
    
  def dataset_to_tensor(self, window_size, dataset, saving_path):
    """Read the files in `dataset`, window them and save the result as .npz.

    The archive holds the arrays 'inputs' and 'targets'.
    NOTE(review): the flattened `windowing2d` layout is used here because the
    class has no plain `windowing` method - confirm this is the intended one.
    """
    features, labels = self.windowing2d(self.datasets_to_nparray(dataset), window_size)
    return np.savez(saving_path, inputs=features, targets=labels)
    
  def dataset_loader(self, data_path):
    """Load an archive written by `dataset_to_tensor`; returns (float inputs, int targets)."""
    # The builtin float/int replace the np.float/np.int aliases, which newer
    # NumPy releases no longer provide.
    with np.load(data_path) as npz:
      return npz["inputs"].astype(float), npz["targets"].astype(int)
  
  def downsampling(self, dataset, down_sampleing_factor):
      """Keep all rows with targets == 1 and a random sample of the targets == 0 rows.

      The number of negatives is `len(positives) * down_sampleing_factor`,
      limited to the negatives that are available. The result is shuffled.
      """
      positive = dataset[dataset['targets']==1]
      all_negative = dataset[dataset['targets']==0]
      wanted = min(int(len(positive)* down_sampleing_factor), len(all_negative))
      negative = all_negative.sample(n=wanted)
      return pd.concat([positive, negative], ignore_index=True).sample(frac=1).reset_index(drop=True)
  
  def generate_class_weight(self, label):
    """Return sklearn's 'balanced' class weights, one per unique label in sorted order."""
    return class_weight.compute_class_weight(class_weight='balanced', classes=np.unique(label), y=label)

In [9]:
from itertools import groupby

# Small experiment: groupby starts a new group whenever the key (here the
# first two letters) differs from the previous element's key.
list1 = ['hello','hope','hate','hack','bit','basket','code','come','chess']
[list(words) for _, words in groupby(list1, key=lambda word: word[:2])]

[['hello'],
 ['hope'],
 ['hate', 'hack'],
 ['bit'],
 ['basket'],
 ['code', 'come'],
 ['chess']]

In [34]:
# Locate the activity code (the part of the file name before the first '_')
# inside a sample path; `start` and `end` are reused as slice bounds later.
x = '/content/gdrive/MyDrive/Datasets/SisFall_dataset/SA01/D02_SA01_R01.txt'
start = x.rfind('/') + 1   # first character of the file name
end = x.find('_', start)   # first underscore inside the file name
x[start:end]
start, end

(54, 57)

In [39]:
# Group the file paths by the characters start:end of each path (the activity
# code for the sample path the bounds were computed from).
# NOTE(review): `ad` is assigned in the cell below and `start`/`end` in the
# cell above, so this cell fails on a fresh top-to-bottom run. The fixed slice
# presumably assumes every path shares the same prefix length - confirm.
[list(g) for k, g in groupby(ad, key=lambda x: x[start:end])]

[['/content/gdrive/MyDrive/Datasets/SisFall_dataset/SA01/D01_SA01_R01.txt'],
 ['/content/gdrive/MyDrive/Datasets/SisFall_dataset/SA01/D02_SA01_R01.txt'],
 ['/content/gdrive/MyDrive/Datasets/SisFall_dataset/SA01/D03_SA01_R01.txt'],
 ['/content/gdrive/MyDrive/Datasets/SisFall_dataset/SA01/D04_SA01_R01.txt'],
 ['/content/gdrive/MyDrive/Datasets/SisFall_dataset/SA01/D05_SA01_R01.txt',
  '/content/gdrive/MyDrive/Datasets/SisFall_dataset/SA01/D05_SA01_R02.txt',
  '/content/gdrive/MyDrive/Datasets/SisFall_dataset/SA01/D05_SA01_R03.txt',
  '/content/gdrive/MyDrive/Datasets/SisFall_dataset/SA01/D05_SA01_R04.txt',
  '/content/gdrive/MyDrive/Datasets/SisFall_dataset/SA01/D05_SA01_R05.txt'],
 ['/content/gdrive/MyDrive/Datasets/SisFall_dataset/SA01/D06_SA01_R01.txt',
  '/content/gdrive/MyDrive/Datasets/SisFall_dataset/SA01/D06_SA01_R02.txt',
  '/content/gdrive/MyDrive/Datasets/SisFall_dataset/SA01/D06_SA01_R03.txt',
  '/content/gdrive/MyDrive/Datasets/SisFall_dataset/SA01/D06_SA01_R04.txt',
  '/con

In [37]:
# Collect every recording path below the dataset folder on the mounted drive.
ad = get_file_name('/content/gdrive/MyDrive/Datasets/SisFall_dataset/')
ad

array(['/content/gdrive/MyDrive/Datasets/SisFall_dataset/SA01/D01_SA01_R01.txt',
       '/content/gdrive/MyDrive/Datasets/SisFall_dataset/SA01/D02_SA01_R01.txt',
       '/content/gdrive/MyDrive/Datasets/SisFall_dataset/SA01/D03_SA01_R01.txt',
       ...,
       '/content/gdrive/MyDrive/Datasets/SisFall_dataset/SE15/D17_SE15_R03.txt',
       '/content/gdrive/MyDrive/Datasets/SisFall_dataset/SE15/D17_SE15_R04.txt',
       '/content/gdrive/MyDrive/Datasets/SisFall_dataset/SE15/D17_SE15_R05.txt'],
      dtype='<U70')

In [None]:
%%time
# Re-read the path list, sort it in place and load only the first 50 files
# into one array.
ad = get_file_name('/content/gdrive/MyDrive/Datasets/SisFall_dataset/')
ad.sort()
dp = DatasetProcessor()
result = dp.datasets_to_nparray(ad[:50])


In [None]:
%%time
# Cut the loaded samples into flattened windows (default window_size=200)
# and show the resulting shapes.
feature, label = dp.windowing2d(result)
feature.shape, label.shape

CPU times: user 6.65 ms, sys: 0 ns, total: 6.65 ms
Wall time: 13.7 ms


((1081, 1800), (1081,))

In [None]:
# Balanced class weights for the window labels. This rebinds `weight`, which
# an earlier cell set to 200.
# NOTE(review): the recorded output holds a single weight, i.e. only one
# class was present in the loaded subset.
weight = dp.generate_class_weight(label)
weight

array([1.])

In [None]:
%%time
# Stratified train/test/validation split with the default ratios (0.2 / 0.2);
# the split is not seeded, so the rows in each part change between runs.
X_train, X_test, X_validation, y_train, y_test, y_validation = dp.split_dataset(feature, label)
X_train.shape, X_test.shape, X_validation.shape, y_train.shape, y_test.shape, y_validation.shape

CPU times: user 6.59 ms, sys: 971 µs, total: 7.56 ms
Wall time: 12.6 ms


((691, 1800), (217, 1800), (173, 1800), (691,), (217,), (173,))

In [None]:
# Standardise the features; the scaler is fitted on the training part only
# and then applied to the test and validation parts.
X_train, X_test, X_validation = dp.normalizer(StandardScaler(), X_train, X_test, X_validation)

In [None]:
%%time
# feature, label = dp.windowing3d(result)

CPU times: user 5 µs, sys: 0 ns, total: 5 µs
Wall time: 9.3 µs


In [None]:
# feature.shape, label.shape

((549, 200, 9), (549, 1))

# Main

Data Pre-processing

Neural Network Training

In [None]:
class Models():
  """Bundles the classifiers trained on one train/test split.

  Every ``define_*`` method stores its model on the instance; the matching
  ``evaluate_*`` / ``evaluation`` method prints a classification report and
  plots the confusion matrix for the test split.
  """

  def __init__(self, 
               X_train,
               y_train,
               X_test,
               y_test,
               lstm_batch_size=32,
               lstm_buffer_size=10000,
               lstm_epochs=10,
               ):
    # Train/test split shared by all models.
    self.X_train = X_train
    self.y_train = y_train
    self.X_test = X_test
    self.y_test = y_test
    # Settings read by data_process_lstm() and train_lstm().
    # NOTE(review): the default values are placeholders, tune them as needed.
    self.lstm_batch_size = lstm_batch_size
    self.lstm_buffer_size = lstm_buffer_size
    self.lstm_epochs = lstm_epochs

  @staticmethod
  def _as_class_weight_dict(class_weight):
    """Turn a sequence of weights into the {class_index: weight} dict Keras expects.

    None and dicts are passed through. For a sequence, position i is used as
    the weight of class i (assumes the classes are 0..n-1 in that order).
    """
    if class_weight is None or isinstance(class_weight, dict):
      return class_weight
    return {index: float(value) for index, value in enumerate(class_weight)}

  def _report(self, predicted_label):
    """Print the classification report and plot the confusion matrix against y_test."""
    print(classification_report(self.y_test, predicted_label))
    plot_confusion_matrix(confusion_matrix(self.y_test, predicted_label))

  def define_mlp(self, input_size, hidden_layer_size, output_size):
    """Build and compile a 4-layer dense network with a sigmoid output.

    Note that `input_size` is the width of the first Dense layer, not an
    input-shape declaration.
    """
    self.mlp_model = tf.keras.Sequential([
        tf.keras.layers.Dense(input_size,activation="relu"),
        tf.keras.layers.Dense(hidden_layer_size, activation="relu"),
        tf.keras.layers.Dense(hidden_layer_size,activation="relu"),
        tf.keras.layers.Dense(output_size, activation='sigmoid')
        ])

    self.mlp_model.compile(optimizer='adam', loss='binary_crossentropy', metrics=['accuracy'])

  def train_mlp(self, n_epochs, batch_size, class_weight):
    """Fit the MLP on the training split and return the Keras history.

    `class_weight` may be None, a {class_index: weight} dict, or a sequence
    of weights (converted to such a dict).
    """
    history = self.mlp_model.fit(
        self.X_train,
        self.y_train,
        batch_size = batch_size,
        epochs = n_epochs,
        shuffle = True,
        class_weight = self._as_class_weight_dict(class_weight),
        verbose = 1)
    return history

  def evaluation(self, model, batch_size):
    """Predict on the test split with `model`, report metrics, return the raw predictions.

    The raw outputs are rounded to 0/1 for the report because the metrics
    need class labels rather than probabilities.
    """
    prediction = model.predict(self.X_test, verbose=1, batch_size=batch_size)
    predicted_label = np.round(np.asarray(prediction)).astype(int)
    if predicted_label.ndim > 1 and predicted_label.shape[-1] == 1:
      # A single output unit yields shape (n, 1); flatten it to (n,).
      predicted_label = predicted_label.ravel()
    self._report(predicted_label)
    return prediction

  def data_process_lstm(self):
    """Reshape the splits to (samples, features, 1) and build the shuffled, batched, repeating training dataset."""
    lstm_X_train = self.X_train.reshape(self.X_train.shape[0], self.X_train.shape[-1], 1)
    self.lstm_X_test = self.X_test.reshape(self.X_test.shape[0], self.X_test.shape[-1], 1)

    self.train_dataset_sequence = tf.data.Dataset.from_tensor_slices((lstm_X_train , self.y_train))
    self.train_dataset_sequence = self.train_dataset_sequence.cache().shuffle(self.lstm_buffer_size).batch(self.lstm_batch_size).repeat()

  def define_lstm_model(self, input_shape, output_shape=1):
    """Build and compile a 3-layer LSTM with `output_shape` sigmoid output units."""
    self.lstm_model = tf.keras.models.Sequential([tf.keras.layers.LSTM(units=32, return_sequences=True, input_shape=input_shape),
                                            tf.keras.layers.LSTM(units=64, return_sequences=True),
                                            tf.keras.layers.LSTM(units=32),
                                            tf.keras.layers.Dense(output_shape,activation='sigmoid')])

    self.lstm_model.compile(loss = 'binary_crossentropy', metrics = ['accuracy'])
  
  def train_lstm(self):
    """Fit the LSTM on the dataset prepared by data_process_lstm(); stores the history."""
    # The training dataset repeats forever, so one epoch is defined as one
    # pass over the training samples.
    steps = int(np.ceil(self.X_train.shape[0] / self.lstm_batch_size))
    
    self.lstm_history = self.lstm_model.fit(
      self.train_dataset_sequence, 
      epochs = self.lstm_epochs,
      steps_per_epoch = steps,
      )
    
  def define_svm(self):
    """Create and fit a linear SVM (C=0.0001) on the training split."""
    self.svm_model = LinearSVC(C = 0.0001)
    self.svm_model.fit(self.X_train, self.y_train)

  def evaluate_svm(self):
    """Predict the test split with the SVM and report the metrics."""
    self.svm_prediction = self.svm_model.predict(self.X_test)
    self._report(self.svm_prediction)
  
  def define_LGR(self):
    """Create and fit a logistic regression model on the training split."""
    self.LGR_model = LogisticRegression()
    self.LGR_model.fit(self.X_train, self.y_train)
    
  def evaluate_LGR(self):
    """Predict the test split with the logistic regression model and report the metrics."""
    self.LGR_prediction = self.LGR_model.predict(self.X_test)
    self._report(self.LGR_prediction)
  
  def define_knn(self):
    """Create and fit a 4-nearest-neighbours classifier on the training split."""
    self.knn_model = KNeighborsClassifier(n_neighbors=4)
    self.knn_model.fit(self.X_train, self.y_train)
    
  def evaluate_knn(self):
    """Predict the test split with the KNN model and report the metrics."""
    self.knn_prediction = self.knn_model.predict(self.X_test)
    self._report(self.knn_prediction)

  @staticmethod
  def define_ensemble_concept(prediction1, prediction2):
    """Combine two predictions with a logical OR.

    `prediction1` is used as is; `prediction2` is transposed, flattened and
    rounded first. Returns a boolean array.
    """
    return np.logical_or(prediction1 , prediction2.T.ravel().round())
  
  def evaluate_ensemble_concept(self, prediction1=None, prediction2=None):
    """Report the metrics of the OR-ensemble of two predictions on the test split.

    Raises:
      ValueError: one of the predictions is missing.
    """
    if prediction1 is None or prediction2 is None:
      raise ValueError("evaluate_ensemble_concept needs prediction1 and prediction2")
    # Cast the boolean ensemble output to 0/1 so it matches the label type.
    combined = self.define_ensemble_concept(prediction1, prediction2).astype(int)
    self._report(combined)

Prediction based on Ensemble Concept

Models' Summary

In [None]:
# Precision / recall / F1 of every model, before and after balancing.
# NOTE(review): precision_1, recall_1, f1Score_1 and their *_2 counterparts
# are not assigned in any visible cell; they must exist before this cell runs.
summary_columns = ["Algorithm","Precision","Recall","F1score"]
summary_rows = [
    ['Neural Network',precision_1,recall_1,f1Score_1],
    ['Logistic Regression',0.53,0.17,0.26],
    ['SVM',0.38,0.01,0.03],
    ['KNN',0.94,0.56,0.70],
    ['Neural Network after Balancing',precision_2,recall_2,f1Score_2],
    ['Logistic Regression after Balancing',0.08,0.93,0.15],
    ['SVM after Balancing',0.06,0.97,0.11],
    ['KNN after Balancing',0.73,0.73,0.73],
    ['Ensemble concept after Balancing',0.65,0.81,0.72],
]
conclusion = pd.DataFrame(summary_rows, columns=summary_columns).set_index('Algorithm')
# Colour the cells so higher scores stand out.
conclusion.style.background_gradient(cmap="YlOrRd")


Unnamed: 0_level_0,Precision,Recall,F1score
Algorithm,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1
Neural Network,0.99,0.34,0.51
Logistic Regression,0.53,0.17,0.26
SVM,0.38,0.01,0.03
KNN,0.94,0.56,0.7
Neural Network after Balancing,0.88,0.58,0.7
Logistic Regression after Balancing,0.08,0.93,0.15
SVM after Balancing,0.06,0.97,0.11
KNN after Balancing,0.73,0.73,0.73
Ensemble concept after Balancing,0.65,0.81,0.72


As you can see, after balancing the dataset, the ensemble concept and the KNN model perform best.