**Load Libraries**

In [1]:
import pandas as pd
import numpy as np
import sklearn as sk
import matplotlib.pyplot as plt
import pickle as pkl

#Classification with KNN
from sklearn.neighbors import KNeighborsClassifier
from sklearn.metrics import confusion_matrix
from sklearn.metrics import recall_score
from sklearn.model_selection import train_test_split
from sklearn.metrics import accuracy_score


# Data Preprocessing

### Define Functions

In [8]:
def loadDataFrames(name, pathAcc, pathGyr, pathOri):
    """Read the accelerometer, gyroscope and orientation CSV files of one recording.

    Args:
        name: Label used only in the confirmation message.
        pathAcc: Path of the accelerometer CSV file.
        pathGyr: Path of the gyroscope CSV file.
        pathOri: Path of the orientation CSV file.

    Returns:
        Tuple ``(accelerometer, gyroscope, orientation)`` of DataFrames.
    """
    acc, gyr, ori = (pd.read_csv(path) for path in (pathAcc, pathGyr, pathOri))
    print("Data Frames for " + name + " loaded.")
    return acc, gyr, ori

def loadJsonData(name, pathJson):
    """Read one JSON recording into a DataFrame.

    Args:
        name: Label used only in the confirmation message.
        pathJson: Path of the JSON file, handed to ``pd.read_json``.

    Returns:
        The DataFrame produced by ``pd.read_json``.
    """
    frame = pd.read_json(pathJson)
    print("Data Frame for " + name + " loaded.")
    return frame

#Concat Funktion
def concatDataFrames(name, df_Acc, df_Gyr, df_Ori):
    """Join the three sensor frames of one recording side by side.

    The 'time' and 'seconds_elapsed' columns are kept from the accelerometer
    frame only; they are left out of the gyroscope and orientation parts.

    Args:
        name: Label used only in the confirmation message.
        df_Acc: Accelerometer frame (kept unchanged, columns come first).
        df_Gyr: Gyroscope frame; must contain 'time' and 'seconds_elapsed'.
        df_Ori: Orientation frame; must contain 'time' and 'seconds_elapsed'.

    Returns:
        A new DataFrame with the columns of all three inputs (axis=1 concat).

    Raises:
        KeyError: If df_Gyr or df_Ori lacks one of the two time columns.
    """
    time_columns = ['time', 'seconds_elapsed']
    # drop() without inplace returns new frames, so the caller's gyroscope and
    # orientation frames keep their time columns (the plot helpers read
    # data['time']) and calling this function twice no longer raises KeyError.
    df = pd.concat(
        [
            df_Acc,
            df_Gyr.drop(columns=time_columns),
            df_Ori.drop(columns=time_columns),
        ],
        axis=1,
    )
    print("Data Frames for " + name + " concatenated.")
    return df

#Function to plot Gyr Data
def plotDataGyr(data, title):
    """Draw the z, y and x columns of a gyroscope frame against 'time'.

    Args:
        data: Frame with the columns 'time', 'x', 'y' and 'z'.
        title: Title shown above the figure.
    """
    axis_colors = (('z', 'red'), ('y', 'blue'), ('x', 'green'))
    for axis, color in axis_colors:
        plt.plot(data['time'], data[axis], color=color, marker='.')
    plt.xlabel('time')
    plt.ylabel('x,y,z')
    plt.legend([axis for axis, _ in axis_colors])
    plt.title(title)
    plt.show()

#Function to Plot Acc Data
def plotDataAcc(data, title):
    """Draw the z, y and x columns of an accelerometer frame against 'time'.

    Args:
        data: Frame with the columns 'time', 'x', 'y' and 'z'.
        title: Title shown above the figure.
    """
    colors = {'z': 'red', 'y': 'blue', 'x': 'green'}
    for column in colors:
        plt.plot(data['time'], data[column], color=colors[column], marker='.')
    plt.xlabel('time')
    plt.ylabel('x,y,z')
    plt.legend(list(colors))
    plt.title(title)
    plt.show()

#Function to Plot Ori Data
def plotDataOri(data, title):
    """Draw the qz, qy, qx and qw columns of an orientation frame against 'time'.

    Args:
        data: Frame with the columns 'time', 'qz', 'qy', 'qx' and 'qw'.
        title: Title shown above the figure.
    """
    components = (('qz', 'red'), ('qy', 'blue'), ('qx', 'green'), ('qw', 'yellow'))
    for component, color in components:
        # label= ties each legend entry to the line it describes
        plt.plot(data['time'], data[component], color=color, marker='.', label=component)
    # 'roll' is not drawn, so it is named neither on the y-axis nor in the
    # legend (the previous five-entry legend did not match the four lines).
    plt.xlabel('time')
    plt.ylabel('qz,qy,qx,qw')
    plt.legend()
    plt.title(title)
    plt.show()

#Clean unused Sensors
def deleteSensorData(df):
    """Return the rows of ``df`` whose 'sensor' value is not in the unused list.

    Args:
        df: Frame with a 'sensor' column.

    Returns:
        A new, independent DataFrame containing only the remaining rows.
        The input frame is not modified.

    Raises:
        KeyError: If ``df`` has no 'sensor' column.
    """
    notUsedSensors = ['Annotation', 'Barometer', 'Battery', 'Brightness', 'Gravity', 'Light', 'Location', 'Magnetometer', 'MagnetometerUncalibrated', 'Microphone']
    keep = ~df['sensor'].isin(notUsedSensors)
    # copy() makes the result an independent frame, so later column
    # assignments or in-place drops on it do not trigger
    # SettingWithCopyWarning.
    return df[keep].copy()


#Clean Function Json Data
def cleanDataJson(df):
    """Remove the metadata and unused-sensor columns from a JSON recording.

    Only the listed columns that actually exist in ``df`` are removed, and a
    message reports whether anything was dropped.

    Args:
        df: Frame loaded from a JSON recording.

    Returns:
        A new DataFrame without the listed columns, or ``df`` itself when none
        of them is present. The input frame is never modified.
    """
    #Drop unused Columns
    columns_to_drop = ['version','device name','recording time','platform','appVersion', 'device id', 'sampleRateMs','relativeAltitude','pressure', 'batteryLevel', 'batteryState', 'lowPowerMode','brightness', 'lux', 'bearingAccuracy', 'speedAccuracy', 'verticalAccuracy', 'horizontalAccuracy', 'speed', 'bearing','altitude', 'longitude', 'latitude']
    present = [column for column in columns_to_drop if column in df.columns]

    if present:
        # Non-in-place drop: the caller's frame (often a filtered slice) stays
        # untouched and no SettingWithCopyWarning is raised.
        df = df.drop(columns=present)
        print("Spalten wurden erfolgreich entfernt.")
    else:
        print("Keine der Spalten zum Entfernen gefunden.")
    return df


### Load Data and give them a name

In [14]:
# All paths use forward slashes: they work on every platform, whereas
# 'data\Alex...' / 'data\PushUps...' contained invalid escape sequences
# ('\A', '\P') and only resolved on Windows.

#load mixed data
df_mixed_Acc, df_mixed_Gyr, df_mixed_Ori =loadDataFrames('mixed','data/MixedData/Accelerometer.csv', 'data/MixedData/Gyroscope.csv', 'data/MixedData/Orientation.csv')

##############################################################################################################

#load data frames for walking
#df_walk_Acc, df_walk_Gyr, df_walk_Ori =loadDataFrames('walking','data/NormalWalk/Accelerometer.csv', 'data/NormalWalk/Gyroscope.csv', 'data/NormalWalk/Orientation.csv')
df_walk1 = loadJsonData('walking 1', 'data/NormalWalk/NormalWalk.json')

#load data walk2
#df_walk2_Acc, df_walk2_Gyr, df_walk2_Ori =loadDataFrames('walking 2','data/Walk2/Accelerometer.csv', 'data/Walk2/Gyroscope.csv', 'data/Walk2/Orientation.csv')
df_walk2 = loadJsonData('walking 2', 'data/Walk2/Walk2.json')

#load data walk3 Alex
# NOTE(review): the file name says "Rumstehen" while the frame is treated as walking - confirm the label.
df_walk3 = loadJsonData('walking 3', 'data/Alex_Rumstehen_-2023-05-02_15-30-58.json')

#load data walk4 Alex
df_walk4 = loadJsonData('walking 4', 'data/Alex_Rumstehen_2-2023-05-23_14-53-09.json')


##############################################################################################################

#load data frames for push ups
#df_push_Acc, df_push_Gyr, df_push_Ori =loadDataFrames('push ups','data/PushUps/Accelerometer.csv', 'data/PushUps/Gyroscope.csv', 'data/PushUps/Orientation.csv')
df_push1 = loadJsonData('push ups 1', 'data/PushUps/PushUps.json')

#load data pushups2
#df_push2_Acc, df_push2_Gyr, df_push2_Ori =loadDataFrames('push ups 2','data/PushUps2/Accelerometer.csv', 'data/PushUps2/Gyroscope.csv', 'data/PushUps2/Orientation.csv')
df_push2 = loadJsonData('push ups 2', 'data/PushUps2/PushUps2.json')

#load data pushups3 Alex
df_push3 = loadJsonData('push ups 3', 'data/Alex_Push_Up_2-2023-05-23_15-04-54.json')

#load data pushups4 Alex
df_push4 = loadJsonData('push ups 4', 'data/Alex_10_Liegestütz-2023-05-02_15-29-25.json')

#load data pushups5 Pierre
df_push5 = loadJsonData('push ups 5', 'data/PushUps-pierre.json')

##############################################################################################################

#load data frames for jumping jacks
#df_JJ_Acc, df_JJ_Gyr, df_JJ_Ori =loadDataFrames('jumping jacks ','data/JJ_rightHand/Accelerometer.csv', 'data/JJ_rightHand/Gyroscope.csv', 'data/JJ_rightHand/Orientation.csv')
df_JJ1 = loadJsonData('jumping jacks 1', 'data/JJ_rightHand/JJ1.json')

#load data JJ2
#df_JJ2_Acc, df_JJ2_Gyr, df_JJ2_Ori =loadDataFrames('jumping jacks 2','data/JJ2/Accelerometer.csv', 'data/JJ2/Gyroscope.csv', 'data/JJ2/Orientation.csv')
df_JJ2 = loadJsonData('jumping jacks 2', 'data/JJ2/JJ2.json')

#load data JJ3 Alex
df_JJ3 = loadJsonData('jumping jacks 3', 'data/Alex_10_Hampelmänner-2023-05-02_15-30-18.json')

#load data JJ4 Pierre
df_JJ4 = loadJsonData('jumping jacks 4', 'data/JumpingsJacks-pierre-1.json')

#load data JJ5 Pierre
df_JJ5 = loadJsonData('jumping jacks 5', 'data/JumpingsJacks-pierre-2.json')

Data Frames for mixed loaded.
Data Frame for walking 1 loaded.
Data Frame for walking 2 loaded.
Data Frame for walking 3 loaded.
Data Frame for walking 4 loaded.
Data Frame for push ups 1 loaded.
Data Frame for push ups 2 loaded.
Data Frame for push ups 3 loaded.
Data Frame for push ups 4 loaded.
Data Frame for push ups 5 loaded.
Data Frame for jumping jacks 1 loaded.
Data Frame for jumping jacks 2 loaded.
Data Frame for jumping jacks 3 loaded.
Data Frame for jumping jacks 4 loaded.
Data Frame for jumping jacks 5 loaded.


**Clean Up Json Data**

In [4]:
# Show the column names of the first walking recording.
df_walk1.columns

Index(['sensor', 'time', 'seconds_elapsed', 'z', 'y', 'x', 'relativeAltitude',
       'pressure', 'batteryLevel', 'batteryState', 'lowPowerMode',
       'brightness', 'lux', 'bearingAccuracy', 'speedAccuracy',
       'verticalAccuracy', 'horizontalAccuracy', 'speed', 'bearing',
       'altitude', 'longitude', 'latitude', 'version', 'device name',
       'recording time', 'platform', 'appVersion', 'device id', 'sensors',
       'sampleRateMs', 'qz', 'qy', 'qx', 'qw', 'roll', 'pitch', 'yaw'],
      dtype='object')

In [13]:
# Remove the rows of unused sensors from the first two recordings of each activity.
df_walk1, df_walk2, df_JJ1, df_JJ2, df_push1, df_push2 = (
    deleteSensorData(frame)
    for frame in (df_walk1, df_walk2, df_JJ1, df_JJ2, df_push1, df_push2)
)

In [11]:
# Drop the metadata columns from every recording, in the same order as before:
# push ups, jumping jacks, walking.
(df_push1, df_push2, df_push3, df_push4, df_push5,
 df_JJ1, df_JJ2, df_JJ3, df_JJ4, df_JJ5,
 df_walk1, df_walk2, df_walk3, df_walk4) = (
    cleanDataJson(frame)
    for frame in (
        df_push1, df_push2, df_push3, df_push4, df_push5,
        df_JJ1, df_JJ2, df_JJ3, df_JJ4, df_JJ5,
        df_walk1, df_walk2, df_walk3, df_walk4,
    )
)

AttributeError: 'NoneType' object has no attribute 'drop'

**Concat Dataframes**

In [None]:
# Build one combined frame per recording from its accelerometer, gyroscope
# and orientation CSV frames.
# NOTE(review): apart from the df_mixed_* frames, the *_Acc/_Gyr/_Ori inputs
# used below are only assigned by loadDataFrames calls that are commented out
# in the load cell, so this cell raises NameError on a fresh kernel - confirm
# whether the CSV path is still needed.
# NOTE(review): the results are bound to df_walk1, df_push1, ... and therefore
# replace the JSON-based frames of the same name - confirm this is intended.

#Concat DataFrames Walk
df_walk1 = concatDataFrames('walking', df_walk_Acc, df_walk_Gyr, df_walk_Ori)

#Concat DataFrames PushUps
df_push1 = concatDataFrames('push ups', df_push_Acc, df_push_Gyr, df_push_Ori)

#Concat DataFrames JumpingJacks
df_JJ1 = concatDataFrames('jumping jacks', df_JJ_Acc, df_JJ_Gyr, df_JJ_Ori)

#Concat DataFrames JumpingJacks2
df_JJ2 = concatDataFrames('jumping jacks 2', df_JJ2_Acc, df_JJ2_Gyr, df_JJ2_Ori)

#Concat DataFrames PushUps2
df_push2 = concatDataFrames('push ups 2', df_push2_Acc, df_push2_Gyr, df_push2_Ori)

#Concat DataFrames Walk2
df_walk2 = concatDataFrames('walking 2', df_walk2_Acc, df_walk2_Gyr, df_walk2_Ori)



#Concat Mixed DataFrames
df_mixed = concatDataFrames('mixed', df_mixed_Acc, df_mixed_Gyr, df_mixed_Ori)
df_mixed.head(10)

# Plot Data

In [None]:
# One line chart per gyroscope axis (z, y, x) of the push-up recording.
# NOTE(review): df_push_Gyr is only assigned in a commented-out load statement - confirm it exists before running.
df_push_Gyr.plot(x='time', y='z', kind='line')
df_push_Gyr.plot(x='time', y='y', kind='line')
df_push_Gyr.plot(x='time', y='x', kind='line')

In [None]:
# Line chart of the z column over time for the third jumping-jacks recording.
df_JJ3.plot(x='time', y='z', kind='line')

In [None]:
#Plot gyroscope data: Walk, PushUps, JumpingJacks
for gyr_frame, gyr_title in (
    (df_walk_Gyr, 'Gyroscope Walk'),
    (df_push_Gyr, 'Gyroscope PushUps'),
    (df_JJ_Gyr, 'Gyroscope JumpingJacks'),
):
    plotDataGyr(gyr_frame, gyr_title)

In [None]:
#Plotting Accelerometer Data: Walk, PushUps, JumpingJacks
for acc_frame, acc_title in (
    (df_walk_Acc, 'Accelerometer Walk'),
    (df_push_Acc, 'Accelerometer PushUps'),
    (df_JJ_Acc, 'Accelerometer JumpingJacks'),
):
    plotDataAcc(acc_frame, acc_title)

In [None]:
#Plot orientation data: Walk, PushUps, JumpingJacks
for ori_frame, ori_title in (
    (df_walk_Ori, 'Orientation Walk'),
    (df_push_Ori, 'Orientation PushUps'),
    (df_JJ_Ori, 'Orientation JumpingJacks'),
):
    plotDataOri(ori_frame, ori_title)
    

# **KNN Classification**
1. Label Data
2. Concat Dataframes
3. Split Data
4. Train Model
5. Predict
6. Evaluate

### 1. Label Data

In [None]:
#Classification with KNN
#Function to label the data
def getData(data, label):
    """Return a copy of ``data`` with a constant 'label' column.

    Args:
        data: Frame holding the samples of one recording.
        label: Class name written into every row.

    Returns:
        A new DataFrame with the extra (or replaced) 'label' column. The input
        frame is left unchanged, so labelling a filtered slice does not raise
        SettingWithCopyWarning.
    """
    return data.assign(label=label)

#Label Data
#Label Data Walk
df_walk1, df_walk2, df_walk3, df_walk4 = (
    getData(frame, 'walk')
    for frame in (df_walk1, df_walk2, df_walk3, df_walk4)
)

#Label Data PushUps
df_push1, df_push2, df_push3, df_push4, df_push5 = (
    getData(frame, 'pushups')
    for frame in (df_push1, df_push2, df_push3, df_push4, df_push5)
)

#Label Data JumpingJacks
df_JJ1, df_JJ2, df_JJ3, df_JJ4, df_JJ5 = (
    getData(frame, 'jumpingjacks')
    for frame in (df_JJ1, df_JJ2, df_JJ3, df_JJ4, df_JJ5)
)

In [None]:
# Show the first ten rows of the labelled push-up recording 4.
df_push4.head(10)

### 2. Concat Dataframes

In [None]:
#Concat DataFrames
df = pd.concat([df_walk1, df_push1, df_JJ1], axis=0)
df_test = pd.concat([df_JJ2, df_push2, df_walk2], axis=0)

#Shorten df_test for prediction
df_test = df_test.head(1517)

#Fill NaN values by propagating the last valid value forward.
# ffill() replaces the deprecated fillna(method='ffill'); rebinding instead of
# inplace=True also avoids SettingWithCopyWarning on the head() slice.
df = df.ffill()
df_test = df_test.ffill()

#no random split, specify it explicitly instead
#first 70% train, last 30% test
#five training frames: use only the first three for training and the other two as test

In [None]:
#drop sensor column
# NOTE(review): df_push3 is not listed here although it is later concatenated
# into the training frame - confirm whether its 'sensor' column should go too.
df_push4, df_push5, df_JJ3, df_JJ4, df_JJ5, df_walk3, df_walk4 = (
    frame.drop(columns=['sensor'])
    for frame in (df_push4, df_push5, df_JJ3, df_JJ4, df_JJ5, df_walk3, df_walk4)
)


In [None]:
#Concat all Dataframes into one
df = pd.concat([df_walk1, df_walk2, df_push1, df_push2, df_push3, df_JJ1, df_JJ2, df_JJ3], axis=0)
df_test = pd.concat([df_walk3, df_walk4, df_push4, df_push5, df_JJ4, df_JJ5], axis=0)

#Fill NaN values by propagating the last valid value forward.
# ffill() replaces the deprecated fillna(method='ffill', inplace=True).
df = df.ffill()
df_test = df_test.ffill()


In [None]:
print('Test: ', len(df_test))
print('Df: ', len(df))

# Remove the labels from the hold-out frame. errors='ignore' keeps the cell
# re-runnable: a second execution no longer fails with KeyError.
df_test = df_test.drop(columns=['label'], errors='ignore')

ToDo: Marvin
#gemischten Datensatz vlt mal testen, weiß noch nicht genau wie ich den einbauen soll

In [None]:
# Summary statistics of the hold-out frame.
#df_test.head(10)
df_test.describe()

### 3. Split Data

In [None]:
#Split data: first 70% train, last 30% test (no shuffling)
# NOTE(review): df is concatenated recording by recording, so the unshuffled
# tail may contain only the classes of the last recordings - confirm intended.
y = df['label']
X = df.drop(columns=['label'])

X_train, X_test, y_train, y_test = train_test_split(
    X, y, test_size=0.3, shuffle=False
)

In [None]:
#Show Training Data
def showXTrainingData(data):
    """Print shape and columns of ``data`` and plot its sensor columns.

    Each of the columns z, y, x, qz, qy, qx, qw and roll is drawn against the
    frame's index in its own colour.

    Args:
        data: Frame containing the eight columns named above.
    """
    print(data.shape)
    print(data.columns)
    column_colors = {
        'z': 'red',
        'y': 'blue',
        'x': 'green',
        'qz': 'yellow',
        'qy': 'black',
        'qx': 'orange',
        'qw': 'pink',
        'roll': 'purple',
    }
    for column, color in column_colors.items():
        plt.plot(data[column], color=color, marker='.')
    plt.xlabel('time')
    plt.ylabel('x,y,z,qz,qy,qx,qw,roll')
    plt.legend(list(column_colors))
    plt.title('Train Data')
    plt.show()

# Print and plot the features of the training split.
showXTrainingData(X_train)


### 4. Train Model

In [None]:
#KNN: 3 neighbours, votes weighted by distance
knn = KNeighborsClassifier(n_neighbors=3, weights='distance')
knn.fit(X_train, y_train)

#NOTE: the sizes do not match yet, because y_test has more rows than y_pred
#TODO: shuffle data and then split

### 5. Predict

In [None]:
# Predict a label for every row of the held-out split.
y_pred = knn.predict(X_test)
# Print the number of held-out labels.
print(len(y_test))

### 6. Evaluate

In [None]:
#Confusion Matrix
cm = confusion_matrix(y_test, y_pred)
print('Confusion Matrix: \n', cm)

#Accuracy and weighted Recall, both reported in percent
accuracy = accuracy_score(y_test, y_pred)
recall = recall_score(y_test, y_pred, average='weighted')
for metric_name, metric_value in (('Accuracy: ', accuracy), ('Recall: ', recall)):
    print(metric_name, metric_value * 100, '%')


In [None]:
#Split Data Mixed
# NOTE(review): no visible cell adds a 'label' column to df_mixed - confirm
# the mixed recording is labelled before running this cell.
X_mixed = df_mixed.drop(columns=['label'])
y_mixed = df_mixed['label']
# random_state makes the shuffled split reproducible across runs.
X_train_mixed, X_test_mixed, y_train_mixed, y_test_mixed = train_test_split(
    X_mixed, y_mixed, test_size=0.3, random_state=42
)

#KNN
# Fit and predict on the mixed split (the cell previously reused X_train /
# y_train / X_test). A separate name keeps the `knn` model that is pickled
# further down untouched.
knn_mixed = KNeighborsClassifier(n_neighbors=3, weights='distance')
knn_mixed.fit(X_train_mixed, y_train_mixed)
y_pred_mixed = knn_mixed.predict(X_test_mixed)

# Sonstiges

**Export Model into Pickle**

In [None]:
#pickle stores the trained model so it can be loaded in streamlit and applied to the data set there
# The with-block closes the file handle even if dumping fails.
with open('knn.pkl', 'wb') as model_file:
    pkl.dump(knn, model_file)

Lösung von ChatGpt, leider nicht so einfach...

In [None]:
# Load data: every row is one sample, every remaining column one feature.
training_data = df.drop(columns=['label'])
training_labels = df['label']

# A DataFrame is already two-dimensional (samples x features) and has no
# reshape() method, so it is handed to scikit-learn unchanged.

# Create and train the KNN model
knn = KNeighborsClassifier(n_neighbors=3, weights='distance')
knn.fit(training_data, training_labels)

# Sensor data of the new recording, restricted to the training features
# (this also ignores a 'label' column if df_test still has one).
new_data = df_test[training_data.columns]

# Predicted label for every sample of the new recording
prediction = knn.predict(new_data)

# Class probabilities per sample, averaged over the whole recording.
# The entries follow the order of knn.classes_.
probabilities = knn.predict_proba(new_data).mean(axis=0)

# Index of the most probable class within knn.classes_
predicted_class_index = int(probabilities.argmax())
predicted_label = knn.classes_[predicted_class_index]

# Probability of the predicted class
predicted_class_probability = probabilities[predicted_class_index]

# Display names keyed by the labels assigned in the labelling cell, so the
# lookup does not depend on the order of the classes.
class_names = {'jumpingjacks': 'Jumping Jacks', 'pushups': 'Pushups', 'walk': 'Laufen'}

# Print the result
result = f"Mit {predicted_class_probability*100:.2f}% Sicherheit hat die Person {class_names[predicted_label]} gemacht."
print(result)
