**Load Libraries**

In [1]:
import pandas as pd
import numpy as np
import sklearn as sk
import matplotlib.pyplot as plt
import pickle as pkl
import tsfresh as tsf
from tsfresh import extract_features
from tsfresh.utilities.dataframe_functions import impute

#Classification with KNN
from sklearn.neighbors import KNeighborsClassifier
from sklearn.metrics import confusion_matrix
from sklearn.metrics import recall_score
from sklearn.model_selection import train_test_split
from sklearn.metrics import accuracy_score


# Data Preprocessing

### Define Functions

In [2]:
def loadDataFrames(name, pathAcc, pathGyr, pathOri):
    df_Acc= pd.read_csv(pathAcc)
    df_Gyr= pd.read_csv(pathGyr)
    df_Ori= pd.read_csv(pathOri)
    print("Data Frames for " + name + " loaded.")
    return df_Acc, df_Gyr, df_Ori

def loadJsonData(name, pathJson):
    df = pd.read_json(pathJson)
    print("Data Frame for " + name + " loaded.")
    return df	   

#Concat Funktion
def concatDataFrames(name, df_Acc, df_Gyr, df_Ori):
  df_Gyr.drop(['time','seconds_elapsed'], axis=1, inplace=True)
  df_Ori.drop(['time','seconds_elapsed'], axis=1, inplace=True)
  df = pd.concat([df_Acc, df_Gyr, df_Ori], axis=1)
  print("Data Frames for " + name + " concatenated.")
  return df

#Function to plot Gyr Data
def plotDataGyr(data, title):
    plt.plot(data['time'], data['z'], color='red', marker='.')
    plt.plot(data['time'], data['y'], color='blue', marker='.')
    plt.plot(data['time'], data['x'], color='green', marker='.')
    plt.xlabel('time')
    plt.ylabel('x,y,z')
    plt.legend(['z', 'y', 'x'])
    plt.title(title)
    plt.show()

#Function to Plot Acc Data
def plotDataAcc(data, title):
    plt.plot(data['time'], data['z'], color='red', marker='.')
    plt.plot(data['time'], data['y'], color='blue', marker='.')
    plt.plot(data['time'], data['x'], color='green', marker='.')
    plt.xlabel('time')
    plt.ylabel('x,y,z')
    plt.legend(['z', 'y', 'x'])
    plt.title(title)
    plt.show()

#Function to Plot Ori Data
def plotDataOri(data, title):
    plt.plot(data['time'], data['qz'], color='red', marker='.')
    plt.plot(data['time'], data['qy'], color='blue', marker='.')
    plt.plot(data['time'], data['qx'], color='green', marker='.')
    plt.plot(data['time'], data['qw'], color='yellow', marker='.')
    #plt.plot(data['time'], data['roll'], color='black', marker='.')
    plt.xlabel('time')
    plt.ylabel('qz,qy,qx,qw,roll')
    plt.legend(['qz', 'qy', 'qx', 'qw', 'roll'])
    plt.title(title)
    plt.show()

#Clean unused Sensors
def deleteSensorData(df):
  notUsedSensors = ['Annotation', 'Barometer', 'Battery', 'Brightness', 'Gravity', 'Light', 'Location', 'Magnetometer', 'MagnetometerUncalibrated', 'Microphone']
  df = df[~df['sensor'].isin(notUsedSensors)]
  return df


#Clean Function Json Data
def cleanDataJson(df):
    #Drop unused Columns
    columns_to_drop = ['sensors', 'version','device name','recording time','platform','appVersion', 'device id', 'sampleRateMs','relativeAltitude','pressure', 'batteryLevel', 'batteryState', 'lowPowerMode','brightness', 'lux', 'bearingAccuracy', 'speedAccuracy', 'verticalAccuracy', 'horizontalAccuracy', 'speed', 'bearing','altitude', 'longitude', 'latitude']
    columns_to_drop = list(set(columns_to_drop).intersection(df.columns))

    if columns_to_drop:
        df.drop(columns=columns_to_drop, inplace=True)
        print("Spalten wurden erfolgreich entfernt.")
    else:
        print("Keine der Spalten zum Entfernen gefunden.")
        #df.drop(['version','device name','recording time','platform','appVersion', 'device id', 'sampleRateMs' ], axis=1, inplace=True)
    return df


#Function to get Accelometer, Gyroscope and Orientation Data from Json in one Dataframe each
def getSensorData(df):
    df_Acc = df[df['sensor'] == 'Accelerometer']
    df_Gyr = df[df['sensor'] == 'Gyroscope']
    df_Ori = df[df['sensor'] == 'Orientation']
  #Drop all Columns with NaN Values
    df_Acc.dropna(axis=1, how='all', inplace=True)
    df_Gyr.dropna(axis=1, how='all', inplace=True)
    df_Ori.dropna(axis=1, how='all', inplace=True)

  #Drop sensor column
    df_Acc.drop(['sensor', 'seconds_elapsed'], axis=1, inplace=True)
    df_Gyr.drop(['sensor', 'seconds_elapsed'], axis=1, inplace=True)
    df_Ori.drop(['sensor', 'seconds_elapsed'], axis=1, inplace=True)
    

    return df_Acc, df_Gyr, df_Ori

### Load Data and give them a name

In [3]:
#load mixed data
df_mixed_Acc, df_mixed_Gyr, df_mixed_Ori =loadDataFrames('mixed','data/MixedData/Accelerometer.csv', 'data/MixedData/Gyroscope.csv', 'data/MixedData/Orientation.csv')

##############################################################################################################

#load data frames for walking
df_walk1 = loadJsonData('walking 1', 'data/NormalWalk/NormalWalk.json')

#load data walk2
df_walk2 = loadJsonData('walking 2', 'data/Walk2/Walk2.json')

#load data walk3 Alex
df_walk3 = loadJsonData('walking 3', 'data\Alex_Rumstehen_-2023-05-02_15-30-58.json')

#load data walk4 Alex
df_walk4 = loadJsonData('walking 4', 'data\Alex_Rumstehen_2-2023-05-23_14-53-09.json')


##############################################################################################################

#load data frames for push ups
df_push1 = loadJsonData('push ups 1', 'data/PushUps/PushUps.json')

#load data pushups2
df_push2 = loadJsonData('push ups 2', 'data/PushUps2/PushUps2.json')

#load data pushups3 Alex
df_push3 = loadJsonData('push ups 3', 'data/Alex_Push_Up_2-2023-05-23_15-04-54.json')

#load data pushups4 Alex
df_push4 = loadJsonData('push ups 4', 'data\Alex_10_Liegestütz-2023-05-02_15-29-25.json')

#load data pushups5 Pierre
df_push5 = loadJsonData('push ups 5', 'data\PushUps-pierre.json')

##############################################################################################################

#load data frames for jumping jacks
df_JJ1 = loadJsonData('jumping jacks 1', 'data/JJ_rightHand/JJ1.json')

#load data JJ2
df_JJ2 = loadJsonData('jumping jacks 2', 'data/JJ2/JJ2.json')

#load data JJ3 Alex
df_JJ3 = loadJsonData('jumping jacks 3', 'data/Alex_10_Hampelmänner-2023-05-02_15-30-18.json')

#load data JJ4 Pierre
df_JJ4 = loadJsonData('jumping jacks 4', 'data/JumpingsJacks-pierre-1.json')

#load data JJ5 Pierre
df_JJ5 = loadJsonData('jumping jacks 5', 'data/JumpingsJacks-pierre-2.json')

Data Frames for mixed loaded.
Data Frame for walking 1 loaded.
Data Frame for walking 2 loaded.
Data Frame for walking 3 loaded.
Data Frame for walking 4 loaded.
Data Frame for push ups 1 loaded.
Data Frame for push ups 2 loaded.
Data Frame for push ups 3 loaded.
Data Frame for push ups 4 loaded.
Data Frame for push ups 5 loaded.
Data Frame for jumping jacks 1 loaded.
Data Frame for jumping jacks 2 loaded.
Data Frame for jumping jacks 3 loaded.
Data Frame for jumping jacks 4 loaded.
Data Frame for jumping jacks 5 loaded.


In [4]:
#Label Dataframes with Activity
df_walk1['activity'] = 'walking'
df_walk2['activity'] = 'walking'
df_walk3['activity'] = 'walking'
df_walk4['activity'] = 'walking'
df_push1['activity'] = 'push ups'
df_push2['activity'] = 'push ups'
df_push3['activity'] = 'push ups'
df_push4['activity'] = 'push ups'
df_push5['activity'] = 'push ups'
df_JJ1['activity'] = 'jumping jacks'
df_JJ2['activity'] = 'jumping jacks'
df_JJ3['activity'] = 'jumping jacks'
df_JJ4['activity'] = 'jumping jacks'
df_JJ5['activity'] = 'jumping jacks'


In [33]:
#iterieren über alldf mit der funktioin cleandatajson
for df in allDf:
    df = cleanDataJson(df)

Keine der Spalten zum Entfernen gefunden.
Keine der Spalten zum Entfernen gefunden.
Keine der Spalten zum Entfernen gefunden.
Keine der Spalten zum Entfernen gefunden.
Keine der Spalten zum Entfernen gefunden.
Keine der Spalten zum Entfernen gefunden.
Keine der Spalten zum Entfernen gefunden.
Keine der Spalten zum Entfernen gefunden.
Keine der Spalten zum Entfernen gefunden.
Keine der Spalten zum Entfernen gefunden.
Keine der Spalten zum Entfernen gefunden.
Keine der Spalten zum Entfernen gefunden.
Keine der Spalten zum Entfernen gefunden.
Keine der Spalten zum Entfernen gefunden.


**Get the Dataframes for each Sensor**


In [5]:
df_walk1_Acc, df_walk1_Gyr, df_walk1_Ori = getSensorData(df_walk1)
df_walk2_Acc, df_walk2_Gyr, df_walk2_Ori = getSensorData(df_walk2)
df_walk3_Acc, df_walk3_Gyr, df_walk3_Ori = getSensorData(df_walk3)
df_walk4_Acc, df_walk4_Gyr, df_walk4_Ori = getSensorData(df_walk4)

df_push1_Acc, df_push1_Gyr, df_push1_Ori = getSensorData(df_push1)
df_push2_Acc, df_push2_Gyr, df_push2_Ori = getSensorData(df_push2)
df_push3_Acc, df_push3_Gyr, df_push3_Ori = getSensorData(df_push3)
df_push4_Acc, df_push4_Gyr, df_push4_Ori = getSensorData(df_push4)
df_push5_Acc, df_push5_Gyr, df_push5_Ori = getSensorData(df_push5)

df_JJ1_Acc, df_JJ1_Gyr, df_JJ1_Ori = getSensorData(df_JJ1)
df_JJ2_Acc, df_JJ2_Gyr, df_JJ2_Ori = getSensorData(df_JJ2)
df_JJ3_Acc, df_JJ3_Gyr, df_JJ3_Ori = getSensorData(df_JJ3)
df_JJ4_Acc, df_JJ4_Gyr, df_JJ4_Ori = getSensorData(df_JJ4)
df_JJ5_Acc, df_JJ5_Gyr, df_JJ5_Ori = getSensorData(df_JJ5)


A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  df_Acc.dropna(axis=1, how='all', inplace=True)
A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  df_Gyr.dropna(axis=1, how='all', inplace=True)
A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  df_Ori.dropna(axis=1, how='all', inplace=True)
A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  df_Acc.drop(['sensor', 'sec

In [9]:
#alle Accelometer Dataframes in eine liste
allAccDf = [df_walk1_Acc, df_walk2_Acc, df_walk3_Acc, df_walk4_Acc, df_push1_Acc, df_push2_Acc, df_push3_Acc, df_push4_Acc, df_push5_Acc, df_JJ1_Acc, df_JJ2_Acc, df_JJ3_Acc, df_JJ4_Acc, df_JJ5_Acc]

#alle Gyroscope Dataframes in eine liste
allGyrDf = [df_walk1_Gyr, df_walk2_Gyr, df_walk3_Gyr, df_walk4_Gyr, df_push1_Gyr, df_push2_Gyr, df_push3_Gyr, df_push4_Gyr, df_push5_Gyr, df_JJ1_Gyr, df_JJ2_Gyr, df_JJ3_Gyr, df_JJ4_Gyr, df_JJ5_Gyr]

#alle Orientation Dataframes in eine liste
allOriDf = [df_walk1_Ori, df_walk2_Ori, df_walk3_Ori, df_walk4_Ori, df_push1_Ori, df_push2_Ori, df_push3_Ori, df_push4_Ori, df_push5_Ori, df_JJ1_Ori, df_JJ2_Ori, df_JJ3_Ori, df_JJ4_Ori, df_JJ5_Ori]


In [41]:
df_walk1_Acc.describe()

Unnamed: 0,time,z,y,x
count,1694.0,1694.0,1694.0,1694.0
mean,1.682881e+18,-0.121552,-0.373166,0.089478
std,8230018000.0,2.596474,2.7769,2.452725
min,1.682881e+18,-10.472376,-19.543915,-12.545299
25%,1.682881e+18,-1.398104,-1.73325,-1.494583
50%,1.682881e+18,-0.236431,-0.108898,0.133959
75%,1.682881e+18,0.796619,1.053636,1.559253
max,1.682881e+18,39.480579,7.83236,9.757957


In [6]:
#all walking dataframes in one list
allWalkDf = [df_walk1_Acc, df_walk2_Acc, df_walk3_Acc, df_walk4_Acc, df_walk1_Gyr, df_walk2_Gyr, df_walk3_Gyr, df_walk4_Gyr, df_walk1_Ori, df_walk2_Ori, df_walk3_Ori, df_walk4_Ori]

#all push ups dataframes in one list
allPushDf = [df_push1_Acc, df_push2_Acc, df_push3_Acc, df_push4_Acc, df_push5_Acc, df_push1_Gyr, df_push2_Gyr, df_push3_Gyr, df_push4_Gyr, df_push5_Gyr, df_push1_Ori, df_push2_Ori, df_push3_Ori, df_push4_Ori, df_push5_Ori]

#all jumping jacks dataframes in one list
allJJDf = [df_JJ1_Acc, df_JJ2_Acc, df_JJ3_Acc, df_JJ4_Acc, df_JJ5_Acc, df_JJ1_Gyr, df_JJ2_Gyr, df_JJ3_Gyr, df_JJ4_Gyr, df_JJ5_Gyr, df_JJ1_Ori, df_JJ2_Ori, df_JJ3_Ori, df_JJ4_Ori, df_JJ5_Ori]


In [7]:
#all dataframes in one list
allDf = [df_walk1_Acc,  df_walk2_Acc, df_walk3_Acc, df_walk4_Acc, df_push1_Acc, df_push2_Acc, df_push3_Acc, df_push4_Acc, df_push5_Acc, df_JJ1_Acc, df_JJ2_Acc, df_JJ3_Acc, df_JJ4_Acc, df_JJ5_Acc, df_walk1_Gyr, df_walk2_Gyr, df_walk3_Gyr, df_walk4_Gyr, df_push1_Gyr, df_push2_Gyr, df_push3_Gyr, df_push4_Gyr, df_push5_Gyr, df_JJ1_Gyr, df_JJ2_Gyr, df_JJ3_Gyr, df_JJ4_Gyr, df_JJ5_Gyr, df_walk1_Ori, df_walk2_Ori, df_walk3_Ori, df_walk4_Ori, df_push1_Ori, df_push2_Ori, df_push3_Ori, df_push4_Ori, df_push5_Ori, df_JJ1_Ori, df_JJ2_Ori, df_JJ3_Ori, df_JJ4_Ori, df_JJ5_Ori]


In [53]:
df_walk1_Acc.describe()

Unnamed: 0,time,z,y,x
count,1694.0,1694.0,1694.0,1694.0
mean,1.682881e+18,-0.121552,-0.373166,0.089478
std,8230018000.0,2.596474,2.7769,2.452725
min,1.682881e+18,-10.472376,-19.543915,-12.545299
25%,1.682881e+18,-1.398104,-1.73325,-1.494583
50%,1.682881e+18,-0.236431,-0.108898,0.133959
75%,1.682881e+18,0.796619,1.053636,1.559253
max,1.682881e+18,39.480579,7.83236,9.757957


**Metrics for Accelerometer**

In [17]:
def getMetricsAcc(df):
  Acc_metrics = pd.DataFrame()
  metrics = {}  # Dictionary to store the metrics for each dataframe

  metrics['mean_z'] = df['z'].mean()
  metrics['sum_z'] = df['z'].sum()
  metrics['var_z'] = df['z'].var()
  metrics['std_z'] = df['z'].std()

  metrics['mean_y'] = df['y'].mean()
  metrics['sum_y'] = df['y'].sum()
  metrics['var_y'] = df['y'].var()
  metrics['std_y'] = df['y'].std()

  metrics['mean_x'] = df['x'].mean()
  metrics['sum_x'] = df['x'].sum()
  metrics['var_x'] = df['x'].var()
  metrics['std_x'] = df['x'].std()

 # Append the label column from the current dataframe to the metrics dictionary
  metrics['activity'] = df['activity']
    
  # Append the metrics dictionary as a new row to the metrics dataframe
  Acc_metrics = Acc_metrics.append(metrics, ignore_index=True)

  #change activity to string
  Acc_metrics['activity'] = Acc_metrics['activity'].astype(str)
  #short string to 10 characters
  Acc_metrics['activity'] = Acc_metrics['activity'].str[:21]
  #delete all numbers from string
  Acc_metrics['activity'] = Acc_metrics['activity'].str.replace('\d+', '')
  #detelte all empty spaces
  Acc_metrics['activity'] = Acc_metrics['activity'].str.replace(' ', '')
  return Acc_metrics

In [None]:
Acc_metrics_df = pd.DataFrame()

for df in allAccDf:
    Acc_metrics_df = Acc_metrics_df.append(getMetricsAcc(df), ignore_index=True)

In [27]:
test = getMetricsAcc(df_walk1_Acc)

  Acc_metrics = Acc_metrics.append(metrics, ignore_index=True)
  Acc_metrics['activity'] = Acc_metrics['activity'].str.replace('\d+', '')


**Metrics for Gyroscope**

In [21]:
def getMetricsGyr(df):
  Gyr_metrics = pd.DataFrame()

  metrics = {}  # Dictionary to store the metrics for each dataframe
  
  # Calculate and store the metrics for the current dataframe
  metrics['mean_z'] = df['z'].mean()
  metrics['sum_z'] = df['z'].sum()
  metrics['var_z'] = df['z'].var()
  metrics['std_z'] = df['z'].std()

  metrics['mean_y'] = df['y'].mean()
  metrics['sum_y'] = df['y'].sum()
  metrics['var_y'] = df['y'].var()
  metrics['std_y'] = df['y'].std()

  metrics['mean_x'] = df['x'].mean()
  metrics['sum_x'] = df['x'].sum()
  metrics['var_x'] = df['x'].var()
  metrics['std_x'] = df['x'].std()

  # Add more metrics as needed
  
  # Append the label column from the current dataframe to the metrics dictionary
  metrics['activity'] = df['activity']
  
  # Append the metrics dictionary as a new row to the metrics dataframe
  Gyr_metrics = Gyr_metrics.append(metrics, ignore_index=True)
  #change activity to string
  Gyr_metrics['activity'] = Gyr_metrics['activity'].astype(str)
  #short string to 21 characters
  Gyr_metrics['activity'] = Gyr_metrics['activity'].str[:21]
  #delete all numbers from string
  Gyr_metrics['activity'] = Gyr_metrics['activity'].str.replace('\d+', '')
  #detelte all empty spaces
  Gyr_metrics['activity'] = Gyr_metrics['activity'].str.replace(' ', '')

  return Gyr_metrics

In [None]:
Gyr_metrics_df = pd.DataFrame()

for df in allGyrDf:
    Gyr_metrics_df = Gyr_metrics_df.append(getMetricsGyr(df), ignore_index=True)


**Metrics for Orientation**

In [24]:
def getMetricsOri(df):
  Ori_metrics_df = pd.DataFrame()

  metrics = {}  # Dictionary to store the metrics for each dataframe
  
  # Calculate and store the metrics for the current dataframe
  metrics['mean_qz'] = df['qz'].mean()
  metrics['sum_qz'] = df['qz'].sum()
  metrics['var_qz'] = df['qz'].var()
  metrics['std_qz'] = df['qz'].std()

  metrics['mean_qy'] = df['qy'].mean()
  metrics['sum_qy'] = df['qy'].sum()
  metrics['var_qy'] = df['qy'].var()
  metrics['std_qy'] = df['qy'].std()

  metrics['mean_qx'] = df['qx'].mean()
  metrics['sum_qx'] = df['qx'].sum()
  metrics['var_qx'] = df['qx'].var()
  metrics['std_qx'] = df['qx'].std()

  metrics['mean_qw'] = df['qw'].mean()
  metrics['sum_qw'] = df['qw'].sum()
  metrics['var_qw'] = df['qw'].var()
  metrics['std_qw'] = df['qw'].std()

  metrics['mean_roll'] = df['roll'].mean()
  metrics['sum_roll'] = df['roll'].sum()
  metrics['var_roll'] = df['roll'].var()
  metrics['std_roll'] = df['roll'].std()

  metrics['mean_pitch'] = df['pitch'].mean()
  metrics['sum_pitch'] = df['pitch'].sum()
  metrics['var_pitch'] = df['pitch'].var()
  metrics['std_pitch'] = df['pitch'].std()

  metrics['mean_yaw'] = df['yaw'].mean()
  metrics['sum_yaw'] = df['yaw'].sum()
  metrics['var_yaw'] = df['yaw'].var()
  metrics['std_yaw'] = df['yaw'].std()

  # Add more metrics as needed
  
  # Append the label column from the current dataframe to the metrics dictionary
  metrics['activity'] = df['activity']
  
  # Append the metrics dictionary as a new row to the metrics dataframe
  Ori_metrics_df = Ori_metrics_df.append(metrics, ignore_index=True)
  #change activity to string
  Ori_metrics_df['activity'] = Ori_metrics_df['activity'].astype(str)
  #short string to 21 characters
  Ori_metrics_df['activity'] = Ori_metrics_df['activity'].str[:21]
  #delete all numbers from string
  Ori_metrics_df['activity'] = Ori_metrics_df['activity'].str.replace('\d+', '')
  #detelte all empty spaces
  Ori_metrics_df['activity'] = Ori_metrics_df['activity'].str.replace(' ', '')

  return Ori_metrics_df

In [None]:
Ori_metrics_df = pd.DataFrame()

for df in allOriDf:
    Ori_metrics_df = Ori_metrics_df.append(getMetricsOri(df), ignore_index=True)

In [26]:
print(Acc_metrics_df.dtypes)


mean_z      float64
sum_z       float64
var_z       float64
std_z       float64
mean_y      float64
sum_y       float64
var_y       float64
std_y       float64
mean_x      float64
sum_x       float64
var_x       float64
std_x       float64
activity     object
dtype: object


In [None]:
Acc_metrics_df.head()

# Plot Data

In [None]:
df_push_Gyr.plot(x='time', y='z', kind='line')
df_push_Gyr.plot(x='time', y='y', kind='line')
df_push_Gyr.plot(x='time', y='x', kind='line')

In [None]:
df_JJ3.plot(x='time', y='z', kind='line')

In [None]:
#Plot Walk
plotDataGyr(df_walk_Gyr, 'Gyroscope Walk')

#Plot PushUps
plotDataGyr(df_push_Gyr, 'Gyroscope PushUps')

#Plot JumpingJacks
plotDataGyr(df_JJ_Gyr, 'Gyroscope JumpingJacks')

In [None]:
#Plotting Accelerometer Data

#Plot Walk
plotDataAcc(df_walk_Acc, 'Accelerometer Walk')

#Plot PushUps
plotDataAcc(df_push_Acc, 'Accelerometer PushUps')

#Plot JumpingJacks
plotDataAcc(df_JJ_Acc, 'Accelerometer JumpingJacks')

In [None]:
#Plot Walk
plotDataOri(df_walk_Ori, 'Orientation Walk')

#Plot PushUps
plotDataOri(df_push_Ori, 'Orientation PushUps')

#Plot JumpingJacks
plotDataOri(df_JJ_Ori, 'Orientation JumpingJacks')
    

# **KNN Classifikation**
1. Label Data
2. Concat Dataframes
3. Split Data
4. Train Model
5. Predict
6. Evaluate

### 1. Label Data

In [None]:
#Classification with KNN
#Function to label the data
def getData(data, label):
    data['label'] = label
    return data

#Label Data
#Label Data Walk
df_walk1 = getData(df_walk1, 'walk')
df_walk2 = getData(df_walk2, 'walk')
df_walk3 = getData(df_walk3, 'walk')
df_walk4 = getData(df_walk4, 'walk')


#Label Data PushUps
df_push1 = getData(df_push1, 'pushups')
df_push2 = getData(df_push2, 'pushups')
df_push3 = getData(df_push3, 'pushups')
df_push4 = getData(df_push4, 'pushups')
df_push5 = getData(df_push5, 'pushups')

#Label Data JumpingJacks
df_JJ1 = getData(df_JJ1, 'jumpingjacks')
df_JJ2 = getData(df_JJ2, 'jumpingjacks')
df_JJ3 = getData(df_JJ3, 'jumpingjacks')
df_JJ4 = getData(df_JJ4, 'jumpingjacks')
df_JJ5 = getData(df_JJ5, 'jumpingjacks')

In [None]:
#plot json data
df_push4.head(10)

### 2. Concat Dataframes

In [None]:
#Concat DataFrames
df = pd.concat([df_walk1, df_push1, df_JJ1], axis=0)
df_test = pd.concat([df_JJ2, df_push2, df_walk2], axis=0)

#Short Df_test for predciton
df_test= df_test.head(1517)



#Fill NaN Values with propagation
df.fillna(method='ffill', inplace=True)
df_test.fillna(method='ffill', inplace=True)



#kein random split sondern spezifizieren
#erste 70% train, letzte 30% test
#fünf trainings df und quasi im train nur die ersten 3 und die zwei anderen dann als test

In [None]:
#Concat all Dataframes into one
df = pd.concat([df_walk1, df_walk2, df_push1, df_push2, df_push3, df_JJ1, df_JJ2, df_JJ3], axis=0)
df_test = pd.concat([df_walk3, df_walk4, df_push4, df_push5, df_JJ4, df_JJ5], axis=0)

In [None]:
#Fill NaN Values with propagation
df.fillna(method='bfill', inplace=True)
df_test.fillna(method='bfill', inplace=True)

In [None]:
df.head(10)

In [None]:
print('Test: ', len(df_test))
print('Df: ', len(df))

df_test.drop(['label'], axis=1, inplace=True)

ToDo: Marvin
#gemischten Datensatz vlt mal testen, weiß noch nicht genau wie ich den einbauen soll

In [None]:
#df_test.head(10)
df_test.describe()

In [137]:
#Concat metrics dataframes
df = pd.concat([Acc_metrics_df, Gyr_metrics_df, Ori_metrics_df], axis=1)

### 3. Split Data

In [142]:
#Split Data erste 70% train, letzte 30% test
X = df.drop(['activity'], axis=1)
y = df['activity']
y = y.loc[:, ~y.columns.duplicated()]

X_train, X_test, y_train, y_test = train_test_split(X, y, shuffle= True, test_size=0.3)

In [None]:
df.drop(['sensor'], axis=1, inplace=True)

# muss die daten aggregieren und zeitfenster statt zeitpunkte nehmen 
# damit kann ich dann die daten klassifizieren 
# große Dataframes in kleine Dataframes splitten und dann die kleinen Dataframes klassifizieren


In [None]:
#Show Training Data
def showXTrainingData(data):
  print(data.shape)
  print(data.columns)
  plt.plot(data['z'], color='red', marker='.')#
  plt.plot(data['y'], color='blue', marker='.')#
  plt.plot(data['x'], color='green', marker='.')#
  plt.plot(data['qz'], color='yellow', marker='.')#
  plt.plot(data['qy'], color='black', marker='.')#
  plt.plot(data['qx'], color='orange', marker='.')#
  plt.plot(data['qw'], color='pink', marker='.')#
  plt.plot(data['roll'], color='purple', marker='.')#
  plt.xlabel('time')
  plt.ylabel('x,y,z,qz,qy,qx,qw,roll')
  plt.legend(['z', 'y', 'x', 'qz', 'qy', 'qx', 'qw', 'roll'])
  plt.title('Train Data')
  plt.show()

showXTrainingData(X_train)


### 4. Train Model

In [143]:
#KNN
knn = KNeighborsClassifier(n_neighbors=3, weights='distance')
knn.fit(X_train, y_train)

#shuffle data and then split

  return self._fit(X, y)


### 5. Predict

In [144]:
y_pred = knn.predict(X_test)
print(len(y_test))

5


### 6. Evaluate

In [145]:
#Confusion Matrix
#label confustion matrix

cm = confusion_matrix(y_test, y_pred)
print('Confusion Matrix: \n', cm)

#Accuracy
accuracy = accuracy_score(y_test, y_pred)
print('Accuracy: ', accuracy * 100, '%')

#Recall
recall = recall_score(y_test, y_pred, average='weighted')
print('Recall: ', recall * 100, '%')

Confusion Matrix: 
 [[2 0]
 [3 0]]
Accuracy:  40.0 %
Recall:  40.0 %


**Export Model into Pickle**

In [None]:
#export the model with pickle
filename = 'knn.pickle'
pkl.dump(knn, open(filename, 'wb'))


# Sonstiges

In [None]:
#take the uploaded file and load it into a dataframe and then apply the model to it
#df = pd.read_csv('uploaded_file.csv')
#model = pickle.load(open('knn.pkl', 'rb'))
#model.predict(df)


Lösung von ChatGpt, leider nicht so einfach...

In [None]:
# Daten laden
training_data = df.drop(['label'], axis=1)
training_labels = df['label']

# Sensordaten in eine zweidimensionale Form umwandeln
training_data = training_data.reshape(training_data.shape[0], -1)


# KNN-Modell erstellen und trainieren
knn = KNeighborsClassifier(n_neighbors=3, weights='distance')
knn.fit(training_data, training_labels)

# Sensordaten für eine neue Instanz
new_data = df_test
new_data = new_data.reshape(1, -1)

# Vorhersage für die neue Instanz
prediction = knn.predict([new_data])

# Wahrscheinlichkeiten der Vorhersagen
probabilities = knn.predict_proba([new_data])

# Index der vorhergesagten Klasse
predicted_class_index = prediction[0]

# Wahrscheinlichkeit der vorhergesagten Klasse
predicted_class_probability = probabilities[0][predicted_class_index]

# Klassenbezeichnungen
class_names = ['Jumping Jacks', 'Pushups', 'Laufen']

# Ergebnis ausgeben
result = f"Mit {predicted_class_probability*100:.2f}% Sicherheit hat die Person {class_names[predicted_class_index]} gemacht."
print(result)
