# Acceleration Classification

## 1. Data load and Preprocessing

### - Extract Features and Labels
Relevant Features for Acceleration:
- accelerometerXAxis
- accelerometerYAxis
- accelerometerZAxis
- speedKmh
<br>
- timestamp ?
- gyroscope ?

In [655]:
import os
import json
import pandas as pd
from sklearn.preprocessing import MinMaxScaler
import numpy as np



from sklearn.preprocessing import LabelEncoder
from sklearn.model_selection import train_test_split
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import LSTM, Dense
from tensorflow.keras.preprocessing.sequence import pad_sequences
from tensorflow.keras.saving import save_model

# Per-car containers: one DataFrame per recording file, plus its label.
data_bmw, data_honda = [], []
labels_bmw, labels_honda = [], []

# Combined sequences and labels for both cars (filled in by later cells).
prepared_data, prepared_labels = [], []

# Rescales every feature column into the [0, 1] interval.
scaler = MinMaxScaler(feature_range=(0, 1))

In [656]:
# Load every recording (*.json) found under the 'Datasets' directory, which
# lives two levels above the current working directory, and sort the
# recordings into per-car lists together with their labels.
current_dir = os.getcwd()
root_dir = os.path.abspath(os.path.join(current_dir, os.pardir, os.pardir))
datasets_dir = os.path.join(root_dir, 'Datasets')

# Start from empty containers so that re-running this cell does not append
# the same recordings a second time.
data_bmw.clear()
data_honda.clear()
labels_bmw.clear()
labels_honda.clear()

# Columns that are not used as features for acceleration classification.
unused_columns = ['id', 'Latitude', 'Longitude',
                  'gyroscopeXAxis', 'gyroscopeYAxis', 'gyroscopeZAxis']

for root, dirs, files in os.walk(datasets_dir):
    # os.walk yields entries in filesystem order; sorting in place makes the
    # traversal (and therefore the sample order fed to the seeded
    # train/test split) reproducible across machines.
    dirs.sort()
    for file in sorted(files):
        if not file.endswith('.json'):
            continue

        # The car is the first word of the containing folder's name; the
        # label is the name of that folder's parent.
        car = os.path.basename(root).split()[0].upper()
        label = os.path.basename(os.path.dirname(root))

        # Use a context manager so the file handle is always closed.
        with open(os.path.join(root, file), encoding='utf-8') as json_file:
            file_data = json.load(json_file)['capturedData']

        # Convert to dataframe
        file_data = pd.DataFrame(file_data)

        # Drop unnecessary columns
        file_data = file_data.drop(unused_columns, axis=1)

        # The speed column appears under two spellings; unify it as 'speed'.
        file_data.rename(columns={'speed Km/h': 'speed', 'speedKmh': 'speed'},
                         inplace=True)

        # The timestamp column also has two spellings; drop whichever exists.
        file_data = file_data.drop(['createdAt', 'timestamp'], axis=1,
                                   errors='ignore')

        if car == 'BMW':
            data_bmw.append(file_data.copy())
            labels_bmw.append(label)
        elif car == 'HONDA':
            data_honda.append(file_data.copy())
            labels_honda.append(label)

# Fail early with a clear message instead of an IndexError further down.
if not data_bmw or not data_honda:
    raise FileNotFoundError(
        f'Expected BMW and HONDA recordings under {datasets_dir!r}, found '
        f'{len(data_bmw)} BMW and {len(data_honda)} HONDA file(s)'
    )

# Check every recording for NaNs, not only the first one of each car.
if any(frame.isnull().values.any() for frame in data_bmw + data_honda):
    print('NaNs in data')
else:
    print('NO NaNs in data')

NO NaNs in data


### - Feature Engineering
New Features:
- Speed (Calculated by accelerometerY)
- Accumulated Acceleration
- Distance moved ?

In [657]:
# # calculate speed from accelerometerY (in g force)
# # Starts at speed 0 and adds the acceleration, via the acceleration formula, at each time step

# # create SpeedCalc column
# for i in range(len(data_bmw)):
#     data_bmw[i]['SpeedCalc'] = 0.0

# # sensor reading 4 times per second
# dt = 0.25

# # calculate speed using accelerometerY
# for i in range(len(data_bmw)):
#     for j in range(len(data_bmw[i])):
#         if j == 0:
#             data_bmw[i].at[j, 'SpeedCalc'] = data_bmw[i].at[j, 'accelerometerYAxis']*10 * dt
#         else:
#             if data_bmw[i].at[j, 'accelerometerYAxis'] < 0:
#                 accel = 0
#             else:
#                 accel = data_bmw[i].at[j, 'accelerometerYAxis']*10

#             current_speed = data_bmw[i].at[j-1, 'SpeedCalc']
#             data_bmw[i].at[j, 'SpeedCalc'] += (accel * dt) + current_speed
#             print('Speedometer: ', data_bmw[i].at[j, 'speed'], 'SpeedCalc: ', data_bmw[i].at[j, 'SpeedCalc'], 'Difference: ', data_bmw[i].at[j, 'SpeedCalc'] - data_bmw[i].at[j, 'speed'], 'Accel: ', data_bmw[i].at[j, 'accelerometerYAxis'])
#     print('')    


### - Normalize and Scale Features

In [658]:
# Min-max normalize the features of each car separately, then split the
# scaled rows back into one DataFrame per recording and collect all
# recordings (BMW first, then Honda) in prepared_data / prepared_labels.
feature_columns = ['accelerometerXAxis', 'accelerometerYAxis', 'accelerometerZAxis', 'speed']

concat_data_bmw = pd.concat(data_bmw)
concat_data_honda = pd.concat(data_honda)

print('Concat Data BMW: ')
print(concat_data_bmw.values[1])

# Choose columns to normalize
concat_data_bmw = concat_data_bmw[feature_columns]
concat_data_honda = concat_data_honda[feature_columns]

# Normalize. The same scaler object is re-fitted for each car, so after this
# cell it only holds the Honda min/max values.
scaled_data_bmw = scaler.fit_transform(concat_data_bmw)
print('Scaled Data BMW: ')
print(scaled_data_bmw)

scaled_data_honda = scaler.fit_transform(concat_data_honda)
print('Scaled Data Honda: ')
print(scaled_data_honda)

# Rebuild the combined list from scratch so re-running this cell does not
# duplicate samples.
prepared_data.clear()

# Replace each recording with its normalized rows. Recordings may differ in
# length, so track running row offsets into the concatenated array instead
# of assuming every recording has the same number of rows.
start = 0
for i, frame in enumerate(data_bmw):
    stop = start + len(frame)
    data_bmw[i] = pd.DataFrame(scaled_data_bmw[start:stop], columns=feature_columns)
    prepared_data.append(data_bmw[i])
    start = stop

start = 0
for i, frame in enumerate(data_honda):
    stop = start + len(frame)
    data_honda[i] = pd.DataFrame(scaled_data_honda[start:stop], columns=feature_columns)
    prepared_data.append(data_honda[i])
    start = stop

print('Aggregated Data: ')
print(prepared_data)

# Labels follow the same order as prepared_data: BMW first, then Honda.
prepared_labels = labels_bmw + labels_honda
print('Labels: ')
print(prepared_labels)

Concat Data BMW: 
[0.23034243 0.17552605 0.12298107 0.        ]
Scaled Data BMW: 
[[0.42432991 0.59656404 0.72300274 0.        ]
 [0.43609964 0.5425548  0.73695225 0.        ]
 [0.42231019 0.69527125 0.69672298 0.        ]
 ...
 [0.24617761 0.53066339 0.81379612 0.95426202]
 [0.36474162 0.48877002 0.740912   0.95426202]
 [0.47802334 0.4854856  0.63905244 0.95426202]]
Scaled Data Honda: 
[[0.74303088 0.44561212 0.51776463 0.        ]
 [0.80168045 0.49898038 0.52469522 0.        ]
 [0.80317997 0.50692158 0.47638032 0.00923695]
 ...
 [0.76952134 0.38315083 0.60441686 0.93172695]
 [0.79364197 0.42399552 0.59923704 0.93172695]
 [0.73517428 0.40522551 0.44928707 0.93172695]]
Aggregated Data: 
[    accelerometerXAxis  accelerometerYAxis  accelerometerZAxis     speed
0             0.424330            0.596564            0.723003  0.000000
1             0.436100            0.542555            0.736952  0.000000
2             0.422310            0.695271            0.696723  0.000000
3          

In [659]:
# Number of time steps every sequence is padded or truncated to.
time_steps = 30

X = prepared_data

# Map the string class labels to integer ids.
label_encoder = LabelEncoder()
label_encoder.fit(prepared_labels)
y = label_encoder.transform(prepared_labels)

# Bring every recording to exactly `time_steps` rows: shorter ones are
# padded at the end, longer ones are cut at the end.
X_padded = pad_sequences(
    X,
    maxlen=time_steps,
    dtype='float32',
    padding='post',
    truncating='post',
)
print('Padded Data: ')
print(X_padded.shape)

# Hold out 20% of the recordings for testing (fixed seed).
X_train, X_test, y_train, y_test = train_test_split(
    X_padded,
    y,
    test_size=0.2,
    random_state=42,
)

Padded Data: 
(60, 30, 4)


In [660]:
# Build, train, evaluate and save an LSTM classifier on the padded sequences.
from tensorflow.keras import Input

# Derive the layer sizes from the data instead of hard-coding them, so the
# model keeps working if features or label classes are added or removed.
sequence_shape = X_train.shape[1:]          # (time steps, features)
num_classes = len(label_encoder.classes_)   # one output unit per label

# LSTM model. An explicit Input layer is used rather than passing
# `input_shape=` to the LSTM layer, which Keras warns against.
model = Sequential([
    Input(shape=sequence_shape),
    LSTM(64),
    Dense(num_classes, activation='softmax'),
])

# Compile model. The labels are integer ids, hence the sparse loss.
model.compile(loss='sparse_categorical_crossentropy', optimizer='adam', metrics=['accuracy'])

# Train model
# NOTE(review): the recorded run shows val_loss rising during the first
# epochs; consider fewer epochs or early stopping once a larger validation
# set is available.
model.fit(X_train, y_train, epochs=2000, batch_size=32, validation_split=0.1)

# Evaluate model
loss, accuracy = model.evaluate(X_test, y_test)
print(f"Test Accuracy: {accuracy}")

save_model(model, 'model.keras')


  super().__init__(**kwargs)


Epoch 1/2000
[1m2/2[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m3s[0m 367ms/step - accuracy: 0.2953 - loss: 1.0975 - val_accuracy: 0.4000 - val_loss: 1.0976
Epoch 2/2000
[1m2/2[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 50ms/step - accuracy: 0.3626 - loss: 1.0868 - val_accuracy: 0.4000 - val_loss: 1.1244
Epoch 3/2000
[1m2/2[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 69ms/step - accuracy: 0.3990 - loss: 1.0850 - val_accuracy: 0.2000 - val_loss: 1.1516
Epoch 4/2000
[1m2/2[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 62ms/step - accuracy: 0.4198 - loss: 1.0824 - val_accuracy: 0.2000 - val_loss: 1.1737
Epoch 5/2000
[1m2/2[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 49ms/step - accuracy: 0.3886 - loss: 1.0682 - val_accuracy: 0.2000 - val_loss: 1.1914
Epoch 6/2000
[1m2/2[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 42ms/step - accuracy: 0.3781 - loss: 1.0616 - val_accuracy: 0.2000 - val_loss: 1.2200
Epoch 7/2000
[1m2/2[0m [32m━━━