# Acceleration Classification

## 1. Data Loading and Preprocessing

### - Extract Features and Labels
Relevant Features for Acceleration:
- accelerometerXAxis
- accelerometerYAxis
- accelerometerZAxis
- speedKmh
<br>
- timestamp ?
- gyroscope ?

In [None]:
import os
import json
import pandas as pd
from sklearn.preprocessing import MinMaxScaler
import numpy as np



from sklearn.preprocessing import LabelEncoder
from sklearn.model_selection import train_test_split
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import LSTM, Dense
from tensorflow.keras.preprocessing.sequence import pad_sequences
from tensorflow.keras.saving import save_model

# Per-car containers: one DataFrame per recording file, plus a parallel list
# holding the label taken from the recording's grandparent folder name.
data_bmw = []
data_honda = []
labels_bmw = []
labels_honda = []

prepared_data = []
prepared_labels = []

scaler = MinMaxScaler(feature_range=(0, 1))

# Resolve <cwd>/../../Datasets/Acceleration.
current_dir = os.getcwd()
root_dir = os.path.abspath(os.path.join(current_dir, os.pardir, os.pardir))
datasets_dir = os.path.join(root_dir, 'Datasets')
accel_dir = os.path.join(datasets_dir, 'Acceleration')

# Walk every sub-folder and load each .json recording into a DataFrame.
for root, dirs, files in os.walk(accel_dir):
    for file in files:
        if not file.endswith('.json'):
            continue

        # The folder holding the file starts with the car name; its parent
        # folder name is used as the label.
        car = os.path.basename(root).split()[0].upper()
        label = os.path.basename(os.path.dirname(root))

        # Use a context manager so the file handle is closed right away
        # instead of being left for the garbage collector.
        with open(os.path.join(root, file)) as json_file:
            file_data = json.load(json_file)['capturedData']

        file_data = pd.DataFrame(file_data)

        # Drop the columns that are not used as features. These four must be
        # present (a missing one raises KeyError, as before).
        file_data = file_data.drop(
            ['id', 'gyroscopeXAxis', 'gyroscopeYAxis', 'gyroscopeZAxis'],
            axis=1,
        )

        # The speed column appears under two spellings; unify it as 'speed'.
        file_data = file_data.rename(
            columns={'speed Km/h': 'speed', 'speedKmh': 'speed'}
        )

        # The time column also appears under two names and may be absent.
        file_data = file_data.drop(
            ['createdAt', 'timestamp'], axis=1, errors='ignore'
        )

        if car == 'BMW':
            data_bmw.append(file_data)
            labels_bmw.append(label)
        elif car == 'HONDA':
            data_honda.append(file_data)
            labels_honda.append(label)

# Combine both cars into a single dataset (BMW recordings first).
dataset = data_bmw + data_honda
dataset_labels = labels_bmw + labels_honda

# Check *every* recording for NaNs (not just the first one per car), and do
# not crash with IndexError when one car, or the whole folder, has no data.
if not dataset:
    print(f'No .json recordings found under {accel_dir}')
elif any(frame.isnull().values.any() for frame in dataset):
    print('NaNs in data')
else:
    print('NO NaNs in data')

In [None]:
import numpy as np
import pandas as pd

# Sliding-window parameters: each window holds `window_size` consecutive rows
# and shares `overlap` rows with the following window.
window_size = 4
overlap = 2

# Distance between consecutive window starts. The stride must be derived from
# the overlap; using `overlap` itself as the stride is only correct when
# overlap == window_size / 2 (which happens to hold for 4 and 2).
step = window_size - overlap
if step <= 0:
    raise ValueError('overlap must be smaller than window_size')

# Create sequences and labels
sequences = []
labels = []

for recording in dataset:
    # Recordings shorter than window_size yield an empty range and are skipped.
    for start in range(0, len(recording) - window_size + 1, step):
        window = recording.iloc[start:start + window_size]
        sequences.append(window.values)

        # Mean of accelerometerYAxis over the current window drives the label.
        mean = np.mean(window['accelerometerYAxis'])

        # Thresholds: > 0.5 aggressive, > 0.2 normal, otherwise slow.
        if mean > 0.5:
            label = 'aggressive'
        elif mean > 0.2:
            label = 'normal'
        else:
            label = 'slow'

        labels.append(label)

sequences = np.array(sequences)
labels = np.array(labels)

print('Sequences:')
print(sequences)
print('Labels:')
print(labels)

# Save sequences and labels to .npy files. np.save does not create missing
# directories, so make sure the target folder exists first.
os.makedirs('saves', exist_ok=True)
np.save('saves/sequences.npy', sequences)
np.save('saves/labels.npy', labels)

# Save dataset and labels to .pkl file
# import pickle
# with open('dataset.pkl', 'wb') as f:
#     pickle.dump((sequences, labels), f)


### - Feature Engineering
New Features:
- Speed (calculated from accelerometerYAxis)
- Accumulated Acceleration
- Distance moved ?

In [None]:
# Placeholder cell: no feature-engineering code has been written here yet.
pass

### - Normalize and Scale Features

In [None]:
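# NOTE: this whole cell is disabled (every line below is commented out).
# NOTE(review): the re-slicing loops near the end use i*len(data[i]) offsets,
# which is only correct if every recording has the same number of rows;
# confirm before re-enabling.
# # Normalize each car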
# concat_data_bmw = pd.concat(data_bmw)
# concat_data_honda = pd.concat(data_honda)

# print('Concat Data BMW: ')
# print(concat_data_bmw.values[1])

# # Choose columns to normalize
# concat_data_bmw = concat_data_bmw[['accelerometerXAxis', 'accelerometerYAxis', 'accelerometerZAxis', 'speed']]
# concat_data_honda = concat_data_honda[['accelerometerXAxis', 'accelerometerYAxis', 'accelerometerZAxis', 'speed']]

# # Normalize
# scaled_data_bmw = scaler.fit_transform(concat_data_bmw)
# print('Scaled Data BMW: ')
# print(scaled_data_bmw)

# scaled_data_honda = scaler.fit_transform(concat_data_honda)
# print('Scaled Data Honda: ')
# print(scaled_data_honda)

# # Substitute from data_bmw and data_honda to normalized data in prepared_data
# for i in range(len(data_bmw)):
#     data_bmw[i] = pd.DataFrame(scaled_data_bmw[i*len(data_bmw[i]):(i+1)*len(data_bmw[i])], columns=['accelerometerXAxis', 'accelerometerYAxis', 'accelerometerZAxis', 'speed'])
#     prepared_data.append(data_bmw[i])

# for i in range(len(data_honda)):
#     data_honda[i] = pd.DataFrame(scaled_data_honda[i*len(data_honda[i]):(i+1)*len(data_honda[i])], columns=['accelerometerXAxis', 'accelerometerYAxis', 'accelerometerZAxis', 'speed'])
#     prepared_data.append(data_honda[i])



# print('Aggregated Data: ')
# print(prepared_data)

# prepared_labels = labels_bmw + labels_honda
# print('Labels: ')
# print(prepared_labels)