In [45]:
import pandas as pd
import matplotlib.pyplot as plt
import numpy as np
import tensorflow as tf
import keras as keras
from keras.models import Sequential
from keras.layers import *
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler, MinMaxScaler
from sklearn.preprocessing import OneHotEncoder
from os import listdir
import math

Read every dataset in the folder

In [46]:
# os.listdir returns every entry of the folder in arbitrary order. Keep only the
# CSV files and sort them, so stray files (checkpoints, hidden files) are never
# read or labelled and the row order of the merged data is reproducible.
files = sorted(name for name in listdir('datasets') if name.lower().endswith('.csv'))

# Per-file frames, plus their train / test parts (filled in by split_data).
datasets = []
datasets_train = []
datasets_test = []

def add_activity(name, df):
    """Tag `df` with an 'activity' column inferred from the file name.

    The label is 'still' or 'walking' when that word occurs in `name`
    ('still' is checked first); any other name is labelled 'running'.
    The column is written into `df` itself, which is also returned.
    """
    label = 'running'
    for candidate in ('still', 'walking'):
        if candidate in name:
            label = candidate
            break
    df['activity'] = label
    return df

def split_data(df):
    """Split `df` by row position and store both parts in the module-level lists.

    The last third of the rows (rounded down) becomes the test part and the
    rows before it the training part. The parts are appended to
    `datasets_train` and `datasets_test` and their shapes are printed.
    Nothing is returned.
    """
    row_count = df.shape[0]
    cutoff = row_count - row_count // 3

    train_part, test_part = df.iloc[:cutoff], df.iloc[cutoff:]

    datasets_train.append(train_part)
    datasets_test.append(test_part)

    for part in (train_part, test_part):
        print(part.shape)


# Load each file, label its rows with the activity found in the file name,
# and report the shape of the resulting frame.
for file in files:
    df = add_activity(file, pd.read_csv(f'datasets/{file}'))
    datasets.append(df)
    print("Dimensione dataset {} ".format(file), df.shape)

# Split every file on its own, so each file contributes rows to both the
# training part and the test part.
for df in datasets:
    split_data(df)

# Stack the per-file parts into one training frame and one test frame,
# renumbering the rows from 0.
df_train = pd.concat(datasets_train, ignore_index=True)
df_test = pd.concat(datasets_test, ignore_index=True)

print("Dimensione merged dataset train ", df_train.shape)
print("Dimensione merged dataset test ", df_test.shape)



Dimensione dataset running_16_02_2023_15_12_04.csv  (130, 4)
Dimensione dataset still_16_02_2023_15_09_23.csv  (57, 4)
Dimensione dataset walking_16_02_2023_15_11_18.csv  (158, 4)
(87, 4)
(43, 4)
(38, 4)
(19, 4)
(106, 4)
(52, 4)
Dimensione merged dataset train  (231, 4)
Dimensione merged dataset test  (114, 4)


Normalizzazione dei dati

In [47]:
# Column count of the merged training frame; the slices below treat every
# column except the last one as a feature.
num_columns = df_train.shape[1]

# Fit the min-max scaler on the training features only.
normalizer = MinMaxScaler().fit(df_train.iloc[:, : num_columns - 1])

def normalize_df(df):
    """Return a copy of `df` whose feature columns are scaled by `normalizer`.

    The feature columns are the first `num_columns - 1` columns; the remaining
    column(s) are copied unchanged. The input frame is not modified.
    """
    feature_cols = slice(0, num_columns - 1)
    scaled_values = normalizer.transform(df.iloc[:, feature_cols])

    scaled_df = df.copy()
    scaled_df.iloc[:, feature_cols] = scaled_values
    return scaled_df

# Scale both splits with the scaler that was fitted on the training frame.
norm_train_df = normalize_df(df_train)
norm_test_df = normalize_df(df_test)


# Disabled diagnostic plots (3x2 grid): raw xa / ya / za in the left column
# against the normalized signals in the right column, for the training frame.
# fig, ((ax1, ax2),( ax3, ax4), (ax5, ax6)) = plt.subplots(3,2)
# ax1.plot(np.arange(0, len(df_train.xa)), df_train.xa)
# ax3.plot(np.arange(0, len(df_train.ya)), df_train.ya)
# ax5.plot(np.arange(0, len(df_train.za)), df_train.za)

# ax2.plot(np.arange(0, len(norm_train_df.xa)), norm_train_df.xa)
# ax4.plot(np.arange(0, len(norm_train_df.ya)), norm_train_df.ya)
# ax6.plot(np.arange(0, len(norm_train_df.za)), norm_train_df.za)

# Same disabled comparison for the test frame.
# fig, ((ax1, ax2),( ax3, ax4), (ax5, ax6)) = plt.subplots(3,2)
# ax1.plot(np.arange(0, len(df_test.xa)), df_test.xa)
# ax3.plot(np.arange(0, len(df_test.ya)), df_test.ya)
# ax5.plot(np.arange(0, len(df_test.za)), df_test.za)

# ax2.plot(np.arange(0, len(norm_test_df.xa)), norm_test_df.xa)
# ax4.plot(np.arange(0, len(norm_test_df.ya)), norm_test_df.ya)
# ax6.plot(np.arange(0, len(norm_test_df.za)), norm_test_df.za)

# Display the normalized training frame as the cell output.
norm_train_df

Unnamed: 0,xa,ya,za,activity
0,0.559771,0.579491,0.620094,running
1,0.577511,0.452969,0.797305,running
2,0.278930,0.677610,0.724956,running
3,0.200055,0.753965,0.755126,running
4,0.697052,0.390631,0.651142,running
...,...,...,...,...
226,0.613537,0.829583,0.548916,walking
227,0.614083,0.847658,0.512595,walking
228,0.627456,0.808558,0.456649,walking
229,0.603166,0.806345,0.453134,walking


Reshape the datasets into fixed-length windows

In [48]:
def reshape_data(df, time_steps, step):
    """Cut `df` into fixed-length windows of consecutive rows.

    Every column except the last is treated as a feature and the last column
    as the label. A window of `time_steps` rows is taken every `step` rows and
    is labelled with the most frequent label among its rows (ties resolve to
    the first value in sorted order, as returned by `Series.mode`).

    Args:
        df: DataFrame whose last column holds the label.
        time_steps: number of rows per window; must be positive.
        step: number of rows between the starts of consecutive windows;
            must be positive.

    Returns:
        Tuple `(X, y)` where `X` has shape (n_windows, time_steps, n_features)
        and `y` has shape (n_windows, 1).

    Raises:
        ValueError: if `time_steps` or `step` is not positive.
    """
    if time_steps <= 0 or step <= 0:
        raise ValueError("time_steps and step must be positive")

    X = df.iloc[:, :-1]
    # Select the label column by position instead of the hard-coded name 'activity'.
    y = df.iloc[:, -1]

    print(X.shape)

    Xs = []
    ys = []
    # `+ 1` keeps the last full window when it ends exactly on the final row;
    # without it that window was silently dropped.
    for start in range(0, len(X) - time_steps + 1, step):
        stop = start + time_steps
        Xs.append(X.iloc[start:stop].values)
        ys.append(y.iloc[start:stop].mode().iloc[0])

    # With no complete window, still return a 3-D (empty) feature array so
    # downstream shape handling does not break.
    X_windows = np.array(Xs) if Xs else np.empty((0, time_steps, X.shape[1]))
    return X_windows, np.array(ys).reshape(-1, 1)


# Window length and stride, in rows. Named once so the train and test splits
# are always windowed with the same settings.
TIME_STEPS = 10
STEP = 10

# NOTE(review): the windows are cut from the merged frames, so a window that
# straddles the boundary between two files mixes two activities and is
# labelled with the majority one.
X_train, y_train = reshape_data(norm_train_df, TIME_STEPS, STEP)
X_test, y_test = reshape_data(norm_test_df, TIME_STEPS, STEP)


(231, 3)
(114, 3)


OneHotEncoder per la y

In [49]:
def encode_y(y, encoder=None):
    """One-hot encode a column vector of labels.

    Args:
        y: array of shape (n_samples, 1) holding the labels.
        encoder: optional, already fitted OneHotEncoder. When given it is only
            used to transform `y`, so the output columns follow the categories
            it was fitted on. When omitted, a new dense-output encoder is
            fitted on `y` itself.

    Returns:
        The encoded labels, one column per category known to the encoder.
    """
    if encoder is None:
        encoder = OneHotEncoder(sparse_output=False)
        return encoder.fit_transform(y)
    return encoder.transform(y)


# Fit the encoder once, on the training labels, and reuse it for the test
# labels. Fitting a separate encoder per split gives the two splits different
# column orders (or widths) whenever their label sets differ.
label_encoder = OneHotEncoder(sparse_output=False).fit(y_train)

encoded_y_train = encode_y(y_train, label_encoder)
encoded_y_test = encode_y(y_test, label_encoder)

print(encoded_y_train.shape)
    

(23, 3)


In [50]:
# Print the shapes of the windowed features and of the encoded labels

print(X_train.shape)
print(X_test.shape)

print(encoded_y_train.shape)
print(encoded_y_test.shape)
# Display the encoded training labels as the cell output.
encoded_y_train

(23, 10, 3)
(11, 10, 3)
(23, 3)
(11, 3)


array([[1., 0., 0.],
       [1., 0., 0.],
       [1., 0., 0.],
       [1., 0., 0.],
       [1., 0., 0.],
       [1., 0., 0.],
       [1., 0., 0.],
       [1., 0., 0.],
       [1., 0., 0.],
       [0., 1., 0.],
       [0., 1., 0.],
       [0., 1., 0.],
       [0., 1., 0.],
       [0., 0., 1.],
       [0., 0., 1.],
       [0., 0., 1.],
       [0., 0., 1.],
       [0., 0., 1.],
       [0., 0., 1.],
       [0., 0., 1.],
       [0., 0., 1.],
       [0., 0., 1.],
       [0., 0., 1.]])

# Modello

In [58]:
# Sequence classifier: one LSTM layer over each window, two fully connected
# blocks (each followed by dropout and batch normalization), softmax output.
model = Sequential()
# The LSTM input is one window: (rows per window, features per row).
model.add(LSTM(units=128, input_shape=[X_train.shape[1], X_train.shape[2]]))
model.add(Dropout(rate=0.3))
model.add(BatchNormalization())
model.add(Dense(256, activation='relu'))
model.add(Dropout(rate=0.3))
model.add(BatchNormalization())
model.add(Dense(256, activation='relu'))
model.add(Dropout(rate=0.3))
model.add(BatchNormalization())
# The tensor is already 2-D at this point, so Flatten leaves it unchanged;
# it is kept so the layer stack stays the same as before.
model.add(Flatten())
# One output unit per encoded class.
model.add(Dense(encoded_y_train.shape[1], activation='softmax'))
# summary() prints the table itself and returns None, so wrapping it in
# print() only appended a stray "None" line to the output.
model.summary()


Model: "sequential_6"
_________________________________________________________________
 Layer (type)                Output Shape              Param #   
 lstm_1 (LSTM)               (None, 128)               67584     
                                                                 
 dropout_18 (Dropout)        (None, 128)               0         
                                                                 
 batch_normalization_18 (Bat  (None, 128)              512       
 chNormalization)                                                
                                                                 
 dense_23 (Dense)            (None, 256)               33024     
                                                                 
 dropout_19 (Dropout)        (None, 256)               0         
                                                                 
 batch_normalization_19 (Bat  (None, 256)              1024      
 chNormalization)                                     

Modello - ottimizzatore e funzione di perdita

In [59]:
# Categorical cross-entropy matches the one-hot encoded targets; accuracy is
# the only metric reported next to the loss.
model.compile(loss='categorical_crossentropy', metrics=['accuracy'], optimizer='adam')

Early stopping per evitare overfitting

In [60]:
# Stop when the monitored quantity has not improved by at least 0.001 for
# 10 epochs in a row, then roll the model back to its best weights.
early_stopping = keras.callbacks.EarlyStopping(restore_best_weights=True, min_delta=0.001, patience=10)

In [61]:
# Keras takes the validation split from the *last* samples, before any
# shuffling, and the windows are still ordered by activity. Without a
# reshuffle the validation set holds a single activity and almost every batch
# is single-class, so permute the windows once with a fixed seed.
shuffle_order = np.random.default_rng(42).permutation(len(X_train))

history = model.fit(
    X_train[shuffle_order], encoded_y_train[shuffle_order],
    validation_split=0.1,
    batch_size=3,
    epochs=20,
    callbacks=[early_stopping],
    # Each window is an independent sample, so batches can be reshuffled
    # every epoch.
    shuffle=True,
)

Epoch 1/20
Epoch 2/20
Epoch 3/20
Epoch 4/20
Epoch 5/20
Epoch 6/20
Epoch 7/20
Epoch 8/20
Epoch 9/20
Epoch 10/20
Epoch 11/20
Epoch 12/20
Epoch 13/20
Epoch 14/20
Epoch 15/20
Epoch 16/20
Epoch 17/20
Epoch 18/20
Epoch 19/20
Epoch 20/20


In [56]:
# Evaluate on the test windows; with the compile settings used in this
# notebook the returned list is [loss, accuracy].
# NOTE(review): this cell's execution count (In [56]) is lower than those of
# the model cells above (In [58]-[61]), so the stored output may belong to an
# earlier model. Re-run it after training.
model.evaluate(X_test, encoded_y_test)



[1.1291143894195557, 0.3636363744735718]