In [None]:
import pandas as pd
import matplotlib.pyplot as plt
import numpy as np
import tensorflow as tf
import keras as keras
from keras.models import Sequential
from keras.layers import *
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler, MinMaxScaler
from sklearn.preprocessing import OneHotEncoder
from os import listdir
import math

Read every dataset in the folder

In [None]:
# os.listdir returns directory entries in arbitrary order; sorting the names
# keeps the row order of the merged train/test frames stable across runs and
# machines.
files = sorted(listdir('datasets'))

# Accumulators: `datasets` receives one labelled frame per file (filled by the
# loading loop), `datasets_train` / `datasets_test` receive the per-file parts
# appended by split_data.
datasets = []
datasets_train = []
datasets_test = []

def add_activity(name, df):
    """Tag every row of ``df`` with the activity inferred from ``name``.

    ``name`` is searched for the substrings ``'still'`` and then ``'walking'``
    (in that order); the first one found becomes the label.  When neither is
    present the label falls back to ``'running'``.

    Args:
        name: String to inspect (the loading loop passes the file name).
        df: DataFrame to label. It is modified in place.

    Returns:
        The same ``df`` object, now carrying an ``activity`` column.
    """
    known_activities = ('still', 'walking')
    label = next((act for act in known_activities if act in name), 'running')
    df['activity'] = label
    return df

def split_data(df):
    """Split ``df`` by row position and store both parts in the global lists.

    The last third of the rows (rounded down) becomes the test part, everything
    before it the train part.  Rows are not shuffled.  The train part is
    appended to ``datasets_train``, the test part to ``datasets_test``, and the
    shape of each part is printed.

    Args:
        df: DataFrame to split.

    Returns:
        None. The results are only available through the two global lists.
    """
    total_rows = len(df)
    test_rows = total_rows // 3
    cut = total_rows - test_rows

    head = df.iloc[:cut, :]
    tail = df.iloc[cut:, :]

    datasets_train.append(head)
    datasets_test.append(tail)

    for part in (head, tail):
        print(part.shape)


# Load every file, label its rows with the activity inferred from the file
# name, keep the frame and report its shape.
for file in files:
    df = pd.read_csv(f'datasets/{file}')
    df = add_activity(file, df)
    datasets.append(df)
    print("Dimensione dataset {} ".format(file), df.shape)

# Split each file on its own so that every recording contributes rows to both
# the train and the test side.
for df in datasets:
    split_data(df)

# Stack the per-file parts into one train frame and one test frame with a
# fresh 0..n-1 index.
df_train, df_test = (
    pd.concat(parts, ignore_index=True)
    for parts in (datasets_train, datasets_test)
)

print("Dimensione merged dataset train ", df_train.shape)
print("Dimensione merged dataset test ", df_test.shape)



Normalizzazione dei dati

In [49]:
# Every column except the last one is treated as a feature to be scaled.
num_columns = df_train.shape[1]

# Fit the min-max scaler on the training rows only; normalize_df applies this
# same fitted scaler to whichever frame it is given.
normalizer = MinMaxScaler().fit(df_train.iloc[:, :num_columns - 1])

def normalize_df(df):
    """Return a copy of ``df`` whose feature columns are min-max scaled.

    All columns except the last ``df_train`` column position (i.e. the first
    ``num_columns - 1`` columns) are passed through the module-level
    ``normalizer``; the remaining column is copied unchanged.

    Args:
        df: DataFrame with the same column layout as the frame the scaler was
            fitted on.

    Returns:
        A new DataFrame; ``df`` itself is not modified.
    """
    feature_cols = slice(0, num_columns - 1)

    scaled = df.copy()
    scaled.iloc[:, feature_cols] = normalizer.transform(df.iloc[:, feature_cols])
    return scaled

# Scale the train and the test frame with the already fitted scaler.
norm_train_df, norm_test_df = (
    normalize_df(split) for split in (df_train, df_test)
)


# Disabled diagnostic plots: raw (left column) vs. normalized (right column)
# xa / ya / za signals, first for the train frame, then for the test frame.
# fig, ((ax1, ax2),( ax3, ax4), (ax5, ax6)) = plt.subplots(3,2)
# ax1.plot(np.arange(0, len(df_train.xa)), df_train.xa)
# ax3.plot(np.arange(0, len(df_train.ya)), df_train.ya)
# ax5.plot(np.arange(0, len(df_train.za)), df_train.za)

# ax2.plot(np.arange(0, len(norm_train_df.xa)), norm_train_df.xa)
# ax4.plot(np.arange(0, len(norm_train_df.ya)), norm_train_df.ya)
# ax6.plot(np.arange(0, len(norm_train_df.za)), norm_train_df.za)

# fig, ((ax1, ax2),( ax3, ax4), (ax5, ax6)) = plt.subplots(3,2)
# ax1.plot(np.arange(0, len(df_test.xa)), df_test.xa)
# ax3.plot(np.arange(0, len(df_test.ya)), df_test.ya)
# ax5.plot(np.arange(0, len(df_test.za)), df_test.za)

# ax2.plot(np.arange(0, len(norm_test_df.xa)), norm_test_df.xa)
# ax4.plot(np.arange(0, len(norm_test_df.ya)), norm_test_df.ya)
# ax6.plot(np.arange(0, len(norm_test_df.za)), norm_test_df.za)

# Show the normalized training frame as the cell output.
norm_train_df

Unnamed: 0,xa,ya,za,activity
0,0.559771,0.579491,0.620094,running
1,0.577511,0.452969,0.797305,running
2,0.278930,0.677610,0.724956,running
3,0.200055,0.753965,0.755126,running
4,0.697052,0.390631,0.651142,running
...,...,...,...,...
226,0.613537,0.829583,0.548916,walking
227,0.614083,0.847658,0.512595,walking
228,0.627456,0.808558,0.456649,walking
229,0.603166,0.806345,0.453134,walking


Reshape the datasets into sliding windows

In [52]:
from scipy import stats

# Column count of the normalized training frame; the last column is the one
# that is not scaled by normalize_df (presumably the activity label).
num_cols = norm_train_df.shape[1]

# Slice the same frame that num_cols was measured on. The previous version
# sliced `df`, the loop variable left over from the loading cell, i.e. the last
# raw (unnormalized) file only.
X = norm_train_df.iloc[:, :num_cols - 1]
y = norm_train_df.iloc[:, num_cols - 1:]


def reshape_data(df, time_steps, step):
    """Cut ``df`` into fixed-length sliding windows with one label per window.

    The last column of ``df`` is taken as the label, all preceding columns as
    features.  Windows start at rows ``0, step, 2*step, ...`` and each spans
    ``time_steps`` consecutive rows; only complete windows are produced,
    including the one that ends exactly on the last row.  A window's label is
    the most frequent label among its rows (on a tie, the smallest label in
    sort order).

    Args:
        df: DataFrame whose last column holds the label.
        time_steps: Number of rows per window.
        step: Row offset between the starts of consecutive windows.

    Returns:
        A tuple ``(Xs, ys)``: ``Xs`` is an array of shape
        ``(n_windows, time_steps, n_features)`` and ``ys`` an array of shape
        ``(n_windows, 1)``.  With no complete window, ``Xs`` is empty and
        ``ys`` has shape ``(0, 1)``.
    """
    # Derive the column split from the frame itself instead of the notebook
    # global `num_cols`, which a later cell reassigns to another frame's width.
    n_features = df.shape[1] - 1
    features = df.iloc[:, :n_features].to_numpy()
    labels = df.iloc[:, n_features]

    Xs = []
    ys = []
    # "+ 1" so that the final full window (ending on the last row) is kept.
    for start in range(0, len(df) - time_steps + 1, step):
        stop = start + time_steps
        Xs.append(features[start:stop])
        # Series.mode handles string labels and does not depend on the
        # version-specific output shape of scipy.stats.mode.
        ys.append(labels.iloc[start:stop].mode().iloc[0])

    return np.array(Xs), np.array(ys).reshape(-1, 1)

OneHotEncoder per la y

In [None]:
def encode_y(df):
    """One-hot encode the ``activity`` column of ``df``.

    A new ``OneHotEncoder`` is fitted on the ``activity`` values of ``df``.
    One column per category found by the encoder (named after the category) is
    appended, and the ``activity`` column itself is removed.

    NOTE(review): the encoder is fitted on every call, so two frames only end
    up with the same one-hot columns if they contain the same set of
    activities.

    Args:
        df: DataFrame with an ``activity`` column. It is not modified.

    Returns:
        A new DataFrame holding the remaining columns of ``df`` followed by the
        one-hot columns.
    """
    encoder = OneHotEncoder(sparse_output=False)

    encoded_y = encoder.fit_transform(df.activity.to_numpy().reshape(-1, 1))

    # Add the one-hot columns to a copy: writing them into `df` would silently
    # widen the caller's frame, which breaks positional column slicing and
    # makes re-running earlier cells give different results.
    encoded_df = df.copy()
    encoded_df[encoder.categories_[0]] = encoded_y
    return encoded_df.drop('activity', axis=1)

# One-hot encode the labels of both normalized splits, then print the two
# resulting frames one after the other.
encoded_train_df, encoded_test_df = map(encode_y, (norm_train_df, norm_test_df))

print(encoded_train_df, encoded_test_df, sep="\n")
    

Separate X and Y

In [None]:
# encode_y places the one-hot label columns after the feature columns, so the
# trailing `num_cols_y` columns are the targets and everything before them the
# inputs. Three label columns: still / walking / running.
num_cols_y = 3
num_cols = encoded_train_df.shape[1]
num_cols_x = num_cols - num_cols_y

X_train, Y_train = (
    encoded_train_df.iloc[:, :num_cols_x],
    encoded_train_df.iloc[:, num_cols_x:],
)
X_test, Y_test = (
    encoded_test_df.iloc[:, :num_cols_x],
    encoded_test_df.iloc[:, num_cols_x:],
)

# Modello

In [None]:
# Fully connected classifier: three Dense(256, relu) stages, each followed by
# Dropout(0.3) and BatchNormalization, and a 3-unit softmax output (one unit
# per activity).
model = Sequential()
# Take the input width from the feature matrix instead of hard-coding it, so
# the first layer always matches what model.fit receives.
model.add(Dense(256, input_shape=(X_train.shape[1],), activation='relu'))
model.add(Dropout(rate=0.3))
model.add(BatchNormalization())
model.add(Dense(256, activation = 'relu'))
model.add(Dropout(rate=0.3))
model.add(BatchNormalization())
model.add(Dense(256, activation = 'relu'))
model.add(Dropout(rate=0.3))
model.add(BatchNormalization())
# Kept from the original; the activations are already (batch, units) here.
model.add(Flatten())
model.add(Dense(3, activation = 'softmax'))
# summary() prints the table itself and returns None, so wrapping it in
# print() only added a stray "None" line to the output.
model.summary()


Modello - ottimizzatore e funzione di perdita

In [None]:
# Adam optimizer with categorical cross-entropy (the targets are one-hot
# encoded); accuracy is reported alongside the loss.
model.compile(optimizer='adam', loss='categorical_crossentropy', metrics=['accuracy'])

Early stopping per evitare overfitting

In [None]:
# Stop training once the monitored quantity (left at the Keras default) has not
# improved by at least 0.001 for 10 epochs, and roll the model back to the
# best weights seen.
early_stopping = keras.callbacks.EarlyStopping(
    min_delta=0.001,
    patience=10,
    restore_best_weights=True,
)

In [None]:
# Train for at most 100 epochs in batches of 512 rows; early stopping may end
# the run sooner.
# NOTE(review): the test split is also passed as validation data, so the
# early-stopping decision is made on the same rows later used for evaluation.
# Consider carving a separate validation set out of the training data.
history = model.fit(
    X_train,
    Y_train,
    epochs=100,
    batch_size=512,
    validation_data=(X_test, Y_test),
    callbacks=[early_stopping],
)