In [2]:
import os
import numpy as np
import pandas as pd
import time
from matplotlib import pyplot as plt
from sklearn.preprocessing import LabelEncoder, OneHotEncoder, StandardScaler 
from sklearn.model_selection import train_test_split
from sklearn import metrics
import scipy.stats as stats

import tensorflow.keras as keras
import tensorflow as tf

import torch
import torch.nn as nn
import torch.nn.functional as F
from torch.utils.data import DataLoader, Dataset

In [3]:
screens = ['Focus', 'Mathisis', 'Memoria', 'Reacton', 'Speedy']
screens_code = ['1', '2', '3', '4', '5']

base_path = "/home/joaoneto/biometria/sensors"
phone_accel_file_paths = []
phone_gyro_file_paths = []

for directories, subdirectories, files in os.walk(base_path):
    for filename in files:
        if "accel" in filename:
            phone_accel_file_paths.append(f"{base_path}/accel/{filename}")
            
data = pd.concat(map(pd.read_csv, phone_accel_file_paths))

ValueError: No objects to concatenate

In [None]:
data

In [None]:
users = data['player_id'].unique()

In [None]:
def load_data_1():
    train = np.empty((0, frame_size, 3))
    user_list = []
    frame_size = 500
    step = 50

    for user in users:
        data_user = data[data['player_id']==user]  
        data_user = data_user.iloc[:,[0,1,2]]
        data_user = data_user[500:-500]
        data_user = data_user.values
        data_user = data_user.astype('float32')
        frames = [data_user[i:i+frame_size, :] for i in range(0,data_user.shape[0]-frame_size,step)]
        user_list.extend([user]*len(frames))           
        frames = np.dstack(frames)
        frames = np.rollaxis(frames,-1)
        train = np.vstack((train, frames))
        

    return train, user_list

In [None]:
def load_data_2():
    train = []
    frame_size = 500
    step = 50

    for user in users:
        data_user = data[data['player_id']==user]  
        data_user = data_user.iloc[:,[0,1,2]]
        for w in range(0, data_user.shape[0] - frame_size, step):
            end = w + frame_size        
            frame = data_user.iloc[w:end,[0, 1, 2]]        
            train.append(frame)

    return train

In [None]:
def load_data_3():
    data['session'] = data['player_id'] + "_" + data['timestamp'].apply(str)
    
    counts = data['session'].value_counts()
    counts = counts[counts >= 128]
    counts_list = list(counts.keys())
    df = data[data.session.isin(counts_list) == True]
    
    for idx, val in enumerate(screens):
        df.loc[df.screen.str.contains(screens[idx]), 'screen'] = screens_code[idx]
        
    win_count = 0
    total_win_count = 0
    range_screen = range(1, 6)
    raw_signal = df
    axis_list = ['x_accel', 'y_accel', 'z_accel']
    user_list = []
    window_size = 128
    axis_dict = {}

    for axis in axis_list:  
        features_one = []
        for class_label in range_screen:   
            screen_ID = screens_code[class_label - 1]    
            raw_data_one_activity = np.array(raw_signal.loc[raw_signal['screen'] == screen_ID, [axis]])
            raw_data_one_activity = pd.DataFrame(raw_data_one_activity)   
            player_id_data = np.array(raw_signal.loc[raw_signal['screen'] == screen_ID, ['player_id']])
            player_id_data = pd.DataFrame(player_id_data)  

            for data_point in range(0, len(raw_data_one_activity), window_size):        
                win_count += 1
                start = data_point
                end = start + window_size
                time_domain_window = raw_data_one_activity[start:end] 

                if (len(time_domain_window) == 128):                
                    features_one.append(time_domain_window)
                    if (axis == 'z_accel'):                    
                        user_list.append(player_id_data[start:end][0].unique()[0])                    

        axis_dict[axis] = features_one
        
    new = (axis_dict[axis_list[0]], axis_dict[axis_list[1]], axis_dict[axis_list[2]])
    new_x = new[0]
    new_x = np.asarray(new_x)
    new_x = new_x.reshape(28473,-1)
    print(new_x.shape)

    new_y = new[1]
    new_y = np.asarray(new_y)
    new_y = new_y.reshape(28473,-1)
    print(new_y.shape)

    new_z = new[2]
    new_z = np.asarray(new_z)
    new_z = new_z.reshape(28473,-1)
    print(new_z.shape)
    
    data_join = pd.DataFrame(np.concatenate((new_x, new_y, new_z), axis=1))
    data_join['user'] = user_list
    
    return data_join

In [None]:
def normalize_rows(df):
    array = df.values    
    nsamples, nfeatures = array.shape
    nfeatures = nfeatures - 1
    X = array[:, 0:nfeatures]
    y = array[:, -1]
    
    rows, cols = X.shape
    
    for i in range(0, rows):
        row = X[i,:]
        mu = np.mean( row )
        sigma = np.std( row )
        if( sigma == 0 ):
            sigma = 0.0001
        X[i,:] = (X[i,:] - mu) / sigma
            
    df = pd.DataFrame( X )
    df['user'] = y 
    return df

def unique(list1):       
    list_set = set(list1) 
    unique_list = (list(list_set)) 
    unique_list.sort()
    return unique_list

def create_userids( df ):
    array = df.values
    y = array[:, -1]
    return unique( y )

def build_fcn(input_shape, nb_classes, file_path, num_filters = 128):
    input_layer = keras.layers.Input(input_shape) 

    conv1 = keras.layers.Conv1D(filters=num_filters, kernel_size=8, padding='same')(input_layer)
    conv1 = keras.layers.BatchNormalization()(conv1)
    conv1 = keras.layers.Activation(activation='relu')(conv1)

    conv2 = keras.layers.Conv1D(filters=2*num_filters, kernel_size=5, padding='same')(conv1)
    conv2 = keras.layers.BatchNormalization()(conv2)
    conv2 = keras.layers.Activation('relu')(conv2)

    conv3 = keras.layers.Conv1D(num_filters, kernel_size=3,padding='same')(conv2)
    conv3 = keras.layers.BatchNormalization()(conv3)
    conv3 = keras.layers.Activation('relu')(conv3)

    gap_layer = keras.layers.GlobalAveragePooling1D()(conv3)
    output_layer = keras.layers.Dense(nb_classes, activation='softmax')(gap_layer)
    model = keras.models.Model(inputs=input_layer, outputs=output_layer)
    model.compile(loss='categorical_crossentropy', optimizer = keras.optimizers.Adam(), metrics=['categorical_accuracy'])
    learning_rate = 0.0001
    reduce_lr = keras.callbacks.ReduceLROnPlateau(monitor='val_loss', factor=0.5, patience=50, 
                                                  min_lr=learning_rate)
    
    model_checkpoint = keras.callbacks.ModelCheckpoint(filepath=file_path, monitor='val_loss', save_best_only=True, verbose=1)

    callbacks = [reduce_lr,model_checkpoint]

    return callbacks, model

In [None]:
RANDOM_STATE = 11235
EPOCHS = 500

def train_model(df, model_name = "foo.h5" ):
    userids = create_userids( df )
    nbclasses = len(userids)
    print(nbclasses)
    array = df.values
    nsamples, nfeatures = array.shape
    nfeatures = nfeatures -1 
    X = array[:,0:nfeatures]
    y = array[:,-1]
    
    enc = OneHotEncoder()
    enc.fit(y.reshape(-1,1))
    y = enc.transform(y.reshape(-1, 1)).toarray()
    X = X.reshape(-1, 128, 3)
    
    X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=RANDOM_STATE)
    X_train, X_val, y_train, y_val = train_test_split(X_train, y_train, test_size=0.25, random_state=RANDOM_STATE)
    
    print(X_train.shape)
    print(X_test.shape)
    print(X_val.shape)
    
    mini_batch_size = int(min(X_train.shape[0]/10, 16))    
    filepath = model_name
    
    cb, model = build_fcn((128, 3), nbclasses, filepath)
    
    model.summary()

    X_train = np.asarray(X_train).astype(np.float32)
    X_val = np.asarray(X_val).astype(np.float32)
    
    train_ds = tf.data.Dataset.from_tensor_slices((X_train, y_train))
    val_ds = tf.data.Dataset.from_tensor_slices((X_val, y_val))

    BATCH_SIZE = mini_batch_size
    SHUFFLE_BUFFER_SIZE = 100
    
    train_ds = train_ds.shuffle(SHUFFLE_BUFFER_SIZE).batch(BATCH_SIZE)
    val_ds = val_ds.batch(BATCH_SIZE)
    
    start_time = time.time()
    hist = model.fit(train_ds, 
                      epochs=EPOCHS,
                      verbose=True, 
                      validation_data=val_ds, 
                      callbacks=cb)
    
    hist_df = pd.DataFrame(hist.history) 
    
    print(hist_df)
    
    plot_training(hist, model_name, metrics ='accuracy')
    
    hist_csv_file = 'history.csv'
    with open(hist_csv_file, mode='w') as f:
        hist_df.to_csv(f)
    duration = time.time() - start_time
    print("Training duration: "+str(duration/60))
    
    # EVALUATION 
    X_test = np.asarray(X_test).astype(np.float32)    
    y_true = np.argmax( y_test, axis=1)
    y_pred = np.argmax( model.predict(X_test), axis=1)
    accuracy = metrics.accuracy_score(y_true, y_pred)     

    _, accuracy = model.evaluate(X_test, y_test, batch_size=BATCH_SIZE, verbose=0)

    print(accuracy)
    
    return model

def plot_training(history, model_name, metrics ='loss'):
    # list all data in history
    print(history.history.keys())
    keys = list(history.history.keys())
    plt.figure()
    if( metrics == 'loss'):
        plt.plot(history.history[keys[0]])
        plt.plot(history.history[keys[2]])
        plt.title('Model loss ' + model_name)
        plt.ylabel('loss')
    
    if( metrics == 'accuracy'):
        plt.plot(history.history[keys[1]])
        plt.plot(history.history[keys[3]])
        plt.title('Model accuracy '+model_name)
        plt.ylabel('accuracy')
    

    plt.xlabel('epoch')
    plt.legend(['training', 'validation'], loc='upper left')
    plt.show()

In [None]:
def scale_data(data):
    """ Normalizes the data using StandardScaler() function """
    
    data = data.drop(["screen", "timestamp"], axis = 1).copy()
    
    data.columns = ['X', 'Y', 'Z', 'User']
        
    le = LabelEncoder()
    data['s'] = le.fit_transform(data['User'])
    
    X = data[['X', 'Y', 'Z']]
    y = data['User']
    
    scaler = StandardScaler()
    X = scaler.fit_transform(X)
    
    scaled_df = pd.DataFrame(data = X, columns = ['X', 'Y', 'Z'])
    scaled_df['User'] = y.values
    
    return scaled_df

frequency = 100 # Based on Hertz
time_period = 2 # Based on Second
frame_size = frequency * time_period
step_size = frame_size # In order not to have an overlap

def get_frames(df):
    n_features = 3
    frames = []
    labels = []
    for i in range(0, len(df) - frame_size, step_size):
        x = df['X'].values[i: i + frame_size]
        y = df['Y'].values[i: i + frame_size]
        z = df['Z'].values[i: i + frame_size]
        
        label = stats.mode(df['User'][i: i + frame_size])[0][0]
        frames.append([x, y, z])
        labels.append(label)

    frames = np.asarray(frames).reshape(-1, frame_size, n_features)
    labels = np.asarray(labels)
    
    print(frames.shape)
    print(labels.shape)

    return frames, labels

In [None]:
train_set_1, user_list1 = load_data_1()
train_set_join_1 = train_set_1.reshape(train_set_1.shape[0], 1500)
data_join = pd.DataFrame(train_set_join_1)
data_join['user'] = user_list1
data_join.shape

In [None]:
train_set_2 = load_data_2()

In [None]:
train_set_3 = load_data_3()

In [None]:
train_set_2 = np.asarray(train_set_2)
train_set_join_2 = train_set_2.reshape(train_set_2.shape[0], 1500)

In [None]:
processed_data = scale_data(data)

In [None]:
data_frames, labels = get_frames(processed_data)

In [None]:
data_frames_join = data_frames.reshape(data_frames.shape[0], 600)
data_join = pd.DataFrame(data_frames_join)
data_join['user'] = labels
data_join.shape

In [None]:
df_idnet = data_join

In [None]:
df_idnet = normalize_rows(train_set_3)
df_idnet

In [None]:
model_name = "cnn.h5"
model = train_model(df_idnet, model_name)