In [None]:
import tensorflow as tf
import numpy as np
# Look up the physical GPUs; when any exist, expose only the first one to TensorFlow.
gpus = tf.config.experimental.list_physical_devices('GPU')
if len(gpus) > 0:
    try:
        first_gpu = gpus[0]
        tf.config.experimental.set_visible_devices(first_gpu, 'GPU')
        logical_gpus = tf.config.experimental.list_logical_devices('GPU')
        n_physical, n_logical = len(gpus), len(logical_gpus)
        print(n_physical, "Physical GPUs,", n_logical, "Logical GPU")
    except RuntimeError as err:
        # Visible devices must be set before the GPUs have been initialized;
        # the error is reported and the cell carries on.
        print(err)
# Seed NumPy's global random generator.
np.random.seed(100)

In [8]:
import os
import pandas as pd
import pickle
import numpy as np
import tensorflow as tf
import tensorflow as tf
from tensorflow import keras
from tensorflow.keras import backend as K
import matplotlib.pyplot as plt
from tensorflow.keras.callbacks import ModelCheckpoint,EarlyStopping
from sklearn.model_selection import train_test_split
from tensorflow.keras.layers import Conv1D,BatchNormalization,Dropout,Input,MaxPooling1D,Flatten,Dense,Input,Activation,GRU
from tensorflow.keras.models import Model, Sequential
from sklearn.metrics import accuracy_score
from joblib import Parallel,delayed
from scipy.stats import mode

In [None]:
# Load every non-hidden pickle under `directory` and stack its windows.
# NOTE: the same windows are appended to BOTH X_train and X_test, so the two
# collections are identical after this cell (no hold-out split is made here).
directory = './data/10/Sitting/'
X_train = []
X_test = []
y_train = []
y_test = []
for f in list(os.listdir(directory)):
    if f[0]=='.':
        continue
    # Close the handle deterministically instead of leaking one per file.
    # NOTE(review): pickle.load can execute arbitrary code - only load trusted files.
    with open(directory+f,'rb') as fh:
        df = pickle.load(fh)
    # First element of `window` is used as the ordering key of each row.
    df['time'] = df['window'].apply(lambda a:a[0])
    # Per row: build a 2-D array, order its rows by column 0, reshape to
    # (1, -1, 4) and finally drop column 0.
    df['data'] = df['data'].apply(lambda a:np.array([np.array(b) for b in a]))
    df['data'] = df['data'].apply(lambda a:a[a[:,0].argsort(),:].reshape(1,-1,4))
    df['data'] = df['data'].apply(lambda a:a[:,:,1:])
    df = df.sort_values('time').reset_index(drop=True)
    user_id = df.user.values[0]
    # Concatenate once and reuse the result for both collections.
    windows = np.concatenate(list(df['data'].values))
    X_train.append(windows)
    X_test.append(windows)
    y_train.extend([user_id]*windows.shape[0])
    y_test.extend([user_id]*windows.shape[0])
    del df

In [None]:
# Collapse the per-file arrays into one array per split; the label lists are
# passed through unchanged. Both stacks are built before anything is rebound.
stacked_train = np.concatenate(X_train)
stacked_test = np.concatenate(X_test)
X_train,X_test,y_train,y_test = stacked_train,stacked_test,y_train,y_test

In [3]:
# Build one DataFrame from (at most) the first 200 directory entries and cache
# it as a single pickle.
directory = './data/10/Sitting/'
data_all = []
for f in list(os.listdir(directory))[:200]:
    if f[0]=='.':
        continue
    # Context manager closes each input file instead of leaking the handle.
    # NOTE(review): pickle.load can execute arbitrary code - only load trusted files.
    with open(directory+f,'rb') as fh:
        data_all.append(pickle.load(fh))
data  = pd.concat(data_all)
del data_all
# Map every distinct `user` value to a dense integer id (order of first appearance).
unique_users = data.user.unique()
unique_users_dict = {u:i for i,u in enumerate(unique_users)}
data['user_id'] = data['user'].apply(lambda a:unique_users_dict[a])
data['time'] = data['window'].apply(lambda a:a[0])
# Per row: build a 2-D array, order its rows by column 0, reshape to
# (1, -1, 4) and finally drop column 0.
data['data'] = data['data'].apply(lambda a:np.array([np.array(b) for b in a]))
data['data'] = data['data'].apply(lambda a:a[a[:,0].argsort(),:].reshape(1,-1,4))
data['data'] = data['data'].apply(lambda a:a[:,:,1:])

# Closing the output handle guarantees the pickle is flushed to disk.
with open('./data/10/sitting_all_data.p','wb') as fh:
    pickle.dump(data,fh)

In [None]:
def get_model(input_shape=(500,3),n_classes=1):
    """Assemble and compile the Conv1D + GRU classifier.

    Parameters
    ----------
    input_shape : tuple
        Forwarded as ``input_shape`` to the first Conv1D layer.
    n_classes : int
        Number of units in each of the two trailing Dense layers; the last
        one applies a softmax.

    Returns
    -------
    A ``Sequential`` model compiled with sparse categorical cross-entropy
    and the ``'adam'`` optimizer.
    """
    # Settings shared by both convolution layers.
    conv_kwargs = dict(activation='relu',kernel_initializer='normal',padding='same')
    stack = [
        Conv1D(128,2,input_shape=input_shape,**conv_kwargs),
        MaxPooling1D(2),
        Conv1D(128,2,**conv_kwargs),
        MaxPooling1D(2),
        Activation('tanh'),
        Dropout(.5),
        GRU(128,return_sequences=True,activation='tanh'),
        Flatten(),
        Dense(n_classes),
        Dense(n_classes,activation='softmax'),
    ]
    model = Sequential()
    for layer in stack:
        model.add(layer)
    model.compile(loss=tf.keras.losses.SparseCategoricalCrossentropy(),optimizer='adam')
    return model




def get_score(df,test_size,n_iter,base_window_size):
    """Estimate voting accuracy for one true label at several test lengths.

    For every length ``t`` in ``test_size`` the function draws ``n_iter``
    random samples (without replacement) of ``n = int(t*60/base_window_size)``
    rows from ``df`` and turns each sample into a single predicted label in
    two ways:

    * majority vote: the most frequent value of ``prediction`` (the smallest
      label wins a tie);
    * max-mean: the argmax of the mean of the stacked ``probability`` rows.

    Parameters
    ----------
    df : pandas.DataFrame
        Rows with columns ``original``, ``prediction`` and ``probability``.
        The first ``original`` value is used as the true label of every row.
    test_size : iterable of numbers
        Test lengths; presumably minutes, given the ``t*60/base_window_size``
        conversion - TODO confirm.
    n_iter : int
        Number of random samples drawn per test length.
    base_window_size : number
        Divisor that converts a test length into a row count.

    Returns
    -------
    pandas.DataFrame
        Columns ``user``, ``majority_score``, ``maxmean_score`` and
        ``test_lengths``; one row per test length that was evaluated.
        Lengths needing fewer than one row or more rows than ``df`` holds
        are skipped.
    """
    true_label = df['original'].values[0]
    majority_scores = []
    maxmean_scores = []
    kept_lengths = []
    for t in test_size:
        n  = np.int64(t*60/base_window_size)
        # n == 0 would sample nothing (np.concatenate([]) raises) and
        # n > len(df) cannot be sampled without replacement.
        if n<1 or n>df.shape[0]:
            continue
        y_true = [true_label]*n_iter
        y_pred_majority = []
        y_pred_maxmean = []
        for _ in range(n_iter):
            sample = df.sample(n,replace=False)
            # np.unique returns sorted labels, so argmax picks the smallest
            # most-frequent one. This does not depend on the return shape of
            # scipy.stats.mode, which differs between SciPy releases.
            labels,counts = np.unique(sample['prediction'].values,return_counts=True)
            y_pred_majority.append(labels[np.argmax(counts)])
            mean_probability = np.mean(np.concatenate(list(sample['probability'])),axis=0)
            y_pred_maxmean.append(np.argmax(mean_probability))

        majority_scores.append(accuracy_score(y_true,y_pred_majority))
        maxmean_scores.append(accuracy_score(y_true,y_pred_maxmean))
        kept_lengths.append(t)
    return pd.DataFrame({'user':[true_label]*len(majority_scores),
                        'majority_score':majority_scores,
                        'maxmean_score':maxmean_scores,
                        'test_lengths':kept_lengths})


def get_test_scores(model,X_test,y_test,test_size=None,n_iter=1000,base_window_size=20):
    """Predict on the test windows and score every true label in parallel.

    Parameters
    ----------
    model : object exposing ``predict``
        ``model.predict(X_test)`` must return a 2-D array; the argmax along
        axis 1 is taken as the predicted label.
    X_test : array-like
        Input handed to ``model.predict``.
    y_test : array-like
        True label per test window (stored in the ``original`` column).
    test_size : iterable of numbers, optional
        Test lengths forwarded to ``get_score``; defaults to
        ``np.arange(1,120,1)``.
    n_iter, base_window_size :
        Forwarded unchanged to ``get_score``.

    Returns
    -------
    (test_score, test_df) : tuple of pandas.DataFrame
        The concatenated ``get_score`` results and the per-window table with
        columns ``prediction``, ``original`` and ``probability``.
    """
    probabilities = model.predict(X_test)
    predicted_labels = probabilities.argmax(axis=1)
    test_df = pd.DataFrame({'prediction':predicted_labels,
                            'original':y_test,
                            'probability':list(probabilities)})
    # Give each probability vector a leading axis so that rows can be stacked later.
    test_df['probability'] = test_df['probability'].apply(lambda a:a.reshape(1,-1))
    if test_size is None:
        test_size = np.arange(1,120,1)
    # One job per distinct true label.
    jobs = [delayed(get_score)(group,test_size,n_iter,base_window_size)
            for _,group in test_df.groupby('original',as_index=False)]
    result = Parallel(n_jobs=-1,verbose=2)(jobs)
    test_score = pd.concat(result)
    return test_score,test_df



def get_trained_model(X_train,y_train,n_timesteps,n_channels,window_size):
    """Train a fresh model and return it with the best checkpointed weights loaded.

    Parameters
    ----------
    X_train, y_train :
        Training inputs and labels; 20% is held out (stratified on
        ``y_train``) as validation data.
    n_timesteps, n_channels : int
        Combined into the ``input_shape`` passed to ``get_model``.
    window_size :
        Only used to build the checkpoint file name.

    Returns
    -------
    The fitted model after ``load_weights`` on the checkpoint file.
    """
    model = get_model(input_shape=(n_timesteps,n_channels),
                      n_classes=len(np.unique(y_train)))
    model.summary()
    filepath = ''.join(['./models/walking/person_estimator_mperf_base_10_seconds_train_',
                        str(window_size),
                        '_seconds.hdf5'])
    # Keep only the epoch with the lowest validation loss on disk.
    checkpoint = ModelCheckpoint(filepath, monitor='val_loss', verbose=0, save_best_only=True, mode='min',save_weights_only=False)
    # Stop once validation loss has not improved for 40 epochs.
    es = EarlyStopping(monitor='val_loss', mode='min', verbose=1,patience=40)
    fit_x,held_x,fit_y,held_y = train_test_split(X_train,y_train,test_size=.2,stratify=y_train)
    model.fit(fit_x,fit_y,
              validation_data=(held_x,held_y),
              epochs=200,
              batch_size=300,
              verbose=0,
              callbacks=[es,checkpoint],
              shuffle=True)
    # Replace the last-epoch weights with the checkpointed ones.
    model.load_weights(filepath)
    return model


def get_training_testing(window_size=10,
                         base_window_size=20,
                         min_test_windows=30*6):
    """Split every user's windows into leading training and trailing test windows.

    Reads two module-level names: ``data`` (a DataFrame whose ``user`` column
    is iterated in sorted group order) and ``directory`` (string prefix that
    is concatenated with each ``user`` value to form a pickle path).

    Parameters
    ----------
    window_size : number
        Training length; converted to ``n_train = window_size*60//base_window_size``
        rows (presumably minutes vs. seconds - TODO confirm).
    base_window_size : number
        Divisor used in the conversion above.
    min_test_windows : int
        A file is skipped unless it holds at least ``n_train + min_test_windows``
        rows. Defaults to the previously hard-coded ``30*6``.

    Returns
    -------
    (X_train, X_test, y_train, y_test)
        Two stacked arrays and two flat label lists. Labels are the first
        ``user`` value stored inside each loaded file.

    Raises
    ------
    ValueError
        If no file has enough rows (same exception type ``np.concatenate``
        raised for the empty list before, with a clearer message).
    """
    n_train = window_size*60//base_window_size
    X_train = []
    X_test = []
    y_train = []
    y_test = []

    def to_window(samples):
        # Order the rows by column 0, reshape to (1, -1, 4), then drop column 0.
        arr = np.array([np.array(row) for row in samples])
        return arr[arr[:,0].argsort(),:].reshape(1,-1,4)[:,:,1:]

    # Plain iteration instead of groupby(...).apply(...): apply is meant for
    # computing results, may evaluate a group more than once on some pandas
    # versions, and newer versions no longer hand the grouping column to the
    # callback - all of which break a callback that works via side effects.
    for user,_ in data.groupby('user'):
        filepath = directory+user
        # NOTE(review): pickle.load can execute arbitrary code - only load trusted files.
        with open(filepath,'rb') as fh:
            df = pickle.load(fh)
        if df.shape[0]<n_train+min_test_windows:
            print(filepath[-2:],end=',')
            continue
        df['time'] = df['window'].apply(lambda a:a[0])
        df = df.sort_values('time').reset_index(drop=True)
        windows = [to_window(samples) for samples in df['data']]
        user_id = df.user.values[0]
        X_train.append(np.concatenate(windows[:n_train]))
        X_test.append(np.concatenate(windows[n_train:]))
        y_train.extend([user_id]*X_train[-1].shape[0])
        y_test.extend([user_id]*X_test[-1].shape[0])
        print(filepath[-2:],end=',')
    if not X_train:
        raise ValueError('no file under directory has at least n_train + min_test_windows rows')
    return np.concatenate(X_train),np.concatenate(X_test),y_train,y_test


directory = './data/walking_10/right_wrist/'
# One row per file in `directory`; get_training_testing() concatenates
# `directory` with each `user` value to load that file. Hidden entries are
# skipped, matching the other loader cells.
data = pd.DataFrame({'user':[f for f in os.listdir(directory) if not f.startswith('.')]})

base_window_size = 10
Fs = 25  # presumably the sampling rate in Hz - TODO confirm
n_timesteps = base_window_size*Fs
n_channels = 3
window_sizes = np.arange(10,250,10)
test_lengths = np.arange(1,180,2)
results = []
for window_size in window_sizes:
    X_train,X_test,y_train,y_test = get_training_testing(window_size=window_size,base_window_size=base_window_size)
    # Re-encode the user labels as dense integers for the sparse cross-entropy loss.
    unique_users = np.unique(y_train+y_test)
    unique_users_dict = {u:i for i,u in enumerate(unique_users)}
    y_train = np.array([unique_users_dict[a] for a in y_train])
    y_test = np.array([unique_users_dict[a] for a in y_test])
    print(X_train.shape,X_test.shape,len(np.unique(y_train)),len(np.unique(y_test)))
    model = get_trained_model(X_train,y_train,n_timesteps,n_channels,window_size)
    # Use the same base_window_size as the train/test split above; the call
    # previously hard-coded 20, which halves the number of windows sampled
    # per test length relative to the 10 used everywhere else in this cell.
    test_score,test_df = get_test_scores(model,X_test,y_test,test_size=test_lengths,n_iter=1000,base_window_size=base_window_size)
    results.append([window_size,test_score,test_df,y_test,unique_users_dict])
    # Re-written after every window size so partial results survive an
    # interrupted run; the context manager closes (and flushes) the file.
    with open('./data/walking_results_10.p','wb') as fh:
        pickle.dump(results,fh)

In [None]:
# Per distinct `test_lengths` value, the mean of the other columns of
# `test_score` (which holds the scores from the last loop iteration above).
test_score.groupby('test_lengths').mean()

In [None]:
# Display the full `test_score` DataFrame as this cell's output.
test_score

In [None]:
# Build (once) a single pickle holding every walking file's rows.
directory = './data/walking_10/right_wrist/'
filepath1 = './data/all_walking_data_10.p'
if not os.path.isfile(filepath1):
    print(filepath1)
    data_col = []
    for i,f in enumerate(list(os.listdir(directory))):
        filepath = directory+f
        # NOTE(review): pickle.load can execute arbitrary code - only load trusted files.
        with open(filepath,'rb') as fh:
            df = pickle.load(fh)
        data_col.append(df)
        print(i,end=',')
    # The concatenated frame must be bound to `data`: every following line
    # transforms `data`, so binding it to `data_col` (as before) left those
    # lines operating on an unrelated or undefined object.
    data = pd.concat(data_col)
    unique_users = data.user.unique()
    unique_users_dict = {u:i for i,u in enumerate(unique_users)}
    data['user_id'] = data['user'].apply(lambda a:unique_users_dict[a])
    data['time'] = data['window'].apply(lambda a:a[0])
    # Per row: build a 2-D array, order its rows by column 0, reshape to
    # (1, -1, 4) and finally drop column 0.
    data['data'] = data['data'].apply(lambda a:np.array([np.array(b) for b in a]))
    data['data'] = data['data'].apply(lambda a:a[a[:,0].argsort(),:].reshape(1,-1,4))
    data['data'] = data['data'].apply(lambda a:a[:,:,1:])
    # Closing the handle guarantees the cache file is flushed to disk.
    with open(filepath1,'wb') as fh:
        pickle.dump(data,fh)

In [None]:
# Plot the mean of both scores against test length.
mean_by_length = test_score.groupby('test_lengths').mean()
mean_by_length[['maxmean_score','majority_score']].plot()