In [1]:
import tensorflow as tf
import numpy as np
seed = 100
tf.random.set_seed(seed)
np.random.seed(seed)
import os
import pandas as pd
import pickle
import numpy as np
import tensorflow as tf
import tensorflow as tf
from tensorflow import keras
from tensorflow.keras import backend as K
import matplotlib.pyplot as plt
from tensorflow.keras.callbacks import ModelCheckpoint,EarlyStopping
from sklearn.model_selection import train_test_split
from tensorflow.keras.layers import Conv1D,BatchNormalization,Dropout,Input,MaxPooling1D,Flatten,Dense,Input,Activation,GRU
from tensorflow.keras.models import Model, Sequential
from sklearn.metrics import accuracy_score
from joblib import Parallel,delayed
from scipy.stats import mode
from sklearn.metrics import accuracy_score



In [None]:
import pickle
import pandas as pd
def get_participants_df(directory,window_size,min_length):
    """List the users whose recording in ``directory`` is long enough.

    Every non-hidden entry of ``directory`` is unpickled and its number of
    rows is converted to whole minutes, using ``60 // window_size`` rows per
    minute (``window_size`` is presumably the window duration in seconds).

    Parameters
    ----------
    directory : str
        Folder containing one pickle per user. A trailing separator is
        accepted but not required.
    window_size : int
        Window duration used to convert row counts into minutes.
    min_length : int
        Minimum length, in minutes, a user must have to be kept.

    Returns
    -------
    pandas.DataFrame
        Columns ``user`` (file name) and ``total_test_length`` (minutes),
        restricted to rows with ``total_test_length >= min_length``.

    Notes
    -----
    The files are loaded with ``pickle``; only point this at trusted data.
    """
    windows_per_minute = 60//window_size
    rows = []
    # os.listdir returns entries in arbitrary order; sorting keeps the row
    # order (and therefore any later `[:n_user]` selection) reproducible.
    for fname in sorted(os.listdir(directory)):
        # Skip hidden entries such as .DS_Store or .ipynb_checkpoints.
        if fname.startswith('.'):
            continue
        with open(os.path.join(directory,fname),'rb') as fh:
            data = pickle.load(fh)
        rows.append([fname,data.shape[0]//windows_per_minute])
    participants = pd.DataFrame(rows,columns=['user','total_test_length'])
    return participants[participants.total_test_length>=min_length]

def get_training_data(directory,
                      train_length,
                      n_user,
                      participant_df,
                      window_size):
    """Sample one contiguous training segment per user and stack them.

    For each of the first ``n_user`` entries of ``participant_df['user']``
    the matching pickle in ``directory`` is loaded and a random contiguous
    run of ``(60 // window_size) * train_length`` rows is taken. The arrays
    in the ``data`` column of that run are concatenated along axis 0.

    Parameters
    ----------
    directory : str
        Folder with one pickled frame per user (trailing separator optional).
    train_length : int
        Length of the segment to draw, in minutes.
    n_user : int
        Number of users to take from the top of ``participant_df``.
    participant_df : pandas.DataFrame
        Must provide a ``user`` column holding the file names.
    window_size : int
        Window duration used to convert minutes into rows.

    Returns
    -------
    X : numpy.ndarray
        All selected windows concatenated along axis 0.
    y : numpy.ndarray
        Integer label for every selected row.
    y_dict : dict
        Mapping from user file name to integer label (sorted-name order).

    Raises
    ------
    ValueError
        If a user's recording holds fewer rows than requested.

    Notes
    -----
    One label is emitted per frame row, so each ``data`` entry is assumed to
    contribute exactly one sample along axis 0 -- TODO confirm against the
    preprocessing code. Files are read with ``pickle``; use trusted data only.
    """
    n_windows = (60//window_size)*train_length
    users = participant_df['user'].values[:n_user]
    X = []
    y = []
    for f in users:
        with open(os.path.join(directory,f),'rb') as fh:
            df = pickle.load(fh)
        if df.shape[0] < n_windows:
            raise ValueError('user %s has %d windows but %d are required'
                             % (f,df.shape[0],n_windows))
        # randint's upper bound is exclusive, so add 1: the last valid start
        # offset is len - n_windows, and a recording of exactly n_windows
        # rows must still be usable (start offset 0).
        start = np.random.randint(0,df.shape[0]-n_windows+1)
        df = df[start:start+n_windows]
        X.append(np.concatenate(list(df['data'])))
        y.extend([f]*df.shape[0])
    y_dict = {a:i for i,a in enumerate(np.unique(y))}
    y  = [y_dict[a] for a in y]
    return np.concatenate(X),np.array(y),y_dict

def get_trained_model(X_train,y_train,n_timesteps,n_channels,window_size,filepath):
    """Fit a fresh classifier and return it with its best checkpoint loaded.

    The data is split 80/20 (stratified on ``y_train``). A model from
    ``get_model`` is trained for at most 200 epochs with batch size 300;
    the checkpoint callback keeps the model with the highest ``val_acc`` at
    ``filepath`` and early stopping ends training after 40 epochs without a
    ``val_acc`` improvement. Afterwards the checkpointed weights are loaded
    back and the validation accuracy is printed followed by a comma.

    Parameters
    ----------
    X_train, y_train : array-like
        Training windows and their integer labels.
    n_timesteps, n_channels : int
        Input shape passed to ``get_model``.
    window_size : int
        Unused; kept so existing callers keep working.
    filepath : str
        Where the best model is checkpointed and reloaded from.

    Returns
    -------
    The trained Keras model carrying the best-checkpoint weights.
    """
    n_classes = len(np.unique(y_train))
    model =  get_model(input_shape=(n_timesteps,n_channels),n_classes=n_classes)
    checkpoint = ModelCheckpoint(filepath, monitor='val_acc', verbose=0, save_best_only=True, mode='max',save_weights_only=False)
    es = EarlyStopping(monitor='val_acc', mode='max', verbose=0,patience=40)
    # Only these two callbacks were ever passed to fit(); the previously
    # constructed-but-unused ReduceLROnPlateau instance has been dropped.
    callbacks_list = [es,checkpoint]
    train_x,val_x,train_y,val_y = train_test_split(X_train,y_train,test_size=.2,stratify=y_train)
    model.fit(train_x,train_y,validation_data=(val_x,val_y), epochs=200, batch_size=300,verbose=0,callbacks=callbacks_list,shuffle=True)
    # EarlyStopping does not restore the best epoch here, so reload it from
    # the checkpoint file before scoring.
    model.load_weights(filepath)
    print(accuracy_score(val_y,model.predict(val_x).argmax(axis=1)),end=',')
    return model

def get_model(input_shape=(500,3),n_classes=1):
    """Build and compile the Conv1D -> GRU -> Dense classifier.

    Layer stack, in order: two ``Conv1D(128, kernel 2, relu, same padding)``
    stages each followed by ``MaxPooling1D(2)``, a ``tanh`` activation,
    ``Dropout(0.4)``, ``GRU(128)`` returning only its last output,
    ``Flatten``, a linear ``Dense(350)`` named ``'feature'``, a linear
    ``Dense(n_classes)`` and a final softmax ``Dense(n_classes)``.

    Parameters
    ----------
    input_shape : tuple
        ``(n_timesteps, n_channels)`` of one input window.
    n_classes : int
        Width of the two output layers.

    Returns
    -------
    A ``Sequential`` model compiled with sparse categorical cross-entropy,
    the Adam optimizer and the ``'acc'`` metric.
    """
    conv_options = dict(activation='relu',kernel_initializer='normal',padding='same')
    stack = [
        Conv1D(128,2,input_shape=input_shape,**conv_options),
        MaxPooling1D(2),
        Conv1D(128,2,**conv_options),
        MaxPooling1D(2),
        Activation('tanh'),
        Dropout(.4),
        GRU(128,return_sequences=False,activation='tanh'),
        Flatten(),
        Dense(350,name='feature'),
        Dense(n_classes),
        Dense(n_classes,activation='softmax'),
    ]
    network = Sequential()
    for layer in stack:
        network.add(layer)
    network.compile(loss=tf.keras.losses.SparseCategoricalCrossentropy(),optimizer='adam',metrics=['acc'])
    return network


# --- Experiment configuration -------------------------------------------
window_size = 20
activity = 'sports'
data_directory = './data/'+str(window_size)+'/'+activity+'/'
model_directory = './models/'+str(window_size)+'/'+activity+'/'
min_test_total_length  = 100
fs = 25  # samples per unit of window_size (presumably Hz) -- TODO confirm
n_timesteps,n_channels = fs*window_size,3
os.makedirs(model_directory,exist_ok=True)

# Eligible users are chosen from the *testing* folder; the training files are
# then looked up under the same names in the training folder.
participant_df = get_participants_df(data_directory+'testing/',window_size,min_test_total_length)
n_iters = np.arange(3)
n_users = [participant_df.shape[0]]
# This cell trains the long segments only (210, 240, 270 and 300 minutes).
train_lengths = np.arange(210,330,30)
print(n_users)

# --- Training sweep: users x training length x repetition ------------------
for n_user in n_users[::-1]:
    user_directory = model_directory+str(n_user)
    os.makedirs(user_directory,exist_ok=True)
    print('--'*30)
    print('Starting for no. of training users = ',n_user)
    for train_length in train_lengths:
        print('Training length minutes = ',train_length)
        run_directory = user_directory+'/'+str(train_length)
        os.makedirs(run_directory,exist_ok=True)
        for n_iter in n_iters:
            X_train,y_train,user_dict = get_training_data(directory = data_directory+'training/',
                                                          train_length=train_length,
                                                          n_user=n_user,
                                                          participant_df=participant_df,
                                                          window_size=window_size)
            print(X_train.shape)
            run_tag = '_seed_'+str(seed)+'_iteration_'+str(n_iter)
            # Persist the label mapping next to the model so predictions can
            # be decoded later; close the file deterministically.
            with open(run_directory+'/userdict'+run_tag+'.p','wb') as fh:
                pickle.dump(user_dict,fh)
            model_filepath = run_directory+'/trainedmodel'+run_tag+'.hdf5'
            # get_trained_model builds its own network, so no model is
            # constructed here beforehand.
            model = get_trained_model(X_train,y_train,n_timesteps,n_channels,window_size,model_filepath)
            print('iteration = ',n_iter)
        print(train_length, '--Done')

[346]
------------------------------------------------------------
Starting for no. of training users =  346
Training length minutes =  210
(217980, 500, 3)


In [None]:
import pickle
import pandas as pd
def get_participants_df(directory,window_size,min_length):
    """List the users whose recording in ``directory`` is long enough.

    Every non-hidden entry of ``directory`` is unpickled and its number of
    rows is converted to whole minutes, using ``60 // window_size`` rows per
    minute (``window_size`` is presumably the window duration in seconds).

    Parameters
    ----------
    directory : str
        Folder containing one pickle per user. A trailing separator is
        accepted but not required.
    window_size : int
        Window duration used to convert row counts into minutes.
    min_length : int
        Minimum length, in minutes, a user must have to be kept.

    Returns
    -------
    pandas.DataFrame
        Columns ``user`` (file name) and ``total_test_length`` (minutes),
        restricted to rows with ``total_test_length >= min_length``.

    Notes
    -----
    The files are loaded with ``pickle``; only point this at trusted data.
    """
    windows_per_minute = 60//window_size
    rows = []
    # os.listdir returns entries in arbitrary order; sorting keeps the row
    # order (and therefore any later `[:n_user]` selection) reproducible.
    for fname in sorted(os.listdir(directory)):
        # Hidden entries (.DS_Store, .ipynb_checkpoints, ...) are not user
        # pickles and would make pickle.load/open fail, so skip them.
        if fname.startswith('.'):
            continue
        with open(os.path.join(directory,fname),'rb') as fh:
            data = pickle.load(fh)
        rows.append([fname,data.shape[0]//windows_per_minute])
    participants = pd.DataFrame(rows,columns=['user','total_test_length'])
    return participants[participants.total_test_length>=min_length]

def get_training_data(directory,
                      train_length,
                      n_user,
                      participant_df,
                      window_size):
    """Sample one contiguous training segment per user and stack them.

    For each of the first ``n_user`` entries of ``participant_df['user']``
    the matching pickle in ``directory`` is loaded and a random contiguous
    run of ``(60 // window_size) * train_length`` rows is taken. The arrays
    in the ``data`` column of that run are concatenated along axis 0.

    Parameters
    ----------
    directory : str
        Folder with one pickled frame per user (trailing separator optional).
    train_length : int
        Length of the segment to draw, in minutes.
    n_user : int
        Number of users to take from the top of ``participant_df``.
    participant_df : pandas.DataFrame
        Must provide a ``user`` column holding the file names.
    window_size : int
        Window duration used to convert minutes into rows.

    Returns
    -------
    X : numpy.ndarray
        All selected windows concatenated along axis 0.
    y : numpy.ndarray
        Integer label for every selected row.
    y_dict : dict
        Mapping from user file name to integer label (sorted-name order).

    Raises
    ------
    ValueError
        If a user's recording holds fewer rows than requested.

    Notes
    -----
    One label is emitted per frame row, so each ``data`` entry is assumed to
    contribute exactly one sample along axis 0 -- TODO confirm against the
    preprocessing code. Files are read with ``pickle``; use trusted data only.
    """
    n_windows = (60//window_size)*train_length
    users = participant_df['user'].values[:n_user]
    X = []
    y = []
    for f in users:
        with open(os.path.join(directory,f),'rb') as fh:
            df = pickle.load(fh)
        if df.shape[0] < n_windows:
            raise ValueError('user %s has %d windows but %d are required'
                             % (f,df.shape[0],n_windows))
        # randint's upper bound is exclusive, so add 1: the last valid start
        # offset is len - n_windows, and a recording of exactly n_windows
        # rows must still be usable (start offset 0).
        start = np.random.randint(0,df.shape[0]-n_windows+1)
        df = df[start:start+n_windows]
        X.append(np.concatenate(list(df['data'])))
        y.extend([f]*df.shape[0])
    y_dict = {a:i for i,a in enumerate(np.unique(y))}
    y  = [y_dict[a] for a in y]
    return np.concatenate(X),np.array(y),y_dict

def get_trained_model(X_train,y_train,n_timesteps,n_channels,window_size,filepath):
    """Fit a fresh classifier and return it with its best checkpoint loaded.

    The data is split 80/20 (stratified on ``y_train``). A model from
    ``get_model`` is trained for at most 200 epochs with batch size 300;
    the checkpoint callback keeps the model with the highest ``val_acc`` at
    ``filepath`` and early stopping ends training after 40 epochs without a
    ``val_acc`` improvement. Afterwards the checkpointed weights are loaded
    back and the validation accuracy is printed followed by a comma.

    Parameters
    ----------
    X_train, y_train : array-like
        Training windows and their integer labels.
    n_timesteps, n_channels : int
        Input shape passed to ``get_model``.
    window_size : int
        Unused; kept so existing callers keep working.
    filepath : str
        Where the best model is checkpointed and reloaded from.

    Returns
    -------
    The trained Keras model carrying the best-checkpoint weights.
    """
    n_classes = len(np.unique(y_train))
    model =  get_model(input_shape=(n_timesteps,n_channels),n_classes=n_classes)
    checkpoint = ModelCheckpoint(filepath, monitor='val_acc', verbose=0, save_best_only=True, mode='max',save_weights_only=False)
    es = EarlyStopping(monitor='val_acc', mode='max', verbose=0,patience=40)
    # Only these two callbacks were ever passed to fit(); the previously
    # constructed-but-unused ReduceLROnPlateau instance has been dropped.
    callbacks_list = [es,checkpoint]
    train_x,val_x,train_y,val_y = train_test_split(X_train,y_train,test_size=.2,stratify=y_train)
    model.fit(train_x,train_y,validation_data=(val_x,val_y), epochs=200, batch_size=300,verbose=0,callbacks=callbacks_list,shuffle=True)
    # EarlyStopping does not restore the best epoch here, so reload it from
    # the checkpoint file before scoring.
    model.load_weights(filepath)
    print(accuracy_score(val_y,model.predict(val_x).argmax(axis=1)),end=',')
    return model

def get_model(input_shape=(500,3),n_classes=1):
    """Build and compile the Conv1D -> GRU -> Dense classifier.

    Layer stack, in order: two ``Conv1D(128, kernel 2, relu, same padding)``
    stages each followed by ``MaxPooling1D(2)``, a ``tanh`` activation,
    ``Dropout(0.4)``, ``GRU(128)`` returning only its last output,
    ``Flatten``, a linear ``Dense(350)`` named ``'feature'``, a linear
    ``Dense(n_classes)`` and a final softmax ``Dense(n_classes)``.

    Parameters
    ----------
    input_shape : tuple
        ``(n_timesteps, n_channels)`` of one input window.
    n_classes : int
        Width of the two output layers.

    Returns
    -------
    A ``Sequential`` model compiled with sparse categorical cross-entropy,
    the Adam optimizer and the ``'acc'`` metric.
    """
    conv_options = dict(activation='relu',kernel_initializer='normal',padding='same')
    stack = [
        Conv1D(128,2,input_shape=input_shape,**conv_options),
        MaxPooling1D(2),
        Conv1D(128,2,**conv_options),
        MaxPooling1D(2),
        Activation('tanh'),
        Dropout(.4),
        GRU(128,return_sequences=False,activation='tanh'),
        Flatten(),
        Dense(350,name='feature'),
        Dense(n_classes),
        Dense(n_classes,activation='softmax'),
    ]
    network = Sequential()
    for layer in stack:
        network.add(layer)
    network.compile(loss=tf.keras.losses.SparseCategoricalCrossentropy(),optimizer='adam',metrics=['acc'])
    return network


# --- Experiment configuration -------------------------------------------
window_size = 20
activity = 'sports'
data_directory = './data/'+str(window_size)+'/'+activity+'/'
model_directory = './models/'+str(window_size)+'/'+activity+'/'
min_test_total_length  = 100
fs = 25  # samples per unit of window_size (presumably Hz) -- TODO confirm
n_timesteps,n_channels = fs*window_size,3
os.makedirs(model_directory,exist_ok=True)

# Eligible users are chosen from the *testing* folder; the training files are
# then looked up under the same names in the training folder.
participant_df = get_participants_df(data_directory+'testing/',window_size,min_test_total_length)
# This cell trains the short segments: 10-50 minutes in steps of 10, then
# 60-180 minutes in steps of 30.
train_lengths = list(np.arange(10,60,10))+list(np.arange(60,210,30))
n_iters = np.arange(3)
n_users = [participant_df.shape[0]]

# --- Training sweep: users x training length x repetition ------------------
for n_user in n_users[::-1]:
    user_directory = model_directory+str(n_user)
    os.makedirs(user_directory,exist_ok=True)
    print('--'*30)
    print('Starting for no. of training users = ',n_user)
    for train_length in train_lengths:
        print('Training length minutes = ',train_length)
        run_directory = user_directory+'/'+str(train_length)
        os.makedirs(run_directory,exist_ok=True)
        for n_iter in n_iters:
            X_train,y_train,user_dict = get_training_data(directory = data_directory+'training/',
                                                          train_length=train_length,
                                                          n_user=n_user,
                                                          participant_df=participant_df,
                                                          window_size=window_size)
            print(X_train.shape)
            run_tag = '_seed_'+str(seed)+'_iteration_'+str(n_iter)
            # Persist the label mapping next to the model so predictions can
            # be decoded later; close the file deterministically.
            with open(run_directory+'/userdict'+run_tag+'.p','wb') as fh:
                pickle.dump(user_dict,fh)
            model_filepath = run_directory+'/trainedmodel'+run_tag+'.hdf5'
            # get_trained_model builds its own network, so no model is
            # constructed here beforehand.
            model = get_trained_model(X_train,y_train,n_timesteps,n_channels,window_size,model_filepath)
            print('iteration = ',n_iter)
        print(train_length, '--Done')

In [None]:
# Quick visual check: plot the first window of the most recently built
# X_train (left over from the last training-loop iteration). A 2-D window is
# drawn as one line per column.
plt.plot(X_train[0])

In [None]:
import tensorflow as tf
import os

import pickle
# --- Evaluation configuration ---------------------------------------------
window_size = 20
train_length = 120
activity = 'walking'
n_user  = 333

# Folder holding the artefacts of one training run; both files below are
# resolved against it so the model and its label mapping cannot drift apart.
model_directory = './models/'+str(window_size)+'/'+activity+'/'+str(n_user)+'/'+str(train_length)+'/'

# NOTE(review): the training cells in this notebook write
# 'trainedmodel_seed_*.hdf5' / 'userdict_seed_*.p' (no underscore after
# 'trained' / 'user'), while the names loaded here contain one -- confirm
# which naming the files on disk actually use.
trained_model_path = model_directory+'trained_model_seed_100_iteration_1.hdf5'
model = tf.keras.models.load_model(trained_model_path)

# Mapping from user file name to the integer class the model was trained on.
with open(model_directory+'user_dict_seed_100_iteration_1.p','rb') as fh:
    user_dict = pickle.load(fh)

data_directory = './data/'+str(window_size)+'/'+activity+'/testing/'

from sklearn.metrics import accuracy_score
import numpy as np
def get_training_data(directory,
                      test_length,
                      user_dict,
                      window_size,
                      model):
    """Score ``model`` on every user's file in ``directory``.

    Despite its name this is the evaluation routine: for each user in
    ``user_dict`` the matching pickle is loaded, all arrays of its ``data``
    column are concatenated and classified, and the predictions are compared
    with the user's label.

    Parameters
    ----------
    directory : str
        Folder with one pickled frame per user (trailing separator optional).
    test_length : int
        Unused; kept so existing callers keep working.
    user_dict : dict
        Mapping from user file name to integer class label.
    window_size : int
        Unused; kept so existing callers keep working.
    model :
        Object whose ``predict(X)`` returns per-class scores, one row per
        sample.

    Returns
    -------
    results : list of float
        Accuracy per user, in ``user_dict`` iteration order.
    pandas.DataFrame
        One row per frame row with columns ``original`` (true label) and
        ``prediction`` (arg-max class).

    Notes
    -----
    Files are read with ``pickle``; use trusted data only.
    """
    results = []
    y_orig = []
    y_pred = []
    for f,label in user_dict.items():
        with open(os.path.join(directory,f),'rb') as fh:
            df = pickle.load(fh)
        X = np.concatenate(list(df['data']))
        pred = model.predict(X).argmax(axis=1)
        # Every row of this file belongs to the same user, hence one label.
        truth = [label]*df.shape[0]
        results.append(accuracy_score(truth,pred))
        y_orig.extend(truth)
        y_pred.extend(list(pred))
    return results,pd.DataFrame({'original':y_orig,'prediction':y_pred})

# Evaluate the loaded model on every user's testing file. `results` holds one
# accuracy per user, `df` one (original, prediction) row per test sample.
# The second argument (test_length) is not used by the evaluation routine.
results,df = get_training_data(data_directory,1,user_dict,window_size,model)

# Unweighted mean of the per-user accuracies (each user counts equally).
np.mean(results)

In [None]:
# Distribution of the per-user accuracies computed above.
plt.hist(results)

In [None]:
# Accuracy pooled over all test rows; unlike np.mean(results), users with
# more rows weigh more here.
accuracy_score(df['original'],df['prediction'])

In [None]:
# Accuracy per true label (i.e. per user), computed from the pooled
# prediction table by grouping on the 'original' column.
df.groupby('original',as_index=False).apply(lambda a:pd.Series({'accuracy':accuracy_score(a['original'],a['prediction'])}))

In [None]:
import shutil

# Zip the contents of ./models into ./models.zip, next to the folder.
# base_name is the archive path without its extension and must not end with a
# separator: depending on the Python version, './models/' can resolve to
# './models/.zip', i.e. a hidden archive written inside the very directory
# that is being archived.
shutil.make_archive('./models','zip','./models/')