In [2]:
from numpy import mean
from numpy import std
from numpy import vstack,hstack,dstack,stack
from pandas import read_csv
from keras.models import Sequential
from keras.layers import Dense
from keras.layers import Flatten
from keras.layers import Dropout
from keras.layers import LSTM
from keras.utils import to_categorical
from matplotlib import pyplot

# load a single CSV file as a pandas DataFrame
def load_file(filepath):
    """Read the CSV at ``filepath`` and return it as a pandas DataFrame.

    The frame is returned as-is (not converted with ``.values``), so callers
    can select columns by name.
    """
    return read_csv(filepath)

def load_cols(filepath, window=234, axes=('x', 'y', 'z')):
    """Load a CSV of sensor readings and cut it into fixed-length windows.

    The previous implementation rebound its accumulator list to an ndarray
    inside the loop (so the second ``append`` raised AttributeError), sliced
    with end-inclusive ``.loc`` (235 rows instead of 234) and hard-coded the
    row count. This version slices positionally and derives the number of
    windows from the file itself.

    Parameters
    ----------
    filepath : str
        Path of a CSV file containing the ``axes`` columns and a ``label``
        column.
    window : int, default 234
        Number of consecutive rows per window.
    axes : sequence of str, default ('x', 'y', 'z')
        Columns used as features, in this order.

    Returns
    -------
    (ndarray, ndarray)
        ``X`` with shape ``(n_windows, window, len(axes))`` and ``y`` holding
        the label of the first row of each window. Trailing rows that do not
        fill a whole window are dropped.

    Raises
    ------
    ValueError
        If ``window`` is not a positive integer.
    """
    if window <= 0:
        raise ValueError("window must be a positive integer")
    dataframe = read_csv(filepath)
    n_windows = len(dataframe) // window
    usable_rows = n_windows * window  # ignore an incomplete trailing window
    signals = dataframe[list(axes)].values[:usable_rows]
    windows = signals.reshape(n_windows, window, len(axes))
    # one label per window, taken from the window's first row
    labels = dataframe['label'].values[:usable_rows:window]
    return windows, labels
# load a list of files and return as a 3d numpy array

def load_group(filenames, prefix=''):
    """Load every file in ``filenames`` and depth-stack the results.

    Each name is prefixed with ``prefix`` and read with ``load_file``; the
    loaded tables are combined with ``dstack`` so that each file becomes one
    slice along the third axis.
    """
    return dstack([load_file(prefix + name) for name in filenames])

# load a dataset group, such as train or test
def load_dataset_group(group, prefix=''):
    """Load the inputs and labels of one dataset split (e.g. 'train' or 'test').

    Nine signal files (total acceleration, body acceleration and body
    gyroscope, each for x/y/z) are read from
    ``<prefix><group>/Inertial Signals/`` via ``load_group``; the labels are
    read from ``<prefix><group>/y_<group>.txt`` via ``load_file``.

    Returns the pair ``(X, y)``.
    """
    signals_dir = prefix + group + '/Inertial Signals/'
    stems = [
        # total acceleration
        'total_acc_x_', 'total_acc_y_', 'total_acc_z_',
        # body acceleration
        'body_acc_x_', 'body_acc_y_', 'body_acc_z_',
        # body gyroscope
        'body_gyro_x_', 'body_gyro_y_', 'body_gyro_z_',
    ]
    filenames = [stem + group + '.txt' for stem in stems]
    X = load_group(filenames, signals_dir)
    y = load_file(prefix + group + '/y_' + group + '.txt')
    return X, y

# load the dataset, returns train and test X and y elements
def load_dataset(prefix=''):
    """Load the train and test splits found under ``<prefix>HARDataset/``.

    Labels are shifted down by one (so they start at zero) and one-hot
    encoded with ``to_categorical``. Array shapes are printed after each
    split is loaded and once more after encoding.

    Returns ``(trainX, trainy, testX, testy)``.
    """
    root = prefix + 'HARDataset/'
    # training split
    trainX, trainy = load_dataset_group('train', root)
    print(trainX.shape, trainy.shape)
    # test split
    testX, testy = load_dataset_group('test', root)
    print(testX.shape, testy.shape)
    # zero-offset the class values, then one-hot encode them
    trainy = to_categorical(trainy - 1)
    testy = to_categorical(testy - 1)
    print(trainX.shape, trainy.shape, testX.shape, testy.shape)
    return trainX, trainy, testX, testy

# fit and evaluate a model
def evaluate_model(trainX, trainy, testX, testy,verbose, epochs, batch_size):
    """Train a fresh LSTM classifier and report its train and test accuracy.

    The network is LSTM(100) -> Dropout(0.5) -> Dense(100, relu) ->
    Dense(n_outputs, softmax), compiled with categorical cross-entropy and
    the Adam optimizer. The input shape is taken from ``trainX.shape[1:3]``
    and the number of outputs from ``trainy.shape[1]``.

    Returns the tuple ``(epochs, batch_size, test_accuracy, train_accuracy)``.
    """
    timesteps = trainX.shape[1]
    features = trainX.shape[2]
    classes = trainy.shape[1]

    model = Sequential()
    layers = (
        LSTM(100, input_shape=(timesteps, features)),
        Dropout(0.5),
        Dense(100, activation='relu'),
        Dense(classes, activation='softmax'),
    )
    for layer in layers:
        model.add(layer)
    model.compile(loss='categorical_crossentropy', optimizer='adam', metrics=['accuracy'])

    # train, then score on the training data followed by the held-out data
    model.fit(trainX, trainy, epochs=epochs, batch_size=batch_size, verbose=verbose)
    train_acc = model.evaluate(trainX, trainy, batch_size=batch_size, verbose=0)[1]
    test_acc = model.evaluate(testX, testy, batch_size=batch_size, verbose=0)[1]
    return epochs, batch_size, test_acc, train_acc

# summarize scores
def summarize_results(scores):
    """Print the raw scores, then their mean and standard deviation."""
    print(scores)
    average = mean(scores)
    spread = std(scores)
    print('Accuracy: %.3f%% (+/-%.3f)' % (average, spread))

# run an experiment
def run_experiment(trainX, trainy, testX, testy, batch=32,
                   epoch_schedule=(40, 50, 60, 100, 120, 150, 200)):
    """Train and evaluate one model per epoch count and summarize test accuracy.

    Parameters
    ----------
    trainX, trainy, testX, testy
        Passed through unchanged to ``evaluate_model``.
    batch : int, default 32
        Batch size used for fitting and evaluation. It has a default so that
        four-argument calls (as made in several cells of this notebook) no
        longer raise TypeError.
    epoch_schedule : iterable of int
        Epoch counts to try, one model per entry. The default is the schedule
        that used to be hard-coded in the loop.

    For every run the epoch count, batch size and the test/train accuracy
    (as percentages) are printed; the test accuracies are then handed to
    ``summarize_results``. Returns None.
    """
    scores = list()
    for run, epoch in enumerate(epoch_schedule, start=1):
        # verbose=0 keeps Keras' per-epoch progress output out of the cell
        epochs, batch_size, testscore, trainacc = evaluate_model(
            trainX, trainy, testX, testy, 0, epoch, batch)
        testscore = testscore * 100.0
        trainacc = trainacc * 100.0
        print("epoch:", epochs, "/batch_size:", batch_size)
        print('test>#%d: %.3f' % (run, testscore))
        print('train>#%d: %.3f' % (run, trainacc))
        scores.append(testscore)
    # summarize results
    summarize_results(scores)

    # run the experiment
    #run_experiment()


Using TensorFlow backend.
  _np_qint8 = np.dtype([("qint8", np.int8, 1)])
  _np_quint8 = np.dtype([("quint8", np.uint8, 1)])
  _np_qint16 = np.dtype([("qint16", np.int16, 1)])
  _np_quint16 = np.dtype([("quint16", np.uint16, 1)])
  _np_qint32 = np.dtype([("qint32", np.int32, 1)])
  np_resource = np.dtype([("resource", np.ubyte, 1)])


In [None]:
# Author's run label: dropout 0.2, 100 nodes.
# NOTE(review): evaluate_model as defined in this notebook uses Dropout(0.5) - confirm which is intended.
for batch_size in (117, 234):
    run_experiment(trainxlist, train_y, testxlist, test_y, batch_size)

epoch: 40 /batch_size: 117
test>#1: 34.000
train>#1: 43.000
epoch: 50 /batch_size: 117
test>#2: 36.000
train>#2: 41.500
epoch: 60 /batch_size: 117
test>#3: 42.000
train>#3: 43.500
epoch: 100 /batch_size: 117
test>#4: 34.000
train>#4: 45.500
epoch: 120 /batch_size: 117
test>#5: 46.000
train>#5: 50.000
epoch: 150 /batch_size: 117
test>#6: 36.000
train>#6: 43.000
epoch: 200 /batch_size: 117
test>#7: 42.000
train>#7: 54.500
[34.00000035762787, 36.000001430511475, 41.999998688697815, 34.00000035762787, 46.00000083446503, 36.000001430511475, 41.999998688697815]
Accuracy: 38.571% (+/-4.371)
epoch: 40 /batch_size: 234
test>#1: 38.000
train>#1: 39.500
epoch: 50 /batch_size: 234
test>#2: 36.000
train>#2: 43.000
epoch: 60 /batch_size: 234
test>#3: 34.000
train>#3: 42.000


In [7]:
# Sweep the epoch schedule with batch size 234 (one full window length per batch).
# Author's run label: dropout 0.2, 100 nodes.
# NOTE(review): evaluate_model as defined in this notebook uses Dropout(0.5) - confirm which is intended.
run_experiment(trainxlist,train_y,testxlist,test_y,234)

epoch: 40 /batch_size: 234
test>#1: 36.000
train>#1: 41.500
epoch: 50 /batch_size: 234
test>#2: 38.000
train>#2: 39.500
epoch: 60 /batch_size: 234
test>#3: 36.000
train>#3: 36.000
epoch: 100 /batch_size: 234
test>#4: 32.000
train>#4: 48.000
epoch: 120 /batch_size: 234
test>#5: 32.000
train>#5: 51.000
epoch: 150 /batch_size: 234
test>#6: 34.000
train>#6: 47.000
epoch: 200 /batch_size: 234
test>#7: 36.000
train>#7: 53.000
[36.000001430511475, 37.99999952316284, 36.000001430511475, 31.999999284744263, 31.999999284744263, 34.00000035762787, 36.000001430511475]
Accuracy: 34.857% (+/-2.100)


In [22]:
# Author's run label: dropout 0.2, 100 nodes.
# NOTE(review): evaluate_model as defined in this notebook uses Dropout(0.5) - confirm which is intended.
for batch_size in (13, 39, 117, 234):
    run_experiment(trainxlist, train_y, testxlist, test_y, batch_size)

epoch: 20 /batch_size: 13
test>#1: 40.000
train>#1: 41.500
epoch: 30 /batch_size: 13
test>#2: 34.000
train>#2: 45.500
epoch: 40 /batch_size: 13
test>#3: 32.000
train>#3: 44.000
epoch: 50 /batch_size: 13
test>#4: 30.000
train>#4: 52.500
epoch: 60 /batch_size: 13
test>#5: 32.000
train>#5: 50.500
epoch: 100 /batch_size: 13
test>#6: 40.000
train>#6: 50.000
[40.0000015348196, 34.000000566244125, 32.0000007301569, 30.000000476837158, 32.0000005364418, 40.00000101327896]
Accuracy: 34.667% (+/-3.944)
epoch: 20 /batch_size: 39
test>#1: 32.000
train>#1: 42.500
epoch: 30 /batch_size: 39
test>#2: 40.000
train>#2: 40.000
epoch: 40 /batch_size: 39
test>#3: 38.000
train>#3: 46.500
epoch: 50 /batch_size: 39
test>#4: 36.000
train>#4: 45.000
epoch: 60 /batch_size: 39
test>#5: 36.000
train>#5: 49.500
epoch: 100 /batch_size: 39
test>#6: 36.000
train>#6: 52.500
[32.00000020861626, 40.00000008940697, 38.00000149011612, 35.99999997019768, 36.00000160932541, 36.00000160932541]
Accuracy: 36.333% (+/-2.427)
epo

In [17]:
# Author's run label: dropout 0.2, 100 nodes.
# NOTE(review): the recorded output reports batch_size 39 although 13 is passed here;
# it appears to come from an earlier run_experiment definition - re-run to refresh.
run_experiment(trainxlist,train_y,testxlist,test_y,13)

epoch: 40 /batch_size: 39
test>#1: 38.000
train>#1: 44.000
epoch: 50 /batch_size: 39
test>#2: 38.000
train>#2: 44.000
epoch: 60 /batch_size: 39
test>#3: 30.000
train>#3: 41.000
[38.00000101327896, 38.000000685453415, 30.00000125169754]
Accuracy: 35.333% (+/-3.771)


In [18]:
# Author's run label: dropout 0.2, 100 nodes.
# NOTE(review): the recorded output lists only epochs 40/50/60, which differs from the
# schedule in the current run_experiment definition - re-run to refresh.
run_experiment(trainxlist,train_y,testxlist,test_y,39)

epoch: 40 /batch_size: 39
test>#1: 28.000
train>#1: 39.500
epoch: 50 /batch_size: 39
test>#2: 36.000
train>#2: 42.000
epoch: 60 /batch_size: 39
test>#3: 34.000
train>#3: 43.500
[28.00000038743019, 35.99999997019768, 34.00000137090683]
Accuracy: 32.667% (+/-3.399)


In [19]:
# Author's run label: dropout 0.2, 100 nodes.
# NOTE(review): the recorded output reports batch_size 39 although 117 is passed here;
# it appears to come from an earlier run_experiment definition - re-run to refresh.
run_experiment(trainxlist,train_y,testxlist,test_y,117)

epoch: 40 /batch_size: 39
test>#1: 40.000
train>#1: 39.000
epoch: 50 /batch_size: 39
test>#2: 44.000
train>#2: 40.000
epoch: 60 /batch_size: 39
test>#3: 34.000
train>#3: 47.000
[40.00000089406967, 44.00000065565109, 34.00000137090683]
Accuracy: 39.333% (+/-4.110)


In [20]:
# Author's run label: dropout 0.2, 100 nodes.
# Batch size corrected from 243 to 234: every other sweep in this notebook uses
# 13/39/117/234 (divisors of the 234-row window), so 243 looks like a transposition.
# NOTE(review): the recorded output reports batch_size 39 and predates this call - re-run to refresh.
run_experiment(trainxlist,train_y,testxlist,test_y,234)

epoch: 40 /batch_size: 39
test>#1: 22.000
train>#1: 42.500
epoch: 50 /batch_size: 39
test>#2: 40.000
train>#2: 42.500
epoch: 60 /batch_size: 39
test>#3: 38.000
train>#3: 45.500
[22.000000566244125, 40.00000089406967, 38.00000065565109]
Accuracy: 33.333% (+/-8.055)


In [25]:
# Author's run label: dropout 0.2, nodes 100 -> 64.
# The batch size is now passed explicitly (13, as reported in this cell's recorded output);
# run_experiment requires it and the four-argument call raised TypeError.
run_experiment(trainxlist,train_y,testxlist,test_y,13)

epoch: 40 /batch_size: 13
test>#1: 38.000
train>#1: 47.000
epoch: 50 /batch_size: 13
test>#2: 32.000
train>#2: 45.000
epoch: 60 /batch_size: 13
test>#3: 36.000
train>#3: 44.500
[38.00000062584877, 32.00000110268593, 36.000001057982445]
Accuracy: 35.333% (+/-2.494)


In [21]:
# Author's run label: dropout 0.1.
# The batch size is now passed explicitly (13, as reported in this cell's recorded output);
# run_experiment requires it and the four-argument call raised TypeError.
run_experiment(trainxlist,train_y,testxlist,test_y,13)

epoch: 40 /batch_size: 13
test>#1: 40.000
train>#1: 44.500
epoch: 50 /batch_size: 13
test>#2: 34.000
train>#2: 48.000
epoch: 60 /batch_size: 13
test>#3: 30.000
train>#3: 45.500
[40.000000700354576, 34.00000098347664, 30.00000083446503]
Accuracy: 34.667% (+/-4.110)


In [20]:
# Author's run label: dropout 0.4.
# The batch size is now passed explicitly (13, as reported in this cell's recorded output);
# run_experiment requires it and the four-argument call raised TypeError.
run_experiment(trainxlist,train_y,testxlist,test_y,13)

epoch: 40 /batch_size: 13
test>#1: 32.000
train>#1: 44.000
epoch: 50 /batch_size: 13
test>#2: 40.000
train>#2: 42.000
epoch: 60 /batch_size: 13
test>#3: 44.000
train>#3: 41.000
[32.00000062584877, 40.00000137090683, 44.00000149011612]
Accuracy: 38.667% (+/-4.989)


In [18]:
# Author's run label: dropout 0.3.
# The batch size is now passed explicitly (13, as reported in this cell's recorded output);
# run_experiment requires it and the four-argument call raised TypeError.
run_experiment(trainxlist,train_y,testxlist,test_y,13)

epoch: 40 /batch_size: 13
test>#1: 34.000
train>#1: 52.500
epoch: 50 /batch_size: 13
test>#2: 40.000
train>#2: 46.000
epoch: 60 /batch_size: 13
test>#3: 38.000
train>#3: 52.000
epoch: 70 /batch_size: 13
test>#4: 32.000
train>#4: 44.000
[34.00000075995922, 40.00000089406967, 38.00000110268593, 32.0000005364418]
Accuracy: 36.000% (+/-3.162)


In [15]:
# The batch size is now passed explicitly (39, as reported in this cell's recorded output);
# run_experiment requires it and the four-argument call raised TypeError.
run_experiment(trainxlist,train_y,testxlist,test_y,39)

epoch: 30 /batch_size: 39
test>#1: 40.000
train>#1: 44.500
epoch: 50 /batch_size: 39
test>#2: 40.000
train>#2: 41.500
epoch: 70 /batch_size: 39
test>#3: 34.000
train>#3: 48.000
[40.00000137090683, 40.00000089406967, 34.000001192092896]
Accuracy: 38.000% (+/-2.828)


In [13]:
# The batch size is now passed explicitly (64, as reported in this cell's recorded output);
# run_experiment requires it and the four-argument call raised TypeError.
run_experiment(trainxlist,train_y,testxlist,test_y,64)

epoch: 50 /batch_size: 64
test>#1: 44.000
train>#1: 47.500
epoch: 70 /batch_size: 64
test>#2: 38.000
train>#2: 49.000
epoch: 100 /batch_size: 64
test>#3: 24.000
train>#3: 44.000
[43.99999976158142, 37.99999952316284, 23.999999463558197]
Accuracy: 35.333% (+/-8.380)


#### Data Preprocessing

In [2]:
# Load the raw train/test CSVs as DataFrames (no missing-value handling yet;
# a later cell reloads the same files with forward-fill applied).
traindata=load_file("../data/train_data.csv")
testdata=load_file("../data/test_data.csv")

In [3]:
# Preview the first rows of the training frame.
# NOTE(review): the recorded output shows two leftover index columns
# ('Unnamed: 0.1', 'Unnamed: 0') next to x, y, z and label - presumably the
# CSV was written with its index more than once; confirm and drop if unused.
traindata.head()

Unnamed: 0.1,Unnamed: 0,x,y,z,label
0,0,-333.0,9664.0,2624.0,0
1,1,-2701.0,14208.0,5184.0,0
2,2,-1677.0,14400.0,3392.0,0
3,3,50.0,17792.0,2368.0,0
4,4,-2189.0,13696.0,1152.0,0


In [3]:
# Rows per window and the number of training rows to segment (200 windows of 234 rows).
WINDOW_SIZE = 234
N_TRAIN_ROWS = 46800

# Forward-fill missing readings. .ffill() replaces the deprecated
# fillna(method='pad') and gives the same result.
traindata = load_file("../data/train_data.csv").ffill()
testdata = load_file("../data/test_data.csv").ffill()

trainxlist = list()
trainylist = list()
testylist = list()
for start in range(0, N_TRAIN_ROWS, WINDOW_SIZE):
    window = traindata[['x', 'y', 'z']].iloc[start:start + WINDOW_SIZE]
    trainxlist.append(window)
    # each window is labelled with the label of its first row
    trainylist.append(traindata['label'].iloc[start])

# No try/except here: if the windows are ragged, stack() raises a clear
# ValueError instead of silently leaving trainxlist as a Python list.
trainxlist = stack(trainxlist, axis=0)

In [4]:
# Sanity check: the recorded output is (200, 234, 3), i.e. (windows, rows per window, axes).
trainxlist.shape

(200, 234, 3)

In [5]:
testxlist = list()
# Reset the labels here as well, so re-running this cell does not append
# a second copy of every label to testylist.
testylist = list()
for start in range(0, 11700, 234):
    window = testdata[['x', 'y', 'z']].iloc[start:start + 234]
    testxlist.append(window)
    # each window is labelled with the label of its first row
    testylist.append(testdata['label'].iloc[start])

# No try/except here: if the windows are ragged, stack() raises a clear
# ValueError instead of silently leaving testxlist as a Python list.
testxlist = stack(testxlist, axis=0)

In [6]:
#train_x,train_y=load_cols("../data/train_data.csv")
#test_x,test_y=load_cols("../data/test_data.csv")
# Use one shared class count for both splits. Without num_classes,
# to_categorical sizes each matrix from that split's own largest label, so the
# two could end up with different widths if a split lacks the highest class.
n_classes = int(max(max(trainylist), max(testylist))) + 1
train_y = to_categorical(trainylist, num_classes=n_classes)
test_y = to_categorical(testylist, num_classes=n_classes)
print(test_y.shape)
print(train_y.shape) # (number of windows, number of classes)

(50, 5)
(200, 5)


In [None]:
import time
from sklearn import svm
from sklearn import metrics
from pprint import pprint
from sklearn.model_selection import RandomizedSearchCV
from sklearn.model_selection import GridSearchCV
from sklearn.metrics import classification_report, confusion_matrix, f1_score,accuracy_score
from sklearn.model_selection import ShuffleSplit

# Start the timer for the elapsed-time report at the end of the cell.
# Previously `time` was never imported and `start_time` never set, so the
# final print raised NameError.
start_time = time.time()

# Forward-fill missing readings (.ffill() replaces the deprecated fillna(method='pad')).
traindata = load_file("../data/train_data.csv").ffill()
testdata = load_file("../data/test_data.csv").ffill()
# Per-row features and labels (no windowing for the SVM baseline).
X_train = traindata[['x','y','z']]
X_test = testdata[['x','y','z']]
y_train = traindata['label']
y_test = testdata['label']
C = [.1,1,10,100]
# NOTE(review): probability=True adds internal cross-validation to every fit
# and only predict() is used below - confirm it is needed.
probability = [True]

param_grid = [
  {'C': C, 'kernel':['linear'], 'probability':probability}
]

# Create a base model
svc = svm.SVC(decision_function_shape ='ovo', random_state=8)

# Manually create the splits in CV in order to be able to fix a random_state (GridSearchCV doesn't have that argument)
#cv_sets = ShuffleSplit(n_splits = 10, test_size = .13, random_state = 8)

# Instantiate the grid search model
grid_search = GridSearchCV(estimator=svc,
                           param_grid=param_grid,
                           scoring='accuracy',
                           cv=5)

# Fit the grid search to the data
grid_search.fit(X_train, y_train)
best_svc = grid_search.best_estimator_
svc_pred = best_svc.predict(X_test)
print(grid_search.best_params_)
# grid_search.predict delegates to the refit best estimator, so reuse the
# predictions already computed instead of predicting a second time.
y_hat = svc_pred
print(metrics.classification_report(y_test,y_hat,digits=4))
print("--- %s seconds ---"%(time.time()-start_time))
print("----------------------------------")