In [1]:
from sklearn.pipeline import Pipeline
from sklearn.impute import SimpleImputer
from sklearn.preprocessing import StandardScaler

from sklearn.linear_model import LogisticRegression
from sklearn.model_selection import StratifiedShuffleSplit
from sklearn.tree import DecisionTreeClassifier
from sklearn.metrics import make_scorer

from mlxtend.feature_selection import SequentialFeatureSelector as SFS

%matplotlib inline
import matplotlib as mpl
import matplotlib.pyplot as plt

from utilize.data import *
from utilize.transform import *
from utilize.feature_selection import *
from utilize.test import *
from utilize.model import *

Found 60 users data.


In [2]:
import importlib
import utilize.transform as transform
import utilize.test as test
import utilize.model as model
importlib.reload(transform)
importlib.reload(test)
importlib.reload(model)

<module 'utilize.model' from 'C:\\Users\\zifan\\OneDrive\\Desktop\\Zifan Xu\\Datamining\\Projects\\extrasensory_Xu\\utilize\\model.py'>

In [3]:
 X, y, M, user_index, feature_names, label_names = load_all_data()

In [4]:
# Only select body state label
target_label = ['LYING_DOWN', 'SITTING', 'FIX_walking', 'FIX_running', 'BICYCLING', 'OR_standing']

# Use the last 5 user's data as test set
test_uuid = list(range(56, 61))

# Fill the Nan with mean value and normalize all the data 
pipeline = Pipeline([
    ('imputer', SimpleImputer(strategy='mean')),
    ('std_scaler', StandardScaler())
])

In [5]:
# Transform 
# 1. select target labels 
# 2. tansform feature matrix fill None with mean and do the normalization
# 3. Split train, validation and test set by ratio of 6:2:2
X_new, y_new, M_new = select_target_labels(X, y, M, target_label, label_names, drop_all_zero = False)
X_new = pipeline.fit_transform(X_new, y_new)
X_train, y_train, M_train, X_val, y_val, M_val, X_test, y_test, M_test = random_split(X_new, y_new, M_new, test_size = 0.2, val_size = 0.2, random_seed = 42)

In [6]:
# Initialize a model and train
mlp = MLP_model([16, 16], target_label, epoches = 15, learning_rate = 0.00005)
mlp.fit(X_train, y_train, X_val, y_val, M_train, M_val, report = True)

Test epoch 0:
accuaracy      sensitivity    specificity    BA             
0.720233       0.787645       0.669095       0.728370       
Test epoch 1:
accuaracy      sensitivity    specificity    BA             
0.799931       0.787579       0.795613       0.791596       
Test epoch 2:
accuaracy      sensitivity    specificity    BA             
0.802472       0.824523       0.794871       0.809697       
Test epoch 3:
accuaracy      sensitivity    specificity    BA             
0.800975       0.833797       0.797875       0.815836       
Test epoch 4:
accuaracy      sensitivity    specificity    BA             
0.807579       0.835091       0.802889       0.818990       
Test epoch 5:
accuaracy      sensitivity    specificity    BA             
0.817197       0.837320       0.812835       0.825078       
Test epoch 6:
accuaracy      sensitivity    specificity    BA             
0.822744       0.837834       0.818693       0.828263       
Test epoch 7:
accuaracy      sensitivity    spec

In [8]:
print('learning rate = 0.00005 and train 15 epoches seem enough for get a good result')

learning rate = 0.00005 and train 15 epoches seem enough for get a good result


In [None]:
# 50 min select one feature with epoch 40
# if we reduce to 15 epoch, it will take 19 min to select a feature
# Total time if we do SBS: 19/225 * (175 + 225)/2 * 50 min = 14 h
# Seems a possible time to reach now! 

mlp = MLP_model([16, 16], target_label, epoches = 15, learning_rate = 0.00005)

BA_val_scoring = make_scorer(score_function, W_test = abs(1-M_train))
sbs = SFS(mlp, 
           k_features=175, 
           forward=False, 
           floating=False, 
           verbose=2,
           scoring=BA_val_scoring,
           n_jobs = -1,  
           cv=0)
sbs.fit(X_train, y_train, M_train = M_train)

[Parallel(n_jobs=-1)]: Using backend LokyBackend with 8 concurrent workers.
[Parallel(n_jobs=-1)]: Done  25 tasks      | elapsed:  7.6min
[Parallel(n_jobs=-1)]: Done 146 tasks      | elapsed: 33.2min
[Parallel(n_jobs=-1)]: Done 225 out of 225 | elapsed: 48.3min finished

[2020-04-19 00:06:15] Features: 224/175 -- score: 0.8465497427982834[Parallel(n_jobs=-1)]: Using backend LokyBackend with 8 concurrent workers.
[Parallel(n_jobs=-1)]: Done  25 tasks      | elapsed:  9.8min
[Parallel(n_jobs=-1)]: Done 146 tasks      | elapsed: 35.2min
[Parallel(n_jobs=-1)]: Done 224 out of 224 | elapsed: 50.0min finished

[2020-04-19 00:56:14] Features: 223/175 -- score: 0.8441079197433[Parallel(n_jobs=-1)]: Using backend LokyBackend with 8 concurrent workers.
[Parallel(n_jobs=-1)]: Done  25 tasks      | elapsed:  7.4min
[Parallel(n_jobs=-1)]: Done 146 tasks      | elapsed: 32.9min
[Parallel(n_jobs=-1)]: Done 223 out of 223 | elapsed: 48.2min finished

[2020-04-19 01:44:23] Features: 222/175 -- score: 0