In [25]:
import numpy as np
import pandas as pd
import math
import matplotlib.pyplot as plt
import seaborn as sns
import os
from mpl_toolkits.mplot3d import axes3d
from sklearn.preprocessing import LabelEncoder
from sklearn.model_selection import train_test_split
from sklearn.ensemble import RandomForestClassifier
from sklearn import svm
from sklearn.model_selection import cross_val_score
from sklearn.model_selection import GridSearchCV
from sklearn.metrics import accuracy_score
from sklearn.naive_bayes import GaussianNB
import sklearn.naive_bayes as nb
import xgboost as xgb
from xgboost.sklearn import XGBClassifier
from hyperopt import STATUS_OK, Trials, fmin, hp, tpe
from sklearn.neighbors import KNeighborsClassifier
from sklearn.metrics import classification_report
from sklearn.metrics import roc_auc_score
from tensorflow.keras.layers import LSTM,Conv1D,MaxPooling1D
from tensorflow.keras.models import Sequential 
from tensorflow.keras.layers import Dense 
import tensorflow.keras.backend as K 
from tensorflow.keras.callbacks import EarlyStopping
from tensorflow.keras.layers import Dropout,Input,Dense,Activation,Flatten,SeparableConv2D,BatchNormalization
from tensorflow.keras.optimizers import Adam
from tensorflow.keras import layers
from tensorflow.keras import models
from tensorflow.keras import optimizers
from tensorflow.keras.models import Sequential
import tensorflow as tf
from tqdm import tqdm
from keras.callbacks import ModelCheckpoint,EarlyStopping
from tensorflow.keras.layers import TimeDistributed
from sklearn.utils import resample
import pickle
from sklearn.externals import joblib
from sklearn.externals.joblib import parallel_backend

gpus = tf.config.experimental.list_physical_devices('GPU')
if gpus:
    try:
        for gpu in gpus:
            tf.config.experimental.set_memory_growth(gpu, True)
        logical_gpus = tf.config.experimental.list_logical_devices('GPU')
        print(len(gpus), "Physical GPUs,", len(logical_gpus), "Logical GPUs")
    except RuntimeError as e:
        print(e)

config = tf.ConfigProto() 
config.gpu_options.per_process_gpu_memory_fraction = 0.9 
session = tf.Session(config=config)

1 Physical GPUs, 1 Logical GPUs


In [26]:
class load:
    def __init__(self):
        self.file_name_dir = []
        self.total_data = []
        self.total_label = []
    
    def load_file(self,dir_location):
        print('now loading_file (location : ' + dir_location + ') ... \n')
        
        for root,dirs,files in os.walk(dir_location):
            for fname in files:
                full_fname = os.path.join(root,fname)
                self.file_name_dir.append(full_fname)
        
        print('make file list complete')
    
    def make_DataFrame(self,tar_li,p_n):
        for file_name in tqdm(self.file_name_dir):
            sp = file_name.split('/')
            tmp_label = sp[1]
            d = open(file_name,'r',encoding='UTF8').read()
            data = d.split('\n')
            data.pop(0) # remove trash data header
            index = data.pop(0)
            tmp_real_data = []
            for dat_num in range(len(data)):
                if data[dat_num] == '':
                    continue
                tmp_real_data.append(data[dat_num].split(','))
            
            df = pd.DataFrame(tmp_real_data)
            index_li = index.split(',')
            df.columns = index_li
            
            #now change str to float
            
            for y in index_li:
                df[y] = pd.to_numeric(df[y],downcast='float')
            
            tmp_li = []
            for i in range(len(df)):
                tmp = []
                for j in tar_li:
                    tmp.append((df[j][i]/1000)**p_n)
                tmp_li.append(tmp)
            
            self.total_data.append(tmp_li)
            self.total_label.append(tmp_label)
        print('make total_data finish.....')
    
    
    
        
    
    def return_data(self):
        return self.total_data , self.total_label

class Train_model:
    
    def __init__(self):
        
        self.encoder = LabelEncoder()
        self.enc_label = 0
        
        self.total_data = 0
        self.total_label = 0
        
        self.x_train = 0
        self.y_train = 0
        self.x_test = 0
        self.y_test = 0
        
        self.earlystopping = EarlyStopping(monitor='val_loss',patience=10)
        
        #model list
        self.lstm = 0
        self.svm = 0
        self.xgboost = 0
        self.nb = 0
        self.rf =0
        self.knn = 0
        
        #sample prediction
        self.sample_data = 0
        self.sample_label = 0
        
    def get_enc(self):
        self.enc_label = self.encoder.fit_transform(self.total_label)
    
    
    
    def make_arr(self):
        self.total_data = np.array(self.total_data)
        self.enc_label =np.array(self.enc_label)
        
    def Data_Augmentation(self,nu):
        
        #Data Augmentation is very optional Function
        
        tmp_li = [0]
        for x in range(len(self.enc_label)-1):
            if self.enc_label[x] != self.enc_label[x+1]:
                tmp_li.append(x)
        tmp_li.append(len(self.enc_label)-1)
        print('Augmentation Data index is : ',tmp_li)
        
        div_data = []
        div_label = []
        for x in range(len(tmp_li)-1):
            div_tmp =[]
            div_la = []
            for y in range(tmp_li[x]+1,tmp_li[x+1]+1):
                div_tmp.append(self.total_data[y])
                div_la.append(self.enc_label[y])
            div_data.append(div_tmp)
            div_label.append(div_la)
        
        
        boot = []
        label = []
        t_li = [5,1,7,6,0,4,2,3]
        for x in range(len(div_data)):
            tmp = resample(div_data[x],replace=True,n_samples = nu,random_state=1)
            boot+=tmp
            label += list(t_li[x] for i in range(nu))
        
        boot = np.array(boot)
        label = np.array(label)
        
        self.total_data = np.append(self.total_data,boot,axis=0)
        self.enc_label = np.append(self.enc_label,label,axis=0)
        
    def divide_dataset(self,mode):
        
        if mode == 'lstm':
            self.x_train,self.x_test,self.y_train,self.y_test = train_test_split(self.total_data,self.enc_label,test_size=0.2,random_state=0)

        else:
            self.x_train,self.x_test,self.y_train,self.y_test = train_test_split(self.total_data,self.enc_label,test_size=0.2,random_state=0)
            nsamples,nx,ny = self.x_train.shape
            self.x_train = self.x_train.reshape((nsamples,nx*ny))
            nsamples,nx,ny = self.x_test.shape
            self.x_test = self.x_test.reshape((nsamples,nx*ny))
        
    def model_create_train(self,mode):
        if mode == 'lstm':
            with tf.device('/GPU:0'):
                model = Sequential() # Sequeatial Model 
                model.add(LSTM(180, input_shape=(60,3),return_sequences = True)) # (timestep, feature) 
                model.add(Dropout(0.2))
                model.add(Conv1D(128,
                                 2,
                                 padding='valid',
                                 activation='relu',
                                 strides=1))
                model.add(MaxPooling1D(pool_size=4))
                model.add(LSTM(128))
                model.add(Dense(8, activation='softmax'))

                # 3. 모델 학습과정 설정하기
                model.compile(loss='sparse_categorical_crossentropy', optimizer='adam', metrics=['accuracy'])

                hist = model.fit(self.x_train, self.y_train, epochs=100, batch_size=256,callbacks=[self.earlystopping] ,validation_data=(self.x_test, self.y_test))
                #model.save('model_x.h5')
                #model.save_weights('model_x_weights.h5')
            self.lstm = model
            
        elif mode=='svm':
            #####here to change####
            ######################################################################

            param_grid = {'C': [0.1, 1, 10, 100, 1000],  
              'gamma': [1, 0.1, 0.01, 0.001, 0.0001], 
              'kernel': ['rbf']}  
  
            grid = GridSearchCV(svm.SVC(), param_grid, refit = True, verbose = 3) 

            # fitting the model for grid search
            with parallel_backend('threading'):
                grid.fit(self.x_train, self.y_train) 
                print(grid.best_params_) 

            # print how our model looks after hyper-parameter tuning 
            print(grid.best_estimator_)
            mod = grid
            

            ######################################################################
            
            predict_model = mod.fit(self.x_train,self.y_train)
            print('fitting ',mode,' is complete...')
            print(mode,'score is :',predict_model.score(self.x_test,self.y_test))

            prediction = predict_model.predict(self.x_test)
            self.svm = mod
            joblib.dump(mod,str(mode)+'_model.pkl')
        elif mode=='xgboost':
            #####here to change####
            ######################################################################

            space={'max_depth': hp.quniform("max_depth", 3, 18, 1),
                    'gamma': hp.uniform ('gamma', 1,9),
                    'reg_alpha' : hp.quniform('reg_alpha', 40,180,1),
                    'reg_lambda' : hp.uniform('reg_lambda', 0,1),
                    'colsample_bytree' : hp.uniform('colsample_bytree', 0.5,1),
                    'min_child_weight' : hp.quniform('min_child_weight', 0, 10, 1),
                    'n_estimators': 180,
                    'seed': 0
                }

            mod =xgb.XGBClassifier(
                                n_estimators =space['n_estimators'], max_depth = space['max_depth'], gamma = space['gamma'],
                                reg_alpha = space['reg_alpha'],min_child_weight=space['min_child_weight'],
                                colsample_bytree=space['colsample_bytree'])

            

            ######################################################################
            
            predict_model = mod.fit(self.x_train,self.y_train)
            print('fitting ',mode,' is complete...')
            print(mode,'score is :',predict_model.score(self.x_test,self.y_test))

            prediction = predict_model.predict(self.x_test)
            self.xgboost = mod
        
        elif mode=='nb':
            #####here to change####
            ######################################################################

            mod = GaussianNB()

            ######################################################################
            
            predict_model = mod.fit(self.x_train,self.y_train)
            print('fitting ',mode,' is complete...')
            print(mode,'score is :',predict_model.score(self.x_test,self.y_test))

            prediction = predict_model.predict(self.x_test)
            self.nb = mod
        
        
        elif mode=='rf':
            #####here to change####
            ######################################################################
            
            rfc=RandomForestClassifier(random_state=42)
            param_grid = { 
                'n_estimators': [10,15,20,30,40,50,100],
                'max_features': ['auto', 'sqrt', 'log2'],
                'max_depth' : [3,4,5,6,7,8,9],
                'criterion' :['gini', 'entropy']
            }

            mod = GridSearchCV(estimator=rfc, param_grid=param_grid, cv= 10,n_jobs=4)

            
            ######################################################################
            
            predict_model = mod.fit(self.x_train,self.y_train)
            print('fitting ',mode,' is complete...')
            print(mode,'score is :',predict_model.score(self.x_test,self.y_test))

            prediction = predict_model.predict(self.x_test)
            self.rf = mod
        
        elif mode=='knn':
            #####here to change####
            ######################################################################
            
            leaf_size = list(range(1,30))
            n_neighbors = list(range(1,8))
            p=[1,2]

            hyperparameters = dict(leaf_size=leaf_size, n_neighbors=n_neighbors, p=p)

            knn = KNeighborsClassifier()

            mod = GridSearchCV(knn, hyperparameters, cv=10, n_jobs=-1)

            
            ######################################################################
            
            predict_model = mod.fit(self.x_train,self.y_train)
            print('fitting ',mode,' is complete...')
            print(mode,'score is :',predict_model.score(self.x_test,self.y_test))

            prediction = predict_model.predict(self.x_test)
            self.knn = mod
        
    
    def prediction(self,input_axis,mode,p_n):
        p = load()
        p.load_file('test_data')
        p.make_DataFrame(input_axis,p_n)
        self.sample_data , self.sample_label = p.return_data()
        
        if mode == 'lstm':
            self.sample_data = np.array(self.sample_data)
            print('here is :',self.sample_data[0])
            sample_pred = self.lstm.predict(self.sample_data)
            sample_pred = np.argmax(sample_pred,axis=-1)
            lab = self.encoder.inverse_transform(sample_pred)
            
            hit = 0
            miss = 0
            answer=[]
            print('testing new data result :\n[answer]  -->  [predict err]')
            for x in range(len(lab)):
                if lab[x] == self.sample_label[x]:
                    hit+=1
                    answer.append(lab[x])
                else:
                    miss+=1
                    print(self.sample_label[x],' --> ' ,lab[x],'        err_index number : ',x)


            print('hit: ',hit,' miss : ',miss,'percent : ',(100*hit)/(hit+miss))
        
        else:
            model_list = ['svm','knn','rf','nb','xgboost']
            match_list = [self.svm , self.knn , self.rf , self.nb , self.xgboost]
            
            for x in range(len(model_list)):
                if model_list[x] == mode:
                    mod = match_list[x]
                    print(mode + 'model match complete.....')
                
            self.sample_data = np.array(self.sample_data)
            nsamples , nx , ny = self.sample_data.shape
            sample = self.sample_data.reshape((nsamples,nx*ny))
            print('here is :',sample[0])
            sample_pred = mod.predict(sample)
            lab = self.encoder.inverse_transform(sample_pred)
            
            hit = 0
            miss = 0
            answer=[]
            print('testing new data result :\n[answer]  -->  [predict err]')
            for x in range(len(lab)):
                if lab[x] == self.sample_label[x]:
                    hit+=1
                    answer.append(lab[x])
                else:
                    miss+=1
                    print(self.sample_label[x],' --> ' ,lab[x],'        err_index number : ',x)


            print('hit: ',hit,' miss : ',miss,'percent : ',(100*hit)/(hit+miss))
            
    


In [27]:
def IO(tar_dir,input_axis,p_n):
    total_dat = []
    total_lab = []
    v = load()
    v.load_file(tar_dir)
    v.make_DataFrame(input_axis,p_n)
    total_dat,total_lab = v.return_data()
    return total_dat , total_lab

def pipline(total_data,total_label,input_axis,mode,p_n,aug):
    t = Train_model()
    t.total_data = total_data
    t.total_label = total_label
    t.get_enc()
    t.make_arr()
    if aug != 0:
        t.Data_Augmentation(5000)
    t.divide_dataset(mode)
    t.model_create_train(mode)
    t.prediction(input_axis,mode,p_n)

    
def prac_machine(tar_dir,input_axis,mode_name,power,is_aug):
    total_data = []
    total_label = []
    print('Dir : '+tar_dir+'\nthis ML model name is '+mode_name+'\npower : '+str(power),'\n\n\n')
    total_data,total_label = IO(tar_dir,input_axis,power)
    pipline(total_data,total_label,input_axis,mode_name,power,is_aug)

In [29]:
dir_name = 'swing'
input_axis = ['AX','AY','AZ']
mode_list = ['svm']
power = 1
is_aug = 0

for mode_name in mode_list:
    prac_machine(dir_name,input_axis,mode_name,power,is_aug)

  0%|          | 18/12331 [00:00<01:10, 173.97it/s]

Dir : swing
this ML model name is svm
power : 1 



now loading_file (location : swing) ... 

make file list complete


100%|██████████| 12331/12331 [01:11<00:00, 172.04it/s]


make total_data finish.....


[Parallel(n_jobs=-1)]: Using backend ThreadingBackend with 12 concurrent workers.


Fitting 3 folds for each of 25 candidates, totalling 75 fits
[CV] C=0.1, gamma=1, kernel=rbf ......................................
[CV] C=0.1, gamma=1, kernel=rbf ......................................
[CV] C=0.1, gamma=1, kernel=rbf ......................................
[CV] C=0.1, gamma=0.1, kernel=rbf ....................................
[CV] C=0.1, gamma=0.1, kernel=rbf ....................................
[CV] C=0.1, gamma=0.1, kernel=rbf ....................................
[CV] C=0.1, gamma=0.01, kernel=rbf ...................................
[CV] C=0.1, gamma=0.01, kernel=rbf ...................................
[CV] C=0.1, gamma=0.01, kernel=rbf ...................................
[CV] C=0.1, gamma=0.001, kernel=rbf ..................................
[CV] C=0.1, gamma=0.001, kernel=rbf ..................................
[CV] C=0.1, gamma=0.001, kernel=rbf ..................................
[CV] ....... C=0.1, gamma=0.01, kernel=rbf, score=0.898, total=  21.5s
[CV] C=0.1, gamm

[Parallel(n_jobs=-1)]: Done   8 tasks      | elapsed:   49.7s


[CV] ........ C=0.1, gamma=0.1, kernel=rbf, score=0.363, total=  50.0s
[CV] C=1, gamma=0.1, kernel=rbf ......................................
[CV] .......... C=0.1, gamma=1, kernel=rbf, score=0.150, total=  55.0s
[CV] C=1, gamma=0.01, kernel=rbf .....................................
[CV] .......... C=0.1, gamma=1, kernel=rbf, score=0.150, total=  55.9s
[CV] C=1, gamma=0.01, kernel=rbf .....................................
[CV] .......... C=0.1, gamma=1, kernel=rbf, score=0.150, total=  56.3s
[CV] C=1, gamma=0.01, kernel=rbf .....................................
[CV] ..... C=0.1, gamma=0.0001, kernel=rbf, score=0.708, total=  43.0s
[CV] C=1, gamma=0.001, kernel=rbf ....................................
[CV] ..... C=0.1, gamma=0.0001, kernel=rbf, score=0.704, total=  43.1s
[CV] C=1, gamma=0.001, kernel=rbf ....................................
[CV] ..... C=0.1, gamma=0.0001, kernel=rbf, score=0.715, total=  43.2s
[CV] C=1, gamma=0.001, kernel=rbf ....................................
[CV] .

[CV] ....... C=1000, gamma=0.1, kernel=rbf, score=0.780, total=  39.8s
[CV] ....... C=1000, gamma=0.1, kernel=rbf, score=0.782, total=  38.7s
[CV] ....... C=1000, gamma=0.1, kernel=rbf, score=0.771, total=  39.5s
[CV] ......... C=1000, gamma=1, kernel=rbf, score=0.208, total=  42.6s
[CV] ......... C=1000, gamma=1, kernel=rbf, score=0.204, total=  43.0s
[CV] ......... C=1000, gamma=1, kernel=rbf, score=0.209, total=  43.1s


[Parallel(n_jobs=-1)]: Done  75 out of  75 | elapsed:  3.0min finished


{'C': 100, 'gamma': 0.01, 'kernel': 'rbf'}
SVC(C=100, cache_size=200, class_weight=None, coef0=0.0,
    decision_function_shape='ovr', degree=3, gamma=0.01, kernel='rbf',
    max_iter=-1, probability=False, random_state=None, shrinking=True,
    tol=0.001, verbose=False)
Fitting 3 folds for each of 25 candidates, totalling 75 fits
[CV] C=0.1, gamma=1, kernel=rbf ......................................


[Parallel(n_jobs=1)]: Using backend SequentialBackend with 1 concurrent workers.


[CV] .......... C=0.1, gamma=1, kernel=rbf, score=0.150, total=  19.4s
[CV] C=0.1, gamma=1, kernel=rbf ......................................


[Parallel(n_jobs=1)]: Done   1 out of   1 | elapsed:   19.4s remaining:    0.0s


[CV] .......... C=0.1, gamma=1, kernel=rbf, score=0.150, total=  19.3s
[CV] C=0.1, gamma=1, kernel=rbf ......................................


[Parallel(n_jobs=1)]: Done   2 out of   2 | elapsed:   38.7s remaining:    0.0s


[CV] .......... C=0.1, gamma=1, kernel=rbf, score=0.150, total=  19.4s
[CV] C=0.1, gamma=0.1, kernel=rbf ....................................
[CV] ........ C=0.1, gamma=0.1, kernel=rbf, score=0.379, total=  17.4s
[CV] C=0.1, gamma=0.1, kernel=rbf ....................................
[CV] ........ C=0.1, gamma=0.1, kernel=rbf, score=0.363, total=  17.3s
[CV] C=0.1, gamma=0.1, kernel=rbf ....................................
[CV] ........ C=0.1, gamma=0.1, kernel=rbf, score=0.371, total=  17.3s
[CV] C=0.1, gamma=0.01, kernel=rbf ...................................
[CV] ....... C=0.1, gamma=0.01, kernel=rbf, score=0.901, total=   7.3s
[CV] C=0.1, gamma=0.01, kernel=rbf ...................................
[CV] ....... C=0.1, gamma=0.01, kernel=rbf, score=0.903, total=   7.3s
[CV] C=0.1, gamma=0.01, kernel=rbf ...................................
[CV] ....... C=0.1, gamma=0.01, kernel=rbf, score=0.898, total=   7.3s
[CV] C=0.1, gamma=0.001, kernel=rbf ..................................
[CV] .

[CV] ......... C=1000, gamma=1, kernel=rbf, score=0.208, total=  19.6s
[CV] C=1000, gamma=1, kernel=rbf .....................................
[CV] ......... C=1000, gamma=1, kernel=rbf, score=0.204, total=  19.5s
[CV] C=1000, gamma=1, kernel=rbf .....................................
[CV] ......... C=1000, gamma=1, kernel=rbf, score=0.209, total=  19.6s
[CV] C=1000, gamma=0.1, kernel=rbf ...................................
[CV] ....... C=1000, gamma=0.1, kernel=rbf, score=0.780, total=  17.2s
[CV] C=1000, gamma=0.1, kernel=rbf ...................................
[CV] ....... C=1000, gamma=0.1, kernel=rbf, score=0.771, total=  17.2s
[CV] C=1000, gamma=0.1, kernel=rbf ...................................
[CV] ....... C=1000, gamma=0.1, kernel=rbf, score=0.782, total=  17.3s
[CV] C=1000, gamma=0.01, kernel=rbf ..................................
[CV] ...... C=1000, gamma=0.01, kernel=rbf, score=0.976, total=   4.8s
[CV] C=1000, gamma=0.01, kernel=rbf ..................................
[CV] .

[Parallel(n_jobs=1)]: Done  75 out of  75 | elapsed: 12.8min finished


fitting  svm  is complete...
svm score is : 0.9801378192136198


 21%|██        | 18/87 [00:00<00:00, 171.42it/s]

now loading_file (location : test_data) ... 

make file list complete


100%|██████████| 87/87 [00:00<00:00, 170.95it/s]


make total_data finish.....
svmmodel match complete.....
here is : [ 0.452  0.262 -0.791  0.453  0.25  -0.811  0.393  0.247 -0.857  0.372
  0.258 -0.895  0.36   0.272 -0.882  0.376  0.275 -0.946  0.384  0.259
 -1.015  0.397  0.23  -0.984  0.355  0.189 -0.969  0.275  0.177 -1.029
  0.216  0.161 -1.076  0.246  0.153 -1.091  0.258  0.156 -1.057  0.224
  0.16  -1.096  0.202  0.173 -1.107  0.212  0.244 -1.036  0.199  0.312
 -0.989  0.244  0.413 -0.971  0.32   0.447 -0.914  0.348  0.414 -0.87
  0.421  0.195 -0.908  0.462 -0.093 -1.122  0.571 -0.636 -1.263  0.519
 -1.078 -1.053  0.631 -1.945 -0.694  0.616 -2.818 -0.853  0.683 -4.043
 -1.156  1.411 -4.546 -0.603  3.086 -4.264  0.326  3.361 -3.84   0.327
  4.354 -3.639  0.247  5.494 -2.612  0.436  5.872 -2.543  0.353  6.143
 -2.598  0.662  5.864 -2.596  0.532  4.391 -2.774  0.828  3.746 -3.03
  0.894  3.356 -3.147  0.952  3.13  -3.014  1.118  2.741 -2.769  1.163
  2.117 -2.534  1.163  1.756 -2.386  1.161  1.651 -2.218  1.087  1.756
 -2.083  1.1