# Import Libraries


In [1]:
import numpy as np
import pandas as pd

from seglearn.pipe import Pype
from seglearn.transform import Interp, Segment, patch_sampler, FeatureRep
from seglearn.feature_functions import base_features, all_features, hudgins_features, emg_features
from seglearn.datasets import load_watch

from sklearn.base import BaseEstimator, TransformerMixin
from sklearn.ensemble import RandomForestClassifier
from sklearn.svm import SVC
from sklearn.neural_network import MLPClassifier
from sklearn.decomposition import PCA
from sklearn.preprocessing import StandardScaler, RobustScaler, MinMaxScaler, FunctionTransformer, PolynomialFeatures
from sklearn.model_selection import GridSearchCV, GroupKFold, KFold, cross_validate 
from sklearn.metrics import confusion_matrix, plot_confusion_matrix, accuracy_score, make_scorer, f1_score, accuracy_score
from sklearn.linear_model import LogisticRegression

from imblearn.under_sampling import RandomUnderSampler
from imblearn.over_sampling import RandomOverSampler

import matplotlib.pyplot as plt

import random
import itertools

from joblib import dump, load
#%matplotlib notebook

# Important paths

In [2]:
# Relative directory path, presumably where figures are saved.
# NOTE(review): not referenced by any of the visible cells — confirm it is still needed.
graphs_path = "./Graphs/"

## Loading the Data

In [3]:
# Read the preprocessed recordings and drop the two columns that are not
# used further ("Unnamed: 0" and "index" — presumably leftover index
# columns from earlier CSV exports; confirm against the preprocessing step).
df_data = (
    pd.read_csv("./Data/Preprocessed_Data/Data.csv")
    .drop(columns=["Unnamed: 0", "index"])
)

## Select the user

In [4]:
# --- Configuration for this cell -------------------------------------------
SELECTED_USER_ID = 6
SENSOR_COLUMNS = ["ACCEL_X", "ACCEL_Y", "ACCEL_Z", "GYRO_X", "GYRO_Y", "GYRO_Z"]
SEGMENT_WIDTH = 50      # samples per window
SEGMENT_OVERLAP = 0.5   # fraction of a window shared with the next one
SEGMENT_SEED = 42       # fixes the shuffle order so results are reproducible

# Filter into a new frame instead of overwriting (and deleting) `df_data`,
# so this cell can be re-run without reloading the CSV.
user_data = df_data.loc[df_data["USER_ID"] == SELECTED_USER_ID]
if user_data.empty:
    raise ValueError(f"No rows found for USER_ID == {SELECTED_USER_ID}")

# The segmenter is fed a list of time series; here it holds a single series
# (all rows of the selected user) with one activity label per row.
X_series = [user_data[SENSOR_COLUMNS].to_numpy()]
y_series = [user_data["ACTIVITY_ID"].to_numpy()]
del user_data  # the filtered copy is no longer needed once converted to arrays

# Segment up front: the data is one continuous series (a single sample), so it
# cannot be split into cross-validation folds as-is. After segmentation every
# window is its own sample.
# NOTE(review): with overlapping, shuffled windows, neighbouring windows share
# raw samples and may end up in different folds of a later K-fold split, which
# can make the cross-validation score optimistic.
segment = Segment(
    width=SEGMENT_WIDTH,
    overlap=SEGMENT_OVERLAP,
    shuffle=True,
    random_state=SEGMENT_SEED,
)
segmented = segment.fit_transform(X_series, y_series)
X, y = segmented[0], segmented[1]

# Defining the features and the pipeline

In [5]:
# Scorers. Only `accuracy_scorer` is handed to the pipeline below;
# `f1_scorer` (macro-averaged F1) is defined but not used in the visible cells.
accuracy_scorer = make_scorer(accuracy_score)
f1_scorer = make_scorer(f1_score, average="macro")

# Feature set: the base features extended by the EMG features
# (on a key clash the EMG entry wins, as it is merged last).
feature_functions = {}
feature_functions.update(base_features())
feature_functions.update(emg_features())

# Feature extraction -> robust scaling -> PCA -> multinomial logistic regression.
pipeline_steps = [
    ("features", FeatureRep(features=feature_functions)),
    ("scaler", RobustScaler(quantile_range=[15.0, 85.0])),
    ("pca", PCA()),
    # Disabled alternative classifier:
    # ("rf", RandomForestClassifier(criterion="gini"))
    ("lr", LogisticRegression(multi_class='multinomial', max_iter=1000)),
]
pipeline = Pype(pipeline_steps, accuracy_scorer, memory=None)



## Hyperparameter tuning

In [6]:
# Hyperparameter grid; each parameter currently has a single candidate value,
# so the search evaluates exactly one configuration.
parameters_grid = {
    "pca__n_components": [60],
    "lr__C": [0.1],
}

# 10-fold split over the segmented samples. `split` returns a one-shot
# generator, so it has to be recreated for every new search or validation run.
splitter = KFold(n_splits=10)
cv = splitter.split(X, y)

# No `scoring` argument is passed: scoring does not work within the current
# framework, so the search relies on the estimator's own scorer.
grid_search = GridSearchCV(
    estimator=pipeline,
    param_grid=parameters_grid,
    cv=cv,
    n_jobs=-1,
)
grid_search.fit(X, y)

best_score_line = "GridSearchCV best clf score: %.2f" % (grid_search.best_score_)
print(best_score_line)
print(grid_search.best_estimator_)

GridSearchCV best clf score: 0.95
Pype(scorer=make_scorer(accuracy_score),
     steps=[('features',
             FeatureRep(features={'abs_energy': <function abs_energy at 0x7f0ef68d5af0>,
                                  'emg_var': <function emg_var at 0x7f0ef68b7a60>,
                                  'integrated_emg': <function abs_sum at 0x7f0ef68d5a60>,
                                  'kurt': <function kurt at 0x7f0ef68d5f70>,
                                  'max': <function maximum at 0x7f0ef68d5e50>,
                                  'mean': <function mean at 0x7f0ef68d5790>,
                                  'mean_abs...
                                  'var': <function var at 0x7f0ef68d5c10>,
                                  'waveform_length': <function waveform_length at 0x7f0ef68b7550>,
                                  'willison_amplitude': willison_amplitude(threshold=0),
                                  'zero_crossings': zero_crossing(threshold=0)})),
            

In [7]:
# Cross-validate the configuration selected by the grid search, using the
# same splitter as the search.
best_estimator = grid_search.best_estimator_

# A fresh fold generator is required: the one used by the search is consumed.
cv = splitter.split(X, y)
cv_scores = cross_validate(
    best_estimator,
    X,
    y,
    cv=cv,
    n_jobs=-1,
)

# Tabulate the per-fold results and display the column means as the cell output.
cv_scores_df = pd.DataFrame(cv_scores)
cv_scores_df.mean()



fit_time      0.324810
score_time    0.008836
test_score    0.948925
dtype: float64

# Dump and Load the model

In [8]:
# Dump the model
#best_estimator = best_estimator.fit(X, y)
#model_path = './Model/classifier.joblib'
#dump(best_estimator, model_path) 

In [9]:
# Load the model
#clf = load(model_path)
#dummy_data = np.random.rand(150, 6)
#dummy_data = [dummy_data]
#pred = clf.predict(dummy_data)
#pred