In [46]:
import pandas as pd
import numpy as np
import pathlib
import matplotlib.pyplot as plt
import sklearn
from sklearn.pipeline import Pipeline
from sklearn.impute import SimpleImputer
from sklearn.model_selection import train_test_split
from sklearn.compose import ColumnTransformer
from sklearn.preprocessing import StandardScaler
from sklearn.svm import SVC
from sklearn.preprocessing import LabelEncoder
from sklearn.model_selection import GridSearchCV
from sklearn.metrics import accuracy_score
%matplotlib inline

In [3]:
# Locate the training CSV relative to the directory the notebook is run from.
cwd = pathlib.Path.cwd()
data_path = cwd.joinpath("dataset", "final_train.csv")

# Disable pandas' row/column truncation so displayed frames are shown in full.
pd.set_option("display.max_rows", None)
pd.set_option("display.max_columns", None)

# The first CSV column is used as the frame's index.
data = pd.read_csv(data_path, index_col=0)

In [4]:
# Keep only the rows that have no missing value in any column.
# NOTE(review): `data_d` is not referenced by the later cells shown here;
# the train/validation split is built from `data` — confirm this is intended.
data_d = data.dropna(axis=0, how="any")

In [10]:
def data_to_train_valid(data):
    """Separate features from the target and hold out a validation split.

    The target is a copy of the "Activity" column. The feature frame is
    ``data`` without the "Activity", "subject" and "void()" columns.

    Args:
        data: DataFrame containing at least those three columns.

    Returns:
        Tuple ``(X_train, X_valid, y_train, y_valid)``.
    """
    target = data["Activity"].copy()
    features = data.drop(columns=["Activity", "subject", "void()"])
    # Simple random split: 20% of the rows go to validation, seeded for repeatability.
    return tuple(train_test_split(features, target, test_size=0.2, random_state=0))

In [6]:
def encode_y(y_train, y_valid):
    """Integer-encode the target labels of both splits.

    The encoder is fitted on ``y_train`` only and then applied to
    ``y_valid``; ``LabelEncoder.transform`` raises ``ValueError`` for a
    label that was not seen during fitting.

    Args:
        y_train: Training labels, used to fit the encoder.
        y_valid: Validation labels, encoded with the fitted encoder.

    Returns:
        Tuple ``(encoded y_train, encoded y_valid, fitted LabelEncoder)``.
    """
    encoder = LabelEncoder()
    train_codes = encoder.fit_transform(y_train)
    valid_codes = encoder.transform(y_valid)
    return train_codes, valid_codes, encoder

## Split data, encode y labels

In [None]:
# Split the full frame `data` (not the NaN-free `data_d`); missing values are
# left in place for the SimpleImputer inside the pipeline's 'preproc' step.
X_train, X_valid, y_train, y_valid = data_to_train_valid(data)

In [41]:
# Rebinds y_train / y_valid to their integer-encoded form. Not idempotent:
# re-running this cell alone refits the encoder on the already-encoded values,
# so re-run the split cell first to restore the original labels.
y_train, y_valid, label_encoder = encode_y(y_train, y_valid)

In [30]:
# Every remaining feature column is routed through the numeric preprocessing branch.
numerical_columns = list(X_train.columns)

In [72]:
# Missing values in the numeric columns are replaced by the column median.
numerical_transformer = SimpleImputer(strategy="median")

# Standardisation is kept as a separate object so it can be its own pipeline step.
scaler = StandardScaler()

# Single-branch transformer: apply the imputer to all columns in `numerical_columns`.
preproc = ColumnTransformer(
    transformers=[("num", numerical_transformer, numerical_columns)],
)

In [73]:
# Baseline classifier; kernel and gamma are left at SVC's defaults.
svc = SVC(C=1)

# impute -> standardise -> classify. The step names ('preproc', 'scaler', 'model')
# are the prefixes used to address parameters, e.g. 'model__C'.
pipeline_steps = [
    ("preproc", preproc),
    ("scaler", scaler),
    ("model", svc),
]
main_pipe = Pipeline(pipeline_steps)

Check how the scaler behaves: fit it on `X_train` and count the infinite values in the scaled output.

In [75]:
# Sanity check: standardise the training features and count infinite entries.
# This fits the same `scaler` instance that is a step of `main_pipe`;
# the pipeline fits it again when `main_pipe.fit` is called.
np.isinf(scaler.fit(X_train).transform(X_train)).sum()

0

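Check that `main_pipe` works end to end: fit it on the training split, predict on the validation split, and score the predictions.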

In [76]:
# Fit imputer, scaler and SVC together on the training split.
# The trailing ';' suppresses the estimator repr in the cell output.
main_pipe.fit(X_train, y_train);

In [None]:
# Pipeline exposes `predict`, not `pred`. The result is bound to `y_pred`
# because the scoring cell that follows reads that name.
y_pred = main_pipe.predict(X_valid)

In [47]:
# scikit-learn metrics take (y_true, y_pred) in that order; accuracy itself is
# unaffected by the order, but matching the API keeps the call consistent
# with metrics where the order matters.
accuracy_score(y_valid, y_pred)

0.9125338142470695

# Grid search

In [79]:
# Two sub-grids searched over the pipeline's 'model' step (the SVC):
#   * RBF kernel:    4 values of C x 2 values of gamma = 8 candidates
#   * linear kernel: 4 values of C                     = 4 candidates
params_grid = [
    {
        'model__kernel': ['rbf'],
        'model__gamma': [1e-3, 1e-4],
        'model__C': [1, 10, 100, 1000],
    },
    {
        'model__kernel': ['linear'],
        'model__C': [1, 10, 100, 1000],
    },
]

In [80]:
# 5-fold cross-validated search over `params_grid`, wrapping the full pipeline
# so imputation and scaling are refit inside every fold.
# The parallelism argument is `n_jobs` (`jobs` is not accepted); -1 uses all cores.
svc_grid = GridSearchCV(main_pipe, params_grid, cv=5, verbose=2, n_jobs=-1)

In [81]:
# List every parameter name the pipeline exposes; the 'model__*' entries are
# the names that `params_grid` keys must match.
main_pipe.get_params().keys()

dict_keys(['memory', 'steps', 'verbose', 'preproc', 'scaler', 'model', 'preproc__n_jobs', 'preproc__remainder', 'preproc__sparse_threshold', 'preproc__transformer_weights', 'preproc__transformers', 'preproc__verbose', 'preproc__num', 'preproc__num__add_indicator', 'preproc__num__copy', 'preproc__num__fill_value', 'preproc__num__missing_values', 'preproc__num__strategy', 'preproc__num__verbose', 'scaler__copy', 'scaler__with_mean', 'scaler__with_std', 'model__C', 'model__break_ties', 'model__cache_size', 'model__class_weight', 'model__coef0', 'model__decision_function_shape', 'model__degree', 'model__gamma', 'model__kernel', 'model__max_iter', 'model__probability', 'model__random_state', 'model__shrinking', 'model__tol', 'model__verbose'])

In [82]:
# Run the search: every candidate in `params_grid` is cross-validated on the
# training split only; X_valid is not used here.
svc_grid.fit(X_train, y_train)

Fitting 5 folds for each of 12 candidates, totalling 60 fits


GridSearchCV(cv=5,
             estimator=Pipeline(steps=[('preproc',
                                        ColumnTransformer(transformers=[('num',
                                                                         SimpleImputer(strategy='median'),
                                                                         ['angle(X,gravityMean)',
                                                                          'angle(Y,gravityMean)',
                                                                          'angle(Z,gravityMean)',
                                                                          'angle(tBodyAccJerkMean),gravityMean)',
                                                                          'angle(tBodyAccMean,gravity)',
                                                                          'angle(tBodyGyroJerkMean,gravityMean)',
                                                                          'angle(tBodyGyroMean,gravityMean)',
      

In [84]:
# Mean cross-validated score of the best candidate (computed on X_train folds, not on X_valid).
svc_grid.best_score_

0.9890619088405501

In [98]:
# Parameter combination from `params_grid` that produced `best_score_`.
svc_grid.best_params_

{'model__C': 100, 'model__gamma': 0.001, 'model__kernel': 'rbf'}

In [99]:
# Pipeline configured with `best_params_`; GridSearchCV refits it on all of
# X_train by default (refit=True is not overridden above).
best_svc = svc_grid.best_estimator_

In [100]:
# Predictions of the tuned pipeline on the held-out validation split.
y_pred = best_svc.predict(X_valid)

In [101]:
# scikit-learn metrics take (y_true, y_pred) in that order; accuracy itself is
# unaffected by the order, but matching the API keeps the call consistent
# with metrics where the order matters.
accuracy_score(y_valid, y_pred)

0.9873760144274121