In [19]:
import pandas as pd
from sklearn.svm import SVC
from sklearn.metrics import classification_report
from sklearn.model_selection import cross_validate, train_test_split, GridSearchCV
from sklearn.linear_model import LogisticRegression
from sklearn.naive_bayes import GaussianNB
from sklearn.neighbors import KNeighborsClassifier
from sklearn.tree import DecisionTreeClassifier
from sklearn.ensemble import RandomForestClassifier
from sklearn.neural_network import MLPClassifier

In [2]:
# Load the data set; every column except 'Activity' is used as a feature.
df = pd.read_csv('dataset.csv')
X = df.drop(columns=['Activity'])

# Binary target as a plain list of ints:
#   0 -> label starts with 'WALKING' or contains no underscore
#   1 -> every other label (presumably compound labels such as X_TO_Y;
#        confirm against the data set)
y = [
    int(not a.startswith('WALKING') and '_' in a)
    for a in df['Activity']
]

# Scorer names evaluated by every cross_validate call in this notebook.
metrics = [
    'f1_weighted',
    'roc_auc_ovr_weighted',
    'roc_auc_ovo_weighted',
    'balanced_accuracy',
    'accuracy',
]

## SVM

In [26]:
# Hyper-parameter grid for the SVM: regularisation strength C x kernel type.
params = {
    'C': [0.001, 0.01, 0.1, 1, 10, 100],
    'kernel': ['linear', 'poly', 'rbf', 'sigmoid']
}

# Stratified 70/30 hold-out split; the search only ever sees the training part.
X_train, X_test, y_train, y_test = train_test_split(X, y, stratify=y, train_size=0.7, random_state=42)

# 'f1_weighted' is computed from hard predictions only, so probability
# estimates are not enabled here: probability=True makes every SVC fit run an
# additional internal 5-fold cross-validation without affecting predict().
grid = GridSearchCV(SVC(random_state=42), params, cv=10, n_jobs=-1, scoring='f1_weighted')
grid.fit(X_train, y_train)
# (The former `grid.predict(X_train)` call was removed: its result was discarded.)
print(grid.best_params_)

{'C': 100, 'kernel': 'rbf'}


In [27]:
# 10-fold cross-validation of the SVM on the full data set, using the
# hyper-parameters printed by the grid search (C=100, RBF kernel).
# probability=True is kept because the ROC AUC scorers are among the metrics.
svmClass = SVC(C=100, kernel='rbf', probability=True, random_state=42)
result = cross_validate(svmClass, X, y, cv=10, scoring=metrics,
                        return_train_score=True, n_jobs=-1)

# (printed heading, scorer name) in display order.
report_rows = [
    ("f1", "f1_weighted"),
    ("ROC AUC OneVsRest", "roc_auc_ovr_weighted"),
    ("ROC AUC OneVsOne", "roc_auc_ovo_weighted"),
    ("balanced accuracy", "balanced_accuracy"),
    ("accuracy", "accuracy"),
]

# One stanza per metric. The mean is scaled to percent; the std is printed as
# the raw fraction (not multiplied by 100), exactly as before.
for heading, scorer in report_rows:
    fold_scores = result["test_" + scorer]
    print("\t{}:".format(heading))
    print("\t\tmean = {mean:.2f}%;\n\t\tstd = {std:.2f};"
          .format(mean=fold_scores.mean() * 100, std=fold_scores.std()))

	f1:
		mean = 99.70%;
		std = 0.00;
	ROC AUC OneVsRest:
		mean = 99.71%;
		std = 0.01;
	ROC AUC OneVsOne:
		mean = 99.71%;
		std = 0.01;
	balanced accuracy:
		mean = 97.92%;
		std = 0.02;
	accuracy:
		mean = 99.70%;
		std = 0.00;


## SVM report

In [28]:
# Fit the tuned SVM on the training split and report per-class
# precision / recall / F1 on the held-out test split.
svm = SVC(C=100, kernel='rbf', probability=True, random_state=42)
y_pred = svm.fit(X_train, y_train).predict(X_test)

report = classification_report(y_true=y_test, y_pred=y_pred)
print(report)

              precision    recall  f1-score   support

           0       1.00      1.00      1.00      3124
           1       1.00      0.97      0.99       155

    accuracy                           1.00      3279
   macro avg       1.00      0.99      0.99      3279
weighted avg       1.00      1.00      1.00      3279



## Logistic Regression

In [31]:
# Two separate grids are searched because the solvers support different
# penalties: liblinear/saga are tried with both l1 and l2, while
# newton-cg/lbfgs/sag are tried with l2 only.
param_grids = [
    {
        'C': [0.001, 0.01, 0.1, 1, 10, 100],
        'solver': ['liblinear', 'saga'],
        'penalty': ['l1', 'l2']
    },
    {
        'C': [0.001, 0.01, 0.1, 1, 10, 100],
        'solver': ['newton-cg', 'lbfgs', 'sag'],
        'penalty': ['l2']
    },
]

# Stratified 70/30 hold-out split; both searches use the same training part.
X_train, X_test, y_train, y_test = train_test_split(X, y, stratify=y, train_size=0.7, random_state=42)

# Run one search per grid and print its best score and parameters.
# `params` and `grid` end up holding the second grid / search, as before.
# (The former `grid.predict(X_train)` calls were removed: their results were discarded.)
for grid_index, params in enumerate(param_grids):
    if grid_index > 0:
        # Blank-line separator between the two result blocks.
        print('\n\n\n')
    grid = GridSearchCV(
        LogisticRegression(max_iter=1500, random_state=42, n_jobs=-1),
        params, cv=10, n_jobs=-1, scoring='f1_weighted')
    grid.fit(X_train, y_train)
    print(grid.best_score_)
    print(grid.best_params_)



0.9968457866483373
{'C': 1, 'penalty': 'l1', 'solver': 'saga'}




0.996587807658277
{'C': 1, 'penalty': 'l2', 'solver': 'sag'}


In [32]:
# 10-fold cross-validation of logistic regression on the full data set, using
# the best-scoring configuration from the grid searches (C=1, l1, saga).
lr = LogisticRegression(C=1, penalty='l1', solver='saga', max_iter=1500, random_state=42, n_jobs=-1)
result = cross_validate(lr, X, y, cv=10, scoring=metrics,
                        return_train_score=True, n_jobs=-1)

# (printed heading, scorer name) in display order.
report_rows = [
    ("f1", "f1_weighted"),
    ("ROC AUC OneVsRest", "roc_auc_ovr_weighted"),
    ("ROC AUC OneVsOne", "roc_auc_ovo_weighted"),
    ("balanced accuracy", "balanced_accuracy"),
    ("accuracy", "accuracy"),
]

# One stanza per metric. The mean is scaled to percent; the std is printed as
# the raw fraction (not multiplied by 100), exactly as before.
for heading, scorer in report_rows:
    fold_scores = result["test_" + scorer]
    print("\t{}:".format(heading))
    print("\t\tmean = {mean:.2f}%;\n\t\tstd = {std:.2f};"
          .format(mean=fold_scores.mean() * 100, std=fold_scores.std()))

	f1:
		mean = 99.56%;
		std = 0.00;
	ROC AUC OneVsRest:
		mean = 99.83%;
		std = 0.00;
	ROC AUC OneVsOne:
		mean = 99.83%;
		std = 0.00;
	balanced accuracy:
		mean = 98.03%;
		std = 0.02;
	accuracy:
		mean = 99.55%;
		std = 0.00;


## LR report

In [34]:
# Fit the tuned logistic regression on the training split and report
# per-class precision / recall / F1 on the held-out test split.
# NOTE: max_iter is 2000 here, whereas the cross-validation cell uses 1500.
lr = LogisticRegression(
    C=1,
    penalty='l1',
    solver='saga',
    max_iter=2000,
    random_state=42,
    n_jobs=-1,
)
y_pred = lr.fit(X_train, y_train).predict(X_test)

report = classification_report(y_true=y_test, y_pred=y_pred)
print(report)


              precision    recall  f1-score   support

           0       1.00      1.00      1.00      3124
           1       0.98      0.96      0.97       155

    accuracy                           1.00      3279
   macro avg       0.99      0.98      0.98      3279
weighted avg       1.00      1.00      1.00      3279





## KNN

In [36]:
# Candidate neighbourhood sizes: the odd values 1, 3, ..., 19.
params = {'n_neighbors': list(range(1, 20, 2))}

# Stratified 70/30 hold-out split; the search only sees the training part.
X_train, X_test, y_train, y_test = train_test_split(
    X, y, train_size=0.7, stratify=y, random_state=42)

# 10-fold grid search, models ranked by weighted F1.
grid = GridSearchCV(
    estimator=KNeighborsClassifier(n_jobs=-1),
    param_grid=params,
    scoring='f1_weighted',
    cv=10,
    n_jobs=-1,
)
grid.fit(X_train, y_train)

print(grid.best_score_)
print(grid.best_params_)

0.9968315149466089
{'n_neighbors': 1}


In [37]:
# 10-fold cross-validation of 1-nearest-neighbour (the grid-search winner)
# on the full data set.
knnClass = KNeighborsClassifier(n_neighbors=1, n_jobs=-1)
result = cross_validate(knnClass, X, y, cv=10, scoring=metrics,
                        return_train_score=True, n_jobs=-1)

# (printed heading, scorer name) in display order.
report_rows = [
    ("f1", "f1_weighted"),
    ("ROC AUC OneVsRest", "roc_auc_ovr_weighted"),
    ("ROC AUC OneVsOne", "roc_auc_ovo_weighted"),
    ("balanced accuracy", "balanced_accuracy"),
    ("accuracy", "accuracy"),
]

# One stanza per metric. The mean is scaled to percent; the std is printed as
# the raw fraction (not multiplied by 100), exactly as before.
for heading, scorer in report_rows:
    fold_scores = result["test_" + scorer]
    print("{}:".format(heading))
    print("\tmean = {mean:.2f}%;\n\tstd = {std:.2f};"
          .format(mean=fold_scores.mean() * 100, std=fold_scores.std()))

f1:
	mean = 99.60%;
	std = 0.00;
ROC AUC OneVsRest:
	mean = 97.23%;
	std = 0.01;
ROC AUC OneVsOne:
	mean = 97.23%;
	std = 0.01;
balanced accuracy:
	mean = 97.23%;
	std = 0.01;
accuracy:
	mean = 99.61%;
	std = 0.00;


## KNN report

In [38]:
# Fit 1-NN on the training split and report per-class
# precision / recall / F1 on the held-out test split.
knn = KNeighborsClassifier(n_neighbors=1, n_jobs=-1)
y_pred = knn.fit(X_train, y_train).predict(X_test)

report = classification_report(y_true=y_test, y_pred=y_pred)
print(report)

              precision    recall  f1-score   support

           0       1.00      1.00      1.00      3124
           1       0.99      0.95      0.97       155

    accuracy                           1.00      3279
   macro avg       0.99      0.98      0.98      3279
weighted avg       1.00      1.00      1.00      3279



## Decision Tree

In [39]:
# Tune the tree depth: depths 1..10 plus None (no depth limit).
params = {'max_depth': [*range(1, 11), None]}

# Stratified 70/30 hold-out split; the search only sees the training part.
X_train, X_test, y_train, y_test = train_test_split(
    X, y, train_size=0.7, stratify=y, random_state=42)

# 10-fold grid search, models ranked by weighted F1.
grid = GridSearchCV(
    estimator=DecisionTreeClassifier(random_state=42),
    param_grid=params,
    scoring='f1_weighted',
    cv=10,
    n_jobs=-1,
)
grid.fit(X_train, y_train)

print(grid.best_score_)
print(grid.best_params_)

0.9937552325778014
{'max_depth': 4}


In [40]:
# 10-fold cross-validation of the depth-4 decision tree (the grid-search
# winner) on the full data set.
curr = DecisionTreeClassifier(random_state=42, max_depth=4)
result = cross_validate(curr, X, y, cv=10, scoring=metrics,
                        return_train_score=True, n_jobs=-1)

# (printed heading, scorer name) in display order.
report_rows = [
    ("f1", "f1_weighted"),
    ("ROC AUC OneVsRest", "roc_auc_ovr_weighted"),
    ("ROC AUC OneVsOne", "roc_auc_ovo_weighted"),
    ("balanced accuracy", "balanced_accuracy"),
    ("accuracy", "accuracy"),
]

# One stanza per metric. The mean is scaled to percent; the std is printed as
# the raw fraction (not multiplied by 100), exactly as before.
for heading, scorer in report_rows:
    fold_scores = result["test_" + scorer]
    print("{}:".format(heading))
    print("\tmean = {mean:.2f}%;\n\tstd = {std:.2f};"
          .format(mean=fold_scores.mean() * 100, std=fold_scores.std()))

f1:
	mean = 99.26%;
	std = 0.01;
ROC AUC OneVsRest:
	mean = 93.96%;
	std = 0.04;
ROC AUC OneVsOne:
	mean = 93.96%;
	std = 0.04;
balanced accuracy:
	mean = 94.22%;
	std = 0.05;
accuracy:
	mean = 99.29%;
	std = 0.00;


## Decision Tree Report

In [41]:
# Fit the depth-4 decision tree on the training split and report per-class
# precision / recall / F1 on the held-out test split.
dtree = DecisionTreeClassifier(max_depth=4, random_state=42)
y_pred = dtree.fit(X_train, y_train).predict(X_test)

report = classification_report(y_true=y_test, y_pred=y_pred)
print(report)

              precision    recall  f1-score   support

           0       1.00      1.00      1.00      3124
           1       0.97      0.94      0.95       155

    accuracy                           1.00      3279
   macro avg       0.98      0.97      0.97      3279
weighted avg       1.00      1.00      1.00      3279



## Random Forest

In [42]:
# Tune the depth of the forest's trees: depths 8..17 plus None (no depth limit).
params = {'max_depth': [*range(8, 18), None]}

# Stratified 70/30 hold-out split; the search only sees the training part.
X_train, X_test, y_train, y_test = train_test_split(
    X, y, train_size=0.7, stratify=y, random_state=42)

# 10-fold grid search, models ranked by weighted F1.
grid = GridSearchCV(
    estimator=RandomForestClassifier(random_state=42, n_jobs=-1),
    param_grid=params,
    scoring='f1_weighted',
    cv=10,
    n_jobs=-1,
)
grid.fit(X_train, y_train)

print(grid.best_score_)
print(grid.best_params_)

0.9958906714445114
{'max_depth': 16}


In [43]:
# 10-fold cross-validation of the random forest with max_depth=16 (the
# grid-search winner) on the full data set.
curr = RandomForestClassifier(random_state=42, max_depth=16)
result = cross_validate(curr, X, y, cv=10, scoring=metrics,
                        return_train_score=True, n_jobs=-1)

# (printed heading, scorer name) in display order.
report_rows = [
    ("f1", "f1_weighted"),
    ("ROC AUC OneVsRest", "roc_auc_ovr_weighted"),
    ("ROC AUC OneVsOne", "roc_auc_ovo_weighted"),
    ("balanced accuracy", "balanced_accuracy"),
    ("accuracy", "accuracy"),
]

# One stanza per metric. The mean is scaled to percent; the std is printed as
# the raw fraction (not multiplied by 100), exactly as before.
for heading, scorer in report_rows:
    fold_scores = result["test_" + scorer]
    print("{}:".format(heading))
    print("\tmean = {mean:.2f}%;\n\tstd = {std:.2f};"
          .format(mean=fold_scores.mean() * 100, std=fold_scores.std()))

f1:
	mean = 99.53%;
	std = 0.00;
ROC AUC OneVsRest:
	mean = 99.89%;
	std = 0.00;
ROC AUC OneVsOne:
	mean = 99.89%;
	std = 0.00;
balanced accuracy:
	mean = 96.91%;
	std = 0.02;
accuracy:
	mean = 99.53%;
	std = 0.00;


## Random Forest Report

In [44]:
# Fit the depth-16 random forest on the training split and report per-class
# precision / recall / F1 on the held-out test split.
# NOTE: the name `dtree` is also used by the decision-tree report cell, so
# running this cell replaces that fitted tree with the forest.
dtree = RandomForestClassifier(max_depth=16, n_jobs=-1, random_state=42)
y_pred = dtree.fit(X_train, y_train).predict(X_test)

report = classification_report(y_true=y_test, y_pred=y_pred)
print(report)

              precision    recall  f1-score   support

           0       1.00      1.00      1.00      3124
           1       0.99      0.94      0.96       155

    accuracy                           1.00      3279
   macro avg       0.99      0.97      0.98      3279
weighted avg       1.00      1.00      1.00      3279



## MLP

In [45]:
# Candidate hidden-layer widths: 50, 100, ..., 500 (a single hidden layer each).
params = {'hidden_layer_sizes': list(range(50, 501, 50))}

# Stratified 70/30 hold-out split; the search only sees the training part.
X_train, X_test, y_train, y_test = train_test_split(
    X, y, train_size=0.7, stratify=y, random_state=42)

# Base network shared by all candidates; only the hidden-layer width varies.
clf = MLPClassifier(
    activation='logistic',
    solver='adam',
    alpha=1e-5,
    learning_rate='invscaling',
    max_iter=500,
    random_state=42,
)

# 10-fold grid search, models ranked by weighted F1.
grid = GridSearchCV(
    estimator=clf,
    param_grid=params,
    scoring='f1_weighted',
    cv=10,
    n_jobs=-1,
)
grid.fit(X_train, y_train)

print(grid.best_score_)
print(grid.best_params_)

0.9968489774260789
{'hidden_layer_sizes': 150}


In [46]:
# 10-fold cross-validation of the MLP with one hidden layer of 150 units (the
# grid-search winner) on the full data set.
mpl = MLPClassifier(activation = 'logistic',
                solver='adam',
                alpha=1e-5,
                learning_rate = 'invscaling',
                max_iter = 500,
                hidden_layer_sizes=(150,),
                random_state=42)

result = cross_validate(mpl, X, y, cv=10, scoring=metrics,
                        return_train_score=True, n_jobs=-1)

# (printed heading, scorer name) in display order.
report_rows = [
    ("f1", "f1_weighted"),
    ("ROC AUC OneVsRest", "roc_auc_ovr_weighted"),
    ("ROC AUC OneVsOne", "roc_auc_ovo_weighted"),
    ("balanced accuracy", "balanced_accuracy"),
    ("accuracy", "accuracy"),
]

# One stanza per metric. The mean is scaled to percent; the std is printed as
# the raw fraction (not multiplied by 100), exactly as before.
for heading, scorer in report_rows:
    fold_scores = result["test_" + scorer]
    print("{}:".format(heading))
    print("\tmean = {mean:.2f}%;\n\tstd = {std:.2f};"
          .format(mean=fold_scores.mean() * 100, std=fold_scores.std()))

f1:
	mean = 99.55%;
	std = 0.00;
ROC AUC OneVsRest:
	mean = 99.76%;
	std = 0.00;
ROC AUC OneVsOne:
	mean = 99.76%;
	std = 0.00;
balanced accuracy:
	mean = 97.39%;
	std = 0.02;
accuracy:
	mean = 99.55%;
	std = 0.00;


## MLP Report

In [47]:
# Fit the tuned MLP (one hidden layer of 150 units) on the training split and
# report per-class precision / recall / F1 on the held-out test split.
mlp = MLPClassifier(
    hidden_layer_sizes=(150,),
    activation='logistic',
    solver='adam',
    alpha=1e-5,
    learning_rate='invscaling',
    max_iter=500,
    random_state=42,
)
y_pred = mlp.fit(X_train, y_train).predict(X_test)

report = classification_report(y_true=y_test, y_pred=y_pred)
print(report)

              precision    recall  f1-score   support

           0       1.00      1.00      1.00      3124
           1       0.99      0.96      0.98       155

    accuracy                           1.00      3279
   macro avg       1.00      0.98      0.99      3279
weighted avg       1.00      1.00      1.00      3279



## Naive Bayes

In [48]:
# 10-fold cross-validation of Gaussian naive Bayes (default settings, no
# tuning step) on the full data set.
nb = GaussianNB()

result = cross_validate(nb, X, y, cv=10, scoring=metrics,
                        return_train_score=True, n_jobs=-1)

# (printed heading, scorer name) in display order.
report_rows = [
    ("f1", "f1_weighted"),
    ("ROC AUC OneVsRest", "roc_auc_ovr_weighted"),
    ("ROC AUC OneVsOne", "roc_auc_ovo_weighted"),
    ("balanced accuracy", "balanced_accuracy"),
    ("accuracy", "accuracy"),
]

# One stanza per metric. The mean is scaled to percent; the std is printed as
# the raw fraction (not multiplied by 100), exactly as before.
for heading, scorer in report_rows:
    fold_scores = result["test_" + scorer]
    print("{}:".format(heading))
    print("\tmean = {mean:.2f}%;\n\tstd = {std:.2f};"
          .format(mean=fold_scores.mean() * 100, std=fold_scores.std()))

f1:
	mean = 97.30%;
	std = 0.01;
ROC AUC OneVsRest:
	mean = 98.16%;
	std = 0.01;
ROC AUC OneVsOne:
	mean = 98.16%;
	std = 0.01;
balanced accuracy:
	mean = 97.64%;
	std = 0.01;
accuracy:
	mean = 96.91%;
	std = 0.02;


## Naive Bayes Report

In [49]:
# Fit Gaussian naive Bayes on the training split and report per-class
# precision / recall / F1 on the held-out test split.
nb = GaussianNB()
y_pred = nb.fit(X_train, y_train).predict(X_test)

report = classification_report(y_true=y_test, y_pred=y_pred)
print(report)


              precision    recall  f1-score   support

           0       1.00      0.98      0.99      3124
           1       0.67      0.99      0.80       155

    accuracy                           0.98      3279
   macro avg       0.84      0.98      0.89      3279
weighted avg       0.98      0.98      0.98      3279

