In [1]:
import sys
sys.path.append('../')

from feature_extraction.hog_descriptor import *

### Helper Functions

In [2]:
# helper functions for models-comparison
import numpy as np
import pandas as pd
from sklearn.metrics import accuracy_score, precision_score, recall_score, f1_score, auc, confusion_matrix


def get_metrics(y_true, y_pred, print_metrics=True):
    """
    Get accuracy, precision, recall, f1-score, auc, confusion matrix
    """
    # print(y_true)
    # print(y_pred)
    acc = accuracy_score(y_true, y_pred)
    prec = precision_score(y_true, y_pred, average='weighted', zero_division=0)
    rec = recall_score(y_true, y_pred, average='weighted', zero_division=0)
    f1 = f1_score(y_true, y_pred, average='weighted', zero_division=0)
    conf_mat = confusion_matrix(y_true, y_pred)

    if print_metrics:
        print('Accuracy: {:.2f}'.format(acc*100))
        print('Precision: {:.2f}'.format(prec))
        print('Recall: {:.2f}'.format(rec))
        print('F1-score: {:.2f}'.format(f1))
        print('Confusion matrix:')
        print(pd.DataFrame(conf_mat))

    return acc, prec, rec, f1, conf_mat


# take the model and the train and test data and return the metrics
def get_metrics_model(model, X_train, y_train, X_test, y_test, print_metrics=True):
    """
    Get accuracy, precision, recall, f1-score, auc, confusion matrix
    """
    # get model name
    if print_metrics:
       print("training model: {}".format(model.__class__.__name__))
    model.fit(X_train, y_train)
    if print_metrics:
        print("predicting model: {}".format(model.__class__.__name__))
    y_pred = model.predict(X_test)
    if print_metrics:
        print("evaluating model: {}".format(model.__class__.__name__))
    acc, prec, rec, f1, conf_mat = get_metrics(y_test, y_pred, print_metrics)

    if print_metrics:
        print('saving model: {}'.format(model.__class__.__name__))
    
    # # save the trained model
    # model_name = model.__class__.__name__
    # model.save('models/{}.h5'.format(model_name))

    return acc, prec, rec, f1, conf_mat


# compute the metrics for all the models and return a dataframe with the results
def get_metrics_all_models(models, X_train, y_train, X_test, y_test):
    """
    Get accuracy, precision, recall, f1-score, auc, confusion matrix for all models
    """
    metrics = []
    for model in models:
        print('Model: {}'.format(model.__class__.__name__))
        acc, prec, rec, f1, conf_mat = get_metrics_model(model, X_train, y_train, X_test, y_test, print_metrics=False)
        metrics.append([acc, prec, rec, f1, conf_mat])

    df_metrics = pd.DataFrame(metrics, columns=['Accuracy', 'Precision', 'Recall', 'F1-score', 'Confusion matrix'])
    df_metrics.index = [str(model.__class__.__name__) for model in models]

    return df_metrics


In [3]:
# from sklearn.model_selection import train_test_split
# # Define the number of samples and features
# num_samples = 1000
# num_features = 6
# # Create a random feature matrix
# X = np.random.rand(num_samples, num_features)
# # Create corresponding labels 6 classes (0, 1, 2, 3, 4, 5)
# Y = np.random.randint(6, size=num_samples)

# # split the data into training (80%) and testing (20%) sets
# X_train, X_test, Y_train, Y_test = train_test_split(X, Y, test_size=0.2, random_state=42)

# # check the size of the training and testing sets
# print("Training set size: ", X_train.shape[0])
# print("Testing set size: ", X_test.shape[0])

In [5]:
images, labels = read_images('../../pp_dataset')

In [6]:
# HOG = HogDescriptor()

# features = HOG.builtin_hog_descriptor(images)

# X_train, X_test, Y_train, Y_test = train_test_split(features, labels, test_size=0.2, random_state=42)

# print(len(X_train), len(X_test), len(Y_train), len(Y_test))

In [4]:
# load features
efd_features = np.load('../../features/efd_features.npy')
hog_features_builtin = np.load('../../features/hog_features_builtin.npy')
hog_features_custom = np.load('../../features/hog_features_custom.npy')
hog_efd_features_builtin = np.load('../../features/hog_efd_features_builtin.npy')
hog_efd_features_custom = np.load('../../features/hog_efd_features_custom.npy')
labels = np.load('../../features/labels.npy')

print(efd_features.shape, hog_features_builtin.shape, hog_features_custom.shape, hog_efd_features_builtin.shape, hog_efd_features_custom.shape, labels.shape)


efd_features_cropped = np.load('../../features_cropped/efd_features.npy')
hog_features_builtin_cropped = np.load('../../features_cropped/hog_features_builtin.npy')
hog_features_custom_cropped = np.load('../../features_cropped/hog_features_custom.npy')
hog_efd_features_builtin_cropped = np.load('../../features_cropped/hog_efd_features_builtin.npy')
hog_efd_features_custom_cropped = np.load('../../features_cropped/hog_efd_features_custom.npy')
labels_cropped = np.load('../../features_cropped/labels.npy')

print(efd_features_cropped.shape, hog_features_builtin_cropped.shape, hog_features_custom_cropped.shape, hog_efd_features_builtin_cropped.shape, hog_efd_features_custom_cropped.shape, labels_cropped.shape)

(1821, 37) (1821, 3780) (1821, 3780) (1821, 3817) (1821, 3817) (1821,)
(1821, 37) (1821, 3780) (1821, 3780) (1821, 3817) (1821, 3817) (1821,)


# - Models

In [5]:
from sklearn import svm
from sklearn.ensemble import RandomForestClassifier
from sklearn.neighbors import KNeighborsClassifier
from sklearn.tree import DecisionTreeClassifier
from sklearn.naive_bayes import  GaussianNB
from sklearn.linear_model import LogisticRegression
from sklearn.ensemble import GradientBoostingClassifier
from sklearn.ensemble import AdaBoostClassifier
from sklearn.ensemble import ExtraTreesClassifier
from xgboost import XGBClassifier

models = [svm.SVC(kernel='linear'), RandomForestClassifier(n_estimators=1), KNeighborsClassifier(n_neighbors=14),DecisionTreeClassifier(max_depth=3), GaussianNB(),
          LogisticRegression(), GradientBoostingClassifier(n_estimators=100, learning_rate=.1, max_depth=3, random_state=0), AdaBoostClassifier(n_estimators=1000, random_state=0), 
          ExtraTreesClassifier(n_estimators=1, random_state=0), XGBClassifier(n_estimators=1000, learning_rate=0.008, max_depth=3, random_state=0)]


## Linear SVM Model

In [114]:
acc, prec, rec, f1, conf_mat = get_metrics_model(models[0], X_train, Y_train, X_test, Y_test)


training model: SVC
predicting model: SVC
evaluating model: SVC
Accuracy: 40.00
Precision: 0.60
Recall: 0.40
F1-score: 0.47
Confusion matrix:
   0  1  2  3  4  5
0  1  0  0  0  0  0
1  0  0  0  0  0  0
2  0  1  0  1  0  0
3  0  0  0  0  0  0
4  0  0  0  0  1  1
5  0  0  0  0  0  0
saving model: SVC


### Random Forest

In [115]:
acc, prec, rec, f1, conf_mat = get_metrics_model(models[1], X_train, Y_train, X_test, Y_test) # check n_estimators, max_depth, random_state


training model: RandomForestClassifier
predicting model: RandomForestClassifier
evaluating model: RandomForestClassifier
Accuracy: 20.00
Precision: 0.13
Recall: 0.20
F1-score: 0.16
Confusion matrix:
   0  1  2  3
0  0  0  0  1
1  0  0  0  0
2  0  1  0  1
3  0  0  1  1
saving model: RandomForestClassifier


### KNN

In [116]:
acc, prec, rec, f1, conf_mat = get_metrics_model(models[2], X_train, Y_train, X_test, Y_test) # check n_neighbors


training model: KNeighborsClassifier
predicting model: KNeighborsClassifier
evaluating model: KNeighborsClassifier
Accuracy: 20.00
Precision: 0.10
Recall: 0.20
F1-score: 0.13
Confusion matrix:
   0  1  2  3  4
0  1  0  0  0  0
1  0  0  0  0  0
2  1  1  0  0  0
3  0  1  0  0  1
4  0  0  0  0  0
saving model: KNeighborsClassifier


### Decision Tree

In [117]:
acc, prec, rec, f1, conf_mat = get_metrics_model(models[3], X_train, Y_train, X_test, Y_test) # check max_depth, random_state


training model: DecisionTreeClassifier
predicting model: DecisionTreeClassifier
evaluating model: DecisionTreeClassifier
Accuracy: 0.00
Precision: 0.00
Recall: 0.00
F1-score: 0.00
Confusion matrix:
   0  1  2  3  4
0  0  0  0  0  1
1  0  0  2  0  0
2  0  0  0  0  0
3  0  0  1  0  1
4  0  0  0  0  0
saving model: DecisionTreeClassifier


### Naive Bayes 

In [118]:
acc, prec, rec, f1, conf_mat = get_metrics_model(models[4], X_train, Y_train, X_test, Y_test) 


training model: GaussianNB
predicting model: GaussianNB
evaluating model: GaussianNB
Accuracy: 0.00
Precision: 0.00
Recall: 0.00
F1-score: 0.00
Confusion matrix:
   0  1  2  3  4
0  0  0  1  0  0
1  0  0  2  0  0
2  0  0  0  0  0
3  0  0  0  0  2
4  0  0  0  0  0
saving model: GaussianNB


### Logistic Regression

In [119]:
acc, prec, rec, f1, conf_mat = get_metrics_model(models[5], X_train, Y_train, X_test, Y_test)


training model: LogisticRegression
predicting model: LogisticRegression
evaluating model: LogisticRegression
Accuracy: 20.00
Precision: 0.20
Recall: 0.20
F1-score: 0.20
Confusion matrix:
   0  1  2  3  4  5
0  1  0  0  0  0  0
1  0  0  0  0  0  0
2  0  1  0  1  0  0
3  0  0  0  0  0  0
4  0  0  0  0  0  2
5  0  0  0  0  0  0
saving model: LogisticRegression


### Gradient Boosting

In [120]:
acc, prec, rec, f1, conf_mat = get_metrics_model(models[6], X_train, Y_train, X_test, Y_test) # check n_estimators, learning_rate, max_depth, random_state


training model: GradientBoostingClassifier
predicting model: GradientBoostingClassifier
evaluating model: GradientBoostingClassifier
Accuracy: 20.00
Precision: 0.10
Recall: 0.20
F1-score: 0.13
Confusion matrix:
   0  1  2  3  4
0  1  0  0  0  0
1  1  0  1  0  0
2  0  0  0  0  0
3  0  0  1  0  1
4  0  0  0  0  0
saving model: GradientBoostingClassifier


### AdaBoost

In [121]:
acc, prec, rec, f1, conf_mat = get_metrics_model(models[7], X_train, Y_train, X_test, Y_test) # check n_estimators, random_state


training model: AdaBoostClassifier
predicting model: AdaBoostClassifier
evaluating model: AdaBoostClassifier
Accuracy: 0.00
Precision: 0.00
Recall: 0.00
F1-score: 0.00
Confusion matrix:
   0  1  2  3
0  0  1  0  0
1  0  0  0  0
2  0  2  0  0
3  0  2  0  0
saving model: AdaBoostClassifier


### Extra Trees

In [122]:
acc, prec, rec, f1, conf_mat = get_metrics_model(models[8], X_train, Y_train, X_test, Y_test) # check n_estimators, random_state


training model: ExtraTreesClassifier
predicting model: ExtraTreesClassifier
evaluating model: ExtraTreesClassifier
Accuracy: 0.00
Precision: 0.00
Recall: 0.00
F1-score: 0.00
Confusion matrix:
   0  1  2  3  4  5
0  0  0  0  1  0  0
1  0  0  0  0  0  0
2  0  2  0  0  0  0
3  0  0  0  0  0  0
4  0  0  0  0  0  2
5  0  0  0  0  0  0
saving model: ExtraTreesClassifier


### XGBoost

In [123]:
acc, prec, rec, f1, conf_mat = get_metrics_model(models[9], X_train, Y_train, X_test, Y_test) # check n_estimators, learning_rate, max_depth, random_state


training model: XGBClassifier
predicting model: XGBClassifier
evaluating model: XGBClassifier
Accuracy: 20.00
Precision: 0.40
Recall: 0.20
F1-score: 0.27
Confusion matrix:
   0  1  2  3  4
0  0  0  0  0  1
1  0  0  0  0  0
2  0  1  1  0  0
3  0  0  0  0  2
4  0  0  0  0  0
saving model: XGBClassifier


## Models Comparison

### Cropped

In [6]:
from sklearn.model_selection import train_test_split

# divide the data into training (70%) and validation (15%) and testing (15%) sets  #### to be adjusted ####
X_train_hog_custom_cropped, X_test_hog_custom_cropped, Y_train_hog_custom_cropped, Y_test_hog_custom_cropped = train_test_split(hog_features_custom_cropped, labels_cropped, test_size=0.2, random_state=42)
X_train_hog_efd_custom_cropped, X_test_hog_efd_custom_cropped, Y_train_hog_efd_custom_cropped, Y_test_hog_efd_custom_cropped = train_test_split(hog_efd_features_custom_cropped, labels_cropped, test_size=0.2, random_state=42)
X_train_hog_builtin_cropped, X_test_hog_builtin_cropped, Y_train_hog_builtin_cropped, Y_test_hog_builtin_cropped = train_test_split(hog_features_builtin_cropped, labels_cropped, test_size=0.2, random_state=42)
X_train_hog_efd_builtin_cropped, X_test_hog_efd_builtin_cropped, Y_train_hog_efd_builtin_cropped, Y_test_hog_efd_builtin_cropped = train_test_split(hog_efd_features_builtin_cropped, labels_cropped, test_size=0.2, random_state=42)
X_train_efd_cropped, X_test_efd_cropped, Y_train_efd_cropped, Y_test_efd_cropped = train_test_split(efd_features_cropped, labels_cropped, test_size=0.2, random_state=42)

print(X_train_hog_custom_cropped.shape, X_test_hog_custom_cropped.shape, Y_train_hog_custom_cropped.shape, Y_test_hog_custom_cropped.shape)
print(X_train_hog_efd_custom_cropped.shape, X_test_hog_efd_custom_cropped.shape, Y_train_hog_efd_custom_cropped.shape, Y_test_hog_efd_custom_cropped.shape)
print(X_train_hog_builtin_cropped.shape, X_test_hog_builtin_cropped.shape, Y_train_hog_builtin_cropped.shape, Y_test_hog_builtin_cropped.shape)
print(X_train_hog_efd_builtin_cropped.shape, X_test_hog_efd_builtin_cropped.shape, Y_train_hog_efd_builtin_cropped.shape, Y_test_hog_efd_builtin_cropped.shape)
print(X_train_efd_cropped.shape, X_test_efd_cropped.shape, Y_train_efd_cropped.shape, Y_test_efd_cropped.shape)



(1456, 3780) (365, 3780) (1456,) (365,)
(1456, 3817) (365, 3817) (1456,) (365,)
(1456, 3780) (365, 3780) (1456,) (365,)
(1456, 3817) (365, 3817) (1456,) (365,)
(1456, 37) (365, 37) (1456,) (365,)


In [8]:
df_metrics_hog_custom_cropped = get_metrics_all_models(models, X_train_hog_custom_cropped, Y_train_hog_custom_cropped, X_test_hog_custom_cropped, Y_test_hog_custom_cropped)

df_metrics_hog_custom_cropped.drop('Confusion matrix', axis=1)

Model: SVC
Model: RandomForestClassifier
Model: KNeighborsClassifier
Model: DecisionTreeClassifier
Model: GaussianNB
Model: LogisticRegression


STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.

Increase the number of iterations (max_iter) or scale the data as shown in:
    https://scikit-learn.org/stable/modules/preprocessing.html
Please also refer to the documentation for alternative solver options:
    https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
  n_iter_i = _check_optimize_result(


Model: GradientBoostingClassifier
Model: AdaBoostClassifier
Model: ExtraTreesClassifier
Model: XGBClassifier


Unnamed: 0,Accuracy,Precision,Recall,F1-score
SVC,0.720548,0.717885,0.720548,0.718738
RandomForestClassifier,0.40274,0.410316,0.40274,0.404146
KNeighborsClassifier,0.70137,0.698422,0.70137,0.696403
DecisionTreeClassifier,0.408219,0.412439,0.408219,0.379269
GaussianNB,0.608219,0.613884,0.608219,0.608003
LogisticRegression,0.712329,0.712516,0.712329,0.711387
GradientBoostingClassifier,0.630137,0.65273,0.630137,0.639238
AdaBoostClassifier,0.479452,0.51774,0.479452,0.491194
ExtraTreesClassifier,0.493151,0.498649,0.493151,0.494673
XGBClassifier,0.679452,0.693006,0.679452,0.685109


In [9]:
df_metrics_hog_efd_custom_cropped = get_metrics_all_models(models, X_train_hog_efd_custom_cropped, Y_train_hog_efd_custom_cropped, X_test_hog_efd_custom_cropped, Y_test_hog_efd_custom_cropped)

df_metrics_hog_efd_custom_cropped.drop('Confusion matrix', axis=1)

Model: SVC
Model: RandomForestClassifier
Model: KNeighborsClassifier
Model: DecisionTreeClassifier
Model: GaussianNB
Model: LogisticRegression


STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.

Increase the number of iterations (max_iter) or scale the data as shown in:
    https://scikit-learn.org/stable/modules/preprocessing.html
Please also refer to the documentation for alternative solver options:
    https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
  n_iter_i = _check_optimize_result(


Model: GradientBoostingClassifier
Model: AdaBoostClassifier
Model: ExtraTreesClassifier
Model: XGBClassifier


Unnamed: 0,Accuracy,Precision,Recall,F1-score
SVC,0.734247,0.731913,0.734247,0.732457
RandomForestClassifier,0.430137,0.437212,0.430137,0.430222
KNeighborsClassifier,0.70137,0.69939,0.70137,0.696421
DecisionTreeClassifier,0.50411,0.562918,0.50411,0.510682
GaussianNB,0.608219,0.613884,0.608219,0.608003
LogisticRegression,0.717808,0.715877,0.717808,0.715925
GradientBoostingClassifier,0.720548,0.723128,0.720548,0.721318
AdaBoostClassifier,0.526027,0.574538,0.526027,0.542731
ExtraTreesClassifier,0.438356,0.446436,0.438356,0.44012
XGBClassifier,0.739726,0.734463,0.739726,0.736749


### Not Cropped

In [32]:
from sklearn.model_selection import train_test_split

# divide the data into training (70%) and validation (15%) and testing (15%) sets  #### to be adjusted ####
X_train_hog_custom, X_test_hog_custom, Y_train_hog_custom, Y_test_hog_custom = train_test_split(hog_features_custom, labels, test_size=0.2, random_state=42)
X_train_hog_efd_custom, X_test_hog_efd_custom, Y_train_hog_efd_custom, Y_test_hog_efd_custom = train_test_split(hog_efd_features_custom, labels, test_size=0.2, random_state=42)
X_train_hog_builtin, X_test_hog_builtin, Y_train_hog_builtin, Y_test_hog_builtin = train_test_split(hog_features_builtin, labels, test_size=0.2, random_state=42)
X_train_hog_efd_builtin, X_test_hog_efd_builtin, Y_train_hog_efd_builtin, Y_test_hog_efd_builtin = train_test_split(hog_efd_features_builtin, labels, test_size=0.2, random_state=42)
X_train_efd, X_test_efd, Y_train_efd, Y_test_efd = train_test_split(efd_features, labels, test_size=0.2, random_state=42)


print(X_train_hog_custom.shape, X_test_hog_custom.shape, Y_train_hog_custom.shape, Y_test_hog_custom.shape)
print(X_train_hog_efd_custom.shape, X_test_hog_efd_custom.shape, Y_train_hog_efd_custom.shape, Y_test_hog_efd_custom.shape)
print(X_train_hog_builtin.shape, X_test_hog_builtin.shape, Y_train_hog_builtin.shape, Y_test_hog_builtin.shape)
print(X_train_hog_efd_builtin.shape, X_test_hog_efd_builtin.shape, Y_train_hog_efd_builtin.shape, Y_test_hog_efd_builtin.shape)
print(X_train_efd.shape, X_test_efd.shape, Y_train_efd.shape, Y_test_efd.shape)


(1456, 3780) (365, 3780) (1456,) (365,)
(1456, 3817) (365, 3817) (1456,) (365,)
(1456, 3780) (365, 3780) (1456,) (365,)
(1456, 3817) (365, 3817) (1456,) (365,)
(1456, 37) (365, 37) (1456,) (365,)


In [33]:
df_metrics_hog_custom  = get_metrics_all_models(models, X_train_hog_custom, Y_train_hog_custom, X_test_hog_custom, Y_test_hog_custom)

df_metrics_hog_custom.drop('Confusion matrix', axis=1)

Model: SVC
Model: RandomForestClassifier
Model: KNeighborsClassifier
Model: DecisionTreeClassifier
Model: GaussianNB
Model: LogisticRegression


STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.

Increase the number of iterations (max_iter) or scale the data as shown in:
    https://scikit-learn.org/stable/modules/preprocessing.html
Please also refer to the documentation for alternative solver options:
    https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
  n_iter_i = _check_optimize_result(


Model: GradientBoostingClassifier
Model: AdaBoostClassifier
Model: ExtraTreesClassifier
Model: XGBClassifier


Unnamed: 0,Accuracy,Precision,Recall,F1-score
SVC,0.742466,0.744294,0.742466,0.74156
RandomForestClassifier,0.484932,0.497067,0.484932,0.485925
KNeighborsClassifier,0.657534,0.643243,0.657534,0.647723
DecisionTreeClassifier,0.326027,0.308028,0.326027,0.270413
GaussianNB,0.613699,0.62707,0.613699,0.618532
LogisticRegression,0.756164,0.759463,0.756164,0.756626
GradientBoostingClassifier,0.676712,0.689317,0.676712,0.680584
AdaBoostClassifier,0.567123,0.618068,0.567123,0.582957
ExtraTreesClassifier,0.468493,0.466713,0.468493,0.464698
XGBClassifier,0.70411,0.713435,0.70411,0.705786


In [34]:
df_metrics_hog_efd_custom  = get_metrics_all_models(models, X_train_hog_efd_custom, Y_train_hog_efd_custom, X_test_hog_efd_custom, Y_test_hog_efd_custom)

df_metrics_hog_efd_custom.drop('Confusion matrix', axis=1)

Model: SVC
Model: RandomForestClassifier
Model: KNeighborsClassifier
Model: DecisionTreeClassifier
Model: GaussianNB
Model: LogisticRegression


STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.

Increase the number of iterations (max_iter) or scale the data as shown in:
    https://scikit-learn.org/stable/modules/preprocessing.html
Please also refer to the documentation for alternative solver options:
    https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
  n_iter_i = _check_optimize_result(


Model: GradientBoostingClassifier
Model: AdaBoostClassifier
Model: ExtraTreesClassifier
Model: XGBClassifier


Unnamed: 0,Accuracy,Precision,Recall,F1-score
SVC,0.739726,0.742912,0.739726,0.739474
RandomForestClassifier,0.517808,0.51899,0.517808,0.517272
KNeighborsClassifier,0.652055,0.637043,0.652055,0.642112
DecisionTreeClassifier,0.454795,0.466065,0.454795,0.419833
GaussianNB,0.610959,0.624981,0.610959,0.615926
LogisticRegression,0.747945,0.750627,0.747945,0.747983
GradientBoostingClassifier,0.750685,0.759338,0.750685,0.753736
AdaBoostClassifier,0.526027,0.560175,0.526027,0.537212
ExtraTreesClassifier,0.517808,0.514448,0.517808,0.513447
XGBClassifier,0.79726,0.799781,0.79726,0.797539


In [35]:
df_metrics_hog_builtin  = get_metrics_all_models(models, X_train_hog_builtin, Y_train_hog_builtin, X_test_hog_builtin, Y_test_hog_builtin)

df_metrics_hog_builtin.drop('Confusion matrix', axis=1)

Model: SVC
Model: RandomForestClassifier
Model: KNeighborsClassifier
Model: DecisionTreeClassifier
Model: GaussianNB
Model: LogisticRegression


STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.

Increase the number of iterations (max_iter) or scale the data as shown in:
    https://scikit-learn.org/stable/modules/preprocessing.html
Please also refer to the documentation for alternative solver options:
    https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
  n_iter_i = _check_optimize_result(


Model: GradientBoostingClassifier
Model: AdaBoostClassifier
Model: ExtraTreesClassifier
Model: XGBClassifier


Unnamed: 0,Accuracy,Precision,Recall,F1-score
SVC,0.723288,0.715564,0.723288,0.718315
RandomForestClassifier,0.476712,0.464227,0.476712,0.467143
KNeighborsClassifier,0.734247,0.724763,0.734247,0.725613
DecisionTreeClassifier,0.419178,0.439109,0.419178,0.387588
GaussianNB,0.663014,0.672435,0.663014,0.661517
LogisticRegression,0.739726,0.739514,0.739726,0.738863
GradientBoostingClassifier,0.693151,0.696936,0.693151,0.694054
AdaBoostClassifier,0.465753,0.482504,0.465753,0.470718
ExtraTreesClassifier,0.482192,0.484391,0.482192,0.482379
XGBClassifier,0.753425,0.755649,0.753425,0.754081


In [36]:
df_metrics_hog_efd_builtin  = get_metrics_all_models(models, X_train_hog_efd_builtin, Y_train_hog_efd_builtin, X_test_hog_efd_builtin, Y_test_hog_efd_builtin)

df_metrics_hog_efd_builtin.drop('Confusion matrix', axis=1)

Model: SVC
Model: RandomForestClassifier
Model: KNeighborsClassifier
Model: DecisionTreeClassifier
Model: GaussianNB
Model: LogisticRegression


STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.

Increase the number of iterations (max_iter) or scale the data as shown in:
    https://scikit-learn.org/stable/modules/preprocessing.html
Please also refer to the documentation for alternative solver options:
    https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
  n_iter_i = _check_optimize_result(


Model: GradientBoostingClassifier
Model: AdaBoostClassifier
Model: ExtraTreesClassifier
Model: XGBClassifier


Unnamed: 0,Accuracy,Precision,Recall,F1-score
SVC,0.720548,0.715483,0.720548,0.7167
RandomForestClassifier,0.515068,0.51633,0.515068,0.515427
KNeighborsClassifier,0.731507,0.720299,0.731507,0.721951
DecisionTreeClassifier,0.452055,0.462006,0.452055,0.423772
GaussianNB,0.665753,0.675819,0.665753,0.66384
LogisticRegression,0.720548,0.723662,0.720548,0.720468
GradientBoostingClassifier,0.750685,0.753114,0.750685,0.751579
AdaBoostClassifier,0.536986,0.591654,0.536986,0.557001
ExtraTreesClassifier,0.545205,0.538941,0.545205,0.539878
XGBClassifier,0.767123,0.769509,0.767123,0.768023


In [37]:
df_metrics_efd  = get_metrics_all_models(models, X_train_efd, Y_train_efd, X_test_efd, Y_test_efd)

df_metrics_efd.drop('Confusion matrix', axis=1)

Model: SVC
Model: RandomForestClassifier
Model: KNeighborsClassifier
Model: DecisionTreeClassifier
Model: GaussianNB
Model: LogisticRegression
Model: GradientBoostingClassifier
Model: AdaBoostClassifier
Model: ExtraTreesClassifier
Model: XGBClassifier


Unnamed: 0,Accuracy,Precision,Recall,F1-score
SVC,0.509589,0.502797,0.509589,0.490276
RandomForestClassifier,0.515068,0.531022,0.515068,0.520426
KNeighborsClassifier,0.69863,0.707069,0.69863,0.696845
DecisionTreeClassifier,0.454795,0.492417,0.454795,0.443449
GaussianNB,0.567123,0.544293,0.567123,0.550859
LogisticRegression,0.512329,0.48824,0.512329,0.489266
GradientBoostingClassifier,0.723288,0.723513,0.723288,0.723188
AdaBoostClassifier,0.556164,0.598774,0.556164,0.569093
ExtraTreesClassifier,0.49589,0.498294,0.49589,0.496624
XGBClassifier,0.728767,0.729509,0.728767,0.728298


### Try some different parameters

In [10]:
from sklearn.model_selection import train_test_split

# load efd features from npy file
efd_features_5 = np.load('../../efd_features/efd_features_5.npy')
efd_features_15 = np.load('../../efd_features/efd_features_15.npy')
efd_features_20 = np.load('../../efd_features/efd_features_20.npy')
efd_features_25 = np.load('../../efd_features/efd_features_25.npy')
efd_features_50 = np.load('../../efd_features/efd_features_50.npy')

# split the data into training (70%) and validation (15%) and testing (15%) sets  #### to be adjusted ####
X_train_efd_5, X_test_efd_5, Y_train_efd_5, Y_test_efd_5 = train_test_split(efd_features_5, labels, test_size=0.2, random_state=42)
X_train_efd_15, X_test_efd_15, Y_train_efd_15, Y_test_efd_15 = train_test_split(efd_features_15, labels, test_size=0.2, random_state=42)
X_train_efd_20, X_test_efd_20, Y_train_efd_20, Y_test_efd_20 = train_test_split(efd_features_20, labels, test_size=0.2, random_state=42)
X_train_efd_25, X_test_efd_25, Y_train_efd_25, Y_test_efd_25 = train_test_split(efd_features_25, labels, test_size=0.2, random_state=42)
X_train_efd_50, X_test_efd_50, Y_train_efd_50, Y_test_efd_50 = train_test_split(efd_features_50, labels, test_size=0.2, random_state=42)

In [11]:
# get metrics for all models
df_metrics_efd_5  = get_metrics_all_models(models, X_train_efd_5, Y_train_efd_5, X_test_efd_5, Y_test_efd_5)

df_metrics_efd_5.drop('Confusion matrix', axis=1)

Model: SVC
Model: RandomForestClassifier
Model: KNeighborsClassifier
Model: DecisionTreeClassifier
Model: GaussianNB
Model: LogisticRegression
Model: GradientBoostingClassifier
Model: AdaBoostClassifier
Model: ExtraTreesClassifier
Model: XGBClassifier


Unnamed: 0,Accuracy,Precision,Recall,F1-score
SVC,0.49863,0.488433,0.49863,0.477272
RandomForestClassifier,0.517808,0.529667,0.517808,0.520094
KNeighborsClassifier,0.693151,0.698056,0.693151,0.685989
DecisionTreeClassifier,0.457534,0.479923,0.457534,0.445922
GaussianNB,0.536986,0.492605,0.536986,0.503894
LogisticRegression,0.49863,0.480336,0.49863,0.474142
GradientBoostingClassifier,0.69863,0.696675,0.69863,0.696486
AdaBoostClassifier,0.550685,0.557154,0.550685,0.547627
ExtraTreesClassifier,0.534247,0.534373,0.534247,0.533714
XGBClassifier,0.69863,0.691917,0.69863,0.693227


In [12]:
df_metrics_efd_15  = get_metrics_all_models(models, X_train_efd_15, Y_train_efd_15, X_test_efd_15, Y_test_efd_15)

df_metrics_efd_15.drop('Confusion matrix', axis=1)

Model: SVC
Model: RandomForestClassifier
Model: KNeighborsClassifier
Model: DecisionTreeClassifier
Model: GaussianNB
Model: LogisticRegression
Model: GradientBoostingClassifier
Model: AdaBoostClassifier
Model: ExtraTreesClassifier
Model: XGBClassifier


Unnamed: 0,Accuracy,Precision,Recall,F1-score
SVC,0.493151,0.483176,0.493151,0.472709
RandomForestClassifier,0.389041,0.380151,0.389041,0.383786
KNeighborsClassifier,0.690411,0.695737,0.690411,0.684335
DecisionTreeClassifier,0.452055,0.477084,0.452055,0.451812
GaussianNB,0.578082,0.575013,0.578082,0.564469
LogisticRegression,0.49863,0.478208,0.49863,0.472669
GradientBoostingClassifier,0.709589,0.714562,0.709589,0.710554
AdaBoostClassifier,0.567123,0.605225,0.567123,0.579653
ExtraTreesClassifier,0.463014,0.464212,0.463014,0.462018
XGBClassifier,0.717808,0.715269,0.717808,0.716284


In [13]:
df_metrics_efd_20  = get_metrics_all_models(models, X_train_efd_20, Y_train_efd_20, X_test_efd_20, Y_test_efd_20)

df_metrics_efd_20.drop('Confusion matrix', axis=1)

Model: SVC
Model: RandomForestClassifier
Model: KNeighborsClassifier
Model: DecisionTreeClassifier
Model: GaussianNB
Model: LogisticRegression
Model: GradientBoostingClassifier
Model: AdaBoostClassifier
Model: ExtraTreesClassifier
Model: XGBClassifier


Unnamed: 0,Accuracy,Precision,Recall,F1-score
SVC,0.493151,0.483176,0.493151,0.472709
RandomForestClassifier,0.369863,0.375913,0.369863,0.370782
KNeighborsClassifier,0.687671,0.692344,0.687671,0.681179
DecisionTreeClassifier,0.452055,0.477084,0.452055,0.451812
GaussianNB,0.534247,0.529336,0.534247,0.514742
LogisticRegression,0.49863,0.478208,0.49863,0.472669
GradientBoostingClassifier,0.69589,0.696752,0.69589,0.694865
AdaBoostClassifier,0.575342,0.617241,0.575342,0.589797
ExtraTreesClassifier,0.361644,0.365523,0.361644,0.363018
XGBClassifier,0.712329,0.709556,0.712329,0.710345


In [14]:
df_metrics_efd_25  = get_metrics_all_models(models, X_train_efd_25, Y_train_efd_25, X_test_efd_25, Y_test_efd_25)

df_metrics_efd_25.drop('Confusion matrix', axis=1)

Model: SVC
Model: RandomForestClassifier
Model: KNeighborsClassifier
Model: DecisionTreeClassifier
Model: GaussianNB
Model: LogisticRegression
Model: GradientBoostingClassifier
Model: AdaBoostClassifier
Model: ExtraTreesClassifier
Model: XGBClassifier


Unnamed: 0,Accuracy,Precision,Recall,F1-score
SVC,0.490411,0.48073,0.490411,0.470165
RandomForestClassifier,0.438356,0.436548,0.438356,0.43445
KNeighborsClassifier,0.690411,0.694931,0.690411,0.683752
DecisionTreeClassifier,0.452055,0.477084,0.452055,0.451812
GaussianNB,0.49589,0.489465,0.49589,0.46416
LogisticRegression,0.49863,0.478208,0.49863,0.472669
GradientBoostingClassifier,0.720548,0.722867,0.720548,0.720934
AdaBoostClassifier,0.520548,0.571816,0.520548,0.536486
ExtraTreesClassifier,0.452055,0.450143,0.452055,0.448665
XGBClassifier,0.712329,0.711349,0.712329,0.711752


In [15]:
df_metrics_efd_50  = get_metrics_all_models(models, X_train_efd_50, Y_train_efd_50, X_test_efd_50, Y_test_efd_50)

df_metrics_efd_50.drop('Confusion matrix', axis=1)

Model: SVC
Model: RandomForestClassifier
Model: KNeighborsClassifier
Model: DecisionTreeClassifier
Model: GaussianNB
Model: LogisticRegression
Model: GradientBoostingClassifier
Model: AdaBoostClassifier
Model: ExtraTreesClassifier
Model: XGBClassifier


Unnamed: 0,Accuracy,Precision,Recall,F1-score
SVC,0.490411,0.48073,0.490411,0.470165
RandomForestClassifier,0.394521,0.404765,0.394521,0.398069
KNeighborsClassifier,0.690411,0.694931,0.690411,0.683752
DecisionTreeClassifier,0.452055,0.477084,0.452055,0.451812
GaussianNB,0.389041,0.369113,0.389041,0.337177
LogisticRegression,0.49863,0.478208,0.49863,0.472669
GradientBoostingClassifier,0.715068,0.719796,0.715068,0.716473
AdaBoostClassifier,0.531507,0.57403,0.531507,0.544246
ExtraTreesClassifier,0.386301,0.388421,0.386301,0.38543
XGBClassifier,0.682192,0.680178,0.682192,0.680357
