In [2]:
import sys
sys.path.append('../')

from feature_extraction.hog_descriptor import *

### Helper Functions

In [3]:
# helper functions for models-comparison
import numpy as np
import pandas as pd
from sklearn.metrics import accuracy_score, precision_score, recall_score, f1_score, auc, confusion_matrix


def get_metrics(y_true, y_pred, print_metrics=True):
    """
    Get accuracy, precision, recall, f1-score, auc, confusion matrix
    """
    # print(y_true)
    # print(y_pred)
    acc = accuracy_score(y_true, y_pred)
    prec = precision_score(y_true, y_pred, average='weighted', zero_division=0)
    rec = recall_score(y_true, y_pred, average='weighted', zero_division=0)
    f1 = f1_score(y_true, y_pred, average='weighted', zero_division=0)
    conf_mat = confusion_matrix(y_true, y_pred)

    if print_metrics:
        print('Accuracy: {:.2f}'.format(acc*100))
        print('Precision: {:.2f}'.format(prec))
        print('Recall: {:.2f}'.format(rec))
        print('F1-score: {:.2f}'.format(f1))
        print('Confusion matrix:')
        print(pd.DataFrame(conf_mat))

    return acc, prec, rec, f1, conf_mat


# take the model and the train and test data and return the metrics
def get_metrics_model(model, X_train, y_train, X_test, y_test, print_metrics=True):
    """
    Get accuracy, precision, recall, f1-score, auc, confusion matrix
    """
    # get model name
    if print_metrics:
       print("training model: {}".format(model.__class__.__name__))
    model.fit(X_train, y_train)
    if print_metrics:
        print("predicting model: {}".format(model.__class__.__name__))
    y_pred = model.predict(X_test)
    if print_metrics:
        print("evaluating model: {}".format(model.__class__.__name__))
    acc, prec, rec, f1, conf_mat = get_metrics(y_test, y_pred, print_metrics)

    if print_metrics:
        print('saving model: {}'.format(model.__class__.__name__))
    
    # # save the trained model
    # model_name = model.__class__.__name__
    # model.save('models/{}.h5'.format(model_name))

    return acc, prec, rec, f1, conf_mat


# compute the metrics for all the models and return a dataframe with the results
def get_metrics_all_models(models, X_train, y_train, X_test, y_test):
    """
    Get accuracy, precision, recall, f1-score, auc, confusion matrix for all models
    """
    metrics = []
    for model in models:
        print('Model: {}'.format(model.__class__.__name__))
        acc, prec, rec, f1, conf_mat = get_metrics_model(model, X_train, y_train, X_test, y_test, print_metrics=False)
        metrics.append([acc, prec, rec, f1, conf_mat])

    df_metrics = pd.DataFrame(metrics, columns=['Accuracy', 'Precision', 'Recall', 'F1-score', 'Confusion matrix'])
    df_metrics.index = [str(model.__class__.__name__) for model in models]

    return df_metrics


In [4]:
# from sklearn.model_selection import train_test_split
# # Define the number of samples and features
# num_samples = 1000
# num_features = 6
# # Create a random feature matrix
# X = np.random.rand(num_samples, num_features)
# # Create corresponding labels 6 classes (0, 1, 2, 3, 4, 5)
# Y = np.random.randint(6, size=num_samples)

# # split the data into training (80%) and testing (20%) sets
# X_train, X_test, Y_train, Y_test = train_test_split(X, Y, test_size=0.2, random_state=42)

# # check the size of the training and testing sets
# print("Training set size: ", X_train.shape[0])
# print("Testing set size: ", X_test.shape[0])

In [5]:
images, labels = read_images('../../pp_dataset')

In [6]:
# HOG = HogDescriptor()

# features = HOG.builtin_hog_descriptor(images)

# X_train, X_test, Y_train, Y_test = train_test_split(features, labels, test_size=0.2, random_state=42)

# print(len(X_train), len(X_test), len(Y_train), len(Y_test))

In [7]:
# load features
efd_features = np.load('../../features/efd_features.npy')
hog_features_builtin = np.load('../../features/hog_features_builtin.npy')
hog_features_custom = np.load('../../features/hog_features_custom.npy')
hog_efd_features_builtin = np.load('../../features/hog_efd_features_builtin.npy')
hog_efd_features_custom = np.load('../../features/hog_efd_features_custom.npy')
labels = np.load('../../features/labels.npy')

print(efd_features.shape, hog_features_builtin.shape, hog_features_custom.shape, hog_efd_features_builtin.shape, hog_efd_features_custom.shape, labels.shape)

(1821, 37) (1821, 3780) (1821, 3780) (1821, 3817) (1821, 3817) (1821,)


# - Models

## Models parametars' tuning

In [8]:
from sklearn import svm
from sklearn.ensemble import RandomForestClassifier
from sklearn.neighbors import KNeighborsClassifier
from sklearn.tree import DecisionTreeClassifier
from sklearn.naive_bayes import  GaussianNB
from sklearn.linear_model import LogisticRegression
from sklearn.ensemble import GradientBoostingClassifier
from sklearn.ensemble import AdaBoostClassifier
from sklearn.ensemble import ExtraTreesClassifier
from xgboost import XGBClassifier


models = [svm.SVC(C=0.1, gamma=0.1, kernel='poly'), RandomForestClassifier(max_depth=20, min_samples_leaf=1, min_samples_split=2, n_estimators=100),
          KNeighborsClassifier(n_neighbors=1), DecisionTreeClassifier(criterion='gini', max_depth=9 min_samples_leaf=1), GaussianNB(),
          LogisticRegression(), GradientBoostingClassifier(n_estimators=100, learning_rate=.1, max_depth=3, random_state=0), AdaBoostClassifier(n_estimators=1000, random_state=0), 
          ExtraTreesClassifier(n_estimators=1, random_state=0), XGBClassifier(n_estimators=1000, learning_rate=0.008, max_depth=3, random_state=0)]


In [12]:
from sklearn.model_selection import train_test_split

# divide the data into training (70%) and validation (15%) and testing (15%) sets  #### to be adjusted ####
X_train_val_hog_custom, X_test_hog_custom, Y_train_val_hog_custom, Y_test_hog_custom = train_test_split(hog_features_custom, labels, test_size=0.15, random_state=42)
X_train_val_hog_builtin, X_test_hog_builtin, Y_train_val_hog_builtin, Y_test_hog_builtin = train_test_split(hog_features_builtin, labels, test_size=0.15, random_state=42)
X_train_val_efd, X_test_efd, Y_train_val_efd, Y_test_efd = train_test_split(efd_features, labels, test_size=0.15, random_state=42)
X_train_val_hog_efd_custom, X_test_hog_efd_custom, Y_train_val_hog_efd_custom, Y_test_hog_efd_custom = train_test_split(hog_efd_features_custom, labels, test_size=0.15, random_state=42)
X_train_val_hog_efd_builtin, X_test_hog_efd_builtin, Y_train_val_hog_efd_builtin, Y_test_hog_efd_builtin = train_test_split(hog_efd_features_builtin, labels, test_size=0.15, random_state=42)

X_train_hog_custom, X_val_hog_custom, Y_train_hog_custom, Y_val_hog_custom = train_test_split(X_train_val_hog_custom, Y_train_val_hog_custom, test_size=0.1765, random_state=42)
X_train_hog_builtin, X_val_hog_builtin, Y_train_hog_builtin, Y_val_hog_builtin = train_test_split(X_train_val_hog_builtin, Y_train_val_hog_builtin, test_size=0.1765, random_state=42)
X_train_efd, X_val_efd, Y_train_efd, Y_val_efd = train_test_split(X_train_val_efd, Y_train_val_efd, test_size=0.1765, random_state=42)
X_train_hog_efd_custom, X_val_hog_efd_custom, Y_train_hog_efd_custom, Y_val_hog_efd_custom = train_test_split(X_train_val_hog_efd_custom, Y_train_val_hog_efd_custom, test_size=0.1765, random_state=42)
X_train_hog_efd_builtin, X_val_hog_efd_builtin, Y_train_hog_efd_builtin, Y_val_hog_efd_builtin = train_test_split(X_train_val_hog_efd_builtin, Y_train_val_hog_efd_builtin, test_size=0.1765, random_state=42)

print(len(X_train_hog_custom), len(X_val_hog_custom), len(X_test_hog_custom), len(Y_train_hog_custom), len(Y_val_hog_custom), len(Y_test_hog_custom))
print(len(X_train_hog_builtin), len(X_val_hog_builtin), len(X_test_hog_builtin), len(Y_train_hog_builtin), len(Y_val_hog_builtin), len(Y_test_hog_builtin))
print(len(X_train_efd), len(X_val_efd), len(X_test_efd), len(Y_train_efd), len(Y_val_efd), len(Y_test_efd))
print(len(X_train_hog_efd_custom), len(X_val_hog_efd_custom), len(X_test_hog_efd_custom), len(Y_train_hog_efd_custom), len(Y_val_hog_efd_custom), len(Y_test_hog_efd_custom))
print(len(X_train_hog_efd_builtin), len(X_val_hog_efd_builtin), len(X_test_hog_efd_builtin), len(Y_train_hog_efd_builtin), len(Y_val_hog_efd_builtin), len(Y_test_hog_efd_builtin))

1273 274 274 1273 274 274
1273 274 274 1273 274 274
1273 274 274 1273 274 274
1273 274 274 1273 274 274
1273 274 274 1273 274 274


### SVM Model

#### Tuning

In [13]:
from sklearn.model_selection import GridSearchCV

# Create an SVM model
svm_model = models[0]

# Define the parameter grid
param_grid = {'C': [0.1, 1, 10], 'gamma': [0.1, 1, 10], 'kernel': ['rbf', 'poly', 'linear']}

# Perform grid search with 5-fold cross-validation
grid_search = GridSearchCV(svm_model, param_grid, cv=5)
print("cross validation finished ...")
# Fit the grid search to the training data
grid_search.fit(X_train_hog_custom, Y_train_hog_custom)
print("fitting finished ...")

# Print the best parameters and score
print("Best parameters:", grid_search.best_params_)
print("Best score:", grid_search.best_score_)

cross validation finished ...
fitting finished ...
Best parameters: {'C': 0.1, 'gamma': 0.1, 'kernel': 'poly'}
Best score: 0.76279141577891


In [17]:
# get the best model
best_svm = grid_search.best_estimator_
best_svm.fit(X_train_val_hog_custom, Y_train_val_hog_custom)

# get the accuracy on the validation set
y_pred_val = best_svm.predict(X_val_hog_custom)
val_acc = accuracy_score(Y_val_hog_custom, y_pred_val)

# get the accuracy on the test set
y_pred_test = best_svm.predict(X_test_hog_custom)
test_acc = accuracy_score(Y_test_hog_custom, y_pred_test)


print("Validation accuracy:", val_acc)
print("Test accuracy:", test_acc)

Validation accuracy: 1.0
Test accuracy: 0.791970802919708


### Random Forest

In [18]:
from sklearn.model_selection import GridSearchCV

param_grid = {
    'n_estimators': [1, 50, 100],
    'max_depth': [5, 10, 20],
    'min_samples_split': [2, 5, 10],
    'min_samples_leaf': [1, 2, 4]
}
rf = models[1]

grid_search = GridSearchCV(rf, param_grid, cv=5)
print("cross validation finished ...")

grid_search.fit(X_train_hog_custom, Y_train_hog_custom)
print("fitting finished ...")

# Print the best parameters and score
print("Best parameters:", grid_search.best_params_)
print("Best score:", grid_search.best_score_)

cross validation finished ...
fitting finished ...
Best parameters: {'max_depth': 20, 'min_samples_leaf': 1, 'min_samples_split': 2, 'n_estimators': 100}
Best score: 0.7384529874942103


In [19]:
# get the best model
best_rf = grid_search.best_estimator_
best_rf.fit(X_train_val_hog_custom, Y_train_val_hog_custom)

# get the accuracy on the validation set
y_pred_val = best_rf.predict(X_val_hog_custom)
val_acc = accuracy_score(Y_val_hog_custom, y_pred_val)

# get the accuracy on the test set
y_pred_test = best_rf.predict(X_test_hog_custom)
test_acc = accuracy_score(Y_test_hog_custom, y_pred_test)


print("Validation accuracy:", val_acc)
print("Test accuracy:", test_acc)

Validation accuracy: 1.0
Test accuracy: 0.7372262773722628


### KNN

In [29]:
param_grid = {'n_neighbors': [1, 3, 5, 7, 9, 11, 13, 14, 15]}

knn = models[2]

grid_search = GridSearchCV(knn, param_grid, cv=5)
print("cross validation finished ...")

grid_search.fit(X_train_hog_custom, Y_train_hog_custom)
print("fitting finished ...")

# Print the best parameters and score
print("Best parameters:", grid_search.best_params_)
print("Best score:", grid_search.best_score_)

cross validation finished ...
fitting finished ...
Best parameters: {'n_neighbors': 1}
Best score: 0.6716720704029644


In [30]:
# get the best model
best_knn = grid_search.best_estimator_
best_knn.fit(X_train_val_hog_custom, Y_train_val_hog_custom)

# get the accuracy on the validation set
y_pred_val = best_knn.predict(X_val_hog_custom)
val_acc = accuracy_score(Y_val_hog_custom, y_pred_val)

# get the accuracy on the test set
y_pred_test = best_knn.predict(X_test_hog_custom)
test_acc = accuracy_score(Y_test_hog_custom, y_pred_test)


print("Validation accuracy:", val_acc)
print("Test accuracy:", test_acc)

Validation accuracy: 1.0
Test accuracy: 0.718978102189781


### Decision Tree

In [31]:
param_grid = {
    'max_depth': [3, 5, 7, 9],
    'min_samples_leaf': [1, 5, 10],
    'criterion': ['gini', 'entropy']
}

dt = models[3]

grid_search = GridSearchCV(dt, param_grid, cv=5)
print("cross validation finished ...")

grid_search.fit(X_train_hog_custom, Y_train_hog_custom)
print("fitting finished ...")

# Print the best parameters and score
print("Best parameters:", grid_search.best_params_)
print("Best score:", grid_search.best_score_)

cross validation finished ...
fitting finished ...
Best parameters: {'criterion': 'gini', 'max_depth': 9, 'min_samples_leaf': 1}
Best score: 0.49491122433225254


In [32]:
# get the best model
best_dt = grid_search.best_estimator_
best_dt.fit(X_train_val_hog_custom, Y_train_val_hog_custom)

# get the accuracy on the validation set
y_pred_val = best_dt.predict(X_val_hog_custom)
val_acc = accuracy_score(Y_val_hog_custom, y_pred_val)

# get the accuracy on the test set
y_pred_test = best_dt.predict(X_test_hog_custom)
test_acc = accuracy_score(Y_test_hog_custom, y_pred_test)


print("Validation accuracy:", val_acc)
print("Test accuracy:", test_acc)

Validation accuracy: 0.6605839416058394
Test accuracy: 0.5182481751824818


### Naive Bayes 

In [37]:
# get the best model
nb = models[4]
nb.fit(X_train_val_hog_custom, Y_train_val_hog_custom)

# get the accuracy on the validation set
y_pred_val = nb.predict(X_val_hog_custom)
val_acc = accuracy_score(Y_val_hog_custom, y_pred_val)

# get the accuracy on the test set
y_pred_test = nb.predict(X_test_hog_custom)
test_acc = accuracy_score(Y_test_hog_custom, y_pred_test)


print("Validation accuracy:", val_acc)
print("Test accuracy:", test_acc)

Validation accuracy: 0.8613138686131386
Test accuracy: 0.5985401459854015


### Logistic Regression

In [48]:
from sklearn.preprocessing import StandardScaler
from sklearn.model_selection import GridSearchCV

grid_params = param_grid = {'penalty': ['none', 'l1', 'l2'], 'C': [0.001, 0.01, 0.1, 1, 10, 100, 1000], 'solver': ['newton-cg', 'lbfgs', 'liblinear', 'sag', 'saga'], 'max_iter': [100, 1000, 2500]}
lr = models[5]

grid_search = GridSearchCV(lr, param_grid, cv=5)
print("cross validation finished ...")

scaler = StandardScaler()
X_train_hog_custom_norm = scaler.fit_transform(X_train_hog_custom)
X_train_val_hog_custom_norm = scaler.transform(X_train_val_hog_custom)
X_val_hog_custom_norm = scaler.transform(X_val_hog_custom)
X_test_hog_custom_norm = scaler.transform(X_test_hog_custom)

grid_search.fit(X_train_hog_custom_norm, Y_train_hog_custom)
print("fitting finished ...")

# Print the best parameters and score
print("Best parameters:", grid_search.best_params_)
print("Best score:", grid_search.best_score_)

cross validation finished ...




In [None]:
# get the best model
best_lr = grid_search.best_estimator_
best_lr.fit(X_train_val_hog_custom_norm, Y_train_val_hog_custom)

# get the accuracy on the validation set
y_pred_val = best_lr.predict(X_val_hog_custom_norm)
val_acc = accuracy_score(Y_val_hog_custom, y_pred_val)

# get the accuracy on the test set
y_pred_test = best_lr.predict(X_test_hog_custom_norm)
test_acc = accuracy_score(Y_test_hog_custom, y_pred_test)


print("Validation accuracy:", val_acc)
print("Test accuracy:", test_acc)

Validation accuracy: 1.0
Test accuracy: 0.7408759124087592


STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.

Increase the number of iterations (max_iter) or scale the data as shown in:
    https://scikit-learn.org/stable/modules/preprocessing.html
Please also refer to the documentation for alternative solver options:
    https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
  n_iter_i = _check_optimize_result(


### Gradient Boosting

In [None]:
from sklearn.model_selection import GridSearchCV

param_grid = {'n_estimators': [100, 200, 300], 'learning_rate': [0.1, 0.05, 0.01], 'max_depth': [3, 5, 7]}
gb = models[6]

grid_search = GridSearchCV(gb, param_grid, cv=5)
print("cross validation finished ...")

grid_search.fit(X_train_hog_custom, Y_train_hog_custom)
print("fitting finished ...")

# Print the best parameters and score
print("Best parameters:", grid_search.best_params_)
print("Best score:", grid_search.best_score_)

In [None]:
# get the best model
best_gb = grid_search.best_estimator_
best_gb.fit(X_train_val_hog_custom, Y_train_val_hog_custom)

# get the accuracy on the validation set
y_pred_val = best_gb.predict(X_val_hog_custom)
val_acc = accuracy_score(Y_val_hog_custom, y_pred_val)

# get the accuracy on the test set
y_pred_test = best_gb.predict(X_test_hog_custom)
test_acc = accuracy_score(Y_test_hog_custom, y_pred_test)


print("Validation accuracy:", val_acc)
print("Test accuracy:", test_acc)

### AdaBoost

In [None]:
param_grid = {
    'n_estimators': [50, 100, 150],
    'learning_rate': [0.01, 0.1, 1.0]
}

ada = models[7]

grid_search = GridSearchCV(ada, param_grid, cv=5)
print("cross validation finished ...")

grid_search.fit(X_train_hog_custom, Y_train_hog_custom)
print("fitting finished ...")

# Print the best parameters and score
print("Best parameters:", grid_search.best_params_)
print("Best score:", grid_search.best_score_)

training model: AdaBoostClassifier
predicting model: AdaBoostClassifier
evaluating model: AdaBoostClassifier
Accuracy: 0.00
Precision: 0.00
Recall: 0.00
F1-score: 0.00
Confusion matrix:
   0  1  2  3
0  0  1  0  0
1  0  0  0  0
2  0  2  0  0
3  0  2  0  0
saving model: AdaBoostClassifier


In [None]:
# get the best model
best_ada = grid_search.best_estimator_
best_ada.fit(X_train_val_hog_custom, Y_train_val_hog_custom)

# get the accuracy on the validation set
y_pred_val = best_ada.predict(X_val_hog_custom)
val_acc = accuracy_score(Y_val_hog_custom, y_pred_val)

# get the accuracy on the test set
y_pred_test = best_ada.predict(X_test_hog_custom)
test_acc = accuracy_score(Y_test_hog_custom, y_pred_test)


print("Validation accuracy:", val_acc)
print("Test accuracy:", test_acc)

### Extra Trees

In [122]:
param_grid = {
    'n_estimators': [50, 100, 200],
    'max_depth': [10, 20, None],
    'min_samples_split': [2, 5],
    'min_samples_leaf': [1, 2],
    'criterion': ['gini', 'entropy']
}

et = models[8]

grid_search = GridSearchCV(et, param_grid, cv=5)
print("cross validation finished ...")

grid_search.fit(X_train_hog_custom, Y_train_hog_custom)
print("fitting finished ...")

# Print the best parameters and score
print("Best parameters:", grid_search.best_params_)
print("Best score:", grid_search.best_score_)

training model: ExtraTreesClassifier
predicting model: ExtraTreesClassifier
evaluating model: ExtraTreesClassifier
Accuracy: 0.00
Precision: 0.00
Recall: 0.00
F1-score: 0.00
Confusion matrix:
   0  1  2  3  4  5
0  0  0  0  1  0  0
1  0  0  0  0  0  0
2  0  2  0  0  0  0
3  0  0  0  0  0  0
4  0  0  0  0  0  2
5  0  0  0  0  0  0
saving model: ExtraTreesClassifier


In [None]:
# get the best model
best_et = grid_search.best_estimator_
best_et.fit(X_train_val_hog_custom, Y_train_val_hog_custom)

# get the accuracy on the validation set
y_pred_val = best_et.predict(X_val_hog_custom)
val_acc = accuracy_score(Y_val_hog_custom, y_pred_val)

# get the accuracy on the test set
y_pred_test = best_et.predict(X_test_hog_custom)
test_acc = accuracy_score(Y_test_hog_custom, y_pred_test)


print("Validation accuracy:", val_acc)
print("Test accuracy:", test_acc)

### XGBoost

In [123]:
param_grid = {
    'max_depth': [3, 5, 7],
    'learning_rate': [0.1, 0.01, 0.001],
    'n_estimators': [100, 200, 300],
    'subsample': [0.8, 1.0],
}
xgb = models[9]

grid_search = GridSearchCV(xgb, param_grid, cv=5)
print("cross validation finished ...")

grid_search.fit(X_train_hog_custom, Y_train_hog_custom)
print("fitting finished ...")

# Print the best parameters and score
print("Best parameters:", grid_search.best_params_)
print("Best score:", grid_search.best_score_)

training model: XGBClassifier
predicting model: XGBClassifier
evaluating model: XGBClassifier
Accuracy: 20.00
Precision: 0.40
Recall: 0.20
F1-score: 0.27
Confusion matrix:
   0  1  2  3  4
0  0  0  0  0  1
1  0  0  0  0  0
2  0  1  1  0  0
3  0  0  0  0  2
4  0  0  0  0  0
saving model: XGBClassifier


In [None]:
# get the best model
best_xgb = grid_search.best_estimator_
best_xgb.fit(X_train_val_hog_custom, Y_train_val_hog_custom)

# get the accuracy on the validation set
y_pred_val = best_xgb.predict(X_val_hog_custom)
val_acc = accuracy_score(Y_val_hog_custom, y_pred_val)

# get the accuracy on the test set
y_pred_test = best_xgb.predict(X_test_hog_custom)
test_acc = accuracy_score(Y_test_hog_custom, y_pred_test)


print("Validation accuracy:", val_acc)
print("Test accuracy:", test_acc)

## Models Comparison

In [33]:
df_metrics_hog_custom  = get_metrics_all_models(models, X_train_hog_custom, Y_train_hog_custom, X_test_hog_custom, Y_test_hog_custom)

df_metrics_hog_custom.drop('Confusion matrix', axis=1)

Model: SVC
Model: RandomForestClassifier
Model: KNeighborsClassifier
Model: DecisionTreeClassifier
Model: GaussianNB
Model: LogisticRegression


STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.

Increase the number of iterations (max_iter) or scale the data as shown in:
    https://scikit-learn.org/stable/modules/preprocessing.html
Please also refer to the documentation for alternative solver options:
    https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
  n_iter_i = _check_optimize_result(


Model: GradientBoostingClassifier
Model: AdaBoostClassifier
Model: ExtraTreesClassifier
Model: XGBClassifier


Unnamed: 0,Accuracy,Precision,Recall,F1-score
SVC,0.742466,0.744294,0.742466,0.74156
RandomForestClassifier,0.484932,0.497067,0.484932,0.485925
KNeighborsClassifier,0.657534,0.643243,0.657534,0.647723
DecisionTreeClassifier,0.326027,0.308028,0.326027,0.270413
GaussianNB,0.613699,0.62707,0.613699,0.618532
LogisticRegression,0.756164,0.759463,0.756164,0.756626
GradientBoostingClassifier,0.676712,0.689317,0.676712,0.680584
AdaBoostClassifier,0.567123,0.618068,0.567123,0.582957
ExtraTreesClassifier,0.468493,0.466713,0.468493,0.464698
XGBClassifier,0.70411,0.713435,0.70411,0.705786


In [34]:
df_metrics_hog_efd_custom  = get_metrics_all_models(models, X_train_hog_efd_custom, Y_train_hog_efd_custom, X_test_hog_efd_custom, Y_test_hog_efd_custom)

df_metrics_hog_efd_custom.drop('Confusion matrix', axis=1)

Model: SVC
Model: RandomForestClassifier
Model: KNeighborsClassifier
Model: DecisionTreeClassifier
Model: GaussianNB
Model: LogisticRegression


STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.

Increase the number of iterations (max_iter) or scale the data as shown in:
    https://scikit-learn.org/stable/modules/preprocessing.html
Please also refer to the documentation for alternative solver options:
    https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
  n_iter_i = _check_optimize_result(


Model: GradientBoostingClassifier
Model: AdaBoostClassifier
Model: ExtraTreesClassifier
Model: XGBClassifier


Unnamed: 0,Accuracy,Precision,Recall,F1-score
SVC,0.739726,0.742912,0.739726,0.739474
RandomForestClassifier,0.517808,0.51899,0.517808,0.517272
KNeighborsClassifier,0.652055,0.637043,0.652055,0.642112
DecisionTreeClassifier,0.454795,0.466065,0.454795,0.419833
GaussianNB,0.610959,0.624981,0.610959,0.615926
LogisticRegression,0.747945,0.750627,0.747945,0.747983
GradientBoostingClassifier,0.750685,0.759338,0.750685,0.753736
AdaBoostClassifier,0.526027,0.560175,0.526027,0.537212
ExtraTreesClassifier,0.517808,0.514448,0.517808,0.513447
XGBClassifier,0.79726,0.799781,0.79726,0.797539


In [35]:
df_metrics_hog_builtin  = get_metrics_all_models(models, X_train_hog_builtin, Y_train_hog_builtin, X_test_hog_builtin, Y_test_hog_builtin)

df_metrics_hog_builtin.drop('Confusion matrix', axis=1)

Model: SVC
Model: RandomForestClassifier
Model: KNeighborsClassifier
Model: DecisionTreeClassifier
Model: GaussianNB
Model: LogisticRegression


STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.

Increase the number of iterations (max_iter) or scale the data as shown in:
    https://scikit-learn.org/stable/modules/preprocessing.html
Please also refer to the documentation for alternative solver options:
    https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
  n_iter_i = _check_optimize_result(


Model: GradientBoostingClassifier
Model: AdaBoostClassifier
Model: ExtraTreesClassifier
Model: XGBClassifier


Unnamed: 0,Accuracy,Precision,Recall,F1-score
SVC,0.723288,0.715564,0.723288,0.718315
RandomForestClassifier,0.476712,0.464227,0.476712,0.467143
KNeighborsClassifier,0.734247,0.724763,0.734247,0.725613
DecisionTreeClassifier,0.419178,0.439109,0.419178,0.387588
GaussianNB,0.663014,0.672435,0.663014,0.661517
LogisticRegression,0.739726,0.739514,0.739726,0.738863
GradientBoostingClassifier,0.693151,0.696936,0.693151,0.694054
AdaBoostClassifier,0.465753,0.482504,0.465753,0.470718
ExtraTreesClassifier,0.482192,0.484391,0.482192,0.482379
XGBClassifier,0.753425,0.755649,0.753425,0.754081


In [36]:
df_metrics_hog_efd_builtin  = get_metrics_all_models(models, X_train_hog_efd_builtin, Y_train_hog_efd_builtin, X_test_hog_efd_builtin, Y_test_hog_efd_builtin)

df_metrics_hog_efd_builtin.drop('Confusion matrix', axis=1)

Model: SVC
Model: RandomForestClassifier
Model: KNeighborsClassifier
Model: DecisionTreeClassifier
Model: GaussianNB
Model: LogisticRegression


STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.

Increase the number of iterations (max_iter) or scale the data as shown in:
    https://scikit-learn.org/stable/modules/preprocessing.html
Please also refer to the documentation for alternative solver options:
    https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
  n_iter_i = _check_optimize_result(


Model: GradientBoostingClassifier
Model: AdaBoostClassifier
Model: ExtraTreesClassifier
Model: XGBClassifier


Unnamed: 0,Accuracy,Precision,Recall,F1-score
SVC,0.720548,0.715483,0.720548,0.7167
RandomForestClassifier,0.515068,0.51633,0.515068,0.515427
KNeighborsClassifier,0.731507,0.720299,0.731507,0.721951
DecisionTreeClassifier,0.452055,0.462006,0.452055,0.423772
GaussianNB,0.665753,0.675819,0.665753,0.66384
LogisticRegression,0.720548,0.723662,0.720548,0.720468
GradientBoostingClassifier,0.750685,0.753114,0.750685,0.751579
AdaBoostClassifier,0.536986,0.591654,0.536986,0.557001
ExtraTreesClassifier,0.545205,0.538941,0.545205,0.539878
XGBClassifier,0.767123,0.769509,0.767123,0.768023


In [37]:
df_metrics_efd  = get_metrics_all_models(models, X_train_efd, Y_train_efd, X_test_efd, Y_test_efd)

df_metrics_efd.drop('Confusion matrix', axis=1)

Model: SVC
Model: RandomForestClassifier
Model: KNeighborsClassifier
Model: DecisionTreeClassifier
Model: GaussianNB
Model: LogisticRegression
Model: GradientBoostingClassifier
Model: AdaBoostClassifier
Model: ExtraTreesClassifier
Model: XGBClassifier


Unnamed: 0,Accuracy,Precision,Recall,F1-score
SVC,0.509589,0.502797,0.509589,0.490276
RandomForestClassifier,0.515068,0.531022,0.515068,0.520426
KNeighborsClassifier,0.69863,0.707069,0.69863,0.696845
DecisionTreeClassifier,0.454795,0.492417,0.454795,0.443449
GaussianNB,0.567123,0.544293,0.567123,0.550859
LogisticRegression,0.512329,0.48824,0.512329,0.489266
GradientBoostingClassifier,0.723288,0.723513,0.723288,0.723188
AdaBoostClassifier,0.556164,0.598774,0.556164,0.569093
ExtraTreesClassifier,0.49589,0.498294,0.49589,0.496624
XGBClassifier,0.728767,0.729509,0.728767,0.728298
