In [6]:
import sys
sys.path.append('../')

from feature_extraction.hog_descriptor import *

### Helper Functions

In [None]:
# helper functions for models-comparison
import numpy as np
import pandas as pd
from sklearn.metrics import accuracy_score, precision_score, recall_score, f1_score, auc, confusion_matrix
import pickle


def get_metrics(y_true, y_pred, print_metrics=True):
    """
    Get accuracy, precision, recall, f1-score, auc, confusion matrix
    """
    # print(y_true)
    # print(y_pred)
    acc = accuracy_score(y_true, y_pred)
    prec = precision_score(y_true, y_pred, average='weighted', zero_division=0)
    rec = recall_score(y_true, y_pred, average='weighted', zero_division=0)
    f1 = f1_score(y_true, y_pred, average='weighted', zero_division=0)
    conf_mat = confusion_matrix(y_true, y_pred)

    if print_metrics:
        print('Accuracy: {:.2f}'.format(acc*100))
        print('Precision: {:.2f}'.format(prec))
        print('Recall: {:.2f}'.format(rec))
        print('F1-score: {:.2f}'.format(f1))
        print('Confusion matrix:')
        print(pd.DataFrame(conf_mat))

    return acc, prec, rec, f1, conf_mat


# take the model and the train and test data and return the metrics
def get_metrics_model(model, X_train, y_train, X_test, y_test, print_metrics=True):
    """
    Get accuracy, precision, recall, f1-score, auc, confusion matrix
    """
    # get model name
    if print_metrics:
       print("training model: {}".format(model.__class__.__name__))
    model.fit(X_train, y_train)
    pickle.dump(model, open('../../models/model.pkl', 'wb'))

    if print_metrics:
        print("predicting model: {}".format(model.__class__.__name__))
    y_pred = model.predict(X_test)
    if print_metrics:
        print("evaluating model: {}".format(model.__class__.__name__))
    acc, prec, rec, f1, conf_mat = get_metrics(y_test, y_pred, print_metrics)

    if print_metrics:
        print('saving model: {}'.format(model.__class__.__name__))
    
    # # save the trained model
    # model_name = model.__class__.__name__
    # model.save('models/{}.h5'.format(model_name))

    return acc, prec, rec, f1, conf_mat


# compute the metrics for all the models and return a dataframe with the results
def get_metrics_all_models(models, X_train, y_train, X_test, y_test):
    """
    Get accuracy, precision, recall, f1-score, auc, confusion matrix for all models
    """
    metrics = []
    for model in models:
        print('Model: {}'.format(model.__class__.__name__))
        acc, prec, rec, f1, conf_mat = get_metrics_model(model, X_train, y_train, X_test, y_test, print_metrics=False)
        metrics.append([acc, prec, rec, f1, conf_mat])

    df_metrics = pd.DataFrame(metrics, columns=['Accuracy', 'Precision', 'Recall', 'F1-score', 'Confusion matrix'])
    df_metrics.index = [str(model.__class__.__name__) for model in models]

    return df_metrics


In [8]:
# load features
efd_features = np.load('../../features/efd_features.npy')
hog_features_builtin = np.load('../../features/hog_features_builtin.npy')
hog_features_custom = np.load('../../features/hog_features_custom.npy')
hog_efd_features_builtin = np.load('../../features/hog_efd_features_builtin.npy')
hog_efd_features_custom = np.load('../../features/hog_efd_features_custom.npy')
labels = np.load('../../features/labels.npy')

print(efd_features.shape, hog_features_builtin.shape, hog_features_custom.shape, hog_efd_features_builtin.shape, hog_efd_features_custom.shape, labels.shape)

(1821, 37) (1821, 3780) (1821, 3780) (1821, 3817) (1821, 3817) (1821,)


# - Models

## Models parametars' tuning

In [9]:
from sklearn import svm
from sklearn.ensemble import RandomForestClassifier
from sklearn.neighbors import KNeighborsClassifier
from sklearn.tree import DecisionTreeClassifier
from sklearn.naive_bayes import  GaussianNB
from sklearn.linear_model import LogisticRegression
from sklearn.ensemble import GradientBoostingClassifier
from sklearn.ensemble import AdaBoostClassifier
from sklearn.ensemble import ExtraTreesClassifier
from xgboost import XGBClassifier


models = [svm.SVC(C=0.1, gamma=0.1, kernel='poly'), RandomForestClassifier(max_depth=20, min_samples_leaf=1, min_samples_split=2, n_estimators=100),
          KNeighborsClassifier(n_neighbors=1), DecisionTreeClassifier(criterion='gini', max_depth=9, min_samples_leaf=1), GaussianNB(),
          LogisticRegression(C=0.01, max_iter=100, penalty='l2'), GradientBoostingClassifier(n_estimators=100, learning_rate=.1, max_depth=3, random_state=0), 
          AdaBoostClassifier(learning_rate=0.01, n_estimators=1000), ExtraTreesClassifier(criterion='entropy', max_depth=20, min_samples_leaf=1, min_samples_split=2, n_estimators=200), 
          XGBClassifier(n_estimators=1000, learning_rate=0.008, max_depth=3, random_state=0)] 
          

In [10]:
from sklearn.model_selection import train_test_split

# divide the data into training (70%) and validation (15%) and testing (15%) sets  #### to be adjusted ####
X_train_val_hog_custom, X_test_hog_custom, Y_train_val_hog_custom, Y_test_hog_custom = train_test_split(hog_features_custom, labels, test_size=0.15, random_state=42)
X_train_val_hog_builtin, X_test_hog_builtin, Y_train_val_hog_builtin, Y_test_hog_builtin = train_test_split(hog_features_builtin, labels, test_size=0.15, random_state=42)
X_train_val_efd, X_test_efd, Y_train_val_efd, Y_test_efd = train_test_split(efd_features, labels, test_size=0.15, random_state=42)
X_train_val_hog_efd_custom, X_test_hog_efd_custom, Y_train_val_hog_efd_custom, Y_test_hog_efd_custom = train_test_split(hog_efd_features_custom, labels, test_size=0.15, random_state=42)
X_train_val_hog_efd_builtin, X_test_hog_efd_builtin, Y_train_val_hog_efd_builtin, Y_test_hog_efd_builtin = train_test_split(hog_efd_features_builtin, labels, test_size=0.15, random_state=42)

X_train_hog_custom, X_val_hog_custom, Y_train_hog_custom, Y_val_hog_custom = train_test_split(X_train_val_hog_custom, Y_train_val_hog_custom, test_size=0.1765, random_state=42)
X_train_hog_builtin, X_val_hog_builtin, Y_train_hog_builtin, Y_val_hog_builtin = train_test_split(X_train_val_hog_builtin, Y_train_val_hog_builtin, test_size=0.1765, random_state=42)
X_train_efd, X_val_efd, Y_train_efd, Y_val_efd = train_test_split(X_train_val_efd, Y_train_val_efd, test_size=0.1765, random_state=42)
X_train_hog_efd_custom, X_val_hog_efd_custom, Y_train_hog_efd_custom, Y_val_hog_efd_custom = train_test_split(X_train_val_hog_efd_custom, Y_train_val_hog_efd_custom, test_size=0.1765, random_state=42)
X_train_hog_efd_builtin, X_val_hog_efd_builtin, Y_train_hog_efd_builtin, Y_val_hog_efd_builtin = train_test_split(X_train_val_hog_efd_builtin, Y_train_val_hog_efd_builtin, test_size=0.1765, random_state=42)

print(len(X_train_hog_custom), len(X_val_hog_custom), len(X_test_hog_custom), len(Y_train_hog_custom), len(Y_val_hog_custom), len(Y_test_hog_custom))
print(len(X_train_hog_builtin), len(X_val_hog_builtin), len(X_test_hog_builtin), len(Y_train_hog_builtin), len(Y_val_hog_builtin), len(Y_test_hog_builtin))
print(len(X_train_efd), len(X_val_efd), len(X_test_efd), len(Y_train_efd), len(Y_val_efd), len(Y_test_efd))
print(len(X_train_hog_efd_custom), len(X_val_hog_efd_custom), len(X_test_hog_efd_custom), len(Y_train_hog_efd_custom), len(Y_val_hog_efd_custom), len(Y_test_hog_efd_custom))
print(len(X_train_hog_efd_builtin), len(X_val_hog_efd_builtin), len(X_test_hog_efd_builtin), len(Y_train_hog_efd_builtin), len(Y_val_hog_efd_builtin), len(Y_test_hog_efd_builtin))

1273 274 274 1273 274 274
1273 274 274 1273 274 274
1273 274 274 1273 274 274
1273 274 274 1273 274 274
1273 274 274 1273 274 274


### SVM Model

#### Tuning

In [58]:
from sklearn.model_selection import GridSearchCV

# Create an SVM model
svm_model = models[0]

# Define the parameter grid
param_grid = {'C': [0.1, 1, 10], 'gamma': [0.1, 1, 10], 'kernel': ['rbf', 'poly', 'linear']}

# Perform grid search with 5-fold cross-validation
grid_search = GridSearchCV(svm_model, param_grid, cv=5)
print("cross validation finished ...")
# Fit the grid search to the training data
grid_search.fit(X_train_hog_custom, Y_train_hog_custom)
print("fitting finished ...")

# Print the best parameters and score
print("Best parameters:", grid_search.best_params_)
print("Best score:", grid_search.best_score_)

cross validation finished ...
fitting finished ...
Best parameters: {'C': 0.1, 'gamma': 0.1, 'kernel': 'poly'}
Best score: 0.76279141577891


#### Testing

In [59]:
# get the best model
best_svm = grid_search.best_estimator_
best_svm.fit(X_train_val_hog_custom, Y_train_val_hog_custom)

# get the accuracy on the validation set
y_pred_val = best_svm.predict(X_val_hog_custom)
val_acc = accuracy_score(Y_val_hog_custom, y_pred_val)

# get the accuracy on the test set
y_pred_test = best_svm.predict(X_test_hog_custom)
test_acc = accuracy_score(Y_test_hog_custom, y_pred_test)


print("Validation accuracy:", val_acc)
print("Test accuracy:", test_acc)

Validation accuracy: 1.0
Test accuracy: 0.791970802919708


### Random Forest

#### Tuning

In [18]:
from sklearn.model_selection import GridSearchCV

param_grid = {
    'n_estimators': [1, 50, 100],
    'max_depth': [5, 10, 20],
    'min_samples_split': [2, 5, 10],
    'min_samples_leaf': [1, 2, 4]
}
rf = models[1]

grid_search = GridSearchCV(rf, param_grid, cv=5)
print("cross validation finished ...")

grid_search.fit(X_train_hog_custom, Y_train_hog_custom)
print("fitting finished ...")

# Print the best parameters and score
print("Best parameters:", grid_search.best_params_)
print("Best score:", grid_search.best_score_)

cross validation finished ...
fitting finished ...
Best parameters: {'max_depth': 20, 'min_samples_leaf': 1, 'min_samples_split': 2, 'n_estimators': 100}
Best score: 0.7384529874942103


#### Testing

In [19]:
# get the best model
best_rf = grid_search.best_estimator_
best_rf.fit(X_train_val_hog_custom, Y_train_val_hog_custom)

# get the accuracy on the validation set
y_pred_val = best_rf.predict(X_val_hog_custom)
val_acc = accuracy_score(Y_val_hog_custom, y_pred_val)

# get the accuracy on the test set
y_pred_test = best_rf.predict(X_test_hog_custom)
test_acc = accuracy_score(Y_test_hog_custom, y_pred_test)


print("Validation accuracy:", val_acc)
print("Test accuracy:", test_acc)

Validation accuracy: 1.0
Test accuracy: 0.7372262773722628


### KNN

#### Tuning

In [29]:
param_grid = {'n_neighbors': [1, 3, 5, 7, 9, 11, 13, 14, 15]}

knn = models[2]

grid_search = GridSearchCV(knn, param_grid, cv=5)
print("cross validation finished ...")

grid_search.fit(X_train_hog_custom, Y_train_hog_custom)
print("fitting finished ...")

# Print the best parameters and score
print("Best parameters:", grid_search.best_params_)
print("Best score:", grid_search.best_score_)

cross validation finished ...
fitting finished ...
Best parameters: {'n_neighbors': 1}
Best score: 0.6716720704029644


#### Testing

In [30]:
# get the best model
best_knn = grid_search.best_estimator_
best_knn.fit(X_train_val_hog_custom, Y_train_val_hog_custom)

# get the accuracy on the validation set
y_pred_val = best_knn.predict(X_val_hog_custom)
val_acc = accuracy_score(Y_val_hog_custom, y_pred_val)

# get the accuracy on the test set
y_pred_test = best_knn.predict(X_test_hog_custom)
test_acc = accuracy_score(Y_test_hog_custom, y_pred_test)


print("Validation accuracy:", val_acc)
print("Test accuracy:", test_acc)

Validation accuracy: 1.0
Test accuracy: 0.718978102189781


### Decision Tree

#### Tuning

In [31]:
param_grid = {
    'max_depth': [3, 5, 7, 9],
    'min_samples_leaf': [1, 5, 10],
    'criterion': ['gini', 'entropy']
}

dt = models[3]

grid_search = GridSearchCV(dt, param_grid, cv=5)
print("cross validation finished ...")

grid_search.fit(X_train_hog_custom, Y_train_hog_custom)
print("fitting finished ...")

# Print the best parameters and score
print("Best parameters:", grid_search.best_params_)
print("Best score:", grid_search.best_score_)

cross validation finished ...
fitting finished ...
Best parameters: {'criterion': 'gini', 'max_depth': 9, 'min_samples_leaf': 1}
Best score: 0.49491122433225254


#### Testing

In [32]:
# get the best model
best_dt = grid_search.best_estimator_
best_dt.fit(X_train_val_hog_custom, Y_train_val_hog_custom)

# get the accuracy on the validation set
y_pred_val = best_dt.predict(X_val_hog_custom)
val_acc = accuracy_score(Y_val_hog_custom, y_pred_val)

# get the accuracy on the test set
y_pred_test = best_dt.predict(X_test_hog_custom)
test_acc = accuracy_score(Y_test_hog_custom, y_pred_test)


print("Validation accuracy:", val_acc)
print("Test accuracy:", test_acc)

Validation accuracy: 0.6605839416058394
Test accuracy: 0.5182481751824818


### Naive Bayes 

#### Testing

In [37]:
# get the best model
nb = models[4]
nb.fit(X_train_val_hog_custom, Y_train_val_hog_custom)

# get the accuracy on the validation set
y_pred_val = nb.predict(X_val_hog_custom)
val_acc = accuracy_score(Y_val_hog_custom, y_pred_val)

# get the accuracy on the test set
y_pred_test = nb.predict(X_test_hog_custom)
test_acc = accuracy_score(Y_test_hog_custom, y_pred_test)


print("Validation accuracy:", val_acc)
print("Test accuracy:", test_acc)

Validation accuracy: 0.8613138686131386
Test accuracy: 0.5985401459854015


### Logistic Regression

#### Tuning

In [50]:
from sklearn.preprocessing import StandardScaler
from sklearn.model_selection import GridSearchCV

grid_params = param_grid = {'penalty': ['none', 'l1', 'l2'], 'C': [0.001, 0.01, 0.1, 1, 10, 100, 1000], 'max_iter': [100, 1000, 2500]}
lr = models[5]

grid_search = GridSearchCV(lr, param_grid, cv=5)
print("cross validation finished ...")

scaler = StandardScaler()
X_train_hog_custom_norm = scaler.fit_transform(X_train_hog_custom)
X_train_val_hog_custom_norm = scaler.transform(X_train_val_hog_custom)
X_val_hog_custom_norm = scaler.transform(X_val_hog_custom)
X_test_hog_custom_norm = scaler.transform(X_test_hog_custom)

grid_search.fit(X_train_hog_custom_norm, Y_train_hog_custom)
print("fitting finished ...")

# Print the best parameters and score
print("Best parameters:", grid_search.best_params_)
print("Best score:", grid_search.best_score_)

cross validation finished ...


STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.

Increase the number of iterations (max_iter) or scale the data as shown in:
    https://scikit-learn.org/stable/modules/preprocessing.html
Please also refer to the documentation for alternative solver options:
    https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
  n_iter_i = _check_optimize_result(
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.

Increase the number of iterations (max_iter) or scale the data as shown in:
    https://scikit-learn.org/stable/modules/preprocessing.html
Please also refer to the documentation for alternative solver options:
    https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
  n_iter_i = _check_optimize_result(
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.

Increase the number of iterations (max_iter) or scale the data as shown in:
    https://scikit-learn.org/stable/modules/preprocessing.html
Please also refer to the documentation for alternative solver opt

fitting finished ...
Best parameters: {'C': 0.01, 'max_iter': 100, 'penalty': 'l2'}
Best score: 0.7282322062683342


STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.

Increase the number of iterations (max_iter) or scale the data as shown in:
    https://scikit-learn.org/stable/modules/preprocessing.html
Please also refer to the documentation for alternative solver options:
    https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
  n_iter_i = _check_optimize_result(


#### Testing

In [51]:
# get the best model
best_lr = grid_search.best_estimator_
best_lr.fit(X_train_val_hog_custom_norm, Y_train_val_hog_custom)

# get the accuracy on the validation set
y_pred_val = best_lr.predict(X_val_hog_custom_norm)
val_acc = accuracy_score(Y_val_hog_custom, y_pred_val)

# get the accuracy on the test set
y_pred_test = best_lr.predict(X_test_hog_custom_norm)
test_acc = accuracy_score(Y_test_hog_custom, y_pred_test)


print("Validation accuracy:", val_acc)
print("Test accuracy:", test_acc)

Validation accuracy: 1.0
Test accuracy: 0.7153284671532847


STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.

Increase the number of iterations (max_iter) or scale the data as shown in:
    https://scikit-learn.org/stable/modules/preprocessing.html
Please also refer to the documentation for alternative solver options:
    https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
  n_iter_i = _check_optimize_result(


### Gradient Boosting

#### Tuning

In [None]:
from sklearn.model_selection import GridSearchCV

param_grid = {'n_estimators': [100, 200, 300], 'learning_rate': [0.1, 0.05, 0.01], 'max_depth': [3, 5, 7]}
gb = models[6]

grid_search = GridSearchCV(gb, param_grid, cv=5)
print("cross validation finished ...")

grid_search.fit(X_train_hog_custom, Y_train_hog_custom)
print("fitting finished ...")

# Print the best parameters and score
print("Best parameters:", grid_search.best_params_)
print("Best score:", grid_search.best_score_)

#### Testing

In [None]:
# get the best model
best_gb = grid_search.best_estimator_
best_gb.fit(X_train_val_hog_custom, Y_train_val_hog_custom)

# get the accuracy on the validation set
y_pred_val = best_gb.predict(X_val_hog_custom)
val_acc = accuracy_score(Y_val_hog_custom, y_pred_val)

# get the accuracy on the test set
y_pred_test = best_gb.predict(X_test_hog_custom)
test_acc = accuracy_score(Y_test_hog_custom, y_pred_test)


print("Validation accuracy:", val_acc)
print("Test accuracy:", test_acc)

### AdaBoost

#### Tuning

In [None]:
param_grid = {
    'n_estimators': [50, 100, 150],
    'learning_rate': [0.01, 0.1, 1.0]
}

ada = models[7]

grid_search = GridSearchCV(ada, param_grid, cv=5)
print("cross validation finished ...")

grid_search.fit(X_train_hog_custom, Y_train_hog_custom)
print("fitting finished ...")

# Print the best parameters and score
print("Best parameters:", grid_search.best_params_)
print("Best score:", grid_search.best_score_)

training model: AdaBoostClassifier
predicting model: AdaBoostClassifier
evaluating model: AdaBoostClassifier
Accuracy: 0.00
Precision: 0.00
Recall: 0.00
F1-score: 0.00
Confusion matrix:
   0  1  2  3
0  0  1  0  0
1  0  0  0  0
2  0  2  0  0
3  0  2  0  0
saving model: AdaBoostClassifier


#### Testing

In [None]:
# get the best model
best_ada = grid_search.best_estimator_
best_ada.fit(X_train_val_hog_custom, Y_train_val_hog_custom)

# get the accuracy on the validation set
y_pred_val = best_ada.predict(X_val_hog_custom)
val_acc = accuracy_score(Y_val_hog_custom, y_pred_val)

# get the accuracy on the test set
y_pred_test = best_ada.predict(X_test_hog_custom)
test_acc = accuracy_score(Y_test_hog_custom, y_pred_test)


print("Validation accuracy:", val_acc)
print("Test accuracy:", test_acc)

### Extra Trees

#### Tuning

In [122]:
param_grid = {
    'n_estimators': [50, 100, 200],
    'max_depth': [10, 20, None],
    'min_samples_split': [2, 5],
    'min_samples_leaf': [1, 2],
    'criterion': ['gini', 'entropy']
}

et = models[8]

grid_search = GridSearchCV(et, param_grid, cv=5)
print("cross validation finished ...")

grid_search.fit(X_train_hog_custom, Y_train_hog_custom)
print("fitting finished ...")

# Print the best parameters and score
print("Best parameters:", grid_search.best_params_)
print("Best score:", grid_search.best_score_)

training model: ExtraTreesClassifier
predicting model: ExtraTreesClassifier
evaluating model: ExtraTreesClassifier
Accuracy: 0.00
Precision: 0.00
Recall: 0.00
F1-score: 0.00
Confusion matrix:
   0  1  2  3  4  5
0  0  0  0  1  0  0
1  0  0  0  0  0  0
2  0  2  0  0  0  0
3  0  0  0  0  0  0
4  0  0  0  0  0  2
5  0  0  0  0  0  0
saving model: ExtraTreesClassifier


#### Testing

In [None]:
# get the best model
best_et = grid_search.best_estimator_
best_et.fit(X_train_val_hog_custom, Y_train_val_hog_custom)

# get the accuracy on the validation set
y_pred_val = best_et.predict(X_val_hog_custom)
val_acc = accuracy_score(Y_val_hog_custom, y_pred_val)

# get the accuracy on the test set
y_pred_test = best_et.predict(X_test_hog_custom)
test_acc = accuracy_score(Y_test_hog_custom, y_pred_test)


print("Validation accuracy:", val_acc)
print("Test accuracy:", test_acc)

### XGBoost

#### Tuning

In [None]:
param_grid = {
    'max_depth': [3, 5, 7],
    'learning_rate': [0.1, 0.01, 0.001],
    'n_estimators': [100, 200, 300],
    'subsample': [0.8, 1.0],
}
xgb = models[9]

grid_search = GridSearchCV(xgb, param_grid, cv=5)
print("cross validation finished ...")

grid_search.fit(X_train_hog_custom, Y_train_hog_custom)
print("fitting finished ...")

# Print the best parameters and score
print("Best parameters:", grid_search.best_params_)
print("Best score:", grid_search.best_score_)

training model: XGBClassifier
predicting model: XGBClassifier
evaluating model: XGBClassifier
Accuracy: 20.00
Precision: 0.40
Recall: 0.20
F1-score: 0.27
Confusion matrix:
   0  1  2  3  4
0  0  0  0  0  1
1  0  0  0  0  0
2  0  1  1  0  0
3  0  0  0  0  2
4  0  0  0  0  0
saving model: XGBClassifier


#### Tuning

In [None]:
# get the best model
best_xgb = grid_search.best_estimator_
best_xgb.fit(X_train_val_hog_custom, Y_train_val_hog_custom)

# get the accuracy on the validation set
y_pred_val = best_xgb.predict(X_val_hog_custom)
val_acc = accuracy_score(Y_val_hog_custom, y_pred_val)

# get the accuracy on the test set
y_pred_test = best_xgb.predict(X_test_hog_custom)
test_acc = accuracy_score(Y_test_hog_custom, y_pred_test)


print("Validation accuracy:", val_acc)
print("Test accuracy:", test_acc)

## Models Comparison

In [66]:
df_metrics_hog_custom  = get_metrics_all_models(models, X_train_val_hog_custom, Y_train_val_hog_custom, X_test_hog_custom, Y_test_hog_custom)

df_metrics_hog_custom.drop('Confusion matrix', axis=1)

Model: SVC
Model: RandomForestClassifier
Model: KNeighborsClassifier
Model: DecisionTreeClassifier
Model: GaussianNB
Model: LogisticRegression
Model: GradientBoostingClassifier
Model: AdaBoostClassifier
Model: ExtraTreesClassifier
Model: XGBClassifier


Unnamed: 0,Accuracy,Precision,Recall,F1-score
SVC,0.755474,0.750939,0.755474,0.752425
RandomForestClassifier,0.737226,0.738754,0.737226,0.735802
KNeighborsClassifier,0.682482,0.679584,0.682482,0.679781
DecisionTreeClassifier,0.434307,0.537486,0.434307,0.443593
GaussianNB,0.554745,0.568706,0.554745,0.558418
LogisticRegression,0.686131,0.682492,0.686131,0.681399
GradientBoostingClassifier,0.689781,0.690204,0.689781,0.689499
AdaBoostClassifier,0.5,0.58266,0.5,0.508989
ExtraTreesClassifier,0.744526,0.739189,0.744526,0.7401
XGBClassifier,0.682482,0.686724,0.682482,0.683499


In [13]:
df_metrics_hog_efd_custom  = get_metrics_all_models(models, X_train_val_hog_efd_custom, Y_train_val_hog_efd_custom, X_test_hog_efd_custom, Y_test_hog_efd_custom)

df_metrics_hog_efd_custom.drop('Confusion matrix', axis=1)

Model: SVC
Model: RandomForestClassifier
Model: KNeighborsClassifier
Model: DecisionTreeClassifier
Model: GaussianNB
Model: LogisticRegression
Model: GradientBoostingClassifier
Model: AdaBoostClassifier


In [69]:
df_metrics_hog_builtin  = get_metrics_all_models(models, X_train_hog_builtin, Y_train_hog_builtin, X_test_hog_builtin, Y_test_hog_builtin)

df_metrics_hog_builtin.drop('Confusion matrix', axis=1)

Model: SVC
Model: RandomForestClassifier
Model: KNeighborsClassifier
Model: DecisionTreeClassifier
Model: GaussianNB
Model: LogisticRegression
Model: GradientBoostingClassifier
Model: AdaBoostClassifier
Model: ExtraTreesClassifier
Model: XGBClassifier


KeyboardInterrupt: 

In [12]:
df_metrics_hog_efd_builtin  = get_metrics_all_models(models, X_train_val_hog_efd_builtin, Y_train_val_hog_efd_builtin, X_test_hog_efd_builtin, Y_test_hog_efd_builtin)

df_metrics_hog_efd_builtin.drop('Confusion matrix', axis=1)

Model: SVC
Model: RandomForestClassifier
Model: KNeighborsClassifier
Model: DecisionTreeClassifier
Model: GaussianNB
Model: LogisticRegression
Model: GradientBoostingClassifier
Model: AdaBoostClassifier
Model: ExtraTreesClassifier
Model: XGBClassifier


Unnamed: 0,Accuracy,Precision,Recall,F1-score
SVC,0.813869,0.81026,0.813869,0.810785
RandomForestClassifier,0.755474,0.747835,0.755474,0.74961
KNeighborsClassifier,0.722628,0.717597,0.722628,0.717603
DecisionTreeClassifier,0.562044,0.560762,0.562044,0.558824
GaussianNB,0.678832,0.683719,0.678832,0.677485
LogisticRegression,0.711679,0.703869,0.711679,0.705673
GradientBoostingClassifier,0.740876,0.741928,0.740876,0.740831
AdaBoostClassifier,0.620438,0.660968,0.620438,0.634075
ExtraTreesClassifier,0.770073,0.764379,0.770073,0.76637
XGBClassifier,0.766423,0.76893,0.766423,0.76749


In [None]:
model = None
y_pred = model.predict(X_test_hog_custom)
metrics = get_metrics(y_pred, Y_test_hog_efd_builtin)
print(metrics)
pickle.dump(model, open('../../models/model.pkl', 'wb'))
# metrics = get_metrics_model(model, X_train_val_hog_custom, Y_train_val_hog_custom, X_test_hog_custom, Y_test_hog_custom)


In [68]:
df_metrics_efd  = get_metrics_all_models(models, X_train_efd, Y_train_efd, X_test_efd, Y_test_efd)

df_metrics_efd.drop('Confusion matrix', axis=1)

Model: SVC
Model: RandomForestClassifier
Model: KNeighborsClassifier
Model: DecisionTreeClassifier
Model: GaussianNB
Model: LogisticRegression
Model: GradientBoostingClassifier
Model: AdaBoostClassifier
Model: ExtraTreesClassifier
Model: XGBClassifier


Unnamed: 0,Accuracy,Precision,Recall,F1-score
SVC,0.164234,0.026973,0.164234,0.046335
RandomForestClassifier,0.711679,0.715493,0.711679,0.710846
KNeighborsClassifier,0.755474,0.758664,0.755474,0.754001
DecisionTreeClassifier,0.580292,0.594177,0.580292,0.583276
GaussianNB,0.543796,0.53429,0.543796,0.536937
LogisticRegression,0.215328,0.074666,0.215328,0.104593
GradientBoostingClassifier,0.715328,0.722533,0.715328,0.717647
AdaBoostClassifier,0.525547,0.544609,0.525547,0.529226
ExtraTreesClassifier,0.737226,0.733197,0.737226,0.732943
XGBClassifier,0.686131,0.695276,0.686131,0.687114
