In [16]:
import h5py as h5
import numpy as np

from sklearn.model_selection import GridSearchCV, RandomizedSearchCV
from sklearn.metrics import classification_report
from sklearn.linear_model import LogisticRegression, SGDClassifier
from sklearn.ensemble import RandomForestClassifier

In [17]:
# Grab extracted features and labels (TRAIN SET).
with h5.File('features_train.h5', 'r') as file:
    X_train, y_train = file['images'][:], file['labels'][:]

In [25]:
# Params used to find the best estimator.
params = {
    'bootstrap': [True],
    'max_depth': [80, 90, 100, 110],
    'max_features': [2, 3],
    'min_samples_leaf': [3, 4, 5],
    'min_samples_split': [8, 10, 12],
    'n_estimators': [100, 200, 300, 1000],
}

model1_ = GridSearchCV(
    RandomForestClassifier(),
    param_grid=params,
    cv=5,
    n_jobs=-1, # -1: all cores
    verbose=10,
)

model = SGDClassifier(
    verbose=2,
)

In [26]:
model.fit(X_train, y_train)

-- Epoch 1
Norm: 115746.01, NNZs: 24828, Bias: 41.873202, T: 498, Avg. loss: 870468.282545
Total training time: 0.03 seconds.
-- Epoch 2
Norm: 95799.54, NNZs: 24937, Bias: 52.012983, T: 996, Avg. loss: 133229.346834
Total training time: 0.05 seconds.
-- Epoch 3
Norm: 80211.79, NNZs: 24951, Bias: 46.662279, T: 1494, Avg. loss: 25445.512166
Total training time: 0.07 seconds.
-- Epoch 4
Norm: 67512.89, NNZs: 24955, Bias: 50.161462, T: 1992, Avg. loss: 7308.196298
Total training time: 0.09 seconds.
-- Epoch 5
Norm: 58710.21, NNZs: 24960, Bias: 50.033827, T: 2490, Avg. loss: 5197.541822
Total training time: 0.11 seconds.
-- Epoch 6
Norm: 51376.95, NNZs: 24960, Bias: 50.033827, T: 2988, Avg. loss: 0.000000
Total training time: 0.13 seconds.
-- Epoch 7
Norm: 45672.22, NNZs: 24960, Bias: 50.033827, T: 3486, Avg. loss: 0.000000
Total training time: 0.14 seconds.
-- Epoch 8
Norm: 41107.75, NNZs: 24960, Bias: 50.033827, T: 3984, Avg. loss: 0.000000
Total training time: 0.16 seconds.
-- Epoch 9
No

SGDClassifier(alpha=0.0001, average=False, class_weight=None,
              early_stopping=False, epsilon=0.1, eta0=0.0, fit_intercept=True,
              l1_ratio=0.15, learning_rate='optimal', loss='hinge',
              max_iter=1000, n_iter_no_change=5, n_jobs=None, penalty='l2',
              power_t=0.5, random_state=None, shuffle=True, tol=0.001,
              validation_fraction=0.1, verbose=2, warm_start=False)

In [14]:
print('Best estimator is', model.best_estimator_)
print('Best score is', model.best_score_)

AttributeError: 'SGDClassifier' object has no attribute 'best_estimator_'

In [27]:
best_model = model

best_model.fit(X_train, y_train)

-- Epoch 1
Norm: 122252.75, NNZs: 24901, Bias: 62.927456, T: 498, Avg. loss: 829845.861234
Total training time: 0.03 seconds.
-- Epoch 2
Norm: 101773.04, NNZs: 24982, Bias: 68.315072, T: 996, Avg. loss: 220064.439163
Total training time: 0.05 seconds.
-- Epoch 3
Norm: 84870.87, NNZs: 25000, Bias: 68.361218, T: 1494, Avg. loss: 42347.981681
Total training time: 0.07 seconds.
-- Epoch 4
Norm: 70739.91, NNZs: 25000, Bias: 68.361218, T: 1992, Avg. loss: 0.000000
Total training time: 0.09 seconds.
-- Epoch 5
Norm: 60642.90, NNZs: 25000, Bias: 68.361218, T: 2490, Avg. loss: 0.000000
Total training time: 0.12 seconds.
-- Epoch 6
Norm: 53068.24, NNZs: 25000, Bias: 68.361218, T: 2988, Avg. loss: 0.000000
Total training time: 0.14 seconds.
-- Epoch 7
Norm: 47175.72, NNZs: 25000, Bias: 68.361218, T: 3486, Avg. loss: 0.000000
Total training time: 0.15 seconds.
-- Epoch 8
Norm: 42460.98, NNZs: 25000, Bias: 68.361218, T: 3984, Avg. loss: 0.000000
Total training time: 0.17 seconds.
-- Epoch 9
Norm: 3

SGDClassifier(alpha=0.0001, average=False, class_weight=None,
              early_stopping=False, epsilon=0.1, eta0=0.0, fit_intercept=True,
              l1_ratio=0.15, learning_rate='optimal', loss='hinge',
              max_iter=1000, n_iter_no_change=5, n_jobs=None, penalty='l2',
              power_t=0.5, random_state=None, shuffle=True, tol=0.001,
              validation_fraction=0.1, verbose=2, warm_start=False)

In [28]:
# Grab extracted features and labels (TEST SET)
with h5.File('features_test.h5', 'r') as file:
    X_test, y_test = file['images'][:], file['labels'][:]

In [29]:
y_hat = best_model.predict(X_test)

In [30]:
confusion_matrix = classification_report(y_test, y_hat)
print(confusion_matrix)

              precision    recall  f1-score   support

           0       0.84      0.88      0.86       250
           1       0.87      0.84      0.85       250

    accuracy                           0.86       500
   macro avg       0.86      0.86      0.86       500
weighted avg       0.86      0.86      0.86       500

