In [1]:
import matplotlib.pyplot as plt
import numpy as np
from skimage.feature import local_binary_pattern
from sklearn.ensemble import GradientBoostingClassifier
from sklearn.preprocessing import StandardScaler
from sklearn import metrics

In [2]:
# Directory prefix for every feature file. Paths are built by plain string
# concatenation further down, so the trailing slash is required.
DATA_DIR = 'data/'
# Subset tag appended to the feature file names (e.g. '..._subset20k_train.npz').
SUBSET = '20k'

In [3]:
def load_npz(file):
    """Load the feature matrix and labels stored in an ``.npz`` archive.

    Parameters
    ----------
    file : str, path-like or file-like
        Anything accepted by ``np.load`` that refers to an ``.npz`` archive
        containing the arrays ``'X'`` and ``'y'``.

    Returns
    -------
    (X, y) : tuple of np.ndarray
        The arrays stored under the keys ``'X'`` and ``'y'``.

    Raises
    ------
    KeyError
        If the archive has no ``'X'`` or ``'y'`` entry.

    Side effect: prints the shape and dtype of ``X``.
    """
    # The context manager closes the archive even when a key is missing;
    # indexing the archive materialises each array before the handle is closed.
    with np.load(file) as npdict:
        X = npdict['X']
        y = npdict['y']
    print(X.shape, X.dtype)
    return X, y

In [4]:
# Features stored under the '2x2lbp_subset' prefix (presumably local binary
# pattern descriptors, judging by the file name). The labels read here are the
# y_train / y_test used when fitting and scoring the classifier.
DATA_FILENAME = '2x2lbp_subset' + SUBSET
# Train split is loaded first, then test, so the printed shapes keep that order.
(Xl_train, y_train), (Xl_test, y_test) = [
    load_npz(DATA_DIR + DATA_FILENAME + split + '.npz')
    for split in ('_train', '_test')
]

(20000, 40) int16
(10000, 40) int16


In [5]:
# Features stored under the '2gabor_subset' prefix (presumably Gabor filter
# responses, judging by the file name), loaded for the same subset.
DATA_FILENAME = '2gabor_subset' + SUBSET
Xg_train, yg_train = load_npz(DATA_DIR + DATA_FILENAME + '_train' + '.npz')
Xg_test, yg_test = load_npz(DATA_DIR + DATA_FILENAME + '_test' + '.npz')
# Both feature files must describe the same samples in the same order before
# their columns can be stacked. np.array_equal also compares shapes, whereas
# `np.all(a == b)` broadcasts and can pass for label arrays of different shape.
assert np.array_equal(y_train, yg_train), 'train labels differ between the two feature files'
assert np.array_equal(y_test, yg_test), 'test labels differ between the two feature files'

(20000, 64) float64
(10000, 64) float64


In [6]:
# Join the two feature sets column-wise so each sample row carries both
# descriptors. The inputs are 2-D (samples x features), so concatenating
# along axis 1 is the same operation as a horizontal stack.
X_train = np.concatenate((Xl_train, Xg_train), axis=1)
X_test = np.concatenate((Xl_test, Xg_test), axis=1)
(X_train.shape, X_test.shape)

((20000, 104), (10000, 104))

In [10]:
# Sweep the ensemble size and report train / test accuracy for each setting.
n_ests = [100, 200, 400, 600, 800, 1000]
# warm_start=True makes every fit() keep the trees that are already grown and
# only add the missing ones, so the sweep trains 1000 trees in total instead
# of refitting from scratch for each size. random_state pins the run so the
# printed scores are reproducible after Restart & Run All.
clf = GradientBoostingClassifier(warm_start=True, random_state=0)
# The two smallest sizes are skipped. Warm starting requires a non-decreasing
# n_estimators, hence the explicit sort.
for n_est in sorted(n_ests[2:]):
    print(n_est, ': ')
    clf.set_params(n_estimators=n_est)
    clf.fit(X_train, y_train)
    print(clf.score(X_train, y_train), end=', ')
    print(clf.score(X_test, y_test))

400
0.88085
0.8404
600
0.8995
0.8409
800
0.9179
0.8442
1000
0.9305
0.8443
