In [1]:
import math
import numpy as np
import pickle
%matplotlib inline
import matplotlib.pyplot as plt
import cv2
from scipy.misc import imresize
from skimage import feature

from sklearn.ensemble import RandomForestClassifier
from sklearn.model_selection import RandomizedSearchCV, GridSearchCV
from sklearn.feature_selection import SelectKBest

In [2]:
# Load the pre-computed feature matrices and the training labels from disk.
# NOTE(review): pickle.load can execute arbitrary code while unpickling; only
# point these paths at files produced by a trusted feature-extraction step.
def _load_pickle(path):
    """Unpickle and return the object stored at `path`.

    The file is opened in binary mode inside a `with` block so the handle is
    closed as soon as loading finishes (or fails), instead of being leaked.
    """
    with open(path, 'rb') as fh:
        return pickle.load(fh)


X_train = _load_pickle("./features/features-train_2.pkl")
print('training set: (%i, %i)' % (X_train.shape[0], X_train.shape[1]))
X_test = _load_pickle("./features/features-test_2.pkl")
print('test set: (%i, %i)' % (X_test.shape[0], X_test.shape[1]))

Y = _load_pickle("./features/labels-train.pkl")

print('done')

training set: (10000, 4272)
test set: (8190, 4272)
done


In [6]:
# Hyper-parameter grid for the random forest. Every list holds a single value,
# so the search evaluates exactly one configuration under cross-validation.
parameters = {
        "max_depth": [25],
        "max_features": [200],
        "min_samples_split": [4],
        "min_samples_leaf": [2],
        'n_estimators': [100],
    }

# Grid search with RF classifier.
clf = GridSearchCV(
        estimator=RandomForestClassifier(random_state=1, n_jobs=-1),
        param_grid=parameters,
        cv=5,
        n_jobs=-1,
        # Needed for cv_results_['mean_train_score'] below: recent scikit-learn
        # releases skip training scores unless this is requested explicitly,
        # which would make the lookup raise KeyError.
        return_train_score=True,
    )

# Fit on (at most) the first 10000 training rows and their labels.
clf.fit(X_train[:10000, :], Y[:10000])

print("Best parameters set found on training set:")
print(clf.best_params_)
print()

# Per-configuration cross-validation summaries collected by the search.
means_valid = clf.cv_results_['mean_test_score']
stds_valid = clf.cv_results_['std_test_score']
means_train = clf.cv_results_['mean_train_score']

# One line per evaluated configuration: validation mean, its standard deviation
# across folds, and the mean training score.
print("Grid scores:")
for mean_valid, std_valid, mean_train, params in zip(means_valid, stds_valid, means_train, clf.cv_results_['params']):
    print("Validation: %0.3f (+/-%0.03f), Training: %0.3f  for %r" % (mean_valid, std_valid, mean_train, params))
print()

Best parameters set found on training set:
{'max_depth': 25, 'max_features': 200, 'min_samples_leaf': 2, 'min_samples_split': 4, 'n_estimators': 100}

Grid scores:
Validation: 0.848 (+/-0.013), Training: 0.994  for {'max_depth': 25, 'max_features': 200, 'min_samples_leaf': 2, 'min_samples_split': 4, 'n_estimators': 100}



In [7]:
# Write the test-set predictions to out.csv as two integer-formatted columns:
# a 1-based row id and the predicted label.
# The row count comes from X_test itself rather than a hard-coded 8190, so the
# cell keeps working if the test feature file changes size.
n_test = X_test.shape[0]
out = np.zeros((n_test, 2))
out[:, 0] = np.arange(1, n_test + 1)  # ids 1..n_test, one per test row
out[:, 1] = clf.predict(X_test)
np.savetxt('out.csv', out, delimiter=',', fmt='%i')