## Patches Experiment with Dex-Net

In [226]:
#global imports
import numpy as np
from sklearn.svm import SVC
from sklearn.metrics import zero_one_loss, log_loss
from sklearn import linear_model
from sklearn.grid_search import GridSearchCV
from sklearn.tree import DecisionTreeClassifier, DecisionTreeRegressor
from sklearn.ensemble import RandomForestClassifier, RandomForestRegressor, AdaBoostClassifier, AdaBoostRegressor
from sklearn.qda import QDA
import time
import matplotlib.pyplot as plt
%matplotlib inline

In [2]:
from util import PatchesDataLoader, PatchesSKLearner

In [3]:
#data_path = '/home/jacky/ws/patches/data/'
data_path = '/mnt/wd_ssd/projects/deep_patches/data/patches_06_11_16'

In [83]:
pdl = PatchesDataLoader(0.25, data_path, [i for i in range(1)], by_objs=False)

In [145]:
def get_cropped_X(pdl, dim):
    
    def crop(img, dim):
        big_dim = int(np.sqrt(img.shape[0]))
        img = img.reshape(big_dim, big_dim)
        mid = big_dim // 2
        delta = dim // 2

        small_img = img[mid - delta : mid + delta + 1, mid - delta : mid + delta + 1]

        return small_img.reshape(dim*dim)

    cropped_w1 = np.array([crop(pdl._raw_data['w1_proj'][i,:], 3) for i in range(len(pdl._raw_data['w1_proj']))])
    cropped_w2 = np.array([crop(pdl._raw_data['w2_proj'][i,:], 3) for i in range(len(pdl._raw_data['w2_proj']))])

    cropped_w1_tr, cropped_w1_t, _ = pdl.split_train_test(cropped_w1, 0.25, pdl.indices)
    cropped_w2_tr, cropped_w2_t, _ = pdl.split_train_test(cropped_w1, 0.25, pdl.indices)

    X = pdl.get_partial_train_data(('moment_arms', 'patch_ori'))

    X['tr'] = np.c_[X['tr'], cropped_w1_tr, cropped_w2_tr]
    X['t'] = np.c_[X['t'], cropped_w1_t, cropped_w2_t]
    
    return X

In [146]:
X = get_cropped_X(pdl, 3)

### Classification on FC

In [133]:
#QDA on fc
qda_fc = PatchesSKLearner.train(QDA, X['tr'], pdl.labels['fc']['tr'], X['t'], pdl.labels['fc']['t'], "QDA")

QDA train accuracy 0.562441752097, test accuracy 0.525874125874


In [134]:
#logistic regression on fc
logr_fc = PatchesSKLearner.train(linear_model.LogisticRegression, X['tr'], pdl.labels['fc']['tr'], X['t'], 
                       pdl.labels['fc']['t'], "Logistic Regression")

Logistic Regression train accuracy 0.688723205965, test accuracy 0.682517482517


In [235]:
#random forest
rf_fc = PatchesSKLearner.train(RandomForestClassifier, X['tr'], pdl.labels['fc']['tr'], X['t'], 
                               pdl.labels['fc']['t'], "Random Forest Clf")

Random Forest Clf train accuracy 0.991146318733, test accuracy 0.806993006993


In [228]:
#adaboost
ab_fc = PatchesSKLearner.train(AdaBoostClassifier, X['tr'], pdl.labels['fc']['tr'], X['t'], 
                               pdl.labels['fc']['t'], "Random Forest Clf")

Random Forest Clf train accuracy 0.843429636533, test accuracy 0.808391608392


### Grid Search CV on Classification

In [231]:
rf_fc_params_grid = [{
 'criterion': ['entropy', 'gini'],
 'max_depth': [None, 20, 70],
 'max_features': ['sqrt', 0.4, 0.7],
 'min_samples_leaf': [1, 3, 7],
 'min_samples_split': [2, 6, 10],
 'n_estimators': [10, 30, 60],
 'random_state': [0],
 'verbose': [0]
}]

In [232]:
rf_fc_cv = GridSearchCV(RandomForestClassifier(), rf_fc_params_grid, cv=3)
start = time.time()
rf_fc_cv.fit(X['tr'], pdl.labels['fc']['tr'])
end = time.time()
print 'took {0}s'.format(end - start)

took 1737.7815361s


In [233]:
print rf_fc_cv.best_params_
print rf_fc_cv.best_score_

{'verbose': 0, 'min_samples_leaf': 1, 'n_estimators': 60, 'min_samples_split': 6, 'random_state': 0, 'criterion': 'entropy', 'max_features': 'sqrt', 'max_depth': 20}
0.825722273998


### Analytical fc prediction using approx patch normals

In [8]:
X_w1_normals = pdl.get_partial_train_data(('w1_normal',))
X_w2_normals = pdl.get_partial_train_data(('w2_normal',))

In [9]:
def predict_single_approx_normals(w1, w2, mu = 0.5):
    alpha = np.arctan(mu)
    
    z1 = w1[2]
    z2 = w2[2]
    
    return np.arccos(-z1) < alpha and np.arccos(-z2) < alpha

def predict_approx_normals(W1, W2):
    n = W1.shape[0]
    results = []
    for i in range(n):
        results.append(predict_single_approx_normals(W1[i], W2[i]))
    return np.array(results)

In [10]:
approx_normal_tr_preds = predict_approx_normals(X_w1_normals['tr'], X_w2_normals['tr'])
approx_normal_t_preds = predict_approx_normals(X_w1_normals['t'], X_w2_normals['t'])

print "train accuracy", 1 - zero_one_loss(approx_normal_tr_preds, pdl.labels['fc']['tr'], normalize=True)
print "test accuracy", 1 - zero_one_loss(approx_normal_t_preds, pdl.labels['fc']['t'], normalize=True)

### Regressions on PFC

In [10]:
#reg on pfc
pfc_reg = PatchesSKLearner.train(RandomForestRegressor, pdl.tr, pdl.labels['pfc_10']['tr'], pdl.t, pdl.labels['pfc_10']['t'],
      'Random Forest Regressor PFC', print_mse = True)

Random Forest Regressor PFC train accuracy 0.96422615806, test accuracy 0.797982278609
Random Forest Regressor PFC train mse 0.00363922183066, test mse 0.0205788539116


In [None]:
#cross entropy loss for pfc
tr_pfc_preds = pfc_reg.predict(pdl.tr)
t_pfc_preds = pfc_reg.predict(pdl.t)
tr_pfc_preds_proba = np.c_[1 - tr_pfc_preds, tr_pfc_preds]
t_pfc_preds_proba = np.c_[1 - t_pfc_preds, t_pfc_preds]

In [17]:
ce_tr_pfc = log_loss(np.round(pdl.labels['pfc_10']['tr']), tr_pfc_preds_proba)
ce_t_pfc = log_loss(np.round(pdl.labels['pfc_10']['t']), t_pfc_preds_proba)
print "Random Forest Rergressor PFC train ce {0}, test ce {0}".format(ce_tr_pfc, ce_t_pfc)

Random Forest Rergressor PFC train ce 0.20100992797, test ce 0.20100992797


### Regressions on Ferrari Canny

In [14]:
#reg on ferrari canny
ferrari_reg = PatchesSKLearner.train(RandomForestRegressor, pdl.tr, pdl.labels['ferrari']['tr'], pdl.t, pdl.labels['ferrari']['t'],
      'Random Forest Regressor Ferrari Canny', print_mse= True)

Random Forest Regressor Ferrari Canny train accuracy 0.885257381622, test accuracy 0.322818561687
Random Forest Regressor Ferrari Canny train mse 1.06544717913e-08, test mse 5.34191610085e-08


### Regressions on Thresholded PFC

In [18]:
#thresholding pfc to classify on fc
pfc_rd_tr = np.round(pdl.labels['pfc_10']['tr']).astype('int')
pfc_rd_t = np.round(pdl.labels['pfc_10']['t']).astype('int')

In [20]:
PatchesSKLearner.train(RandomForestClassifier, pdl.tr, pfc_rd_tr, pdl.t,
      pfc_rd_t, "Random Forest Clf on Rounded PFC")

Random Forest Clf on Rounded PFC train accuracy 0.995240552784, test accuracy 0.865043867194


RandomForestClassifier(bootstrap=True, class_weight=None, criterion='gini',
            max_depth=None, max_features='auto', max_leaf_nodes=None,
            min_samples_leaf=1, min_samples_split=2,
            min_weight_fraction_leaf=0.0, n_estimators=10, n_jobs=1,
            oob_score=False, random_state=None, verbose=0,
            warm_start=False)