In [1]:
%load_ext autoreload
%autoreload 2
import os
import sys
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import cv2

sys.path.insert(0, '../src')
import classifier
import detector
from image import Image

In [2]:
from image import build_histogram_equalizer
TRAIN_DATA_DIR = os.path.abspath("../trainset")
COLORS = ['COLOR_STOP_SIGN_RED', 'COLOR_OTHER_RED',
          'COLOR_BROWN', 'COLOR_ORANGE',
          'COLOR_BLUE', 'COLOR_OTHER']

# Gather the labelled samples of every ``.npz`` archive in TRAIN_DATA_DIR,
# grouped by color class. Each archive is indexed by the names in COLORS.
#
# Changes from the previous version of this cell:
#   * the matching .jpg/.png used to be decoded into ``img`` for every archive
#     although nothing ever read it; that dead (and slow) load is gone;
#   * archives are opened with ``with`` so their file handles are released;
#   * each array is read from the archive once instead of twice;
#   * files are visited in sorted order, so the row order of ``data`` no longer
#     depends on the (unspecified) ordering of ``os.listdir``.
data = {c: [] for c in COLORS}
for fname in sorted(os.listdir(TRAIN_DATA_DIR)):
    if os.path.splitext(fname)[1] != ".npz":
        continue

    with np.load(os.path.join(TRAIN_DATA_DIR, fname)) as npzdata:
        for c in COLORS:
            mat = npzdata[c]
            if mat.size > 0:
                # Flatten to one 3-column row per sample.
                data[c].append(mat.reshape(-1, 3).astype(np.uint8))

for c in COLORS:
    if not data[c]:
        # np.vstack([]) would raise a ValueError too, just a far less helpful one.
        raise ValueError(f'no samples found for class {c} in {TRAIN_DATA_DIR}')
    data[c] = np.vstack(data[c])

print('---- done ------')

---- done ------


In [3]:
N_DATA_PER_CLASS = 200000
APPEND_YCRCB = False
APPEND_BIAS = False
ONLY_YCRCB = False
SAMPLE_SEED = 0  # fixed seed so every run draws the same subsample

# Integer label of each color class. Labels come from this mapping, NOT from
# the position of the class in COLORS (BROWN/ORANGE are ordered differently there).
labelmp = {
    'COLOR_STOP_SIGN_RED': 0,
    'COLOR_OTHER_RED': 1,
    'COLOR_ORANGE': 2,
    'COLOR_BROWN': 3,
    'COLOR_BLUE': 4,
    'COLOR_OTHER': 5
}

# Build a class-balanced design matrix X (float64) and label vector y (int32)
# by drawing N_DATA_PER_CLASS distinct rows from every class.
rng = np.random.default_rng(SAMPLE_SEED)
X, y = [], []
for c in COLORS:
    print(c, data[c].shape)

    n_available = data[c].shape[0]
    if n_available < N_DATA_PER_CLASS:
        raise ValueError(
            f'class {c} has only {n_available} samples, '
            f'cannot draw {N_DATA_PER_CLASS} without replacement'
        )

    # Generator.choice is used instead of the legacy np.random.choice, which
    # is a poor fit for populations as large as the biggest classes here.
    rndidx = rng.choice(n_available, size=N_DATA_PER_CLASS, replace=False)
    x = data[c][rndidx, :]

    if ONLY_YCRCB or APPEND_YCRCB:
        # cvtColor wants an image-shaped array, so view the rows as an (n, 1, 3) image.
        xycc = cv2.cvtColor(x.reshape(-1, 1, 3).astype(np.uint8), cv2.COLOR_RGB2YCrCb)
        xycc = xycc.reshape(-1, 3)
        # ONLY_YCRCB takes precedence over APPEND_YCRCB, as before.
        x = xycc if ONLY_YCRCB else np.hstack([x, xycc])

    if APPEND_BIAS:
        x = np.hstack([x, np.ones((x.shape[0], 1))])

    X.append(x)
    y.append(np.full(x.shape[0], labelmp[c], dtype=np.int32))

X = np.vstack(X).astype(np.float64)
y = np.concatenate(y)
print('-----------done------------')

COLOR_STOP_SIGN_RED (1952535, 3)
COLOR_OTHER_RED (2234268, 3)
COLOR_BROWN (11148031, 3)
COLOR_ORANGE (267664, 3)
COLOR_BLUE (46662704, 3)
COLOR_OTHER (186000767, 3)
-----------done------------


In [26]:
def ssred_accuracy(clf, X, y):
    """Accuracy of `clf` on the binary "label 0 vs. any other label" task.

    Args:
        clf: fitted estimator exposing ``predict(X)``.
        X: samples handed to ``clf.predict`` unchanged.
        y: array of true integer labels, aligned with the predictions.

    Returns:
        Fraction of samples for which "predicted label is 0" agrees with
        "true label is 0".
    """
    predicted_is_zero = clf.predict(X) == 0
    actual_is_zero = y == 0
    n_agree = np.sum(predicted_is_zero == actual_is_zero)
    return n_agree / actual_is_zero.shape[0]

def ssred_precision(clf, X, y):
    """Precision of `clf` for label 0 (label 0 vs. any other label).

    Args:
        clf: fitted estimator exposing ``predict(X)``.
        X: samples handed to ``clf.predict`` unchanged.
        y: array-like of true integer labels, aligned with the predictions.

    Returns:
        true positives / predicted positives, where "positive" means label 0.
        Returns 0.0 when nothing is predicted as label 0 (previously this was
        a 0/0 division yielding NaN, which then poisoned the cross-validation
        mean and standard deviation).
    """
    pred = np.asarray(clf.predict(X)) == 0
    truth = np.asarray(y) == 0

    n_predicted_pos = np.sum(pred)
    if n_predicted_pos == 0:
        return 0.0
    # pred & truth marks the true positives directly.
    return np.sum(pred & truth) / n_predicted_pos

def ssred_recall(clf, X, y):
    """Recall of `clf` for label 0 (label 0 vs. any other label).

    Args:
        clf: fitted estimator exposing ``predict(X)``.
        X: samples handed to ``clf.predict`` unchanged.
        y: array-like of true integer labels, aligned with the predictions.

    Returns:
        true positives / actual positives, where "positive" means label 0.
        Returns 0.0 when `y` contains no label-0 sample (previously this was
        a 0/0 division yielding NaN, which then poisoned the cross-validation
        mean and standard deviation).
    """
    pred = np.asarray(clf.predict(X)) == 0
    truth = np.asarray(y) == 0

    n_actual_pos = np.sum(truth)
    if n_actual_pos == 0:
        return 0.0
    # pred & truth marks the true positives directly.
    return np.sum(pred & truth) / n_actual_pos

# Metric name -> scorer callable with the (estimator, X, y) signature; passed
# as the ``scoring`` argument of cross_validate in the evaluation cells.
scoring = dict(
    accuracy=ssred_accuracy,
    precision=ssred_precision,
    recall=ssred_recall,
)

def print_scores(scores):
    """Print one "mean (+/- 2*std)" line per entry of a score mapping.

    Args:
        scores: mapping from a name to an array exposing ``mean()`` and
            ``std()`` (e.g. the dict returned by ``cross_validate``).

    The line is built with a single f-string. The previous version
    interpolated the key with an f-string and then used the result as a
    ``%`` template, so any key containing ``%`` raised or garbled the output.
    """
    for key, val in scores.items():
        print(f'\t{key}: {val.mean():.2f} (+/- {val.std() * 2:.2f})')
        

In [7]:
# %reload_ext autoreload
# from sklearn.model_selection import cross_validate
# from sklearn.utils import shuffle
# from classifier import LogisticRegression

# X, y = shuffle(X, y)
# XX = np.hstack([X, np.ones((X.shape[0], 1))])
# clf = LogisticRegression(max_iter=200, learning_rate=0.01, batchsize=3000)

# lr_score = cross_validate(clf, XX, y, cv=5, n_jobs=-1, scoring=scoring, error_score='raise')
# print('Logistic Regression')
# print_scores(lr_score)

In [20]:
%reload_ext autoreload
from sklearn.model_selection import cross_validate
from sklearn.utils import shuffle
from classifier import OneVsAllLogisticRegression

# 5-fold cross-validation of the one-vs-all logistic regression.
# The shuffle is seeded and written to new names: re-running the cell now
# gives the same folds, and the shared X / y are no longer re-permuted every
# time any evaluation cell is executed.
X_shuf, y_shuf = shuffle(X, y, random_state=0)
# Constant column of ones appended as the last feature (intercept input).
XX = np.hstack([X_shuf, np.ones((X_shuf.shape[0], 1))])

clf = OneVsAllLogisticRegression(max_iter=500, learning_rate=0.005, batchsize=3000)

ovalr_score = cross_validate(clf, XX, y_shuf, cv=5, n_jobs=-1, scoring=scoring, error_score='raise')
print('1vall Logistic Regression')
print_scores(ovalr_score)

1vall Logistic Regression
	fit_time: 795.71 (+/- 11.11)
	score_time: 0.09 (+/- 0.01)
	test_accuracy: 0.95 (+/- 0.07)
	test_precision: 0.83 (+/- 0.24)
	test_recall: 0.94 (+/- 0.08)


In [17]:
%reload_ext autoreload
from sklearn.model_selection import cross_validate
from sklearn.utils import shuffle
from classifier import KaryLogisticRegression

# 5-fold cross-validation of the K-ary logistic regression.
# The shuffle is seeded and written to new names: re-running the cell now
# gives the same folds, and the shared X / y are no longer re-permuted every
# time any evaluation cell is executed.
X_shuf, y_shuf = shuffle(X, y, random_state=0)
# Constant column of ones appended as the last feature (intercept input).
XX = np.hstack([X_shuf, np.ones((X_shuf.shape[0], 1))])
clf = KaryLogisticRegression(max_iter=500, learning_rate=0.005, batchsize=3000)

klr_score = cross_validate(clf, XX, y_shuf, cv=5, n_jobs=-1, scoring=scoring, error_score='raise')
print('Kary Logistic Regression')
print_scores(klr_score)

Kary Logistic Regression
	fit_time: 192.42 (+/- 2.61)
	score_time: 0.02 (+/- 0.00)
	test_accuracy: 0.97 (+/- 0.02)
	test_precision: 0.95 (+/- 0.04)
	test_recall: 0.86 (+/- 0.16)


In [18]:
%reload_ext autoreload
from sklearn.model_selection import cross_validate
from sklearn.linear_model import LogisticRegression
from sklearn.svm import SVC
from sklearn.utils import shuffle
from classifier import GaussianNaiveBayes

# 5-fold cross-validation of the Gaussian naive Bayes classifier.
# The shuffle is seeded and written to new names: re-running the cell now
# gives the same folds, and the shared X / y are no longer re-permuted every
# time any evaluation cell is executed.
X_shuf, y_shuf = shuffle(X, y, random_state=0)
# Unlike the logistic-regression cells, no column of ones is appended here.
XX = X_shuf
clf = classifier.GaussianNaiveBayes()

gnb_score = cross_validate(clf, XX, y_shuf, cv=5, n_jobs=-1, scoring=scoring, error_score='raise')
print('Gaussian Naive Bayes')
print_scores(gnb_score)

Gaussian Naive Bayes
	fit_time: 0.22 (+/- 0.01)
	score_time: 0.27 (+/- 0.00)
	test_accuracy: 0.98 (+/- 0.00)
	test_precision: 0.94 (+/- 0.00)
	test_recall: 0.93 (+/- 0.00)


In [11]:
# X, y = shuffle(X, y, random_state=1)
# clf = classifier.GaussianNaiveBayes()
# clf.fit(X, y)
# clf.save('../model/gnb_300000_histeq.pic')