In [None]:
import numpy as np 
import pandas as pd 
import matplotlib.pyplot as plt

import multiprocessing
from multiprocessing.pool import ThreadPool

from sklearn.svm import SVC
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import LabelBinarizer
from sklearn.ensemble import AdaBoostClassifier
from sklearn.preprocessing import LabelEncoder
from sklearn.metrics import classification_report,accuracy_score
from skimage.feature import hog
from skimage import color

import cv2

import os


In [None]:
# Directory that holds the competition files (train_images.npy, train_labels.csv).
dataPath = 'all'
localPath = ''

# The image file is loaded as an object array: rows are later indexed as
# images[:, 1], and encoding="bytes" only has an effect when unpickling
# Python 2 data. NumPy >= 1.16.3 refuses to unpickle unless allow_pickle=True
# is passed explicitly.
# SECURITY: unpickling can execute arbitrary code -- only load files you trust.
images = np.load(os.path.join(dataPath, 'train_images.npy'),
                 encoding="bytes", allow_pickle=True)
labels = pd.read_csv(os.path.join(dataPath, 'train_labels.csv'))

In [None]:
# Fixed seed so the stratified split -- and every metric computed from it --
# is identical on each Restart & Run All.
SPLIT_SEED = 42

# One row per image (flattened pixel vector) plus its class in a 'label' column.
allData = pd.DataFrame(np.array(list(images[:, 1]))).assign(label=labels['Category'])
assert len(allData) == len(labels), "images and labels must contain the same number of rows"

num_classes = len(labels['Category'].unique())
valueCounts = labels['Category'].value_counts()

# 85/15 split that preserves the class proportions of 'Category'.
xTrain, xValid = train_test_split(allData,
                                  stratify=labels['Category'],
                                  test_size=0.15,
                                  random_state=SPLIT_SEED)
trainInds = xTrain.index
validInds = xValid.index

# Read the targets from the split frames themselves instead of re-indexing
# `labels` by position (the old lookup depended on 'Category' being column 1).
yTrain = xTrain['label'].values
yValid = xValid['label'].values

# Restore the image layout expected by the preprocessing step:
# (n_images, 100, 100, 1). reshape raises if a row is not 100*100 pixels.
xTrainRaw = xTrain.drop('label', axis=1).values.reshape((xTrain.shape[0], 100, 100, 1))
xValidRaw = xValid.drop('label', axis=1).values.reshape((xValid.shape[0], 100, 100, 1))

In [None]:
def preProcessImage(image, cutoff=127, areaCutoff=14, maxContours=4, fliplr=False):
    """Zero out everything outside the bounding boxes of the largest bright regions.

    The image is binarised at ``cutoff``, the contours of the binary image are
    ranked by area, and the bounding rectangles of the ``maxContours`` largest
    contours whose area exceeds ``areaCutoff`` form a keep-mask. Pixels outside
    the mask are set to 0; pixels inside keep their original value.

    Parameters
    ----------
    image : array-like
        Image to clean; it is cast to ``uint8`` before processing.
    cutoff : int, default 127
        Threshold for ``cv2.threshold``: pixels strictly above it count as
        foreground.
    areaCutoff : float, default 14
        Contours whose area is not greater than this value are ignored.
    maxContours : int, default 4
        Only this many of the largest contours are considered.
    fliplr : bool, default False
        If True, the filtered image is mirrored left-to-right before it is
        returned.

    Returns
    -------
    numpy.ndarray
        ``uint8`` array with the same shape as the (cast) input image.
    """
    image = np.uint8(image)
    _, thresh = cv2.threshold(image, cutoff, 255, cv2.THRESH_BINARY)

    # cv2.findContours returns (image, contours, hierarchy) on OpenCV 3.x but
    # (contours, hierarchy) on 2.x and 4.x. The contour list is the
    # second-to-last element in every version, so index from the end instead
    # of unpacking a fixed number of values.
    contours = cv2.findContours(thresh, cv2.RETR_LIST, cv2.CHAIN_APPROX_SIMPLE)[-2]
    largest_contours = sorted(contours, key=cv2.contourArea, reverse=True)

    mask = np.zeros(image.shape, np.uint8)
    for contour in largest_contours[:maxContours]:
        if cv2.contourArea(contour) > areaCutoff:
            x, y, w, h = cv2.boundingRect(contour)
            mask[y:y+h, x:x+w] = 255

    filteredImage = cv2.bitwise_and(image, image, mask=mask)
    if fliplr:
        filteredImage = np.fliplr(filteredImage)
    # Reshape back to the input shape so callers always get the input layout.
    return filteredImage.reshape(image.shape)

In [None]:
def _preprocess_all(pool, rawImages, fliplr=False):
    """Run preProcessImage over every image on `pool`; return one flattened row per image."""
    flattened = pool.map(lambda im: preProcessImage(im, fliplr=fliplr).flatten(),
                         list(rawImages))
    return np.array(flattened)


pool = ThreadPool(multiprocessing.cpu_count())
try:
    xTrainUnflipped = _preprocess_all(pool, xTrainRaw)
    xTrainFlipped = _preprocess_all(pool, xTrainRaw, fliplr=True)

    xValidUnflipped = _preprocess_all(pool, xValidRaw)
    xValidFlipped = _preprocess_all(pool, xValidRaw, fliplr=True)
finally:
    # Release the worker threads even if preprocessing raises.
    pool.close()
    pool.join()

# Augmented sets: every image followed by its left-right mirrored copy.
xTrain = np.concatenate((xTrainUnflipped, xTrainFlipped), axis=0)
xValid = np.concatenate((xValidUnflipped, xValidFlipped), axis=0)

# Duplicate the labels to match the doubled feature matrices. Slicing to the
# raw set size first keeps this cell idempotent: re-running it would otherwise
# double yTrain/yValid again and desynchronise them from xTrain/xValid.
yTrainBase = yTrain[:len(xTrainRaw)]
yValidBase = yValid[:len(xValidRaw)]
yTrain = np.concatenate((yTrainBase, yTrainBase), axis=0)
yValid = np.concatenate((yValidBase, yValidBase), axis=0)

assert len(xTrain) == len(yTrain) and len(xValid) == len(yValid)

In [None]:
# Map the category values to integer class ids; the encoder is fitted on the
# distinct values of the 'Category' column so both splits share one mapping.
lb = LabelEncoder().fit(labels['Category'].unique())
yTrainFinal, yValidFinal = (lb.transform(targets) for targets in (yTrain, yValid))

**Simple Classifier**

In [None]:
# Baseline: a single SVC (gamma fixed at 0.001) trained on the flattened,
# preprocessed pixel vectors. fit() returns the estimator itself.
clf = SVC(gamma=.001).fit(xTrain, yTrainFinal)

# Score the baseline on the validation set.
y_pred = clf.predict(xValid)
svcAccuracy = accuracy_score(yValidFinal, y_pred)
svcReport = classification_report(yValidFinal, y_pred)

# sep='\n' reproduces the blank lines between the accuracy and the report.
print("Accuracy: "+str(svcAccuracy), '\n', svcReport, sep='\n')

**Adaptive Boosting**

In [None]:
# Weak learner for boosting: same gamma as the baseline SVC, with a
# one-vs-one decision function shape.
boostedSvc = SVC(gamma=.001, decision_function_shape='ovo')

# Boost up to 150 of these SVCs with the discrete SAMME algorithm.
bdt = AdaBoostClassifier(boostedSvc,
                         n_estimators=150,
                         algorithm="SAMME")

In [None]:
# Train the boosted ensemble on the training features and integer-encoded labels.
# NOTE(review): this fits up to n_estimators SVCs one after another, so it is
# presumably the slowest cell in the notebook -- consider timing it with %%time.
bdt.fit(xTrain, yTrainFinal)

In [None]:
# Score the boosted ensemble on the validation set, in the same output format
# as the baseline SVC cell so the two results can be compared directly.
y_pred = bdt.predict(xValid)
boostedAccuracy = accuracy_score(yValidFinal, y_pred)
boostedReport = classification_report(yValidFinal, y_pred)

for chunk in ("Accuracy: "+str(boostedAccuracy), '\n', boostedReport):
    print(chunk)