In [1]:
import cPickle
import pandas as pd
import numpy as np
from matplotlib import pyplot as plt
from sklearn.cross_validation import train_test_split



### Pre-process data

In [None]:
def unpickle(file):
    """Load and return the object stored in a pickle file.

    Parameters
    ----------
    file : str
        Path of the pickle file to read (opened in binary mode).

    Returns
    -------
    object
        Whatever object ``cPickle.load`` reconstructs from the file.
    """
    # NOTE: unpickling can execute arbitrary code -- only use on trusted files.
    # The context manager closes the handle even when loading raises.
    with open(file, 'rb') as fo:
        return cPickle.load(fo)

def reshape_img(img):
    """Convert a flat, channel-major image into a (32, 32, 3) uint8 array.

    The input is interpreted as three consecutive planes of 32*32 values.
    Element ``[i, j, c]`` of the result is value ``i*32 + j`` of plane ``c``.

    Parameters
    ----------
    img : numpy.ndarray
        Array holding exactly 3*32*32 values.

    Returns
    -------
    numpy.ndarray
        A new C-contiguous array of shape (32, 32, 3) and dtype uint8.
    """
    # Split into (channel, row, col), then move the channel axis last. This is
    # the vectorised equivalent of copying every pixel in a Python loop.
    planes = img.reshape((3, 32, 32))
    # astype always copies here, so the result never aliases the input buffer.
    return planes.transpose(1, 2, 0).astype(np.uint8, order='C')


#### Train set

In [None]:
%%time
# Raw CIFAR-10 python batches that make up the training set.
DATASET_DIR = "cifar-10-batches-py/"
DATASET_FILES = [
    "data_batch_1",
    "data_batch_2",
    "data_batch_3",
    "data_batch_4",
    "data_batch_5",
]

# Gather the flat image rows and their labels from every batch file.
imgs, labels = [], []
for batch_name in DATASET_FILES:
    batch = unpickle(DATASET_DIR + batch_name)
    imgs.extend(batch["data"])
    labels.extend(batch["labels"])

# One row per image; each flat row is turned into a 32x32x3 array before saving.
dataset = pd.DataFrame({'img': imgs, 'label': labels})
dataset["img"] = dataset["img"].map(reshape_img)
dataset.to_pickle("dataset/train.pkl")

#### Test set

In [None]:
%%time
# Raw CIFAR-10 python batch that makes up the test set.
DATASET_DIR = "cifar-10-batches-py/"
DATASET_FILES = ["test_batch"]

# Gather the flat image rows and their labels from every batch file.
imgs, labels = [], []
for batch_name in DATASET_FILES:
    batch = unpickle(DATASET_DIR + batch_name)
    imgs.extend(batch["data"])
    labels.extend(batch["labels"])

# One row per image; each flat row is turned into a 32x32x3 array before saving.
dataset = pd.DataFrame({'img': imgs, 'label': labels})
dataset["img"] = dataset["img"].map(reshape_img)
dataset.to_pickle("dataset/test.pkl")

### Load Dataset

In [11]:
# Read the pre-processed training pickle back into a DataFrame.
DATASET_DIR = "dataset/"
DATASET_FILE = "train.pkl"

train_pickle_path = DATASET_DIR + DATASET_FILE
dataset = pd.read_pickle(train_pickle_path)
# Optional: uncomment to work on a fixed 20000-row subsample instead.
# dataset = dataset.sample(20000,random_state=7)

#### Train-Test split

In [12]:
# Hold out 20% of the rows for evaluation. The fixed seed makes the split, and
# therefore every score computed from it, repeatable across kernel restarts.
dataset_train, dataset_test = train_test_split(dataset, test_size=.2, random_state=7)

## Method 1: KNN

## Method 2: Linear SVM

In [4]:
from skimage.feature import hog
from skimage import color
from sklearn.svm import LinearSVC

#### 1). Using HOG descriptors

In [13]:
%%time
print "Computing descriptors for train set"
X_train = []
for img in dataset_train.img.values:
    X_train.append(hog(color.rgb2gray(img)))
X_train = np.array(X_train)

print "Computing descriptors for test set"
X_test = []
for img in dataset_test.img.values:
    X_test.append(hog(color.rgb2gray(img)))
X_test = np.array(X_test)

y_train = dataset_train.label.values
y_test = dataset_test.label.values

Computing descriptors for train set
Computing descriptors for test set
CPU times: user 28.5 s, sys: 592 ms, total: 29 s
Wall time: 29.1 s


#### 2). Using K-means clustered BoW

#### Init and Fit Model

In [20]:
%%time
# Linear SVM on the features prepared above. The fixed seed pins liblinear's
# internal data shuffling so repeated fits give the same model.
model = LinearSVC(verbose=1, random_state=7)
model.fit(X_train,y_train)

[LibLinear]CPU times: user 17 s, sys: 0 ns, total: 17 s
Wall time: 17 s


#### Evaluate

In [21]:
# Score the fitted SVM on the held-out split; the value is shown as the cell output.
model.score(X_test,y_test)

0.47310000000000002

## Method 3: CNN

In [4]:
from keras.utils.np_utils import to_categorical
from keras.models import Sequential
from keras.layers import Dense,Dropout,Activation,Flatten,Convolution2D,MaxPooling2D,ZeroPadding2D

Using TensorFlow backend.


In [17]:
# Stack the per-row image arrays of each split into a single array.
X_train = np.array([np.array(v) for v in dataset_train.img.values])
X_test = np.array([np.array(v) for v in dataset_test.img.values])

# Encode the integer labels as categorical vectors for the softmax output
# and the categorical cross-entropy loss used by the model below.
y_train = to_categorical(dataset_train.label.values)
y_test = to_categorical(dataset_test.label.values)

#### Compile Model

In [18]:
# Create Keras model: the layers are passed to Sequential in the order in
# which they are applied.
model = Sequential([
    # Stage 1: 32 filters of 3x3 on the (32, 32, 3) input, then 2x2 max-pooling.
    Convolution2D(32, 3, 3, border_mode='valid', activation='relu', input_shape=(32,32,3)),
    MaxPooling2D(pool_size=(2, 2)),

    # Stage 2: 64 filters of 3x3, followed by one pixel of zero padding per side.
    Convolution2D(64, 3, 3, border_mode='valid', activation='relu'),
    ZeroPadding2D((1, 1)),

    # Stage 3: 128 filters of 3x3, 2x2 max-pooling, then zero padding again.
    Convolution2D(128, 3, 3, border_mode='valid', activation='relu'),
    MaxPooling2D(pool_size=(2, 2)),
    ZeroPadding2D((1, 1)),

    # Classifier head: dropout, flatten, 128-unit dense layer, dropout,
    # then a 10-way softmax.
    # NOTE(review): Dense(128) is given no activation argument -- confirm that
    # a hidden layer without a non-linearity is intended.
    Dropout(0.2),
    Flatten(),
    Dense(128),
    Dropout(0.2),
    Dense(10),
    Activation('softmax'),
])

model.compile(loss='categorical_crossentropy', optimizer='adam', metrics=['accuracy'])
# model.summary()


#### Fit

In [19]:
# Train the CNN for 15 epochs with mini-batches of 32 samples; verbose=1 prints
# per-epoch progress. The value returned by fit() is shown as the cell output.
model.fit(X_train, y_train, batch_size=32, nb_epoch=15,verbose=1)

Epoch 1/15
Epoch 2/15
Epoch 3/15
Epoch 4/15
Epoch 5/15
Epoch 6/15
Epoch 7/15
Epoch 8/15
Epoch 9/15
Epoch 10/15
Epoch 11/15
Epoch 12/15
Epoch 13/15
Epoch 14/15
Epoch 15/15


<keras.callbacks.History at 0x7fead236fdd0>

#### Evaluate

In [20]:
# Evaluate on the held-out split. The model was compiled with
# metrics=['accuracy'], so the displayed list holds the loss and the accuracy.
score = model.evaluate(X_test, y_test, verbose=1)
score



[1.5539512586593629, 0.54225000000000001]