In [9]:
# read dataset one image
from keras.preprocessing import image

imageSize = 128
is_debug = False

def readImage(path):
    img = image.load_img(path, target_size=(imageSize, imageSize))
    x = image.img_to_array(img)
    x = x.astype('float32')
    x /= 255
    return x

In [10]:
def getLabel(path):
    name = path[path.rfind('/') + 1:path.index('.')]
#     print (name)
    return 1 if name == 'dog' else 0
print ("getLabel done")

getLabel done


In [11]:
import helper

def readFeatureAndLabel():
    train_files = helper.get_train_files()
    file_count = 1024 if is_debug else len(train_files)
    print("readFeatureAndLabel file_count=" + str(file_count))
    train_feature = []
    train_label = []
    for i in range(file_count):
        f = train_files[i]
#         print (f)
        train_feature.append(readImage(f))
        train_label.append(getLabel(f))
    
    return train_feature, train_label

In [12]:
import numpy as np

total_feature, total_label = readFeatureAndLabel()
total_feature = np.array(total_feature)
# total_feature = total_feature.astype('float32')
# total_feature /= 255

total_label = np.array(total_label)
# print (total_label)

readFeatureAndLabel file_count=1024


In [13]:
# split train_feature, train_label, validation_feature, validation_label
from sklearn.model_selection import train_test_split
train_feature, validation_feature, train_label, validation_label = train_test_split(total_feature, total_label, test_size=0.1, random_state=1)
    
# print (train_label)

In [20]:
print(type(train_feature))

<class 'numpy.ndarray'>


In [14]:
import math

def score_fun(label_pre):
    total_score = 0
    size = validation_label.size
    for i in range(size):
        y = validation_label[i]
        yp = label_pre[i]
#         print (str(y) + " " + str(yp))
        try:
            total_score = y * math.log(yp) + (1 - y)*math.log(1-yp)
        except:
            print (str(i) + ": " + str(y) + " - " + str(yp))
    return -total_score/size

In [15]:
# convolutional
from keras.models import Sequential
from keras.layers import Dense, Dropout, Flatten
from keras.layers.convolutional import Conv2D
from keras.layers.pooling import MaxPooling2D
from keras.optimizers import SGD

model = Sequential()

model.add(Conv2D(32, (3, 3), activation='relu', input_shape=(imageSize, imageSize, 3)))

model.add(Conv2D(32, (3, 3), activation='relu'))
model.add(MaxPooling2D(pool_size=(2, 2)))
model.add(Dropout(0.25))

model.add(Conv2D(64, (3, 3), activation='relu'))
model.add(Conv2D(64, (3, 3), activation='relu'))
model.add(MaxPooling2D(pool_size=(2, 2)))
model.add(Dropout(0.25))

model.add(Flatten())
model.add(Dense(256, activation='relu'))
model.add(Dropout(0.5))
model.add(Dense(1, activation='sigmoid'))

# sgd = SGD(lr=0.01, decay=1e-6, momentum=0.9, nesterov=True)
model.compile(loss='binary_crossentropy', optimizer='rmsprop',
              metrics=['accuracy'])

model.fit(train_feature, train_label, batch_size=16, epochs=10, validation_data=(validation_feature, validation_label))

pre_val_label = model.predict(x=validation_feature, batch_size=16)
# for i in range(pre_val_label.size):
#     print(str(i) + ": " + str(pre_val_label[i]))
score = model.evaluate(validation_feature, validation_label, batch_size=16)
print (score)


Train on 921 samples, validate on 103 samples
Epoch 1/10
Epoch 2/10
Epoch 3/10
Epoch 4/10
Epoch 5/10
Epoch 6/10
Epoch 7/10
Epoch 8/10
Epoch 9/10
Epoch 10/10
0: [0.09079538]
1: [0.00656988]
2: [0.18281412]
3: [0.6524658]
4: [0.01819674]
5: [0.6246056]
6: [0.00860166]
7: [0.17745721]
8: [0.04235994]
9: [0.500625]
10: [0.0500339]
11: [0.8615503]
12: [0.9467236]
13: [0.70683503]
14: [0.9143434]
15: [0.00330901]
16: [0.01329117]
17: [0.02343081]
18: [0.07804729]
19: [0.9766699]
20: [0.7314545]
21: [0.6820823]
22: [0.3600426]
23: [0.60128444]
24: [2.195965e-06]
25: [0.00466921]
26: [0.7881074]
27: [0.4164023]
28: [0.03711015]
29: [0.9872341]
30: [8.224119e-06]
31: [0.18410969]
32: [0.02212207]
33: [0.955988]
34: [0.04824639]
35: [0.00447693]
36: [0.8446992]
37: [0.00011146]
38: [0.81159633]
39: [0.5409508]
40: [0.992007]
41: [0.38037813]
42: [0.996994]
43: [0.08266976]
44: [0.00047062]
45: [0.26195046]
46: [0.04566048]
47: [0.00127]
48: [0.00277843]
49: [0.04515623]
50: [0.22816056]
51: [0.9

In [16]:
# score the model

score = score_fun(pre_val_label)

print (score)

0.0013107780161161018


In [17]:
# save weights
model.save_weights('weights.h5')

In [23]:
import helper

test_files = helper.get_test_files()
test_files = sorted(test_files, key=lambda s: (len(s), s))

test_file_size = 128 if is_debug else len(test_files)

test_feature = []
for i in range(test_file_size):
    test_feature.append(readImage(test_files[i]))
test_feature = np.asarray(test_feature)
print(type(test_feature))
test_label = model.predict(test_feature, batch_size=16)
print (len(test_label))

<class 'numpy.ndarray'>
128


In [37]:
test_label_output = []
for i in range(len(test_label)):
    test_label_output.append([i + 1, test_label[i]])
    
print (len(test_label_output))
test_label_output = np.array(test_label_output)
np.savetxt("test.csv", test_label_output, delimiter=',', header="id,label", comments="")
# np.savetxt('test.csv', test_label, delimiter=",")

128
