In [1]:
import os
import pandas as pd
import glob
import menpo.io as mio
import numpy as np
#np.random.seed(1337)  # for reproducibility
from keras.utils import np_utils
from keras import backend as K

Using TensorFlow backend.


In [2]:
def randomSplit(X, y, propTest):
    """Shuffle X and y together and split them into train and test parts.

    Args:
        X: array of samples, indexed along its first axis.
        y: array of labels; must have as many entries as X has samples.
        propTest: fraction of the samples assigned to the test part.

    Returns:
        ((X_train, y_train), (X_test, y_test)). The test part holds the first
        round(propTest * len(y)) samples of the shuffled order, the train
        part holds the remainder.
    """
    assert X.shape[0] == y.shape[0]
    order = np.random.permutation(len(y))
    n_test = round(propTest * len(y))
    # Split the permutation itself, then index each array once per part.
    test_idx, train_idx = order[:n_test], order[n_test:]
    return (X[train_idx], y[train_idx]), (X[test_idx], y[test_idx])

def randomOrder(x, y, z=None):
    """Shuffle two or three parallel arrays with one shared permutation.

    Python has no overloading, so defining a two-array and a three-array
    randomOrder side by side leaves only the last definition callable.
    A single function with an optional third array supports both call forms.

    Args:
        x: first array, indexed along its first axis.
        y: second array; its length determines the permutation size.
        z: optional third array, shuffled the same way when given.

    Returns:
        (x[p], y[p]) when z is None, otherwise (x[p], y[p], z[p]), where p is
        one random permutation of range(len(y)).
    """
    p = np.random.permutation(len(y))
    if z is None:
        return x[p], y[p]
    return x[p], y[p], z[p]

# based on
# http://www.socouldanyone.com/2013/03/converting-grayscale-to-rgb-with-numpy.html
def to_rgb(im):
    """Return the pixel array of `im` with three channels.

    Args:
        im: image object exposing `n_channels` and `pixels`. The channel axis
            is assumed to be axis 0 of `pixels`, since the single channel is
            replicated with np.vstack -- TODO confirm against the image
            library in use.

    Returns:
        `im.pixels` unchanged when the image is not single-channel; otherwise
        the single channel stacked three times along axis 0, keeping the
        original dtype.
    """
    if im.n_channels == 1:
        # No uint8 cast here: menpo's importer normalises pixels to floats in
        # [0, 1] by default, so casting to uint8 would truncate a grayscale
        # image to (almost) all zeros and disagree with the RGB branch below.
        return np.vstack([im.pixels] * 3)
    else:
        return im.pixels

def subsample(df, col, props = None):
    """Draw a random subsample of `df` with a per-label row quota on `col`.

    Args:
        df: source DataFrame.
        col: name of the column holding the labels.
        props: optional dict mapping label -> desired proportion. When None,
            every label gets the same quota (the count of the rarest label).
            Labels whose proportion is missing or not positive are left out
            of the result.

    Returns:
        A new DataFrame with the same columns as `df`, rows in random order,
        re-indexed 0..n-1. Each label contributes at most its quota of rows.

    Quotas: the limiting label is the one with the smallest
    count * proportion; it gets round(count * proportion) rows and every
    other label is scaled relative to it by the ratio of proportions.
    """
    labels = df[col].values
    counts = dict()
    for l in labels:
        counts[l] = counts.get(l, 0) + 1

    # With no proportions given, weight every label equally.
    if props is None:
        weights = dict((l, 1) for l in counts)
    else:
        weights = dict((l, props.get(l, 0)) for l in counts)

    # Only labels with a positive weight can take part; picking a
    # zero-weight label as the reference would divide by zero below.
    usable = [l for l in counts if weights[l] > 0]
    if not usable:
        return df.iloc[0:0].reset_index(drop=True)

    leastKey = min(usable, key=lambda l: counts[l] * weights[l])
    leastVal = int(round(counts[leastKey] * weights[leastKey]))
    thresholds = dict()
    for l in counts:
        labelToLabel = float(weights[l]) / weights[leastKey]
        thresholds[l] = int(round(labelToLabel * leastVal))
    # NOTE(review): a quota can exceed the number of rows a label actually
    # has; that label then contributes all of its rows and the requested
    # proportions are not met exactly. Confirm whether this is acceptable.

    # Visit rows in random order and keep each one while its label is still
    # under quota (strictly: `<`, so a label never gets quota + 1 rows).
    order = np.random.permutation(len(df))
    taken = dict()
    keep = []
    for pos in order:
        l = labels[pos]
        if taken.get(l, 0) < thresholds[l]:
            keep.append(pos)
            taken[l] = taken.get(l, 0) + 1
    # Positional selection keeps column dtypes and yields exactly the
    # selected rows, with no pre-allocated placeholder rows.
    return df.iloc[keep].reset_index(drop=True)


In [3]:
# Tab-separated annotation table; read with an 'id' column plus label columns.
data_file = 'idToLabel.tsv'
# Folder containing the .jpg images; must end with '/'.
image_folder = './croppedTwitter/'
# Column of data_file used as the label.
variable = 'ow'
# Expected image size (pixels) and number of colour channels.
height = width = 128
channels = 3

In [4]:
# Read the annotation table. pd.read_csv replaces DataFrame.from_csv, which
# is deprecated and removed in pandas 1.0; with index_col=None the two read
# the file the same way.
annos = pd.read_csv(data_file, header=0, index_col=None, sep="\t")
# Normalise ids to integer-valued strings so they compare equal to file names.
annos['id'] = [str(int(x)) for x in annos['id']]

test_images = []
test_labels = []
test_ids = []
# os.path.join yields the same pattern whether or not image_folder ends in '/'.
for fp in glob.glob(os.path.join(image_folder, '*.jpg')):
    # The file name without its extension is the image id.
    fn = os.path.splitext(os.path.basename(fp))[0]
    image = mio.import_image(fp)
    assert(image.shape == (height, width))
    # Select the annotation rows for this image once and reuse them.
    rows = annos.loc[annos['id'] == fn]
    # NOTE(review): .sum() collapses the selection to a single value. An id
    # with no matching row yields the sum of an empty selection (0 in current
    # pandas) rather than an error, and duplicated ids are added together --
    # confirm this is intended.
    test_images.append(image)
    test_labels.append(rows[variable].sum())
    test_ids.append(rows["id"].sum())


In [5]:
# Number of target classes.
nb_classes = 2

# Image dimensions expected by the network.
img_rows, img_cols, img_channels = height, width, channels

# Batch every image's three-channel pixel array into one array.
X_test = np.stack(list(map(to_rgb, test_images)))
if K.image_dim_ordering() != 'tf':
    input_shape = (channels, img_rows, img_cols)
else:
    input_shape = (img_rows, img_cols, channels)
    # NOTE(review): reshape only reinterprets the existing memory layout; it
    # does not move a leading channel axis to the end the way a transpose
    # would. If the stacked pixels are channels-first this scrambles the
    # images. Left as is because the saved model may have been trained on
    # exactly this layout -- confirm against the training code.
    X_test = X_test.reshape((X_test.shape[0],) + input_shape)
X_test = X_test.astype('float32')

print('X_test shape:', X_test.shape)
print(X_test.shape[0], 'test samples')

# One-hot encode the integer labels into an (n_samples, nb_classes) matrix.
y_test = np_utils.to_categorical(np.array(test_labels), nb_classes)
id_test = np.array(test_ids)

# Shuffle images, labels and ids with one shared permutation.
X_test, y_test, id_test = randomOrder(X_test, y_test, id_test)

X_test shape: (5462, 128, 128, 3)
5462 test samples


In [6]:
from keras.models import load_model

# Load the saved classifier and evaluate it on the prepared test arrays.
model = load_model("overweight_classifier.h5")

# The first two entries of the evaluation result are printed as score and
# accuracy, in that order.
score = model.evaluate(X_test, y_test, verbose=0)
for position, caption in enumerate(('Test score:', 'Test accuracy:')):
    print(caption, score[position])

Test score: 0.0147912029825
Test accuracy: 0.999084584401


In [7]:
# Model outputs for every test sample; print the first two output columns
# one after the other.
s = model.predict(X_test)
for class_index in (0, 1):
    print(s[:, class_index])

[  5.07050061e-07   1.36850908e-09   1.84489437e-07 ...,   1.39028657e-07
   3.40872113e-08   1.74147135e-06]
[ 0.99999952  1.          0.99999976 ...,  0.99999988  1.          0.99999821]


In [21]:
# Sum of column 1 of the one-hot label matrix, i.e. how many test samples
# carry class 1. Compare with the total sample count before trusting the
# accuracy figure: a heavily one-sided label set makes accuracy uninformative.
y_test[:,1].sum()

5457.0