In [1]:
import tensorflow as tf
import numpy as np
import random, json, string, pickle
import keras
import keras.layers
from keras.layers.core import Flatten, Dense, Dropout
from keras.layers.convolutional import Convolution2D, MaxPooling2D, ZeroPadding2D
import keras.models
from keras.models import Sequential
import keras.optimizers
import keras.callbacks
from keras.preprocessing import image
from keras.preprocessing.image import ImageDataGenerator
import matplotlib.pyplot as plt
import sqlite3
%matplotlib inline

Using TensorFlow backend.


In [2]:
def VGG_16(weights_path=None, num_classes=1000):
    """Build the VGG-16 network as a Keras ``Sequential`` model.

    The stack is five convolutional stages (2, 2, 3, 3 and 3 zero-padded 3x3
    ReLU convolutions with 64, 128, 256, 512 and 512 filters), each followed
    by 2x2 max pooling with stride 2, then two 4096-unit ReLU dense layers
    with 50% dropout and a sigmoid output layer.

    The input shape is declared channels-first as ``(3, 224, 224)``.

    Args:
        weights_path: Optional path handed to ``model.load_weights``; when
            falsy, no weights are loaded.
        num_classes: Width of the final sigmoid layer. Defaults to 1000,
            the value that was previously hard-coded.

    Returns:
        The assembled model; it is not compiled here.
    """
    # (number of filters, number of convolutions) for each stage.
    stages = [(64, 2), (128, 2), (256, 3), (512, 3), (512, 3)]

    model = Sequential()
    is_first_layer = True
    for filters, num_convs in stages:
        for _ in range(num_convs):
            if is_first_layer:
                # Only the very first layer declares the input shape.
                model.add(ZeroPadding2D((1,1), input_shape=(3,224,224)))
                is_first_layer = False
            else:
                model.add(ZeroPadding2D((1,1)))
            model.add(Convolution2D(filters, 3, 3, activation='relu'))
        model.add(MaxPooling2D((2,2), strides=(2,2)))

    model.add(Flatten())
    for _ in range(2):
        model.add(Dense(4096, activation='relu'))
        model.add(Dropout(0.5))
    # Sigmoid output: every unit is squashed to (0, 1) on its own, with no
    # normalisation across units.
    model.add(Dense(num_classes, activation='sigmoid'))

    if weights_path:
        model.load_weights(weights_path)

    return model

In [3]:
from keras import backend as K

# Select the 'th' image dimension ordering. VGG_16 declares its input as
# (3, 224, 224), i.e. channels first, so this has to run before the model is
# instantiated.
K.set_image_dim_ordering('th')

In [4]:
# Build the network from scratch: weights_path is left at its default, so no
# weights file is loaded.
m = VGG_16()
# SGD with momentum; learning rate, decay and momentum are fixed here.
sgd = keras.optimizers.SGD(lr = 0.01, decay = 1e-2, momentum = 0.9)
# Binary cross-entropy is applied to the sigmoid outputs, so each output unit
# is trained as its own yes/no target.
m.compile(loss='binary_crossentropy', optimizer = sgd, metrics=['accuracy'])
# Print the layer-by-layer summary as a sanity check of the architecture.
m.summary()

____________________________________________________________________________________________________
Layer (type)                     Output Shape          Param #     Connected to                     
zeropadding2d_1 (ZeroPadding2D)  (None, 3, 226, 226)   0           zeropadding2d_input_1[0][0]      
____________________________________________________________________________________________________
convolution2d_1 (Convolution2D)  (None, 64, 224, 224)  1792        zeropadding2d_1[0][0]            
____________________________________________________________________________________________________
zeropadding2d_2 (ZeroPadding2D)  (None, 64, 226, 226)  0           convolution2d_1[0][0]            
____________________________________________________________________________________________________
convolution2d_2 (Convolution2D)  (None, 64, 224, 224)  36928       zeropadding2d_2[0][0]            
___________________________________________________________________________________________

Creating the Pantry

In [5]:
# Fetch the ID and CleanIngredients columns of every Recipe row whose
# ImageAvailable flag is 1.
conn = sqlite3.connect("yummly.db")
try:
    recipes = conn.execute("SELECT ID, CleanIngredients FROM Recipe WHERE ImageAvailable=1;").fetchall()
finally:
    # Release the database handle even when the query raises.
    conn.close()

In [6]:
def prepIngredients(ingredients_string):
    """Split a ';'-separated ingredient string into a list of trimmed names.

    Every piece between separators is kept, including empty ones; only the
    surrounding whitespace of each piece is removed.
    """
    names = []
    for piece in ingredients_string.split(";"):
        names.append(piece.strip())
    return names

# Turn each (id, raw ingredient string) row into (id, [ingredient, ...]).
# Not idempotent: on a second run the second element is already a list, which
# prepIngredients cannot split.
recipes = [(row[0], prepIngredients(row[1])) for row in recipes]

In [7]:
# Flatten every recipe's ingredient list and count how often each name occurs.
all_ingredients = np.array([item for sublist in recipes for item in sublist[1]])
unique_ing = np.unique(all_ingredients, return_counts = True)

# One descending-frequency permutation, applied to both names and counts so
# that sorted_ing[k] and sorted_vals[k] always describe the same ingredient.
argsort_results = np.argsort(unique_ing[1])[::-1]

sorted_ing = unique_ing[0][argsort_results]
sorted_vals = unique_ing[1][argsort_results]

# The 1000 most frequent ingredients form the label vocabulary:
# id2food maps position -> name, food2id maps name -> position.
id2food = sorted_ing[0:1000]
food2id = {food: idx for idx, food in enumerate(id2food)}



In [8]:
# Spot-check: the ingredient stored at position 169 of the frequency-ordered
# vocabulary.
id2food[169]

'quinoa'

In [9]:
# Drop the references to the large intermediate arrays so their memory can be
# reclaimed.
all_ingredients = argsort_results = sorted_vals = None

In [10]:
ids = []
labels = []
recipe_ingredients = [x[1] for x in recipes]

# Build the training targets: one 1000-long multi-hot list per kept recipe.
# A recipe is kept only when every one of its ingredients is in the top-1000
# vocabulary; a single unknown ingredient drops the whole recipe.
for recipe, ingredients in zip(recipes, recipe_ingredients):
    # Look each ingredient up once; None marks a name outside the vocabulary.
    ingredient_ids = [food2id.get(name) for name in ingredients]
    if any(idx is None for idx in ingredient_ids):
        continue

    current_recipe = [0] * 1000
    for idx in ingredient_ids:
        current_recipe[idx] = 1

    labels.append(current_recipe)
    ids.append(recipe[0])

In [11]:
# Number of recipes kept after the vocabulary filter.
len(ids)

501790

In [12]:
# The raw recipe rows are no longer needed once ids and labels are built.
recipe_ingredients = recipes = None

In [13]:
import keras.applications.vgg16 as vgg16

In [14]:
# Ordered 80/20 split: the first four fifths (by integer division) are used
# for training, everything after that point for testing. No shuffling is done.
split_at = (len(labels) // 5) * 4

train_ids, test_ids = ids[:split_at], ids[split_at:]
train_labels, test_labels = labels[:split_at], labels[split_at:]

In [15]:
import random

# The images cannot all be held in memory at once, so batches are read from
# disk on demand.
def DataGenerator(imageIds, imageLabels, batch_size):
    """Yield random ``(images, labels)`` batches forever.

    Each sample is drawn uniformly at random, with replacement, from
    ``imageIds``. Its picture is read from ``resized_thumbs/<id>.jpg``,
    resized to 224x224, and the assembled batch is passed through
    ``vgg16.preprocess_input``.

    Args:
        imageIds: Sequence of image id strings (file names without ``.jpg``).
        imageLabels: Sequence parallel to ``imageIds``; entry ``k`` is the
            label vector of ``imageIds[k]``. All vectors are expected to have
            the length of the first one.
        batch_size: Number of samples in every yielded batch.

    Yields:
        ``(batch, labels)`` where ``batch`` has shape
        ``(batch_size, 3, 224, 224)`` and ``labels`` has shape
        ``(batch_size, len(imageLabels[0]))``.

    Raises:
        IOError / OSError: when 100 images in a row cannot be read.
    """
    import warnings

    max_consecutive_failures = 100
    label_width = len(imageLabels[0])

    while True:
        # Fresh arrays for every batch. Reusing one buffer would overwrite a
        # batch that has already been yielded but not yet consumed.
        batch = np.zeros((batch_size, 3, 224, 224))
        labels = np.zeros((batch_size, label_width))

        filled = 0
        failures = 0
        while filled < batch_size:
            index = random.randint(0, len(imageIds) - 1)
            img_path = 'resized_thumbs/' + imageIds[index] + ".jpg"
            try:
                img = image.img_to_array(image.load_img(img_path, target_size=(224, 224)))
            except (IOError, OSError) as err:
                # One missing or corrupt file should not end an endless
                # generator: warn, then draw a different sample.
                # NOTE(review): presumably the "output of generator ... Found:
                # None" failure recorded during training came from a worker
                # dying on such a file — confirm.
                failures += 1
                if failures >= max_consecutive_failures:
                    raise
                warnings.warn("Skipping unreadable image %s: %s" % (img_path, err))
                continue

            failures = 0
            batch[filled] = img
            labels[filled] = imageLabels[index]
            filled += 1

        yield vgg16.preprocess_input(batch), labels
        

# Train for 20 epochs on random batches of 16, validating on batches drawn the
# same way from the held-out split, with 3 workers feeding a queue of at most
# 4 batches.
# NOTE(review): the per-epoch size and nb_val_samples are both len(...) / 16.
# If this Keras version counts samples rather than batches here (the progress
# bar reading "800/25089" suggests it does), each epoch covers only about 1/16
# of the data — confirm that is intended.
m.fit_generator(DataGenerator(train_ids, train_labels, 16), len(train_ids) / 16, nb_epoch = 20,
                         validation_data = DataGenerator(test_ids, test_labels, 16),
                         nb_val_samples = len(test_ids) / 16,
                         nb_worker = 3, max_q_size = 4, pickle_safe = True)

# Weights are written only after fit_generator returns, so nothing is saved if
# training raises part-way through.
m.save_weights('full_model_weights.hdf5')

Epoch 1/20
  800/25089 [..............................] - ETA: 2136s - loss: 0.4978 - acc: 0.7240

Exception: output of generator should be a tuple (x, y, sample_weight) or (x, y). Found: None

In [None]:
# Scratch check of the array shape produced for one loaded image.
# NOTE(review): no earlier top-level statement assigns img_path (the one in
# DataGenerator is local to that function), so on a fresh kernel this cell
# presumably raises NameError — confirm, then define img_path first or drop
# the cell.
image.img_to_array(image.load_img(img_path, target_size=(224, 224))).shape

In [None]:
# NOTE(review): looks like a leftover debugging cell — presumably safe to remove.
print("hello")

In [None]:
# Score a single photo with the network.
img_path = 'download.jpg'  # a picture taken in the author's kitchen
img = image.load_img(img_path, target_size=(224, 224))
img_arr = image.img_to_array(img)

# predict() only accepts batches, so the lone image gets a leading batch axis;
# it then goes through the same preprocessing that the training batches use.
x = vgg16.preprocess_input(img_arr[np.newaxis, ...])

predictions = m.predict(x)

#label_predictions = vgg16.decode_predictions(predictions, top = 10)

print('Input image size:', x.shape)
print('Prediction scores: ', predictions.shape)
print('\nPredictions:')

plt.imshow(np.asarray(img));

In [None]:
# Print every ingredient whose score for the first (only) image exceeds 0.1.
# Names and scores are paired positionally, so the loop follows the actual
# width of the prediction vector instead of a hard-coded 1000.
for food, score in zip(id2food, predictions[0]):
    if score > 0.1:
        print(food + ": " + str(score))

In [None]:
# Number of rows in the prediction array (one row per image passed to predict).
len(predictions)

In [None]:
# train_datagen = ImageDataGenerator(
#         rescale=1./255,
#         #shear_range=0.2,
#         #zoom_range=0.2,
#         horizontal_flip=True)

# test_datagen = ImageDataGenerator(rescale=1./255)

# train_generator = train_datagen.flow_from_directory(
#         'images/',
#         target_size=(224, 224),
#         batch_size=32,
#         class_mode=None)

# validation_generator = test_datagen.flow_from_directory(
#         'images/',
#         target_size=(224, 224),
#         batch_size=32,
#         class_mode=None)

# model.fit_generator(
#         train_generator,
#         samples_per_epoch=2000,
#         nb_epoch=50,
#         validation_data=validation_generator,
#         nb_val_samples=800)