In [1]:
reset -fs

In [2]:
import numpy as np
import pandas as pd
import os
import glob
import pickle
import gzip
import dl_functions
from IPython.display import display
from keras.preprocessing.image import ImageDataGenerator
from keras.models import Sequential, model_from_json
from keras.layers import Dense, Dropout, Activation, Flatten
from keras.layers import Convolution2D, MaxPooling2D
from keras.optimizers import SGD
from keras.utils import np_utils
from keras.preprocessing.image import array_to_img, img_to_array, load_img
from keras.callbacks import LearningRateScheduler, ModelCheckpoint
from skimage import io, color, exposure, transform
from sklearn.metrics import confusion_matrix, roc_curve
from sklearn.cross_validation import train_test_split
from matplotlib import pyplot as plt
%matplotlib inline

Using TensorFlow backend.


#### Images will be resized to the size specified below.

In [3]:
# Size handed to dl_functions.normalize_images_array for both image sets; also embedded in the output file names.
# NOTE(review): presumably the target edge length in pixels — confirm against dl_functions.
IMG_SIZE = 25

#### Directory where images from epicurious.com will be downloaded.

In [4]:
# Directory holding the images that are stacked first in X and labelled 1 ('good').
ok_images = 'data/downloads/ok'

#### Directory where images from food.com will be downloaded.

In [5]:
# Directory holding the images that are stacked second in X and labelled 0 ('bad').
nok_images = 'data/downloads/nok'

#### Creating an array of images already pre-processed and ready to fit a model. Array is normalized.

Note that we stack the 'good' images first, then the 'bad' ones.

In [6]:
# Order matters: 'good' images first, then 'bad' ones — the label cell relies on this layout.
image_dirs = (ok_images, nok_images)
X = np.vstack([dl_functions.normalize_images_array(image_dir, IMG_SIZE) for image_dir in image_dirs])

#### Creating an array of labels.
Label 1 indicates a good image and 0 a bad image. Note that half of the images are good and half are bad, so the first half of the labels is set to 1 and the second half to 0.

In [7]:
# Labels mirror the stacking order of X: first half 'good' (1), second half 'bad' (0).
# Floor division keeps the count an integer on Python 3 as well as Python 2
# (true division would yield a float and break both list repetition and reshape).
# NOTE(review): assumes both image sets have the same number of images; with unequal
# sets the labels would be misaligned with X — confirm.
half = len(X) // 2
good_labels = np.array([1] * half).reshape((half, 1))
bad_labels = np.array([0] * half).reshape((half, 1))
y = np.vstack((good_labels, bad_labels))

#### Saving as a compressed pickle file. We specify in the file name the number of images and the image size selected.

In [10]:
# Create the output directory if needed so the cell also runs on a fresh checkout.
out_dir = 'pre_processed_images'
if not os.path.isdir(out_dir):
    os.makedirs(out_dir)

# The file name encodes the number of images and the image size; gzip.open compresses on write.
out_path = out_dir + '/image_data_' + str(len(X)) + '_' + str(IMG_SIZE) + '.txt.gz'
with gzip.open(out_path, 'wb') as fp:
    # pickle.dump returns None, so its result is deliberately not bound to a name.
    pickle.dump((X, y), fp)

In [11]:
# Create the output directory if needed so the cell also runs on a fresh checkout.
out_dir = 'pre_processed_images'
if not os.path.isdir(out_dir):
    os.makedirs(out_dir)

# Same gzip-compressed pickle of (X, y), written with a '.pklz' extension.
out_path = out_dir + '/image_data_' + str(len(X)) + '_' + str(IMG_SIZE) + '.pklz'
with gzip.open(out_path, 'wb') as fp:
    # pickle.dump returns None, so its result is deliberately not bound to a name.
    pickle.dump((X, y), fp)

#### Here the file is not compressed.

In [9]:
# Create the output directory if needed so the cell also runs on a fresh checkout.
out_dir = 'pre_processed_images'
if not os.path.isdir(out_dir):
    os.makedirs(out_dir)

# Uncompressed pickle of (X, y); opened in binary mode even though the extension is '.txt'.
out_path = out_dir + '/image_data_' + str(len(X)) + '_' + str(IMG_SIZE) + '.txt'
with open(out_path, 'wb') as fp:
    # pickle.dump returns None, so its result is deliberately not bound to a name.
    pickle.dump((X, y), fp)

In [12]:
# Create the output directory if needed so the cell also runs on a fresh checkout.
out_dir = 'pre_processed_images'
if not os.path.isdir(out_dir):
    os.makedirs(out_dir)

# Uncompressed pickle of (X, y), written with a '.pkl' extension.
out_path = out_dir + '/image_data_' + str(len(X)) + '_' + str(IMG_SIZE) + '.pkl'
with open(out_path, 'wb') as fp:
    # pickle.dump returns None, so its result is deliberately not bound to a name.
    pickle.dump((X, y), fp)

#### Copying the saved files to a storage bucket.

In [13]:
# Recursively copy (-r) the local 'pre_processed_images' directory to the Google Cloud Storage bucket.
!gsutil cp -r 'pre_processed_images' 'gs://wellio-kadaif-tasty-images-project-pre-processed-images'

Copying file://pre_processed_images/image_data_20000_25.txt [Content-Type=text/plain]...
==> NOTE: You are uploading one or more large file(s), which would run
significantly faster if you enable parallel composite uploads. This
feature can be enabled by editing the
"parallel_composite_upload_threshold" value in your .boto
configuration file. However, note that if you do this large files will
be uploaded as `composite objects
<https://cloud.google.com/storage/docs/composite-objects>`_,which
means that any user who downloads such objects will need to have a
compiled crcmod installed (see "gsutil help crcmod"). This is because
without a compiled crcmod, computing checksums on composite objects is
so slow that gsutil disables downloads of composite objects.

Copying file://pre_processed_images/image_data_20000_25.pkl [Content-Type=application/octet-stream]...
Copying file://pre_processed_images/image_data_20000_25.pklz [Content-Type=application/octet-stream]...
Copying file://pre_processed