### MADlib Image Loader requires psycopg2. If you don't already have it installed, run:

`pip install psycopg2-binary`

In [1]:
import sys
import os
from keras.datasets import cifar10, cifar100, mnist, fashion_mnist, imdb, reuters

Using TensorFlow backend.


In [2]:
# Make madlib_image_loader.py importable by adding its directory to sys.path
#   TIP:  You can skip this cell if madlib_image_loader.py is already in your current directory

# Plain Python instead of the `%env HOME` IPython magic, so this cell still works
# when the notebook is exported to a script; on POSIX, expanduser('~') reads $HOME.
home = os.path.expanduser('~')
#   TIP:  Change home,'workspace' to wherever you have cloned madlib-site repo
madlib_site_dir = os.path.join(home, 'workspace', 'madlib-site', 'community-artifacts', 'Deep-learning')
# Re-running the cell in the same kernel must not pile up duplicate entries
if madlib_site_dir not in sys.path:
    sys.path.append(madlib_site_dir)

In [3]:
# Import image loader module
from madlib_image_loader import ImageLoader, DbCredentials

In [4]:
# Connection settings used to reach the database; edit these to match your own instance
connection_settings = dict(
    db_name='madlib',
    user='pivotal',
    host='localhost',
    port='15432',
    password=''
)
db_creds = DbCredentials(**connection_settings)

In [5]:
# Fetch CIFAR-10 via Keras and keep only the first (training) split;
# the second element returned by load_data() is discarded
train_data, _ = cifar10.load_data()
data_x = train_data[0]
data_y = train_data[1]

In [6]:
# Create the loader that will connect using db_creds.
# num_workers sets how many parallel workers are spawned; raise it to load faster.
iloader = ImageLoader(db_creds=db_creds, num_workers=8)

In [7]:
# Write the arrays out to temporary directories and load them into the
# 'cifar_10_test' table; with append=False the table is created by this call
iloader.load_dataset_from_np(
    data_x, data_y,
    'cifar_10_test',
    append=False
)

MainProcess: Connected to madlib db.
Executing: CREATE TABLE cifar_10_test (id SERIAL, x REAL[], y TEXT)
CREATE TABLE
Created table cifar_10_test in madlib db
Spawning 8 workers...
Initializing PoolWorker-1 [pid 13756]
PoolWorker-1: Created temporary directory /tmp/madlib_l1UVaZIxxr
Initializing PoolWorker-2 [pid 13757]
PoolWorker-2: Created temporary directory /tmp/madlib_AQTX6xmhMK
Initializing PoolWorker-3 [pid 13758]
PoolWorker-3: Created temporary directory /tmp/madlib_7QOFnBn8jB
Initializing PoolWorker-4 [pid 13759]
PoolWorker-4: Created temporary directory /tmp/madlib_1AgOTkwfPn
Initializing PoolWorker-5 [pid 13761]
PoolWorker-1: Connected to madlib db.
Initializing PoolWorker-6 [pid 13762]
PoolWorker-5: Created temporary directory /tmp/madlib_RzYo51sdaR
Initializing PoolWorker-7 [pid 13764]
PoolWorker-6: Created temporary directory /tmp/madlib_uip4DCxC9m
PoolWorker-2: Connected to madlib db.
PoolWorker-7: Created temporary directory /tmp/madlib_8Urhpm5TkR
Initializing PoolWorke

PoolWorker-1: Removed temporary directory /tmp/madlib_l1UVaZIxxr
PoolWorker-6: Removed temporary directory /tmp/madlib_uip4DCxC9m
PoolWorker-8: Removed temporary directory /tmp/madlib_SzPIAtCqyb
Done!  Loaded 50000 images in 52.8276519775s
8 workers terminated.


In [8]:
# Same thing, but without writing out any temporary files; everything is handled in memory.
#   Should run about twice as fast, but this mode is not working yet, so the code below is left commented out.

#iloader.ROWS_PER_FILE = 1000  # Try adjusting this downward, if running low on memory
#iloader.load_dataset_from_disk(data_x, data_y, 'cifar_10_test2', append=True, no_temp_files=True)

In [9]:
# Switch to a different dataset: Fashion-MNIST, again keeping only the first (training) split.
# NOTE: this rebinds train_data, data_x and data_y, replacing the CIFAR-10 arrays.
train_data, _ = fashion_mnist.load_data()
data_x = train_data[0]
data_y = train_data[1]

In [10]:
# Load the current data_x / data_y arrays into 'fashion_mnist_test' in append mode.
# NOTE(review): with append=True, re-running this cell presumably inserts the same
# rows again — confirm before re-executing.
iloader.load_dataset_from_np(
    data_x, data_y,
    'fashion_mnist_test',
    append=True
)

MainProcess: Connected to madlib db.
Appending to table fashion_mnist_test in madlib db
Spawning 8 workers...
Initializing PoolWorker-9 [pid 14871]
PoolWorker-9: Created temporary directory /tmp/madlib_1j4HTsicJ8
Initializing PoolWorker-10 [pid 14872]
PoolWorker-10: Created temporary directory /tmp/madlib_XUO9OeFCRp
Initializing PoolWorker-11 [pid 14873]
PoolWorker-11: Created temporary directory /tmp/madlib_XXoZi8qgE1
Initializing PoolWorker-12 [pid 14874]
PoolWorker-12: Created temporary directory /tmp/madlib_hc0Qt4WpIv
Initializing PoolWorker-13 [pid 14875]
Initializing PoolWorker-14 [pid 14877]
PoolWorker-13: Created temporary directory /tmp/madlib_2JwMfqwlOC
PoolWorker-9: Connected to madlib db.
PoolWorker-14: Created temporary directory /tmp/madlib_0kkBdCmGO4
Initializing PoolWorker-15 [pid 14879]
PoolWorker-10: Connected to madlib db.
Initializing PoolWorker-16 [pid 14880]
PoolWorker-15: Created temporary directory /tmp/madlib_NGqaV8pYyu
PoolWorker-16: Created temporary director

PoolWorker-15: Wrote 1000 images to /tmp/madlib_NGqaV8pYyu/fashion_mnist_test0006.tmp
PoolWorker-9: Wrote 1000 images to /tmp/madlib_1j4HTsicJ8/fashion_mnist_test0006.tmp
PoolWorker-16: Wrote 1000 images to /tmp/madlib_mkO0vnSoJ8/fashion_mnist_test0006.tmp
PoolWorker-11: Wrote 1000 images to /tmp/madlib_XXoZi8qgE1/fashion_mnist_test0006.tmp
PoolWorker-14: Wrote 1000 images to /tmp/madlib_0kkBdCmGO4/fashion_mnist_test0006.tmp
PoolWorker-14: Loaded 1000 images into fashion_mnist_test
PoolWorker-10: Loaded 1000 images into fashion_mnist_test
PoolWorker-11: Loaded 1000 images into fashion_mnist_test
PoolWorker-15: Loaded 1000 images into fashion_mnist_test
PoolWorker-16: Loaded 1000 images into fashion_mnist_test
PoolWorker-9: Loaded 1000 images into fashion_mnist_test
PoolWorker-10: Wrote 1000 images to /tmp/madlib_XUO9OeFCRp/fashion_mnist_test0007.tmp
PoolWorker-15: Wrote 1000 images to /tmp/madlib_NGqaV8pYyu/fashion_mnist_test0007.tmp
PoolWorker-14: Wrote 1000 images to /tmp/madlib_0kkB

In [None]:
# Load a dataset straight from files on disk rather than from in-memory arrays.
# First argument is the source path; the second is presumably the destination
# table name, as in load_dataset_from_np — confirm against madlib_image_loader.
iloader.load_dataset_from_disk(
    '/tmp/image_test/cifar10',
    'fromdisk_test'
)