### Madlib Image Loader requires psycopg2.  If you don't already have it installed, run:

`pip install psycopg2-binary`

In [1]:
import sys
import os
from keras.datasets import cifar10, cifar100, mnist, fashion_mnist, imdb, reuters

Using TensorFlow backend.


In [2]:
# Add community-artifacts to PYTHON_PATH
     # TIP:  You can skip this cell if working directory of notebook is community-artifacts

home = %env HOME
     # TIP:  Change home,'workspace' to wherever you have cloned madlib-site repo
madlib_site_dir = os.path.join(home,'workspace','madlib-site','community-artifacts')
sys.path.append(madlib_site_dir)

In [3]:
# Import image loader module
from madlib_image_loader import ImageLoader, DbCredentials

In [4]:
# Specify database credentials, for connecting to db
db_creds = DbCredentials(db_name='madlib',
                         user='pivotal',
                         host='localhost',
                         port='15432',
                         password='')

In [5]:
# Load dataset into np array
train_data, _ = cifar10.load_data()
data_x, data_y = train_data

In [6]:
# Initialize ImageLoader (increase num_workers to run faster)
iloader = ImageLoader(num_workers=5, db_creds=db_creds)

In [7]:
# Save images to temporary directories and load into database
iloader.load_np_array_to_table(data_x, data_y, 'cifar_10_test', append=False, img_names=None)

MainProcess: Connected to madlib db.
Appending to table cifar_10_test in madlib db
Spawning 5 workers...
Initializing PoolWorker-1 [pid 240]
PoolWorker-1: Created temporary directory PoolWorker-1
Initializing PoolWorker-2 [pid 241]
PoolWorker-2: Created temporary directory PoolWorker-2
Initializing PoolWorker-3 [pid 242]
PoolWorker-3: Created temporary directory PoolWorker-3
Initializing PoolWorker-4 [pid 243]
PoolWorker-4: Created temporary directory PoolWorker-4
Initializing PoolWorker-5 [pid 245]
PoolWorker-1: Connected to madlib db.
PoolWorker-5: Created temporary directory PoolWorker-5
PoolWorker-2: Connected to madlib db.
PoolWorker-3: Connected to madlib db.
PoolWorker-4: Connected to madlib db.
PoolWorker-5: Connected to madlib db.
PoolWorker-1: Wrote 1000 images to /tmp/madlib_KdLWwZ322f/cifar_10_test0000.tmp
PoolWorker-2: Wrote 1000 images to /tmp/madlib_xlKP6JhnfV/cifar_10_test0000.tmp
PoolWorker-3: Wrote 1000 images to /tmp/madlib_kfSWAjQUxH/cifar_10_test0000.tmp
PoolWorker

In [2]:
TODO:  no_temp_files option currently has a bug--it looks like it succeeds, but table ens up being empty.

SyntaxError: invalid syntax (<ipython-input-2-c4a66f4c96da>, line 1)

In [9]:
# Same thing, but without writing out any temporary files; everything handled in-memory.
#   Should run about twice as fast.

iloader.ROWS_PER_FILE = 1000  # Try adjusting this downward, if running low on memory
iloader.load_np_array_to_table(data_x, data_y, 'cifar_10_test', append=True, no_temp_files=True)

Appending to table cifar_10_test in madlib db
Spawning 5 workers...
Initializing PoolWorker-6 [pid 279]
Initializing PoolWorker-7 [pid 280]
Initializing PoolWorker-8 [pid 281]
Initializing PoolWorker-9 [pid 284]
PoolWorker-6: Connected to madlib db.
Initializing PoolWorker-10 [pid 285]
PoolWorker-7: Connected to madlib db.
PoolWorker-8: Connected to madlib db.
PoolWorker-9: Connected to madlib db.
PoolWorker-10: Connected to madlib db.
PoolWorker-6: Loaded 1000 images into cifar_10_test
PoolWorker-7: Loaded 1000 images into cifar_10_test
PoolWorker-9: Loaded 1000 images into cifar_10_test
PoolWorker-8: Loaded 1000 images into cifar_10_test
PoolWorker-10: Loaded 1000 images into cifar_10_test
PoolWorker-6: Loaded 1000 images into cifar_10_test
PoolWorker-7: Loaded 1000 images into cifar_10_test
PoolWorker-9: Loaded 1000 images into cifar_10_test
PoolWorker-8: Loaded 1000 images into cifar_10_test
PoolWorker-10: Loaded 1000 images into cifar_10_test
PoolWorker-6: Loaded 1000 images into 

In [10]:
# Try loading a different dataset
train_data, _ = fashion_mnist.load_data()
data_x, data_y = train_data

In [12]:
iloader.load_np_array_to_table(data_x, data_y, 'fashion_mnist_test', append=False, no_temp_files=True)

Executing: CREATE TABLE fashion_mnist_test (id SERIAL, x REAL[], y TEXT)
CREATE TABLE
Created table fashion_mnist_test in madlib db
Spawning 5 workers...
Initializing PoolWorker-11 [pid 317]
Initializing PoolWorker-12 [pid 318]
Initializing PoolWorker-13 [pid 319]
Initializing PoolWorker-14 [pid 320]
Initializing PoolWorker-15 [pid 321]
PoolWorker-11: Connected to madlib db.
PoolWorker-12: Connected to madlib db.
PoolWorker-13: Connected to madlib db.
PoolWorker-14: Connected to madlib db.
PoolWorker-15: Connected to madlib db.
PoolWorker-11: Loaded 1000 images into fashion_mnist_test
PoolWorker-12: Loaded 1000 images into fashion_mnist_test
PoolWorker-13: Loaded 1000 images into fashion_mnist_test
PoolWorker-14: Loaded 1000 images into fashion_mnist_test
PoolWorker-15: Loaded 1000 images into fashion_mnist_test
PoolWorker-11: Loaded 1000 images into fashion_mnist_test
PoolWorker-12: Loaded 1000 images into fashion_mnist_test
PoolWorker-13: Loaded 1000 images into fashion_mnist_test
Po