# Preprocessor for image data
This is a mini-batch preprocessor utility for image data:
* training_preprocessor_dl() for training datasets
* validation_preprocessor_dl() for validation datasets

Note that there is a separate mini-batch preprocessor utility for general use cases
http://madlib.apache.org/docs/latest/group__grp__minibatch__preprocessing.html

The preprocessor for image data was added in MADlib 1.16.

## Table of contents

<a href="#load_data">1. Load data</a>

<a href="#pp_train">2. Run preprocessor for training image data</a>

<a href="#pp_val">3. Run preprocessor for validation image data</a>

<a href="#load_data2">4. Load data, another format</a>

<a href="#pp_train2">5. Run preprocessor for training image data</a>

<a href="#pp_val2">6. Run preprocessor for validation image data</a>

<a href="#change_buffer">7. Change buffer size</a>

<a href="#set_num_classes">8. Setting number of classes</a>

In [1]:
%load_ext sql

  warn("IPython.utils.traitlets has moved to a top-level traitlets package.")


In [2]:
# Connect to the database that runs the examples in this notebook.
# Replace the connection string below with one for your own MADlib-enabled database.
# Greenplum Database 5.x on GCP (PM demo machine)
%sql postgresql://gpadmin@35.239.240.26:5432/madlib
        
# PostgreSQL local
# (alternative connection, left commented out)
#%sql postgresql://fmcquillan@localhost:5432/madlib

u'Connected: gpadmin@madlib'

In [3]:
# Report the installed MADlib version to confirm the library is reachable.
%sql select madlib.version();
# Alternative check, left commented out, which queries version() instead.
#%sql select version();

1 rows affected.


version
"MADlib version: 1.16-dev, git revision: rel/v1.15.1-98-g544a8e5, cmake configuration time: Mon May 20 16:40:50 UTC 2019, build type: release, build system: Linux-3.10.0-957.12.1.el7.x86_64, C compiler: gcc 4.8.5, C++ compiler: g++ 4.8.5"


<a id="load_data"></a>
# 1. Load data

Create an artificial 2x2 resolution color image data set with 3 possible classifications.  The RGB values are per-pixel arrays:

In [4]:
%%sql
-- Build a synthetic data set of 52 labelled 2x2 colour images.
--   rgb      2x2 grid of pixels, each pixel an array of three channel values
--   species  class label, one of cat, dog or bird
DROP TABLE IF EXISTS image_data;

CREATE TABLE image_data AS (
    -- floor() keeps every channel value in 0..255 with equal probability.
    -- A bare ::integer cast rounds instead of truncating, so it can emit 256
    -- and makes the two end values half as likely as the rest.
    SELECT ARRAY[
        ARRAY[
            ARRAY[floor(random() * 256)::integer, -- pixel (1,1)
                floor(random() * 256)::integer,
                floor(random() * 256)::integer],
            ARRAY[floor(random() * 256)::integer, -- pixel (2,1)
                floor(random() * 256)::integer,
                floor(random() * 256)::integer]
        ],
        ARRAY[
            ARRAY[floor(random() * 256)::integer, -- pixel (1,2)
                floor(random() * 256)::integer,
                floor(random() * 256)::integer],
            ARRAY[floor(random() * 256)::integer, -- pixel (2,2)
                floor(random() * 256)::integer,
                floor(random() * 256)::integer]
        ]
    ] AS rgb,
    -- floor(...) + 1 always yields a subscript of 1, 2 or 3. The ceil() form
    -- yields 0 when random() returns exactly 0, which gives a NULL label.
    ('{cat,dog,bird}'::text[])[floor(random() * 3)::integer + 1] AS species
    FROM generate_series(1, 52)
);

SELECT * FROM image_data;

Done.
52 rows affected.
52 rows affected.


rgb,species
"[[[76, 125, 240], [191, 13, 20]], [[153, 77, 7], [41, 143, 172]]]",bird
"[[[15, 126, 174], [246, 129, 81]], [[143, 220, 157], [96, 207, 223]]]",bird
"[[[84, 24, 1], [201, 28, 77]], [[70, 12, 11], [83, 33, 165]]]",dog
"[[[40, 206, 47], [211, 138, 62]], [[82, 56, 52], [210, 137, 195]]]",bird
"[[[38, 35, 125], [5, 18, 209]], [[29, 19, 153], [57, 95, 223]]]",cat
"[[[107, 50, 102], [15, 210, 142]], [[222, 1, 97], [103, 63, 179]]]",dog
"[[[115, 133, 40], [55, 51, 78]], [[89, 176, 83], [108, 129, 112]]]",dog
"[[[27, 169, 222], [249, 239, 73]], [[43, 85, 88], [253, 227, 54]]]",bird
"[[[68, 157, 61], [246, 60, 176]], [[123, 100, 230], [175, 178, 64]]]",dog
"[[[4, 172, 224], [116, 42, 251]], [[30, 8, 244], [12, 81, 31]]]",dog


<a id="pp_train"></a>
# 2.  Run preprocessor for training image data

Run the preprocessor to generate the packed output table:

In [5]:
%%sql
-- Remove the outputs of any earlier run, both the packed table and its summary.
DROP TABLE IF EXISTS
    image_data_packed,
    image_data_packed_summary;

-- Pack the training images into buffers.
SELECT madlib.training_preprocessor_dl(
    'image_data',        -- source table holding the images
    'image_data_packed', -- output table to create
    'species',           -- dependent variable column
    'rgb',               -- independent variable column
    NULL,                -- buffer size, NULL selects the default
    255                  -- normalizing constant
);

-- Show the packed buffers in buffer order.
SELECT *
FROM image_data_packed
ORDER BY buffer_id;

Done.
1 rows affected.
2 rows affected.


independent_var,dependent_var,buffer_id
"[[[[0.0862745, 0.278431, 0.25098], [1.0, 0.152941, 0.627451]], [[0.101961, 0.870588, 0.631373], [0.745098, 0.258824, 0.537255]]], [[[0.0588235, 0.494118, 0.682353], [0.964706, 0.505882, 0.317647]], [[0.560784, 0.862745, 0.615686], [0.376471, 0.811765, 0.87451]]], [[[0.156863, 0.807843, 0.184314], [0.827451, 0.541176, 0.243137]], [[0.321569, 0.219608, 0.203922], [0.823529, 0.537255, 0.764706]]], [[[0.419608, 0.196078, 0.4], [0.0588235, 0.823529, 0.556863]], [[0.870588, 0.00392157, 0.380392], [0.403922, 0.247059, 0.701961]]], [[[0.929412, 0.658824, 0.803922], [0.176471, 0.027451, 0.823529]], [[0.85098, 0.905882, 0.27451], [0.0117647, 0.886275, 0.392157]]], [[[0.00784314, 0.47451, 0.964706], [0.988235, 0.960784, 0.878431]], [[0.0117647, 0.713726, 0.137255], [0.286275, 0.792157, 0.576471]]], [[[0.92549, 0.764706, 0.254902], [0.886275, 0.337255, 0.160784]], [[0.423529, 0.94902, 0.137255], [0.784314, 0.588235, 0.980392]]], [[[0.376471, 0.105882, 0.717647], [0.00392157, 0.643137, 0.392157]], [[0.909804, 0.909804, 0.835294], [0.984314, 0.243137, 0.772549]]], [[[0.32549, 0.176471, 0.686275], [0.152941, 0.329412, 0.258824]], [[0.4, 0.584314, 0.921569], [0.741176, 0.498039, 0.12549]]], [[[0.498039, 0.14902, 0.356863], [0.247059, 0.592157, 0.94902]], [[0.776471, 0.788235, 0.301961], [0.980392, 0.576471, 0.917647]]], [[[0.105882, 0.0862745, 0.541176], [0.0823529, 0.196078, 0.466667]], [[0.741176, 1.0, 0.643137], [0.768628, 0.819608, 0.490196]]], [[[0.803922, 0.592157, 0.435294], [0.172549, 0.101961, 0.545098]], [[0.258824, 0.639216, 0.623529], [0.454902, 0.101961, 0.360784]]], [[[0.831373, 0.956863, 0.85098], [0.713726, 0.72549, 0.937255]], [[0.992157, 0.976471, 0.933333], [0.141176, 0.6, 0.027451]]], [[[0.905882, 0.541176, 0.611765], [0.980392, 0.360784, 0.647059]], [[0.843137, 0.0313726, 0.490196], [0.788235, 0.239216, 0.815686]]], [[[0.596078, 0.258824, 0.643137], [0.00784314, 0.556863, 0.423529]], [[0.713726, 0.4, 0.415686], [0.564706, 0.454902, 0.113725]]], [[[0.0156863, 
0.67451, 0.878431], [0.454902, 0.164706, 0.984314]], [[0.117647, 0.0313726, 0.956863], [0.0470588, 0.317647, 0.121569]]], [[[0.682353, 0.905882, 0.266667], [0.439216, 0.0745098, 0.341176]], [[0.941177, 0.160784, 0.831373], [0.258824, 0.0470588, 0.909804]]], [[[0.203922, 0.709804, 0.0235294], [0.247059, 0.341176, 0.952941]], [[0.00784314, 0.596078, 0.831373], [0.345098, 0.756863, 0.25098]]], [[[0.917647, 0.027451, 0.756863], [0.262745, 0.27451, 0.0784314]], [[0.439216, 0.960784, 0.231373], [0.768628, 0.215686, 0.631373]]], [[[0.670588, 0.552941, 0.533333], [0.188235, 0.788235, 0.796079]], [[0.443137, 0.701961, 0.568627], [0.611765, 0.105882, 0.498039]]], [[[0.0745098, 0.501961, 0.0862745], [0.490196, 0.321569, 0.890196]], [[0.0784314, 0.756863, 0.054902], [0.176471, 0.298039, 0.313726]]], [[[0.588235, 0.533333, 0.196078], [0.356863, 0.203922, 0.792157]], [[0.615686, 0.85098, 0.8], [0.168627, 0.266667, 0.509804]]], [[[0.105882, 0.662745, 0.870588], [0.976471, 0.937255, 0.286275]], [[0.168627, 0.333333, 0.345098], [0.992157, 0.890196, 0.211765]]], [[[0.576471, 0.247059, 0.25098], [0.662745, 0.341176, 0.921569]], [[0.211765, 0.87451, 0.101961], [0.996078, 0.666667, 0.545098]]], [[[0.784314, 0.709804, 0.85098], [0.996078, 0.854902, 0.0509804]], [[0.701961, 0.878431, 0.298039], [0.0392157, 0.823529, 0.305882]]], [[[0.258824, 0.258824, 0.917647], [0.780392, 0.168627, 0.411765]], [[0.929412, 0.52549, 0.658824], [0.517647, 0.470588, 0.431373]]]]","[[0, 0, 1], [1, 0, 0], [1, 0, 0], [0, 0, 1], [0, 1, 0], [0, 1, 0], [1, 0, 0], [0, 1, 0], [0, 0, 1], [1, 0, 0], [1, 0, 0], [0, 0, 1], [0, 1, 0], [0, 1, 0], [0, 0, 1], [0, 0, 1], [0, 1, 0], [0, 1, 0], [0, 0, 1], [0, 1, 0], [1, 0, 0], [0, 0, 1], [1, 0, 0], [1, 0, 0], [0, 0, 1], [0, 1, 0]]",0
"[[[[0.45098, 0.521569, 0.156863], [0.215686, 0.2, 0.305882]], [[0.34902, 0.690196, 0.32549], [0.423529, 0.505882, 0.439216]]], [[[0.0901961, 0.501961, 0.552941], [0.482353, 0.541176, 0.388235]], [[0.92549, 0.901961, 0.345098], [0.741176, 0.917647, 0.415686]]], [[[0.921569, 0.768628, 0.921569], [0.278431, 0.215686, 0.666667]], [[0.403922, 0.482353, 0.901961], [0.196078, 0.843137, 0.631373]]], [[[0.772549, 0.407843, 0.882353], [0.686275, 0.231373, 0.25098]], [[0.772549, 0.32549, 0.133333], [0.423529, 0.0980392, 0.0862745]]], [[[0.133333, 0.372549, 0.847059], [0.262745, 0.988235, 0.443137]], [[0.380392, 0.262745, 0.588235], [0.192157, 0.772549, 0.886275]]], [[[0.0901961, 0.517647, 0.894118], [0.862745, 0.658824, 0.968628]], [[0.886275, 0.843137, 0.945098], [0.92549, 0.12549, 1.0]]], [[[0.372549, 0.0823529, 0.0941177], [0.886275, 0.654902, 0.776471]], [[0.956863, 0.67451, 0.572549], [0.466667, 0.443137, 0.521569]]], [[[0.74902, 0.12549, 0.266667], [0.235294, 0.796079, 0.360784]], [[0.737255, 0.345098, 0.843137], [0.27451, 0.729412, 0.764706]]], [[[0.6, 0.121569, 0.278431], [0.145098, 0.811765, 0.509804]], [[0.298039, 0.607843, 0.239216], [0.592157, 0.164706, 0.980392]]], [[[0.764706, 0.0117647, 0.607843], [0.192157, 0.313726, 0.376471]], [[0.6, 0.192157, 0.0588235], [0.831373, 0.443137, 0.831373]]], [[[0.298039, 0.490196, 0.941177], [0.74902, 0.0509804, 0.0784314]], [[0.6, 0.301961, 0.027451], [0.160784, 0.560784, 0.67451]]], [[[0.792157, 0.666667, 0.133333], [0.917647, 0.0941177, 0.027451]], [[0.388235, 0.133333, 0.0431373], [0.72549, 0.627451, 0.964706]]], [[[0.890196, 0.0313726, 0.878431], [0.870588, 0.847059, 0.952941]], [[0.631373, 0.898039, 0.843137], [0.490196, 0.972549, 0.415686]]], [[[0.0705882, 0.027451, 0.184314], [0.980392, 0.0392157, 0.286275]], [[0.0588235, 0.34902, 0.705882], [0.956863, 0.580392, 0.886275]]], [[[0.266667, 0.615686, 0.239216], [0.964706, 0.235294, 0.690196]], [[0.482353, 0.392157, 0.901961], [0.686275, 0.698039, 0.25098]]], [[[0.839216, 
0.439216, 0.356863], [0.964706, 0.819608, 0.0156863]], [[0.0705882, 0.0823529, 0.890196], [0.172549, 0.615686, 0.372549]]], [[[0.662745, 0.109804, 0.266667], [0.87451, 0.101961, 0.533333]], [[0.486275, 0.341176, 0.494118], [0.721569, 0.027451, 0.980392]]], [[[0.0745098, 0.768628, 0.0509804], [0.894118, 0.858824, 0.0745098]], [[0.576471, 0.811765, 0.815686], [0.294118, 0.552941, 0.211765]]], [[[0.654902, 0.65098, 0.388235], [0.905882, 0.937255, 0.27451]], [[0.937255, 0.811765, 0.141176], [0.784314, 0.760784, 0.772549]]], [[[0.329412, 0.0941177, 0.00392157], [0.788235, 0.109804, 0.301961]], [[0.27451, 0.0470588, 0.0431373], [0.32549, 0.129412, 0.647059]]], [[[0.682353, 0.0705882, 0.0196078], [0.8, 0.509804, 0.768628]], [[0.952941, 0.772549, 0.823529], [0.741176, 0.682353, 0.521569]]], [[[0.74902, 0.152941, 0.258824], [0.0509804, 0.792157, 0.913726]], [[0.701961, 0.172549, 0.819608], [0.635294, 0.447059, 0.752941]]], [[[0.101961, 0.0823529, 0.309804], [0.415686, 0.901961, 0.231373]], [[0.180392, 0.819608, 0.509804], [0.396078, 0.482353, 0.913726]]], [[[0.0235294, 0.686275, 0.956863], [0.741176, 0.721569, 0.745098]], [[0.403922, 0.854902, 0.654902], [0.498039, 0.882353, 0.0392157]]], [[[0.231373, 0.156863, 0.8], [0.729412, 0.290196, 0.560784]], [[0.741176, 0.898039, 0.752941], [0.054902, 0.270588, 0.34902]]], [[[0.14902, 0.137255, 0.490196], [0.0196078, 0.0705882, 0.819608]], [[0.113725, 0.0745098, 0.6], [0.223529, 0.372549, 0.87451]]]]","[[0, 0, 1], [1, 0, 0], [0, 0, 1], [0, 1, 0], [1, 0, 0], [0, 1, 0], [0, 1, 0], [0, 1, 0], [0, 0, 1], [0, 1, 0], [1, 0, 0], [0, 0, 1], [0, 1, 0], [1, 0, 0], [0, 0, 1], [0, 0, 1], [0, 1, 0], [0, 1, 0], [1, 0, 0], [0, 0, 1], [1, 0, 0], [0, 0, 1], [1, 0, 0], [0, 0, 1], [0, 1, 0], [0, 1, 0]]",1


For small datasets like the one in this example, buffer size is mainly determined by the number of segments in the database. For a Greenplum database with 2 segments, there will be 2 rows with a buffer size of 26. For PostgreSQL, there would be only one row with a buffer size of 52, since it is a single-node database. For larger datasets, factors other than the number of segments also go into computing the buffer size.

Review the output summary table:

In [6]:
%%sql
-- Inspect the summary table that accompanies the packed training output.
SELECT *
FROM image_data_packed_summary;

1 rows affected.


source_table,output_table,dependent_varname,independent_varname,dependent_vartype,class_values,buffer_size,normalizing_const,num_classes
image_data,image_data_packed,species,rgb,text,"[u'bird', u'cat', u'dog']",26,255.0,3


<a id="pp_val"></a>
# 3.  Run preprocessor for validation image data

Run the preprocessor for the validation dataset. In this example we reuse the training images as validation data purely for demonstration; normally the validation data is different from the training data:

In [7]:
%%sql
-- Remove the outputs of any earlier run, both the packed table and its summary.
DROP TABLE IF EXISTS
    val_image_data_packed,
    val_image_data_packed_summary;

-- Pack the validation images, passing the training preprocessor's output
-- table as the fifth argument.
SELECT madlib.validation_preprocessor_dl(
    'image_data',            -- source table holding the images
    'val_image_data_packed', -- output table to create
    'species',               -- dependent variable column
    'rgb',                   -- independent variable column
    'image_data_packed',     -- output table of the training preprocessor step
    2                        -- buffer size
);

-- Show the packed buffers in buffer order.
SELECT *
FROM val_image_data_packed
ORDER BY buffer_id;

Done.
1 rows affected.
26 rows affected.


independent_var,dependent_var,buffer_id
"[[[[0.290196, 0.105882, 0.152941], [0.47451, 0.670588, 0.356863]], [[0.101961, 0.458824, 0.188235], [0.584314, 0.921569, 0.854902]]], [[[0.941177, 0.92549, 0.34902], [0.137255, 0.360784, 0.411765]], [[0.0627451, 0.917647, 0.898039], [0.203922, 0.313726, 0.247059]]]]","[[1, 0, 0], [0, 0, 1]]",0
"[[[[0.815686, 0.294118, 0.568627], [0.603922, 0.662745, 0.870588]], [[0.913726, 0.352941, 0.0745098], [0.0117647, 0.596078, 0.6]]], [[[0.0509804, 0.262745, 0.933333], [0.894118, 0.603922, 0.0901961]], [[0.643137, 0.12549, 0.623529], [0.0392157, 0.713726, 0.819608]]]]","[[0, 1, 0], [0, 0, 1]]",1
"[[[[0.14902, 0.52549, 0.6], [0.784314, 0.619608, 0.823529]], [[0.0823529, 0.0862745, 0.454902], [0.835294, 0.231373, 0.996078]]], [[[0.713726, 0.803922, 0.0156863], [0.678431, 0.415686, 0.470588]], [[0.156863, 0.85098, 0.941177], [0.27451, 0.141176, 0.72549]]]]","[[1, 0, 0], [0, 0, 1]]",2
"[[[[0.584314, 0.890196, 0.94902], [0.419608, 0.709804, 0.760784]], [[0.713726, 0.270588, 0.360784], [0.372549, 0.141176, 0.270588]]], [[[0.819608, 0.823529, 0.0980392], [0.262745, 0.713726, 0.552941]], [[0.203922, 0.890196, 0.945098], [0.156863, 0.235294, 0.466667]]]]","[[1, 0, 0], [0, 1, 0]]",3
"[[[[0.215686, 0.282353, 0.321569], [0.815686, 0.588235, 0.466667]], [[0.337255, 0.184314, 0.247059], [0.956863, 0.00392157, 0.329412]]], [[[0.129412, 0.388235, 0.270588], [0.980392, 0.623529, 0.984314]], [[0.780392, 0.639216, 0.658824], [0.192157, 0.105882, 0.815686]]]]","[[0, 1, 0], [0, 1, 0]]",4
"[[[[0.458824, 0.160784, 0.270588], [0.45098, 0.94902, 0.854902]], [[0.337255, 0.894118, 0.27451], [0.0431373, 0.65098, 0.988235]]], [[[0.0431373, 0.0862745, 0.180392], [0.772549, 0.615686, 1.0]], [[0.588235, 0.713726, 0.254902], [0.298039, 0.262745, 0.458824]]]]","[[0, 1, 0], [0, 1, 0]]",5
"[[[[0.00784314, 0.356863, 0.454902], [0.282353, 0.0784314, 0.670588]], [[0.564706, 0.4, 0.478431], [0.14902, 0.866667, 0.815686]]], [[[0.207843, 0.615686, 0.419608], [0.670588, 0.760784, 0.54902]], [[0.054902, 0.0313726, 0.52549], [0.678431, 0.0117647, 0.298039]]]]","[[0, 1, 0], [0, 1, 0]]",6
"[[[[0.666667, 0.490196, 0.419608], [0.478431, 0.529412, 0.462745]], [[0.564706, 0.709804, 0.231373], [0.176471, 0.701961, 0.819608]]], [[[0.113725, 0.764706, 0.337255], [0.439216, 0.803922, 0.796079]], [[0.6, 0.0745098, 0.243137], [0.54902, 0.929412, 0.580392]]]]","[[1, 0, 0], [0, 0, 1]]",7
"[[[[0.196078, 0.619608, 0.0862745], [0.180392, 0.933333, 0.0941177]], [[0.537255, 0.384314, 0.376471], [0.619608, 0.0509804, 0.941177]]], [[[0.960784, 0.113725, 0.14902], [0.415686, 0.301961, 0.356863]], [[0.027451, 0.721569, 0.0235294], [0.788235, 0.266667, 0.0784314]]]]","[[0, 1, 0], [1, 0, 0]]",8
"[[[[0.529412, 0.0862745, 0.882353], [0.341176, 0.415686, 0.996078]], [[0.101961, 0.752941, 0.431373], [0.909804, 0.545098, 0.027451]]], [[[0.792157, 0.760784, 0.827451], [0.0862745, 0.0705882, 0.490196]], [[0.576471, 0.490196, 0.972549], [0.101961, 0.952941, 0.533333]]]]","[[1, 0, 0], [1, 0, 0]]",9


Review the output summary table:

In [8]:
%%sql
-- Inspect the summary table that accompanies the packed validation output.
SELECT *
FROM val_image_data_packed_summary;

1 rows affected.


source_table,output_table,dependent_varname,independent_varname,dependent_vartype,class_values,buffer_size,normalizing_const,num_classes
image_data,val_image_data_packed,species,rgb,text,"[u'bird', u'cat', u'dog']",2,255.0,3


<a id="load_data2"></a>
# 4. Load data, another format
Create an artificial 2x2 resolution color image data set with 3 possible classifications.  The RGB values are unrolled into a flat array:

In [6]:
%%sql
-- Build a synthetic data set of 52 labelled 2x2 colour images in flat form.
--   rgb      one 12-element array per image, 4 R values then 4 G then 4 B
--   species  class label, one of cat, dog or bird
DROP TABLE IF EXISTS image_data;

CREATE TABLE image_data AS (
    -- floor() keeps every channel value in 0..255 with equal probability.
    -- A bare ::integer cast rounds instead of truncating, so it can emit 256
    -- and makes the two end values half as likely as the rest.
    SELECT ARRAY[
        floor(random() * 256)::integer, -- R values
        floor(random() * 256)::integer,
        floor(random() * 256)::integer,
        floor(random() * 256)::integer,
        floor(random() * 256)::integer, -- G values
        floor(random() * 256)::integer,
        floor(random() * 256)::integer,
        floor(random() * 256)::integer,
        floor(random() * 256)::integer, -- B values
        floor(random() * 256)::integer,
        floor(random() * 256)::integer,
        floor(random() * 256)::integer
    ] AS rgb,
    -- floor(...) + 1 always yields a subscript of 1, 2 or 3. The ceil() form
    -- yields 0 when random() returns exactly 0, which gives a NULL label.
    ('{cat,dog,bird}'::text[])[floor(random() * 3)::integer + 1] AS species
    FROM generate_series(1, 52)
);

SELECT * FROM image_data;

Done.
52 rows affected.
52 rows affected.


rgb,species
"[87, 118, 166, 176, 152, 5, 135, 219, 1, 249, 60, 67]",dog
"[109, 9, 113, 57, 10, 234, 47, 6, 223, 16, 9, 148]",cat
"[82, 94, 197, 145, 99, 28, 7, 8, 203, 159, 13, 83]",dog
"[14, 75, 182, 81, 218, 36, 90, 74, 93, 100, 52, 140]",dog
"[19, 156, 115, 167, 206, 198, 5, 147, 86, 104, 175, 93]",dog
"[122, 252, 125, 205, 118, 140, 24, 44, 221, 242, 80, 55]",cat
"[173, 155, 112, 57, 6, 131, 212, 121, 42, 162, 63, 47]",cat
"[149, 150, 227, 242, 6, 93, 238, 132, 42, 100, 15, 66]",dog
"[236, 52, 225, 36, 112, 141, 191, 224, 198, 197, 98, 154]",cat
"[140, 60, 125, 187, 113, 18, 81, 84, 5, 88, 178, 243]",bird


<a id="pp_train2"></a>
# 5.  Run preprocessor for training image data

Run the preprocessor to generate the packed output table:

In [7]:
%%sql
-- Remove the outputs of any earlier run, both the packed table and its summary.
DROP TABLE IF EXISTS
    image_data_packed,
    image_data_packed_summary;

-- Pack the flat-array training images into buffers.
SELECT madlib.training_preprocessor_dl(
    'image_data',        -- source table holding the images
    'image_data_packed', -- output table to create
    'species',           -- dependent variable column
    'rgb',               -- independent variable column
    NULL,                -- buffer size, NULL selects the default
    255                  -- normalizing constant
);

-- Show the packed buffers in buffer order.
SELECT *
FROM image_data_packed
ORDER BY buffer_id;

Done.
1 rows affected.
2 rows affected.


independent_var,dependent_var,buffer_id
"[[0.105882, 0.498039, 0.0196078, 0.709804, 0.741176, 0.568627, 0.945098, 0.227451, 0.298039, 0.380392, 0.298039, 0.615686], [0.537255, 0.866667, 0.776471, 0.172549, 0.133333, 0.352941, 0.164706, 0.529412, 0.14902, 0.254902, 0.427451, 0.670588], [0.235294, 0.262745, 0.74902, 0.309804, 0.0941177, 0.721569, 0.533333, 0.560784, 0.572549, 0.435294, 0.643137, 0.788235], [0.560784, 0.458824, 0.627451, 0.290196, 0.0901961, 0.733333, 0.784314, 0.109804, 0.435294, 0.521569, 0.678431, 0.376471], [0.576471, 0.109804, 0.733333, 0.537255, 0.258824, 0.54902, 0.701961, 0.843137, 0.827451, 0.67451, 0.964706, 0.976471], [0.427451, 0.0352941, 0.443137, 0.223529, 0.0392157, 0.917647, 0.184314, 0.0235294, 0.87451, 0.0627451, 0.0352941, 0.580392], [0.396078, 0.901961, 0.0980392, 0.368627, 0.388235, 0.388235, 0.415686, 0.301961, 0.533333, 0.466667, 0.780392, 0.133333], [0.121569, 0.423529, 0.635294, 0.819608, 0.803922, 0.878431, 0.286275, 0.603922, 0.894118, 0.188235, 0.14902, 0.196078], [0.568627, 0.458824, 0.0313726, 0.576471, 0.686275, 0.803922, 0.843137, 0.443137, 0.223529, 0.2, 0.721569, 0.533333], [0.772549, 0.266667, 0.764706, 0.419608, 0.360784, 0.278431, 0.313726, 0.215686, 0.937255, 0.27451, 0.101961, 0.776471], [0.54902, 0.235294, 0.490196, 0.733333, 0.443137, 0.0705882, 0.317647, 0.329412, 0.0196078, 0.345098, 0.698039, 0.952941], [0.580392, 1.0, 0.396078, 0.8, 0.0901961, 0.905882, 0.52549, 0.764706, 0.105882, 0.541176, 0.996078, 0.772549], [0.584314, 0.588235, 0.890196, 0.94902, 0.0235294, 0.364706, 0.933333, 0.517647, 0.164706, 0.392157, 0.0588235, 0.258824], [0.819608, 0.164706, 0.588235, 0.282353, 0.976471, 0.117647, 0.145098, 0.74902, 0.290196, 0.278431, 0.0941177, 0.454902], [0.615686, 0.364706, 0.741176, 0.419608, 0.431373, 0.972549, 0.54902, 0.188235, 0.807843, 0.0313726, 0.152941, 0.721569], [0.176471, 0.270588, 0.478431, 0.470588, 0.894118, 0.6, 0.894118, 0.52549, 0.415686, 0.694118, 0.403922, 0.701961], [0.109804, 0.294118, 0.109804, 0.0, 0.741176, 0.686275, 
0.113725, 0.470588, 0.219608, 0.368627, 0.0117647, 0.921569], [0.243137, 0.470588, 0.803922, 0.0156863, 0.905882, 0.54902, 0.305882, 0.545098, 0.109804, 0.921569, 0.423529, 0.933333], [0.286275, 0.0470588, 0.917647, 0.145098, 0.0509804, 0.482353, 0.603922, 0.0823529, 0.054902, 0.282353, 0.886275, 0.898039], [0.972549, 0.223529, 0.466667, 0.854902, 0.905882, 0.0823529, 0.215686, 0.643137, 0.498039, 0.65098, 0.611765, 0.0431373], [0.509804, 0.729412, 0.67451, 0.207843, 0.0156863, 0.231373, 0.666667, 0.643137, 0.521569, 0.756863, 0.368627, 0.301961], [0.227451, 0.133333, 0.756863, 0.839216, 0.486275, 0.972549, 0.917647, 0.403922, 0.027451, 0.694118, 0.717647, 0.984314], [0.74902, 0.290196, 0.643137, 0.564706, 0.615686, 0.878431, 0.827451, 0.360784, 0.188235, 0.917647, 0.0784314, 0.721569], [0.211765, 0.0784314, 0.921569, 0.0588235, 0.780392, 0.313726, 0.960784, 0.878431, 0.682353, 0.341176, 0.262745, 0.0941177], [0.054902, 0.294118, 0.713726, 0.317647, 0.854902, 0.141176, 0.352941, 0.290196, 0.364706, 0.392157, 0.203922, 0.54902], [0.478431, 0.988235, 0.490196, 0.803922, 0.462745, 0.54902, 0.0941177, 0.172549, 0.866667, 0.94902, 0.313726, 0.215686]]","[[1, 0, 0], [0, 1, 0], [0, 1, 0], [1, 0, 0], [0, 0, 1], [0, 1, 0], [0, 0, 1], [1, 0, 0], [0, 0, 1], [0, 1, 0], [1, 0, 0], [0, 1, 0], [0, 0, 1], [1, 0, 0], [0, 0, 1], [0, 1, 0], [0, 1, 0], [0, 1, 0], [1, 0, 0], [1, 0, 0], [1, 0, 0], [0, 0, 1], [0, 0, 1], [0, 0, 1], [0, 0, 1], [0, 1, 0]]",0
"[[0.313726, 0.960784, 0.403922, 0.262745, 0.819608, 0.262745, 0.603922, 0.737255, 0.380392, 0.509804, 0.580392, 0.701961], [0.654902, 0.811765, 0.517647, 0.427451, 0.0235294, 0.541176, 0.32549, 0.235294, 0.835294, 0.0509804, 0.4, 0.976471], [0.180392, 0.411765, 0.14902, 0.160784, 0.980392, 0.545098, 0.486275, 0.807843, 0.0313726, 0.45098, 0.427451, 0.0745098], [0.0235294, 0.901961, 0.211765, 0.294118, 0.729412, 0.164706, 0.141176, 0.439216, 0.890196, 0.0745098, 0.427451, 0.862745], [0.321569, 0.368627, 0.772549, 0.568627, 0.388235, 0.109804, 0.027451, 0.0313726, 0.796079, 0.623529, 0.0509804, 0.32549], [0.796079, 0.560784, 0.419608, 0.929412, 0.101961, 0.254902, 0.341176, 0.533333, 0.984314, 0.482353, 0.0627451, 0.803922], [0.607843, 0.85098, 0.356863, 0.32549, 0.321569, 0.0, 0.478431, 0.435294, 0.431373, 0.709804, 0.690196, 0.294118], [0.678431, 0.607843, 0.439216, 0.223529, 0.0235294, 0.513726, 0.831373, 0.47451, 0.164706, 0.635294, 0.247059, 0.184314], [0.533333, 0.211765, 0.00392157, 0.305882, 0.388235, 0.517647, 0.831373, 0.937255, 0.329412, 0.219608, 0.286275, 0.964706], [0.92549, 0.203922, 0.882353, 0.141176, 0.439216, 0.552941, 0.74902, 0.878431, 0.776471, 0.772549, 0.384314, 0.603922], [0.513726, 0.52549, 0.54902, 0.815686, 0.392157, 0.352941, 0.635294, 0.933333, 0.533333, 0.203922, 0.439216, 0.466667], [0.976471, 0.756863, 0.0392157, 0.588235, 0.466667, 0.356863, 0.545098, 0.870588, 0.619608, 0.360784, 0.129412, 0.219608], [0.396078, 0.32549, 0.831373, 0.490196, 0.968628, 0.623529, 0.0470588, 0.384314, 0.545098, 0.14902, 0.639216, 0.886275], [0.862745, 0.341176, 0.921569, 0.117647, 0.909804, 0.847059, 0.321569, 0.784314, 0.984314, 0.760784, 0.333333, 0.729412], [0.101961, 0.0941177, 0.447059, 0.639216, 0.509804, 0.0980392, 0.447059, 0.0235294, 0.52549, 0.466667, 0.564706, 0.85098], [0.843137, 0.592157, 0.172549, 0.811765, 0.00784314, 0.419608, 0.0588235, 0.521569, 0.121569, 0.109804, 0.278431, 0.537255], [0.341176, 0.462745, 0.65098, 0.690196, 0.596078, 
0.0196078, 0.529412, 0.858824, 0.00392157, 0.976471, 0.235294, 0.262745], [0.0745098, 0.611765, 0.45098, 0.654902, 0.807843, 0.776471, 0.0196078, 0.576471, 0.337255, 0.407843, 0.686275, 0.364706], [0.415686, 0.0784314, 0.992157, 0.133333, 0.513726, 0.168627, 0.545098, 0.666667, 0.329412, 0.521569, 0.207843, 0.815686], [0.835294, 0.415686, 0.427451, 0.729412, 0.141176, 0.533333, 0.823529, 0.592157, 0.168627, 0.329412, 0.690196, 0.611765], [0.254902, 0.113725, 0.152941, 0.188235, 0.392157, 0.760784, 0.0313726, 0.745098, 0.0823529, 0.352941, 0.745098, 0.564706], [0.807843, 0.823529, 0.541176, 0.180392, 0.568627, 0.513726, 0.937255, 0.611765, 0.0941177, 0.4, 0.964706, 0.639216], [0.243137, 0.709804, 0.231373, 0.913726, 0.72549, 0.764706, 0.121569, 0.733333, 0.0666667, 0.509804, 0.247059, 0.898039], [0.65098, 0.152941, 0.113725, 0.431373, 0.231373, 0.254902, 0.866667, 0.917647, 0.435294, 0.129412, 0.0117647, 0.698039], [0.137255, 0.431373, 0.686275, 0.184314, 0.266667, 0.0980392, 0.262745, 0.254902, 0.231373, 0.776471, 0.419608, 0.776471], [0.25098, 0.247059, 0.513726, 0.705882, 0.0509804, 0.756863, 0.411765, 0.282353, 0.666667, 0.137255, 0.0431373, 0.788235]]","[[0, 1, 0], [0, 0, 1], [1, 0, 0], [0, 1, 0], [0, 0, 1], [0, 1, 0], [0, 0, 1], [0, 1, 0], [0, 1, 0], [0, 1, 0], [1, 0, 0], [0, 1, 0], [1, 0, 0], [0, 0, 1], [1, 0, 0], [0, 0, 1], [0, 0, 1], [0, 0, 1], [0, 1, 0], [0, 0, 1], [1, 0, 0], [0, 1, 0], [0, 0, 1], [0, 0, 1], [0, 0, 1], [1, 0, 0]]",1


<a id="pp_val2"></a>
# 6.  Run preprocessor for validation image data

Run the preprocessor for the validation dataset. In this example we reuse the training images as validation data purely for demonstration; normally the validation data is different from the training data:

In [11]:
%%sql
-- Remove the outputs of any earlier run, both the packed table and its summary.
DROP TABLE IF EXISTS
    val_image_data_packed,
    val_image_data_packed_summary;

-- Pack the flat-array validation images, passing the training preprocessor's
-- output table as the fifth argument.
SELECT madlib.validation_preprocessor_dl(
    'image_data',            -- source table holding the images
    'val_image_data_packed', -- output table to create
    'species',               -- dependent variable column
    'rgb',                   -- independent variable column
    'image_data_packed',     -- output table of the training preprocessor step
    NULL                     -- buffer size, NULL selects the default
);

-- Show the packed buffers in buffer order.
SELECT *
FROM val_image_data_packed
ORDER BY buffer_id;

Done.
1 rows affected.
2 rows affected.


independent_var,dependent_var,buffer_id
"[[0.576471, 0.701961, 0.580392, 0.627451, 0.964706, 0.509804, 0.517647, 0.564706, 0.6, 0.152941, 0.690196, 0.215686], [0.258824, 0.478431, 0.772549, 0.105882, 0.152941, 0.345098, 0.803922, 0.729412, 0.972549, 0.764706, 0.235294, 0.482353], [0.72549, 0.682353, 0.109804, 0.105882, 0.796079, 0.368627, 0.584314, 0.564706, 0.47451, 0.733333, 0.909804, 0.27451], [0.152941, 0.870588, 0.623529, 0.917647, 0.384314, 0.345098, 0.596078, 0.494118, 0.45098, 0.388235, 0.862745, 0.0313726], [0.00392157, 0.901961, 0.160784, 0.654902, 0.184314, 0.313726, 0.521569, 0.807843, 0.227451, 0.905882, 0.152941, 0.823529], [0.843137, 0.85098, 0.972549, 0.92549, 0.227451, 0.980392, 0.823529, 0.388235, 0.631373, 0.00784314, 0.701961, 0.14902], [0.482353, 0.211765, 0.886275, 0.32549, 0.745098, 0.72549, 0.172549, 0.717647, 0.647059, 0.4, 0.694118, 0.466667], [0.00392157, 0.231373, 0.941177, 0.6, 0.364706, 0.419608, 0.811765, 0.243137, 0.745098, 0.552941, 0.968628, 0.913726], [0.145098, 0.203922, 0.878431, 0.258824, 0.858824, 0.882353, 0.490196, 0.796079, 0.478431, 0.854902, 0.215686, 0.286275], [0.0666667, 0.101961, 0.827451, 0.313726, 0.168627, 0.972549, 0.521569, 0.0431373, 0.227451, 0.376471, 0.929412, 0.717647], [0.113725, 0.647059, 0.00392157, 0.396078, 0.486275, 0.0705882, 0.494118, 0.309804, 0.384314, 0.666667, 0.278431, 0.905882], [0.380392, 0.866667, 0.529412, 0.760784, 0.541176, 0.647059, 0.407843, 0.54902, 0.0352941, 0.894118, 0.619608, 0.533333], [0.282353, 0.407843, 0.635294, 0.52549, 0.556863, 0.0117647, 0.384314, 0.0862745, 0.772549, 0.92549, 0.729412, 0.176471], [0.658824, 0.827451, 0.835294, 0.462745, 0.764706, 0.752941, 0.811765, 0.901961, 0.113725, 0.215686, 0.964706, 0.0235294], [0.835294, 0.690196, 0.639216, 0.227451, 0.372549, 0.294118, 0.0509804, 0.203922, 0.756863, 0.815686, 0.956863, 0.564706], [0.278431, 0.854902, 0.623529, 0.184314, 0.270588, 0.45098, 0.870588, 0.909804, 0.682353, 0.239216, 0.2, 0.733333], [0.219608, 0.933333, 0.223529, 0.145098, 0.443137, 0.505882, 
1.0, 0.0627451, 0.690196, 0.266667, 0.513726, 0.556863], [0.635294, 0.337255, 0.419608, 0.607843, 0.780392, 0.639216, 0.541176, 0.00392157, 0.784314, 0.984314, 0.509804, 0.776471], [0.764706, 0.866667, 0.486275, 0.913726, 0.517647, 0.113725, 0.247059, 0.937255, 0.72549, 0.0235294, 0.572549, 0.258824], [0.254902, 0.792157, 0.87451, 0.396078, 0.192157, 0.635294, 0.254902, 0.67451, 0.545098, 0.772549, 0.788235, 0.792157], [0.580392, 0.282353, 0.713726, 0.596078, 0.239216, 0.968628, 0.388235, 0.109804, 0.360784, 0.576471, 0.745098, 0.615686], [0.72549, 0.603922, 0.207843, 0.631373, 0.733333, 0.792157, 0.913726, 0.443137, 0.384314, 0.14902, 0.407843, 0.772549], [0.478431, 0.988235, 0.188235, 0.796079, 0.0901961, 0.913726, 0.4, 0.298039, 0.545098, 0.12549, 0.0823529, 0.454902], [0.521569, 0.733333, 0.968628, 0.776471, 0.945098, 0.443137, 0.760784, 0.129412, 0.235294, 0.847059, 0.0392157, 0.635294], [0.145098, 0.678431, 0.517647, 0.0235294, 0.470588, 0.0392157, 0.756863, 0.435294, 0.815686, 0.698039, 0.882353, 0.572549], [0.407843, 0.607843, 0.152941, 0.913726, 0.972549, 0.298039, 0.588235, 0.486275, 0.321569, 0.054902, 0.52549, 0.0745098]]","[[0, 1, 0], [0, 1, 0], [0, 0, 1], [1, 0, 0], [0, 0, 1], [1, 0, 0], [1, 0, 0], [0, 1, 0], [0, 1, 0], [0, 1, 0], [1, 0, 0], [0, 1, 0], [0, 0, 1], [0, 1, 0], [1, 0, 0], [0, 0, 1], [0, 1, 0], [0, 1, 0], [0, 1, 0], [1, 0, 0], [0, 1, 0], [0, 1, 0], [0, 0, 1], [0, 1, 0], [1, 0, 0], [0, 0, 1]]",0
"[[0.231373, 0.423529, 0.152941, 0.521569, 0.533333, 0.305882, 0.996078, 0.2, 0.662745, 0.0941177, 0.654902, 0.85098], [0.4, 0.0352941, 0.639216, 0.968628, 0.141176, 0.87451, 0.388235, 0.294118, 0.388235, 0.921569, 0.6, 0.384314], [0.831373, 0.635294, 0.0117647, 0.0470588, 0.831373, 0.411765, 0.0784314, 0.466667, 0.372549, 0.223529, 0.337255, 0.764706], [0.878431, 0.0392157, 0.694118, 0.356863, 0.364706, 0.52549, 0.996078, 0.372549, 0.568627, 0.823529, 0.784314, 0.65098], [0.333333, 0.764706, 0.862745, 0.611765, 0.223529, 0.737255, 0.647059, 0.917647, 0.0901961, 0.00784314, 0.439216, 0.0823529], [0.603922, 0.207843, 0.254902, 0.635294, 0.160784, 0.592157, 0.396078, 0.0156863, 0.196078, 0.619608, 0.752941, 0.843137], [0.933333, 0.0470588, 0.964706, 0.752941, 0.443137, 0.564706, 0.960784, 0.701961, 0.196078, 0.113725, 0.286275, 0.596078], [0.317647, 0.470588, 0.486275, 0.466667, 0.282353, 0.411765, 0.513726, 0.247059, 0.160784, 0.956863, 0.811765, 0.113725], [0.607843, 0.309804, 0.956863, 0.0705882, 0.0901961, 0.27451, 0.545098, 0.576471, 0.741176, 0.827451, 0.988235, 0.25098], [0.960784, 0.839216, 0.0588235, 0.870588, 0.101961, 0.666667, 0.176471, 0.054902, 0.737255, 0.266667, 0.329412, 0.278431], [0.403922, 0.564706, 0.384314, 0.690196, 0.658824, 0.341176, 0.521569, 0.717647, 0.207843, 0.623529, 0.380392, 0.380392], [0.501961, 0.278431, 0.635294, 0.215686, 0.45098, 0.0313726, 0.780392, 0.835294, 0.721569, 0.435294, 0.172549, 0.239216], [0.0, 0.192157, 0.478431, 0.905882, 0.901961, 0.980392, 0.180392, 0.533333, 0.192157, 0.631373, 0.564706, 0.976471], [0.168627, 0.858824, 0.027451, 0.972549, 0.458824, 0.556863, 0.407843, 0.494118, 0.721569, 0.784314, 0.219608, 0.4], [0.164706, 0.337255, 0.360784, 0.619608, 0.529412, 0.533333, 0.470588, 0.556863, 0.498039, 0.929412, 0.109804, 0.905882], [0.0666667, 0.780392, 0.67451, 0.0901961, 0.894118, 0.839216, 0.431373, 0.254902, 0.454902, 0.960784, 0.784314, 0.929412], [0.960784, 0.545098, 0.396078, 0.521569, 0.254902, 0.458824, 
0.298039, 0.933333, 0.54902, 0.192157, 0.768628, 0.980392], [0.192157, 0.796079, 0.376471, 0.92549, 0.235294, 0.329412, 0.470588, 0.627451, 0.85098, 0.72549, 0.0823529, 0.14902], [0.192157, 0.0392157, 0.556863, 0.74902, 0.211765, 0.74902, 0.541176, 0.588235, 0.67451, 0.776471, 0.917647, 0.137255], [0.34902, 0.239216, 0.537255, 0.12549, 0.282353, 0.729412, 0.164706, 0.839216, 0.478431, 0.376471, 0.588235, 0.0156863], [0.509804, 0.815686, 0.270588, 0.768628, 0.843137, 0.623529, 0.00784314, 0.376471, 0.74902, 0.290196, 0.101961, 0.909804], [0.286275, 0.388235, 0.0352941, 0.0745098, 0.0862745, 0.545098, 0.890196, 0.360784, 0.309804, 0.733333, 0.984314, 0.317647], [0.768628, 0.345098, 0.00392157, 0.380392, 0.592157, 0.290196, 0.768628, 0.627451, 0.368627, 0.854902, 0.168627, 0.254902], [0.466667, 0.603922, 0.972549, 0.235294, 0.866667, 0.737255, 0.580392, 0.870588, 0.113725, 0.168627, 0.156863, 0.882353], [0.584314, 0.760784, 0.227451, 0.0313726, 0.32549, 0.694118, 0.639216, 0.294118, 0.929412, 0.498039, 0.027451, 0.505882], [0.113725, 0.419608, 0.862745, 0.74902, 0.560784, 0.443137, 0.509804, 0.788235, 0.478431, 0.831373, 0.478431, 0.109804]]","[[1, 0, 0], [0, 1, 0], [0, 0, 1], [0, 1, 0], [0, 0, 1], [0, 0, 1], [0, 1, 0], [0, 0, 1], [0, 1, 0], [1, 0, 0], [1, 0, 0], [0, 1, 0], [0, 0, 1], [1, 0, 0], [0, 0, 1], [0, 0, 1], [0, 0, 1], [0, 0, 1], [0, 0, 1], [1, 0, 0], [0, 1, 0], [0, 1, 0], [0, 1, 0], [1, 0, 0], [0, 0, 1], [0, 1, 0]]",1


<a id="change_buffer"></a>
# 7. Change buffer size

The default buffer size generally works well, but you can set it explicitly if you need to:

In [12]:
%%sql
DROP TABLE IF EXISTS
    image_data_packed,
    image_data_packed_summary;

-- Pack the images again, this time passing an explicit buffer size
-- in place of the default.
SELECT madlib.training_preprocessor_dl(
    'image_data',          -- source table
    'image_data_packed',   -- output table
    'species',             -- dependent variable
    'rgb',                 -- independent variable
    10,                    -- buffer size
    255                    -- normalizing constant
);

-- Show the packed buffers in buffer_id order.
SELECT *
FROM image_data_packed
ORDER BY buffer_id;

Done.
1 rows affected.
6 rows affected.


independent_var,dependent_var,buffer_id
"[[0.72549, 0.682353, 0.109804, 0.105882, 0.796079, 0.368627, 0.584314, 0.564706, 0.47451, 0.733333, 0.909804, 0.27451], [0.521569, 0.733333, 0.968628, 0.776471, 0.945098, 0.443137, 0.760784, 0.129412, 0.235294, 0.847059, 0.0392157, 0.635294], [0.380392, 0.866667, 0.529412, 0.760784, 0.541176, 0.647059, 0.407843, 0.54902, 0.0352941, 0.894118, 0.619608, 0.533333], [0.145098, 0.203922, 0.878431, 0.258824, 0.858824, 0.882353, 0.490196, 0.796079, 0.478431, 0.854902, 0.215686, 0.286275], [0.835294, 0.690196, 0.639216, 0.227451, 0.372549, 0.294118, 0.0509804, 0.203922, 0.756863, 0.815686, 0.956863, 0.564706], [0.635294, 0.337255, 0.419608, 0.607843, 0.780392, 0.639216, 0.541176, 0.00392157, 0.784314, 0.984314, 0.509804, 0.776471], [0.843137, 0.85098, 0.972549, 0.92549, 0.227451, 0.980392, 0.823529, 0.388235, 0.631373, 0.00784314, 0.701961, 0.14902], [0.580392, 0.282353, 0.713726, 0.596078, 0.239216, 0.968628, 0.388235, 0.109804, 0.360784, 0.576471, 0.745098, 0.615686]]","[[0, 0, 1], [0, 1, 0], [0, 1, 0], [0, 1, 0], [1, 0, 0], [0, 1, 0], [1, 0, 0], [0, 1, 0]]",0
"[[0.192157, 0.0392157, 0.556863, 0.74902, 0.211765, 0.74902, 0.541176, 0.588235, 0.67451, 0.776471, 0.917647, 0.137255], [0.231373, 0.423529, 0.152941, 0.521569, 0.533333, 0.305882, 0.996078, 0.2, 0.662745, 0.0941177, 0.654902, 0.85098], [0.0, 0.192157, 0.478431, 0.905882, 0.901961, 0.980392, 0.180392, 0.533333, 0.192157, 0.631373, 0.564706, 0.976471], [0.0666667, 0.780392, 0.67451, 0.0901961, 0.894118, 0.839216, 0.431373, 0.254902, 0.454902, 0.960784, 0.784314, 0.929412], [0.286275, 0.388235, 0.0352941, 0.0745098, 0.0862745, 0.545098, 0.890196, 0.360784, 0.309804, 0.733333, 0.984314, 0.317647], [0.960784, 0.839216, 0.0588235, 0.870588, 0.101961, 0.666667, 0.176471, 0.054902, 0.737255, 0.266667, 0.329412, 0.278431], [0.933333, 0.0470588, 0.964706, 0.752941, 0.443137, 0.564706, 0.960784, 0.701961, 0.196078, 0.113725, 0.286275, 0.596078], [0.584314, 0.760784, 0.227451, 0.0313726, 0.32549, 0.694118, 0.639216, 0.294118, 0.929412, 0.498039, 0.027451, 0.505882], [0.878431, 0.0392157, 0.694118, 0.356863, 0.364706, 0.52549, 0.996078, 0.372549, 0.568627, 0.823529, 0.784314, 0.65098]]","[[0, 0, 1], [1, 0, 0], [0, 0, 1], [0, 0, 1], [0, 1, 0], [1, 0, 0], [0, 1, 0], [0, 0, 1], [0, 1, 0]]",1
"[[0.282353, 0.407843, 0.635294, 0.52549, 0.556863, 0.0117647, 0.384314, 0.0862745, 0.772549, 0.92549, 0.729412, 0.176471], [0.145098, 0.678431, 0.517647, 0.0235294, 0.470588, 0.0392157, 0.756863, 0.435294, 0.815686, 0.698039, 0.882353, 0.572549], [0.576471, 0.701961, 0.580392, 0.627451, 0.964706, 0.509804, 0.517647, 0.564706, 0.6, 0.152941, 0.690196, 0.215686], [0.152941, 0.870588, 0.623529, 0.917647, 0.384314, 0.345098, 0.596078, 0.494118, 0.45098, 0.388235, 0.862745, 0.0313726], [0.482353, 0.211765, 0.886275, 0.32549, 0.745098, 0.72549, 0.172549, 0.717647, 0.647059, 0.4, 0.694118, 0.466667], [0.0666667, 0.101961, 0.827451, 0.313726, 0.168627, 0.972549, 0.521569, 0.0431373, 0.227451, 0.376471, 0.929412, 0.717647], [0.764706, 0.866667, 0.486275, 0.913726, 0.517647, 0.113725, 0.247059, 0.937255, 0.72549, 0.0235294, 0.572549, 0.258824], [0.72549, 0.603922, 0.207843, 0.631373, 0.733333, 0.792157, 0.913726, 0.443137, 0.384314, 0.14902, 0.407843, 0.772549], [0.278431, 0.854902, 0.623529, 0.184314, 0.270588, 0.45098, 0.870588, 0.909804, 0.682353, 0.239216, 0.2, 0.733333]]","[[0, 0, 1], [1, 0, 0], [0, 1, 0], [1, 0, 0], [1, 0, 0], [0, 1, 0], [0, 1, 0], [0, 1, 0], [0, 0, 1]]",2
"[[0.168627, 0.858824, 0.027451, 0.972549, 0.458824, 0.556863, 0.407843, 0.494118, 0.721569, 0.784314, 0.219608, 0.4], [0.113725, 0.419608, 0.862745, 0.74902, 0.560784, 0.443137, 0.509804, 0.788235, 0.478431, 0.831373, 0.478431, 0.109804], [0.960784, 0.545098, 0.396078, 0.521569, 0.254902, 0.458824, 0.298039, 0.933333, 0.54902, 0.192157, 0.768628, 0.980392], [0.403922, 0.564706, 0.384314, 0.690196, 0.658824, 0.341176, 0.521569, 0.717647, 0.207843, 0.623529, 0.380392, 0.380392], [0.317647, 0.470588, 0.486275, 0.466667, 0.282353, 0.411765, 0.513726, 0.247059, 0.160784, 0.956863, 0.811765, 0.113725], [0.34902, 0.239216, 0.537255, 0.12549, 0.282353, 0.729412, 0.164706, 0.839216, 0.478431, 0.376471, 0.588235, 0.0156863], [0.768628, 0.345098, 0.00392157, 0.380392, 0.592157, 0.290196, 0.768628, 0.627451, 0.368627, 0.854902, 0.168627, 0.254902], [0.333333, 0.764706, 0.862745, 0.611765, 0.223529, 0.737255, 0.647059, 0.917647, 0.0901961, 0.00784314, 0.439216, 0.0823529], [0.4, 0.0352941, 0.639216, 0.968628, 0.141176, 0.87451, 0.388235, 0.294118, 0.388235, 0.921569, 0.6, 0.384314]]","[[1, 0, 0], [0, 1, 0], [0, 0, 1], [1, 0, 0], [0, 0, 1], [1, 0, 0], [0, 1, 0], [0, 0, 1], [0, 1, 0]]",3
"[[0.658824, 0.827451, 0.835294, 0.462745, 0.764706, 0.752941, 0.811765, 0.901961, 0.113725, 0.215686, 0.964706, 0.0235294], [0.00392157, 0.901961, 0.160784, 0.654902, 0.184314, 0.313726, 0.521569, 0.807843, 0.227451, 0.905882, 0.152941, 0.823529], [0.113725, 0.647059, 0.00392157, 0.396078, 0.486275, 0.0705882, 0.494118, 0.309804, 0.384314, 0.666667, 0.278431, 0.905882], [0.478431, 0.988235, 0.188235, 0.796079, 0.0901961, 0.913726, 0.4, 0.298039, 0.545098, 0.12549, 0.0823529, 0.454902], [0.00392157, 0.231373, 0.941177, 0.6, 0.364706, 0.419608, 0.811765, 0.243137, 0.745098, 0.552941, 0.968628, 0.913726], [0.258824, 0.478431, 0.772549, 0.105882, 0.152941, 0.345098, 0.803922, 0.729412, 0.972549, 0.764706, 0.235294, 0.482353], [0.219608, 0.933333, 0.223529, 0.145098, 0.443137, 0.505882, 1.0, 0.0627451, 0.690196, 0.266667, 0.513726, 0.556863], [0.254902, 0.792157, 0.87451, 0.396078, 0.192157, 0.635294, 0.254902, 0.67451, 0.545098, 0.772549, 0.788235, 0.792157], [0.407843, 0.607843, 0.152941, 0.913726, 0.972549, 0.298039, 0.588235, 0.486275, 0.321569, 0.054902, 0.52549, 0.0745098]]","[[0, 1, 0], [0, 0, 1], [1, 0, 0], [0, 0, 1], [0, 1, 0], [0, 1, 0], [0, 1, 0], [1, 0, 0], [0, 0, 1]]",4
"[[0.466667, 0.603922, 0.972549, 0.235294, 0.866667, 0.737255, 0.580392, 0.870588, 0.113725, 0.168627, 0.156863, 0.882353], [0.164706, 0.337255, 0.360784, 0.619608, 0.529412, 0.533333, 0.470588, 0.556863, 0.498039, 0.929412, 0.109804, 0.905882], [0.192157, 0.796079, 0.376471, 0.92549, 0.235294, 0.329412, 0.470588, 0.627451, 0.85098, 0.72549, 0.0823529, 0.14902], [0.509804, 0.815686, 0.270588, 0.768628, 0.843137, 0.623529, 0.00784314, 0.376471, 0.74902, 0.290196, 0.101961, 0.909804], [0.607843, 0.309804, 0.956863, 0.0705882, 0.0901961, 0.27451, 0.545098, 0.576471, 0.741176, 0.827451, 0.988235, 0.25098], [0.603922, 0.207843, 0.254902, 0.635294, 0.160784, 0.592157, 0.396078, 0.0156863, 0.196078, 0.619608, 0.752941, 0.843137], [0.831373, 0.635294, 0.0117647, 0.0470588, 0.831373, 0.411765, 0.0784314, 0.466667, 0.372549, 0.223529, 0.337255, 0.764706], [0.501961, 0.278431, 0.635294, 0.215686, 0.45098, 0.0313726, 0.780392, 0.835294, 0.721569, 0.435294, 0.172549, 0.239216]]","[[1, 0, 0], [0, 0, 1], [0, 0, 1], [0, 1, 0], [0, 1, 0], [0, 0, 1], [0, 0, 1], [0, 1, 0]]",5


Review the output summary data:

In [13]:
%%sql
SELECT
    *  -- every column of the summary table for the run above
FROM image_data_packed_summary;

1 rows affected.


source_table,output_table,dependent_varname,independent_varname,dependent_vartype,class_values,buffer_size,normalizing_const,num_classes
image_data,image_data_packed,species,rgb,text,"[u'bird', u'cat', u'dog']",10,255.0,3


<a id="set_num_classes"></a>
# 8. Setting number of classes

If you want the 1-hot encoded vector to have more classes than are present in the data, use the num_classes parameter:

In [14]:
%%sql
DROP TABLE IF EXISTS
    image_data_packed,
    image_data_packed_summary;

-- Request a 1-hot encoding that is 5 classes wide even though the
-- source data holds fewer distinct class values.
SELECT madlib.training_preprocessor_dl(
    'image_data',          -- source table
    'image_data_packed',   -- output table
    'species',             -- dependent variable
    'rgb',                 -- independent variable
    NULL,                  -- buffer size, passed as NULL here
    255,                   -- normalizing constant
    5                      -- number of desired class values
);

-- Show the packed buffers in buffer_id order.
SELECT *
FROM image_data_packed
ORDER BY buffer_id;

Done.
1 rows affected.
2 rows affected.


independent_var,dependent_var,buffer_id
"[[0.258824, 0.478431, 0.772549, 0.105882, 0.152941, 0.345098, 0.803922, 0.729412, 0.972549, 0.764706, 0.235294, 0.482353], [0.482353, 0.211765, 0.886275, 0.32549, 0.745098, 0.72549, 0.172549, 0.717647, 0.647059, 0.4, 0.694118, 0.466667], [0.72549, 0.603922, 0.207843, 0.631373, 0.733333, 0.792157, 0.913726, 0.443137, 0.384314, 0.14902, 0.407843, 0.772549], [0.580392, 0.282353, 0.713726, 0.596078, 0.239216, 0.968628, 0.388235, 0.109804, 0.360784, 0.576471, 0.745098, 0.615686], [0.152941, 0.870588, 0.623529, 0.917647, 0.384314, 0.345098, 0.596078, 0.494118, 0.45098, 0.388235, 0.862745, 0.0313726], [0.835294, 0.690196, 0.639216, 0.227451, 0.372549, 0.294118, 0.0509804, 0.203922, 0.756863, 0.815686, 0.956863, 0.564706], [0.72549, 0.682353, 0.109804, 0.105882, 0.796079, 0.368627, 0.584314, 0.564706, 0.47451, 0.733333, 0.909804, 0.27451], [0.764706, 0.866667, 0.486275, 0.913726, 0.517647, 0.113725, 0.247059, 0.937255, 0.72549, 0.0235294, 0.572549, 0.258824], [0.478431, 0.988235, 0.188235, 0.796079, 0.0901961, 0.913726, 0.4, 0.298039, 0.545098, 0.12549, 0.0823529, 0.454902], [0.145098, 0.678431, 0.517647, 0.0235294, 0.470588, 0.0392157, 0.756863, 0.435294, 0.815686, 0.698039, 0.882353, 0.572549], [0.380392, 0.866667, 0.529412, 0.760784, 0.541176, 0.647059, 0.407843, 0.54902, 0.0352941, 0.894118, 0.619608, 0.533333], [0.843137, 0.85098, 0.972549, 0.92549, 0.227451, 0.980392, 0.823529, 0.388235, 0.631373, 0.00784314, 0.701961, 0.14902], [0.407843, 0.607843, 0.152941, 0.913726, 0.972549, 0.298039, 0.588235, 0.486275, 0.321569, 0.054902, 0.52549, 0.0745098], [0.0666667, 0.101961, 0.827451, 0.313726, 0.168627, 0.972549, 0.521569, 0.0431373, 0.227451, 0.376471, 0.929412, 0.717647], [0.576471, 0.701961, 0.580392, 0.627451, 0.964706, 0.509804, 0.517647, 0.564706, 0.6, 0.152941, 0.690196, 0.215686], [0.635294, 0.337255, 0.419608, 0.607843, 0.780392, 0.639216, 0.541176, 0.00392157, 0.784314, 0.984314, 0.509804, 0.776471], [0.00392157, 0.231373, 0.941177, 0.6, 0.364706, 0.419608, 
0.811765, 0.243137, 0.745098, 0.552941, 0.968628, 0.913726], [0.278431, 0.854902, 0.623529, 0.184314, 0.270588, 0.45098, 0.870588, 0.909804, 0.682353, 0.239216, 0.2, 0.733333], [0.00392157, 0.901961, 0.160784, 0.654902, 0.184314, 0.313726, 0.521569, 0.807843, 0.227451, 0.905882, 0.152941, 0.823529], [0.658824, 0.827451, 0.835294, 0.462745, 0.764706, 0.752941, 0.811765, 0.901961, 0.113725, 0.215686, 0.964706, 0.0235294], [0.282353, 0.407843, 0.635294, 0.52549, 0.556863, 0.0117647, 0.384314, 0.0862745, 0.772549, 0.92549, 0.729412, 0.176471], [0.219608, 0.933333, 0.223529, 0.145098, 0.443137, 0.505882, 1.0, 0.0627451, 0.690196, 0.266667, 0.513726, 0.556863], [0.521569, 0.733333, 0.968628, 0.776471, 0.945098, 0.443137, 0.760784, 0.129412, 0.235294, 0.847059, 0.0392157, 0.635294], [0.145098, 0.203922, 0.878431, 0.258824, 0.858824, 0.882353, 0.490196, 0.796079, 0.478431, 0.854902, 0.215686, 0.286275], [0.113725, 0.647059, 0.00392157, 0.396078, 0.486275, 0.0705882, 0.494118, 0.309804, 0.384314, 0.666667, 0.278431, 0.905882], [0.254902, 0.792157, 0.87451, 0.396078, 0.192157, 0.635294, 0.254902, 0.67451, 0.545098, 0.772549, 0.788235, 0.792157]]","[[0, 1, 0, 0, 0], [1, 0, 0, 0, 0], [0, 1, 0, 0, 0], [0, 1, 0, 0, 0], [1, 0, 0, 0, 0], [1, 0, 0, 0, 0], [0, 0, 1, 0, 0], [0, 1, 0, 0, 0], [0, 0, 1, 0, 0], [1, 0, 0, 0, 0], [0, 1, 0, 0, 0], [1, 0, 0, 0, 0], [0, 0, 1, 0, 0], [0, 1, 0, 0, 0], [0, 1, 0, 0, 0], [0, 1, 0, 0, 0], [0, 1, 0, 0, 0], [0, 0, 1, 0, 0], [0, 0, 1, 0, 0], [0, 1, 0, 0, 0], [0, 0, 1, 0, 0], [0, 1, 0, 0, 0], [0, 1, 0, 0, 0], [0, 1, 0, 0, 0], [1, 0, 0, 0, 0], [1, 0, 0, 0, 0]]",0
"[[0.509804, 0.815686, 0.270588, 0.768628, 0.843137, 0.623529, 0.00784314, 0.376471, 0.74902, 0.290196, 0.101961, 0.909804], [0.831373, 0.635294, 0.0117647, 0.0470588, 0.831373, 0.411765, 0.0784314, 0.466667, 0.372549, 0.223529, 0.337255, 0.764706], [0.192157, 0.0392157, 0.556863, 0.74902, 0.211765, 0.74902, 0.541176, 0.588235, 0.67451, 0.776471, 0.917647, 0.137255], [0.164706, 0.337255, 0.360784, 0.619608, 0.529412, 0.533333, 0.470588, 0.556863, 0.498039, 0.929412, 0.109804, 0.905882], [0.933333, 0.0470588, 0.964706, 0.752941, 0.443137, 0.564706, 0.960784, 0.701961, 0.196078, 0.113725, 0.286275, 0.596078], [0.768628, 0.345098, 0.00392157, 0.380392, 0.592157, 0.290196, 0.768628, 0.627451, 0.368627, 0.854902, 0.168627, 0.254902], [0.584314, 0.760784, 0.227451, 0.0313726, 0.32549, 0.694118, 0.639216, 0.294118, 0.929412, 0.498039, 0.027451, 0.505882], [0.403922, 0.564706, 0.384314, 0.690196, 0.658824, 0.341176, 0.521569, 0.717647, 0.207843, 0.623529, 0.380392, 0.380392], [0.4, 0.0352941, 0.639216, 0.968628, 0.141176, 0.87451, 0.388235, 0.294118, 0.388235, 0.921569, 0.6, 0.384314], [0.603922, 0.207843, 0.254902, 0.635294, 0.160784, 0.592157, 0.396078, 0.0156863, 0.196078, 0.619608, 0.752941, 0.843137], [0.501961, 0.278431, 0.635294, 0.215686, 0.45098, 0.0313726, 0.780392, 0.835294, 0.721569, 0.435294, 0.172549, 0.239216], [0.878431, 0.0392157, 0.694118, 0.356863, 0.364706, 0.52549, 0.996078, 0.372549, 0.568627, 0.823529, 0.784314, 0.65098], [0.286275, 0.388235, 0.0352941, 0.0745098, 0.0862745, 0.545098, 0.890196, 0.360784, 0.309804, 0.733333, 0.984314, 0.317647], [0.960784, 0.839216, 0.0588235, 0.870588, 0.101961, 0.666667, 0.176471, 0.054902, 0.737255, 0.266667, 0.329412, 0.278431], [0.113725, 0.419608, 0.862745, 0.74902, 0.560784, 0.443137, 0.509804, 0.788235, 0.478431, 0.831373, 0.478431, 0.109804], [0.0666667, 0.780392, 0.67451, 0.0901961, 0.894118, 0.839216, 0.431373, 0.254902, 0.454902, 0.960784, 0.784314, 0.929412], [0.960784, 0.545098, 0.396078, 0.521569, 
0.254902, 0.458824, 0.298039, 0.933333, 0.54902, 0.192157, 0.768628, 0.980392], [0.231373, 0.423529, 0.152941, 0.521569, 0.533333, 0.305882, 0.996078, 0.2, 0.662745, 0.0941177, 0.654902, 0.85098], [0.0, 0.192157, 0.478431, 0.905882, 0.901961, 0.980392, 0.180392, 0.533333, 0.192157, 0.631373, 0.564706, 0.976471], [0.168627, 0.858824, 0.027451, 0.972549, 0.458824, 0.556863, 0.407843, 0.494118, 0.721569, 0.784314, 0.219608, 0.4], [0.34902, 0.239216, 0.537255, 0.12549, 0.282353, 0.729412, 0.164706, 0.839216, 0.478431, 0.376471, 0.588235, 0.0156863], [0.192157, 0.796079, 0.376471, 0.92549, 0.235294, 0.329412, 0.470588, 0.627451, 0.85098, 0.72549, 0.0823529, 0.14902], [0.607843, 0.309804, 0.956863, 0.0705882, 0.0901961, 0.27451, 0.545098, 0.576471, 0.741176, 0.827451, 0.988235, 0.25098], [0.466667, 0.603922, 0.972549, 0.235294, 0.866667, 0.737255, 0.580392, 0.870588, 0.113725, 0.168627, 0.156863, 0.882353], [0.317647, 0.470588, 0.486275, 0.466667, 0.282353, 0.411765, 0.513726, 0.247059, 0.160784, 0.956863, 0.811765, 0.113725], [0.333333, 0.764706, 0.862745, 0.611765, 0.223529, 0.737255, 0.647059, 0.917647, 0.0901961, 0.00784314, 0.439216, 0.0823529]]","[[0, 1, 0, 0, 0], [0, 0, 1, 0, 0], [0, 0, 1, 0, 0], [0, 0, 1, 0, 0], [0, 1, 0, 0, 0], [0, 1, 0, 0, 0], [0, 0, 1, 0, 0], [1, 0, 0, 0, 0], [0, 1, 0, 0, 0], [0, 0, 1, 0, 0], [0, 1, 0, 0, 0], [0, 1, 0, 0, 0], [0, 1, 0, 0, 0], [1, 0, 0, 0, 0], [0, 1, 0, 0, 0], [0, 0, 1, 0, 0], [0, 0, 1, 0, 0], [1, 0, 0, 0, 0], [0, 0, 1, 0, 0], [1, 0, 0, 0, 0], [1, 0, 0, 0, 0], [0, 0, 1, 0, 0], [0, 1, 0, 0, 0], [1, 0, 0, 0, 0], [0, 0, 1, 0, 0], [0, 0, 1, 0, 0]]",1


In [16]:
%%sql
SELECT
    *  -- every column of the summary table, including num_classes
FROM image_data_packed_summary;

1 rows affected.


source_table,output_table,dependent_varname,independent_varname,dependent_vartype,class_values,buffer_size,normalizing_const,num_classes
image_data,image_data_packed,species,rgb,text,"[u'bird', u'cat', u'dog', None, None]",26,255.0,5
