Skip to content
Closed
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
2 changes: 1 addition & 1 deletion doc/mainpage.dox.in
Original file line number Diff line number Diff line change
Expand Up @@ -291,7 +291,7 @@ Interface and implementation are subject to change.
@details A collection of modules for deep learning.
@{
@defgroup grp_keras_model_arch Load Model Architecture
@defgroup grp_minibatch_preprocessing_dl Mini-Batch Preprocessor for Images
@defgroup grp_input_preprocessor_dl Preprocessor for Images
@}
@defgroup grp_bayes Naive Bayes Classification
@defgroup grp_sample Random Sampling
Expand Down
619 changes: 619 additions & 0 deletions src/ports/postgres/modules/deep_learning/input_data_preprocessor.py_in

Large diffs are not rendered by default.

Large diffs are not rendered by default.

36 changes: 36 additions & 0 deletions src/ports/postgres/modules/deep_learning/madlib_keras_helper.py_in
Original file line number Diff line number Diff line change
Expand Up @@ -27,12 +27,48 @@ def expand_input_dims(input_data, target_type=None):
input_data = input_data.astype(target_type)
return input_data

def strip_trailing_nulls_from_class_values(class_values):
    """
        Truncate a list of class levels at the first None that appears
        after the first position.

        A None in the very first position is kept. Scanning then proceeds
        left to right, and as soon as a None is found at any later position,
        that element and everything after it are dropped.
        Examples:
            1) ['cat', 'dog']                    -> ['cat', 'dog']
            2) [None, 'cat', 'dog']              -> [None, 'cat', 'dog']
            3) [None, 'cat', 'dog', None, None]  -> [None, 'cat', 'dog']
            4) ['cat', 'dog', None, None]        -> ['cat', 'dog']
            5) [None, None]                      -> [None]
        @args:
            @param: class_values, list (may also be None)
        @returns:
            None when class_values is None, otherwise a new list holding
            only the leading valid class values.
    """
    # Nothing to strip when no class values were supplied at all.
    if class_values is None:
        return class_values

    kept = 0
    for position, level in enumerate(class_values):
        # A None is only tolerated in the very first slot; any later None
        # marks the start of the section to cut off.
        if position > 0 and level is None:
            break
        kept = position + 1
    return class_values[:kept]

# Names of the columns in the model summary table.
# They are defined once here as module-level constants so that other
# modules can import them from madlib_keras_helper rather than repeating
# the string literals.
CLASS_VALUES_COLNAME = "class_values"
NORMALIZING_CONST_COLNAME = "normalizing_const"
COMPILE_PARAMS_COLNAME = "compile_params"
DEPENDENT_VARNAME_COLNAME = "dependent_varname"
DEPENDENT_VARTYPE_COLNAME = "dependent_vartype"
INDEPENDENT_VARNAME_COLNAME = "independent_varname"
MODEL_ARCH_TABLE_COLNAME = "model_arch_table"
MODEL_ARCH_ID_COLNAME = "model_arch_id"
MODEL_DATA_COLNAME = "model_data"
Original file line number Diff line number Diff line change
Expand Up @@ -29,6 +29,7 @@ import numpy as np

from madlib_keras_helper import expand_input_dims
from madlib_keras_helper import MODEL_DATA_COLNAME
from madlib_keras_helper import strip_trailing_nulls_from_class_values
from madlib_keras_validator import PredictInputValidator
from madlib_keras_wrapper import get_device_name_and_set_cuda_env
from madlib_keras_wrapper import set_model_weights
Expand All @@ -45,41 +46,6 @@ import madlib_keras_serializer

# Identifier of this module. NOTE(review): its consumers are not visible in
# this excerpt; presumably used to tag messages raised from here - confirm.
MODULE_NAME = 'madlib_keras_predict'

def _strip_trailing_nulls_from_class_values(class_values):
"""
class_values is a list of unique class levels in training data. This
could have multiple Nones in it, and this function strips out all the
Nones that occur after the first element in the list.
Examples:
1) input class_values = ['cat', 'dog']
output class_values = ['cat', 'dog']

2) input class_values = [None, 'cat', 'dog']
output class_values = [None, 'cat', 'dog']

3) input class_values = [None, 'cat', 'dog', None, None]
output class_values = [None, 'cat', 'dog']

4) input class_values = ['cat', 'dog', None, None]
output class_values = ['cat', 'dog']

5) input class_values = [None, None]
output class_values = [None]
@args:
@param: class_values, list
@returns:
updated class_values list
"""
num_of_valid_class_values = 0
if class_values is not None:
for ele in class_values:
if ele is None and num_of_valid_class_values > 0:
break
num_of_valid_class_values += 1
# Pass only the valid class_values for creating columns
class_values = class_values[:num_of_valid_class_values]
return class_values

def predict(schema_madlib, model_table, test_table, id_col,
independent_varname, output_table, pred_type, use_gpu, **kwargs):
if not pred_type:
Expand Down Expand Up @@ -108,7 +74,7 @@ def predict(schema_madlib, model_table, test_table, id_col,
pred_col_name = "prob"
pred_col_type = 'double precision'

class_values = _strip_trailing_nulls_from_class_values(class_values)
class_values = strip_trailing_nulls_from_class_values(class_values)

prediction_select_clause = create_cols_from_array_sql_string(
class_values, intermediate_col, pred_col_name,
Expand Down
Loading