From 47eefc1c91a33db0a788c2c145b8016b892de5ad Mon Sep 17 00:00:00 2001 From: Nandish Jayaram Date: Wed, 4 Apr 2018 14:42:38 -0700 Subject: [PATCH] MLP: Fix bug in array dep var for regression MLP training for regression fails if the dependent var type is an array. This is because the variable dependent_varname was not updated to reflect the new column created in the standardized table. This commit fixes that issue. Co-authored-by: Nikhil Kak --- src/ports/postgres/modules/convex/mlp_igd.py_in | 10 +++++----- 1 file changed, 5 insertions(+), 5 deletions(-) diff --git a/src/ports/postgres/modules/convex/mlp_igd.py_in b/src/ports/postgres/modules/convex/mlp_igd.py_in index 80105792e..2799355a2 100644 --- a/src/ports/postgres/modules/convex/mlp_igd.py_in +++ b/src/ports/postgres/modules/convex/mlp_igd.py_in @@ -155,21 +155,21 @@ def mlp(schema_madlib, source_table, output_table, independent_varname, normalize_data(locals()) dependent_vartype = get_expr_type(dependent_varname, source_table) + # We are now using tbl_data_scaled, so change the dependent + # varname accordingly. + dependent_varname = col_dep_var_norm_new if is_classification: # If dependent variable is an array during classification, assume # that it is already one-hot-encoded. if "[]" in dependent_vartype: - # We are now using tbl_data_scaled, so change the dependent - # varname accordingly. - dependent_varname = col_dep_var_norm_new num_output_nodes = get_col_dimension(tbl_data_scaled, dependent_varname) else: labels = plpy.execute("SELECT DISTINCT {0} FROM {1}". - format(dependent_varname, source_table)) + format(dependent_varname_backup, source_table)) num_output_nodes = len(labels) for label_obj in labels: - label = _format_label(label_obj[dependent_varname]) + label = _format_label(label_obj[dependent_varname_backup]) classes.append(label) classes.sort() level_vals_str = ','.join(["{0}={1}".format(