From 47eefc1c91a33db0a788c2c145b8016b892de5ad Mon Sep 17 00:00:00 2001
From: Nandish Jayaram <njayaram@apache.org>
Date: Wed, 4 Apr 2018 14:42:38 -0700
Subject: [PATCH] MLP: Fix bug in array dep var for regression

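MLP training for regression fails if the dependent variable is an array.
This is because dependent_varname was updated to refer to the new column
created in the standardized table only in the classification branch that
handles array dependent variables, and never for regression. This commit
fixes that by updating dependent_varname before the is_classification
check. The non-array classification path, which still queries the source
table for distinct labels, now uses dependent_varname_backup instead.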

Co-authored-by: Nikhil Kak <nkak@pivotal.io>
---
 src/ports/postgres/modules/convex/mlp_igd.py_in | 10 +++++-----
 1 file changed, 5 insertions(+), 5 deletions(-)

diff --git a/src/ports/postgres/modules/convex/mlp_igd.py_in b/src/ports/postgres/modules/convex/mlp_igd.py_in
index 80105792e..2799355a2 100644
--- a/src/ports/postgres/modules/convex/mlp_igd.py_in
+++ b/src/ports/postgres/modules/convex/mlp_igd.py_in
@@ -155,21 +155,21 @@ def mlp(schema_madlib, source_table, output_table, independent_varname,
         normalize_data(locals())
         dependent_vartype = get_expr_type(dependent_varname, source_table)
 
+        # We are now using tbl_data_scaled, so change the dependent
+        # varname accordingly.
+        dependent_varname = col_dep_var_norm_new
         if is_classification:
             # If dependent variable is an array during classification, assume
             # that it is already one-hot-encoded.
             if "[]" in dependent_vartype:
-                # We are now using tbl_data_scaled, so change the dependent
-                # varname accordingly.
-                dependent_varname = col_dep_var_norm_new
                 num_output_nodes = get_col_dimension(tbl_data_scaled,
                                                      dependent_varname)
             else:
                 labels = plpy.execute("SELECT DISTINCT {0} FROM {1}".
-                                      format(dependent_varname, source_table))
+                                      format(dependent_varname_backup, source_table))
                 num_output_nodes = len(labels)
                 for label_obj in labels:
-                    label = _format_label(label_obj[dependent_varname])
+                    label = _format_label(label_obj[dependent_varname_backup])
                     classes.append(label)
                 classes.sort()
                 level_vals_str = ','.join(["{0}={1}".format(