Skip to content

Commit

Permalink
Merge pull request #41 from bjherger/auto_loss
Browse files Browse the repository at this point in the history
Auto loss
  • Loading branch information
bjherger committed Jul 27, 2018
2 parents 874e380 + 0d6c717 commit 0ea1378
Show file tree
Hide file tree
Showing 10 changed files with 58 additions and 19 deletions.
2 changes: 1 addition & 1 deletion README.md
Original file line number Diff line number Diff line change
Expand Up @@ -64,7 +64,7 @@ x = Dense(32)(x)
x = auto.output_nub(x)

model = Model(inputs=auto.input_layers, outputs=x)
model.compile(optimizer='Adam', loss='sparse_categorical_crossentropy', metrics=['accuracy'])
model.compile(optimizer='Adam', loss=auto.loss, metrics=['accuracy'])

# Train model
model.fit(X, y, epochs=4, validation_split=.2)
Expand Down
Empty file added examples/__init__.py
Empty file.
2 changes: 1 addition & 1 deletion examples/mushrooms.py
Original file line number Diff line number Diff line change
Expand Up @@ -22,7 +22,7 @@ def main():
x = auto.output_nub(x)

model = Model(inputs=auto.input_layers, outputs=x)
model.compile(optimizer='Adam', loss='sparse_categorical_crossentropy', metrics=['accuracy'])
model.compile(optimizer='Adam', loss=auto.loss, metrics=['accuracy'])

# Train model
model.fit(X, y, epochs=10, validation_split=.5)
Expand Down
2 changes: 1 addition & 1 deletion examples/titanic.py
Original file line number Diff line number Diff line change
Expand Up @@ -33,7 +33,7 @@ def main():
x = auto.output_nub(x)

model = Model(inputs=auto.input_layers, outputs=x)
model.compile(optimizer='Adam', loss='sparse_categorical_crossentropy', metrics=['accuracy'])
model.compile(optimizer='Adam', loss=auto.loss, metrics=['accuracy'])

# Train model
model.fit(X, y, epochs=4, validation_split=.2)
Expand Down
44 changes: 33 additions & 11 deletions keras_pandas/Automater.py
Original file line number Diff line number Diff line change
Expand Up @@ -37,13 +37,17 @@ def __init__(self, numerical_vars=list(), categorical_vars=list(), boolean_vars=
# Create input variable type handler
self.input_nub_type_handlers = constants.default_input_nub_type_handlers

# Initialize list of variables fed into Keras nubs
self.keras_input_variable_list = list()

# Initialize Keras nubs & layers
self.input_layers = None
self.input_nub = None
self.output_nub = None

# Initialize list of variables fed into Keras nubs
self.keras_input_variable_list = list()
# Initialize suggested Keras loss
self.loss = None


def fit(self, input_dataframe):
"""
Expand Down Expand Up @@ -89,6 +93,10 @@ def fit(self, input_dataframe):
self.output_nub = self._create_output_nub(self._variable_type_dict, output_variables_df=output_variables_df,
y=self.response_var)

# Initialize & set suggested loss
if self.response_var is not None:
self.loss = self._suggest_loss(self._variable_type_dict, y=self.response_var)

# Set self.fitted to True
self.fitted = True

Expand Down Expand Up @@ -309,17 +317,11 @@ def _create_output_nub(self, variable_type_dict, output_variables_df, y):
"""
logging.info('Creating output nub, for variable: {}'.format(y))

# Find which variable type for response variable
# Find response variable's variable type
response_variable_types = lib.get_variable_type(y, variable_type_dict, self.response_var)

logging.info('Found response variable type(s)'.format(response_variable_types))
if len(response_variable_types) < 1:
raise ValueError('Response variable: {} is not in provided variable type lists'.format(y))
elif len(response_variable_types) > 1:
raise ValueError('Response variable: {} appears in more than one provided variable type lists'.format(
y))

response_variable_type = response_variable_types[0]
logging.info('Found response variable type'.format(response_variable_type))


if response_variable_type == 'numerical_vars':
# Create Dense layer w/ single node
Expand Down Expand Up @@ -377,3 +379,23 @@ def _create_mappers(self, variable_type_dict):
output_mapper = DataFrameMapper(output_transformation_list, df_out=True)

return input_mapper, output_mapper

def _suggest_loss(self, variable_type_dict, y):
    """
    Suggest a Keras loss function appropriate for the response variable.

    The suggestion is looked up from ``constants.default_suggested_losses``,
    keyed by the response variable's variable type (e.g. ``numerical_vars``).

    :param variable_type_dict: mapping of variable-type name to the list of
        variables of that type
    :param y: name of the response variable
    :return: a Keras loss function suitable for ``y``'s variable type
    :raises ValueError: if no default loss exists for ``y``'s variable type
    """
    logging.info('Finding suggested loss, for variable: {}'.format(y))

    # Find the response variable's variable type. get_variable_type returns a
    # list; the first entry is used, mirroring _create_output_nub's behavior.
    response_variable_types = lib.get_variable_type(y, variable_type_dict, self.response_var)
    response_variable_type = response_variable_types[0]
    # BUG FIX: the original log string had no '{}' placeholder, so the
    # variable type was silently dropped from the message.
    logging.info('Found response variable type: {}'.format(response_variable_type))

    # Look up the suggested loss for this variable type.
    if response_variable_type in constants.default_suggested_losses:
        suggested_loss = constants.default_suggested_losses[response_variable_type]
        logging.info('Suggesting loss: {}'.format(suggested_loss))
    else:
        raise ValueError('No default loss for variable {}, '
                         'with variable_type: {}'.format(y, response_variable_type))

    return suggested_loss

7 changes: 7 additions & 0 deletions keras_pandas/constants.py
Original file line number Diff line number Diff line change
Expand Up @@ -3,6 +3,7 @@

import keras
import numpy
from keras import losses
from keras.layers import Embedding, Flatten, Bidirectional, LSTM
from sklearn.preprocessing import Imputer, StandardScaler, LabelEncoder

Expand All @@ -18,6 +19,12 @@
'non_transformed_vars': []
})

# Default Keras loss per response-variable type. Consumed by
# Automater._suggest_loss to populate Automater.loss after fit().
# NOTE(review): boolean responses share sparse_categorical_crossentropy with
# categorical ones — presumably because booleans are label-encoded to integer
# classes; confirm against the boolean transformation pipeline.
default_suggested_losses = {
    'numerical_vars': losses.mean_squared_error,
    'categorical_vars': losses.sparse_categorical_crossentropy,
    'boolean_vars': losses.sparse_categorical_crossentropy
}


def input_nub_numeric_handler(variable, input_dataframe):
# Get transformed data for shaping
Expand Down
2 changes: 1 addition & 1 deletion tests/testcategorical.py
Original file line number Diff line number Diff line change
Expand Up @@ -131,7 +131,7 @@ def test_numerical_whole(self):
x = output_nub(x)

model = Model(inputs=auto.input_layers, outputs=x)
model.compile(optimizer='Adam', loss=losses.sparse_categorical_crossentropy)
model.compile(optimizer='Adam', loss=auto.loss)

# Train DL model
model.fit(X_train, y_train)
Expand Down
11 changes: 11 additions & 0 deletions tests/testexamples.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,11 @@
from examples import mushrooms, titanic
from tests.testbase import TestBase


class TestExamples(TestBase):
    """Smoke tests that run the bundled example scripts end to end.

    Each test simply invokes an example's main() and passes if no exception
    is raised; no return values are asserted.
    """

    def test_mushrooms(self):
        # Run the mushrooms example end to end (fit + compile + train).
        mushrooms.main()

    def test_titanic(self):
        # Run the titanic example end to end (fit + compile + train).
        titanic.main()
5 changes: 2 additions & 3 deletions tests/testnumeric.py
Original file line number Diff line number Diff line change
@@ -1,7 +1,6 @@
import logging
import unittest

from keras import Model, losses
from keras import Model
from keras.layers import Dense

from keras_pandas import lib
Expand Down Expand Up @@ -97,7 +96,7 @@ def test_numerical_whole(self):
x = output_nub(x)

model = Model(inputs=auto.input_layers, outputs=x)
model.compile(optimizer='Adam', loss=losses.mean_squared_error)
model.compile(optimizer='Adam', loss=auto.loss)

# Train DL model
model.fit(X_train, y_train)
Expand Down
2 changes: 1 addition & 1 deletion tests/testtext.py
Original file line number Diff line number Diff line change
Expand Up @@ -87,7 +87,7 @@ def test_whole(self):
x = auto.output_nub(x)

model = Model(inputs=auto.input_layers, outputs=x)
model.compile(optimizer='Adam', loss=losses.sparse_categorical_crossentropy)
model.compile(optimizer='Adam', loss=auto.loss)

# Train DL model
model.fit(X_train, y_train)
Expand Down

0 comments on commit 0ea1378

Please sign in to comment.