Skip to content

Commit

Permalink
Merge pull request #41 from bjherger/auto_loss
Browse files Browse the repository at this point in the history
Auto loss
  • Loading branch information
bjherger committed Jul 27, 2018
2 parents 874e380 + 0d6c717 commit 0ea1378
Show file tree
Hide file tree
Showing 10 changed files with 58 additions and 19 deletions.
2 changes: 1 addition & 1 deletion README.md
Original file line number Diff line number Diff line change
Expand Up @@ -64,7 +64,7 @@ x = Dense(32)(x)
x = auto.output_nub(x)

model = Model(inputs=auto.input_layers, outputs=x)
model.compile(optimizer='Adam', loss='sparse_categorical_crossentropy', metrics=['accuracy'])
model.compile(optimizer='Adam', loss=auto.loss, metrics=['accuracy'])

# Train model
model.fit(X, y, epochs=4, validation_split=.2)
Expand Down
Empty file added examples/__init__.py
Empty file.
2 changes: 1 addition & 1 deletion examples/mushrooms.py
Original file line number Diff line number Diff line change
Expand Up @@ -22,7 +22,7 @@ def main():
x = auto.output_nub(x)

model = Model(inputs=auto.input_layers, outputs=x)
model.compile(optimizer='Adam', loss='sparse_categorical_crossentropy', metrics=['accuracy'])
model.compile(optimizer='Adam', loss=auto.loss, metrics=['accuracy'])

# Train model
model.fit(X, y, epochs=10, validation_split=.5)
Expand Down
2 changes: 1 addition & 1 deletion examples/titanic.py
Original file line number Diff line number Diff line change
Expand Up @@ -33,7 +33,7 @@ def main():
x = auto.output_nub(x)

model = Model(inputs=auto.input_layers, outputs=x)
model.compile(optimizer='Adam', loss='sparse_categorical_crossentropy', metrics=['accuracy'])
model.compile(optimizer='Adam', loss=auto.loss, metrics=['accuracy'])

# Train model
model.fit(X, y, epochs=4, validation_split=.2)
Expand Down
44 changes: 33 additions & 11 deletions keras_pandas/Automater.py
Original file line number Diff line number Diff line change
Expand Up @@ -37,13 +37,17 @@ def __init__(self, numerical_vars=list(), categorical_vars=list(), boolean_vars=
# Create input variable type handler
self.input_nub_type_handlers = constants.default_input_nub_type_handlers

# Initialize list of variables fed into Keras nubs
self.keras_input_variable_list = list()

# Initialize Keras nubs & layers
self.input_layers = None
self.input_nub = None
self.output_nub = None

# Initialize list of variables fed into Keras nubs
self.keras_input_variable_list = list()
# Initialize suggested Keras loss
self.loss = None


def fit(self, input_dataframe):
"""
Expand Down Expand Up @@ -89,6 +93,10 @@ def fit(self, input_dataframe):
self.output_nub = self._create_output_nub(self._variable_type_dict, output_variables_df=output_variables_df,
y=self.response_var)

# Initialize & set suggested loss
if self.response_var is not None:
self.loss = self._suggest_loss(self._variable_type_dict, y=self.response_var)

# Set self.fitted to True
self.fitted = True

Expand Down Expand Up @@ -309,17 +317,11 @@ def _create_output_nub(self, variable_type_dict, output_variables_df, y):
"""
logging.info('Creating output nub, for variable: {}'.format(y))

# Find which variable type for response variable
# Find response variable's variable type
response_variable_types = lib.get_variable_type(y, variable_type_dict, self.response_var)

logging.info('Found response variable type(s)'.format(response_variable_types))
if len(response_variable_types) < 1:
raise ValueError('Response variable: {} is not in provided variable type lists'.format(y))
elif len(response_variable_types) > 1:
raise ValueError('Response variable: {} appears in more than one provided variable type lists'.format(
y))

response_variable_type = response_variable_types[0]
logging.info('Found response variable type'.format(response_variable_type))


if response_variable_type == 'numerical_vars':
# Create Dense layer w/ single node
Expand Down Expand Up @@ -377,3 +379,23 @@ def _create_mappers(self, variable_type_dict):
output_mapper = DataFrameMapper(output_transformation_list, df_out=True)

return input_mapper, output_mapper

def _suggest_loss(self, variable_type_dict, y):
    """
    Suggest a Keras loss function appropriate for the response variable.

    The suggestion is looked up from ``constants.default_suggested_losses``,
    keyed by the response variable's variable type (e.g. ``numerical_vars``).

    :param variable_type_dict: mapping of variable-type name to the list of
        variables of that type
    :param y: name of the response variable
    :return: a Keras loss function suitable for ``y``'s variable type
    :raises ValueError: if no default loss exists for ``y``'s variable type
    """
    logging.info('Finding suggested loss, for variable: {}'.format(y))

    # Find the response variable's variable type. get_variable_type returns a
    # list; the first entry is used, mirroring _create_output_nub's behavior.
    response_variable_types = lib.get_variable_type(y, variable_type_dict, self.response_var)
    response_variable_type = response_variable_types[0]
    # BUG FIX: the original log string had no '{}' placeholder, so the
    # variable type was silently dropped from the message.
    logging.info('Found response variable type: {}'.format(response_variable_type))

    # Look up the suggested loss for this variable type.
    if response_variable_type in constants.default_suggested_losses:
        suggested_loss = constants.default_suggested_losses[response_variable_type]
        logging.info('Suggesting loss: {}'.format(suggested_loss))
    else:
        raise ValueError('No default loss for variable {}, '
                         'with variable_type: {}'.format(y, response_variable_type))

    return suggested_loss

7 changes: 7 additions & 0 deletions keras_pandas/constants.py
Original file line number Diff line number Diff line change
Expand Up @@ -3,6 +3,7 @@

import keras
import numpy
from keras import losses
from keras.layers import Embedding, Flatten, Bidirectional, LSTM
from sklearn.preprocessing import Imputer, StandardScaler, LabelEncoder

Expand All @@ -18,6 +19,12 @@
'non_transformed_vars': []
})

# Default Keras loss per response-variable type. Consumed by
# Automater._suggest_loss to populate Automater.loss after fit().
# NOTE(review): boolean responses share sparse_categorical_crossentropy with
# categorical ones — presumably because booleans are label-encoded to integer
# classes; confirm against the boolean transformation pipeline.
default_suggested_losses = {
    'numerical_vars': losses.mean_squared_error,
    'categorical_vars': losses.sparse_categorical_crossentropy,
    'boolean_vars': losses.sparse_categorical_crossentropy
}


def input_nub_numeric_handler(variable, input_dataframe):
# Get transformed data for shaping
Expand Down
2 changes: 1 addition & 1 deletion tests/testcategorical.py
Original file line number Diff line number Diff line change
Expand Up @@ -131,7 +131,7 @@ def test_numerical_whole(self):
x = output_nub(x)

model = Model(inputs=auto.input_layers, outputs=x)
model.compile(optimizer='Adam', loss=losses.sparse_categorical_crossentropy)
model.compile(optimizer='Adam', loss=auto.loss)

# Train DL model
model.fit(X_train, y_train)
Expand Down
11 changes: 11 additions & 0 deletions tests/testexamples.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,11 @@
from examples import mushrooms, titanic
from tests.testbase import TestBase


class TestExamples(TestBase):
    """Smoke tests that run the bundled example scripts end to end.

    Each test simply invokes an example's main() and passes if no exception
    is raised; no return values are asserted.
    """

    def test_mushrooms(self):
        # Run the mushrooms example end to end (fit + compile + train).
        mushrooms.main()

    def test_titanic(self):
        # Run the titanic example end to end (fit + compile + train).
        titanic.main()
5 changes: 2 additions & 3 deletions tests/testnumeric.py
Original file line number Diff line number Diff line change
@@ -1,7 +1,6 @@
import logging
import unittest

from keras import Model, losses
from keras import Model
from keras.layers import Dense

from keras_pandas import lib
Expand Down Expand Up @@ -97,7 +96,7 @@ def test_numerical_whole(self):
x = output_nub(x)

model = Model(inputs=auto.input_layers, outputs=x)
model.compile(optimizer='Adam', loss=losses.mean_squared_error)
model.compile(optimizer='Adam', loss=auto.loss)

# Train DL model
model.fit(X_train, y_train)
Expand Down
2 changes: 1 addition & 1 deletion tests/testtext.py
Original file line number Diff line number Diff line change
Expand Up @@ -87,7 +87,7 @@ def test_whole(self):
x = auto.output_nub(x)

model = Model(inputs=auto.input_layers, outputs=x)
model.compile(optimizer='Adam', loss=losses.sparse_categorical_crossentropy)
model.compile(optimizer='Adam', loss=auto.loss)

# Train DL model
model.fit(X_train, y_train)
Expand Down

0 comments on commit 0ea1378

Please sign in to comment.