Skip to content

Commit

Permalink
Merge pull request #72 from bjherger/examples
Browse files Browse the repository at this point in the history
Updated examples, major refactoring and feature improvements
  • Loading branch information
bjherger committed Oct 11, 2018
2 parents 632ec9a + fe99003 commit f57d27e
Show file tree
Hide file tree
Showing 22 changed files with 643 additions and 286 deletions.
10 changes: 9 additions & 1 deletion README.md
Original file line number Diff line number Diff line change
Expand Up @@ -188,7 +188,15 @@ board games with my partner in Seattle.

### Development

- Adding CI/CD and PyPi links, and updating contact section w/ about the author (#70)
- Adding CI/CD and PyPi links, and updating contact section w/ about the author (#70)
- Major rewrite / update of examples (#72)
- Fixes bug in embedding transformer. Embeddings will now be at least length 1.
- Added a check that `response_var` is in the list of user-provided variables
- Added better null filling w/ `CategoricalImputer`
- Added filling unseen values w/ `CategoricalImputer`
- Converted default transformer pipeline to use `copy.deepcopy` instead of `copy.copy`. This was a hotfix for a
previously unknown issue.
- Standardized where the logging level is set: only in the test base class and in examples (when run as `__main__`)

### 1.3.5

Expand Down
5 changes: 5 additions & 0 deletions examples/README.md
Original file line number Diff line number Diff line change
@@ -0,0 +1,5 @@
# `keras-pandas` examples

## Numerical response variable

## Categorical response variable
78 changes: 78 additions & 0 deletions examples/categorical_lending_club.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,78 @@
import logging

import numpy
import pandas
from keras import Model
from keras.layers import Dense
from sklearn.model_selection import train_test_split

from keras_pandas import lib
from keras_pandas.Automater import Automater


def main():
    """Run the Lending Club example with `loan_status` as a categorical response.

    Loads the observations, applies a few heuristic clean-ups, declares the
    variable types, fits an `Automater`, trains a small Keras model on top of
    the Automater-provided input / output nubs, and prints the
    inverse-transformed predictions for the test split.
    """
    response_var = 'loan_status'

    # Load data (the example also mentions `lib.load_lending_club(test_run=False)` as an alternative call)
    loans = lib.load_lending_club()
    print('Observation columns: {}'.format(list(loans.columns)))
    print('Class balance:\n {}'.format(loans[response_var].value_counts()))

    # Heuristic data transformations
    def strip_percent(value):
        """Return `value` as a string with every percent sign removed."""
        return str(value).replace('%', '')

    # Percent-formatted columns: drop the '%', then parse as numbers. Anything that still
    # fails to parse becomes NaN because of `errors='coerce'`.
    for percent_col in ('int_rate', 'revol_util'):
        loans[percent_col] = pandas.to_numeric(loans[percent_col].apply(strip_percent), errors='coerce')

    # Heuristic null filling for some variables
    for nullable_col in ('mths_since_last_delinq', 'annual_inc_joint'):
        loans[nullable_col] = loans[nullable_col].fillna(0)

    # List out variable types
    numerical_vars = [
        'loan_amnt', 'annual_inc', 'open_acc', 'dti', 'delinq_2yrs', 'inq_last_6mths',
        'mths_since_last_delinq', 'pub_rec', 'revol_bal', 'revol_util', 'total_acc',
        'pub_rec_bankruptcies',
    ]
    categorical_vars = [
        'term', 'grade', 'emp_length', 'home_ownership', 'loan_status', 'addr_state',
        'application_type', 'disbursement_method',
    ]
    text_vars = ['desc', 'purpose', 'title']

    # Split, then take explicit copies of both partitions
    # (presumably so the prediction column can be added below without touching `loans` - confirm)
    train_observations, test_observations = (part.copy() for part in train_test_split(loans))

    # Create and fit Automater
    auto = Automater(
        numerical_vars=numerical_vars,
        categorical_vars=categorical_vars,
        text_vars=text_vars,
        response_var=response_var,
    )
    auto.fit(train_observations)

    # Create and fit keras (deep learning) model.
    # auto.transform, auto.input_nub, auto.output_nub, auto.input_layers and auto.loss come from
    # keras-pandas; everything else is core Keras.
    train_X, train_y = auto.transform(train_observations)
    test_X, _ = auto.transform(test_observations)

    hidden = Dense(32)(auto.input_nub)
    hidden = Dense(32, activation='relu')(hidden)
    hidden = Dense(32)(hidden)
    output = auto.output_nub(hidden)

    model = Model(inputs=auto.input_layers, outputs=output)
    model.compile(optimizer='Adam', loss=auto.loss, metrics=['accuracy'])
    model.fit(train_X, train_y)

    # Inverse transform model output, to get usable results, and store them next to the test observations
    prediction_col = auto.response_var + '_pred'
    test_observations[prediction_col] = auto.inverse_transform_output(model.predict(test_X))
    print('Predictions: {}'.format(test_observations[prediction_col]))


if __name__ == '__main__':
    # Logging level is only raised when the example is run as a script
    logging.getLogger().setLevel(logging.INFO)
    main()
63 changes: 63 additions & 0 deletions examples/categorical_mushrooms.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,63 @@
import logging

import numpy
import pandas
from keras import Model
from keras.layers import Dense
from sklearn.model_selection import train_test_split

from keras_pandas import lib
from keras_pandas.Automater import Automater


def main():
    """Run the mushroom example with `class` as a categorical response.

    Loads the observations, declares the variable types (all categorical),
    fits an `Automater`, trains a small Keras model on top of the
    Automater-provided input / output nubs, and prints the inverse-transformed
    predictions for the test split.
    """
    response_var = 'class'

    # Load data
    mushrooms = lib.load_mushroom()
    print('Observation columns: {}'.format(list(mushrooms.columns)))
    print('Class balance:\n {}'.format(mushrooms[response_var].value_counts()))

    # List out variable types
    numerical_vars = []
    categorical_vars = [
        'class', 'cap-shape', 'cap-surface', 'cap-color', 'bruises', 'odor',
        'gill-attachment', 'gill-spacing', 'gill-size', 'gill-color',
        'stalk-shape', 'stalk-root', 'stalk-surface-above-ring', 'stalk-surface-below-ring',
        'stalk-color-above-ring', 'stalk-color-below-ring',
        'veil-type', 'veil-color', 'ring-number', 'ring-type',
        'spore-print-color', 'population', 'habitat',
    ]
    text_vars = []

    # Split, then take explicit copies of both partitions
    # (presumably so the prediction column can be added below without touching `mushrooms` - confirm)
    train_observations, test_observations = (part.copy() for part in train_test_split(mushrooms))

    # Create and fit Automater
    auto = Automater(
        numerical_vars=numerical_vars,
        categorical_vars=categorical_vars,
        text_vars=text_vars,
        response_var=response_var,
    )
    auto.fit(train_observations)

    # Create and fit keras (deep learning) model.
    # auto.transform, auto.input_nub, auto.output_nub, auto.input_layers and auto.loss come from
    # keras-pandas; everything else is core Keras.
    train_X, train_y = auto.transform(train_observations)
    test_X, _ = auto.transform(test_observations)

    hidden = Dense(32)(auto.input_nub)
    hidden = Dense(32, activation='relu')(hidden)
    hidden = Dense(32)(hidden)
    output = auto.output_nub(hidden)

    model = Model(inputs=auto.input_layers, outputs=output)
    model.compile(optimizer='Adam', loss=auto.loss, metrics=['accuracy'])
    model.fit(train_X, train_y)

    # Inverse transform model output, to get usable results, and store them next to the test observations
    prediction_col = auto.response_var + '_pred'
    test_observations[prediction_col] = auto.inverse_transform_output(model.predict(test_X))
    print('Predictions: {}'.format(test_observations[prediction_col]))


if __name__ == '__main__':
    # Logging level is only raised when the example is run as a script
    logging.getLogger().setLevel(logging.INFO)
    main()
66 changes: 66 additions & 0 deletions examples/categorical_titanic.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,66 @@
import logging

import numpy
import pandas
from keras import Model
from keras.layers import Dense
from sklearn.model_selection import train_test_split

from keras_pandas import lib
from keras_pandas.Automater import Automater


def main():
    """Run the Titanic example with `survived` as a categorical response.

    Loads the observations, declares the variable types, casts the categorical
    columns to strings, fits an `Automater`, trains a small Keras model on top
    of the Automater-provided input / output nubs, and prints the
    inverse-transformed predictions for the test split.
    """
    response_var = 'survived'

    # Load data
    passengers = lib.load_titanic()
    print('Observation columns: {}'.format(list(passengers.columns)))
    print('Class balance:\n {}'.format(passengers[response_var].value_counts()))

    # List out variable types
    numerical_vars = ['age', 'siblings_spouses_aboard', 'parents_children_aboard', 'fare']
    categorical_vars = ['survived', 'pclass', 'sex']
    text_vars = ['name']

    # Cast every categorical column to str
    # (presumably so numeric codes are handled as labels rather than numbers - confirm)
    for categorical_col in categorical_vars:
        passengers[categorical_col] = passengers[categorical_col].astype(str)

    # Split, then take explicit copies of both partitions
    # (presumably so the prediction column can be added below without touching `passengers` - confirm)
    train_observations, test_observations = (part.copy() for part in train_test_split(passengers))

    # Create and fit Automater
    auto = Automater(
        numerical_vars=numerical_vars,
        categorical_vars=categorical_vars,
        text_vars=text_vars,
        response_var=response_var,
    )
    auto.fit(train_observations)

    # Create and fit keras (deep learning) model.
    # auto.transform, auto.input_nub, auto.output_nub, auto.input_layers and auto.loss come from
    # keras-pandas; everything else is core Keras.
    train_X, train_y = auto.transform(train_observations)
    test_X, _ = auto.transform(test_observations)

    hidden = Dense(32)(auto.input_nub)
    hidden = Dense(32, activation='relu')(hidden)
    hidden = Dense(32)(hidden)
    output = auto.output_nub(hidden)

    model = Model(inputs=auto.input_layers, outputs=output)
    model.compile(optimizer='Adam', loss=auto.loss, metrics=['accuracy'])
    model.fit(train_X, train_y)

    # Inverse transform model output, to get usable results, and store them next to the test observations
    prediction_col = auto.response_var + '_pred'
    test_observations[prediction_col] = auto.inverse_transform_output(model.predict(test_X))
    print('Predictions: {}'.format(test_observations[prediction_col]))


if __name__ == '__main__':
    # Logging level is only raised when the example is run as a script
    logging.getLogger().setLevel(logging.INFO)
    main()
16 changes: 16 additions & 0 deletions examples/example_interface.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,16 @@

import logging


def main():
    """Skeleton that every example script is expected to follow.

    The body is intentionally empty; each TODO below names one step that a
    concrete example should implement, in order.

    :return: None
    """
    # TODO Load data
    # TODO Heuristic data transformations
    # TODO List out variable types
    # TODO Create and fit Automater
    # TODO Create and fit keras (deep learning) model.
    # TODO List out which components are supplied by Automater
    # TODO Inverse transform model output, to get usable results
    # TODO Save all results

    pass


if __name__ == '__main__':
    # `logging` must be imported by this file; without the import above, this line raises NameError
    logging.getLogger().setLevel(logging.INFO)
    main()
61 changes: 0 additions & 61 deletions examples/lending_club_classification.py

This file was deleted.

77 changes: 0 additions & 77 deletions examples/lending_club_regression.py

This file was deleted.

0 comments on commit f57d27e

Please sign in to comment.