In [None]:
import stickbugml
from stickbugml.decorators import dataset, preprocess, feature, model

In [None]:
import seaborn.apionly as sns
import pandas as pd

@dataset(train_valid_test=(0.6, 0.2, 0.2))
def raw_dataset():
    """Load seaborn's 'titanic' dataset and separate features from the target.

    Rows containing any NaN are discarded for simplicity. The decorator is
    handed a (0.6, 0.2, 0.2) train/validation/test ratio.

    Returns:
        tuple: ``(X, y)`` where ``X`` is every column except 'survived' and
        ``y`` is the 'survived' column.
    """
    # Keep only fully populated rows
    complete_rows = sns.load_dataset('titanic').dropna()

    # Split into features and target
    target = complete_rows['survived']
    features = complete_rows.drop('survived', axis=1)
    return features, target

# raw_dataset is now a variable that holds the X values of the evaluated function
# (the test data's ground truth is locked away to prevent accidentally fitting to it)
raw_dataset.head()

In [None]:
@preprocess
def preprocessed_dataset(X):
    """One-hot encode the categorical feature columns of ``X``.

    The 'alive' column is removed instead of being encoded: in seaborn's
    titanic dataset it is the yes/no spelling of the 'survived' target, so
    keeping it as a feature leaks the label into the model inputs.

    Args:
        X: DataFrame of raw features.

    Returns:
        DataFrame in which every listed categorical column is replaced by
        indicator columns prefixed with the source column's name.
    """
    # Target-leakage guard. errors='ignore' makes this a no-op when the
    # column is not present in X.
    X = X.drop(['alive'], axis=1, errors='ignore')

    # Encode categorical columns
    categorical_column_names = [
        'sex', 'embarked', 'class',
        'who', 'adult_male', 'deck',
        'embark_town', 'alone'
    ]

    X = pd.get_dummies(X,
                       columns=categorical_column_names,
                       prefix=categorical_column_names)

    return X

# Preview the output of the preprocessing step
preprocessed_dataset.head()

In [None]:
from sklearn import decomposition
import numpy as np

@feature('pca')
def pca_feature(X):
    """Compute a 3-component PCA projection of ``X``.

    The PCA is fitted on ``X`` and then applied to that same ``X``. The
    projection is transposed before being wrapped in a DataFrame, so the
    returned frame has one row per principal component.

    Args:
        X: Feature matrix accepted by scikit-learn's PCA.

    Returns:
        pandas.DataFrame built from the transposed projection.
    """
    reducer = decomposition.PCA(n_components=3)
    projected = reducer.fit(X).transform(X)

    # NOTE(review): the transpose yields shape (3, n_rows_of_X); confirm that
    # @feature expects components as rows rather than one row per sample.
    components_as_rows = projected.T
    return pd.DataFrame(components_as_rows)

# Preview the output of the 'pca' feature
pca_feature.head()

In [None]:
import xgboost as xgb

@model('xgboost')
def xgboost_model():
    """Register an XGBoost binary classifier under the name 'xgboost'.

    Returns:
        tuple: the ``(define, train, predict)`` callables.
    """
    def define(num_columns):
        """Return None; the booster is only created inside ``train``."""
        return None # xgboost models are not pre-defined

    def train(model, params, train, validation):
        """Fit a booster on the training split, monitoring the validation split.

        Args:
            model: Unused; ``define`` returns None for this model.
            params: XGBoost parameter dict supplied by the caller. It is
                copied, never modified.
            train: Mapping with 'X' (features) and 'y' (labels).
            validation: Mapping with 'X' and 'y', used for early stopping.

        Returns:
            The booster returned by ``xgb.train``.
        """
        # Work on a copy so the caller's dict does not silently gain the
        # 'objective' / 'eval_metric' keys forced below.
        booster_params = dict(params or {})
        booster_params['objective'] = 'binary:logistic'
        booster_params['eval_metric'] = 'logloss'

        d_train = xgb.DMatrix(train['X'], label=train['y'])
        d_valid = xgb.DMatrix(validation['X'], label=validation['y'])

        # The last watchlist entry is the one early stopping monitors.
        watchlist = [(d_train, 'train'), (d_valid, 'valid')]

        # Up to 10000 boosting rounds; stop after 50 rounds without
        # improvement; log every 200 rounds.
        return xgb.train(booster_params, d_train, 10000, watchlist,
                         early_stopping_rounds=50, verbose_eval=200)

    def predict(model, X):
        """Return the booster's predictions for ``X``."""
        return model.predict(xgb.DMatrix(X))

    return define, train, predict

In [None]:
# Train the model registered as 'xgboost' with these hyper-parameters
stickbugml.train('xgboost', dict(max_depth=7, eta=0.005))

In [None]:
# Evaluate the trained 'xgboost' model
stickbugml.evaluate('xgboost')

In [None]:
from keras.models import Sequential
from keras.layers import Dense, Dropout

@model('keras_nn')
def keras_nn_model():
    """Register a small fully connected Keras classifier as 'keras_nn'.

    Returns:
        tuple: the ``(define, train, predict)`` callables.
    """
    def define(num_columns):
        """Build and compile the network.

        Three Dense(64, relu) layers, each followed by Dropout(0.25), then a
        single sigmoid output unit. Compiled with binary cross-entropy loss,
        the rmsprop optimizer and an accuracy metric.

        Args:
            num_columns: Number of input features (``input_dim`` of the
                first layer).
        """
        model = Sequential()
        model.add(Dense(64, input_dim=num_columns, activation='relu'))
        model.add(Dropout(0.25))
        model.add(Dense(64, activation='relu'))
        model.add(Dropout(0.25))
        model.add(Dense(64, activation='relu'))
        model.add(Dropout(0.25))
        model.add(Dense(1, activation='sigmoid'))

        model.compile(loss='binary_crossentropy',
                      optimizer='rmsprop',
                      metrics=['accuracy'])
        return model

    def train(model, params, train, validation):
        """Fit ``model`` on the training split.

        Args:
            model: Compiled Keras model produced by ``define``.
            params: Optional overrides; recognised keys are 'epochs'
                (default 50) and 'batch_size' (default 5).
            train: Mapping with 'X' and 'y' pandas objects.
            validation: Mapping with 'X' and 'y'; when given, it is passed
                to ``fit`` so validation metrics are reported each epoch.

        Returns:
            The fitted model (same object that was passed in).
        """
        params = params or {}
        fit_kwargs = {
            'epochs': params.get('epochs', 50),
            'batch_size': params.get('batch_size', 5),
        }

        # Validation data is only evaluated, never trained on.
        if validation is not None:
            fit_kwargs['validation_data'] = (validation['X'].values,
                                             validation['y'].values)

        model.fit(train['X'].values, train['y'].values, **fit_kwargs)

        return model

    def predict(model, X):
        """Return the model's raw predictions for ``X``."""
        return model.predict(X.values)

    return define, train, predict

In [None]:
# Train the model registered as 'keras_nn' with an empty parameter dict
stickbugml.train('keras_nn', {})

In [None]:
# Evaluate the trained 'keras_nn' model
stickbugml.evaluate('keras_nn')