In [7]:
%matplotlib inline

In [8]:
import pandas as pd
import numpy as np
from keras.utils.np_utils import to_categorical
from keras.models import Sequential
from keras.layers import Dense, Input
from keras.models import Model
from keras.wrappers.scikit_learn import KerasRegressor, KerasClassifier
from sklearn.model_selection import cross_val_score
from sklearn.model_selection import KFold

In [6]:
from sklearn.preprocessing import StandardScaler
from sklearn.pipeline import Pipeline
from utils import get_engine
from sklearn import linear_model
import matplotlib.pyplot as plt

if __name__ == '__main__':

    # load dataset
    table = "data_fraud_little"
    engine = get_engine()
    dataframe = pd.read_sql_query("select * from {table} limit 500".format(table=table),engine)
    dataset = dataframe.values
    print("First one row of the dataset")
    print("Shape [{}]".format(dataset.shape))
    print(dataset[0:2,:])
    # split into input (X) and output (Y) variables
    data_dimensions = 45
    #first dimension is the index, must be removed!!!!
    X = dataset[:, 1:data_dimensions]
    Y = dataset[:, data_dimensions]

    print("Fraud {}% ".format(float(np.sum(Y==1))*100.0/Y.shape[0]))
    print("Total #samples:",Y.shape[0])
    Y = to_categorical(Y, nb_classes=None)


    input_dimensions = X.shape[1]
    print("shapes: X[{}]=====Y[{}]".format(X.shape, Y.shape))






    # define base mode
    def baseline_model():
        return logistic_regresion()
        # return linear_regression()


    def keras_lin_reg():
        x = Input((None,input_dimensions))
        y = Dense(1,activation='linear')(x)
        model = Model(x,y,"Linear Regression")
        model.compile(loss='mse', optimizer='sgd')
        return model

    def logistic_regresion():
        logistic = linear_model.LogisticRegression(solver='sag', n_jobs=-1,max_iter=500)
        return logistic
    def linear_regression():
        lr = linear_model.LinearRegression(n_jobs=-1)
        return lr

    def mlp_model(hidden=None,layers=1):
        # create model
        model = Sequential()
        model.add(Dense(input_dimensions, input_dim=input_dimensions, init='normal', activation='relu'))
        if hidden is not None:
            for l in range(layers):
                model.add(Dense(hidden))
        model.add(Dense(2, init='normal', activation='softmax'))
        # Compile model
        model.compile(loss='binary_crossentropy', optimizer='adam')
        return model

    def mlp_model_wrap(layers=1):
        return mlp_model(100,layers)
    # fix random seed for reproducibility
    seed = 7
    np.random.seed(seed)
    # evaluate model with standardized dataset
    estimators = []
    estimators.append(('standardize', StandardScaler()))
    # estimators.append(('mlp', KerasClassifier(build_fn=mlp_model, nb_epoch=100, batch_size=10000, verbose=1)))
    estimators.append(('mlp', KerasClassifier(build_fn=mlp_model_wrap, nb_epoch=100, batch_size=10000, verbose=0)))
    # estimators.append(('liner reg', KerasClassifier(build_fn=keras_lin_reg, nb_epoch=100, batch_size=100000, verbose=1)))
    # estimators.append(('linear_reg', baseline_model()))
    pipeline = Pipeline(estimators)
    pipeline.set_params(mlp__layers=2)
    kfold = KFold(n_splits=2, random_state=seed)
    results = cross_val_score(pipeline, X, Y, cv=kfold, scoring='roc_auc',n_jobs=1)
    print("Results:", results)
    print("Results: %.24f (%.24f) ROC" % (results.mean(), results.std()))
    print(pipeline)
    plt.errorbar([0], [results.mean()], np.array(results.std()))

    plt.title(
        'Cross Validation')
    plt.xlabel('Model')
    plt.ylabel('AUC')

    plt.axis('tight')
    plt.show()

First one row of the dataset
Shape [(500, 47)]
[[  4.75200320e+07   7.49524000e+05   1.38868315e+18   6.89620000e+04
    5.83500000e+01   1.77484800e+06   5.77500000e+04   1.87362000e+05
    3.67000000e+02   1.62381800e+06   1.68885100e+06   5.20000000e+02
    5.00000000e+00   0.00000000e+00   0.00000000e+00   8.00000000e+00
    1.00000000e+00   0.00000000e+00   4.00000000e+00   0.00000000e+00
    1.00000000e+00   1.00000000e+02   0.00000000e+00   0.00000000e+00
    0.00000000e+00   5.60000000e+01   1.00000000e+00   1.00000000e+00
    0.00000000e+00   1.00000000e+00   3.00000000e+00   0.00000000e+00
    1.90000000e+01   1.84000000e+03   8.26000000e+02   6.10000000e+07
    5.83500000e+01   5.13265000e+03   7.50000000e+03   7.50000000e+03
    2.18985000e+03  -9.22337204e+18   1.38602880e+18   1.47048000e+03
    1.37419200e+18   0.00000000e+00  -9.22337204e+18]
 [  3.88424760e+07   6.72120000e+05   1.39431354e+18   1.00165000e+05
    1.49900000e+01   4.20536000e+05   1.32586000e+05   0.00

In [10]:
pipeline.fit(X,Y)
pipeline.predict_proba(X)

array([[  1.00000000e+00,   2.34180564e-10],
       [  9.91887629e-01,   8.11230578e-03],
       [  3.39784436e-02,   9.66021597e-01],
       [  9.93469596e-01,   6.53044274e-03],
       [  2.36431565e-02,   9.76356864e-01],
       [  1.00000000e+00,   3.87060306e-12],
       [  1.00000000e+00,   1.53236132e-10],
       [  1.00000000e+00,   2.31401631e-09],
       [  9.99999285e-01,   6.85733085e-07],
       [  9.99383569e-01,   6.16431120e-04],
       [  1.00000000e+00,   4.76656359e-09],
       [  7.88428187e-01,   2.11571753e-01],
       [  1.00000000e+00,   1.16372743e-12],
       [  1.00000000e+00,   3.79162035e-09],
       [  1.00000000e+00,   9.22906640e-09],
       [  5.39086908e-02,   9.46091354e-01],
       [  1.00000000e+00,   2.20846865e-11],
       [  9.99942660e-01,   5.73005382e-05],
       [  8.28953460e-04,   9.99171019e-01],
       [  1.00000000e+00,   3.23459287e-10],
       [  9.84769940e-01,   1.52300727e-02],
       [  9.95903194e-01,   4.09686007e-03],
       [  

In [37]:
import model_export.py

NameError: name 'seq_len_param' is not defined

0.991576


             precision    recall  f1-score   support

    class 0       0.33      1.00      0.50         1
    class 1       0.00      0.00      0.00         2
    class 2       1.00      1.00      1.00         1

avg / total       0.33      0.50      0.38         4



0.44333333333333336

0.3325