In [1]:
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import seaborn as sns

%matplotlib inline

In [2]:
from sklearn.metrics import roc_curve, auc, roc_auc_score
from sklearn.metrics import recall_score, precision_score, accuracy_score, log_loss


def eval(model, x, y, loss_weights):
    """Score ``model`` on ``(x, y)`` with the notebook's binary-classification metrics.

    Note: this name shadows the built-in ``eval``. It is kept because
    ``log_scores`` calls the function under this name.

    Parameters
    ----------
    model : object
        Anything exposing ``predict(x)`` that returns one positive-class
        score per sample.
    x : array-like
        Inputs, passed unchanged to ``model.predict``.
    y : array-like
        Ground-truth labels, expected to be 0/1 (1 = positive class).
    loss_weights : mapping or sequence, or None
        Per-class weights, looked up as ``loss_weights[0]`` and
        ``loss_weights[1]``. ``None`` disables sample weighting.

    Returns
    -------
    tuple
        ``(loss, acc, recall, precision, auc)``. ``loss`` and ``auc`` are
        sample-weighted when ``loss_weights`` is given; the other three
        metrics are always unweighted.
    """
    # Clipping bound for the log loss (the original spelled this 10e-8).
    eps = 1e-7

    y_true = np.asarray(y)
    y_score = np.asarray(model.predict(x))
    y_pred = y_score > 0.5

    if loss_weights is not None:
        # Build the weights as floats: inheriting y's dtype would truncate
        # fractional class weights whenever the labels are integers.
        weights = np.where(y_true == 1, loss_weights[1], loss_weights[0]).astype(float)
    else:
        weights = None

    # Clip here instead of passing ``eps`` to log_loss: that argument is not
    # accepted positionally (or at all) by newer scikit-learn releases.
    y_prob = np.clip(y_score.astype(float), eps, 1 - eps)

    loss = log_loss(y_true, y_prob, sample_weight=weights)
    acc = accuracy_score(y_true, y_pred)
    recall = recall_score(y_true, y_pred)
    precision = precision_score(y_true, y_pred)
    # Named roc_auc so the imported ``auc`` helper is not shadowed locally.
    roc_auc = roc_auc_score(y_true, y_score, sample_weight=weights)

    return loss, acc, recall, precision, roc_auc


def log_scores(model, x, y, log_id, scores, loss_weights):
    """Evaluate ``model`` on ``(x, y)``, print a one-line summary, and record it.

    Parameters
    ----------
    model, x, y, loss_weights
        Forwarded unchanged to ``eval``.
    log_id : str
        Label printed at the start of the summary line.
    scores : dict
        Must hold lists under the keys ``'loss'``, ``'acc'``, ``'recall'``,
        ``'precision'`` and ``'auc'``; the matching metric is appended to each.
    """
    metric_keys = ('loss', 'acc', 'recall', 'precision', 'auc')
    results = eval(model, x, y, loss_weights)
    loss_v, acc_v, recall_v, precision_v, auc_v = results

    # The printed order differs from the order eval returns the values in.
    template = '\t%s: loss = %.8f, recall = %.6f, precision = %.6f, accuracy = %.6f, auc = %.6f'
    print(template % (log_id, loss_v, recall_v, precision_v, acc_v, auc_v))

    for key, value in zip(metric_keys, results):
        scores[key].append(value)


def train(model, epochs, epochs_to_eval):
    """Fit ``model`` in chunks, logging train/validation scores around each chunk.

    Reads the notebook-level globals ``x_train``, ``y_train``, ``x_val``,
    ``y_val`` and ``params`` (only ``params['weights']`` is used here).

    Scores are logged before every chunk and once more after the last one,
    so with ``epochs=0`` a single evaluation pass runs and nothing is fitted.

    Parameters
    ----------
    model : object
        Must provide ``fit(x, y, batch_size=..., verbose=..., epochs=...,
        class_weight=...)`` plus whatever ``log_scores`` needs.
    epochs : int
        Total number of epochs to train.
    epochs_to_eval : int
        Number of epochs to fit between two evaluations.

    Returns
    -------
    tuple
        ``(scores_train, scores_val)``: two dicts mapping ``'loss'``,
        ``'acc'``, ``'recall'``, ``'precision'`` and ``'auc'`` to the list
        of values logged at each evaluation.
    """
    metric_names = ('loss', 'acc', 'recall', 'precision', 'auc')
    scores_train = {name: [] for name in metric_names}
    scores_val = {name: [] for name in metric_names}
    class_weights = params['weights']

    def _log_epoch(epoch):
        # One evaluation pass over both splits, labelled with the epoch count.
        print('Epoch %d/%d' % (epoch, epochs))
        log_scores(model, x_train, y_train, 'train', scores_train, class_weights)
        log_scores(model, x_val, y_val, 'valid', scores_val, class_weights)

    epochs_done = 0
    for start in range(0, epochs, epochs_to_eval):
        _log_epoch(start)
        # Cap the last chunk so we never train past ``epochs`` when it is
        # not a multiple of ``epochs_to_eval``.
        chunk = min(epochs_to_eval, epochs - start)
        model.fit(x_train, y_train, batch_size=200, verbose=0, epochs=chunk,
                  class_weight=class_weights)
        epochs_done = start + chunk

    _log_epoch(epochs_done)

    return (scores_train, scores_val)

# Always predict anomaly

In [6]:
# Read the train/validation arrays from the .npz archive. The ``with`` block
# closes the archive's file handle once the arrays have been extracted.
with np.load('../data/creditcard_train.npz') as data:
    x_train, y_train = data['x_train'], data['y_train']
    x_val, y_val = data['x_val'], data['y_val']


class EmptyModel(object):
    """Bare placeholder model whose ``predict`` and ``fit`` hooks start as ``None``.

    Callers are expected to attach callables to both attributes after
    construction.
    """

    def __init__(self):
        # Both hooks are deliberately unset until the caller assigns them.
        self.fit = None
        self.predict = None

# Notebook-level settings; ``train`` reads ``params['weights']``.
params = {
    'loss': 'binary_crossentropy',
    'weights': None
}

# Baseline: score every sample as 1.
model = EmptyModel()
model.predict = lambda x: np.ones((x.shape[0],))
# ``train`` passes ``class_weight`` to ``fit`` as a keyword, so the stub must
# accept it; without it any run with epochs > 0 raises TypeError.
model.fit = lambda x, y, batch_size, verbose, epochs, class_weight=None: 0

# epochs=0: nothing is fitted, only the single evaluation pass runs.
_, _ = train(model, 0, 1)

Epoch 0/0
	train: loss = 16.09024134, recall = 1.000000, precision = 0.001728, accuracy = 0.001728, auc = 0.500000
	valid: loss = 16.09050586, recall = 1.000000, precision = 0.001712, accuracy = 0.001712, auc = 0.500000


# Always predict non-anomaly

In [7]:
# Opposite baseline: score every sample as 0.
model.predict = lambda x: np.zeros(x.shape[0])
# epochs=0, so this only evaluates and prints the scores for both splits.
_, _ = train(model, 0, 1)

Epoch 0/0
	train: loss = 0.02785441, recall = 0.000000, precision = 0.000000, accuracy = 0.998272, auc = 0.500000
	valid: loss = 0.02758989, recall = 0.000000, precision = 0.000000, accuracy = 0.998288, auc = 0.500000


  'precision', 'predicted', average, warn_for)
