In [51]:
from __future__ import print_function

import double_log
def print(*args, **kwargs):
    """Shadow the built-in ``print`` and forward every call to ``double_log.print``.

    All positional and keyword arguments are passed through unchanged and the
    return value of ``double_log.print`` is returned as-is.
    NOTE(review): presumably ``double_log.print`` writes to both the console and
    a log file -- its implementation is not visible here, confirm.
    """
    return double_log.print(*args, **kwargs)

import keras
import pickle
import operator
import numpy as np
import pandas as pd
import tensorflow as tf
import plot_conf_matrix as pcm
from keras import backend as K
from keras.datasets import mnist
from keras.engine.topology import Layer
from keras.utils import to_categorical
from keras.models import Sequential, Model
from sklearn.preprocessing import LabelBinarizer
from sklearn.metrics import confusion_matrix, f1_score
from keras.layers import Dense, Dropout, Softmax, BatchNormalization, LeakyReLU, ELU, ThresholdedReLU
from sklearn.model_selection import StratifiedKFold, train_test_split
# Re-import the plot_conf_matrix helper module (imported above as `pcm`).
# NOTE(review): bare `reload` is only a builtin on Python 2 -- confirm the kernel version.
reload(pcm)

# Banner emitted through the print wrapper defined above.
print("===== IMPORTING ||| SCRIPT STARTS ||| LOGGING PURPOSE ======")



## Loading Data, Splitting, and Correct Formatting
As my noisy dataset, I use MNIST, but with each label flipped with 50% probability to a different label drawn from a random distribution. As my ground truth, I use the clean MNIST. You can see the noise distribution in the confusion matrix below.

In [52]:
# Training table: keep every row whose ethnicity is not 'hispanic'.
train = pd.read_csv('../Data/uniform_table.csv')
train = train.loc[train['ethnicity'].ne('hispanic')]

# Test table: drop the race categories that are excluded from evaluation,
# one category at a time.
test = pd.read_csv('../Data/feret_table.csv')
for excluded_race in ("other", "african", "hispanic"):
    test = test[test['race'] != excluded_race]

# Cell output: class balance of the remaining test rows.
test['race'].value_counts()

caucasian     5150
eastasian     1549
southasian     465
Name: race, dtype: int64

In [53]:
# Binary target: 1 for 'eastasian' rows, 0 for every other race.
# A later cell reads the column from `test` itself (test.loc[:, 'eastasian']),
# so attach it to `test` explicitly. Previously it only showed up there because
# pd.DataFrame(test) happened to share its underlying data with `test`, which
# is not guaranteed across pandas versions.
test = test.assign(eastasian=(test['race'] == 'eastasian').astype(int))
test_bin = test  # kept under its old name for any code that still refers to it

# Cell output: class balance of the binary target.
test_bin['eastasian'].value_counts()

0    5615
1    1549
Name: eastasian, dtype: int64

In [60]:
# Training configuration.
batch_size = 128
num_classes = 2
epochs = 80
input_shape = (128,)

# Feature columns are selected by label, from column '0' through column '127'.
# `.values` replaces `.as_matrix()`, which was deprecated and later removed from
# pandas; both return the same ndarray.
x_train = train.loc[:,'0':'127'].values
x_test = test.loc[:,'0':'127'].values
# y_train = train.loc[:,'ethnicity'].values
# y_test = test.loc[:,'race'].values

# Binary target column.
# NOTE(review): `train` must already contain an 'eastasian' column -- it is not
# created in the cells visible here; confirm it comes from the CSV.
y_train = train.loc[:,'eastasian'].values
y_test = test.loc[:,'eastasian'].values

# Cast to float32 and normalise with keras.utils.normalize (default arguments).
x_train = x_train.astype('float32')
x_test = x_test.astype('float32')
x_train = keras.utils.normalize(x_train)
x_test = keras.utils.normalize(x_test)

# 10-fold cross validation (index pairs are materialised once so every model
# is trained on the same splits)
folds = list(StratifiedKFold(n_splits=10, shuffle=True, random_state=1).split(x_train, y_train))

print(x_train.shape, y_train.shape, 'train shape')
print(x_test.shape, y_test.shape,'test shape')

# convert class vectors to binary class matrices
y_test_clean = y_test  # raw (un-encoded) test labels, kept before encoding
encoder = LabelBinarizer()
y_train = encoder.fit_transform(y_train)
y_test = encoder.transform(y_test)

y_test = to_categorical(y_test)
y_train = to_categorical(y_train)

#if need to figure out relation for the confusion matrix, use inverse_transform
# NOTE(review): `cm` is 3x3 while num_classes is 2, and confusion_kernel returns
# `cm` unchanged as the noise-layer kernel -- confirm the intended matrix.
cm = np.asarray([[0.82, 0.03, 0.03],[0.02,0.95,0],[0.04,0,0.91]])

(116009, 128) (116009,) train shape
(7164, 128) (7164,) test shape


In [61]:
# Display the encoded training labels (output of to_categorical above).
y_train

array([[1., 0.],
       [1., 0.],
       [1., 0.],
       ...,
       [1., 0.],
       [1., 0.],
       [1., 0.]])

## Define noise layer and models
I defined a custom Keras layer for the noise layer. Its weight matrix is initialized either to the true noise distribution (the confusion matrix) or to a `num_classes` x `num_classes` identity matrix (10x10 in the MNIST setting), depending on the `initializer` parameter.

In [62]:
# Kernel initializers for NoiseLayer. Both accept the (shape, dtype) call used
# by newer Keras versions as well as the older shape-only call.

def confusion_kernel(shape, dtype=None):
    """Return the module-level confusion matrix ``cm`` as the initial noise kernel.

    ``cm`` is treated as the perfectly known noise distribution. ``shape`` is the
    kernel shape requested by the layer; it is only used to fail early, with a
    readable message, when ``cm`` does not fit (dimensions given as ``None`` are
    not checked). ``dtype`` is accepted for API compatibility and ignored.

    Raises:
        ValueError: if ``cm`` does not have the requested shape.
    """
    print("####################### don not #########")
    kernel = np.asarray(cm)
    wanted = tuple(shape)
    mismatch = kernel.ndim != len(wanted) or any(
        want is not None and want != got for want, got in zip(wanted, kernel.shape))
    if mismatch:
        raise ValueError("confusion matrix has shape %s but the noise layer needs %s"
                         % (kernel.shape, wanted))
    return cm


def identity_kernel(shape, dtype=None):
    """Return a ``num_classes`` x ``num_classes`` identity matrix as the initial kernel.

    With an identity kernel the noise layer starts out as a pass-through. The
    previous ``np.rot90(np.eye(num_classes), 3)`` produced an anti-diagonal
    matrix for two classes, which would swap the classes once the layer performs
    a real matrix product. ``shape`` and ``dtype`` are accepted for API
    compatibility and ignored.
    """
    return np.eye(num_classes)


class NoiseLayer(Layer):
    """Linear label-noise layer: multiplies class probabilities by a noise matrix.

    Args:
        output_dim: number of output classes (second kernel dimension).
        dynamic: if True the noise matrix is trainable, otherwise it stays fixed.
        initializer: callable returning the initial noise matrix.
    """

    def __init__(self, output_dim, dynamic=True, initializer=identity_kernel, **kwargs):
        self.output_dim = output_dim
        self.dynamic = dynamic
        self.initializer = initializer
        super(NoiseLayer, self).__init__(**kwargs)

    def build(self, input_shape):
        # The kernel maps input classes to output classes, so its shape is
        # (input_dim, output_dim). input_shape itself also carries the batch
        # dimension and is not a valid weight shape.
        self.kernel = self.add_weight(name='kernel',
                                      shape=(input_shape[-1], self.output_dim),
                                      initializer=self.initializer,
                                      trainable=self.dynamic)
        super(NoiseLayer, self).build(input_shape)

    def call(self, x):
        # Batched vector-matrix product: out[b, m] = sum_n x[b, n] * kernel[n, m].
        # The previous subscripts 'bn,nm->bn' summed the kernel over m and merely
        # rescaled x by the kernel's row sums.
        return tf.einsum('bn,nm->bm', x, self.kernel)

    def compute_output_shape(self, input_shape):
        # Keep the incoming batch dimension instead of hard-coding a batch of 1.
        return (input_shape[0], self.output_dim)


class SoftMaxLayer(Layer):
    """Parameter-free layer that applies ``tf.nn.softmax`` to its input.

    Args:
        output_dim: number of classes reported by ``compute_output_shape``.
    """

    def __init__(self, output_dim, **kwargs):
        self.output_dim = output_dim
        super(SoftMaxLayer, self).__init__(**kwargs)

    def build(self, input_shape):
        # No weights: the former non-trainable 'kernel' was never used in call()
        # and could only be created when the reported batch dimension was 1.
        super(SoftMaxLayer, self).build(input_shape)

    def call(self, x):
        return tf.nn.softmax(x)

    def compute_output_shape(self, input_shape):
        # Keep the incoming batch dimension instead of hard-coding a batch of 1.
        return (input_shape[0], self.output_dim)

In [63]:
#for logging in cnn.log
# After every epoch, write the epoch number (1-based) and the Keras logs dict
# to the double_log logger at debug level.
print_callback = keras.callbacks.LambdaCallback(
        on_epoch_end=lambda epoch,logs: double_log.logger.debug('epoch: '+ str(epoch+1)+ ' logs: '+ str(logs)))


# Lookup tables that turn the string values stored in a parameter dict into objects.
# NOTE(review): the layer and optimizer instances below are shared by every
# model that build_model creates.
activ_list = {"elu": ELU(), "leaky": LeakyReLU(), "relu": keras.layers.Activation("relu"), "sigmoid": keras.layers.Activation("sigmoid")}
optim_list = {"adadelta": keras.optimizers.Adadelta()}
matrix_list = {"identity_kernel": identity_kernel, "confusion_kernel": confusion_kernel}

# Seed for drawing the parameter set that is evaluated. None keeps the draw
# random on every run (the previous behaviour); set an int to make it repeatable.
PARAM_SAMPLE_SEED = None


def _param_grid(noise, dynamic, dropouts, depths, activations, noise_inits=None):
    """Return every hyper-parameter combination as a list of dicts.

    Combinations are generated in the nesting order nodes -> dropout -> depth ->
    activation (-> noise_init). When ``noise_inits`` is None the dicts carry no
    'noise_init' key.
    """
    grid = []
    for nodes in [32, 64, 128, 248]:
        for dropout in dropouts:
            for depth in depths:
                for activ in activations:
                    base = {'noise': noise, 'dynamic': dynamic, 'nodes': nodes, 'dropout': dropout, 'depth': depth, 'activation': activ, "initializer": "glorot_uniform", "optimizer": "adadelta"}
                    if noise_inits is None:
                        grid.append(base)
                    else:
                        for noise_init in noise_inits:
                            grid.append(dict(base, noise_init=noise_init))
    return grid


# depth, nodes, initializer, activation, dropout, noise, kernel_init
dynamic_params = _param_grid(noise=True, dynamic=True,
                             dropouts=[0.4, 0.6], depths=[2, 3],
                             activations=["leaky", "sigmoid"],
                             noise_inits=["identity_kernel", "confusion_kernel"])

static_params = _param_grid(noise=True, dynamic=False,
                            dropouts=[0.4, 0.6], depths=[3, 4],
                            activations=["leaky", "sigmoid"],
                            noise_inits=["identity_kernel", "confusion_kernel"])

default_params = _param_grid(noise=False, dynamic=False,
                             dropouts=[0.1, 0.3, 0.4, 0.6, 0.8], depths=[1, 2, 3, 4, 6],
                             activations=["elu", "leaky", "relu", "sigmoid"])

# Draw one parameter set per model family and repeat it five times.
dynamic_params = pd.Series(dynamic_params).sample(1, random_state=PARAM_SAMPLE_SEED).repeat(5)
static_params = pd.Series(static_params).sample(1, random_state=PARAM_SAMPLE_SEED).repeat(5)
default_params = pd.Series(default_params).sample(1, random_state=PARAM_SAMPLE_SEED).repeat(5)
                        
def build_model(param):
    """Build and compile a dense classifier described by ``param``.

    Args:
        param: dict with the keys 'nodes', 'initializer', 'activation',
            'dropout', 'depth', 'optimizer' and 'noise'; when 'noise' is truthy
            it must also contain 'dynamic' and 'noise_init'.

    Returns:
        The compiled ``Sequential`` model. When 'noise' is set, the last three
        layers are NoiseLayer, Dropout and SoftMaxLayer, so ``model.layers[-4]``
        is the softmax output of the base classifier.
    """
    model = Sequential()

    # Input block: slightly wider than the hidden blocks (nodes + 64 units).
    model.add(Dense(param['nodes']+64, kernel_initializer=param['initializer'], input_shape=input_shape))
    model.add(BatchNormalization())
    model.add(activ_list[param['activation']])
    model.add(Dropout(param['dropout']))

    # Hidden blocks. input_shape is only meaningful on the first layer of a
    # Sequential model, so it is no longer passed here.
    for _ in range(param['depth']):
        model.add(Dense(param['nodes'], kernel_initializer=param['initializer']))
        model.add(BatchNormalization())
        model.add(activ_list[param['activation']])
        model.add(Dropout(param['dropout']))

    # Classification head.
    model.add(Dense(64, kernel_initializer=param['initializer']))
    model.add(Dropout(0.5))
    model.add(Dense(num_classes, kernel_initializer=param['initializer'], activation="softmax"))

    # attach noise layer
    if param['noise']:
        model.add(NoiseLayer(num_classes, dynamic=param['dynamic'], initializer=matrix_list[param['noise_init']]))
        model.add(Dropout(0.1))
        model.add(SoftMaxLayer(num_classes))

    model.compile(loss=keras.losses.categorical_crossentropy,
                  optimizer=optim_list[param['optimizer']],
                  metrics=[keras.metrics.categorical_accuracy])
    # summary() returns None, so print(model.summary()) logged a stray "None"
    # and the table itself bypassed the logging print. Route each summary line
    # through the notebook's print wrapper instead.
    model.summary(print_fn=print)
    return model

In [64]:
# Restrict the run to the first sampled default set and the first two sampled
# dynamic sets (positional slices).
default_params, dynamic_params = default_params.iloc[:1], dynamic_params.iloc[:2]

# Cell output: shape of the training feature matrix.
x_train.shape

(116009, 128)

# DEFAULT MODEL

In [None]:
# Evaluate the default model (no noise layer attached) on the first three CV folds.
results = []
best_score = 0
pred_saved = None  # test-set predictions of the best-scoring fit seen so far
y_test_labels = np.argmax(y_test, axis=1)  # one-hot rows -> class indices
for h, param in enumerate(default_params):
    cvscores = []
    for j, (train_idx, val_idx) in enumerate(folds[0:3]):
        print("===Default - Fold ",j," - Param set number ",h," ======")

        x_train_cv = x_train[train_idx]
        y_train_cv = y_train[train_idx]
        x_valid_cv = x_train[val_idx]
        y_valid_cv = y_train[val_idx]

        model = build_model(param)
        model.fit(x_train_cv, y_train_cv,
                  batch_size=batch_size,
                  epochs=epochs,
                  verbose=0,
                  validation_data=(x_valid_cv, y_valid_cv),
                  callbacks=[print_callback])

        # Score on the held-out test table; f1_score expects (y_true, y_pred).
        pred = model.predict(x_test, verbose=0)
        f1 = f1_score(y_test_labels, np.argmax(pred, axis=1), average='micro')
        cvscores.append(f1)
        if f1 > best_score:
            # Remember the best score as well; previously best_score stayed 0,
            # so the saved predictions were simply those of the last fit.
            best_score = f1
            pred_saved = pred

    results.append({"parameters": param, "f1": np.mean(cvscores), "std": np.std(cvscores)})

# Ascending by mean F1, so the best parameter set is last.
results.sort(key=lambda x: x['f1'])
with open('../Data/default_results.pickle', 'wb') as fileObj:
    pickle.dump(results, fileObj)

with open('../Data/default_pred.pickle', 'wb') as fileObj:
    pickle.dump(pred_saved, fileObj)

print("DONE AND SAVED SORTED RESULTS")

_________________________________________________________________
Layer (type)                 Output Shape              Param #   
dense_53 (Dense)             (None, 312)               40248     
_________________________________________________________________
batch_normalization_35 (Batc (None, 312)               1248      
_________________________________________________________________
leaky_re_lu_10 (LeakyReLU)   multiple                  0         
_________________________________________________________________
dropout_44 (Dropout)         (None, 312)               0         
_________________________________________________________________
dense_54 (Dense)             (None, 248)               77624     
_________________________________________________________________
batch_normalization_36 (Batc (None, 248)               992       
_________________________________________________________________
leaky_re_lu_10 (LeakyReLU)   multiple                  0         
__________

# STATIC MODEL

In [7]:
# Evaluate the static-noise model: train with the fixed noise layer attached,
# then score both the full model and the base model with the noise layers removed.
results = []
y_test_labels = np.argmax(y_test, axis=1)  # one-hot rows -> class indices
for h, param in enumerate(static_params):
    cvscores = []   # F1 with the noise layers attached
    cvscores2 = []  # F1 of the base model only
    for j, (train_idx, val_idx) in enumerate(folds[0:3]):
        print("===Static - Fold ",j," - Param set number ",h," ======")

        x_train_cv = x_train[train_idx]
        y_train_cv = y_train[train_idx]
        x_valid_cv = x_train[val_idx]
        y_valid_cv = y_train[val_idx]

        model = build_model(param)
        model.fit(x_train_cv, y_train_cv,
                  batch_size=batch_size,
                  epochs=epochs,
                  verbose=0,
                  validation_data=(x_valid_cv, y_valid_cv),
                  callbacks=[print_callback])

        # f1_score expects (y_true, y_pred).
        pred = model.predict(x_test, verbose=0)
        f1 = f1_score(y_test_labels, np.argmax(pred, axis=1), average='micro')
        cvscores.append(f1)

        #remove the noise layers to reveal what the base model learnt
        # (layers[-4] is the layer just before NoiseLayer, Dropout, SoftMaxLayer)
        model2 = Model(model.input, model.layers[-4].output)
        pred2 = model2.predict(x_test, verbose=0)
        f12 = f1_score(y_test_labels, np.argmax(pred2, axis=1), average='micro')
        cvscores2.append(f12)

    # "f1"/"std" describe the base model and the "*-noise-on" entries the full
    # model. "std" used to be computed from the noise-on scores although "f1"
    # is the base-model mean.
    results.append({"parameters": param,
                    "f1": np.mean(cvscores2), "std": np.std(cvscores2),
                    "f1-noise-on": np.mean(cvscores), "std-noise-on": np.std(cvscores)})

# Ascending by base-model mean F1, so the best parameter set is last.
results.sort(key=lambda x: x['f1'])
with open('../Data/static_results.pickle', 'wb') as fileObj:
    pickle.dump(results, fileObj)

print("DONE AND SAVED SORTED RESULTS")

DONE AND SAVED SORTED RESULTS


# DYNAMIC MODEL

In [11]:
# Evaluate the dynamic-noise model: train with the trainable noise layer attached,
# then score both the full model and the base model with the noise layers removed.
results = []
best_score = 0
pred_saved = None  # base-model test predictions of the best-scoring fit so far
y_test_labels = np.argmax(y_test, axis=1)  # one-hot rows -> class indices
for h, param in enumerate(dynamic_params[:40]):
    cvscores = []   # F1 with the noise layers attached
    cvscores2 = []  # F1 of the base model only
    for j, (train_idx, val_idx) in enumerate(folds[0:3]):
        print("===Dynamic - Fold ",j," - Param set number ",h," ======")

        x_train_cv = x_train[train_idx]
        y_train_cv = y_train[train_idx]
        x_valid_cv = x_train[val_idx]
        y_valid_cv = y_train[val_idx]

        model = build_model(param)
        model.fit(x_train_cv, y_train_cv,
                  batch_size=batch_size,
                  epochs=epochs,
                  verbose=0,
                  validation_data=(x_valid_cv, y_valid_cv),
                  callbacks=[print_callback])

        # f1_score expects (y_true, y_pred).
        pred = model.predict(x_test, verbose=0)
        f1 = f1_score(y_test_labels, np.argmax(pred, axis=1), average='micro')
        cvscores.append(f1)

        #remove the noise layers to reveal what the base model learnt
        # (layers[-4] is the layer just before NoiseLayer, Dropout, SoftMaxLayer)
        model2 = Model(model.input, model.layers[-4].output)
        pred2 = model2.predict(x_test, verbose=0)
        f12 = f1_score(y_test_labels, np.argmax(pred2, axis=1), average='micro')
        cvscores2.append(f12)
        if f12 > best_score:
            # Remember the best score as well; previously best_score stayed 0,
            # so the saved predictions were simply those of the last fit.
            best_score = f12
            pred_saved = pred2

    # "f1"/"std" describe the base model and the "*-noise-on" entries the full
    # model. "std" used to be computed from the noise-on scores although "f1"
    # is the base-model mean.
    results.append({"parameters": param,
                    "f1": np.mean(cvscores2), "std": np.std(cvscores2),
                    "f1-noise-on": np.mean(cvscores), "std-noise-on": np.std(cvscores)})

# Ascending by base-model mean F1, so the best parameter set is last.
results.sort(key=lambda x: x['f1'])
with open('../Data/dynamic_results.pickle', 'wb') as fileObj:
    pickle.dump(results, fileObj)

with open('../Data/dynamic_pred.pickle', 'wb') as fileObj:
    pickle.dump(pred_saved, fileObj)

print("DONE AND SAVED SORTED RESULTS")

DONE AND SAVED SORTED RESULTS
