In [101]:
import tensorflow as tf
import pandas as pd
from sklearn.model_selection import train_test_split
import numpy as np
from sklearn.metrics import roc_auc_score

In [93]:
raw_data = pd.read_csv('train.csv')
Y_LABEL = 'y'                                   			        # Name of the variable to be predicted
KEYS = [i for i in raw_data.keys().tolist() if i != Y_LABEL]	# Name of predictors
N_INSTANCES = raw_data.shape[0]                     			    # Number of instances
N_INPUT = raw_data.shape[1] - 1                     			    # Input size
N_CLASSES = raw_data[Y_LABEL].unique().shape[0]     			    # Number of classes (output size)
TEST_SIZE = 0.1                                    			      # Test set size (% of dataset)
TRAIN_SIZE = int(N_INSTANCES * (1 - TEST_SIZE))     			    # Train size
LEARNING_RATE = 0.01                               			    # Learning rate
TRAINING_EPOCHS = 400                               			    # Number of epochs
BATCH_SIZE = 100                                    			    # Batch size
DISPLAY_STEP = 20                                    			    # Display progress each x epochs
HIDDEN_SIZE = [128, 128, 64, 32]	                                   		      # Number of hidden neurons 256
ACTIVATION_FUNCTION_OUT = tf.nn.softmax                          # Last layer act fct
STDDEV = 0.1                                        			    # Standard deviation (for weights random init)
RANDOM_STATE = 100								                            # Random state for train_test_split

In [94]:
# Load data
data = raw_data[KEYS].get_values()                  			# X data
labels = raw_data[Y_LABEL].get_values()  
labels_ = np.zeros((N_INSTANCES, N_CLASSES))
labels_[np.arange(N_INSTANCES), labels.astype(np.int0)] = 1

In [95]:
data_train, data_test, labels_train, labels_test = train_test_split(data,
                                                                    labels_,
                                                                    test_size = TEST_SIZE,
                                                                    random_state = RANDOM_STATE)

In [96]:
# Net params
n_input = N_INPUT                   # input n labels
n_hidden_1 = HIDDEN_SIZE[0]            # 1st layer
n_hidden_2 = HIDDEN_SIZE[1]            # 2nd layer
n_hidden_3 = HIDDEN_SIZE[2]            # 3rd layer
n_hidden_4 = HIDDEN_SIZE[3]            # 4th layer
n_classes = N_CLASSES               # output m classes

# regularization params
lambda_1 = .15
lambda_2 = .1

alpha_1 = .5
alpha_2 = .15

In [99]:
X = tf.placeholder(tf.float32, [None, n_input])
y = tf.placeholder(tf.float32, [None, n_classes])
dropout_keep_prob = tf.placeholder(tf.float32)

def mlp(_X, _weights, _biases, dropout_keep_prob):
    weighted_layer = tf.multiply(_weights['W'], _X)
    layer1 = tf.nn.dropout(tf.nn.relu(tf.add(tf.matmul(weighted_layer, _weights['h1']), _biases['b1'])), dropout_keep_prob)
    layer2 = tf.nn.dropout(tf.nn.relu(tf.add(tf.matmul(layer1, _weights['h2']), _biases['b2'])), dropout_keep_prob)
    layer3 = tf.nn.dropout(tf.nn.relu(tf.add(tf.matmul(layer2, _weights['h3']), _biases['b3'])), dropout_keep_prob)
    layer4 = tf.nn.dropout(tf.nn.relu(tf.add(tf.matmul(layer3, _weights['h4']), _biases['b4'])), dropout_keep_prob)
    out = ACTIVATION_FUNCTION_OUT(tf.add(tf.matmul(layer4, _weights['out']), _biases['out']))
    return out

weights = {
    'W': tf.Variable(tf.random_normal([n_input], stddev=STDDEV)),
    'h1': tf.Variable(tf.random_normal([n_input, n_hidden_1], stddev=STDDEV)),
    'h2': tf.Variable(tf.random_normal([n_hidden_1, n_hidden_2], stddev=STDDEV)),
    'h3': tf.Variable(tf.random_normal([n_hidden_2, n_hidden_3], stddev=STDDEV)),
    'h4': tf.Variable(tf.random_normal([n_hidden_3, n_hidden_4], stddev=STDDEV)),
    'out': tf.Variable(tf.random_normal([n_hidden_4, n_classes], stddev=STDDEV)),                                   
}

biases = {
    'b1': tf.Variable(tf.random_normal([n_hidden_1])),
    'b2': tf.Variable(tf.random_normal([n_hidden_2])),
    'b3': tf.Variable(tf.random_normal([n_hidden_3])),
    'b4': tf.Variable(tf.random_normal([n_hidden_4])),
    'out': tf.Variable(tf.random_normal([n_classes]))

}
hidden_weights = ['h1', 'h2', 'h3', 'h4', 'out']

# Build model
pred = mlp(X, weights, biases, dropout_keep_prob)

# Loss, regularization and optimizer
cost = tf.reduce_mean(tf.nn.softmax_cross_entropy_with_logits_v2(logits=pred, labels=y)) # softmax loss

regularizers = lambda_1*(((1-lambda_2)/2) * tf.nn.l2_loss(weights['W']) \
                         + lambda_2*tf.sqrt(tf.nn.l2_loss(weights['W']))) + \
alpha_1 * ((1 - alpha_2)/2 * tf.reduce_sum([tf.nn.l2_loss(weights[hidden]) \
                                            for hidden in hidden_weights]) + \
           alpha_2*tf.reduce_sum([tf.sqrt(tf.nn.l2_loss(weights[hidden])) for hidden in hidden_weights]))
cost += regularizers
optimizer = tf.train.AdamOptimizer(learning_rate=LEARNING_RATE).minimize(cost, var_list=tf.trainable_variables())

# Accuracy
correct_prediction = tf.equal(tf.argmax(pred, 1), tf.argmax(y, 1))
auc = tf.metrics.auc(tf.argmax(y, 1), tf.argmax(pred, 1), curve='ROC')
accuracy = tf.reduce_mean(tf.cast(correct_prediction, tf.float32))

In [100]:
# Initialize variables
init_all = tf.group(tf.global_variables_initializer(), tf.local_variables_initializer())

# Launch session
sess = tf.Session()
sess.run(init_all)

# Training loop
for epoch in range(TRAINING_EPOCHS):
    avg_cost = 0.
    total_batch = int(data_train.shape[0] / BATCH_SIZE)
    # Loop over all batches
    for i in range(total_batch):
        randidx = np.random.randint(int(TRAIN_SIZE), size=BATCH_SIZE)
        batch_xs = data_train[randidx, :]
        batch_ys = labels_train[randidx]
        # Fit using batched data
        sess.run(optimizer, feed_dict={X: batch_xs, y: batch_ys, dropout_keep_prob: 0.9})
        # Calculate average cost
        avg_cost += sess.run(cost, feed_dict={X: batch_xs, y: batch_ys, dropout_keep_prob:1.})/total_batch
    # Display progress
    if epoch % DISPLAY_STEP == 0:
        print (f"Epoch: {epoch:03d}/{TRAINING_EPOCHS:03d} cost: {avg_cost:.9f}")
        train_auc = sess.run(auc, feed_dict={X: batch_xs, y: batch_ys, dropout_keep_prob:1.})

        print (f"Training AUC: {train_auc}")


Epoch: 000/400 cost: 113.562293582
Training AUC: (0.0, 0.49999997)
Epoch: 020/400 cost: 0.575876468
Training AUC: (0.49999997, 0.5)
Epoch: 040/400 cost: 0.591606994
Training AUC: (0.5, 0.5)
Epoch: 060/400 cost: 0.593868223
Training AUC: (0.5, 0.5)
Epoch: 080/400 cost: 0.616684894
Training AUC: (0.5, 0.5)
Epoch: 100/400 cost: 0.579788496
Training AUC: (0.5, 0.5)
Epoch: 120/400 cost: 0.610240916
Training AUC: (0.5, 0.5)
Epoch: 140/400 cost: 0.580692808
Training AUC: (0.5, 0.5)
Epoch: 160/400 cost: 0.594021930
Training AUC: (0.5, 0.5)
Epoch: 180/400 cost: 0.576585743
Training AUC: (0.5, 0.5)
Epoch: 200/400 cost: 0.612409406
Training AUC: (0.5, 0.5)
Epoch: 220/400 cost: 0.595846580
Training AUC: (0.5, 0.5)
Epoch: 240/400 cost: 0.596026937
Training AUC: (0.5, 0.5)
Epoch: 260/400 cost: 0.601577169
Training AUC: (0.5, 0.5)
Epoch: 280/400 cost: 0.615422997
Training AUC: (0.5, 0.5)
Epoch: 300/400 cost: 0.596251574
Training AUC: (0.5, 0.5)
Epoch: 320/400 cost: 0.584533248
Training AUC: (0.5, 0.5

In [90]:
test_auc = sess.run(auc, feed_dict={X: data_test, y: labels_test, dropout_keep_prob:1.})
