In [1]:
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import os
import tensorflow as tf
from sklearn.model_selection import train_test_split

% matplotlib inline

  from ._conv import register_converters as _register_converters


In [2]:
# import the data
bcw_data = pd.read_csv(os.path.join("data", "uci", "breast-cancer-wisconsin.data"), na_values="?", header=None)

# set the column names
bcw_data.columns = ["ID","THICKNESS","CELL_SIZE_UNIFORMITY","CELL_SHAPE_UNIFORMITY","MARGINAL_ADHESION","EPI_CELL_SIZE","BARE_NUCLEI","BLAND_CHROMATIN","NORMAL_NUCLEOLI","MITOSES","CLASS"]

# remove NAs
bcw_data = bcw_data.dropna(axis=0, how="any")
bcw_data = bcw_data.drop("ID", axis=1)

y1 = bcw_data.pop("CLASS").values
y1 = (y1 / 2) - 1
X1 = bcw_data.values

# split the data
X_tr, X_te, y_tr, y_te = train_test_split(X1, y1, test_size=0.2, random_state=0)
print("X_tr:", X_tr.shape)
print("X_te:", X_te.shape)
print("y_tr:", y_tr.shape)
print("y_te:", y_te.shape)

X_tr: (546, 9)
X_te: (137, 9)
y_tr: (546,)
y_te: (137,)


In [3]:
np.unique(y1)

array([0., 1.])

In [4]:
num_classes = len(np.unique(y1))
batch_size = 64
steps_per_epoch = X_tr.shape[0] / batch_size

# Batch generator
def get_batches(X, y, batch_size):
    # Shuffle X,y
    shuffled_idx = np.arange(len(y))
    np.random.shuffle(shuffled_idx)

    # Enumerate indexes by steps of batch_size
    for i in range(0, len(y), batch_size):
        # Batch indexes
        batch_idx = shuffled_idx[i:i+batch_size]
        
        yield X[batch_idx], y[batch_idx]

In [5]:
## MODEL 7.20.2.9j
# Create new graph
graph = tf.Graph()
model_name = "model_0.0.1"

with graph.as_default():
    # Placeholders
    X = tf.placeholder(dtype=tf.float32, shape=[None, 9])
    y = tf.placeholder(dtype=tf.int32, shape=[None])
    training = tf.placeholder(dtype=tf.bool)
    
    # create global step for decaying learning rate
    global_step = tf.Variable(0, trainable=False)
    
    # learning rate o
    epochs_per_decay = 20
    starting_rate = 0.1
    decay_factor = 0.80
    staircase = True
    
    learning_rate = tf.train.exponential_decay(starting_rate,                 # start at 0.003
                                               global_step, 
                                               steps_per_epoch * epochs_per_decay,       # 100 epochs
                                               decay_factor,                   # 0.5 decrease
                                               staircase=staircase) 
    
    # Hidden layer with 64 units
    with tf.name_scope('local1') as scope:
        hidden = tf.layers.dense(
            X,                              # input
            64,                             # 64 units
            activation=tf.nn.relu,          # activation
            kernel_initializer=tf.variance_scaling_initializer(scale=2, seed=0),  # kernel initializer
            bias_initializer=tf.zeros_initializer(), # bias
            kernel_regularizer=tf.contrib.layers.l2_regularizer(scale=0.01),
            name='hidden'                   # name
        )

        # Apply dropout
        #hidden = tf.layers.dropout(hidden, rate=0.5, seed=0, training=training)
    
    with tf.name_scope('local2') as scope:
        hidden2 = tf.layers.dense(
            hidden,                         # input
            48,                             # 48 units
            activation=tf.nn.relu,          # activation
            kernel_initializer=tf.variance_scaling_initializer(scale=2, seed=0),  # kernel initializer
            bias_initializer=tf.zeros_initializer(), # bias
            kernel_regularizer=tf.contrib.layers.l2_regularizer(scale=0.01),
            name='hidden2'                  # name
        )
    
    # Output layer
    logits = tf.layers.dense(
        hidden2,                       # input
        num_classes,                             # 4 units
        activation=None,               # No activation function
        kernel_initializer=tf.variance_scaling_initializer(scale=1, seed=0),
        bias_initializer=tf.zeros_initializer(),
        kernel_regularizer=tf.contrib.layers.l2_regularizer(scale=0.01),
        name='output'
    )
    
    # Loss fuction: mean cross-entropy
    mean_ce = tf.reduce_mean(tf.nn.sparse_softmax_cross_entropy_with_logits(labels=y, logits=logits))
    
    # easier way to handle regularization loss
    loss = mean_ce + tf.losses.get_regularization_loss()
    
    # Gradient descent
    gd = tf.train.GradientDescentOptimizer(learning_rate=learning_rate)

    # Minimize loss
    train_op = gd.minimize(loss, global_step=global_step)

    # Compute predictions and accuracy
    predictions = tf.argmax(logits, axis=1, output_type=tf.int32)
    is_correct = tf.equal(y, predictions)
    accuracy = tf.reduce_mean(tf.cast(is_correct, dtype=tf.float32))

In [6]:
# Validation accuracy
valid_acc_values = []
tr_acc_values = []
lr_values = []
cv_cost_values = []
tr_cost_values = []
config = tf.ConfigProto(device_count = {'GPU': 0})

with tf.Session(graph=graph, config=config) as sess:
    # Initialize variables
    sess.run(tf.global_variables_initializer())
    
    saver = tf.train.Saver()
    
    # Set seed
    np.random.seed(0)

    # Train several epochs
    for epoch in range(100):
        # Accuracy values (train) after each batch
        batch_acc = []
        batch_cost = []

        # Get batches of data
        for X_batch, y_batch in get_batches(X_tr, y_tr, batch_size):
            # Run training and evaluate accuracy
            _, acc_value, lr, tr_cost = sess.run([train_op, accuracy, learning_rate, loss], feed_dict={
                X: X_batch,
                y: y_batch,
                training: True # Apply dropout
            })

            # Save accuracy (current batch)
            batch_acc.append(acc_value)
            batch_cost.append(tr_cost)

            # Evaluate validation accuracy
            valid_acc, cv_cost = sess.run([accuracy, loss], feed_dict={
                X: X_te,
                y: y_te,
                training: False # Do not apply dropout
            })
            valid_acc_values.append(valid_acc)
            tr_acc_values.append(np.mean(batch_acc))
            cv_cost_values.append(cv_cost)
            tr_cost_values.append(np.mean(batch_cost))
            lr_values.append(lr)

        # Print progress
        if epoch % 5 == 0:
            print('Epoch {} - valid: {:.3f} train: {:.3f} (mean) learning rate {:.3f}'.format(
                epoch+1, valid_acc, np.mean(batch_acc), lr
            ))
    
    save_path = saver.save(sess, "./model/nn_model.ckpt")
    
    test_acc, yhat = sess.run([accuracy, predictions], feed_dict = {
        X: X_te,
        y: y_te,
        training: False
    })
    
print("Test accuracy:", test_acc)
yhat

Epoch 1 - valid: 0.825 train: 0.685 (mean) learning rate 0.100
Epoch 6 - valid: 0.905 train: 0.941 (mean) learning rate 0.100
Epoch 11 - valid: 0.956 train: 0.955 (mean) learning rate 0.100
Epoch 16 - valid: 0.942 train: 0.967 (mean) learning rate 0.100
Epoch 21 - valid: 0.949 train: 0.972 (mean) learning rate 0.080
Epoch 26 - valid: 0.971 train: 0.979 (mean) learning rate 0.080
Epoch 31 - valid: 0.942 train: 0.981 (mean) learning rate 0.080
Epoch 36 - valid: 0.971 train: 0.981 (mean) learning rate 0.080
Epoch 41 - valid: 0.942 train: 0.981 (mean) learning rate 0.064
Epoch 46 - valid: 0.964 train: 0.975 (mean) learning rate 0.064
Epoch 51 - valid: 0.971 train: 0.978 (mean) learning rate 0.064
Epoch 56 - valid: 0.964 train: 0.970 (mean) learning rate 0.064
Epoch 61 - valid: 0.956 train: 0.979 (mean) learning rate 0.051
Epoch 66 - valid: 0.964 train: 0.984 (mean) learning rate 0.051
Epoch 71 - valid: 0.956 train: 0.981 (mean) learning rate 0.051
Epoch 76 - valid: 0.956 train: 0.981 (mean

array([0, 0, 1, 1, 0, 0, 0, 1, 0, 0, 1, 0, 1, 0, 0, 0, 1, 1, 1, 0, 0, 0,
       1, 0, 1, 1, 0, 0, 0, 1, 0, 1, 1, 0, 0, 0, 1, 1, 0, 1, 0, 0, 0, 0,
       0, 0, 0, 1, 0, 0, 1, 0, 1, 0, 0, 0, 1, 1, 0, 1, 0, 0, 0, 0, 0, 0,
       0, 0, 1, 1, 0, 0, 0, 0, 0, 0, 1, 0, 0, 0, 1, 0, 1, 0, 0, 1, 0, 1,
       1, 0, 1, 0, 1, 0, 1, 1, 1, 1, 1, 0, 0, 0, 1, 1, 0, 0, 1, 1, 0, 0,
       1, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 1, 0, 0, 1, 1, 0, 1, 0, 1, 0, 0,
       1, 0, 0, 1, 0])

## Wisconsin Diagnostic Breast Cancer (WDBC)

In [10]:
# import the data
wdbc_data = pd.read_csv(os.path.join("data", "uci", "wdbc.data"), na_values="?", header=None)

# set the column names
wdbc_data=wdbc_data.rename(columns = {0:'ID', 1:"CLASS", 2: "MEAN_RADIUS", 3: "MEAN_TEXTURE", 4: "MEAN_PERIMETER", 5: "MEAN_AREA", 6: "MEAN_SMOOTHNESS", 7: "MEAN_COMPACTNESS", 8: "MEAN_CONCAVITY", 9:"MEAN_CONCAVE_POINTS", 10: "MEAN_SYMMETRY", 11: "MEAN_FRACTAL_DIMENSIONS", 12: "RADIUS_SE", 13: "TEXTURE_SE", 14: "PERIMETER_SE", 15: "AREA_SE", 16: "SMOOTHNESS_SE", 17: "COMPACTNESS_SE", 18: "CONCAVITY_SE", 19: "CONCAVE_POINTS_SE", 20: "SYMMETRY_SE",21: "FRACTAL_DIMENSIONS_SE", 22: "WORST_RADIUS", 23: "WORST_TEXTURE", 24: "WORST_PERIMETER", 25: "WORST_AREA", 26: "WORST_SMOOTHNESS", 27: "WORST_COMPACTNESS", 28: "WORST_CONCAVITY", 29: "WORST_CONCAVE_POINTS", 30: "WORST_SYMMETRY", 31: "WORST_FRACTAL_DIMENSIONS"})
wdbc_data = wdbc_data.dropna(axis=0, how="any")

y2 = wdbc_data.pop("CLASS").values
labels = np.zeros(len(y2))
labels[y2 == 'M'] = 1
X2 = wdbc_data.drop(["ID"], axis=1).values

# split the data
X_tr, X_te, y_tr, y_te = train_test_split(X2, labels, test_size=0.2, random_state=0)
print("X_tr:", X_tr.shape)
print("X_te:", X_te.shape)
print("y_tr:", y_tr.shape)
print("y_te:", y_te.shape)

X_tr: (455, 30)
X_te: (114, 30)
y_tr: (455,)
y_te: (114,)


In [11]:
pd.value_counts(labels, normalize=True)

0.0    0.627417
1.0    0.372583
dtype: float64

In [14]:
# Create new graph
graph = tf.Graph()
model_name = "model_0.0.2"

with graph.as_default():
    # Placeholders
    X = tf.placeholder(dtype=tf.float32, shape=[None, 30])
    y = tf.placeholder(dtype=tf.int32, shape=[None])
    training = tf.placeholder(dtype=tf.bool)
    
    # create global step for decaying learning rate
    global_step = tf.Variable(0, trainable=False)
    
    # learning rate o
    epochs_per_decay = 30
    starting_rate = 0.1
    decay_factor = 0.80
    staircase = True
    
    learning_rate = tf.train.exponential_decay(starting_rate,                 # start at 0.003
                                               global_step, 
                                               steps_per_epoch * epochs_per_decay,       # 100 epochs
                                               decay_factor,                   # 0.5 decrease
                                               staircase=staircase) 
    
    # Hidden layer with 64 units
    with tf.name_scope('local1') as scope:
        hidden = tf.layers.dense(
            X,                              # input
            64,                             # 64 units
            activation=tf.nn.relu,          # activation
            kernel_initializer=tf.variance_scaling_initializer(scale=2, seed=0),  # kernel initializer
            bias_initializer=tf.zeros_initializer(), # bias
            kernel_regularizer=tf.contrib.layers.l2_regularizer(scale=0.01),
            name='hidden'                   # name
        )

        # Apply dropout
        #hidden = tf.layers.dropout(hidden, rate=0.5, seed=0, training=training)
    
    with tf.name_scope('local2') as scope:
        hidden2 = tf.layers.dense(
            hidden,                         # input
            32,                             # 48 units
            activation=tf.nn.relu,          # activation
            kernel_initializer=tf.variance_scaling_initializer(scale=2, seed=0),  # kernel initializer
            bias_initializer=tf.zeros_initializer(), # bias
            kernel_regularizer=tf.contrib.layers.l2_regularizer(scale=0.01),
            name='hidden2'                  # name
        )
    
    with tf.name_scope('local3') as scope:
        hidden3 = tf.layers.dense(
            hidden2,                         # input
            32,                             # 48 units
            activation=tf.nn.relu,          # activation
            kernel_initializer=tf.variance_scaling_initializer(scale=2, seed=0),  # kernel initializer
            bias_initializer=tf.zeros_initializer(), # bias
            kernel_regularizer=tf.contrib.layers.l2_regularizer(scale=0.01),
            name='hidden3'                  # name
        )
        
    # Output layer
    logits = tf.layers.dense(
        hidden3,                       # input
        num_classes,                             # 4 units
        activation=None,               # No activation function
        kernel_initializer=tf.variance_scaling_initializer(scale=1, seed=0),
        bias_initializer=tf.zeros_initializer(),
        kernel_regularizer=tf.contrib.layers.l2_regularizer(scale=0.01),
        name='output'
    )
    
    # Loss fuction: mean cross-entropy
    mean_ce = tf.reduce_mean(tf.nn.sparse_softmax_cross_entropy_with_logits(labels=y, logits=logits))
    
    # easier way to handle regularization loss
    loss = mean_ce + tf.losses.get_regularization_loss()
    
    # Gradient descent
    gd = tf.train.GradientDescentOptimizer(learning_rate=learning_rate)

    # Minimize loss
    train_op = gd.minimize(loss, global_step=global_step)

    # Compute predictions and accuracy
    predictions = tf.argmax(logits, axis=1, output_type=tf.int32)
    is_correct = tf.equal(y, predictions)
    accuracy = tf.reduce_mean(tf.cast(is_correct, dtype=tf.float32))

In [15]:
# Validation accuracy
valid_acc_values = []
tr_acc_values = []
lr_values = []
cv_cost_values = []
tr_cost_values = []
config = tf.ConfigProto(device_count = {'GPU': 0})

with tf.Session(graph=graph, config=config) as sess:
    # Initialize variables
    sess.run(tf.global_variables_initializer())
    
    saver = tf.train.Saver()
    
    # Set seed
    np.random.seed(0)

    # Train several epochs
    for epoch in range(100):
        # Accuracy values (train) after each batch
        batch_acc = []
        batch_cost = []

        # Get batches of data
        for X_batch, y_batch in get_batches(X_tr, y_tr, batch_size):
            # Run training and evaluate accuracy
            _, acc_value, lr, tr_cost = sess.run([train_op, accuracy, learning_rate, loss], feed_dict={
                X: X_batch,
                y: y_batch,
                training: True # Apply dropout
            })

            # Save accuracy (current batch)
            batch_acc.append(acc_value)
            batch_cost.append(tr_cost)

            # Evaluate validation accuracy
            valid_acc, cv_cost = sess.run([accuracy, loss], feed_dict={
                X: X_te,
                y: y_te,
                training: False # Do not apply dropout
            })
            valid_acc_values.append(valid_acc)
            tr_acc_values.append(np.mean(batch_acc))
            cv_cost_values.append(cv_cost)
            tr_cost_values.append(np.mean(batch_cost))
            lr_values.append(lr)

        # Print progress
        if epoch % 5 == 0:
            print('Epoch {} - valid: {:.3f} train: {:.3f} (mean) learning rate {:.3f}'.format(
                epoch+1, valid_acc, np.mean(batch_acc), lr
            ))
    
    save_path = saver.save(sess, "./model/nn_model.ckpt")
    
    test_acc, yhat = sess.run([accuracy, predictions], feed_dict = {
        X: X_te,
        y: y_te,
        training: False
    })
    
print("Test accuracy:", test_acc)
yhat

Epoch 1 - valid: 0.588 train: 0.498 (mean) learning rate 0.100
Epoch 6 - valid: 0.588 train: 0.614 (mean) learning rate 0.100
Epoch 11 - valid: 0.588 train: 0.630 (mean) learning rate 0.100
Epoch 16 - valid: 0.588 train: 0.646 (mean) learning rate 0.100
Epoch 21 - valid: 0.588 train: 0.630 (mean) learning rate 0.100
Epoch 26 - valid: 0.588 train: 0.662 (mean) learning rate 0.100
Epoch 31 - valid: 0.588 train: 0.630 (mean) learning rate 0.100
Epoch 36 - valid: 0.588 train: 0.646 (mean) learning rate 0.080
Epoch 41 - valid: 0.588 train: 0.646 (mean) learning rate 0.080
Epoch 46 - valid: 0.588 train: 0.646 (mean) learning rate 0.080
Epoch 51 - valid: 0.588 train: 0.678 (mean) learning rate 0.080
Epoch 56 - valid: 0.588 train: 0.630 (mean) learning rate 0.080
Epoch 61 - valid: 0.588 train: 0.662 (mean) learning rate 0.080
Epoch 66 - valid: 0.588 train: 0.678 (mean) learning rate 0.064
Epoch 71 - valid: 0.588 train: 0.630 (mean) learning rate 0.064
Epoch 76 - valid: 0.588 train: 0.614 (mean

array([0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
       0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
       0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
       0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
       0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
       0, 0, 0, 0])

## Wisconsin Prognostic Breast Cancer (WPBC)

In [38]:
# import the data
wpbc_data = pd.read_csv(os.path.join("data", "uci", "wpbc.data"), na_values="?", header=None)

# set the column names
wpbc_data=wpbc_data.rename(columns = {0:'ID', 1:"CLASS", 2: "TIME", 3: "MEAN_RADIUS", 4: "MEAN_TEXTURE", 5: "MEAN_PERIMITER", 6: "MEAN_AREA", 7: "MEAN_SMOOTHNESS", 8: "MEAN_COMPACTNESS", 9: "MEAN_CONCAVITY", 10:"MEAN_CONCAVE_POINTS", 11: "MEAN_SYMMETRY", 12: "MEAN_FRACTAL_DIMENSIONS", 13: "RADIUS_SE", 14: "TEXTURE_SE", 15: "PERIMETER_SE", 16: "AREA_SE", 17: "SMOOTHNESS_SE", 18: "COMPACTNESS_SE", 19: "CONCAVITY_SE", 20: "CONCAVE_POINTS_SE", 21: "SYMMETRY_SE",22: "FRACTAL_DIMENSIONS_SE", 23: "WORST_RADIUS", 24: "WORST_TEXTURE", 25: "WORST_PERIMETER", 26: "WORST_AREA", 27: "WORST_SMOOTHNESS", 28: "WORST_COMPACTNESS", 29: "WORST_CONCAVITY", 30: "WORST_CONCAVE_POINTS", 31: "WORST_SYMMETRY", 32: "WORST_FRACTAL_DIMENSIONS", 33: "TUMOR_SIZE", 34: "LYMPH_STATUS"})

wpbc_data = wpbc_data.dropna(axis=0, how="any")

wpbc_data['OUTCOME'] = 0
wpbc_data['OUTCOME'][(wpbc_data.CLASS == "R") & (wpbc_data.TIME <= 24)] = 1

y_class = wpbc_data.pop("CLASS").values
y3 = wpbc_data.pop("OUTCOME").values
X3 = wpbc_data.drop(["ID","TIME"], axis=1)

# split the data
X_tr, X_te, y_tr, y_te, y_class_tr, y_class_te = train_test_split(X3.values, y3, y_class, test_size=0.2, random_state=1)
print("X_tr:", X_tr.shape)
print("X_te:", X_te.shape)
print("y_tr:", y_tr.shape)
print("y_te:", y_te.shape)
print("y_class_tr:", y_class_tr.shape)
print("y_class_te:", y_class_te.shape)

X_tr: (155, 32)
X_te: (39, 32)
y_tr: (155,)
y_te: (39,)
y_class_tr: (155,)
y_class_te: (39,)


A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: http://pandas.pydata.org/pandas-docs/stable/indexing.html#indexing-view-versus-copy
  # Remove the CWD from sys.path while we load stuff.


In [39]:
pd.value_counts(y3, normalize=True)

0    0.85567
1    0.14433
dtype: float64

In [40]:
pd.value_counts(y_te, normalize=True)

0    0.820513
1    0.179487
dtype: float64

In [43]:
## MODEL 7.20.2.9j
# Create new graph
graph = tf.Graph()
model_name = "model_0.0.3"

with graph.as_default():
    # Placeholders
    X = tf.placeholder(dtype=tf.float32, shape=[None, 32])
    y = tf.placeholder(dtype=tf.int32, shape=[None])
    training = tf.placeholder(dtype=tf.bool)
    
    # create global step for decaying learning rate
    global_step = tf.Variable(0, trainable=False)
    
    # learning rate o
    epochs_per_decay = 20
    starting_rate = 0.1
    decay_factor = 0.80
    staircase = True
    
    learning_rate = tf.train.exponential_decay(starting_rate,                 # start at 0.003
                                               global_step, 
                                               steps_per_epoch * epochs_per_decay,       # 100 epochs
                                               decay_factor,                   # 0.5 decrease
                                               staircase=staircase) 
    
    # Hidden layer with 64 units
    with tf.name_scope('local1') as scope:
        hidden = tf.layers.dense(
            X,                              # input
            64,                             # 64 units
            activation=tf.nn.relu,          # activation
            kernel_initializer=tf.variance_scaling_initializer(scale=2, seed=0),  # kernel initializer
            bias_initializer=tf.zeros_initializer(), # bias
            kernel_regularizer=tf.contrib.layers.l2_regularizer(scale=0.01),
            name='hidden'                   # name
        )

        # Apply dropout
        hidden = tf.layers.dropout(hidden, rate=0.5, seed=0, training=training)
    
    with tf.name_scope('local2') as scope:
        hidden2 = tf.layers.dense(
            hidden,                         # input
            48,                             # 48 units
            activation=tf.nn.relu,          # activation
            kernel_initializer=tf.variance_scaling_initializer(scale=2, seed=0),  # kernel initializer
            bias_initializer=tf.zeros_initializer(), # bias
            kernel_regularizer=tf.contrib.layers.l2_regularizer(scale=0.01),
            name='hidden2'                  # name
        )
        
        hidden2 = tf.layers.dropout(hidden2, rate=0.5, seed=0, training=training)
        
    # Output layer
    logits = tf.layers.dense(
        hidden2,                       # input
        num_classes,                             # 4 units
        activation=None,               # No activation function
        kernel_initializer=tf.variance_scaling_initializer(scale=1, seed=0),
        bias_initializer=tf.zeros_initializer(),
        kernel_regularizer=tf.contrib.layers.l2_regularizer(scale=0.01),
        name='output'
    )
    
    # Loss fuction: mean cross-entropy
    mean_ce = tf.reduce_mean(tf.nn.sparse_softmax_cross_entropy_with_logits(labels=y, logits=logits))
    
    # easier way to handle regularization loss
    loss = mean_ce + tf.losses.get_regularization_loss()
    
    # Gradient descent
    gd = tf.train.GradientDescentOptimizer(learning_rate=learning_rate)

    # Minimize loss
    train_op = gd.minimize(loss, global_step=global_step)

    # Compute predictions and accuracy
    predictions = tf.argmax(logits, axis=1, output_type=tf.int32)
    is_correct = tf.equal(y, predictions)
    accuracy = tf.reduce_mean(tf.cast(is_correct, dtype=tf.float32))

In [44]:
# Validation accuracy
valid_acc_values = []
tr_acc_values = []
lr_values = []
cv_cost_values = []
tr_cost_values = []
config = tf.ConfigProto(device_count = {'GPU': 0})

with tf.Session(graph=graph, config=config) as sess:
    # Initialize variables
    sess.run(tf.global_variables_initializer())
    
    saver = tf.train.Saver()
    
    # Set seed
    np.random.seed(0)

    # Train several epochs
    for epoch in range(100):
        # Accuracy values (train) after each batch
        batch_acc = []
        batch_cost = []

        # Get batches of data
        for X_batch, y_batch in get_batches(X_tr, y_tr, batch_size):
            # Run training and evaluate accuracy
            _, acc_value, lr, tr_cost = sess.run([train_op, accuracy, learning_rate, loss], feed_dict={
                X: X_batch,
                y: y_batch,
                training: True # Apply dropout
            })

            # Save accuracy (current batch)
            batch_acc.append(acc_value)
            batch_cost.append(tr_cost)

            # Evaluate validation accuracy
            valid_acc, cv_cost = sess.run([accuracy, loss], feed_dict={
                X: X_te,
                y: y_te,
                training: False # Do not apply dropout
            })
            valid_acc_values.append(valid_acc)
            tr_acc_values.append(np.mean(batch_acc))
            cv_cost_values.append(cv_cost)
            tr_cost_values.append(np.mean(batch_cost))
            lr_values.append(lr)

        # Print progress
        if epoch % 5 == 0:
            print('Epoch {} - valid: {:.3f} train: {:.3f} (mean) learning rate {:.3f}'.format(
                epoch+1, valid_acc, np.mean(batch_acc), lr
            ))
    
    save_path = saver.save(sess, "./model/nn_model.ckpt")
    
    test_acc, yhat = sess.run([accuracy, predictions], feed_dict = {
        X: X_te,
        y: y_te,
        training: False
    })
    
print("Test accuracy:", test_acc)
yhat

Epoch 1 - valid: 0.744 train: 0.581 (mean) learning rate 0.100
Epoch 6 - valid: 0.821 train: 0.876 (mean) learning rate 0.100
Epoch 11 - valid: 0.821 train: 0.869 (mean) learning rate 0.100
Epoch 16 - valid: 0.821 train: 0.869 (mean) learning rate 0.100
Epoch 21 - valid: 0.821 train: 0.862 (mean) learning rate 0.100
Epoch 26 - valid: 0.821 train: 0.862 (mean) learning rate 0.100
Epoch 31 - valid: 0.821 train: 0.862 (mean) learning rate 0.100
Epoch 36 - valid: 0.821 train: 0.848 (mean) learning rate 0.100
Epoch 41 - valid: 0.821 train: 0.862 (mean) learning rate 0.100
Epoch 46 - valid: 0.821 train: 0.869 (mean) learning rate 0.100
Epoch 51 - valid: 0.821 train: 0.855 (mean) learning rate 0.100
Epoch 56 - valid: 0.821 train: 0.855 (mean) learning rate 0.100
Epoch 61 - valid: 0.821 train: 0.869 (mean) learning rate 0.080
Epoch 66 - valid: 0.821 train: 0.869 (mean) learning rate 0.080
Epoch 71 - valid: 0.821 train: 0.862 (mean) learning rate 0.080
Epoch 76 - valid: 0.821 train: 0.869 (mean

array([0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
       0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0])