In [1]:
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import os
import tensorflow as tf
from IPython import display
from sklearn.model_selection import train_test_split
%matplotlib inline

In [2]:
# Load the data set and carve it into train / cross-validation / test splits.
# Tunable settings are named here instead of being buried in the calls below.
DATA_PATH = os.path.join('data', 'cifar10-20k.npz')
TEST_FRACTION = 0.1    # share of the full data set held out for testing
CV_FRACTION = 0.08     # share of the remaining data held out for cross-validation
SPLIT_SEED = 1         # fixed seed so both splits are reproducible

# allow_pickle=False: only plain arrays are read, never pickled objects.
with np.load(DATA_PATH, allow_pickle=False) as npz_file:
    data = dict(npz_file.items())

y_full = data['labels']
names = data['names']

# Reshape to (n_images, 32, 32, 3) and scale by 1/255
# (assumes 8-bit pixel intensities in the file -- TODO confirm).
X_full = data['data'].reshape(-1, 32, 32, 3) / 255

# One class per entry in `names`.
num_classes = names.shape[0]

# First split: hold out the test set.
# train_test_split is already imported in the first cell.
X_tr_full, X_te, y_tr_full, y_te = train_test_split(
    X_full, y_full, test_size=TEST_FRACTION, random_state=SPLIT_SEED)

# Second split: divide what is left into training and cross-validation sets.
X_tr, X_cv, y_tr, y_cv = train_test_split(
    X_tr_full, y_tr_full, test_size=CV_FRACTION, random_state=SPLIT_SEED)

print("X_tr", X_tr.shape)
print("X_cv", X_cv.shape)
print("X_te", X_te.shape)

X_tr (16560, 32, 32, 3)
X_cv (1440, 32, 32, 3)
X_te (2000, 32, 32, 3)


In [21]:
class Graph(object):
    """Convolutional image classifier built with the TensorFlow 1.x graph API.

    The constructor builds the whole computation graph (two conv / pool / LRN
    stages, two dense layers and a linear output layer) for 32x32x3 inputs,
    together with the loss, the Adam training op, the accuracy metric, a
    variable initializer and a checkpoint saver.

    Attributes written by training / evaluation:
        train_acc_values, train_cost_values: mean training accuracy / cost,
            one entry per completed epoch.
        valid_acc_values, valid_cost_values: validation accuracy / cost, one
            entry per call to evaluate().
        init: True until a checkpoint has been written by save_model().
    """

    # Directory that receives the checkpoint files.
    CHECKPOINT_DIR = './model/'

    def __init__(self, num_classes=10, model_name='model'):
        """Build the graph.

        Args:
            num_classes: number of output units (one per category).
            model_name: base name of the checkpoint file inside CHECKPOINT_DIR.
        """
        self.model_name = model_name
        self.num_classes = num_classes
        self.graph = tf.Graph()
        self.valid_acc_values = []
        self.valid_cost_values = []
        self.train_acc_values = []
        self.train_cost_values = []
        self.init = True

        with self.graph.as_default():
            # Placeholders
            self.X = tf.placeholder(dtype=tf.float32, shape=[None, 32, 32, 3])
            self.y = tf.placeholder(dtype=tf.int32, shape=[None])
            self.training = tf.placeholder(dtype=tf.bool)

            # Global step drives the learning-rate decay schedule
            self.global_step = tf.Variable(0, trainable=False)

            # Multiply the learning rate by 0.95 every 2000 steps (staircase)
            self.learning_rate = tf.train.exponential_decay(0.001, self.global_step, 2000, 0.95, staircase=True)

            # Convolutional layer 1
            self.conv1 = tf.layers.conv2d(
                self.X,                      # Input data
                filters=64,                  # 64 filters
                kernel_size=(5, 5),          # Kernel size: 5x5
                strides=(1, 1),              # Stride: 1
                padding='SAME',              # "same" padding
                activation=tf.nn.relu,       # ReLU
                kernel_initializer=tf.truncated_normal_initializer(stddev=5e-2, seed=0),  # Small standard deviation
                name='conv1'
            )

            # Max pooling layer 1
            self.pool1 = tf.layers.max_pooling2d(
                self.conv1,                  # Input
                pool_size=(3, 3),            # Pool size: 3x3
                strides=(2, 2),              # Stride: 2
                padding='SAME',              # "same" padding
                name='pool1'
            )

            # Local response normalization after the first pooling stage
            self.norm1 = tf.nn.lrn(self.pool1, 4, bias=1.0, alpha=0.001 / 9.0, beta=0.75, name='norm1')

            # Convolutional layer 2
            self.conv2 = tf.layers.conv2d(
                self.norm1,                  # Input
                filters=64,                  # 64 filters
                kernel_size=(5, 5),          # Kernel size: 5x5
                strides=(1, 1),              # Stride: 1
                padding='SAME',              # "same" padding
                activation=tf.nn.relu,       # ReLU
                kernel_initializer=tf.truncated_normal_initializer(stddev=5e-2, seed=0),  # Small standard deviation
                name='conv2'
            )

            # Local response normalization before the second pooling stage
            self.norm2 = tf.nn.lrn(self.conv2, 4, bias=1.0, alpha=0.001 / 9.0, beta=0.75, name='norm2')

            # Max pooling layer 2
            self.pool2 = tf.layers.max_pooling2d(
                self.norm2,                  # Input
                pool_size=(3, 3),            # Pool size: 3x3
                strides=(2, 2),              # Stride: 2
                padding='SAME'
            )

            # Dropout (25%) on the pooled feature maps, active only while training
            self.pool2 = tf.layers.dropout(self.pool2, rate=0.25, seed=1, training=self.training)

            # Flatten output
            self.flat_output = tf.contrib.layers.flatten(self.pool2)

            # Dropout (50%) on the flattened features, active only while training
            self.flat_output = tf.layers.dropout(self.flat_output, rate=0.5, seed=1, training=self.training)

            # Fully connected layer 1
            self.fc1 = tf.layers.dense(
                self.flat_output,            # Input
                384,                         # 384 hidden units
                activation=tf.nn.relu,       # ReLU
                kernel_initializer=tf.variance_scaling_initializer(scale=2, seed=0),
                bias_initializer=tf.zeros_initializer()
            )

            # Fully connected layer 2
            self.fc2 = tf.layers.dense(
                self.fc1,                    # Input
                192,                         # 192 hidden units
                activation=tf.nn.relu,       # ReLU
                kernel_initializer=tf.variance_scaling_initializer(scale=2, seed=0),
                bias_initializer=tf.zeros_initializer()
            )

            # Output layer
            self.logits = tf.layers.dense(
                self.fc2,                    # Input
                self.num_classes,            # One output unit per category
                activation=None,             # No activation function
                kernel_initializer=tf.variance_scaling_initializer(scale=1, seed=0),
                bias_initializer=tf.zeros_initializer()
            )

            # Mean cross-entropy
            self.mean_ce = tf.reduce_mean(tf.nn.sparse_softmax_cross_entropy_with_logits(labels=self.y, logits=self.logits))

            # Adam optimizer
            self.gd = tf.train.AdamOptimizer(learning_rate=self.learning_rate)

            # Minimize cross-entropy; each run of train_op also increments global_step
            self.train_op = self.gd.minimize(self.mean_ce, global_step=self.global_step)

            # Compute predictions and accuracy
            self.predictions = tf.argmax(self.logits, axis=1, output_type=tf.int32)
            self.is_correct = tf.equal(self.y, self.predictions)
            self.accuracy = tf.reduce_mean(tf.cast(self.is_correct, dtype=tf.float32))

            # Created last so that they cover every variable in the graph,
            # including the slot variables Adam adds in minimize().
            self.init_op = tf.global_variables_initializer()
            self.saver = tf.train.Saver()

    def _checkpoint_path(self, model_name=None):
        """Return the checkpoint path for `model_name` (defaults to self.model_name)."""
        name = self.model_name if model_name is None else model_name
        return self.CHECKPOINT_DIR + name + '.ckpt'

    def train(self, X_tr, y_tr, batch_size=64, epochs=20, use_gpu=False, print_metrics=True, init=True, X_cv=None, y_cv=None, print_every=5):
        """Train the model and write a checkpoint.

        Args:
            X_tr: training features, indexable by an integer index array.
            y_tr: training labels, same length as X_tr.
            batch_size: number of samples per mini-batch.
            epochs: number of passes over the training data.
            use_gpu: if False, the session is configured with zero GPU devices.
            print_metrics: True prints a text line every `print_every` epochs;
                False redraws a live plot after every epoch instead.
            init: True initializes all variables; False restores them from the
                saved checkpoint and continues training.
            X_cv, y_cv: optional cross-validation data, evaluated after each
                epoch when both are given.
            print_every: how often to print metrics, in epochs.

        Returns:
            self.valid_acc_values when cross-validation data was supplied,
            otherwise self.train_acc_values.
        """
        self.init = init

        # `X_cv != None` on a NumPy array compares element-wise and cannot be
        # used as a condition, so test identity instead.
        has_cv = (X_cv is not None) and (y_cv is not None)

        if use_gpu:
            config = tf.ConfigProto()
            config.gpu_options.allocator_type = 'BFC'
            config.gpu_options.per_process_gpu_memory_fraction = 0.6
        else:
            config = tf.ConfigProto(device_count={'GPU': 0})

        axes = None
        if not print_metrics:
            # Create the figure that plot_metrics() redraws after each epoch
            _, axes = plt.subplots(1, 2, figsize=(20, 5))

        with tf.Session(graph=self.graph, config=config) as sess:
            # If the model is new initialize variables, else restore the session
            if self.init:
                sess.run(self.init_op)
            else:
                self.saver.restore(sess, self._checkpoint_path())

            # Seed the shuffling done in get_batches()
            np.random.seed(0)

            for epoch in range(epochs):
                # Accuracy / cost of every mini-batch in this epoch
                batch_acc = []
                batch_cost = []

                for X_batch, y_batch in self.get_batches(X_tr, y_tr, batch_size):
                    # Run one optimisation step and collect its metrics
                    _, acc_value, cost_value = sess.run([self.train_op, self.accuracy, self.mean_ce], feed_dict={
                        self.X: X_batch,
                        self.y: y_batch,
                        self.training: True
                    })
                    batch_acc.append(acc_value)
                    batch_cost.append(cost_value)

                # Record training history here so it exists with or without CV data
                train_acc = np.mean(batch_acc)
                train_cost = np.mean(batch_cost)
                self.train_acc_values.append(train_acc)
                self.train_cost_values.append(train_cost)

                valid_acc = valid_cost = None
                if has_cv:
                    valid_acc, valid_cost = self.evaluate(X_cv, y_cv, sess=sess)

                if print_metrics:
                    # Print only every `print_every` epochs to keep the output short
                    if epoch % print_every == 0:
                        step, lr = sess.run([self.global_step, self.learning_rate])
                        if has_cv:
                            print('Epoch {:02d} - step {} - cv acc: {:.3f} - train acc: {:.3f} (mean) - cv cost: {:.3f} - lr: {:.5f}'.format(
                                epoch, step, valid_acc, train_acc, valid_cost, lr
                            ))
                        else:
                            print('Epoch {:02d} - step {} - train acc: {:.3f} (mean) - train cost: {:.3f} - lr: {:.5f}'.format(
                                epoch, step, train_acc, train_cost, lr
                            ))
                else:
                    self.plot_metrics(axes)

                # Save a checkpoint every 10th epoch except the first
                if epoch != 0 and epoch % 10 == 0:
                    self.save_model(sess)

            # Save once more after the last epoch
            self.save_model(sess)

        return self.valid_acc_values if has_cv else self.train_acc_values

    def evaluate(self, X_cv, y_cv, sess=None):
        """Compute accuracy and cost on (X_cv, y_cv) and append them to the history.

        Args:
            X_cv: features to evaluate.
            y_cv: labels for X_cv.
            sess: an open session on self.graph. When omitted, a temporary
                session is opened and the variables are restored from the
                saved checkpoint.

        Returns:
            (accuracy, cost) for the supplied data, with dropout disabled.
        """
        if sess is None:
            with tf.Session(graph=self.graph) as restored_sess:
                self.saver.restore(restored_sess, self._checkpoint_path())
                return self.evaluate(X_cv, y_cv, sess=restored_sess)

        valid_acc, valid_cost = sess.run([self.accuracy, self.mean_ce], feed_dict={
            self.X: X_cv,
            self.y: y_cv,
            self.training: False
        })
        self.valid_acc_values.append(valid_acc)
        self.valid_cost_values.append(valid_cost)

        return valid_acc, valid_cost

    def predict(self, X_test, y=None):
        """Predict class indices for X_test using the saved checkpoint.

        Args:
            X_test: features to classify.
            y: unused; accepted only so existing two-argument calls keep working.

        Returns:
            A one-element list holding the int32 array of predicted class indices.
        """
        with tf.Session(graph=self.graph) as sess:
            self.saver.restore(sess, self._checkpoint_path())

            test_labels = sess.run([self.predictions], feed_dict={
                self.X: X_test,
                self.training: False
            })

        return test_labels

    def plot_metrics(self, ax=None):
        """Redraw the accuracy and cost history on a pair of axes.

        Args:
            ax: sequence of two matplotlib axes (accuracy, cost). A new
                1x2 figure is created when omitted.
        """
        if ax is None:
            _, ax = plt.subplots(1, 2, figsize=(20, 5))

        panels = (
            (ax[0], 'accuracy', self.valid_acc_values, self.train_acc_values),
            (ax[1], 'cost', self.valid_cost_values, self.train_cost_values),
        )
        for axis, metric, valid_values, train_values in panels:
            axis.cla()
            if valid_values:
                axis.plot(valid_values, color="red", label="Validation")
                axis.set_title('Validation {}: {:.3f} (mean last 3)'.format(metric, np.mean(valid_values[-3:])))
            elif train_values:
                # No validation history: summarise the training curve instead
                axis.set_title('Training {}: {:.3f} (mean last 3)'.format(metric, np.mean(train_values[-3:])))
            axis.plot(train_values, color="blue", label="Training")
            axis.set_xlabel('epoch')
            axis.set_ylabel(metric)
            axis.legend()

        # Show the figure now; wait=True defers the clear until the next output arrives
        display.display(ax[0].figure)
        display.clear_output(wait=True)

    def save_model(self, sess, model_name=None):
        """Write a checkpoint of the session's variables.

        Args:
            sess: the open session whose variables are saved.
            model_name: optional checkpoint base name; defaults to self.model_name.

        Returns:
            The path reported by the saver.
        """
        print("Saving checkpoint")
        # Make sure the checkpoint directory exists before saving into it
        os.makedirs(self.CHECKPOINT_DIR, exist_ok=True)
        save_path = self.saver.save(sess, self._checkpoint_path(model_name))

        # Now that model is saved set init to false so we don't need to constantly retrain it
        self.init = False
        return save_path

    def get_batches(self, X, y, batch_size):
        """Yield shuffled (X_batch, y_batch) mini-batches covering the data once.

        The last batch is smaller when len(y) is not a multiple of batch_size.
        Shuffling uses NumPy's global random state.
        """
        # Shuffle X,y
        shuffled_idx = np.arange(len(y))
        np.random.shuffle(shuffled_idx)

        # Enumerate indexes by steps of batch_size
        for i in range(0, len(y), batch_size):
            # Batch indexes
            batch_idx = shuffled_idx[i:i+batch_size]
            yield X[batch_idx], y[batch_idx]

In [22]:
# Build the model graph with the constructor defaults (10 classes, checkpoint name 'model').
graph = Graph()

In [24]:
# Train from freshly initialised weights (init=True) without a GPU: 5 epochs, mini-batches of 32,
# metrics reported every epoch. No X_cv / y_cv is passed, so train() gets no validation data.
acc = graph.train(X_tr, y_tr, epochs=5, batch_size=32, use_gpu=False, print_every=1, init=True)

KeyboardInterrupt: 

In [None]:
# Display the value returned by graph.train() in the previous cell.
acc