In [1]:
%matplotlib inline
import itertools
import os
import random

import matplotlib.pyplot as plt
import numpy as np
import pandas
import sklearn
import tensorflow as tf
from IPython.display import Image as Im
from PIL import Image, ImageOps
from scipy.ndimage.interpolation import shift
from six.moves import cPickle as pickle
from six.moves import range
from sklearn.metrics import confusion_matrix
from sklearn.utils import shuffle
from tensorflow.contrib.layers import flatten

# Our "library"
from data_augmentation import *

In [2]:
# Path to the pickled dataset passed to execute(); execute() reads the keys
# 'train_dataset', 'train_labels', 'test_dataset' and 'test_labels' from it.
pickle_file = '../dataset/arbimon_0.pickle'

In [3]:
def execute(aug_shifts, pickle_file):
    """Train and evaluate the LeNet model five times for one augmentation setting.

    The dataset is loaded from ``pickle_file``; both the training and the test
    split are augmented with ``combined_augmentation`` using ``aug_shifts``.
    For each of the five independent runs the network is trained from freshly
    initialised variables and then scored on the augmented test split and on
    the original (non-augmented) test split.

    Args:
        aug_shifts: Augmentation setting forwarded to ``combined_augmentation``.
            It is printed as ``aug_shifts + 1``.
        pickle_file: Path of a pickle holding a dict with the keys
            'train_dataset', 'train_labels', 'test_dataset' and 'test_labels'.

    Returns:
        None. One row is appended to each of the module-level DataFrames
        ``augmented_confusion_data``, ``augmented_performance``,
        ``non_augmented_confusion_data`` and ``non_augmented_performance``;
        these must exist before the function is called.

    Notes:
        ``LeNet`` is not defined in the visible notebook cells (the recorded
        run stops with ``NameError: name 'LeNet' is not defined``). It has to
        be defined or imported before this function can run.
        ``combined_augmentation`` and ``reformat`` are presumably supplied by
        the ``data_augmentation`` star import -- TODO confirm.
    """
    EPOCHS = 50
    BATCH_SIZE = 5
    NUM_RUNS = 5        # must match the five columns of the result DataFrames
    NUM_CLASSES = 21
    rate = 0.05
    # Pads axes 1 and 2 by two pixels per side (28x28 -> 32x32 for this data),
    # matching the (None, 32, 32, 1) input placeholder below.
    padding = ((0, 0), (2, 2), (2, 2), (0, 0))

    print("=====================")
    print("Aug Shifts: " + str(aug_shifts + 1))

    # NOTE(security): unpickling can execute arbitrary code; only load
    # dataset files from a trusted source.
    with open(pickle_file, 'rb') as f:
        save = pickle.load(f)
    train_dataset = save['train_dataset']
    train_labels = save['train_labels']
    test_dataset = save['test_dataset']
    test_labels = save['test_labels']
    del save

    print('Original Training Set Shape: ', train_dataset.shape, train_labels.shape)
    print('Original Test Set Shape: ', test_dataset.shape, test_labels.shape)

    augmented_train_dataset, augmented_train_labels = combined_augmentation(train_dataset, aug_shifts, train_labels)
    augmented_test_dataset, augmented_test_labels = combined_augmentation(test_dataset, aug_shifts, test_labels)

    print()
    print('Augmented Training Set Shape: ', augmented_train_dataset.shape, augmented_train_labels.shape)
    print('Augmented Test Set Shape: ', augmented_test_dataset.shape, augmented_test_labels.shape)

    X_train = np.pad(reformat(augmented_train_dataset), padding, 'constant')
    X_test = np.pad(reformat(augmented_test_dataset), padding, 'constant')
    test_dataset = np.pad(reformat(test_dataset), padding, 'constant')

    y_train = augmented_train_labels
    y_test = augmented_test_labels

    X_train, y_train = shuffle(X_train, y_train)

    # Build the model in a private graph. Using the global default graph would
    # accumulate a new copy of every op and variable on each call to execute().
    graph = tf.Graph()
    with graph.as_default():
        x = tf.placeholder(tf.float32, (None, 32, 32, 1))
        y = tf.placeholder(tf.int32, (None))
        one_hot_y = tf.one_hot(y, NUM_CLASSES)

        logits = LeNet(x)
        cross_entropy = tf.nn.softmax_cross_entropy_with_logits(logits = logits, labels = one_hot_y)
        loss_operation = tf.reduce_mean(cross_entropy)
        optimizer = tf.train.GradientDescentOptimizer(learning_rate = rate)
        training_operation = optimizer.minimize(loss_operation)

        correct_prediction = tf.equal(tf.argmax(logits, 1), tf.argmax(one_hot_y, 1))
        accuracy_operation = tf.reduce_mean(tf.cast(correct_prediction, tf.float32))

        # Created once here rather than once per run, so the graph stays fixed.
        probs = tf.nn.softmax(logits)
        init_operation = tf.global_variables_initializer()

    def score(sess, X_data, y_data):
        """Return (accuracy, class probabilities) from a single forward pass."""
        return sess.run([accuracy_operation, probs], feed_dict={x: X_data, y: y_data})

    augmented_rendimiento = []
    augmented_confusion_matrices = []

    non_augmented_rendimiento = []
    non_augmented_confusion_matrices = []

    num_examples = len(X_train)

    for run in range(NUM_RUNS):
        print()
        print("Sample #", str(run + 1))

        with tf.Session(graph=graph) as sess:
            # Re-initialise the variables so every run starts from scratch.
            sess.run(init_operation)

            for epoch in range(EPOCHS):
                X_train, y_train = shuffle(X_train, y_train)
                for offset in range(0, num_examples, BATCH_SIZE):
                    end = offset + BATCH_SIZE
                    batch_x, batch_y = X_train[offset:end], y_train[offset:end]
                    sess.run(training_operation, feed_dict={x: batch_x, y: batch_y})

                if epoch % 10 == 0:
                    print("EPOCH {} ".format(epoch + 1))

            augmented_test_accuracy, augmented_predictions = score(sess, X_test, y_test)
            print("Augmented Test Accuracy = {:.3f}".format(augmented_test_accuracy))

            non_augmented_test_accuracy, non_augmented_predictions = score(sess, test_dataset, test_labels)
            print("Non-Augmented Test Accuracy = {:.3f}".format(non_augmented_test_accuracy))

        # Predicted class = index of the highest probability in each row.
        augmented_cm = confusion_matrix(y_test, np.argmax(augmented_predictions, axis=1))
        non_augmented_cm = confusion_matrix(test_labels, np.argmax(non_augmented_predictions, axis=1))

        # Each matrix stays wrapped in a one-element list, as in the original
        # code, so the layout of the stored results does not change.
        augmented_rendimiento.append(augmented_test_accuracy)
        augmented_confusion_matrices.append([augmented_cm])

        non_augmented_rendimiento.append(non_augmented_test_accuracy)
        non_augmented_confusion_matrices.append([non_augmented_cm])

    # Append one row per call to the module-level result tables.
    augmented_confusion_data.loc[len(augmented_confusion_data)] = augmented_confusion_matrices
    augmented_performance.loc[len(augmented_performance)] = augmented_rendimiento

    non_augmented_confusion_data.loc[len(non_augmented_confusion_data)] = non_augmented_confusion_matrices
    non_augmented_performance.loc[len(non_augmented_performance)] = non_augmented_rendimiento

In [4]:
# Result tables: execute() appends one row per call to each of these
# module-level frames, so they must exist before the sweep starts. The five
# columns correspond to the five training runs performed inside execute().
augmented_confusion_data = pandas.DataFrame(columns = list('12345'))
augmented_performance = pandas.DataFrame(columns = list('12345'))

non_augmented_confusion_data = pandas.DataFrame(columns = list('12345'))
non_augmented_performance = pandas.DataFrame(columns = list('12345'))

augmented_performance_path = 'results/combined_augmented_test/performance_combined_augmented_test.pkl'
augmented_confusion_path = 'results/combined_augmented_test/confusion_matrices_combined_augmented_test.pkl'
non_augmented_confusion_path = 'results/combined_non_augmented_test/confusion_matrices_combined_non_augmented_test.pkl'
non_augmented_performance_path = 'results/combined_non_augmented_test/performance_combined_non_augmented_test.pkl'

# Create the output directories before the long training sweep: to_pickle()
# does not create missing directories, and failing only at the very end would
# throw away every result.
for output_path in (augmented_performance_path, augmented_confusion_path,
                    non_augmented_confusion_path, non_augmented_performance_path):
    os.makedirs(os.path.dirname(output_path), exist_ok=True)

# Sweep over the 22 augmentation settings (aug_shifts = 0 .. 21).
for aug_shifts in range(22):
    execute(aug_shifts = aug_shifts, pickle_file = pickle_file)

augmented_performance.to_pickle(augmented_performance_path)
augmented_confusion_data.to_pickle(augmented_confusion_path)

non_augmented_confusion_data.to_pickle(non_augmented_confusion_path)
non_augmented_performance.to_pickle(non_augmented_performance_path)

Aug Shifts: 1
Original Training Set Shape:  (165, 28, 28) (165,)
Original Test Set Shape:  (82, 28, 28) (82,)

Augmented Training Set Shape:  (1320, 28, 28) (1320,)
Augmented Test Set Shape:  (656, 28, 28) (656,)


NameError: name 'LeNet' is not defined