# COMS 4995_002 Deep Learning Assignment 1
### This assignment can be done in groups of at most 3 students. Everyone must submit on Courseworks individually.
### Write down the UNIs of your group (if applicable)


## Member 1: Kaho Chan, kc3137
## Member 2: Yu Wang, yw3025
## Member 3: Jingxi Xu, jx2324

In [1]:
## %matplotlib inline
import matplotlib.pyplot as plt
import numpy as np
import scipy.misc
import glob
import sys
import tensorflow as tf
import math
import datetime

In [2]:
# Helper functions, DO NOT modify this

def get_img_array(path):
    """
    Given the path of an image file, returns its numpy array
    (whatever scipy.misc.imread decodes from that file).
    :param path: path of the image file to read
    """
    # NOTE(review): scipy.misc.imread was deprecated in SciPy 1.0 and removed in
    # SciPy 1.2, so this call only works on older SciPy installs - confirm the
    # pinned SciPy version before running.
    return scipy.misc.imread(path)

def get_files(folder):
    """
    Lists the files below a folder prefix, sorted by path.

    The glob pattern is folder + '*/*', i.e. every entry inside any directory
    whose path starts with `folder`.
    :param folder: path prefix of the directory to scan
    :return: alphabetically sorted list of matching paths
    """
    pattern = folder + '*/*'
    return sorted(glob.glob(pattern))

def get_label(filepath, label2id):
    """
    Maps a file path to the numeric id of its label.

    Files are assumed to be named like /path/to/file/999_frog.png: the label is
    the text after the first underscore of the file name, minus the last four
    characters (the extension).
    :param filepath: '/'-separated path of the image file
    :param label2id: mapping of text labels to numeric ids
    :return: numeric id of the label; exits the interpreter on an unknown label
    """
    filename = filepath.rsplit('/', 1)[-1]
    label = filename.split('_')[1][:-4]
    # Guard clause: an unknown label aborts the whole run.
    if label not in label2id:
        sys.exit("Invalid label: " + label)
    return label2id[label]

In [3]:
# Functions to load data, DO NOT change these

def get_labels(folder, label2id):
    """
    Builds the label vector for every file found under a folder.

    Labels are parsed from the file names, in the order get_files returns them.
    :param folder: path to data folder
    :param label2id: mapping of text labels to numeric ids. (Eg: automobile -> 0)
    :return: 1-D numpy array with one numeric label per file
    """
    return np.array([get_label(path, label2id) for path in get_files(folder)])

def one_hot(y, num_classes=10):
    """
    Converts each label index in y to a vector with one_hot encoding
    :param y: 1-D sequence of integer class indices in [0, num_classes)
    :param num_classes: number of classes (length of each one-hot vector)
    :return: array of shape (num_classes, len(y)); column j is the one-hot
             encoding of y[j]
    """
    y = np.asarray(y)
    num_samples = y.shape[0]
    y_one_hot = np.zeros((num_samples, num_classes))
    # Pair every row index with its label so exactly one entry per sample is set.
    # Indexing with y alone would fill the entire rows selected by the label values.
    y_one_hot[np.arange(num_samples), y] = 1
    return y_one_hot.T

def get_label_mapping(label_file):
    """
    Reads the label file and builds both lookup directions.

    The input file lists one label per line; a label's id is its
    zero-based line position.
    :param label_file: path of the text file with the labels
    :return: (id2label, label2id) - list of labels and dict label -> id
    """
    with open(label_file, 'r') as handle:
        id2label = [line.strip() for line in handle]
    label2id = {name: index for index, name in enumerate(id2label)}
    return id2label, label2id

def get_images(folder):
    """
    Loads every image found by get_files(folder) into one numpy array.

    Each image is decoded with get_img_array and converted to float64; the
    images are then stacked along a new first axis, one entry per file in the
    sorted order returned by get_files. Pixel values are not rescaled and the
    images are not flattened.
    :param folder: path prefix handed to get_files
    :return: numpy array whose first dimension indexes the loaded images
    """
    files = get_files(folder)
    images = []
    for count, f in enumerate(files, start=1):
        # Progress report every 10000 files.
        if count % 10000 == 0:
            print("Loaded {}/{}".format(count, len(files)))
        # np.float64 is what the np.float alias meant; the alias itself was
        # deprecated in NumPy 1.20 and removed in NumPy 1.24.
        images.append(get_img_array(f).astype(np.float64))
    return np.stack(images, axis=0)

def get_train_data(data_root_path):
    """
    Loads the training images and their labels.

    Expects `data_root_path` to end with a path separator: the label file and
    the train folder are located by plain string concatenation.
    :param data_root_path: root of the dataset
    :return: (X, y) - stacked image array and vector of numeric labels
    """
    _, label2id = get_label_mapping(data_root_path + 'labels.txt')
    print(label2id)
    train_dir = data_root_path + 'train'
    # Images are loaded before labels, as in the original call order.
    return get_images(train_dir), get_labels(train_dir, label2id)

def save_predictions(filename, y):
    """
    Dumps y into a .npy file using np.save
    :param filename: destination path of the .npy file
    :param y: array to store
    """
    np.save(filename, y)

In [4]:
# Load the data
# The root path must end with '/': the loaders build sub-paths by plain string concatenation.
data_root_path = 'cifar10-hw/'
X_train, y_train = get_train_data(data_root_path) # this may take a few minutes
# Only images are loaded for the test set; no labels are read for it.
X_test = get_images(data_root_path + 'test')
print('Data loading done')

{'airplane': 0, 'automobile': 1, 'bird': 2, 'cat': 3, 'deer': 4, 'dog': 5, 'frog': 6, 'horse': 7, 'ship': 8, 'truck': 9}
Loaded 10000/50000
Loaded 20000/50000
Loaded 30000/50000
Loaded 40000/50000
Loaded 50000/50000
Loaded 10000/10000
Data loading done


In [5]:
# Sanity check: shapes of the loaded training images and labels
print(X_train.shape)
print(y_train.shape)

(50000, 32, 32, 3)
(50000,)


In [6]:
# Group's helper function
def split(X, y, val_size):
    '''
    Randomly partition the samples into a training and a validation set.

    :param X: sample array, indexed along its first axis
    :param y: label array aligned with X
    :param val_size: fraction of the samples that goes to the validation set
    :return: (X_train, X_val, y_train, y_val)
    '''
    n_samples = X.shape[0]
    shuffled = np.random.permutation(n_samples)
    n_val = int(val_size * n_samples)
    # The first n_val shuffled indices form the validation set, the rest train.
    val_idx, trn_idx = shuffled[:n_val], shuffled[n_val:]
    return X[trn_idx], X[val_idx], y[trn_idx], y[val_idx]

In [7]:
# Hold out 10% of the training samples as a validation set and check the shapes
X_trn, X_val, y_trn, y_val = split(X_train, y_train, val_size=0.1)
print(X_trn.shape)
print(X_val.shape)
print(y_trn.shape)
print(y_val.shape)

(45000, 32, 32, 3)
(5000, 32, 32, 3)
(45000,)
(5000,)


In [8]:
# normalize the input
def input_normalization(images):
    """
    Standardizes every image separately to zero mean and unit variance.

    The array is modified IN PLACE and also returned, so the caller's array
    changes as well. Pass a floating-point array: results are written back into
    `images` and would be cast to its dtype otherwise.
    :param images: array whose first axis indexes the images
    :return: the same array object, normalized image by image
    """
    for i in range(images.shape[0]):
        image = images[i]
        std = np.std(image)
        # A constant image has zero std; dividing by it would give NaN (0/0).
        # Using 1.0 instead maps such an image to all zeros.
        if std == 0:
            std = 1.0
        images[i] = (image - np.mean(image)) / std

    return images

In [9]:
# Input normalization: standardize every image on its own
# (per-image mean subtraction and division by the per-image std),
# applied to the training, validation and test inputs alike.
X_trn = input_normalization(X_trn)
X_val = input_normalization(X_val)
X_test = input_normalization(X_test)

In [10]:
# Global values
H, W, T = 32, 32, 10 # height/width of images, number of classes of images
# NOTE(review): cnns is not referenced in the cells visible here - presumably a
# registry of trained models used further down; confirm or remove.
cnns = dict()

## Part 1

Using TensorFlow, implement a Convolutional Neural Network to classify CIFAR10
- At least two Convolutional Layers followed by normalization and pooling layers. 
- Activation function ReLU.
- Optimizer: Gradient Descent
- At least one fully connected layer followed by softmax transformation.

In [11]:
class CNN():
    """
    Small training / evaluation wrapper around a TensorFlow 1.x graph and session.

    The graph is built once in __init__ from `model_fn`; `train`, `validate`
    and `predict` then feed numpy batches through the session owned by the
    instance. The input placeholder shape uses the module-level H and W.
    """

    def __init__(self, model_fn, trainer, global_step=None):
        """
        Builds the graph, the training op and the session.
        :param model_fn: callable (X, Y, is_training) -> (logits, loss)
        :param trainer: optimizer object exposing minimize(loss, global_step=...)
        :param global_step: optional step variable; when None the default graph
                            is reset and a fresh non-trainable counter is created
        """
        if global_step is None:
            tf.reset_default_graph()
            global_step = tf.Variable(0, trainable=False)
        self.X = tf.placeholder(tf.float32, [None, H, W, 3])
        self.Y = tf.placeholder(tf.int64, [None])
        self.is_training = tf.placeholder(tf.bool)
        logit, loss = model_fn(self.X, self.Y, self.is_training)

        # Ops registered in UPDATE_OPS (e.g. by batch normalization layers) must
        # run together with the training step: https://stackoverflow.com/a/43285333
        extra_update_ops = tf.get_collection(tf.GraphKeys.UPDATE_OPS)
        with tf.control_dependencies(extra_update_ops):
            train_op = trainer.minimize(loss, global_step=global_step)
        # Accuracy
        correct = tf.equal(tf.argmax(logit, 1), self.Y)
        accuracy = tf.reduce_mean(tf.cast(correct, tf.float32))
        # Fetch lists per mode; `run` unpacks the train/validate lists as
        # (loss, per-sample correctness, <ignored third value>).
        self.variables = {
            'train': [loss, correct, train_op],
            'validate': [loss, correct, accuracy],
            'test':logit}
        self.sess = tf.Session()
        self.sess.run(tf.global_variables_initializer())

    def batch_gen(self, Xd, Yd, batch_size, shuffle=True):
        """
        Yields (X_batch, Y_batch) pairs covering every sample exactly once.
        :param Xd: sample array, indexed along its first axis
        :param Yd: label array aligned with Xd
        :param batch_size: maximum number of samples per batch (the last batch
                           may be smaller)
        :param shuffle: visit the samples in random order when True
        """
        num_samples = Xd.shape[0]
        indices = np.arange(num_samples)
        if shuffle:
            np.random.shuffle(indices)
        # Stepping by batch_size needs neither true division nor a modulo and
        # always includes the final partial batch.
        for start_idx in range(0, num_samples, batch_size):
            idx = indices[start_idx:start_idx + batch_size]
            yield Xd[idx], Yd[idx]

    def run(self, Xd, Yd, epochs, batch_size, print_every, plot_losses, status):
        """
        Runs `epochs` passes over (Xd, Yd) in 'train' or 'validate' mode.
        :param Xd: sample array
        :param Yd: label array aligned with Xd
        :param epochs: number of passes over the data
        :param batch_size: mini-batch size
        :param print_every: in train mode, log every `print_every` iterations
        :param plot_losses: plot the per-batch losses after each epoch when True
        :param status: 'train' or 'validate'; selects the fetched ops and the
                       value fed to the is_training placeholder
        :return: (loss, accuracy) of the last epoch, where loss is the unweighted
                 mean of the per-batch losses; (nan, nan) when epochs is 0
        """
        is_train = status == 'train'
        iter_cnt = 0
        # Defined up front so that epochs == 0 returns instead of raising
        # UnboundLocalError.
        epoch_loss = epoch_accuracy = float('nan')
        for e in range(epochs):
            correct = 0
            losses = []
            # Shuffling only matters for SGD; evaluation keeps the data order.
            for Xb, Yb in self.batch_gen(Xd, Yd, batch_size, shuffle=is_train):
                feed_dict = {self.X: Xb, self.Y: Yb, self.is_training: is_train}
                loss, corr, _ = self.sess.run(self.variables[status], feed_dict=feed_dict)
                losses.append(loss)
                correct += np.sum(corr)
                if is_train and iter_cnt % print_every == 0:
                    print("{} Iter {}: batch trn loss = {:.3f}, accuracy = {:.3f}".format(
                        datetime.datetime.now().strftime('%Y-%m-%d %H:%M:%S'),
                        iter_cnt,
                        loss,
                        np.mean(corr),
                    ))
                iter_cnt += 1
            epoch_loss = np.mean(losses)

            epoch_accuracy = correct / Xd.shape[0]
            print("Epoch: mean loss = {:.3f}, accuracy = {:.3f}".format(epoch_loss, epoch_accuracy))
            if plot_losses:
                plt.plot(losses)
                plt.grid(True)
                plt.title('Epoch {} Mean Loss'.format(e))
                plt.xlabel('minibatch number')
                plt.ylabel('minibatch mean loss')
                plt.show()
        return epoch_loss, epoch_accuracy

    def train(self, Xd, Yd, epochs=1, batch_size=50, print_every=100, plot_losses=False):
        """Trains on (Xd, Yd); returns (loss, accuracy) of the last epoch."""
        return self.run(Xd, Yd, epochs, batch_size, print_every, plot_losses, status='train')

    def validate(self, Xd, Yd, epochs=1, batch_size=50, print_every=100, plot_losses=False):
        """Evaluates on (Xd, Yd) without updating weights; returns (loss, accuracy)."""
        return self.run(Xd, Yd, epochs, batch_size, print_every, plot_losses, status='validate')

    def predict(self, Xd, batch_size=None):
        """
        Returns the raw logits for Xd (one row per sample), not class labels.
        :param Xd: sample array
        :param batch_size: optional chunk size; None (default) feeds all of Xd
                           in a single session run, a number evaluates Xd in
                           chunks of that size to bound memory use
        """
        if batch_size is None or Xd.shape[0] <= batch_size:
            feed_dict = {self.X: Xd, self.is_training: False}
            return self.sess.run(self.variables['test'], feed_dict=feed_dict)
        chunks = []
        for start_idx in range(0, Xd.shape[0], batch_size):
            feed_dict = {self.X: Xd[start_idx:start_idx + batch_size],
                         self.is_training: False}
            chunks.append(self.sess.run(self.variables['test'], feed_dict=feed_dict))
        return np.concatenate(chunks, axis=0)

## Part I 
Results per epoch (the val_* numbers are measured on the 10% hold-out validation split; the test set has no labels):
- trn_loss: 1.598, train_acc: 0.512, val_loss: 1.173, val_acc: 0.595 (epoch 0)
- trn_loss: 1.011, train_acc: 0.650, val_loss: 0.985, val_acc: 0.670 (epoch 1)
- trn_loss: 0.853, train_acc: 0.701, val_loss: 0.947, val_acc: 0.676 (epoch 2)
- trn_loss: 0.749, train_acc: 0.740, val_loss: 0.888, val_acc: 0.705 (epoch 3)
- trn_loss: 0.671, train_acc: 0.766, val_loss: 0.919, val_acc: 0.695 (epoch 4)
- trn_loss: 0.602, train_acc: 0.789, val_loss: 0.889, val_acc: 0.718 (epoch 5)
- trn_loss: 0.550, train_acc: 0.806, val_loss: 0.902, val_acc: 0.716 (epoch 6)

In [12]:
# trn: 0.753, val: 0.651
def model_fn(layer_input, labels, is_training):
    """
    Builds the Part 1 network: two (conv 5x5 + ReLU -> batch norm -> 2x2 max pool)
    stages followed by two dense layers.
    :param layer_input: batch of images, laid out as [batch, H, W, channels]
    :param labels: integer class indices, one per image
    :param is_training: boolean tensor switching batch normalization between
                        training and inference behaviour
    :return: (logits, loss) - class scores with T columns and the softmax
             cross-entropy loss tensor
    """
    F1 = 32
    layer_conv1 = tf.layers.conv2d(inputs=layer_input, filters=F1, kernel_size=[5, 5], padding='same', activation=tf.nn.relu)
    layer_bn1 = tf.layers.batch_normalization(inputs=layer_conv1, training=is_training)
    layer_pool1 = tf.layers.max_pooling2d(inputs=layer_bn1, pool_size=[2, 2], strides=2)
    F2 = 64
    layer_conv2 = tf.layers.conv2d(inputs=layer_pool1, filters=F2, kernel_size=[5, 5], padding='same', activation=tf.nn.relu)
    layer_bn2 = tf.layers.batch_normalization(inputs=layer_conv2, training=is_training)
    layer_pool2 = tf.layers.max_pooling2d(inputs=layer_bn2, pool_size=[2, 2], strides=2)
    # Each stride-2 pool halves (and floors) the spatial size, so integer
    # division gives the exact flattened length without float rounding.
    flat_size = (H // 4) * (W // 4) * F2
    layer_pool2_flat = tf.reshape(layer_pool2, [-1, flat_size])
    # NOTE(review): this dense layer has no activation, so together with the
    # logit layer it forms a single linear map - confirm that is intended.
    layer_dense = tf.layers.dense(inputs=layer_pool2_flat, units=1024)
    layer_logit = tf.layers.dense(inputs=layer_dense, units=T)
    # The one-hot depth must match the number of logit units, hence T rather
    # than a hard-coded 10.
    onehot_labels = tf.one_hot(indices=tf.cast(labels, tf.int64), depth=T)
    loss = tf.losses.softmax_cross_entropy(
        onehot_labels=onehot_labels, logits=layer_logit)
    return layer_logit, loss

# Part 1 requires plain gradient descent; CNN builds the graph and opens its session here
trainer = tf.train.GradientDescentOptimizer(learning_rate=0.03)
cnn = CNN(model_fn, trainer)

In [13]:
# Train for 7 epochs, evaluating on the validation split after every epoch
for i in range(7):
    print('train: epoch %d' % i)
    cnn.train(X_trn, y_trn, epochs=1, batch_size=50, print_every=100, plot_losses=False)
    print('validation')
    # The whole validation set is evaluated as a single batch
    cnn.validate(X_val, y_val, epochs=1, batch_size=y_val.shape[0])

train: epoch 0
2017-10-30 20:02:34 Iter 0: batch trn loss = 4.834, accuracy = 0.080
2017-10-30 20:02:53 Iter 100: batch trn loss = 1.959, accuracy = 0.400
2017-10-30 20:03:12 Iter 200: batch trn loss = 1.406, accuracy = 0.520
2017-10-30 20:03:32 Iter 300: batch trn loss = 1.713, accuracy = 0.460
2017-10-30 20:03:51 Iter 400: batch trn loss = 1.532, accuracy = 0.480
2017-10-30 20:04:11 Iter 500: batch trn loss = 1.313, accuracy = 0.500
2017-10-30 20:04:31 Iter 600: batch trn loss = 1.465, accuracy = 0.560
2017-10-30 20:04:50 Iter 700: batch trn loss = 1.375, accuracy = 0.560
2017-10-30 20:05:12 Iter 800: batch trn loss = 1.058, accuracy = 0.580
Epoch: mean loss = 1.598, accuracy = 0.512
validation
Epoch: mean loss = 1.173, accuracy = 0.595
train: epoch 1
2017-10-30 20:05:40 Iter 0: batch trn loss = 1.058, accuracy = 0.600
2017-10-30 20:06:01 Iter 100: batch trn loss = 1.363, accuracy = 0.540
2017-10-30 20:06:21 Iter 200: batch trn loss = 1.020, accuracy = 0.620
2017-10-30 20:06:41 Iter 

In [14]:
# predict the result, save in ans1-uni.npy
# NOTE(review): cnn.predict returns the raw logits, not class labels or
# one-hot vectors - confirm which format the submission expects.
y_predict = cnn.predict(X_test)
# transpose y so that the saved array has one column per test sample
save_predictions('ans1-uni.npy', y_predict.T)

In [15]:
# test if your numpy file has been saved correctly:
# reload it and check the shape of the stored array
loaded_y = np.load('ans1-uni.npy')
print(loaded_y.shape)

(10, 10000)


In [None]:
# display the in-memory predictions returned by cnn.predict (not the saved file)
y_predict