This model uses TensorFlow to implement a convolutional neural network (CNN) for image classification. A CNN was chosen for its strong image-processing capabilities (via convolution). The model architecture is simple: the main unit is two convolution layers followed by a pooling layer and a dropout layer. This unit is used twice in sequence, followed by a dense layer that produces the model output. This architecture was chosen as it (loosely) follows the Inception architecture, which has historically had good performance.

### Model Build and Train

In [1]:
import tensorflow as tf
import numpy as np
import glob
import tqdm

In [2]:
def load_data(file_path):
    """Collect image file paths and build one-hot labels for them.

    Args:
        file_path: Glob pattern for the image files; ``**`` is expanded
            recursively.

    Returns:
        A ``(files, labels)`` tuple. ``files`` is the list of matched paths and
        ``labels`` is the depth-2 one-hot tensor produced by ``tf.one_hot``:
        class 0 for paths containing ``'Picasso'``, class 1 for all others.
    """
    # Glob the pattern that was passed in; the previous version ignored the
    # argument and silently depended on a module-level ``path`` variable.
    files = glob.glob(file_path, recursive=True)
    labels = np.array([0 if 'Picasso' in f else 1 for f in files])
    one_hot_labels = tf.one_hot(labels, depth=2, on_value=1.0, off_value=0.0)
    return files, one_hot_labels


def batch_read_and_process(files, labels, batch_size, img_h=64, img_w=64):
    """Build a queue-based pipeline yielding batches of resized images and labels.

    Args:
        files: Paths of the JPEG images to read.
        labels: Labels aligned element-wise with ``files``.
        batch_size: Number of examples per batch.
        img_h: Height the decoded images are resized to.
        img_w: Width the decoded images are resized to.

    Returns:
        A ``(img_batch, label_batch)`` pair of tensors from ``tf.train.batch``.
        The last batch may hold fewer than ``batch_size`` examples because
        ``allow_smaller_final_batch`` is enabled.
    """
    # Pull one shuffled (file name, label) pair at a time from the inputs.
    file_name, label = tf.train.slice_input_producer([files, labels], shuffle=True)
    raw_bytes = tf.read_file(file_name)
    decoded = tf.image.decode_jpeg(raw_bytes, channels=3)  # given rgb jpegs
    # resize_images takes size as [new_height, new_width]; the previous
    # [img_w, img_h] order only worked because callers pass square sizes.
    resized = tf.image.resize_images(decoded, [img_h, img_w])
    img_batch, label_batch = tf.train.batch([resized, label], batch_size=batch_size,
                                            allow_smaller_final_batch=True)
    return img_batch, label_batch
    

def _conv_pool_dropout_unit(inputs, unit, training, drop_out, filters,
                            kernel_size, strides, activation, padding):
    """Apply one conv -> conv -> max-pool -> dropout unit.

    ``unit`` is the 1-based unit number. It only selects the variable-scope
    names: unit 1 uses conv1/conv2/pool1/dropout1, unit 2 uses
    conv3/conv4/pool2/dropout2.
    """
    out = inputs
    for conv_index in (2 * unit - 1, 2 * unit):
        with tf.variable_scope("conv{}".format(conv_index)):
            out = tf.layers.conv2d(out, filters, kernel_size, strides=strides,
                                   activation=activation, padding=padding)
    with tf.variable_scope("pool{}".format(unit)):
        # NOTE(review): pooling reuses the convolution ``strides``. With the
        # default strides=1 a 2x2 'valid' pool shrinks each spatial dimension
        # by one instead of halving it -- confirm this is intended.
        out = tf.layers.max_pooling2d(out, pool_size=[2, 2], strides=strides, padding='valid')
    with tf.variable_scope("dropout{}".format(unit)):
        out = tf.layers.dropout(out, rate=drop_out, training=training)
    return out


def model(images, training, drop_out=0.8, filters=32, kernel_size=(3, 3), strides=1,
          activation=tf.nn.relu, padding='same'):
    """Build the CNN: two conv/conv/pool/dropout units followed by a dense layer.

    Args:
        images: Input image tensor fed to the first convolution.
        training: Passed to ``tf.layers.dropout``; dropout is only applied
            when this is true.
        drop_out: Dropout ``rate``, i.e. the fraction of units dropped.
        filters: Number of filters in every convolution layer.
        kernel_size: Convolution kernel size (a tuple default avoids a shared
            mutable default argument).
        strides: Stride used by the convolutions and by the pooling layers.
        activation: Activation function of the convolution layers.
        padding: Padding mode of the convolution layers.

    Returns:
        The output of the final dense layer: 2 units, no activation applied.
    """
    net = images
    for unit in (1, 2):
        net = _conv_pool_dropout_unit(net, unit, training, drop_out, filters,
                                      kernel_size, strides, activation, padding)

    # we need to flatten our dropout tensor before we pass it to the dense layer
    with tf.variable_scope("dense"):
        flat = tf.layers.flatten(net)
        dense = tf.layers.dense(inputs=flat, units=2)
    return dense
        
def loss(labels, logits):
    """Return the mean softmax cross-entropy of ``logits`` against ``labels``."""
    per_example = tf.nn.softmax_cross_entropy_with_logits_v2(labels=labels, logits=logits)
    return tf.reduce_mean(per_example)
    
def accu(labels, logits):
    """Return the fraction of examples whose arg-max prediction matches the label."""
    predicted_class = tf.argmax(logits, 1)
    true_class = tf.argmax(labels, 1)
    # Cast the boolean matches to floats so their mean is the accuracy.
    hits = tf.cast(tf.equal(predicted_class, true_class), tf.float32)
    return tf.reduce_mean(hits)

def train(labels, logits, batch_size, n_epochs, learning_rate=1e-3, iters_per_epoch=None):
    """Train with Adam, printing the mean loss and accuracy of every epoch.

    Args:
        labels: Label tensor handed to ``loss`` and ``accu``.
        logits: Model output tensor handed to ``loss`` and ``accu``.
        batch_size: Not used by this function; kept so existing calls still work.
        n_epochs: Number of epochs to run.
        learning_rate: Learning rate for ``tf.train.AdamOptimizer``.
        iters_per_epoch: Number of training steps per epoch (required).

    Returns:
        Dict mapping epoch index to ``{'loss': [...], 'accuracy': [...]}``,
        holding one value per training step that ran in that epoch.

    Raises:
        ValueError: If ``iters_per_epoch`` is not given.
    """
    if iters_per_epoch is None:
        # Fail early with a clear message instead of an obscure error from trange(None).
        raise ValueError("iters_per_epoch must be provided")

    global_step = tf.Variable(0, name='global_step', trainable=False)

    with tf.variable_scope("loss"):
        _loss = loss(labels, logits)
        accuracy = accu(labels, logits)

    optimizer = tf.train.AdamOptimizer(learning_rate)
    train_op = optimizer.minimize(_loss, global_step=global_step)
    init = tf.global_variables_initializer()
    epoch_dict = {}
    with tf.Session() as sess:
        sess.run(init)
        coord = tf.train.Coordinator()
        threads = tf.train.start_queue_runners(coord=coord)
        try:
            for n in range(n_epochs):
                if coord.should_stop():
                    break
                print('Starting epoch:  {}'.format(n))
                data_dict = {'loss': [], 'accuracy': []}
                for _ in tqdm.trange(iters_per_epoch):
                    if coord.should_stop():
                        break
                    fetched = sess.run([_loss, train_op, accuracy])
                    data_dict['loss'].append(fetched[0])
                    data_dict['accuracy'].append(fetched[2])
                if not data_dict['loss']:
                    # No step ran this epoch, so there is nothing to average.
                    break
                epoch_dict[n] = data_dict
                epoch_loss = sum(data_dict['loss']) / len(data_dict['loss'])
                epoch_accu = sum(data_dict['accuracy']) / len(data_dict['accuracy'])
                print("Epoch: {}, Epoch Loss: {}, Epoch Accuracy: {}".format(n, epoch_loss, epoch_accu))
        finally:
            # Ask the queue-runner threads to stop on success and on error alike;
            # any exception still propagates to the caller.
            coord.request_stop()

        coord.join(threads)
    return epoch_dict

In [3]:
# Training configuration.
path = 'artist_dataset/**/*.jpg'
batch_size = 25
n_epochs = 15

# Input pipeline: file list and labels, then shuffled batches of 64x64 images.
_files, _labels = load_data(path)
images, labels = batch_read_and_process(_files, _labels, batch_size, img_h=64, img_w=64)
iters_per_epoch = len(_files) // batch_size

# dropout rate suggested by http://papers.nips.cc/paper/4878-understanding-dropout.pdf
cnn = model(images, training=True, drop_out=0.5)
train(labels, cnn, batch_size, n_epochs, iters_per_epoch=iters_per_epoch)

  0%|          | 0/79 [00:00<?, ?it/s]

Starting epoch:  0


100%|██████████| 79/79 [01:16<00:00,  1.04it/s]
  0%|          | 0/79 [00:00<?, ?it/s]

Epoch: 0, Epoch Loss: 111.60706950969333, Epoch Accuracy: 0.5458227810980398
Starting epoch:  1


100%|██████████| 79/79 [01:22<00:00,  1.04s/it]
  0%|          | 0/79 [00:00<?, ?it/s]

Epoch: 1, Epoch Loss: 0.6670484980450401, Epoch Accuracy: 0.582784810775443
Starting epoch:  2


100%|██████████| 79/79 [01:16<00:00,  1.03it/s]
  0%|          | 0/79 [00:00<?, ?it/s]

Epoch: 2, Epoch Loss: 0.5984764065169081, Epoch Accuracy: 0.6754430407964731
Starting epoch:  3


100%|██████████| 79/79 [01:14<00:00,  1.06it/s]
  0%|          | 0/79 [00:00<?, ?it/s]

Epoch: 3, Epoch Loss: 0.5577416797227497, Epoch Accuracy: 0.7108860808082774
Starting epoch:  4


100%|██████████| 79/79 [01:18<00:00,  1.00it/s]
  0%|          | 0/79 [00:00<?, ?it/s]

Epoch: 4, Epoch Loss: 0.5425926228112812, Epoch Accuracy: 0.7255696235578272
Starting epoch:  5


100%|██████████| 79/79 [01:15<00:00,  1.04it/s]
  0%|          | 0/79 [00:00<?, ?it/s]

Epoch: 5, Epoch Loss: 0.5158670491810087, Epoch Accuracy: 0.7463291181793695
Starting epoch:  6


100%|██████████| 79/79 [01:12<00:00,  1.09it/s]
  0%|          | 0/79 [00:00<?, ?it/s]

Epoch: 6, Epoch Loss: 0.4687526939035971, Epoch Accuracy: 0.7777215185799177
Starting epoch:  7


100%|██████████| 79/79 [01:11<00:00,  1.10it/s]
  0%|          | 0/79 [00:00<?, ?it/s]

Epoch: 7, Epoch Loss: 0.463629593577566, Epoch Accuracy: 0.7827848113035853
Starting epoch:  8


100%|██████████| 79/79 [01:14<00:00,  1.06it/s]
  0%|          | 0/79 [00:00<?, ?it/s]

Epoch: 8, Epoch Loss: 0.4340533014339737, Epoch Accuracy: 0.8060759487031381
Starting epoch:  9


100%|██████████| 79/79 [01:20<00:00,  1.01s/it]
  0%|          | 0/79 [00:00<?, ?it/s]

Epoch: 9, Epoch Loss: 0.44177883297582216, Epoch Accuracy: 0.7918987296804597
Starting epoch:  10


100%|██████████| 79/79 [01:34<00:00,  1.19s/it]
  0%|          | 0/79 [00:00<?, ?it/s]

Epoch: 10, Epoch Loss: 0.4074190460805652, Epoch Accuracy: 0.8086075971398172
Starting epoch:  11


100%|██████████| 79/79 [01:32<00:00,  1.17s/it]
  0%|          | 0/79 [00:00<?, ?it/s]

Epoch: 11, Epoch Loss: 0.3833834510060805, Epoch Accuracy: 0.8237974643707275
Starting epoch:  12


100%|██████████| 79/79 [01:29<00:00,  1.14s/it]
  0%|          | 0/79 [00:00<?, ?it/s]

Epoch: 12, Epoch Loss: 0.3902995280827148, Epoch Accuracy: 0.8222784799865529
Starting epoch:  13


100%|██████████| 79/79 [01:32<00:00,  1.17s/it]
  0%|          | 0/79 [00:00<?, ?it/s]

Epoch: 13, Epoch Loss: 0.3591153240656551, Epoch Accuracy: 0.841012655934201
Starting epoch:  14


100%|██████████| 79/79 [01:40<00:00,  1.27s/it]

Epoch: 14, Epoch Loss: 0.34730357073153123, Epoch Accuracy: 0.8491139245938651





#### Hyperparameter Optimization

This section of the code optimizes the `filters` hyperparameter (the number of filters/features in each convolutional layer). Changing it makes the model more or less complex, depending on the number of filters. As far as I'm aware, there isn't straightforward functionality in TensorFlow to automatically tune hyperparameters, so the approach here simply searches across an array of possible values to find the optimal parameter, evaluated via final-epoch accuracy. Due to memory/CPU constraints on my machine, I will evaluate 8 parameter values.

In [21]:
#here I'm modifying the training function to not print out the results of every epoch, only the final epoch

def opt_model(images, training, drop_out=0.8, filters=32, kernel_size=(3, 3), strides=1,
              activation=tf.nn.relu, padding='same'):
    """Build the CNN used for the hyper-parameter search.

    The network is the same as the one built by ``model``, so this delegates
    to it rather than keeping a second copy of the layer definitions.

    Args:
        images: Input image tensor.
        training: Whether dropout is applied.
        drop_out: Dropout ``rate``, i.e. the fraction of units dropped.
        filters: Number of filters in every convolution layer.
        kernel_size: Convolution kernel size (a tuple default avoids a shared
            mutable default argument).
        strides: Stride used by the convolution and pooling layers.
        activation: Activation function of the convolution layers.
        padding: Padding mode of the convolution layers.

    Returns:
        The output tensor of the final dense layer (2 units).
    """
    return model(images, training, drop_out=drop_out, filters=filters,
                 kernel_size=kernel_size, strides=strides,
                 activation=activation, padding=padding)


def hyperparameter_train(labels, logits, batch_size, n_epochs, learning_rate=1e-3, iters_per_epoch=None):
    """Train with Adam, printing only the final epoch's mean loss and accuracy.

    Args:
        labels: Label tensor handed to ``loss`` and ``accu``.
        logits: Model output tensor handed to ``loss`` and ``accu``.
        batch_size: Not used by this function; kept so existing calls still work.
        n_epochs: Number of epochs to run.
        learning_rate: Learning rate for ``tf.train.AdamOptimizer``.
        iters_per_epoch: Number of training steps per epoch (required).

    Returns:
        Dict mapping epoch index to ``{'loss': [...], 'accuracy': [...]}``,
        holding one value per training step that ran in that epoch.

    Raises:
        ValueError: If ``iters_per_epoch`` is not given.
    """
    if iters_per_epoch is None:
        # Fail early with a clear message instead of an obscure error from range(None).
        raise ValueError("iters_per_epoch must be provided")

    global_step = tf.Variable(0, name='global_step', trainable=False)

    with tf.variable_scope("loss"):
        _loss = loss(labels, logits)
        accuracy = accu(labels, logits)

    optimizer = tf.train.AdamOptimizer(learning_rate)
    train_op = optimizer.minimize(_loss, global_step=global_step)
    init = tf.global_variables_initializer()
    epoch_dict = {}
    with tf.Session() as sess:
        sess.run(init)
        coord = tf.train.Coordinator()
        threads = tf.train.start_queue_runners(coord=coord)
        try:
            for n in range(n_epochs):
                if coord.should_stop():
                    break
                data_dict = {'loss': [], 'accuracy': []}
                for _ in range(iters_per_epoch):
                    if coord.should_stop():
                        break
                    fetched = sess.run([_loss, train_op, accuracy])
                    data_dict['loss'].append(fetched[0])
                    data_dict['accuracy'].append(fetched[2])
                if not data_dict['loss']:
                    # No step ran this epoch, so there is nothing to average.
                    break
                epoch_dict[n] = data_dict
                if n == (n_epochs - 1):
                    # Only the last epoch is reported during the search.
                    epoch_loss = sum(data_dict['loss']) / len(data_dict['loss'])
                    epoch_accu = sum(data_dict['accuracy']) / len(data_dict['accuracy'])
                    print("Final Epoch Loss: {}, Final Epoch Accuracy: {}".format(epoch_loss, epoch_accu))
        finally:
            # Ask the queue-runner threads to stop on success and on error alike;
            # any exception still propagates to the caller.
            coord.request_stop()

        coord.join(threads)
    return epoch_dict



In [22]:
# Filter counts to search through.
filter_array = [1, 3, 5, 10, 15, 20, 32, 64]

# The data and training ops are rebuilt for every filter value,
# using a smaller number of epochs than the main training run.
path = 'artist_dataset/**/*.jpg'
batch_size = 25
n_epochs = 10

for num_filters in filter_array:
    print("Number of filters:  {}".format(num_filters))
    # Start from an empty graph so each candidate gets fresh variables; the
    # input pipeline below is recreated inside that new graph.
    tf.reset_default_graph()
    _files, _labels = load_data(path)
    images, labels = batch_read_and_process(_files, _labels, batch_size, img_h=64, img_w=64)
    iters_per_epoch = len(_files) // batch_size
    # dropout rate suggested by http://papers.nips.cc/paper/4878-understanding-dropout.pdf
    opt_cnn = opt_model(images, filters=num_filters, training=True, drop_out=0.5)
    hyperparameter_train(labels, opt_cnn, batch_size, n_epochs, iters_per_epoch=iters_per_epoch)




Number of filters:  1
Final Epoch Loss: 0.5872002772138089, Final Epoch Accuracy: 0.6708860827397697
Number of filters:  3
Final Epoch Loss: 0.551719833401185, Final Epoch Accuracy: 0.7174683582933643
Number of filters:  5
Final Epoch Loss: 0.69078717126122, Final Epoch Accuracy: 0.5625316466711745
Number of filters:  10
Final Epoch Loss: 0.5898973515516595, Final Epoch Accuracy: 0.6739240571667876
Number of filters:  15
Final Epoch Loss: 0.5149389082872415, Final Epoch Accuracy: 0.7458227887938295
Number of filters:  20
Final Epoch Loss: 0.5424101382871217, Final Epoch Accuracy: 0.7265822793109508
Number of filters:  32
Final Epoch Loss: 0.37788925367065623, Final Epoch Accuracy: 0.832911387274537
Number of filters:  64
Final Epoch Loss: 0.4134841419850724, Final Epoch Accuracy: 0.8086075911039039


Interestingly, 3 filters would probably achieve good accuracy given a few more training epochs. This would also reduce complexity relative to the default model (32 filters).