<a href="https://colab.research.google.com/github/dauparas/game_set/blob/master/cnn_game_set.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

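This notebook trains a small CNN to classify the four attributes (number, shading, color, shape) of the 81 card images in the `game_set` dataset. It is inspired by
https://github.com/rasbt/deeplearning-models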

In [1]:
import tensorflow as tf
from functools import reduce
import numpy as np
# Ask TensorFlow whether a GPU is available; the result is shown as the cell output.
tf.test.is_gpu_available()

True

In [2]:
#Get the dataset: clone the game_set repository into ./game_set
#(the image-loading code reads the card images from ./game_set/original_81/)
!git clone https://github.com/dauparas/game_set.git

Cloning into 'game_set'...
remote: Enumerating objects: 103, done.[K
remote: Counting objects: 100% (103/103), done.[K
remote: Compressing objects: 100% (103/103), done.[K
remote: Total 103 (delta 9), reused 87 (delta 0), pack-reused 0[K
Receiving objects: 100% (103/103), 28.19 MiB | 49.68 MiB/s, done.
Resolving deltas: 100% (9/9), done.


In [0]:
#create labels for the dataset; 4 categories with 3 possibilities
import numpy as np

# Every card is one point of the 3x3x3x3 attribute grid. np.ndindex walks the
# grid in row-major order (last attribute varies fastest), i.e. the same order
# four nested `for ... in range(3)` loops would produce.
_combos = list(np.ndindex(3, 3, 3, 3))

# Image files are named after the 1-based attribute digits, e.g. '1231.jpg'.
filenames = [''.join(str(v + 1) for v in combo) + '.jpg' for combo in _combos]

# Labels are the 0-based attribute values: shape (81, 4), dtype int32.
# Built in a single call instead of re-copying a growing array with
# np.concatenate on every iteration (and without a dummy first row to strip).
labels = np.array(_combos, dtype=np.int32)

In [0]:
def parse_function(filename, label):
    """Load one card image from disk and turn it into a network input.

    Args:
        filename: image file name, looked up under ./game_set/original_81/.
        label: returned unchanged alongside the image.

    Returns:
        An (image, label) pair; the image is a 3-channel float32 tensor
        resized to 100 x 160 (height x width).
    """
    raw_bytes = tf.read_file("./game_set/original_81/" + filename)

    # Don't use tf.image.decode_image, or the output shape will be undefined
    decoded = tf.image.decode_jpeg(raw_bytes, channels=3)

    # This will convert to float values in [0, 1]
    as_float = tf.image.convert_image_dtype(decoded, tf.float32)

    target_size = [100, 160]
    resized = tf.image.resize_images(as_float, target_size)
    return resized, label

In [0]:
def train_preprocess(image, label):
    """Data augmentation: random flips plus brightness/saturation jitter.

    Args:
        image: image tensor to augment.
        label: returned unchanged alongside the image.

    Returns:
        An (image, label) pair with the augmented image clipped to [0, 1].
    """
    brightness_delta = 32.0 / 255.0

    # Horizontal flip is applied first, then the vertical one.
    flipped = tf.image.random_flip_up_down(
        tf.image.random_flip_left_right(image))

    jittered = tf.image.random_brightness(flipped, max_delta=brightness_delta)
    jittered = tf.image.random_saturation(jittered, lower=0.5, upper=1.5)

    # Make sure the image is still in [0, 1]
    return tf.clip_by_value(jittered, 0.0, 1.0), label

In [0]:
##########################
### SETTINGS
##########################

# Hyperparameters
learning_rate = 0.001
training_epochs = 5   # NOTE(review): not read anywhere in this cell
batch_size = 32

# Architecture
# NOTE(review): these three are not read in this cell; parse_function resizes
# every image to 100 x 160 (height x width) on its own.
input_size = 160*100
image_width = 160
image_height = 100

# Other
random_seed = 0


g = tf.Graph()
with g.as_default():

    tf.set_random_seed(random_seed)

    ###########
    # Input pipeline
    ###########

    # Shuffle the (filename, label) pairs first -- a buffer as large as the
    # dataset gives a full shuffle -- then decode, augment and batch.
    dataset = tf.data.Dataset.from_tensor_slices((filenames, labels))
    dataset = dataset.shuffle(len(filenames))
    dataset = dataset.map(parse_function, num_parallel_calls=4)
    dataset = dataset.map(train_preprocess, num_parallel_calls=4)
    dataset = dataset.batch(batch_size)
    dataset = dataset.prefetch(1)

    # Re-initializable iterator: running `train_init` rewinds it to the start
    # of `dataset`. The network reads its inputs straight from the iterator,
    # so no placeholders / feed_dict are needed.
    iterator = tf.data.Iterator.from_structure(dataset.output_types, dataset.output_shapes)
    input_layer, y = iterator.get_next()
    train_init = iterator.make_initializer(dataset)  # initializer for train_data

    ###########
    # Encoder
    ###########
    # Shapes below are height x width x channels (NHWC, batch axis omitted).

    # 100x160x3 => 100x160x24
    conv1 = tf.layers.conv2d(input_layer, filters=24, kernel_size=(5, 5),
                             strides=(1, 1), padding='same',
                             activation=tf.nn.relu)
    # 100x160x24 => 50x80x24
    maxpool1 = tf.layers.max_pooling2d(conv1, pool_size=(2, 2),
                                       strides=(2, 2), padding='same')
    # 50x80x24 => 50x80x12
    conv2 = tf.layers.conv2d(maxpool1, filters=12, kernel_size=(5, 5),
                             strides=(1, 1), padding='same',
                             activation=tf.nn.relu)
    # 50x80x12 => 25x40x12
    # Intermediate pools get their own names so that only the final pooling
    # layer is called 'encoding' (previously all three requested that name).
    maxpool2 = tf.layers.max_pooling2d(conv2, pool_size=(2, 2),
                                       strides=(2, 2), padding='same',
                                       name='pool2')

    # 25x40x12 => 25x40x12
    conv3 = tf.layers.conv2d(maxpool2, filters=12, kernel_size=(5, 5),
                             strides=(1, 1), padding='same',
                             activation=tf.nn.relu)
    # 25x40x12 => 13x20x12 ('same' padding rounds 25/2 up to 13)
    maxpool3 = tf.layers.max_pooling2d(conv3, pool_size=(2, 2),
                                       strides=(2, 2), padding='same',
                                       name='pool3')
    # 13x20x12 => 13x20x12
    conv4 = tf.layers.conv2d(maxpool3, filters=12, kernel_size=(5, 5),
                             strides=(1, 1), padding='same',
                             activation=tf.nn.relu)
    # 13x20x12 => 7x10x12
    encode = tf.layers.max_pooling2d(conv4, pool_size=(2, 2),
                                     strides=(2, 2), padding='same',
                                     name='encoding')

    # Flatten to (batch, 840); 840 = 7 * 10 * 12.
    encode = tf.reshape(encode, (tf.shape(input_layer)[0], 840))

    # NOTE(review): `dense` is built but not consumed -- the four heads below
    # read `encode` directly. Left untouched to keep the model unchanged;
    # confirm whether the heads were meant to sit on top of `dense`.
    dense = tf.layers.dense(encode, 32, activation=tf.nn.relu)

    ###########
    # Classification heads: one 3-way logit vector per card attribute
    ###########
    g1 = tf.layers.dense(encode, 3, activation=None) #number
    g2 = tf.layers.dense(encode, 3, activation=None) #shading
    g3 = tf.layers.dense(encode, 3, activation=None) #color
    g4 = tf.layers.dense(encode, 3, activation=None) #shape

    # Column k of `y` holds the class index for attribute k; each loss is the
    # batch mean of that attribute's cross-entropy.
    loss1 = tf.reduce_mean(tf.nn.sparse_softmax_cross_entropy_with_logits(labels=y[:,0], logits=g1, name='l1'))
    loss2 = tf.reduce_mean(tf.nn.sparse_softmax_cross_entropy_with_logits(labels=y[:,1], logits=g2, name='l2'))
    loss3 = tf.reduce_mean(tf.nn.sparse_softmax_cross_entropy_with_logits(labels=y[:,2], logits=g3, name='l3'))
    loss4 = tf.reduce_mean(tf.nn.sparse_softmax_cross_entropy_with_logits(labels=y[:,3], logits=g4, name='l4'))

    # `loss` is already a scalar, so `cost` has the same value; it is kept as
    # the named tensor that the optimizer minimizes.
    loss = loss1 + loss2 + loss3 + loss4
    cost = tf.reduce_mean(loss, name='cost')
    optimizer = tf.train.AdamOptimizer(learning_rate)
    train = optimizer.minimize(cost, name='train')

    # Saver to save session for reuse
    saver = tf.train.Saver()

In [9]:
#Create a TF session:
with tf.Session(graph=g) as sess:
  sess.run(tf.global_variables_initializer())
  for i in range(500):
    # Rewind the dataset iterator, then drain it: one pass of the inner loop
    # is one epoch.
    sess.run(train_init)
    total_loss = 0
    n_batches = 0
    while True:
      try:
        _, batch_loss = sess.run([train, loss])
      except tf.errors.OutOfRangeError:
        # The iterator is exhausted -- this epoch is done.
        break
      total_loss += batch_loss
      n_batches += 1
    # Report the first epoch and every tenth one. The figure printed is the
    # sum of the per-batch losses for the epoch (not divided by n_batches).
    if i == 0 or (i + 1) % 10 == 0:
      print('Epoch: {0}, Loss: {1}'.format((i+1), total_loss))

Epoch: 1, Loss: 13.267520427703857
Epoch: 10, Loss: 7.965855598449707
Epoch: 20, Loss: 4.582868576049805
Epoch: 30, Loss: 2.8290826082229614
Epoch: 40, Loss: 2.22158682346344
Epoch: 50, Loss: 1.2740881443023682
Epoch: 60, Loss: 2.576905846595764
Epoch: 70, Loss: 1.3859631717205048
Epoch: 80, Loss: 0.5280037373304367
Epoch: 90, Loss: 0.4536770358681679
Epoch: 100, Loss: 0.22381533682346344
Epoch: 110, Loss: 0.10528392903506756
Epoch: 120, Loss: 0.13971873559057713
Epoch: 130, Loss: 0.14994891453534365
Epoch: 140, Loss: 0.07379152625799179
Epoch: 150, Loss: 0.2363655548542738
Epoch: 160, Loss: 3.797917425632477
Epoch: 170, Loss: 0.43973132222890854
Epoch: 180, Loss: 0.38372571766376495
Epoch: 190, Loss: 0.14156026393175125
Epoch: 200, Loss: 0.3214944712817669
Epoch: 210, Loss: 0.1960030561313033
Epoch: 220, Loss: 0.169401363003999
Epoch: 230, Loss: 0.04993235459551215
Epoch: 240, Loss: 0.014396531973034143
Epoch: 250, Loss: 0.009653221000917256
Epoch: 260, Loss: 0.010752371163107455
Epoc