In [1]:
import os
import sys
from tqdm import tqdm
from PIL import Image
import numpy as np

sys.path.extend(['..'])

from utils.config import process_config

import tensorflow as tf
from tensorflow.layers import (conv2d, max_pooling2d, average_pooling2d, batch_normalization, dropout, dense)
from tensorflow.nn import (relu, sigmoid, softmax, )

from sklearn.utils import shuffle

In [2]:
config_tf = tf.ConfigProto(allow_soft_placement=True)
config_tf.gpu_options.allow_growth = True
config_tf.gpu_options.per_process_gpu_memory_fraction = 0.9

In [3]:
DATA = '../data/data_clean/'
CONF = '../configs/roman.json'

In [4]:
config = process_config(CONF)
config

{'exp_name': 'data',
 'seed': 230,
 'image_size': 128,
 'train_percentage': 0.8,
 'summary_dir': '../experiments/data/summary/',
 'checkpoint_dir': '../experiments/data/checkpoint/'}

In [5]:
def normalize(image):
    return (image - image.min()) / (image.max() - image.min())

def shuffle_sim(a, b):
    assert a.shape[0] == a.shape[0], 'Shapes must be equal'
    
    ind = np.arange(a.shape[0])
    np.random.shuffle(ind)
    return a[ind], b[ind]

In [6]:
def read_train_test(path_to_data):
    data = {}
    for dset in ['train', 'test']:
        path_ = os.path.join(path_to_data, dset)
        X, Y = [], []
        classes = [d for d in os.listdir(path_) if os.path.isdir(os.path.join(path_, d))]
        classes.sort()
        
        for cl in classes:
            y = np.zeros((1, 8), dtype=np.int32)
            y[0, int(cl) - 1] = 1
            
            cl_path = os.path.join(path_, cl)
            filenames = [os.path.join(cl_path, pict) for pict in os.listdir(cl_path) if pict.endswith('.jpg')]
            
            for im in filenames:
                image = np.asarray(Image.open(im), dtype=np.float32)
                X.append(normalize(image).reshape((1, image.shape[0], image.shape[1], image.shape[2])))
                Y.append(y)
        
        a, b = shuffle_sim(np.concatenate(X), np.concatenate(Y))
        data[dset] = ([a, b])
    return data

In [7]:
EPOCHS = 100
LR = 5e-2

In [8]:
class Model():
    
    def __init__(self, config, learning_rate):
        self.lr = learning_rate
        self.sess = tf.Session()

        self.x = tf.placeholder(dtype=tf.float32, shape=(None, config.image_size, config.image_size, 3))
        self.y = tf.placeholder(dtype=tf.int32, shape=(None, 8))
        self.training = tf.placeholder(dtype=tf.bool, shape=())

        global_step = tf.Variable(1, name='global_step', trainable=False, dtype=tf.int32)
        self.step = tf.assign(global_step, global_step + 1)
        
        self.model()
        
        self.summ_writer = tf.summary.FileWriter(config.summary_dir, graph=self.sess.graph)
        self.sess.run(tf.global_variables_initializer())
    
    def block(self, inp,
              ch,
              num,
              c_ker=(3, 3),
              c_str=(1, 1),
              act=relu,
              mp_ker=(2, 2),
              mp_str=(2, 2)):
    
        with tf.variable_scope('block_' + str(num)):
            conv = conv2d(inp, ch, c_ker, strides=c_str)
            out = act(batch_normalization(conv))
            tf.summary.histogram('conv1', conv)

            conv = conv2d(out, ch, c_ker, strides=c_str)
            out = act(batch_normalization(conv))
            tf.summary.histogram('conv2', conv)

            out = max_pooling2d(out, (4, 4), strides=(2, 2))
        return out
    
    def model(self):
        with tf.name_scope('layers'):
            out = self.block(self.x, 64, 1)
#             out = self.block(out, 64, 2)
            out = self.block(out, 128, 3, c_ker=(5, 5), c_str=(2, 2))

            out = conv2d(out, 256, (5, 5), strides=(2, 2), activation=relu)

            dim = np.prod(out.shape[1:])
            out = tf.reshape(out, [-1, dim])

            dense_l = dense(out, 128, activation=relu)
            tf.summary.histogram('dense', dense_l)

            out = dropout(dense_l, rate=0.6, training=self.training)

            self.predictions = dense(out, 8, activation=softmax)
            tf.summary.histogram('pred', self.predictions)

        with tf.name_scope('metrics'):    
            amax_labels = tf.argmax(self.y, 1)
            amax_pred   = tf.argmax(self.predictions, 1)

            self.loss = tf.losses.softmax_cross_entropy(self.y, self.predictions)        
            self.acc = tf.reduce_mean(tf.cast(tf.equal(amax_labels, amax_pred), dtype=tf.float32))

            self.optimizer = tf.train.AdamOptimizer(self.lr).minimize(self.loss)

            tf.summary.scalar('loss', self.loss)
            tf.summary.scalar('accuracy', self.acc)

        self.summary = tf.summary.merge_all()
        
    def train(self, dat, dat_v, epochs):
        for epoch in range(epochs):
            loss, acc, _, summary, step = self.sess.run([
                self.loss, self.acc, self.optimizer, self.summary, self.step
            ],
                                                feed_dict={
                                                    self.x: dat[0],
                                                    self.y: dat[1],
                                                    self.training: True
                                                })

            summ_writer.add_summary(summary, step)
            print('EP: {:3d}\tLOSS: {:.10f}\tACC: {:.10f}'.format(
                epoch, loss, acc))

            if epoch % 10 == 0 and epoch != 0:
                self.test(dat_v)
                
    def test(self, dat):
        loss, acc = self.sess.run([self.loss, self.acc],
                                         feed_dict={self.x: dat[0],
                                                    self.y: dat[1],
                                                    self.training: False})

        print('\tVALIDATION\tLOSS: {:.10f}\tACC: {:.10f}'.format(loss, acc))
    

In [9]:
m = Model(config, 1e-3)

In [10]:
dat = read_train_test(DATA)

In [11]:
m.train(dat['train'], dat['test'], 100)

ResourceExhaustedError: OOM when allocating tensor with shape[461,64,61,61] and type float on /job:localhost/replica:0/task:0/device:GPU:0 by allocator GPU_0_bfc
	 [[node layers/block_1/max_pooling2d/MaxPool (defined at <ipython-input-8-d9e916c88f37>:37)  = MaxPool[T=DT_FLOAT, data_format="NCHW", ksize=[1, 1, 4, 4], padding="VALID", strides=[1, 1, 2, 2], _device="/job:localhost/replica:0/task:0/device:GPU:0"](layers/block_1/Relu_1)]]
Hint: If you want to see a list of allocated tensors when OOM happens, add report_tensor_allocations_upon_oom to RunOptions for current allocation info.


Caused by op 'layers/block_1/max_pooling2d/MaxPool', defined at:
  File "/home/paperspace/anaconda3/envs/data_science/lib/python3.6/runpy.py", line 193, in _run_module_as_main
    "__main__", mod_spec)
  File "/home/paperspace/anaconda3/envs/data_science/lib/python3.6/runpy.py", line 85, in _run_code
    exec(code, run_globals)
  File "/home/paperspace/anaconda3/envs/data_science/lib/python3.6/site-packages/ipykernel_launcher.py", line 16, in <module>
    app.launch_new_instance()
  File "/home/paperspace/anaconda3/envs/data_science/lib/python3.6/site-packages/traitlets/config/application.py", line 658, in launch_instance
    app.start()
  File "/home/paperspace/anaconda3/envs/data_science/lib/python3.6/site-packages/ipykernel/kernelapp.py", line 505, in start
    self.io_loop.start()
  File "/home/paperspace/anaconda3/envs/data_science/lib/python3.6/site-packages/tornado/platform/asyncio.py", line 132, in start
    self.asyncio_loop.run_forever()
  File "/home/paperspace/anaconda3/envs/data_science/lib/python3.6/asyncio/base_events.py", line 438, in run_forever
    self._run_once()
  File "/home/paperspace/anaconda3/envs/data_science/lib/python3.6/asyncio/base_events.py", line 1451, in _run_once
    handle._run()
  File "/home/paperspace/anaconda3/envs/data_science/lib/python3.6/asyncio/events.py", line 145, in _run
    self._callback(*self._args)
  File "/home/paperspace/anaconda3/envs/data_science/lib/python3.6/site-packages/tornado/ioloop.py", line 758, in _run_callback
    ret = callback()
  File "/home/paperspace/anaconda3/envs/data_science/lib/python3.6/site-packages/tornado/stack_context.py", line 300, in null_wrapper
    return fn(*args, **kwargs)
  File "/home/paperspace/anaconda3/envs/data_science/lib/python3.6/site-packages/tornado/gen.py", line 1233, in inner
    self.run()
  File "/home/paperspace/anaconda3/envs/data_science/lib/python3.6/site-packages/tornado/gen.py", line 1147, in run
    yielded = self.gen.send(value)
  File "/home/paperspace/anaconda3/envs/data_science/lib/python3.6/site-packages/ipykernel/kernelbase.py", line 370, in dispatch_queue
    yield self.process_one()
  File "/home/paperspace/anaconda3/envs/data_science/lib/python3.6/site-packages/tornado/gen.py", line 346, in wrapper
    runner = Runner(result, future, yielded)
  File "/home/paperspace/anaconda3/envs/data_science/lib/python3.6/site-packages/tornado/gen.py", line 1080, in __init__
    self.run()
  File "/home/paperspace/anaconda3/envs/data_science/lib/python3.6/site-packages/tornado/gen.py", line 1147, in run
    yielded = self.gen.send(value)
  File "/home/paperspace/anaconda3/envs/data_science/lib/python3.6/site-packages/ipykernel/kernelbase.py", line 357, in process_one
    yield gen.maybe_future(dispatch(*args))
  File "/home/paperspace/anaconda3/envs/data_science/lib/python3.6/site-packages/tornado/gen.py", line 326, in wrapper
    yielded = next(result)
  File "/home/paperspace/anaconda3/envs/data_science/lib/python3.6/site-packages/ipykernel/kernelbase.py", line 267, in dispatch_shell
    yield gen.maybe_future(handler(stream, idents, msg))
  File "/home/paperspace/anaconda3/envs/data_science/lib/python3.6/site-packages/tornado/gen.py", line 326, in wrapper
    yielded = next(result)
  File "/home/paperspace/anaconda3/envs/data_science/lib/python3.6/site-packages/ipykernel/kernelbase.py", line 534, in execute_request
    user_expressions, allow_stdin,
  File "/home/paperspace/anaconda3/envs/data_science/lib/python3.6/site-packages/tornado/gen.py", line 326, in wrapper
    yielded = next(result)
  File "/home/paperspace/anaconda3/envs/data_science/lib/python3.6/site-packages/ipykernel/ipkernel.py", line 294, in do_execute
    res = shell.run_cell(code, store_history=store_history, silent=silent)
  File "/home/paperspace/anaconda3/envs/data_science/lib/python3.6/site-packages/ipykernel/zmqshell.py", line 536, in run_cell
    return super(ZMQInteractiveShell, self).run_cell(*args, **kwargs)
  File "/home/paperspace/anaconda3/envs/data_science/lib/python3.6/site-packages/IPython/core/interactiveshell.py", line 2819, in run_cell
    raw_cell, store_history, silent, shell_futures)
  File "/home/paperspace/anaconda3/envs/data_science/lib/python3.6/site-packages/IPython/core/interactiveshell.py", line 2845, in _run_cell
    return runner(coro)
  File "/home/paperspace/anaconda3/envs/data_science/lib/python3.6/site-packages/IPython/core/async_helpers.py", line 67, in _pseudo_sync_runner
    coro.send(None)
  File "/home/paperspace/anaconda3/envs/data_science/lib/python3.6/site-packages/IPython/core/interactiveshell.py", line 3020, in run_cell_async
    interactivity=interactivity, compiler=compiler, result=result)
  File "/home/paperspace/anaconda3/envs/data_science/lib/python3.6/site-packages/IPython/core/interactiveshell.py", line 3185, in run_ast_nodes
    if (yield from self.run_code(code, result)):
  File "/home/paperspace/anaconda3/envs/data_science/lib/python3.6/site-packages/IPython/core/interactiveshell.py", line 3267, in run_code
    exec(code_obj, self.user_global_ns, self.user_ns)
  File "<ipython-input-9-1fcf79a0c04c>", line 1, in <module>
    m = Model(config, 1e-3)
  File "<ipython-input-8-d9e916c88f37>", line 14, in __init__
    self.model()
  File "<ipython-input-8-d9e916c88f37>", line 42, in model
    out = self.block(self.x, 64, 1)
  File "<ipython-input-8-d9e916c88f37>", line 37, in block
    out = max_pooling2d(out, (4, 4), strides=(2, 2))
  File "/home/paperspace/anaconda3/envs/data_science/lib/python3.6/site-packages/tensorflow/python/layers/pooling.py", line 302, in max_pooling2d
    return layer.apply(inputs)
  File "/home/paperspace/anaconda3/envs/data_science/lib/python3.6/site-packages/tensorflow/python/keras/engine/base_layer.py", line 817, in apply
    return self.__call__(inputs, *args, **kwargs)
  File "/home/paperspace/anaconda3/envs/data_science/lib/python3.6/site-packages/tensorflow/python/layers/base.py", line 374, in __call__
    outputs = super(Layer, self).__call__(inputs, *args, **kwargs)
  File "/home/paperspace/anaconda3/envs/data_science/lib/python3.6/site-packages/tensorflow/python/keras/engine/base_layer.py", line 757, in __call__
    outputs = self.call(inputs, *args, **kwargs)
  File "/home/paperspace/anaconda3/envs/data_science/lib/python3.6/site-packages/tensorflow/python/keras/layers/pooling.py", line 223, in call
    data_format=conv_utils.convert_data_format(self.data_format, 4))
  File "/home/paperspace/anaconda3/envs/data_science/lib/python3.6/site-packages/tensorflow/python/ops/nn_ops.py", line 2140, in max_pool
    name=name)
  File "/home/paperspace/anaconda3/envs/data_science/lib/python3.6/site-packages/tensorflow/python/ops/gen_nn_ops.py", line 4641, in max_pool
    data_format=data_format, name=name)
  File "/home/paperspace/anaconda3/envs/data_science/lib/python3.6/site-packages/tensorflow/python/framework/op_def_library.py", line 787, in _apply_op_helper
    op_def=op_def)
  File "/home/paperspace/anaconda3/envs/data_science/lib/python3.6/site-packages/tensorflow/python/util/deprecation.py", line 488, in new_func
    return func(*args, **kwargs)
  File "/home/paperspace/anaconda3/envs/data_science/lib/python3.6/site-packages/tensorflow/python/framework/ops.py", line 3274, in create_op
    op_def=op_def)
  File "/home/paperspace/anaconda3/envs/data_science/lib/python3.6/site-packages/tensorflow/python/framework/ops.py", line 1770, in __init__
    self._traceback = tf_stack.extract_stack()

ResourceExhaustedError (see above for traceback): OOM when allocating tensor with shape[461,64,61,61] and type float on /job:localhost/replica:0/task:0/device:GPU:0 by allocator GPU_0_bfc
	 [[node layers/block_1/max_pooling2d/MaxPool (defined at <ipython-input-8-d9e916c88f37>:37)  = MaxPool[T=DT_FLOAT, data_format="NCHW", ksize=[1, 1, 4, 4], padding="VALID", strides=[1, 1, 2, 2], _device="/job:localhost/replica:0/task:0/device:GPU:0"](layers/block_1/Relu_1)]]
Hint: If you want to see a list of allocated tensors when OOM happens, add report_tensor_allocations_upon_oom to RunOptions for current allocation info.

