# multimodel-inference

Forked from [vashineyu/deep-learning-experiments](https://github.com/vashineyu/deep-learning-experiments/blob/master/DL_MultiModel_multiGraph/Tensorflow%26Keras_RMMonGPUS.ipynb), but covering only the Keras version and the MNIST data.

In [1]:
import os
import argparse
import keras
import tensorflow as tf
import numpy as np

from pprint import pprint, pformat
from threading import Thread
from keras import backend as K
from tensorflow.python.client import device_lib

Using TensorFlow backend.


In [2]:
def argparser():
    """Build the command-line parser for the multi-model experiment.

    Returns:
        argparse.ArgumentParser: parser exposing ``--gpus`` (required),
        ``--lr``, ``--epochs``, ``--bz`` and ``--skip-log``.
    """
    # The first positional parameter of ArgumentParser is `prog`, not
    # `description`; pass the text by keyword so it is shown as the description
    # instead of replacing the program name in usage/error messages.
    parser = argparse.ArgumentParser(
        description='Experiment to inference multi-model, test by mnist')
    parser.add_argument('--gpus', dest='gpus', required=True,
                        help='comma-separated GPU ids, e.g. "1,3"')
    parser.add_argument('--lr', dest='lr', default=1e-4, type=float, help='learning rate')
    parser.add_argument('--epochs', dest='epochs', default=100, type=int, help='epochs')
    parser.add_argument('--bz', dest='bz', default=64, type=int, help='batch size')
    parser.add_argument('--skip-log', dest='skip_log', default=5, type=int, help='check K_Logger')
    return parser

def get_available_gpus():
    """Return the names of all local devices whose device type is 'GPU'.

    Returns:
        list: the ``name`` attribute of every GPU entry reported by
        ``device_lib.list_local_devices()``, in the order reported.
    """
    gpu_names = []
    for device in device_lib.list_local_devices():
        if device.device_type == 'GPU':
            gpu_names.append(device.name)
    return gpu_names

# Parse a fixed argument list instead of sys.argv; '1,3' is the GPU id list
# used for this run.
args = argparser().parse_args(['--gpus', '1,3'])
# Restrict the CUDA devices visible to this process before the devices are
# enumerated on the next line.
os.environ['CUDA_VISIBLE_DEVICES'] = args.gpus
# Device names of the visible GPUs (the recorded output shows them as
# '/device:GPU:0' and '/device:GPU:1').
available_gpus = get_available_gpus()

print('arguments: ', args)
print('available_gpus:', pformat(available_gpus))

arguments:  Namespace(bz=64, epochs=100, gpus='1,3', lr=0.0001, skip_log=5)
available_gpus: ['/device:GPU:0', '/device:GPU:1']


## data

In [4]:
from tensorflow.examples.tutorials import mnist

# Load MNIST from (or into) data/mnist with one-hot labels and un-flattened
# images; per the recorded output the images are (N, 28, 28, 1) and the
# labels are (N, 10).
mnist_data = mnist.input_data.read_data_sets("data/mnist", one_hot=True, reshape=False)
# These module-level arrays are read directly by the training function later on.
x_train, y_train = mnist_data.train.images, mnist_data.train.labels
x_test, y_test = mnist_data.test.images, mnist_data.test.labels

# Sanity-check the array shapes.
print('x_train.shape={}, y_train.shape={}\nx_test.shape={}, y_test.shape={}'.format(
    x_train.shape, y_train.shape, x_test.shape, y_test.shape
))

Extracting data/mnist/train-images-idx3-ubyte.gz
Extracting data/mnist/train-labels-idx1-ubyte.gz
Extracting data/mnist/t10k-images-idx3-ubyte.gz
Extracting data/mnist/t10k-labels-idx1-ubyte.gz
x_train.shape=(55000, 28, 28, 1), y_train.shape=(55000, 10)
x_test.shape=(10000, 28, 28, 1), y_test.shape=(10000, 10)


## models

In [5]:
from keras.models import load_model, Model
from keras.layers import Input, Conv2D, MaxPooling2D, Dropout, Flatten, Dense
from keras.optimizers import Adam

def build_model(input_shape, num_classes):
    """Create an uncompiled two-block convolutional classifier.

    Args:
        input_shape: shape of a single input sample, passed to ``Input``.
        num_classes: number of units in the final softmax layer.

    Returns:
        A ``Model`` mapping the input layer to the softmax output.
    """
    inputs = Input(shape=input_shape)

    # Convolutional feature extractor: two conv + max-pool stages.
    features = Conv2D(filters=32, kernel_size=(3, 3), activation='relu', padding='same')(inputs)
    features = MaxPooling2D(pool_size=(2, 2))(features)
    features = Conv2D(filters=64, kernel_size=(3, 3), activation='relu', padding='same')(features)
    features = MaxPooling2D(pool_size=(2, 2))(features)
    features = Dropout(0.25)(features)

    # Dense classification head.
    hidden = Flatten()(features)
    hidden = Dense(128, activation='relu')(hidden)
    hidden = Dropout(0.5)(hidden)
    predictions = Dense(num_classes, activation='softmax')(hidden)

    return Model(inputs=[inputs], outputs=[predictions])

# One learning rate per requested GPU (all identical here).
num_gpu = len(args.gpus.split(','))
input_shape = x_train.shape[1:]
num_classes = y_train.shape[1]
lr = [args.lr] * num_gpu
jobs_argument = []

K.clear_session()
# Build and compile one model per GPU, each inside its own tf.Graph, and
# collect (gpu_name, graph, model) tuples for the training threads.
# The loop variable is named `gpu_lr` so it does not overwrite the `lr` list
# that is being iterated over.
for gpu_name, gpu_lr in zip(available_gpus, lr):
    with tf.Graph().as_default() as graph:
        with tf.device(gpu_name):
            model = build_model(input_shape, num_classes)
            model.compile(loss='categorical_crossentropy', metrics=['acc'], optimizer=Adam(lr=gpu_lr))
            jobs_argument.append((gpu_name, graph, model))

pprint(jobs_argument)

[('/device:GPU:0',
  <tensorflow.python.framework.ops.Graph object at 0x7f48f405cf60>,
  <keras.engine.training.Model object at 0x7f48e01eef28>),
 ('/device:GPU:1',
  <tensorflow.python.framework.ops.Graph object at 0x7f48f4077080>,
  <keras.engine.training.Model object at 0x7f48e00a8400>)]


## training

In [6]:
from keras.callbacks import Callback

class K_Logger(Callback):
    """Keras callback that prints a one-line metric summary every ``n`` epochs."""

    def __init__(self, n, gpu_id=0):
        """
        n {int} - print the log for each n epochs
        gpu_id - label printed at the start of every log line
        """
        # Let the base Callback set up its own attributes.
        super(K_Logger, self).__init__()
        self.n = n
        self.gpu_id = gpu_id

    def on_epoch_end(self, epoch, logs=None):
        """Print loss/acc/val_loss/val_acc when ``epoch`` is a multiple of ``n``.

        Metrics missing from ``logs`` (for example the ``val_*`` entries when
        no validation data is supplied) are printed as ``nan`` instead of
        raising a TypeError while formatting ``None``.
        """
        # Avoid a shared mutable default argument.
        logs = logs or {}
        if epoch % self.n != 0:
            return

        missing = float('nan')
        train_loss = logs.get('loss', missing)
        train_acc = logs.get('acc', missing)
        val_loss = logs.get('val_loss', missing)
        val_acc = logs.get('val_acc', missing)
        print('GPU {} - epoch {}, loss {:.5f}, acc {:.3f}, val_loss {:.5f}, val_acc {:.3f}'.format(
            self.gpu_id, epoch, train_loss, train_acc, val_loss, val_acc
        ))

def training(gpu_name, graph, model):
    """Fit ``model`` on the module-level MNIST arrays in a session bound to ``graph``.

    Args:
        gpu_name: device name; used only as the label in the log lines.
        graph: the ``tf.Graph`` the session is created for.
        model: compiled Keras model to fit.

    Batch size, epoch count and logging interval are read from the
    module-level ``args``.
    """
    print('start - {}'.format((gpu_name, graph, model)))
    epoch_logger = K_Logger(args.skip_log, gpu_name)

    with tf.Session(graph=graph) as session:
        # Register this session with the Keras backend before fitting.
        K.set_session(session=session)
        model.fit(
            x=x_train,
            y=y_train,
            validation_data=(x_test, y_test),
            batch_size=args.bz,
            epochs=args.epochs,
            callbacks=[epoch_logger],
            verbose=False,
        )

In [None]:
# Start one training thread per (gpu_name, graph, model) tuple so the models
# are fitted concurrently.
thread_jobs = []
for job_arg in jobs_argument:
    job = Thread(target=training, args=job_arg)
    thread_jobs.append(job)
    job.start()

# Block until every training thread has finished.
for j in thread_jobs:
    j.join()
print('Complete')

start - ('/device:GPU:0', <tensorflow.python.framework.ops.Graph object at 0x7f48f405cf60>, <keras.engine.training.Model object at 0x7f48e01eef28>)
start - ('/device:GPU:1', <tensorflow.python.framework.ops.Graph object at 0x7f48f4077080>, <keras.engine.training.Model object at 0x7f48e00a8400>)
GPU /device:GPU:1 - epoch 0, loss 0.67887, acc 0.791, val_loss 0.17829, val_acc 0.946
GPU /device:GPU:0 - epoch 0, loss 0.70519, acc 0.787, val_loss 0.19112, val_acc 0.944
GPU /device:GPU:1 - epoch 5, loss 0.10486, acc 0.969, val_loss 0.05518, val_acc 0.982
GPU /device:GPU:0 - epoch 5, loss 0.10590, acc 0.968, val_loss 0.04984, val_acc 0.983
GPU /device:GPU:1 - epoch 10, loss 0.06970, acc 0.979, val_loss 0.03508, val_acc 0.989
GPU /device:GPU:0 - epoch 10, loss 0.06871, acc 0.979, val_loss 0.03545, val_acc 0.988
GPU /device:GPU:1 - epoch 15, loss 0.05342, acc 0.984, val_loss 0.02959, val_acc 0.989
GPU /device:GPU:0 - epoch 15, loss 0.05128, acc 0.984, val_loss 0.02928, val_acc 0.990
GPU /device: