# Simple mixture density network on a regression task

In [1]:
import nbloader
import tensorflow as tf
import tensorflow.contrib.slim as slim
import numpy as np 
import matplotlib.pyplot as plt
from demo_util import nzr
%matplotlib inline
%config InlineBackend.figure_format = 'retina'
print ("Packages loaded")

  from ._conv import register_converters as _register_converters


Importing Jupyter notebook from [demo_util.ipynb]
Packages loaded


Data generation

In [None]:
def get_data4mdn(_xmin=-5.,_xmax=5.,_bias=0,_nsample=1e3):
    """
        Generate complex (two-branch, heteroscedastic) data for testing a mixture density network.

        Half of the samples follow +x^2 and the other half follow -x^2; every target
        carries zero-mean Gaussian noise whose standard deviation is |x|.

        _xmin,_xmax : range of the uniformly drawn inputs
        _bias       : offset applied to the z targets (subtracted on the +x^2 branch,
                      added on the -x^2 branch)
        _nsample    : total number of samples; each branch gets int(_nsample/2) of them

        Returns (_xdata,_ydata,_zdata), each float32 with one column,
        with the +x^2 branch stacked on top of the -x^2 branch.
    """
    n_half = int(_nsample/2)

    def _inputs_and_noise():
        # Inputs first, then one noise draw per input (std = |x|); this call order
        # fixes how the global numpy random stream is consumed.
        xs = np.float32(np.random.uniform(_xmin,_xmax,(n_half,1)))
        noise = np.array([np.random.normal(scale=np.abs(row)) for row in xs])
        return xs,noise

    # Branch 1: +x^2
    x_pos,r_pos = _inputs_and_noise()
    sq_pos = x_pos**2
    y_pos = np.float32(sq_pos+r_pos)
    z_pos = np.float32(sq_pos-_bias+r_pos)

    # Branch 2: -x^2
    x_neg,r_neg = _inputs_and_noise()
    sq_neg = -(x_neg**2)
    y_neg = np.float32(sq_neg+r_neg)
    z_neg = np.float32(sq_neg+_bias+r_neg)

    stacked = [np.concatenate(pair,axis=0)
               for pair in ((x_pos,x_neg),(y_pos,y_neg),(z_pos,z_neg))]
    return stacked[0],stacked[1],stacked[2]

Define MDN

In [None]:
class basic_mdn(object):
    """
        Mixture density network (MDN): an MLP that maps x to the parameters of a
        K-component Gaussian mixture over y with per-dimension (diagonal) scales.

        Constructor arguments
            _name   : label used in the printed summary
            _xdim   : input dimension
            _ydim   : output dimension (D)
            _hdims  : hidden layer widths (one fully connected layer per entry)
            _kmix   : number of mixture components (K)
            _actv   : activation function handed to slim.fully_connected
            _bn     : normalizer function handed to slim.fully_connected
            _sigmax : upper bound of sigma (sigma = _sigmax*sigmoid(.))

        Graph attributes created by the constructor
            x, y, kp, lr, wd, is_training : placeholders
            pi (N x K), mu (N x D x K), sigma (N x D x K) : mixture parameters
            gmm_nll, loss, optm : negative log-likelihood, total loss, Adam train op
    """
    def __init__(self,_name='',_xdim=1,_ydim=1,_hdims=[64,64],_kmix=2
                 ,_actv=tf.nn.relu,_bn=slim.batch_norm
                 ,_sigmax=2):
        self.name = _name
        self.xdim = _xdim
        self.ydim = _ydim
        # Copy so that the shared default list (or a caller's list) is never aliased
        self.hdims = list(_hdims)
        self.kmix = _kmix
        self.actv = _actv
        self.bn   = _bn
        self.sigmax = _sigmax
        # Build model (placeholders + network), then the likelihood/loss graph
        self.build_model()
        self.build_graph()

        # Print a summary of the variables living under the 'W' scope
        print ("[%s] instantiated" %(self.name))
        print (" xdim:[%d],ydim:[%d]"%(self.xdim,self.ydim))
        print ("Trainable Variables")
        for i,var in enumerate(self.t_vars):
            w_name  = var.name
            w_shape = var.get_shape().as_list()
            print (" [%02d] Name:[%s] Shape:[%s]" % (i,w_name,w_shape))
        print ("Global Variables")
        for i,var in enumerate(self.g_vars):
            w_name  = var.name
            w_shape = var.get_shape().as_list()
            print (" [%02d] Name:[%s] Shape:[%s]" % (i,w_name,w_shape))

    def build_model(self):
        """
            Build model: placeholders, initializers and the network that outputs
            self.pi (N x K), self.mu (N x D x K) and self.sigma (N x D x K).
        """
        # Placeholders
        self.x = tf.placeholder(dtype=tf.float32,shape=[None,self.xdim]) # Input placeholder
        self.y = tf.placeholder(dtype=tf.float32,shape=[None,self.ydim]) # Output placeholder
        self.kp = tf.placeholder(dtype=tf.float32,shape=[]) # Keep probability
        self.lr = tf.placeholder(dtype=tf.float32,shape=[]) # Learning rate
        self.wd = tf.placeholder(dtype=tf.float32,shape=[]) # Weight decay rate
        self.is_training = tf.placeholder(dtype=tf.bool,shape=[]) # Is training flag
        # Initializers
        self.fully_init  = tf.random_normal_initializer(stddev=0.02)
        self.bias_init   = tf.constant_initializer(0.)
        self.bn_init     = {'beta': tf.constant_initializer(0.),
                           'gamma': tf.random_normal_initializer(1., 0.02)}
        self.bn_params   = {'is_training':self.is_training,'decay':0.9,'epsilon':1e-5,
                           'param_initializers':self.bn_init,'updates_collections': None}
        # Network; every variable is created under the fixed scope 'W'
        with tf.variable_scope('W',reuse=False):
            with slim.arg_scope([slim.fully_connected],activation_fn=self.actv,
                                weights_initializer=self.fully_init,biases_initializer=self.bias_init,
                                normalizer_fn=self.bn,normalizer_params=self.bn_params,
                                weights_regularizer=slim.l2_regularizer(self.wd)):
                _net = self.x # Here comes the input
                for h_idx,_hdim in enumerate(self.hdims): # Loop over hidden layers
                    _net = slim.fully_connected(_net,_hdim,scope='lin'+str(h_idx))
                    # Dropout is built with is_training=True, so it is controlled only by
                    # the fed keep probability (the sampler feeds kp=1.0 to switch it off).
                    _net = slim.dropout(_net,keep_prob=self.kp,is_training=True,scope='dr'+str(h_idx))
                # NOTE(review): 'pi_logits' and 'sigma_logits' are created with
                # slim.fully_connected inside this arg_scope, so they pick up the
                # activation/normalizer configured above -- confirm this is intended.
                # Class allocation probability (data x mixture)
                _pi_logits = slim.fully_connected(_net,self.kmix,scope='pi_logits')
                _pi = tf.nn.softmax(_pi_logits,dim=1)
                # means (data x dim x mixture)
                _mu = slim.linear(_net,self.kmix*self.ydim,scope='mu_flatten')
                _mu = tf.reshape(_mu,shape=[-1,self.ydim,self.kmix])
                # standard deviations in (0,sigmax) (data x dim x mixture)
                _sigma_logits = slim.fully_connected(_net,self.kmix*self.ydim,scope='sigma_logits')
                _sigma = self.sigmax*tf.nn.sigmoid(_sigma_logits)
                _sigma = tf.reshape(_sigma,shape=[-1,self.ydim,self.kmix])
                self.pi_logits = _pi_logits # kept so that log(pi) can be computed stably
                self.pi = _pi
                self.mu = _mu
                self.sigma = _sigma
        # Get trainable and global variables that belong to this model's scope
        _t_vars = tf.trainable_variables()
        self.t_vars = [var for var in _t_vars if 'W/' in var.name]
        _g_vars = tf.global_variables()
        self.g_vars = [var for var in _g_vars if 'W/' in var.name]

    def build_graph(self):
        """
            Build graph: log-likelihood of the Gaussian mixture, the loss and the optimizer.

            For each component k (sigma is a standard deviation: the residual is divided
            by it here and the sampler multiplies unit noise by it):
                log N(y|mu_k,sigma_k) = -0.5*sum_d((y_d-mu_dk)/sigma_dk)^2
                                        - sum_d(log(sigma_dk)) - 0.5*D*log(2*pi)
        """
        y = self.y
        mu = self.mu
        sigma = self.sigma
        yrepeat = tf.tile(y[:,:,tf.newaxis],[1,1,self.kmix]) # (N x D x K)
        self.quadratics = -0.5*tf.reduce_sum(((yrepeat-mu)/sigma)**2,axis=1) # (N x K)
        # Normalizer of a diagonal Gaussian parameterized by standard deviations:
        # -sum(log(sigma)). (A 0.5 factor would only be right if sigma were a variance.)
        self.logdet = -tf.reduce_sum(tf.log(sigma),axis=1) # (N x K)
        self.logconstant = - 0.5*self.ydim*tf.log(2*np.pi) # (1)
        # log_softmax on the logits avoids log(0) when a mixing weight underflows
        self.logpi = tf.nn.log_softmax(self.pi_logits) # (N x K)
        self.exponents = self.quadratics + self.logdet + self.logconstant + self.logpi
        self.logprobs = tf.reduce_logsumexp(self.exponents,axis=1) # (N)
        self.gmm_prob = tf.exp(self.logprobs) # (N)
        self.gmm_nll  = -tf.reduce_mean(self.logprobs) # (1)

        # Loss (NLL + regularization losses registered in the graph) and optimizer
        self.loss = self.gmm_nll + sum(tf.losses.get_regularization_losses())
        self.optm = tf.train.AdamOptimizer(learning_rate=self.lr
            , beta1=0.9, beta2=0.999, epsilon=0.01).minimize(self.loss)

    def train(self,_sess,_x,_y,max_iter=1000,batch_size=128,print_period=100,plot_period=100,
              lr=1e-4,kp=1.0,wd=.0,is_training=True):
        """
            Mini-batch training for max_iter+1 epochs.

            _sess        : session in which the variables are already initialized
            _x,_y        : training inputs (N x xdim) and targets (N x ydim)
            batch_size   : requested batch size; re-derived below so that all batches
                           of an epoch have equal size
            print_period : print the full-data loss every this many epochs (0 disables)
            plot_period  : plot data and one sample per input every this many epochs (0 disables)
            lr,kp,wd,is_training : values fed to the corresponding placeholders
        """
        n_data = _x.shape[0]
        num_batches = max(n_data // batch_size, 1)
        batch_size = n_data // num_batches
        colors = 'rbgcmyk' # one color per output dimension (cycled if ydim is larger)
        for i in range(max_iter+1):
            # Reshuffle every epoch; fancy indexing returns copies, so the caller's
            # arrays are left untouched.
            perm = np.random.permutation(n_data)
            _x,_y = _x[perm],_y[perm]
            for j in range(num_batches):
                start = j * batch_size
                end = (j + 1) * batch_size
                feeds = {self.x:_x[start:end,:],self.y:_y[start:end,:]
                         ,self.kp:kp,self.lr:lr,self.wd:wd,self.is_training:is_training}
                _sess.run(self.optm,feeds)
            # Plot current result
            if plot_period and (i%plot_period)==0:
                nSample = 1
                ytest = self.sampler(_sess=_sess,_x=_x,n_samples=nSample)
                plt.figure(figsize=(5,3))
                for d in range(self.ydim):
                    plt.plot(_x,_y[:,d],colors[d%len(colors)]+'o',alpha=0.3)
                # Dedicated loop names: reusing `i` here would clobber the epoch counter
                # used by the title and by the print step below.
                for s in range(nSample):
                    for d in range(self.ydim):
                        plt.plot(_x,ytest[:,d,s],colors[d%len(colors)]+'x')
                plt.title("[%d/%d]"%(i,max_iter))
                plt.show()
            # Print current result
            if print_period and (i%print_period)==0:
                feeds = {self.x:_x,self.y:_y,self.kp:kp,self.lr:lr,self.wd:wd,
                         self.is_training:is_training}
                loss = _sess.run(self.loss,feeds)
                print("[%d/%d] loss : %.3e"%(i,max_iter,loss))

    def sampler(self,_sess,_x,n_samples=10):
        """
            Sampler: draw n_samples outputs per input from the predicted mixture.

            The network is evaluated with kp=1.0 and is_training=False.
            Returns an array of shape (N x ydim x n_samples).
        """
        pi, mu, sigma = _sess.run([self.pi, self.mu, self.sigma],
                                  feed_dict={self.x:_x,self.kp:1.0,self.is_training:False})
        n_points = _x.shape[0]
        _y_sampled = np.zeros([n_points,self.ydim,n_samples])
        for i in range(n_points):
            # float32 softmax rows may miss 1.0 by more than np.random.choice tolerates;
            # renormalize in float64 before sampling the component index.
            p = np.asarray(pi[i,:],dtype=np.float64)
            p = p/np.sum(p)
            for j in range(n_samples):
                k = np.random.choice(self.kmix,p=p)
                _y_sampled[i,:,j] = mu[i,:,k] + np.random.randn(self.ydim)*sigma[i,:,k]
        return _y_sampled
        
# Cell marker: reaching this line means the class definition above executed
print("Class ready")

Generate training data for MDN

In [None]:
if __name__ == "__main__":
    # Get data: x plus two targets; z is y with its two branches offset by _bias
    xdata,_ydata,_zdata = get_data4mdn(_xmin=-5.,_xmax=5.,_bias=20,_nsample=1e3)
    # Stack y and z column-wise -> one column per output dimension
    ydata = np.concatenate((_ydata,_zdata),axis=1)
    # Print shapes. Call form with a single formatted string parses on Python 3
    # (the statement form was a SyntaxError there) and emits the same text on Python 2.
    print ("xdata.shape:  %s"%(xdata.shape,))
    print ("ydata.shape:  %s"%(ydata.shape,))
    # Plot both target columns against x
    plt.figure(figsize=(6,3))
    plt.plot(xdata,ydata[:,0],'ro',alpha=0.3)
    plt.plot(xdata,ydata[:,1],'bo',alpha=0.3)
    plt.title('Training data')
    plt.show()

Instantiate MDN class

In [None]:
if __name__ == "__main__":
    # Clear the default graph before building the model into it
    tf.reset_default_graph()
    # 1-D input, 2-D output, two hidden layers of 64 units, 10 mixture components
    M = basic_mdn(
        _name='MDN',
        _xdim=1,
        _ydim=2,
        _hdims=[64,64],
        _kmix=10,
        _actv=tf.nn.tanh,
        _bn=slim.batch_norm,
    )

Train

In [None]:
if __name__ == "__main__":
    # Session whose GPU memory allocation is allowed to grow on demand
    config = tf.ConfigProto()
    config.gpu_options.allow_growth = True
    sess = tf.Session(config=config)
    # Initialize every variable, then run the training loop on the generated data
    init_op = tf.global_variables_initializer()
    sess.run(init_op)
    M.train(
        _sess=sess,
        _x=xdata,
        _y=ydata,
        max_iter=1000,
        batch_size=128,
        print_period=100,
        plot_period=100,
        lr=1e-4,
        kp=1.0,
        wd=.0,
        is_training=True,
    )

Sample

In [None]:
if __name__ == "__main__":
    # Draw several outputs per training input from the model
    ytest = M.sampler(_sess=sess,_x=xdata,n_samples=3)
    plt.figure(figsize=(14,6))
    # Training targets (circles)
    plt.plot(xdata,ydata[:,0],'ro',alpha=0.3)
    plt.plot(xdata,ydata[:,1],'bo',alpha=0.3)
    # Sampled outputs (crosses); the loop bound follows the sample axis of ytest
    # so it cannot drift from the n_samples requested above
    for i in range(ytest.shape[2]):
        plt.plot(xdata,ytest[:,0,i],'rx')
        plt.plot(xdata,ytest[:,1,i],'bx')
    # The figure overlays model samples, so the title says so (it previously
    # repeated the data-only cell's 'Training data')
    plt.title('Training data (o) and MDN samples (x)')
    plt.show()