# Section 3.2: Mixture of Gaussians

## Section 3.2.0: Package initialisations, environment configuration and function definitions

Import relevant packages:

In [1]:
import tensorflow as tf
import numpy as np

import time
import datetime
import os

# TensorFlow embedding API library
from tensorflow.contrib.tensorboard.plugins import projector

# Non-interactive plotting
import matplotlib.pyplot as plt

# Interactive plotting
from plotly import tools
import plotly.plotly as py
import plotly.graph_objs as go
import plotly.offline as pyo
from plotly.offline import download_plotlyjs

Configure environment:

In [2]:
# Render inline matplotlib figures in the 'retina' (high-DPI) format
%config InlineBackend.figure_format = 'retina'
# Print NumPy arrays with 3 digits of precision
np.set_printoptions(precision=3)

# Root directory under which the training functions write their TensorBoard summaries
LOG_DIR = './Logs'

# Activate Plotly Offline for Jupyter
pyo.init_notebook_mode(connected=True)

# Seed shared by the TensorFlow graph and the variable initialisers in build_MoG
SEED = 521

Load data2D.npy and data100D.npy into memory:

In [3]:
# data2D.npy contains 10,000 data points of dimension 2
# data100D.npy contains 10,000 data points of dimension 100

# Load data
data2D = np.load("./Data/data2D.npy")
data100D = np.load("./Data/data100D.npy")

# Seed NumPy from the notebook-wide SEED constant (instead of repeating the
# literal 521) so the shuffle and the TensorFlow seeding cannot drift apart
np.random.seed(SEED)

# Generate one index vector per data set
randIdx2D = np.arange(len(data2D))
randIdx100D = np.arange(len(data100D))

# Randomise data2D (shuffle order matters: 2D first, then 100D, for reproducibility)
np.random.shuffle(randIdx2D)
data2D = data2D[randIdx2D]

# Randomise data100D
np.random.shuffle(randIdx100D)
data100D = data100D[randIdx100D]

### Load results (optional; only when resuming a previous work session)

In [4]:
# results_2_2_2 = np.load('./Results/MoG/2_2_2.npy')
# results_2_2_3 = np.load('./Results/MoG/2_2_3.npy')
# results_2_2_4_MoG = np.load('./Results/MoG/2_2_4_MoG.npy')

## Create Mixture of Gaussians (MoG) TensorFlow graph:

### Loss function:

$$ \mathcal{L}(\mathbf{\mu}, \mathbf{\sigma}, \mathbf{\pi}) = -\log P(\mathbf{X}) = -\sum_{n=1}^B \log \sum_{k=1}^K \pi_k \, \mathcal{N} (\mathbf{x}_n \ | \ \mathbf{\mu}_k, \sigma_k^2) $$

In [5]:
def build_MoG(K, D, device='cpu'):
    '''
    Builds TensorFlow graph for MoG

    Input:
        K: number of clusters
        D: dimension of data (the training functions in this notebook only pass 2 or 100)
        device: 'cpu' or 'gpu'; device used for the responsibility, loss and optimizer ops
                (the placeholder, parameters and summaries are always pinned to '/cpu:0')
    Internal variables:
        X: input data matrix (N x D)
        Mu: cluster centres (K x D)
        sigma_sq: cluster variance (K x 1)
        log_pi: log of latent cluster variables (K x 1)
    Output:
        X, Mu, sigma_sq, log_pi, log_resp, loss, optimizer, merged
        (log_resp holds the LOG responsibilities; merged is the merged summary op)
    Raises:
        ValueError: if device is neither 'cpu' nor 'gpu'
    '''
    def calc_log_gaussian_cluster_k(X, Mu, sigma_sq):
        '''
        Calculate log probability density function for all pairs of B data points and K clusters

        Assumptions:
            Dimensions are independent and have the same standard deviation, sigma
        Output:
            log PDF function (N x K)
        '''
        with tf.name_scope('log_gaussian_cluster'):
            # Infer dimension of data
            D = tf.shape(X)[1]

            # Calculate Mahalanobis distance
            ### Expand dim(X) to (N x 1 x D)
            ### Expand dim(Mu) to (1 x K x D)
            ### Reduce sum along the D-axis
            with tf.name_scope('mahalanobis_dist'):
                dist = - tf.divide(tf.reduce_sum(tf.square(tf.expand_dims(X, axis=1)
                                                               - tf.expand_dims(Mu, axis=0)), axis=2),
                                   2 * tf.transpose(sigma_sq),
                                   name='mahalanobis_dist')

            # Calculate log of gaussian constant term
            ### Transpose sigma_sq to (1 x K)
            with tf.name_scope('log_gauss_const'):
                log_gauss_const = - tf.multiply(tf.cast(D, tf.float32) / 2,
                                                tf.log(2 * np.pi * tf.transpose(sigma_sq)),
                                                name='log_gauss_const')

            # Sum results
            log_gaussian_cluster = tf.add(dist, log_gauss_const, name='log_gauss_cluster')

        return log_gaussian_cluster

    def calc_log_conditional_responsibilities(X, Mu, sigma_sq, log_pi):
        '''
        Calculate log probability cluster variable z given x, a.k.a. conditional responsibilities, gamma

        Output:
            log conditional responsibilities (N x K)
        '''
        with tf.name_scope('log_conditional_responsibilities'):
            # Calculate unnormalised_log_posterior P(z|x)
            with tf.name_scope('unnormalised_log_posterior'):
                unnormalised_log_posterior = calc_log_gaussian_cluster_k(X, Mu, sigma_sq) + tf.transpose(log_pi)

            # Return log normalised posterior / conditional responsibilities
            # (subtracting the log-sum-exp over the K axis normalises each row in log space)
            with tf.name_scope('log_gamma_z'):
                cond_resp = tf.add(- tf.reduce_logsumexp(unnormalised_log_posterior, axis=1, keep_dims=True),
                                   unnormalised_log_posterior,
                                   name='log_gamma_z')
        return cond_resp

    def calc_neg_log_marg_prob(X, Mu, sigma_sq, log_pi):
        '''
        Calculates the negative log marginal probability, -log P(X), aka the loss function for MoG

        Output:
            - log P(X) (scalar)
        '''
        with tf.name_scope('loss'):
            loss = tf.negative(tf.reduce_sum(tf.reduce_logsumexp(calc_log_gaussian_cluster_k(X, Mu, sigma_sq)
                                                                 + tf.transpose(log_pi), axis=1),
                                             axis=0), name='-log_P_X')
        return loss

    def _add_histogram(vars_):
        '''
        Helper function to add histogram tag to variables
        Input:
            vars_: iterable of tensors/variables to be tagged with a histogram summary
        '''
        for var in vars_:
            tf.summary.histogram(var.op.name, var)

    #######################
    ##  Function begins  ##
    #######################

    # Validate the device before touching any graph state. Raising (instead of
    # print + quit()) keeps the notebook kernel alive and still works under -O.
    if device not in ('cpu', 'gpu'):
        raise ValueError("Invalid device chosen. Please use 'cpu' or 'gpu'")
    device = '/' + device + ':0'

    # Fix TF graph seed
    tf.set_random_seed(SEED)

    with tf.device('/cpu:0'):
        # Define placeholder
        with tf.name_scope('placeholder'):
            X = tf.placeholder(tf.float32, shape=[None, D], name='inputs')

        # Define parameters
        with tf.variable_scope('parameters'):
            Mu = tf.get_variable('cluster_centres', shape=[K, D],
                                 initializer=tf.truncated_normal_initializer(seed=SEED))
            # phi and psi are unconstrained latents; sigma_sq and pi are derived
            # from them below so they stay positive / sum to one during training
            phi = tf.get_variable('latent_for_sigma_sq', shape=[K, 1],
                                  initializer=tf.truncated_normal_initializer(seed=SEED))
            psi = tf.get_variable('latent_for_pi', shape=[K, 1],
                                  initializer=tf.truncated_normal_initializer(seed=SEED + 1))

            # Calculate bounded variables sigma_sq and pi
            sigma_sq = tf.exp(phi, name='sigma_sq')

            with tf.name_scope('log_pi'):
                # log_softmax normalises along the last axis, hence the transposes around it
                log_pi = tf.transpose(tf.nn.log_softmax(tf.transpose(psi)), name='log_pi')

    with tf.device(device):
        # Calculate conditional responsibilities
        log_resp = calc_log_conditional_responsibilities(X, Mu, sigma_sq, log_pi)

        # Calculate loss
        loss = calc_neg_log_marg_prob(X, Mu, sigma_sq, log_pi)
        tf.summary.scalar('loss', loss)

        # Define optimizer
        optimizer = tf.train.AdamOptimizer(learning_rate=0.01,
                                           beta1=0.9, beta2=0.99, epsilon=1e-5).minimize(loss)

    with tf.device('/cpu:0'):
        # Add histogram summaries for variables of interest
        _add_histogram([Mu, phi, psi, sigma_sq, log_pi, log_resp])

        # Merge all summaries
        merged = tf.summary.merge_all()

    return X, Mu, sigma_sq, log_pi, log_resp, loss, optimizer, merged

### Define training function:

In [6]:
def run_MoG(K_list, D, QUES_DIR, has_valid=False, device='cpu'):
    '''
    Runs MoG training algorithm

    Input:
        K_list:    iterable of cluster counts; one model is trained per entry
        D:         dimension of data, 2 (data2D) or 100 (data100D)
        QUES_DIR:  sub-path (e.g. '/Q2.2.2') appended to the TensorBoard log directory
        has_valid: if True, train on the first 2/3 of the data and validate on the rest
        device:    'cpu' or 'gpu', forwarded to build_MoG
    Output:
        list with one dict per K holding:
            K, train_loss (MAX_ITER + 1), cluster_centres (11 x K x D),
            cluster_variances (11 x K), cluster_prob (11 x K),
            train_resp (11 x N x K), time_of_run
            and, when has_valid, valid_loss (MAX_ITER + 1) and
            valid_resp ((MAX_ITER + 1) x N_valid x K)
        Responsibilities are stored as probabilities (exp of the graph's
        log-responsibilities), matching run_MoG_v2.
    '''
    def subset_data(D):
        '''
        Splits the data set selected by D into:
            first 2/3 of data as training data
            remaining 1/3 of data as validation data
        '''
        data = data2D if D == 2 else data100D
        # Floor division keeps the split point an int under both Python 2 and 3
        divider = data.shape[0] * 2 // 3
        return data[:divider], data[divider:]

    #######################
    ##  Function begins  ##
    #######################

    # Assert correct value for D
    assert D == 2 or D == 100

    # Define function-wide constants
    MAX_ITER = 1000
    CURR_TIME = '{:%b%d %H_%M_%S}'.format(datetime.datetime.now())
    # LOG_DIR is already a relative path; the previous CURRENT_DIR prefix was never defined
    SUMMARY_DIR = LOG_DIR + '/MoG' + QUES_DIR + '/' + CURR_TIME

    # Create list to store run results
    results = []

    for K in K_list:
        # Clear any pre-defined graph
        tf.reset_default_graph()

        # Build TensorFlow graph (the responsibilities tensor is in log space)
        X, Mu, sigma_sq, log_pi, log_resp, loss, optimizer, merged = build_MoG(K, D, device)

        # Select appropriate input_data
        if has_valid:
            input_data, valid_data = subset_data(D)
        else:
            input_data = data2D if D == 2 else data100D
            valid_data = None

        # Python lists to log losses, cluster_centres, cluster_variances, pi's and
        # responsibilities; converted to arrays once at the end instead of
        # re-allocating with np.append on every iteration
        train_loss, valid_loss = [], []
        cluster_centres, cluster_variances, pi = [], [], []
        train_resp, valid_resp = [], []

        # Begin session
        with tf.Session(config=tf.ConfigProto(allow_soft_placement=True, log_device_placement=False)) as sess:
            # Log start time
            start_time = time.time()

            # Create sub-directory title
            sub_dir = '/K={},D={},valid={}'.format(K, D, has_valid)

            # Create summary writers
            train_writer = tf.summary.FileWriter(SUMMARY_DIR + sub_dir + '/train', graph=sess.graph)
            valid_writer = tf.summary.FileWriter(SUMMARY_DIR + sub_dir + '/valid') if has_valid else None

            try:
                # Initialise all TensorFlow variables
                tf.global_variables_initializer().run()

                # Calculate training (and validation) loss,
                # cluster centres and responsibilities before any training (step 0)
                err, summaries, clusters, variances, log_pi_prob, log_train_resp = \
                    sess.run([loss, merged, Mu, sigma_sq, log_pi, log_resp], feed_dict={X: input_data})
                train_loss.append(err)
                train_writer.add_summary(summaries, 0)

                # Log clusters and responsibilities ((K x 1) columns flattened to length-K rows)
                cluster_centres.append(clusters)
                cluster_variances.append(variances[:, 0])
                pi.append(np.exp(log_pi_prob[:, 0]))
                train_resp.append(np.exp(log_train_resp))

                # Log validation data
                if has_valid:
                    valid_err, log_valid_resp, summaries = \
                        sess.run([loss, log_resp, merged], feed_dict={X: valid_data})
                    valid_loss.append(valid_err)
                    valid_resp.append(np.exp(log_valid_resp))
                    valid_writer.add_summary(summaries, 0)

                # Begin training; step counts completed updates (1..MAX_ITER)
                for step in range(1, MAX_ITER + 1):
                    # Train graph
                    _, err, summaries = sess.run([optimizer, loss, merged], feed_dict={X: input_data})

                    # Add training loss
                    train_writer.add_summary(summaries, step)
                    train_loss.append(err)

                    # Log validation loss at the same step as the training summary
                    if has_valid:
                        valid_err, log_valid_resp, summaries = \
                            sess.run([loss, log_resp, merged], feed_dict={X: valid_data})
                        valid_loss.append(valid_err)
                        valid_resp.append(np.exp(log_valid_resp))
                        valid_writer.add_summary(summaries, step)

                    # Log responsibilities and cluster centres every 10% of maximum iteration
                    # (integer form of the former float test ((step * 100 / MAX_ITER) % 10 == 0))
                    if (step * 10) % MAX_ITER == 0:
                        clusters, variances, log_pi_prob, log_train_resp = \
                            sess.run([Mu, sigma_sq, log_pi, log_resp], feed_dict={X: input_data})

                        cluster_centres.append(clusters)
                        cluster_variances.append(variances[:, 0])
                        pi.append(np.exp(log_pi_prob[:, 0]))
                        train_resp.append(np.exp(log_train_resp))

                    # Post training progress to user, every 100 iterations,
                    # always reporting the most recent loss values
                    if step % 100 == 0:
                        if not has_valid:
                            print('iter: {:3d}, train_loss: {:3.1f}'.format(step, train_loss[-1]))
                        else:
                            print('iter: {:3d}, train_loss: {:3.1f}, valid_loss: {:3.1f}'
                                  .format(step, train_loss[-1], valid_loss[-1]))

                # End of training loop
                print('Max iteration reached')
            finally:
                # Close the writers even if training raised
                train_writer.close()
                if valid_writer is not None:
                    valid_writer.close()

            # float64 arrays, the dtype np.append onto an empty array used to produce
            result = {
                'K': K,
                'train_loss': np.array(train_loss, dtype=np.float64),
                'cluster_centres': np.array(cluster_centres, dtype=np.float64),
                'cluster_variances': np.array(cluster_variances, dtype=np.float64),
                'cluster_prob': np.array(pi, dtype=np.float64),
                'train_resp': np.array(train_resp, dtype=np.float64),
                'time_of_run': '{:%b%d %H_%M_%S}'.format(datetime.datetime.now())
            }
            if has_valid:
                result['valid_loss'] = np.array(valid_loss, dtype=np.float64)
                result['valid_resp'] = np.array(valid_resp, dtype=np.float64)
            results.append(result)

            # TODO calculate convergence
            if not has_valid:
                print('K: {:3d}, train loss: {:3.1f}, duration: {:3.1f}s\n'
                      .format(K, train_loss[-1], time.time() - start_time))
            else:
                print('K: {:3d}, train loss: {:3.1f}, valid loss: {:3.1f}, duration: {:3.1f}s\n'
                      .format(K, train_loss[-1], valid_loss[-1], time.time() - start_time))

    print('RUN COMPLETED')
    return results

In [7]:
def run_MoG_v2(K_list, D, QUES_DIR, has_valid=False, device='cpu'):
    '''
    Runs MoG training algorithm. Compared with run_MoG, validation
    responsibilities are only recorded at the snapshot iterations (every 10%
    of MAX_ITER) rather than after every update.
        Tensorboard embedding helper included (its call is currently disabled)

    Input:
        K_list:    iterable of cluster counts; one model is trained per entry
        D:         dimension of data, 2 (data2D) or 100 (data100D)
        QUES_DIR:  sub-path (e.g. '/Q2.2.2') appended to the TensorBoard log directory
        has_valid: if True, train on the first 2/3 of the data and validate on the rest
        device:    'cpu' or 'gpu', forwarded to build_MoG
    Output:
        list with one dict per K holding:
            K, train_loss (MAX_ITER + 1), cluster_centres (11 x K x D),
            cluster_variances (11 x K), cluster_pi (11 x K),
            train_resp (11 x N x K), time_of_run
            and, when has_valid, valid_loss (MAX_ITER + 1) and
            valid_resp (11 x N_valid x K)
        Responsibilities and cluster_pi are stored as probabilities (exp of the graph's log values).
    '''
    def subset_data(D):
        '''
        Splits the data set selected by D into:
            first 2/3 of data as training data
            remaining 1/3 of data as validation data
        '''
        data = data2D if D == 2 else data100D
        # Floor division keeps the split point an int under both Python 2 and 3
        divider = data.shape[0] * 2 // 3
        return data[:divider], data[divider:]

    def embed_data(D, train_writer):
        '''
        Embed data for visualization purposes

        Reads sess, sub_dir, SUMMARY_DIR and MAX_ITER from the enclosing scope,
        so it must only be called inside the session block below.
        '''
        # Define input data
        input_data = data2D if D == 2 else data100D
        input_data_name = 'data{}D.npy'.format(D)

        # Create variable to embed
        data_to_embed = tf.Variable(input_data, name=input_data_name, trainable=False, collections=[])

        # Define projector configurations
        config = projector.ProjectorConfig()

        # Add embedding
        embedding = config.embeddings.add()

        # Connect tf.Variable to embedding
        embedding.tensor_name = data_to_embed.name

        # Evaluate tf.Variable
        sess.run(data_to_embed.initializer)

        # Create save checkpoint
        saver = tf.train.Saver([data_to_embed])
        saver.save(sess, SUMMARY_DIR + sub_dir + '/train/model.ckpt', MAX_ITER)

        # Write projector_config.pbtxt in LOG_DIR
        projector.visualize_embeddings(train_writer, config)

    #######################
    ##  Function begins  ##
    #######################

    # Assert correct value for D
    assert D == 2 or D == 100

    # Define function-wide constants
    MAX_ITER = 1500
    CURR_TIME = '{:%b%d %H_%M_%S}'.format(datetime.datetime.now())
    SUMMARY_DIR = LOG_DIR + '/MoG' + QUES_DIR + '/' + CURR_TIME

    # Create list to store run results
    results = []

    for K in K_list:
        # Clear any pre-defined graph
        tf.reset_default_graph()

        # Build TensorFlow graph
        X, Mu, sigma_sq, log_pi, log_resp, loss, optimizer, merged = build_MoG(K, D, device)

        # Select appropriate input_data
        if has_valid:
            input_data, valid_data = subset_data(D)
        else:
            input_data = data2D if D == 2 else data100D
            valid_data = None

        # Python lists to log losses, cluster_centres, cluster_variances, pi's and
        # responsibilities; converted to arrays once at the end instead of
        # re-allocating with np.append on every iteration
        train_loss, valid_loss = [], []
        cluster_centres, cluster_variances, cluster_pi = [], [], []
        train_resp, valid_resp = [], []

        # Begin session
        with tf.Session(config=tf.ConfigProto(allow_soft_placement=True, log_device_placement=False)) as sess:
            # Log start time
            start_time = time.time()

            # Create sub-directory title
            sub_dir = '/K={},D={},valid={}'.format(K, D, has_valid)

            # Create summary writers
            train_writer = tf.summary.FileWriter(SUMMARY_DIR + sub_dir + '/train', graph=sess.graph)
            valid_writer = tf.summary.FileWriter(SUMMARY_DIR + sub_dir + '/valid') if has_valid else None

            try:
                # Initialise all TensorFlow variables
                tf.global_variables_initializer().run()

                # Calculate training (and validation) loss,
                # cluster centres and responsibilities before any training (step 0)
                err, summaries, clusters, variances, log_prior_pi, log_train_resp = \
                    sess.run([loss, merged, Mu, sigma_sq, log_pi, log_resp], feed_dict={X: input_data})
                train_loss.append(err)
                train_writer.add_summary(summaries, 0)

                # Log clusters and responsibilities ((K x 1) columns flattened to length-K rows)
                cluster_centres.append(clusters)
                cluster_variances.append(variances[:, 0])
                cluster_pi.append(np.exp(log_prior_pi[:, 0]))
                train_resp.append(np.exp(log_train_resp))

                # Log validation data
                if has_valid:
                    valid_err, log_valid_resp, summaries = \
                        sess.run([loss, log_resp, merged], feed_dict={X: valid_data})
                    valid_loss.append(valid_err)
                    valid_resp.append(np.exp(log_valid_resp))
                    valid_writer.add_summary(summaries, 0)

                # Begin training; step counts completed updates (1..MAX_ITER)
                for step in range(1, MAX_ITER + 1):
                    # Train graph
                    _, summaries, err = sess.run([optimizer, merged, loss], feed_dict={X: input_data})

                    # Add training loss
                    train_loss.append(err)
                    train_writer.add_summary(summaries, step)

                    # Log validation loss at the same step as the training summary
                    # (previously written one step behind, duplicating step 0)
                    if has_valid:
                        summaries, valid_err = sess.run([merged, loss], feed_dict={X: valid_data})
                        valid_loss.append(valid_err)
                        valid_writer.add_summary(summaries, step)

                    # Log responsibilities and cluster centres every 10% of maximum iteration
                    # (integer form of the former float test ((step * 100 / MAX_ITER) % 10 == 0))
                    if (step * 10) % MAX_ITER == 0:
                        clusters, variances, log_prior_pi, log_train_resp = \
                            sess.run([Mu, sigma_sq, log_pi, log_resp], feed_dict={X: input_data})

                        cluster_centres.append(clusters)
                        cluster_variances.append(variances[:, 0])
                        cluster_pi.append(np.exp(log_prior_pi[:, 0]))
                        train_resp.append(np.exp(log_train_resp))

                        if has_valid:
                            log_valid_resp = sess.run(log_resp, feed_dict={X: valid_data})
                            valid_resp.append(np.exp(log_valid_resp))

                    # Post training progress to user, every 100 iterations
                    if step % 100 == 0:
                        print('iter: {:3d}'.format(step))

                # End of training loop
                print('Max iteration reached')

                # Embed data (disabled)
                #embed_data(D, train_writer)
            finally:
                # Close the writers even if training raised
                train_writer.close()
                if valid_writer is not None:
                    valid_writer.close()

            # float64 arrays, the dtype np.append onto an empty array used to produce
            result = {
                'K': K,
                'train_loss': np.array(train_loss, dtype=np.float64),
                'cluster_centres': np.array(cluster_centres, dtype=np.float64),
                'cluster_variances': np.array(cluster_variances, dtype=np.float64),
                'cluster_pi': np.array(cluster_pi, dtype=np.float64),
                'train_resp': np.array(train_resp, dtype=np.float64),
                'time_of_run': '{:%b%d %H_%M_%S}'.format(datetime.datetime.now())
            }
            if has_valid:
                result['valid_loss'] = np.array(valid_loss, dtype=np.float64)
                result['valid_resp'] = np.array(valid_resp, dtype=np.float64)
            results.append(result)

            # TODO calculate convergence
            print('K: {:3d}, duration: {:3.1f}s\n'.format(K, time.time() - start_time))

    print('RUN COMPLETED')
    return results

## Section 3.2.2.2: MoG on $\textit{data2D.npy}$ without validation $(K = 3)$

In [8]:
# Train a single K=3 model on all of data2D (has_valid defaults to False, so no validation split)
results_2_2_2 = run_MoG_v2(K_list=[3], D=2, QUES_DIR='/Q2.2.2')

iter: 100
iter: 200
iter: 300
iter: 400
iter: 500
iter: 600
iter: 700
iter: 800
iter: 900
iter: 1000
iter: 1100
iter: 1200
iter: 1300
iter: 1400
iter: 1500
Max iteration reached
K:   3, duration: 20.8s

RUN COMPLETED


### Save results

In [27]:
# Persist the list of per-K result dicts so a later session can reload them instead of retraining
# NOTE(review): a list of dicts is saved as a pickled object array; newer NumPy
# releases presumably need np.load(..., allow_pickle=True) to read it back - confirm
np.save('./Results/MoG/2_2_2.npy', results_2_2_2)

### Plot loss vs number of updates

In [9]:
def loss_IGraph(loss):
    '''
    Plot the loss against the number of updates
    Input:
        loss: 1-D array of loss values, one entry per update
    Output:
        whatever pyo.iplot returns for the offline (in-notebook) plot
    Notes:
        The figure is also sent through py.iplot (plotly.plotly) as a private plot.
    '''
    num_updates = loss.shape[0]

    # Single line trace: update index on x, loss value on y
    loss_curve = go.Scatter(x=range(num_updates), y=loss)

    # Axis captions shared by the layout below
    axis_titles = dict(xaxis={'title': 'Updates'}, yaxis={'title': 'Loss'})

    fig = go.Figure(
        data=go.Data([loss_curve]),
        layout=go.Layout(title='-logP(X) vs. Number of Updates', **axis_titles)
    )

    # Online copy first, then the offline rendering that is returned
    py.iplot(fig, filename='/ECE521: A3/Q2: Mixture of Gaussians/Q2.2_MoG_loss', sharing='private')
    return pyo.iplot(fig)

# Plot the training loss of the only model in results_2_2_2
figure = loss_IGraph(results_2_2_2[0]['train_loss'])

## Section 3.2.2.3: MoG on $\textit{data2D.npy}$ with validation $(K = 1, 2, 3, 4, 5)$

In [10]:
# Train one model per K in 1..5 on data2D, holding out the last third of the data for validation
results_2_2_3 = run_MoG_v2(K_list=[1, 2, 3, 4, 5], D=2, QUES_DIR='/Q2.2.3', has_valid=True)

iter: 100
iter: 200
iter: 300
iter: 400
iter: 500
iter: 600
iter: 700
iter: 800
iter: 900
iter: 1000
iter: 1100
iter: 1200
iter: 1300
iter: 1400
iter: 1500
Max iteration reached
K:   1, duration: 12.2s

iter: 100
iter: 200
iter: 300
iter: 400
iter: 500
iter: 600
iter: 700
iter: 800
iter: 900
iter: 1000
iter: 1100
iter: 1200
iter: 1300
iter: 1400
iter: 1500
Max iteration reached
K:   2, duration: 27.4s

iter: 100
iter: 200
iter: 300
iter: 400
iter: 500
iter: 600
iter: 700
iter: 800
iter: 900
iter: 1000
iter: 1100
iter: 1200
iter: 1300
iter: 1400
iter: 1500
Max iteration reached
K:   3, duration: 29.6s

iter: 100
iter: 200
iter: 300
iter: 400
iter: 500
iter: 600
iter: 700
iter: 800
iter: 900
iter: 1000
iter: 1100
iter: 1200
iter: 1300
iter: 1400
iter: 1500
Max iteration reached
K:   4, duration: 27.5s

iter: 100
iter: 200
iter: 300
iter: 400
iter: 500
iter: 600
iter: 700
iter: 800
iter: 900
iter: 1000
iter: 1100
iter: 1200
iter: 1300
iter: 1400
iter: 1500
Max iteration reached
K:   5, du

### Save results

In [11]:
# Persist the list of per-K result dicts so a later session can reload them instead of retraining
# NOTE(review): a list of dicts is saved as a pickled object array; newer NumPy
# releases presumably need np.load(..., allow_pickle=True) to read it back - confirm
np.save('./Results/MoG/2_2_3.npy', results_2_2_3)

### Compare relative performance between different clusters

In [12]:
def IGraph_2_2_3(results):
    '''
    Plot the final validation loss of each trained model against its number of clusters
    Input:
        results: list of result dicts from a run with validation; each must
                 provide 'K' and 'valid_loss'
    Output:
        whatever pyo.iplot returns for the offline (in-notebook) plot
    Notes:
        The figure is also sent through py.iplot (plotly.plotly) as a private plot.
    '''
    # Read K from each result instead of assuming exactly five models with K = 1..5,
    # so the x values always line up with the losses
    cluster_counts = [result['K'] for result in results]
    valid_loss = [result['valid_loss'][-1] for result in results]

    figure = {
        'data': [{
            'x': cluster_counts,
            'y': valid_loss,
        }],
        'layout': {
            'title': 'MoG Model Performances on data2D.npy',
            'showlegend': False,
            'xaxis': {'title': 'K', 'dtick': 1},
            'yaxis': {'title': 'Final Validation Loss'}
        }
    }

    py.iplot(figure,
             filename='/ECE521: A3/Q2: Mixture of Gaussians/Q2.3_compare_MoG_clusters',
             sharing='private')
    return pyo.iplot(figure)

IGraph_2_2_3(results_2_2_3)

### Generate bar chart for cluster assignment $\%$

In [13]:
def cluster_assignment_IGraph(results, is_MoG, D, question_name):
    '''
    Generate a stacked bar chart with one bar per model, split by the share of each cluster
    Input:
        results:       list of result dicts; each must provide 'K' plus
                       'cluster_pi' (MoG; the last logged row is used) or 'composition' (K-means)
        is_MoG:        truthy for MoG results, falsy for K-means results
        D:             dimension of the data set (2 or 100), used in the title and filename
        question_name: question label inserted into the online plot's filename
    Output:
        whatever pyo.iplot returns for the offline (in-notebook) plot
    Notes:
        The figure is also sent through py.iplot (plotly.plotly) as a private plot.
        NOTE(review): 'composition' is not produced anywhere in the visible code;
        presumably it holds per-cluster shares for K-means - confirm.
    '''
    assert D == 2 or D == 100

    # Define colour list as per Plotly's default colour list
    colour_list = ['#1f77b4', '#ff7f0e', '#2ca02c', '#d62728', '#9467bd', '#8c564b']

    # Decide the model label once so the title and filename can never disagree
    # (the old code mixed `== True` and `is True`)
    model_name = 'MoG' if is_MoG else 'K-means'

    # Define data to plot
    traces = []
    for result in results:
        shares = result['cluster_pi'][-1] if is_MoG else result['composition']
        for k in range(result['K']):
            traces.append(go.Bar(
                # Place the bar at the model's actual K, as the axis title promises
                x = [result['K']],
                y = [shares[k]],
                # Cycle through the palette so K > len(colour_list) no longer raises IndexError
                marker = {'color': colour_list[k % len(colour_list)]},
                name = 'Cluster {}'.format(k + 1)
            ))

    figure = {
        'data': traces,
        'layout': {
            'title': 'Percentage of data points assigned to each {} cluster on data{}D.npy'
                .format(model_name, D),
            # Categorical axis keeps the bars evenly spaced even when the K values are not consecutive
            'xaxis': {'title': 'Number of clusters, K', 'type': 'category'},
            'yaxis': {'title': 'Assignment to cluster, %'},
            'barmode': 'stack',
            'showlegend': False
        }
    }

    # Generate plot
    py.iplot(figure,
             filename='/ECE521: A3/Q2: Mixture of Gaussians/Q{}_assignment_bar_chart_{}D_{}'
                 .format(question_name, D, model_name),
             sharing='private')

    return pyo.iplot(figure)

cluster_assignment_IGraph(results_2_2_3, is_MoG=True, D=2, question_name='2.3')

### Visualising clusters

In [14]:
def visualise_MoG_clusters(result, use_gradient=False):
    '''
    Final result by colouring data points by clusters generated by Mixture of Gaussian algorithm

    Plots the validation split of the global data2D, one diamond marker per cluster
    centre and one 95% contour per cluster.

    Input:
        result:       MoG training result with validation; the last snapshot ([-1]) of
                      'cluster_centres', 'cluster_variances' and 'valid_resp' is plotted
        use_gradient: if True, fade each point's cluster colour towards white the less
                      certain its assignment is; if False (default), use the plain
                      colour of the most responsible cluster
    Notes:
        cluster_centres:   coordinates of cluster centres (K x D)
        cluster_variances: cluster variances (K)
        valid_resp:        validation responsibility indices ((N/3) x K)
        The validation split is every row of data2D from index
        data2D.shape[0] * 2 // 3 onwards
    Output:
        return value of pyo.iplot for the generated figure
    '''
    # Define colour list as per Plotly's default colour list
    colour_list = np.array(['#1f77b4', '#ff7f0e', '#2ca02c', '#d62728', '#9467bd', '#8c564b'])
    n_colours = len(colour_list)

    def _hex_to_rgb(hex_colours):
        '''
        Convert hex values of type string to RGB components
        Input:
            hex_colours: strings of format '#xxxxxx' (numColour)
        Output:
            RGB: RGB components as floats (numColour x 3)
        '''
        return np.array([[int(colour[pos:pos + 2], 16) for pos in (1, 3, 5)]
                         for colour in hex_colours], dtype=float).reshape(-1, 3)

    def _rgb_to_hex(RGB):
        '''
        Convert RGB components to hex strings of format '#XXXXXX'
        Input:
            RGB: RGB components (N x 3)
        Output:
            hex_colours: (N)
        '''
        # Round to nearest instead of truncating so that floating point noise
        # cannot turn a channel such as 254.9999 into 254
        channels = np.rint(RGB).astype(int).tolist()
        return np.array(['#{:02X}{:02X}{:02X}'.format(*channel) for channel in channels])

    def get_colour_gradient(resp):
        '''
        Return the colour of the most responsible cluster, whitened by uncertainty
        Input:
            resp: responsibility index (N x K)
        Output:
            hex colours (N)
        '''
        resp = np.asarray(resp)

        # Colour of the most responsible cluster (N x 3); the palette is cycled
        # when there are more clusters than colours
        assigned_colour = _hex_to_rgb(colour_list)[np.argmax(resp, axis=1) % n_colours]

        # Weighted average of the assigned colour (weight 1) and white
        # (weight 1 - max responsibility) to whiten less certain data points
        white_weight = (1 - np.amax(resp, axis=1))[:, np.newaxis]
        whitened_colour = (assigned_colour + 255 * white_weight) / (1 + white_weight)

        # Return array of colours in hex form
        return _rgb_to_hex(whitened_colour)

    def calc_ellipse_coordinates(centres, variances):
        '''
        Create x- and y-coordinates for ellipses for each cluster
        Assumptions:
            Joint independence and equal marginal variances
            Dimension of data point is 2
        Returns:
            ellipse: x- and y-coordinates for K ellipses (T x K x 2),
                     where T is the number of points traced along each ellipse
        '''
        # Chi-squared critical value with df 2 and alpha=5%: the region is meant
        # to encompass 95% of the points
        crit_val = 5.991

        # Calculate axes length (one per cluster); ravel accepts (K,), (1 x K) or (K x 1)
        centres = np.asarray(centres)
        axis_lengths = np.sqrt(np.ravel(variances) * crit_val)

        # Calculate coordinates to trace ellipse; t is the parameter (T x 1)
        t = np.arange(-np.pi, np.pi + np.pi / 50, np.pi / 50)[:, np.newaxis]
        x = centres[:, 0] + axis_lengths * np.cos(t)
        y = centres[:, 1] + axis_lengths * np.sin(t)

        # Stack x- and y-coordinates along axis=2
        return np.stack([x, y], axis=2)

    #######################
    ##  Function begins  ##
    #######################

    K = result['K']
    if K > n_colours:
        # Colours are reused cyclically, so distinct clusters may share a colour
        print('Not enough colours to colour all K clusters. Consider increasing number of colours in colour_list.')

    # Anything before divider is part of the training data. Anything after is part of validation data
    # Floor division keeps the slice index an integer on Python 2 and Python 3
    divider = data2D.shape[0] * 2 // 3
    valid_points = data2D[divider:]

    # Store cluster parameters and responsibility indices of the last snapshot
    centres = np.asarray(result['cluster_centres'][-1])
    variances = result['cluster_variances'][-1]
    valid_resp = np.asarray(result['valid_resp'][-1])

    # Create ellipse coordinates
    ellipse = calc_ellipse_coordinates(centres, variances)

    # Colour validation points by their most responsible cluster
    if use_gradient:
        point_colours = get_colour_gradient(valid_resp)
    else:
        point_colours = colour_list[np.argmax(valid_resp, axis=1) % n_colours]

    # Create trace for validation data points
    traces = [{
        'x': valid_points[:, 0],
        'y': valid_points[:, 1],
        'mode': 'markers',
        'hoverinfo': 'none',
        'marker': {
            'size': 4,
            'color': point_colours
        }
    }]

    for k in range(K):
        cluster_name = 'Cluster {}'.format(k + 1)
        cluster_colour = colour_list[k % n_colours]

        # Create trace for cluster centre
        traces.append({
            'x': np.round([centres[k][0]], 3),
            'y': np.round([centres[k][1]], 3),
            'name': cluster_name,
            'mode': 'markers',
            'marker': {
                'size': 12,
                'symbol': 'diamond',
                'color': cluster_colour,
                'line': {'width': 3}
            }
        })

        # Create trace for region encompassing 95% of data points
        traces.append({
            'x': ellipse[:, k, 0],
            'y': ellipse[:, k, 1],
            'hoverinfo': 'none',
            'mode': 'lines',
            'name': cluster_name,
            'marker': {
                'color': cluster_colour
            }
        })

    # Assemble figure (data and layout)
    figure = {
        'data': traces,
        'layout': go.Layout(
            width = 900,
            height = 900,
            showlegend = False,
            title = 'MoG Clustering Visualisation (K = {})'.format(K),
            xaxis = {'range': [-4, 4], 'autorange': False},
            yaxis = {'range': [-5, 2], 'autorange': False}
        )
    }

    return pyo.iplot(figure)

In [15]:
# Render the final clustering figure for every run stored in results_2_2_3,
# collecting whatever visualise_MoG_clusters returns for each of them
fig2_2_3 = list()
for result in results_2_2_3:
    fig2_2_3 += [visualise_MoG_clusters(result)]

### Create GIFs

In [16]:
def generate_gif_images(result):
    '''
    Creates snapshots of animated plots, with data points coloured by clusters

    For each of the 11 training checkpoints (0%, 10%, ..., 100%) the validation
    split of the global data2D, the cluster centres and the 95% contours are drawn
    and the figure is saved as 'Q2.2.3_K=<K>_gif_<ii>.png' through
    py.plotly.image.save_as.

    Input:
        result:            MoG training result with validation
    Notes:
        cluster_centres:   coordinates of cluster centres (11 x K x D)
        cluster_variances: cluster variances (11 x K)
        valid_resp:        validation responsibility indices for each run of K (11 x (N/3) x K)
        The validation split is every row of data2D from index
        data2D.shape[0] * 2 // 3 onwards
    Output:
        None
    '''
    # Define colour list as per Plotly's default colour list
    colour_list = np.array(['#1f77b4', '#ff7f0e', '#2ca02c', '#d62728', '#9467bd', '#8c564b'])
    n_colours = len(colour_list)

    def _hex_to_rgb(hex_colours):
        '''
        Convert hex values of type string to RGB components
        Input:
            hex_colours: strings of format '#xxxxxx' (numColour)
        Output:
            RGB: RGB components as floats (numColour x 3)
        '''
        return np.array([[int(colour[pos:pos + 2], 16) for pos in (1, 3, 5)]
                         for colour in hex_colours], dtype=float).reshape(-1, 3)

    def _rgb_to_hex(RGB):
        '''
        Convert RGB components to hex strings of format '#XXXXXX'
        Input:
            RGB: RGB components (N x 3)
        Output:
            hex_colours: (N)
        '''
        # Round to nearest instead of truncating so that floating point noise
        # cannot turn a channel such as 254.9999 into 254
        channels = np.rint(RGB).astype(int).tolist()
        return np.array(['#{:02X}{:02X}{:02X}'.format(*channel) for channel in channels])

    def get_colour_gradient(resp):
        '''
        Return the colour of the most responsible cluster, whitened by uncertainty
        Input:
            resp: responsibility index (N x K)
        Output:
            hex colours (N)
        '''
        resp = np.asarray(resp)

        # Colour of the most responsible cluster (N x 3); the palette is cycled
        # when there are more clusters than colours
        assigned_colour = _hex_to_rgb(colour_list)[np.argmax(resp, axis=1) % n_colours]

        # Weighted average of the assigned colour (weight 1) and white
        # (weight 1 - max responsibility) to whiten less certain data points
        white_weight = (1 - np.amax(resp, axis=1))[:, np.newaxis]
        whitened_colour = (assigned_colour + 255 * white_weight) / (1 + white_weight)

        # Return array of colours in hex form
        return _rgb_to_hex(whitened_colour)

    def calc_ellipse_coordinates(centres, variances):
        '''
        Create x- and y-coordinates for ellipses for each cluster
        Assumptions:
            Joint independence and equal marginal variances
            Dimension of data point is 2
        Returns:
            ellipse: x- and y-coordinates for K ellipses (T x K x 2),
                     where T is the number of points traced along each ellipse
        '''
        # Chi-squared critical value with df 2 and alpha=5%: the region is meant
        # to encompass 95% of the points
        crit_val = 5.991

        # Calculate axes length (one per cluster); ravel accepts (K,), (1 x K) or (K x 1)
        centres = np.asarray(centres)
        axis_lengths = np.sqrt(np.ravel(variances) * crit_val)

        # Calculate coordinates to trace ellipse; t is the parameter (T x 1)
        t = np.arange(-np.pi, np.pi + np.pi / 50, np.pi / 50)[:, np.newaxis]
        x = centres[:, 0] + axis_lengths * np.cos(t)
        y = centres[:, 1] + axis_lengths * np.sin(t)

        # Stack x- and y-coordinates along axis=2
        return np.stack([x, y], axis=2)

    #######################
    ##  Function begins  ##
    #######################

    K = result['K']
    if K > n_colours:
        # Colours are reused cyclically, so distinct clusters may share a colour
        print('Not enough colours to colour all K clusters. Consider increasing number of colours in colour_list.')

    # Anything before divider is part of the training data. Anything after is part of validation data
    # Floor division keeps the slice index an integer on Python 2 and Python 3
    divider = data2D.shape[0] * 2 // 3
    valid_points = data2D[divider:]

    # Store cluster parameters and responsibility indices (one entry per snapshot)
    centres = result['cluster_centres']
    variances = result['cluster_variances']
    valid_resp = result['valid_resp']

    # Define slider ticker labels (one per snapshot)
    slider_values = ['0%', '10%', '20%', '30%', '40%', '50%', '60%', '70%', '80%', '90%', '100%']

    # Create layout (shared by all snapshots; only the slider position changes)
    layout = {
        'width': 900,
        'height': 900,
        'xaxis': {'range': [-4, 4], 'autorange': False},
        'yaxis': {'range': [-5, 2], 'autorange': False},
        'title': 'MoG Clustering Visualisation (K = {})'.format(K),
        'showlegend': False
    }

    # Define slider steps
    slider_steps = [{
        'args': [
            [label],
            {'frame': {'duration': 300, 'redraw': False},
             'mode': 'immediate',
             'transition': {'duration': 300}}
        ],
        'label': label,
        'method': 'animate'
    } for label in slider_values]

    # Create snapshots
    for i in range(len(slider_values)):
        # Ellipse coordinates depend on the snapshot only, so compute them once per snapshot
        ellipse = calc_ellipse_coordinates(centres[i], variances[i])

        # Create trace for validation data points
        traces = [{
            'x': valid_points[:, 0],
            'y': valid_points[:, 1],
            'mode': 'markers',
            'hoverinfo': 'none',
            'marker': {
                'size': 4,
                'color': get_colour_gradient(valid_resp[i])
            }
        }]

        for k in range(K):
            cluster_name = 'Cluster {}'.format(k + 1)
            cluster_colour = colour_list[k % n_colours]

            # Create trace for cluster centre
            traces.append({
                'x': np.round([centres[i][k][0]], 3),
                'y': np.round([centres[i][k][1]], 3),
                'name': cluster_name,
                'mode': 'markers',
                'marker': {
                    'size': 12,
                    'symbol': 'diamond',
                    'color': cluster_colour,
                    'line': {'width': 3}
                }
            })

            # Create trace for region encompassing 95% of data points
            traces.append({
                'x': ellipse[:, k, 0],
                'y': ellipse[:, k, 1],
                'hoverinfo': 'none',
                'mode': 'lines',
                'name': cluster_name,
                'marker': {
                    'color': cluster_colour
                }
            })

        # Add slider to layout
        layout['sliders'] = [{
            'active': i, # Slider knob's relative starting location
            'pad': {'b': 10, 't': 50}, # Bottom and top padding
            'len': 1, # Slider length
            'x': 0, # Slider x-position
            'y': 0, # Slider y-position
            'yanchor': 'top',
            'xanchor': 'left',
            'currentvalue': { # Displays current value selected by slider
                'font': {'size': 20},
                'prefix': 'Training: ',
                'visible': True,
                'xanchor': 'right'
            },
            'transition': {'duration': 300, 'easing': 'cubic-in-out'},
            'steps': slider_steps
        }]

        figure = {'data': traces, 'layout': layout}

        # Save snapshot locally
        py.plotly.image.save_as(figure, filename='Q2.2.3_K={0}_gif_{1:02d}.png'.format(K, i), \
                                width=900, height=900, scale=1)

In [17]:
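# Disabled by default: uncomment to regenerate the GIF frames
# (generate_gif_images saves 11 PNG snapshots per result through the Plotly image API)
# for result in results_2_2_3:
#     generate_gif_images(result)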

## Section 3.2.2.4: MoG on $\textit{data100D.npy}$ with validation $(K = 1, \cdots, 15)$

In [22]:
# Run MoG with validation on the 100-dimensional data set
# NOTE(review): the section heading announces K = 1, ..., 15 but this run only
# covers K = 1, ..., 4 - confirm which range is intended before re-running
results_2_2_4_MoG = run_MoG_v2(
    K_list=[1, 2, 3, 4],
    D=100,
    has_valid=True,
    device='gpu',
    QUES_DIR='/Q2.2.4'
)

iter: 100
iter: 200
iter: 300
iter: 400
iter: 500
iter: 600
iter: 700
iter: 800
iter: 900
iter: 1000
iter: 1100
iter: 1200
iter: 1300
iter: 1400
iter: 1500
Max iteration reached
K:   1, duration: 31.8s

iter: 100
iter: 200
iter: 300
iter: 400
iter: 500
iter: 600
iter: 700
iter: 800
iter: 900
iter: 1000
iter: 1100
iter: 1200
iter: 1300
iter: 1400
iter: 1500
Max iteration reached
K:   2, duration: 41.8s

iter: 100
iter: 200
iter: 300
iter: 400
iter: 500
iter: 600
iter: 700
iter: 800
iter: 900
iter: 1000
iter: 1100
iter: 1200
iter: 1300
iter: 1400
iter: 1500
Max iteration reached
K:   3, duration: 48.3s

iter: 100
iter: 200
iter: 300
iter: 400
iter: 500
iter: 600
iter: 700
iter: 800
iter: 900
iter: 1000
iter: 1100
iter: 1200
iter: 1300
iter: 1400
iter: 1500
Max iteration reached
K:   4, duration: 54.7s

RUN COMPLETED


### Save results

In [23]:
# Persist the MoG results so that a later session can skip the training run
# NOTE(review): the optional load cell at the top of the notebook reads
# './Results/MoG/2_2_4_MoG.npy', not this '_x15' file - confirm which name is intended
np.save(
    file='./Results/MoG/2_2_4_MoG_x15.npy',
    arr=results_2_2_4_MoG
)

### Load K-means results for this question

In [24]:
# Presumably a pickled object array of per-K result dicts (the entries are indexed
# with result['valid_loss'] below) - verify against the notebook that saved it.
# NumPy >= 1.16.3 refuses to load such arrays unless allow_pickle=True is passed.
# Unpickling can execute arbitrary code, so only load result files you produced yourself.
results_2_2_4_K_means = np.load('./Results/MoG/2_2_4_K-means.npy', allow_pickle=True)

### Compare relative performance between K-means and MoG models

In [25]:
def IGraph_2_2_4(K_means, MoG):
    '''
    Compare the final validation losses of K-means and MoG on data100D.npy

    Draws two vertically stacked subplots with a shared x-axis (K-means on top,
    MoG below), each showing the last recorded 'valid_loss' of every run against
    that run's number of clusters.

    Input:
        K_means: sequence of K-means results; each entry must provide 'valid_loss'
        MoG:     sequence of MoG results; each entry must provide 'valid_loss'
        If every entry of a sequence also provides 'K', it is used as the
        x-coordinate; otherwise the runs are numbered 1, 2, ...
    Output:
        return value of pyo.iplot for the generated figure
    Side effects:
        sends the figure to py.iplot (plotly.plotly) with sharing='private'
    '''
    def _loss_trace(results, name):
        # Take the x-coordinates from the results themselves instead of assuming
        # exactly 15 runs, so x and y always have the same length
        try:
            cluster_counts = [result['K'] for result in results]
        except KeyError:
            cluster_counts = list(range(1, len(results) + 1))

        return {
            'x': cluster_counts,
            'y': [result['valid_loss'][-1] for result in results],
            'name': name
        }

    figure = tools.make_subplots(rows=2, cols=1, shared_xaxes=True, subplot_titles=('K-means', 'MoG'))

    figure.append_trace(_loss_trace(K_means, 'K-means'), 1, 1)
    figure.append_trace(_loss_trace(MoG, 'MoG'), 2, 1)

    figure['layout'].update({
        'height': 500,
        'width': 800,
        'title': 'K-means and MoG Models Performances on data100D.npy',
        'xaxis1': {'title': 'Number of clusters, K', 'dtick': 1},
        'yaxis1': {'title': '$\\text{Valid. Loss, } \\mathcal{L}(\\mathbf{\\mu})$'},
        'yaxis2': {'title': '$\\text{Valid. Loss, } - P(\\mathbf{X})$'},
        'showlegend': False
    })

    # Upload a private copy of the plot
    py.iplot(figure, filename='/ECE521: A3/Q2: Mixture of Gaussians/Q2.4_K_means_vs_MoG', sharing='private')

    # Render the plot offline inside the notebook
    return pyo.iplot(figure)
    
# Plot the final validation losses of both models, K-means on top and MoG below
IGraph_2_2_4(
    K_means=results_2_2_4_K_means,
    MoG=results_2_2_4_MoG
)

This is the format of your plot grid:
[ (1,1) x1,y1 ]
[ (2,1) x1,y2 ]

