In [1]:
import mxnet as mx
import numpy as np
from sklearn.cluster import KMeans
import time

  from ._conv import register_converters as _register_converters


In [2]:
def quantize(data,shrink=16):
    """Quantize a 4-D filter bank with an independent k-means per channel.

    For every index ``c`` along axis 1, the slices ``data[:, c, :, :]`` are
    flattened to one vector per entry of axis 0 and clustered into
    ``data.shape[0] // shrink`` groups. Each slice is then replaced by the
    centre of the cluster it was assigned to.

    Parameters
    ----------
    data : array-like with ``.shape``, 4-D slicing, ``.reshape`` and ``.asnumpy()``
        The notebook passes an ``mx.nd.NDArray`` of filters.
    shrink : int
        Reduction factor; the number of clusters is ``data.shape[0] // shrink``.

    Returns
    -------
    result : np.ndarray
        Same shape as ``data``; every slice replaced by its cluster centre.
    cluster_centers : np.ndarray
        Shape ``(nclusters, shape[1], shape[2], shape[3])``; the per-channel codebook.
    q_indices : np.ndarray
        Shape ``(shape[0], shape[1])``; cluster label of each slice (stored as floats
        because the array is created with ``np.zeros``).

    Raises
    ------
    ValueError
        If ``shrink`` is so large that no cluster would remain.
    """
    shape=data.shape
    # Floor division keeps the cluster count an integer on Python 3 as well;
    # a float here would break both np.zeros and KMeans(n_clusters=...).
    nclusters=shape[0]//shrink
    if nclusters < 1:
        raise ValueError("shrink=%r leaves no clusters for %d filters" % (shrink, shape[0]))

    result=np.zeros(shape)
    q_indices=np.zeros((shape[0],shape[1]))
    cluster_centers=np.zeros((nclusters, shape[1], shape[2], shape[3]))

    for channel in range(shape[1]):
        c_data=data[:,channel,:,:]
        cshape=c_data.shape
        # One row per slice, one column per spatial position.
        c_data_shaped=c_data.reshape((cshape[0], cshape[1]*cshape[2]))
        # Convert once and reuse for both fit and predict.
        samples=c_data_shaped.asnumpy()

        estimator = KMeans(n_clusters=nclusters)
        estimator.fit(samples)

        indices = estimator.predict(X=samples)
        # Integer-array indexing picks the centre of every sample in one step.
        data_quantized = estimator.cluster_centers_[indices]

        cluster_centers[:,channel,:,:] = estimator.cluster_centers_.reshape(nclusters,cshape[1],cshape[2])
        q_indices[:,channel]=indices

        result[:,channel,:,:]=data_quantized.reshape(cshape)

    return result, cluster_centers, q_indices

def get_onehot(data,nclusters, batch_size):
    """Build a batched one-hot lookup matrix from cluster indices.

    ``data`` is one-hot encoded with depth ``nclusters``, reshaped with the
    MXNet shape codes ``(0, -1)``, given a new leading axis and broadcast
    along that axis to ``batch_size`` entries.

    Assumes ``data`` is a 2-D index array (the notebook passes the
    ``q_indices`` returned by ``quantize``) -- TODO confirm for other callers.
    """
    codes = mx.nd.array(data)
    encoded = mx.nd.one_hot(codes, depth=nclusters)
    flattened = encoded.reshape(0,-1)
    with_batch_axis = mx.nd.expand_dims(flattened, axis=0)
    return mx.nd.broadcast_axes(with_batch_axis, axis=0, size=batch_size)

def convolve_codebook_lighter(data, filters, indices, fshape, output_shape):
    """Convolve with the codebook filters, then pick outputs via a batched matmul.

    Parameters
    ----------
    data : NDArray fed to ``mx.nd.Convolution``.
    filters : NDArray of codebook weights. The notebook prepares it by
        transposing the codebook with axes ``(1,0,2,3)`` and reshaping to
        ``(-1,1,0,0)``.
    indices : NDArray used as the left operand of ``mx.nd.batch_dot``
        (the notebook passes the output of ``get_onehot``).
    fshape : sequence; ``fshape[1]`` is used as the group count and
        ``fshape[0]*fshape[1]`` as the number of filters.
    output_shape : sequence; only entries 2 and 3 are read, for the final reshape.

    Returns
    -------
    NDArray reshaped with ``(0, 0, output_shape[2], output_shape[3])``.

    The kernel size is fixed at 3x3.
    """
    group_count = fshape[1]
    filter_count = fshape[0]*fshape[1]
    responses = mx.nd.Convolution(data=data, weight=filters, num_group=group_count,
                                  num_filter=filter_count, no_bias=True, kernel=(3,3))

    # Flatten each feature map so the lookup becomes a matrix product.
    flat_responses = responses.reshape((0,0,-1))
    selected = mx.nd.batch_dot(lhs=indices,rhs=flat_responses)

    return selected.reshape((0,0,output_shape[2],output_shape[3]))


In [3]:
def convolve_codebook_lighter_lookup(data, filters, indices, fshape, output_shape, outputholder):
    """Convolve with the codebook filters, then gather and sum feature maps per sample.

    Parameters
    ----------
    data : NDArray fed to ``mx.nd.Convolution``.
    filters : NDArray of codebook weights (grouped, 3x3 kernel).
    indices : NDArray of positions passed to ``mx.nd.take`` (the notebook passes
        the output of ``modify_indices``).
    fshape : sequence; ``fshape[1]`` is the group count and
        ``fshape[0]*fshape[1]`` the number of filters.
    output_shape : unused here; kept so the signature matches the sibling
        ``convolve_codebook_lighter*`` functions.
    outputholder : preallocated NDArray that receives one result per sample.
        It is modified in place and must have at least as many entries along
        axis 0 as the convolution output.

    Returns
    -------
    The same ``outputholder`` object, filled in.
    """
    res = mx.nd.Convolution(data=data, weight=filters, num_group=fshape[1], num_filter=fshape[0]*fshape[1],
                            no_bias=True, kernel=(3,3))

    for iidx in range(res.shape[0]):
        gathered = mx.nd.take(res[iidx], indices=indices)
        # Store each sample in its own slot. Writing to slot 0 on every pass
        # (the previous behaviour) kept only the last sample for batches > 1.
        outputholder[iidx]=mx.nd.sum(gathered, axis=1)

    return outputholder


In [4]:
def modify_indices(indices,n_cluster):
    """Shift every column of an index matrix by ``column * n_cluster``.

    Entry ``[r, c]`` of the result equals ``indices[r, c] + c * n_cluster``.
    The result is a fresh ``mx.nd.zeros`` array with the shape of ``indices``;
    the input is not modified.
    """
    dims = indices.shape
    shifted = mx.nd.zeros(dims)
    for col in range(dims[1]):
        # The offset depends only on the column, so compute it once per column.
        offset = col*n_cluster
        for row in range(dims[0]):
            shifted[row,col] = indices[row,col]+ offset
    return shifted

In [7]:
# Experiment setup: a random input image, a random filter bank, and its
# k-means-quantized counterpart in the different layouts used by the benchmarks.
batch_size=1
shrink = 8
img=mx.nd.random.uniform(0, 1, shape=(1,16,32,32))
fshape=(32,16,3,3)
# Floor division keeps the cluster count an int on Python 3 too; it is later
# used as a one-hot depth and as an index offset.
nclusters=fshape[0]//shrink
orig_filter=mx.nd.random.uniform(0, 1, shape=fshape)


qfilter, codebook_filter, indices = quantize(orig_filter, shrink=shrink)
indices = mx.nd.array(indices)
qfilter = mx.nd.array(qfilter)
# indices has fshape[0] rows, so indices.shape[0]//shrink equals nclusters.
onehot_indices = get_onehot(indices,nclusters, batch_size=batch_size)
# Reorder the codebook with axes (1,0,2,3) and fold the first two axes together
# so it can be used as the weight of the grouped convolution.
flat_codebook_filter=  mx.nd.transpose(mx.nd.array(codebook_filter), axes=(1,0,2,3)).reshape((-1,1,0, 0))

codebookshape=codebook_filter.shape
indices_shape=indices.shape
data_iter = mx.io.NDArrayIter(img, batch_size= batch_size)

# Preallocated output buffer; 30 is presumably 32 - 3 + 1 for an unpadded
# 3x3 convolution -- TODO confirm if the image or kernel size changes.
outputholder = mx.nd.zeros(((batch_size,fshape[0],30,30)))
mod_ind=modify_indices(indices,nclusters)

In [None]:
# Baseline: time and profile 1000 plain convolutions with the quantized filters.
begin =  time.time()

mx.profiler.set_config(profile_all=True,
                        filename='original1.json',  # File used for chrome://tracing visualization
                        continuous_dump=True,
                        aggregate_stats=True)
mx.profiler.set_state('run')

for i in range(1000):
    # Converted to NumPy so it can be compared with the clustered results later.
    result_original = mx.nd.Convolution(data=img,weight=qfilter, num_filter=fshape[0], kernel=(3,3), no_bias=True).asnumpy()

mx.profiler.set_state('stop')

# print(...) with a single argument prints the same on Python 2 and Python 3.
print(time.time() - begin)
print(mx.profiler.dumps())

In [None]:
# Codebook variant 1: grouped convolution followed by a batch_dot lookup.
begin =  time.time()
mx.profiler.set_config(profile_all=True,
                        filename='clustered1.json',  # File used for chrome://tracing visualization
                        continuous_dump=True,
                        aggregate_stats=True,
                      profile_symbolic=True)
mx.profiler.set_state('run')

for i in range(1000):
    result_clustered = convolve_codebook_lighter(data=img, filters = flat_codebook_filter, indices=onehot_indices, fshape = codebookshape,
                               output_shape=(batch_size,fshape[0],30,30)).asnumpy()

mx.profiler.set_state('stop')

# print(...) with a single argument prints the same on Python 2 and Python 3.
print(time.time() - begin)
print(mx.profiler.dumps())

In [None]:
# Mean squared error between the batch_dot codebook result and the baseline.
# print(...) with a single argument prints the same on Python 2 and Python 3.
print(np.mean(np.square(result_clustered - result_original)))

In [8]:
# Codebook variant 2: grouped convolution followed by a take/sum lookup.
begin =  time.time()
# NOTE(review): this reuses the trace filename 'clustered1.json' from the
# batch_dot run, so that run's trace file is presumably overwritten -- confirm
# whether a separate file is wanted.
mx.profiler.set_config(profile_all=True,
                        filename='clustered1.json',  # File used for chrome://tracing visualization
                        continuous_dump=True,
                        aggregate_stats=True,
                      profile_symbolic=True)
mx.profiler.set_state('run')


for i in range(1000):
    result_clustered_2 = convolve_codebook_lighter_lookup(data=img, filters = flat_codebook_filter,
                               indices=mod_ind,fshape = codebookshape,
                               output_shape=(batch_size,fshape[0],30,30), outputholder=outputholder).asnumpy()

mx.profiler.set_state('stop')

# print(...) with a single argument prints the same on Python 2 and Python 3.
print(time.time() - begin)
print(mx.profiler.dumps())

9.18899106979

Profile Statistics.
	Note that counter items are counter values and not time units.
Device Storage
Name                          Total Count        Time (ms)    Min Time (ms)    Max Time (ms)    Avg Time (ms)
----                          -----------        ---------    -------------    -------------    -------------
Memory: cpu/0                        8113        2075.6321           0.0160        4264.4321        2132.2080

MXNET_C_API
Name                          Total Count        Time (ms)    Min Time (ms)    Max Time (ms)    Avg Time (ms)
----                          -----------        ---------    -------------    -------------    -------------
MXNDArrayGetContext                  1024           0.4750           0.0000           0.0010           0.0005
MXNDArrayGetStorageType                 1           0.0010           0.0010           0.0010           0.0010
MXNDArrayGetDType                    2060           0.9370           0.0000           0.0080           

In [None]:
# Mean squared error between the take/sum codebook result and the baseline.
# print(...) with a single argument prints the same on Python 2 and Python 3.
print(np.mean(np.square(result_clustered_2 - result_original)))

In [None]:
# Compare row 1 of the raw cluster indices with the same row after the
# per-column offset applied by modify_indices.
# print(...) with a single argument prints the same on Python 2 and Python 3.
print(indices.asnumpy()[1])
print(modify_indices(indices, nclusters).asnumpy()[1])

In [None]:
# Dump the full index matrix for inspection.
# print(...) with a single argument prints the same on Python 2 and Python 3.
print(indices)

In [None]:
############################################################################################

In [None]:
# Re-apply the profiler configuration, then switch the profiler on and
# immediately off again, so nothing between the two state changes is profiled.
mx.profiler.set_config(
    filename='clustered1.json',  # File used for chrome://tracing visualization
    profile_all=True,
    profile_symbolic=True,
    continuous_dump=True,
    aggregate_stats=True)
mx.profiler.set_state('run')
mx.profiler.set_state('stop')


In [None]:
# Codebook variant 3: grouped convolution followed by a sparse.dot lookup.
# NOTE: convolve_codebook_lighter_sparse is defined in a later cell of this
# notebook; on a fresh kernel that cell has to be run before this one.
begin =  time.time()

for i in range(1000):
    result_clustered = convolve_codebook_lighter_sparse(data=img, filters = flat_codebook_filter, indices=onehot_indices, fshape = codebookshape,
                               output_shape=(batch_size,fshape[0],30,30), outputholder=outputholder).asnumpy()
# print(...) with a single argument prints the same on Python 2 and Python 3.
print(time.time() - begin)


In [None]:
def convolve_codebook_lighter_sparse(data, filters, indices, fshape, output_shape, outputholder):
    """Convolve with the codebook filters, then pick outputs with ``mx.nd.sparse.dot``.

    Parameters
    ----------
    data : NDArray fed to ``mx.nd.Convolution``.
    filters : NDArray of codebook weights (grouped, 3x3 kernel).
    indices : batched lookup matrix; only ``indices[0]`` is used, for every
        sample (the notebook passes the output of ``get_onehot``).
    fshape : sequence; ``fshape[1]`` is the group count and
        ``fshape[0]*fshape[1]`` the number of filters.
    output_shape : sequence; only entries 2 and 3 are read, for the final reshape.
    outputholder : preallocated NDArray that receives one result per sample.
        It is modified in place and must have at least as many entries along
        axis 0 as the convolution output.

    Returns
    -------
    ``outputholder`` reshaped with ``(0, 0, output_shape[2], output_shape[3])``.
    """
    res = mx.nd.Convolution(data=data, weight=filters, num_group=fshape[1], num_filter=fshape[0]*fshape[1],
                            no_bias=True, kernel=(3,3))
    res = res.reshape((0,0,-1)) #flatten the image for matmul lookup

    # The lookup matrix does not change between samples, so slice it once.
    lookup = indices[0]
    # Shape of one per-sample slot in the output buffer.
    slot_shape = outputholder.shape[1:]

    for iidx in range(res.shape[0]):
        mixed = mx.nd.sparse.dot(lookup,res[iidx])
        # Store each sample in its own slot (slot 0 was overwritten on every
        # pass before). The 2-D product is reshaped to the slot's own shape so
        # the assignment does not rely on MXNet accepting a differently
        # shaped value.
        outputholder[iidx]=mixed.reshape(slot_shape)

    res = outputholder.reshape((0,0,output_shape[2],output_shape[3]))

    return res
