In [1]:
import mxnet as mx
import numpy as np
from sklearn.cluster import KMeans
import time

  from ._conv import register_converters as _register_converters


In [2]:
def convolve_codebook_ndarray(data, codebook):
    """Convolve ``data`` with every codebook kernel and return the lookup table.

    Every input channel is convolved on its own (grouped convolution) with
    each of the cluster-centre kernels belonging to that channel.

    Parameters
    ----------
    data : mx.nd.NDArray
        Input batch. Its channel count must equal ``codebook.shape[1]``
        because that value is used as ``num_group``.
    codebook : mx.nd.NDArray
        Cluster-centre kernels, shape (nclusters, channels, kh, kw).

    Returns
    -------
    mx.nd.NDArray
        5-D array indexed as (batch, cluster, channel, out_h, out_w).
    """
    nclusters, nchannels = codebook.shape[0], codebook.shape[1]
    # Read the kernel size from the codebook instead of assuming 3x3.
    kernel = tuple(codebook.shape[2:])

    # (nclusters, channels, kh, kw) -> (channels*nclusters, 1, kh, kw):
    # one single-channel kernel per (channel, cluster) pair, channel-major so
    # that each convolution group holds the kernels of exactly one channel.
    filters = mx.nd.transpose(codebook, axes=(1, 0, 2, 3)).reshape((-1, 1, 0, 0))
    res = mx.nd.Convolution(data=data, weight=filters, num_group=nchannels,
                            num_filter=nclusters * nchannels, no_bias=True,
                            kernel=kernel)
    # Debug output kept from the original; the call form prints the same text
    # under Python 2 and is valid syntax under Python 3.
    print(res.shape)
    # The extra axis lets the reshape below split the fused channel*cluster
    # axis while the trailing 0s still copy the two spatial dimensions.
    res = res.expand_dims(1)
    print(res.shape)
    res = res.reshape((0, nchannels, nclusters, 0, 0))
    print(res.shape)
    # Swap to (batch, cluster, channel, out_h, out_w).
    res = mx.nd.transpose(res, axes=(0, 2, 1, 3, 4))

    return res

In [3]:
def quantize(data,shrink=16):
    """Quantize a 4-D filter bank per input channel with k-means.

    For every channel, the ``shape[0]`` kernels of that channel are clustered
    into ``shape[0] // shrink`` centroids and each kernel is replaced by the
    centroid it is assigned to.

    Parameters
    ----------
    data : mx.nd.NDArray or array-like
        Filter bank of shape (filters, channels, kh, kw).
    shrink : int
        Compression factor; the number of clusters per channel is
        ``filters // shrink``.

    Returns
    -------
    result : np.ndarray
        Same shape as ``data``; every kernel replaced by its centroid.
    cluster_centers : np.ndarray
        Codebook of shape (nclusters, channels, kh, kw).
    q_indices : np.ndarray
        Float array of shape (filters, channels) holding the cluster id of
        every kernel (callers cast it to int).

    Raises
    ------
    ValueError
        If ``shrink`` is larger than the number of filters, which would leave
        zero clusters.
    """
    shape = data.shape
    # Floor division: plain "/" yields a float on Python 3, which neither
    # np.zeros nor KMeans accepts as a size.
    nclusters = shape[0] // shrink
    if nclusters < 1:
        raise ValueError(
            "shrink=%r leaves no clusters for %d filters" % (shrink, shape[0]))

    # Convert once up front instead of twice per channel inside the loop.
    data_np = data.asnumpy() if hasattr(data, "asnumpy") else np.asarray(data)

    result = np.zeros(shape)
    q_indices = np.zeros((shape[0], shape[1]))
    cluster_centers = np.zeros((nclusters, shape[1], shape[2], shape[3]))

    for channel in range(shape[1]):
        # One row per filter, the kernel of this channel flattened to a vector.
        kernels = data_np[:, channel].reshape(shape[0], shape[2] * shape[3])

        estimator = KMeans(n_clusters=nclusters)
        estimator.fit(kernels)
        indices = estimator.predict(X=kernels)

        centers = estimator.cluster_centers_
        cluster_centers[:, channel, :, :] = centers.reshape(nclusters, shape[2], shape[3])
        q_indices[:, channel] = indices
        # Fancy indexing looks up every kernel's centroid in one step.
        result[:, channel, :, :] = centers[indices].reshape(shape[0], shape[2], shape[3])

    return result, cluster_centers, q_indices

In [4]:
def convolve_codebook(data, indices, codebookshape, output_shape):
    """Build a symbol computing a quantized convolution via table lookup.

    The codebook kernels are convolved with the input once (grouped by
    channel). Each output filter is then assembled by picking, for every
    channel, the response of the cluster that ``indices`` assigns to that
    (filter, channel) pair, and summing those responses over the channels.

    Parameters
    ----------
    data : mx.sym.Symbol
        Input symbol.
    indices : array-like
        Concrete (not symbolic) integer array of shape (filters, channels)
        holding cluster ids; it is read while the graph is being built.
    codebookshape : tuple
        (nclusters, channels, kh, kw); shape of the "codebook" variable.
    output_shape : tuple
        (batch, filters, out_h, out_w). Only entries 0, 2 and 3 are read.

    Returns
    -------
    mx.sym.Symbol
        Stacked result with axes (batch, filters, out_h, out_w).
    """
    filters = mx.sym.Variable("codebook", shape=codebookshape)
    nclusters, nchannels = codebookshape[0], codebookshape[1]
    # Read the kernel size from the codebook instead of assuming 3x3.
    kernel = tuple(codebookshape[2:])
    index_shape = indices.shape
    out_h, out_w = output_shape[2], output_shape[3]

    # One single-channel kernel per (channel, cluster) pair.
    # TODO (from the original author): the transpose may be unnecessary.
    filters = mx.sym.transpose(filters, axes=(1, 0, 2, 3)).reshape((-1, 1, 0, 0))
    res = mx.sym.Convolution(data=data, weight=filters, num_group=nchannels,
                             num_filter=nclusters * nchannels, no_bias=True,
                             kernel=kernel)
    res = res.expand_dims(1)
    res = res.reshape((0, nchannels, nclusters, 0, 0))
    res = mx.sym.transpose(res, axes=(0, 2, 1, 3, 4))  # lookup table

    # Multi-dimensional indexing is not available on symbols, so collapse
    # every axis but the last into one row axis and address rows by offset.
    res = mx.sym.reshape(data=res, shape=(-1, 0), reverse=1)

    # Debug output kept from the original, written so it parses on Python 3.
    print(index_shape)

    lres = []
    # TODO: find a way to implement this with fewer loops.
    for sample in range(output_shape[0]):
        filterwise_list = []
        for fltr in range(index_shape[0]):
            channelwise_list = []
            for ch in range(index_shape[1]):
                # First row of the (sample, cluster, channel) map in the
                # flattened table; int() turns a NumPy scalar into a plain int.
                cluster = int(indices[fltr, ch])
                row = ((sample * nclusters + cluster) * nchannels + ch) * out_h
                channelwise_list.append(
                    mx.sym.slice(data=res, begin=(row, 0), end=(row + out_h, out_w)))

            # Sum the selected per-channel responses into one output map.
            filterwise_list.append(mx.sym.sum(mx.sym.stack(*channelwise_list), axis=0))
        lres.append(mx.sym.stack(*filterwise_list))
    lres = mx.sym.stack(*lres)

    return lres

In [5]:
def convolve_codebook_lighter(data, fshape, codebookshape, output_shape, indices_shape):
    """Build a symbol for the quantized convolution using a matmul lookup.

    The grouped convolution yields one response map per (channel, cluster)
    pair. A batched matrix product with the "indices" variable then combines
    those maps into the output filters, replacing the explicit Python loops
    of the loop-based variant.

    Parameters
    ----------
    data : mx.sym.Symbol
        Input symbol.
    fshape : tuple
        Codebook shape (nclusters, channels, ...); entries 0 and 1 set the
        number of filters and groups of the convolution.
    codebookshape : tuple
        Shape of the "codebook" weight variable as fed to the convolution;
        its trailing two entries are the kernel size.
    output_shape : tuple
        (batch, filters, out_h, out_w). Only entries 2 and 3 are read.
    indices_shape : tuple
        Shape of the "indices" variable. The call site in this notebook
        passes the one-hot matrix from get_onehot, (batch, filters,
        channels*nclusters).

    Returns
    -------
    mx.sym.Symbol
        Result reshaped to (batch, filters, out_h, out_w).
    """
    filters = mx.sym.Variable("codebook", shape=codebookshape)
    indices = mx.sym.Variable("indices", shape=indices_shape)

    # Take the kernel size from the weight shape instead of assuming 3x3.
    kernel = tuple(codebookshape[2:])

    res = mx.sym.Convolution(data=data, weight=filters, num_group=fshape[1],
                             num_filter=fshape[0] * fshape[1], no_bias=True,
                             kernel=kernel)
    # Flatten each response map so the lookup becomes a matrix product.
    res = res.reshape((0, 0, -1))

    res = mx.sym.batch_dot(lhs=indices, rhs=res)

    # Restore the spatial layout of every output map.
    res = res.reshape((0, 0, output_shape[2], output_shape[3]))

    return res


In [6]:
def get_onehot(data,nclusters, batch_size):
    """Return a batched one-hot encoding of the cluster indices.

    ``data`` is converted to an NDArray and one-hot encoded with depth
    ``nclusters``. All axes after the first are flattened into one, and the
    resulting matrix is repeated ``batch_size`` times along a new leading axis.
    """
    encoded = mx.nd.one_hot(mx.nd.array(data), depth=nclusters)
    # Keep the first axis and fold the remaining axes together.
    flat = encoded.reshape(0, -1)
    with_batch_axis = mx.nd.expand_dims(flat, axis=0)
    return mx.nd.broadcast_axes(with_batch_axis, axis=0, size=batch_size)

In [7]:
batch_size=7
shrink = 2

# Seed both generators so a fresh "Restart & Run All" reproduces the same
# random inputs and the same k-means clustering (KMeans without an explicit
# random_state draws from NumPy's global generator).
mx.random.seed(0)
np.random.seed(0)

# The batch dimension follows batch_size so the two cannot drift apart.
img=mx.nd.random.uniform(0, 1, shape=(batch_size,16,32,32))
# One label per image (the original listed 8 labels for 7 images); the labels
# are not passed to data_iter below.
labels=mx.nd.array([1,0,1,0,0,1,0])
fshape=(32,16,3,3)
orig_filter=mx.nd.random.uniform(0, 1, shape=fshape)

# qfilter: de-quantized filters, codebook_filter: per-channel centroids,
# indices: cluster id of every (filter, channel) kernel.
qfilter, codebook_filter, indices = quantize(orig_filter, shrink=shrink)
indices=indices.astype(int)
codebookshape=codebook_filter.shape
indices_shape=indices.shape
data_iter = mx.io.NDArrayIter(img, batch_size= batch_size)
ctx=mx.gpu()


In [8]:
args={"codebook": mx.nd.array(codebook_filter)}
data=mx.sym.Variable("data")


In [9]:
# Loop-based codebook convolution. The output shape is derived from the data
# and filter shapes ("valid" convolution, stride 1) instead of being typed in:
# the original listed 128 filters here although fshape[0] is 32.
out_h = img.shape[2] - fshape[2] + 1
out_w = img.shape[3] - fshape[3] + 1
sym=convolve_codebook(data=data,indices= indices,codebookshape= codebookshape,
                      output_shape=(batch_size,fshape[0],out_h,out_w))
mod=mx.mod.Module(symbol=sym, context=ctx)
# Inference-only bind; the codebook weights are supplied via set_params.
mod.bind(for_training=False, data_shapes=data_iter.provide_data)
mod.set_params(args,None)

(32, 16)


	data
	codebook[0m


In [10]:
## Baseline: an ordinary dense convolution whose weights are the de-quantized
## filters (qfilter, every kernel replaced by its k-means centroid). It serves
## as the reference result for the codebook-based variants.
args2={"filters":mx.nd.array(qfilter)}

baseline_filters=mx.sym.Variable("filters")
# 3x3 convolution with fshape[0] output filters and no bias term.
sym2=mx.sym.Convolution(data=data,weight=baseline_filters, num_filter=fshape[0], kernel=(3,3), no_bias=True)

mod2=mx.mod.Module(symbol=sym2, context=ctx)
# Inference-only bind; the weights are supplied through set_params below.
mod2.bind(for_training=False, data_shapes=data_iter.provide_data)
mod2.set_params(args2, None)

	data
	filters[0m


In [11]:
# One-hot lookup matrix for the matmul-based variant. Floor division keeps the
# one-hot depth an integer on Python 3 as well; get_onehot converts its input
# to an NDArray itself, so the NumPy index array is passed directly.
onehot_indices = get_onehot(indices, indices.shape[0] // shrink, batch_size=batch_size)

# Codebook rearranged to one single-channel kernel per (channel, cluster) pair.
qfilter3=  mx.nd.transpose(mx.nd.array(codebook_filter), axes=(1,0,2,3)).reshape((-1,1,0, 0))
args3={"codebook":qfilter3, "indices" : onehot_indices}

# NDArray exposes .shape directly; no round-trip through NumPy is needed.
sym3=convolve_codebook_lighter(data=data,fshape = codebookshape, codebookshape= qfilter3.shape,
                               output_shape=(batch_size,fshape[0],30,30), indices_shape = onehot_indices.shape)
mod3=mx.mod.Module(symbol=sym3, context=ctx)
# Inference-only bind; codebook and indices are supplied via set_params.
mod3.bind(for_training=False, data_shapes=data_iter.provide_data)
mod3.set_params(args3,None)

	indices
	data
	codebook[0m


In [12]:
# Wall-clock time of one prediction pass of the loop-based codebook module.
# .asnumpy() copies the result to the host, so the computation is complete
# before the clock is read.
begin=time.time()
result=mod.predict(eval_data=data_iter).asnumpy()
# Call form prints the same value on Python 2 and also parses on Python 3.
print(time.time() - begin)

0.154917001724


In [13]:
# Wall-clock time of one prediction pass of the dense baseline module.
# .asnumpy() copies the result to the host, so the computation is complete
# before the clock is read.
begin=time.time()

result2=mod2.predict(eval_data=data_iter).asnumpy()
# Call form prints the same value on Python 2 and also parses on Python 3.
print(time.time() - begin)

0.0382959842682


In [14]:
# Wall-clock time of one prediction pass of the matmul-lookup module.
# .asnumpy() copies the result to the host, so the computation is complete
# before the clock is read.
begin=time.time()
result3=mod3.predict(eval_data=data_iter).asnumpy()
# Call form prints the same value on Python 2 and also parses on Python 3.
print(time.time() - begin)

0.00500011444092


In [None]:
np.mean(np.square(result-result2))

In [None]:
np.mean(np.square(result-result3))

In [None]:
print indices[0,3]
print onehot_indices[4,0,12:16]

In [None]:
np.mean(np.square(result-result3))

In [None]:
indices.shape

In [None]:
indices_one_hot = get_onehot(indices,indices.shape[0]/8, 4)

In [None]:
indices_one_hot.shape

In [None]:
indices[0,1]

In [None]:
test1=mx.nd.array([1,1,1,0,0,0])
test1=mx.nd.array(test1)
test1.shape

In [None]:
#mx.nd.tile(mx.nd.one_hot(test1, depth=2),reps=())

In [None]:
mx.nd.one_hot(test1, depth=4).reshape(-1)

In [None]:
a=mx.nd.array([[1,2],[3,4],[5,6],[7,8],[9,10]])

In [None]:
b = a.reshape((-1))
b

In [None]:
goal = mx.nd.array([1,4,5,7,10])

In [None]:
ids=mx.nd.array([[0,1,0,0,1],[0,1,0,0,0]])
ids.shape

In [None]:
ids = mx.nd.one_hot(ids,depth=2)
ids

In [None]:
ids=ids.reshape((0,-1))
ids

In [None]:
mx.nd.dot(lhs=ids,rhs=b)

In [None]:
mx.nd.expand_dims(a,axis=0).shape

In [None]:
ids_batch = mx.nd.broadcast_axes(mx.nd.expand_dims(ids,axis=0),axis=0, size=3)
ids_batch.shape

In [None]:
b_batch = mx.nd.broadcast_axes(mx.nd.expand_dims(b,axis=0),axis=0, size=3).expand_dims(2)
b_batch.shape

In [None]:
mx.nd.batch_dot(lhs=ids_batch,rhs=b_batch)

In [None]:
begin=time.time()

mx.profiler.set_config(profile_all=True,
                        filename='clustered_profile.json',  # File used for chrome://tracing visualization
                        continuous_dump=True,
                        aggregate_stats=True,
                      profile_symbolic=True)
mx.profiler.set_state('run')

result=mod.predict(eval_data=data_iter).asnumpy()

mx.profiler.set_state('stop')
print(mx.profiler.dumps())

#print time.time()-begin


In [None]:
begin=time.time()
result=mod.predict(eval_data=data_iter).asnumpy()
print time.time()-begin

In [None]:
data_iter.reset()

In [None]:
begin=time.time()

mx.profiler.set_config(profile_all=True,
                        filename='baseline_profile.json',  # File used for chrome://tracing visualization
                        continuous_dump=True,
                        aggregate_stats=True)
mx.profiler.set_state('run')

result2=mod2.predict(eval_data=data_iter).asnumpy()

mx.profiler.set_state('stop')
print(mx.profiler.dumps())
print time.time()-begin



In [None]:
data_iter.reset()

In [None]:
##baseline2
reshaped_filter = mx.nd.transpose(mx.nd.array(codebook_filter), axes=(1,0,2,3)).reshape((-1,1,0, 0))
args3={"filters":reshaped_filter}

baseline3_filters=mx.sym.Variable("filters")
sym3= mx.sym.Convolution(data=data, weight=baseline3_filters, num_group=codebookshape[1], 
                         num_filter=codebookshape[0]*codebookshape[1], no_bias=True, kernel=(3,3))


mod3=mx.mod.Module(symbol=sym3, context=mx.gpu())
#mod.init_params(arg_params=args)
mod3.bind(for_training=False, data_shapes=data_iter.provide_data)#,{'codebook': mx.nd.array(codebook_filter)}], label_shapes=None)
mod3.set_params(args3, None)

In [None]:

# Profile the grouped-convolution module (mod3). The trace goes to its own
# file: the original reused 'baseline_profile.json' and so overwrote the trace
# written by the dense-baseline profiling cell.
mx.profiler.set_config(profile_all=True,
                        filename='baseline2_profile.json',  # File used for chrome://tracing visualization
                        continuous_dump=True,
                        aggregate_stats=True)
mx.profiler.set_state('run')
begin=time.time()

result3=mod3.predict(eval_data=data_iter).asnumpy()
# Call form prints the same value on Python 2 and also parses on Python 3.
print(time.time()-begin)

mx.profiler.set_state('stop')
print(mx.profiler.dumps())



In [None]:
result.shape

In [None]:
result2=convolve_codebook_ndarray(img,args["codebook"])
np.array_equal(result.asnumpy(),result2.asnumpy())

In [None]:
import numpy as np

In [None]:
aa=mx.nd.zeros((7,4,16,30,30))

for s in range(7):
    for i in range(4):
        filled= np.empty((16,30,30))
        filled.fill(i+s)
        aa[s,i,:,:,:] = mx.nd.array(filled)

In [None]:
lookup_idx=mx.nd.array(np.random.choice(4,(32,16)))
print lookup_idx[0].shape

In [None]:
lres=mx.nd.tile(data=mx.nd.zeros_like(aa[0,0,0,:,:]),reps=(7,32,1,1))
lres.shape

In [None]:
aa[0][0][0].shape

In [None]:
#lres=mx.nd.zeros((7,32,30,30))

for sample in range(7):#7
    for fltr in range(lookup_idx.shape[0]):#32 
        for ch in range(lookup_idx.shape[1]):#16
            lres[sample,fltr]+=aa[sample,lookup_idx[fltr,ch],ch,:,:][0]

In [None]:
#this is how you slice ------------------------------------
print aa[1,3,15,0]
print aa.shape
samp=1
cls=3
ch=15
print ((samp*4+cls)*16+ch)*30
aa2=mx.nd.reshape(data=aa,shape=(-1,0),reverse=1)
print mx.nd.slice(data=aa2, begin=(((samp*4+cls)*16+ch)*30,0), end=(((samp*4+cls)*16+ch)*30+30,30))
#print aa2[480*7:480*7+30]
#print mx.nd.slice_axis(data=aa,axis=2,begin=0,end=1).shape

In [None]:
lres2=[]
for sample in range(7):#7
    filterwise_list=[]
    for fltr in range(lookup_idx.shape[0]):#32 
        channelwise_list=[]
        for ch in range(lookup_idx.shape[1]):#16
            
            channelwise_list.append(aa[sample,lookup_idx[fltr,ch],ch,:,:][0])
        filterwise_list.append(mx.nd.sum(mx.nd.stack(*channelwise_list),axis=0))
    lres2.append(mx.nd.stack(*filterwise_list))
lres2=mx.nd.stack(*lres2)
print lres2.shape

In [None]:
c=mx.nd.stack(c,b, axis = 1)
c.shape

In [None]:
mx.nd.tile(data=a,reps=(7,16,1,1)).shape

In [None]:
mx.nd.sum(lookup_idx[6])

In [None]:
lres2[0][6][0]

In [None]:
img=mx.nd.random.uniform(0, 1, shape=(8,16,32,32))
data_iter = mx.io.NDArrayIter(img, batch_size= 3, label_name=None)


In [None]:
data_iter.provide_data

In [None]:
data_iter.next()

In [None]:
data_iter.provide_data

In [None]:
def construct_tensors(data, l_indices, index_shape, batch_size):
    """Assemble output maps by summing looked-up per-channel responses.

    For every sample and output filter, the response map of the cluster that
    ``l_indices`` assigns to each channel is read from ``data`` and the maps
    are summed over the channels.

    The original body could not run: it referenced the misspelled name
    ``indes_shape``, read the notebook global ``aa`` instead of ``data``,
    indexed with a symbolic variable that carries no concrete values,
    hard-coded the (7, 32) output size and returned nothing.

    Parameters
    ----------
    data : mx.nd.NDArray
        Lookup table indexed as data[sample, cluster, channel, :, :].
    l_indices : array-like
        Concrete cluster ids indexed as l_indices[filter, channel]
        (NumPy array or NDArray).
    index_shape : tuple
        (filters, channels); bounds of the two inner loops.
    batch_size : int
        Number of samples to process.

    Returns
    -------
    mx.nd.NDArray
        Array of shape (batch_size, filters) + data.shape[3:].
    """
    n_filters, n_channels = index_shape[0], index_shape[1]
    # Accumulator on the same device and with the same dtype as the table.
    lres = mx.nd.zeros((batch_size, n_filters) + tuple(data.shape[3:]),
                       ctx=data.context, dtype=data.dtype)

    for sample in range(batch_size):
        for fltr in range(n_filters):
            for ch in range(n_channels):
                cluster = l_indices[fltr, ch]
                # An NDArray element is itself an NDArray; unwrap it first.
                if hasattr(cluster, "asscalar"):
                    cluster = cluster.asscalar()
                lres[sample, fltr] += data[sample, int(cluster), ch]

    return lres
    

In [None]:
def convolve_codebook_nd(data, indices, codebook, codebookshape, output_shape):
    """Quantized convolution via table lookup, computed eagerly on NDArrays.

    The codebook kernels are convolved with the input once (grouped by
    channel). Each output filter is then assembled by picking, for every
    channel, the response of the cluster that ``indices`` assigns to that
    (filter, channel) pair, and summing those responses over the channels.

    Parameters
    ----------
    data : mx.nd.NDArray
        Input batch.
    indices : array-like
        Integer array of shape (filters, channels) holding cluster ids.
    codebook : mx.nd.NDArray
        Cluster-centre kernels with shape ``codebookshape``.
    codebookshape : tuple
        (nclusters, channels, kh, kw).
    output_shape : tuple
        (batch, filters, out_h, out_w). Only entries 0, 2 and 3 are read.

    Returns
    -------
    mx.nd.NDArray
        Stacked result with axes (batch, filters, out_h, out_w).
    """
    nclusters, nchannels = codebookshape[0], codebookshape[1]
    # Read the kernel size from the codebook instead of assuming 3x3.
    kernel = tuple(codebookshape[2:])
    index_shape = indices.shape
    out_h, out_w = output_shape[2], output_shape[3]

    # One single-channel kernel per (channel, cluster) pair.
    # TODO (from the original author): the transpose may be unnecessary.
    filters = mx.nd.transpose(codebook, axes=(1, 0, 2, 3)).reshape((-1, 1, 0, 0))
    res = mx.nd.Convolution(data=data, weight=filters, num_group=nchannels,
                            num_filter=nclusters * nchannels, no_bias=True,
                            kernel=kernel)
    res = res.expand_dims(1)
    res = res.reshape((0, nchannels, nclusters, 0, 0))
    res = mx.nd.transpose(res, axes=(0, 2, 1, 3, 4))  # lookup table

    # Collapse every axis but the last into one row axis so that each
    # (sample, cluster, channel) map is a contiguous run of out_h rows.
    res = mx.nd.reshape(data=res, shape=(-1, 0), reverse=1)

    lres = []
    # TODO: find a way to implement this with fewer loops.
    for sample in range(output_shape[0]):
        filterwise_list = []
        for fltr in range(index_shape[0]):
            channelwise_list = []
            for ch in range(index_shape[1]):
                # First row of the (sample, cluster, channel) map in the
                # flattened table; int() turns a NumPy scalar into a plain int.
                cluster = int(indices[fltr, ch])
                row = ((sample * nclusters + cluster) * nchannels + ch) * out_h
                channelwise_list.append(
                    mx.nd.slice(data=res, begin=(row, 0), end=(row + out_h, out_w)))

            # Sum the selected per-channel responses into one output map.
            filterwise_list.append(mx.nd.sum(mx.nd.stack(*channelwise_list), axis=0))
        lres.append(mx.nd.stack(*filterwise_list))
    lres = mx.nd.stack(*lres)

    return lres

In [None]:
import time

In [None]:
img=mx.nd.random.uniform(0, 1, shape=(10,64,32,32))
fshape=(128,64,3,3)
orig_filter=mx.nd.random.uniform(0, 1, shape=fshape)

qfilter, codebook_filter, indices = quantize(orig_filter)
indices=indices.astype(int)
codebook_filter=mx.nd.array(codebook_filter)
codebookshape=codebook_filter.shape
indices_shape=indices.shape

In [None]:
begin=time.time()
print"======================="
well = convolve_codebook_nd(data=img,codebook=codebook_filter,indices=indices,output_shape=(10,128,30,30), codebookshape=codebookshape)
print  well.asnumpy().shape
print time.time()-begin

In [None]:
well.shape

In [None]:
begin=time.time()
print "========================="
well2=mx.nd.Convolution(data=img,weight=mx.nd.array(qfilter),kernel=(3,3),num_filter=128, no_bias=True)
print  well2.asnumpy().shape
print time.time()-begin

In [None]:
well2.shape

In [None]:
# Mean squared error between the codebook result and the dense baseline.
# A plain mean of signed differences lets positive and negative errors cancel;
# this matches the np.mean(np.square(...)) metric used for the module results.
mx.nd.mean(mx.nd.square(well-well2))

In [None]:
well2[0]

In [None]:
img=mx.nd.random.uniform(0, 1, shape=(8,16,32,32))
labels=mx.nd.array([1,0,1,0,0,1,0,1])
labels = mx.nd.one_hot(depth=2,indices=labels)
fshape=(32,16,3,3)
orig_filter=mx.nd.random.uniform(0, 1, shape=fshape)

qfilter, codebook_filter, indices = quantize(orig_filter)
indices=indices.astype(int)
codebookshape=codebook_filter.shape
indices_shape=indices.shape
data_iter = mx.io.NDArrayIter(img, batch_size= 4, label=labels)




In [None]:
args={"codebook": mx.nd.array(codebook_filter)}
data=mx.sym.Variable("data")
sym=convolve_codebook(data=data,indices= indices,codebookshape= codebookshape, output_shape=(4,32,30,30))
sym=mx.sym.Flatten(sym)
sym = mx.symbol.FullyConnected(data=sym, num_hidden=2, no_bias=True)
sym = mx.symbol.SoftmaxOutput(data=sym, name='softmax')

mod=mx.mod.Module(symbol=sym, context=mx.cpu())
#mod.init_params(arg_params=args)
mod.bind( data_shapes=data_iter.provide_data, label_shapes=data_iter.provide_label)#,{'codebook': mx.nd.array(codebook_filter)}], label_shapes=None)
mod.set_params(args,None,allow_missing=True)

In [None]:
optimizer_params = {'learning_rate': 0.0000001,
                       'momentum': 0.9,
                       'wd': 0.0005,
                       'clip_gradient': None,
                       'rescale_grad': 1.0}

mod.fit(data_iter, eval_data=data_iter, arg_params=args,eval_metric=['acc'],optimizer='sgd',num_epoch=10
        ,optimizer_params=optimizer_params
               )

In [None]:
mod.predict(data_iter)

In [None]:
data_iter.reset()

In [None]:
newargs=mod.get_params()

In [None]:
newargs[0]['codebook'].shape