## Create some data

In [22]:
import numpy as np
import time as t
from sklearn import random_projection
# Seed the global NumPy RNG once so that X, and every later draw made through
# np.random in this notebook, is reproducible after Restart Kernel -> Run All.
SEED = 0
np.random.seed(SEED)

n = 10000  # number of rows of X
d = 10000  # number of columns of X
# Uniform [0, 1) samples; at n = d = 10000 this is about 800 MB of float64.
X = np.random.rand(n, d)

## Random Gaussian Transform: approx O(nd)

In [35]:
# Row slices: every column of X, restricted to the first 500 / 1000 / all rows.
Xn1 = X[:500]
Xn2 = X[:1000]
Xn3 = X

# Column slices: every row of X, restricted to the first 500 / 1000 / all columns.
Xd1 = X[:, :500]
Xd2 = X[:, :1000]
Xd3 = X

# Create the random projection transformer. n_components is not given, so
# scikit-learn derives the output dimension from eps and the number of samples
# it is fitted on (see the shapes printed below).
transformer = random_projection.GaussianRandomProjection(eps=0.5)

# Each slice is transposed before fitting and the result is transposed back, so
# the projection shrinks the slice's row dimension.

# Rows: 500, 1000, all of X
timings, projections = [], []
for data in (Xn1, Xn2, Xn3):
    # perf_counter is the highest-resolution clock for short intervals;
    # time.time() is wall-clock time and can be coarse or jump.
    start = t.perf_counter()
    projected = transformer.fit_transform(data.T).T
    timings.append(t.perf_counter() - start)
    projections.append(projected)
t1, t2, t3 = timings
X_new1, X_new2, X_new3 = projections
print(t1, X_new1.shape, '\n', t2, X_new2.shape, '\n', t3, X_new3.shape)

# Columns: 500, 1000, all of X
timings, projections = [], []
for data in (Xd1, Xd2, Xd3):
    start = t.perf_counter()
    projected = transformer.fit_transform(data.T).T
    timings.append(t.perf_counter() - start)
    projections.append(projected)
t1, t2, t3 = timings
X_new1, X_new2, X_new3 = projections
print(t1, X_new1.shape, '\n', t2, X_new2.shape, '\n', t3, X_new3.shape)


0.04035806655883789 (442, 10000) 
 0.06374001502990723 (442, 10000) 
 0.5787320137023926 (442, 10000)
0.14344406127929688 (298, 500) 
 0.2151632308959961 (331, 1000) 
 0.5957870483398438 (442, 10000)


## Random Sparse Transform: O(nd)

In [9]:
# Row slices: every column of X, restricted to the first 500 / 1000 / all rows.
Xn1 = X[:500]
Xn2 = X[:1000]
Xn3 = X

# Column slices: every row of X, restricted to the first 500 / 1000 / all columns.
Xd1 = X[:, :500]
Xd2 = X[:, :1000]
Xd3 = X

# Create the random projection transformer. n_components is not given, so
# scikit-learn derives the output dimension from eps and the number of samples
# it is fitted on (see the shapes printed below).
transformer = random_projection.SparseRandomProjection(eps=0.5)

# Each slice is transposed before fitting and the result is transposed back, so
# the projection shrinks the slice's row dimension.

# Rows: 500, 1000, all of X
timings, projections = [], []
for data in (Xn1, Xn2, Xn3):
    # perf_counter is the highest-resolution clock for short intervals;
    # time.time() is wall-clock time and can be coarse or jump.
    start = t.perf_counter()
    projected = transformer.fit_transform(data.T).T
    timings.append(t.perf_counter() - start)
    projections.append(projected)
t1, t2, t3 = timings
X_new1, X_new2, X_new3 = projections
print(t1, X_new1.shape, '\n', t2, X_new2.shape, '\n', t3, X_new3.shape)

# Columns: 500, 1000, all of X
timings, projections = [], []
for data in (Xd1, Xd2, Xd3):
    start = t.perf_counter()
    projected = transformer.fit_transform(data.T).T
    timings.append(t.perf_counter() - start)
    projections.append(projected)
t1, t2, t3 = timings
X_new1, X_new2, X_new3 = projections
print(t1, X_new1.shape, '\n', t2, X_new2.shape, '\n', t3, X_new3.shape)

0.07978224754333496 (442, 10000) 
 0.10758686065673828 (442, 10000) 
 0.4223899841308594 (442, 10000)
0.10091781616210938 (298, 500) 
 0.1466672420501709 (331, 1000) 
 0.42191624641418457 (442, 10000)


## Implement FJLT via fast Hadamard transform: why is it so slow?

In [10]:
import numpy as np
import numpy.random as npr
import math
import scipy.sparse as sparse
import FWHT



def nextPow(d_act):
    """Return the smallest power of two that is >= ``d_act``.

    Works for integers of any size. (A fixed cascade of shifts by
    1, 2, 4, 8 and 16 only smears 32 bits and therefore returns a value that
    is not a power of two once the input exceeds 2**32.)

    Parameters
    ----------
    d_act : int
        Value to round up. NumPy integer scalars are accepted.

    Returns
    -------
    int
        ``d_act`` itself when it is already a power of two, otherwise the next
        larger one. Non-positive inputs return 0.
    """
    d_act = int(d_act)
    if d_act <= 0:
        return 0
    # (d_act - 1).bit_length() is the number of bits needed to represent
    # d_act - 1, i.e. the exponent of the first power of two >= d_act.
    return 1 << (d_act - 1).bit_length()

def fast_sample(n, sample_size):
    """Draw ``sample_size`` distinct integers uniformly from ``range(n)``.

    This is a partial Fisher-Yates shuffle over a *virtual* array
    ``a[j] = j``: only positions that have been swapped are stored (in a
    dict), so memory scales with ``sample_size`` rather than with ``n``.

    All random swap targets are drawn in a single vectorised ``randint`` call
    instead of one call per element. The sampling distribution is unchanged,
    but for a fixed seed the values differ from a one-call-per-element draw.

    Parameters
    ----------
    n : int
        Size of the population ``0 .. n-1``.
    sample_size : int
        Number of distinct values to draw; must satisfy
        ``0 <= sample_size <= n``.

    Returns
    -------
    numpy.ndarray
        1-D integer array of length ``sample_size`` with no repeated values.

    Raises
    ------
    ValueError
        If ``sample_size`` is negative or larger than ``n``.
    """
    if sample_size < 0 or sample_size > n:
        raise ValueError(
            "sample_size must satisfy 0 <= sample_size <= n, got "
            "sample_size={} with n={}".format(sample_size, n)
        )

    sample_wor = np.empty(sample_size, dtype=int)
    if sample_size == 0:
        return sample_wor

    # Step i swaps position i with a position drawn uniformly from [i, n).
    # Broadcasting the lower bound draws every target at once; tolist() turns
    # them into plain Python ints, which are cheaper as dict keys.
    swap_targets = npr.randint(np.arange(sample_size), n).tolist()

    # swap_records[j] holds the current value of virtual position j whenever
    # it differs from j.
    swap_records = {}
    for i, rand_ix in enumerate(swap_targets):
        # Position i is never looked at again, so drop its record while reading.
        el1 = swap_records.pop(i, i)
        if rand_ix == i:
            # Swapping a position with itself: the sampled value is a[i].
            sample_wor[i] = el1
        else:
            sample_wor[i] = swap_records.get(rand_ix, rand_ix)
            swap_records[rand_ix] = el1
    return sample_wor

def fjlt(A, k, q):
    """Apply a Fast Johnson-Lindenstrauss style transform to the columns of ``A``.

    The result is ``P . H . D . A`` where ``D`` flips the sign of each input
    row at random (and carries the scaling constant), ``H`` is the transform
    computed by ``FWHT.FWHT`` and ``P`` is a sparse matrix with Gaussian
    entries.

    Parameters
    ----------
    A : array of shape (d, n)
        Input data; the transform acts along the first axis.
    k : int
        Number of rows of ``P`` and therefore of the result.
    q : float
        Probability with which each of the ``k * d`` candidate entries of
        ``P`` is non-zero. The non-zero entries are drawn with standard
        deviation ``sqrt(1 / q)``, so ``q`` must not be 0.

    Returns
    -------
    The product ``P.dot(hda)``, with ``k`` rows.
    """
    d, n = A.shape

    # Pad the first dimension up to the next power of two.
    padded_dim = nextPow(d)
    scale_factor = np.sqrt(padded_dim / float(d * k))

    # D: a random +/- scale_factor per input row (0/1 coin flips mapped to -s/+s).
    coin_flips = npr.randint(0, 2, size=(d, 1))
    D = coin_flips * 2 * scale_factor - scale_factor
    DA = np.zeros((padded_dim, n))
    DA[:d, :] = A * D

    # H: FWHT.FWHT is applied to every column of DA (the rows of DA.T).
    # NOTE(review): the [:, :, 0] index assumes FWHT.FWHT returns a 2-D result
    # per input vector whose first column is the transform -- confirm against
    # the FWHT module.
    transformed = np.apply_along_axis(FWHT.FWHT, 1, DA.T)
    hda = transformed[:, :, 0].T

    # P: choose which of the k * d candidate entries are non-zero, then fill
    # them with N(0, 1/q) values.
    # NOTE(review): positions are sampled over (k, d) while P has padded_dim
    # columns, so columns d .. padded_dim - 1 of P are always zero.
    n_slots = k * d
    n_nonzero = npr.binomial(n_slots, q)
    flat_positions = fast_sample(n_slots, n_nonzero)
    p_rows, p_cols = np.unravel_index(flat_positions, (k, d))
    p_data = npr.normal(loc=0, scale=math.sqrt(1 / q), size=len(p_rows))
    P = sparse.csr_matrix((p_data, (p_rows, p_cols)), shape=(k, padded_dim))
    return P.dot(hda)

In [13]:
# Time one FJLT of X down to k = 512 rows.
# With q = 1 the number of non-zero entries of P is binomial(k * d, 1) = k * d,
# i.e. every candidate entry is sampled, so this is the most expensive setting.
# perf_counter is the highest-resolution clock for measuring an interval.
start = t.perf_counter()
b = fjlt(X, 512, 1)
end = t.perf_counter()
print(end - start)
b.shape

58.92161703109741


(512, 10000)

## Random Sampling 

In [33]:
from numpy.random import choice

# Number of rows to draw.
# NOTE(review): presumably 442 to match the row count of the full-size
# projections printed earlier -- confirm.
N_SAMPLED_ROWS = 442

# unweighted: row indices drawn uniformly, with replacement (choice's default)
# choice(n, ...) samples from range(n) without first materialising the range.
start = t.perf_counter()
draw = choice(n, N_SAMPLED_ROWS)
X[draw, :]  # result discarded on purpose: only the row gather is being timed
end = t.perf_counter()
print(end - start)

# weighted by row norm
norms = np.linalg.norm(X, axis=1)
# ndarray.sum() is vectorised; the builtin sum() iterates element by element.
weights = norms / norms.sum()
start = t.perf_counter()
draw = choice(n, N_SAMPLED_ROWS, p=weights)
X[draw, :]  # result discarded on purpose: only the row gather is being timed
end = t.perf_counter()
print(end - start)

0.010253190994262695
0.008491039276123047
