# Sketch time comparison

This notebook compares the sketching times of the SRHT and CountSketch transforms on the same input matrices.

In [2]:
import numpy as np
import scipy.sparse as sparse
import numba
from numba import jit
import matplotlib.pyplot as plt
import line_profiler
from tabulate import tabulate
%load_ext line_profiler
%matplotlib inline
from fht import fht


In [4]:
from countSketch import *
from srht import *

In [13]:
@jit(nopython=True)
def countSketch_elt_stream(matrixA, sketch_size):
    """Build a CountSketch of ``matrixA`` by streaming over its nonzero entries.

    Every input row is assigned one uniformly random bucket in
    ``[0, sketch_size)`` and one random sign in ``{-1, +1}``.  Each nonzero
    entry ``matrixA[i, j]`` is multiplied by the sign of row ``i`` and added
    to column ``j`` of the bucket of row ``i``.

    Parameters
    ----------
    matrixA : 2-D array of shape (n, d)
        Dense input matrix; only its nonzero entries are visited.
    sketch_size : int
        Number of rows (buckets) in the returned sketch.

    Returns
    -------
    2-D float array of shape (sketch_size, d)
        The sketched matrix.
    """
    num_rows, num_cols = matrixA.shape
    row_idx, col_idx = np.nonzero(matrixA)

    # Draw the buckets first and the signs second so the random stream is
    # consumed in a fixed order.
    row_bucket = np.random.choice(sketch_size, num_rows, replace=True)
    row_sign = 2 * np.random.choice(2, num_rows, replace=True) - 1

    result = np.zeros((sketch_size, num_cols))
    for k in range(row_idx.shape[0]):
        r = row_idx[k]
        c = col_idx[k]
        result[row_bucket[r], c] += row_sign[r] * matrixA[r, c]
    return result

In [6]:
# Dense test matrix shared by the single-shot timing/profiling cells:
# 2**16 rows x 2**3 columns with 10% of the entries nonzero.
# random_state pins the generated matrix so that re-running the notebook
# times and profiles exactly the same input.
matrix = sparse.random(2**16, 2**3, density=0.1, random_state=0).toarray()
print(matrix.shape)

# Number of rows requested from each sketch in those cells.
sketch_size = 1000

(65536, 8)


In [11]:
# Baseline: time `fht` (imported from the `fht` package) applied directly to the dense matrix.
# presumably this is the fast Hadamard transform used inside the SRHT — TODO confirm
%timeit fht(matrix)

16.2 ms ± 335 µs per loop (mean ± std. dev. of 7 runs, 100 loops each)


In [19]:
# Line-by-line profile of srht_test on the dense matrix.
# NOTE(review): srht_test is not defined in this notebook; presumably it is exported by `from srht import *` — confirm.
%lprun -f srht_test srht_test(matrix)

In [26]:
# Line-by-line profile of srht_transform for the current `matrix` and `sketch_size`.
%lprun -f srht_transform srht_transform(matrix,sketch_size)

In [18]:
# Wall-clock timing of srht_transform for the current `matrix` and `sketch_size`.
%timeit srht_transform(matrix,sketch_size)

20.7 ms ± 498 µs per loop (mean ± std. dev. of 7 runs, 10 loops each)


In [10]:
# Wall-clock timing of countSketchStreaming for the current `matrix` and `sketch_size`.
# NOTE(review): countSketchStreaming is not defined in this notebook; presumably it is exported by `from countSketch import *` — confirm.
%timeit countSketchStreaming(matrix,sketch_size)

71.9 ms ± 980 µs per loop (mean ± std. dev. of 7 runs, 10 loops each)


In [14]:
# Line-by-line profile of countSketch_elt_stream for the current `matrix` and `sketch_size`.
# NOTE(review): countSketch_elt_stream is compiled with @jit(nopython=True); line_profiler may not
# record per-line timings for compiled code — confirm the profile output is meaningful.
%lprun -f countSketch_elt_stream countSketch_elt_stream(matrix, sketch_size)

In [10]:
import itertools
from timeit import default_timer as time

# Benchmark grid: every combination of row count, column count and density.
rows = [2**16, 2**17]
cols = [5, 10, 25, 50, 100]
density = [0.01, 0.1, 0.25, 0.5, 1.0]
sketch_sizes = [10*col for col in cols]

# SRHT sketch size for each column count (10 sketch rows per column).
srht_size_by_cols = dict(zip(cols, sketch_sizes))
# The CountSketch is given this many times more rows than the SRHT sketch.
CWT_SIZE_FACTOR = 100

for n_rows, n_columns, data_density in itertools.product(rows, cols, density):
    # Separate names for the two sketch sizes: the notebook-level
    # `sketch_size` is left untouched and the 100x factor is explicit.
    srht_sketch_size = srht_size_by_cols[n_columns]
    cwt_sketch_size = CWT_SIZE_FACTOR * srht_sketch_size
    data = sparse.random(n_rows, n_columns, data_density).toarray()

    # Reference value: squared Frobenius norm of the exact Gram matrix A^T A.
    hessian = data.T@data
    hessian_norm = np.linalg.norm(hessian,ord='fro')**2

    # Only the construction of the sketch is timed, not the Gram product.
    start = time()
    srht = srht_transform(data, srht_sketch_size)
    srht_sketch_time = time() - start

    srht_hessian = srht.T@srht
    srht_hessian_norm = np.linalg.norm(srht_hessian,ord='fro')**2
    # NOTE: despite the 'Rel error' column header, this is the ratio of the
    # sketched to the exact squared Frobenius norm.
    srht_hessian_error = srht_hessian_norm/hessian_norm

    # NOTE(review): countSketch_elt_stream is numba-jitted; presumably it has
    # already been compiled by an earlier cell, otherwise the first timing
    # includes compilation — confirm.
    start = time()
    cwt = countSketch_elt_stream(data, cwt_sketch_size)
    cwt_sketch_time = time() - start

    cwt_hessian = cwt.T@cwt
    cwt_hessian_norm = np.linalg.norm(cwt_hessian,ord='fro')**2
    cwt_hessian_error = cwt_hessian_norm/hessian_norm

    print(tabulate([
        ['SRHT', n_rows, n_columns, data_density, srht_sketch_time, srht_hessian_error],
        ['CWT', '  ', '  ', '  ',  cwt_sketch_time, cwt_hessian_error]],
        headers=['Sketch', 'Rows', 'Cols', 'Density', 'Sketch time', 'Rel error'],
        tablefmt='orgtbl'))

| Sketch   | Rows   | Cols   | Density   |   Sketch time |   Rel error |
|----------+--------+--------+-----------+---------------+-------------|
| SRHT     | 65536  | 5      | 0.01      |    0.01054    |    0.96833  |
| CWT      |        |        |           |    0.00398369 |    0.980354 |
| Sketch   | Rows   | Cols   | Density   |   Sketch time |   Rel error |
|----------+--------+--------+-----------+---------------+-------------|
| SRHT     | 65536  | 5      | 0.1       |    0.010353   |    1.24084  |
| CWT      |        |        |           |    0.00362839 |    0.985926 |
| Sketch   | Rows   | Cols   | Density   |   Sketch time |   Rel error |
|----------+--------+--------+-----------+---------------+-------------|
| SRHT     | 65536  | 5      | 0.25      |    0.0113303  |    1.2232   |
| CWT      |        |        |           |    0.00469159 |    0.996993 |
| Sketch   | Rows   | Cols   | Density   |   Sketch time |   Rel error |
|----------+--------+--------+-----------+---------

| Sketch   | Rows   | Cols   | Density   |   Sketch time |   Rel error |
|----------+--------+--------+-----------+---------------+-------------|
| SRHT     | 131072 | 10     | 0.01      |    0.0404908  |    0.936938 |
| CWT      |        |        |           |    0.00740898 |    1.00482  |
| Sketch   | Rows   | Cols   | Density   |   Sketch time |   Rel error |
|----------+--------+--------+-----------+---------------+-------------|
| SRHT     | 131072 | 10     | 0.1       |     0.040668  |    1.03148  |
| CWT      |        |        |           |     0.0108907 |    0.989558 |
| Sketch   | Rows   | Cols   | Density   |   Sketch time |   Rel error |
|----------+--------+--------+-----------+---------------+-------------|
| SRHT     | 131072 | 10     | 0.25      |     0.0450755 |    0.939524 |
| CWT      |        |        |           |     0.0149536 |    0.993367 |
| Sketch   | Rows   | Cols   | Density   |   Sketch time |   Rel error |
|----------+--------+--------+-----------+---------

In [18]:
# Test the norm of the returned values
#
# For every (rows, cols, density) combination, sketch a random matrix with
# each method and print two ratios of squared norms (sketched / exact):
#   "Vec norm": ||S A x||^2 / ||A x||^2 for one random Gaussian vector x
#   "Mat norm": ||(S A)^T (S A)||^2 / ||A^T A||^2
# (np.linalg.norm with its default `ord`).

rows = [25000, 50000]
cols = [10, 100, 500]
density = [0.01, 0.1, 0.25, 0.5, 1.0]
sketch_sizes = [10*col for col in cols]

# Sketch size for each column count (10 sketch rows per column).
sketch_size_by_cols = dict(zip(cols, sketch_sizes))
# Methods are evaluated and reported in this order.
sketch_methods = [("SRHT", srht_transform), ("CWTE", countSketch_elt_stream)]
report_format = "{} Rows: {} Cols: {} Sketch Size: {} Density: {} Vec norm: {} Mat norm: {}"

for n_rows, n_columns, data_density in itertools.product(rows, cols, density):
    sketch_size = sketch_size_by_cols[n_columns]
    data = sparse.random(n_rows, n_columns, data_density).toarray()
    test_vector = np.random.randn(data.shape[1])
    true_mat_vec_norm = np.linalg.norm(data@test_vector)**2
    true_covariance_norm = np.linalg.norm(data.T@data)**2

    # Compute every method's ratios before printing, so nothing is printed
    # for this setting unless all sketches succeeded.
    report_lines = []
    for method_name, sketch_fn in sketch_methods:
        sketch = sketch_fn(data, sketch_size)
        vec_ratio = np.linalg.norm(sketch@test_vector)**2 / true_mat_vec_norm
        mat_ratio = np.linalg.norm(sketch.T@sketch)**2 / true_covariance_norm
        report_lines.append(report_format.format(
            method_name, n_rows, n_columns, sketch_size, data_density, vec_ratio, mat_ratio))

    for line in report_lines:
        print(line)

SRHT Rows: 25000 Cols: 10 Sketch Size: 100 Density: 0.01 Vec norm: 1.1312787102561204 Mat norm: 1.0647174236549088
CWTE Rows: 25000 Cols: 10 Sketch Size: 100 Density: 0.01 Vec norm: 1.0222613846662432 Mat norm: 1.0575537057519346
SRHT Rows: 25000 Cols: 10 Sketch Size: 100 Density: 0.1 Vec norm: 0.9318728694310383 Mat norm: 1.0984999808816736
CWTE Rows: 25000 Cols: 10 Sketch Size: 100 Density: 0.1 Vec norm: 1.033499901432821 Mat norm: 1.218831713447632
SRHT Rows: 25000 Cols: 10 Sketch Size: 100 Density: 0.25 Vec norm: 0.8360495801472566 Mat norm: 1.2195216649701677
CWTE Rows: 25000 Cols: 10 Sketch Size: 100 Density: 0.25 Vec norm: 0.820159969582668 Mat norm: 1.0416515376042939
SRHT Rows: 25000 Cols: 10 Sketch Size: 100 Density: 0.5 Vec norm: 0.8282833419088235 Mat norm: 1.007109517702544
CWTE Rows: 25000 Cols: 10 Sketch Size: 100 Density: 0.5 Vec norm: 1.0335206199259088 Mat norm: 0.954980780886542
SRHT Rows: 25000 Cols: 10 Sketch Size: 100 Density: 1.0 Vec norm: 0.9153791999218154 Mat 