## Gaussian density estimation

In [1]:
# First let's generate some non-trivial data.
# Namely, n_vectors vectors close to a d_inner-dimensional subspace of a
# d_ambient-dimensional space.
import numpy as np

# Seed numpy's global RNG so that "Restart Kernel -> Run All" reproduces the
# same data (and the same later np.random draws) on every run.
# NOTE(review): this only controls numpy's global generator; if GDE draws
# randomness from another source its output may still vary — confirm.
SEED = 0
np.random.seed(SEED)

n_vectors = 10000
d_ambient = 14
d_inner = 4
radius = np.sqrt(d_ambient)

# Low-rank signal: random d_inner-dimensional coefficients pushed through a
# random linear map into the ambient space.
latent = np.random.randn(n_vectors, d_inner)
mixing = np.random.randn(d_inner, d_ambient)
# Small isotropic noise so the points are only *close* to the subspace.
noise = 0.1 * np.random.randn(n_vectors, d_ambient)

# Rescale everything by sqrt(d_ambient).
vectors = (latent @ mixing + noise) / radius

In [2]:
# Now we can create a coreset like this
from gde import GDE
# Sketch configuration: d is the dimension of the data; k is the first
# argument handed to GDE (presumably it controls the sketch size — confirm
# against the gde module).
d = d_ambient
k = 32

# Stream the data through the sketch, one update per row.
gde = GDE(k, d)
for row in vectors:
    gde.update(row)

# Let's see how large the coreset is.
print(f"The sketch coreset contains {gde.size()} elements")

The sketch coreset contains 95 elements


In [3]:
# To see how good the sketch is we need to define the kernel function
def kernel(vector_1, vector_2):
    """Gaussian kernel exp(-||vector_1 - vector_2||^2) between two vectors.

    Both arguments may be numpy arrays or plain Python sequences of equal
    shape; they are converted with np.asarray so that two plain lists can be
    compared as well (subtracting two lists directly raises TypeError).

    Returns a float in (0, 1], equal to 1.0 when the inputs coincide.
    """
    difference = np.asarray(vector_1) - np.asarray(vector_2)
    return np.exp(-np.linalg.norm(difference)**2)

## And, we'll need a function that computes the exact density to compare to
def density(vectors, query):
    """Exact kernel density at `query`: the mean of kernel(v, query) over all v in `vectors`."""
    kernel_values = [kernel(point, query) for point in vectors]
    return np.mean(kernel_values)

# Baseline to compare against: a plain uniform sample of the data holding
# exactly as many points as the coreset.
# (np.random.choice draws with replacement by default.)
n_sampled = gde.size()
sampled_indices = np.random.choice(n_vectors, n_sampled)
sampled_vectors = vectors[sampled_indices]

In [4]:
# Query 1: the density at the origin.
query = [0.0 for _ in range(d)]
print(f"Actual density = {density(vectors, query)}")
print(f"Coreset approx = {gde.query(query)}")
print(f"Sampled approx = {density(sampled_vectors, query)}")

Actual density = 0.11256883823890199
Coreset approx = 0.08209484634504294
Sampled approx = 0.09587554727332566


In [5]:
# Query 2: a point taken from the data set itself (its first row).
query = vectors[0]
print(f"Actual density = {density(vectors, query)}")
print(f"Coreset approx = {gde.query(query)}")
print(f"Sampled approx = {density(sampled_vectors, query)}")

Actual density = 0.036754530288669346
Coreset approx = 0.023654805900234305
Sampled approx = 0.04100985508115075


In [6]:
# Query 3: a random Gaussian direction scaled by 1/sqrt(d), so its expected
# squared norm is 1.
query = np.random.randn(d) / np.sqrt(d)
print(f"Actual density = {density(vectors, query)}")
print(f"Coreset approx = {gde.query(query)}")
print(f"Sampled approx = {density(sampled_vectors, query)}")

Actual density = 0.04508443316071552
Coreset approx = 0.032877746090775505
Sampled approx = 0.03920073467354825


In [7]:
# Query 4: a random Gaussian query that is NOT rescaled (expected squared
# norm d), i.e. a point of much larger magnitude than the previous query.
query = np.random.randn(d)
print(f"Actual density = {density(vectors, query)}")
print(f"Coreset approx = {gde.query(query)}")
print(f"Sampled approx = {density(sampled_vectors, query)}")

Actual density = 1.9803101046615824e-05
Coreset approx = 1.5915897849219183e-05
Sampled approx = 1.9321368352452334e-05
