In [23]:
# Standard library
import math

# Third-party
import matplotlib.pyplot as plt
import numpy as np
import pywt
import scipy
from scipy import ndimage
from sklearn import metrics
# make_blobs comes from the public sklearn.datasets namespace; the older
# sklearn.datasets.samples_generator module path was deprecated and later
# removed from scikit-learn.
from sklearn.datasets import make_blobs
from sklearn.preprocessing import StandardScaler

# Create some random data to be tested for WaveCluster

In [265]:
# Synthetic data set: 1000 samples with 3 features drawn by make_blobs
# (random_state is fixed, so the draw is repeatable).
#centers = [[1, 1], [-1, -1], [1, -1]]
X_raw, y = make_blobs(
    n_samples=1000,
    n_features=3,
    cluster_std=0.4,
    random_state=0,
)

# Rescale the features with StandardScaler before the data is binned.
scaler = StandardScaler()
X = scaler.fit_transform(X_raw)

# Segment the space into cubes and bin the data

In [275]:
# Grid configuration shared by the cells below.
layers = 8             # exponent of the per-axis bin count; later loops also index DWT levels 1..layers
n_bins = 1 << layers   # 2**layers bins along each axis
d = np.shape(X)[1]     # number of dimensions (columns of X)

In [276]:
# Bin the points on a regular grid with n_bins cells along every axis.
# histogramdd returns a (counts, edges) pair; the counts array is the
# quantised data that the wavelet transform is applied to.
H = np.histogramdd(X, bins=n_bins)
data_quant, bin_edges = H

In [233]:
#plt.imshow(data_quant)

# Compute the DWT

In [309]:
#pywt.wavelist()
#Images: https://www.mathworks.com/help/wavelet/gs/introduction-to-the-wavelet-families.html

In [289]:
# Name of the wavelet used for the transform; printing the Wavelet object
# lists its properties.
wave = 'db1'
wavelet_obj = pywt.Wavelet(wave)
print(wavelet_obj)

Wavelet db1
  Family name:    Daubechies
  Short name:     db
  Filters length: 2
  Orthogonal:     True
  Biorthogonal:   True
  Symmetry:       asymmetric
  DWT:            True
  CWT:            False


In [322]:
# Multilevel n-dimensional wavelet decomposition of the binned counts.
# (Single-level alternative: pywt.dwtn(data=data_quant, wavelet=wave))
wp = pywt.wavedecn(data_quant, wave)

In [331]:
# Working copy of the coefficients for the thresholding step. Thresholding
# overwrites this structure, so it must not share arrays with wp. Copying every
# coefficient array yields the same values as running the transform a second
# time, without paying for the transform again.
wp_c = [wp[0].copy()] + [
    {band: coeffs.copy() for band, coeffs in level.items()}
    for level in wp[1:]
]

# Threshold the results of DWT

In [332]:
# Threshold: coefficients whose magnitude is below this value are set to 0
# by the thresholding step, all others to 1.
epsilon = 0.01

In [333]:
#Threshold the DWT: every coefficient becomes 1 where |c| >= epsilon, else 0.

# Approximation coefficients: binarise element-wise and keep an array, so the
# step also works when the approximation block has more than one element
# (an `if` on a multi-element array raises ValueError) and wp_c keeps the
# same list layout as wp.
approx = np.asarray(wp_c[0])
wp_c[0] = (np.abs(approx) >= epsilon).astype(approx.dtype)

# Detail-band names; kept as a notebook-level name because later cells
# iterate over `keys` as well.
keys = wp[1].keys()

# Walk over however many detail levels the decomposition actually produced
# instead of assuming there are exactly `layers` of them.
for level_coeffs in wp_c[1:]:
    for k in keys:
        band = level_coeffs[k]
        level_coeffs[k] = (np.abs(band) >= epsilon).astype(band.dtype)

# Find connected components

In [334]:
#Compute the connected components of each thresholded DWT band.
# Adjacency is face connectivity: two cells are neighbours when they differ by
# one step along a single axis. This is the n-dimensional analogue of 2-D
# "four" connectivity and is the structure ndimage.label uses by default; it
# is passed explicitly here so the choice is visible.
# All detail levels present in wp_c are processed, rather than assuming the
# decomposition has exactly `layers` levels.
for level_coeffs in wp_c[1:]:
    for k in keys:
        band = level_coeffs[k]
        structure = ndimage.generate_binary_structure(band.ndim, 1)
        labels, n_components = ndimage.label(band, structure=structure)
        # Replace the 0/1 band with its integer label array (0 = background).
        level_coeffs[k] = labels

wp_c

[1,
 {'aad': array([[[1]]], dtype=int32),
  'ada': array([[[1]]], dtype=int32),
  'add': array([[[1]]], dtype=int32),
  'daa': array([[[1]]], dtype=int32),
  'dad': array([[[1]]], dtype=int32),
  'dda': array([[[1]]], dtype=int32),
  'ddd': array([[[1]]], dtype=int32)},
 {'aad': array([[[0, 0],
          [0, 1]],
  
         [[2, 0],
          [2, 0]]], dtype=int32), 'ada': array([[[1, 0],
          [0, 2]],
  
         [[1, 0],
          [1, 0]]], dtype=int32), 'add': array([[[0, 0],
          [0, 1]],
  
         [[2, 0],
          [2, 0]]], dtype=int32), 'daa': array([[[1, 0],
          [0, 2]],
  
         [[1, 0],
          [1, 0]]], dtype=int32), 'dad': array([[[0, 0],
          [0, 1]],
  
         [[2, 0],
          [2, 0]]], dtype=int32), 'dda': array([[[1, 0],
          [0, 2]],
  
         [[1, 0],
          [1, 0]]], dtype=int32), 'ddd': array([[[0, 0],
          [0, 1]],
  
         [[2, 0],
          [2, 0]]], dtype=int32)},
 {'aad': array([[[0, 0, 0, 0],
          [0, 0,

# Create Lookup Table

In [305]:
#Placeholder for building one mask per connected component - NOT DONE YET.
# For now this only runs ndimage.label over every detail band of levels
# 1..layers again and stores the resulting label array back into wp_c.
for i in range(1, layers + 1):
    bands = wp_c[i]
    for k in keys:
        component = ndimage.label(bands[k])
        bands[k] = component[0]

wp_c

[1,
 {'aad': array([[[0]]], dtype=int32),
  'ada': array([[[0]]], dtype=int32),
  'add': array([[[0]]], dtype=int32),
  'daa': array([[[0]]], dtype=int32),
  'dad': array([[[1]]], dtype=int32),
  'dda': array([[[0]]], dtype=int32),
  'ddd': array([[[0]]], dtype=int32)},
 {'aad': array([[[0, 0],
          [0, 1]],
  
         [[0, 0],
          [2, 0]]], dtype=int32), 'ada': array([[[0, 0],
          [0, 1]],
  
         [[2, 0],
          [2, 0]]], dtype=int32), 'add': array([[[0, 0],
          [0, 1]],
  
         [[0, 0],
          [2, 0]]], dtype=int32), 'daa': array([[[0, 0],
          [0, 0]],
  
         [[0, 0],
          [0, 0]]], dtype=int32), 'dad': array([[[0, 0],
          [0, 0]],
  
         [[0, 0],
          [0, 0]]], dtype=int32), 'dda': array([[[0, 0],
          [0, 0]],
  
         [[0, 0],
          [0, 0]]], dtype=int32), 'ddd': array([[[0, 0],
          [0, 0]],
  
         [[0, 0],
          [0, 0]]], dtype=int32)},
 {'aad': array([[[0, 0, 0, 0],
          [0, 0,

In [338]:
# Pick one labelled detail band to experiment with. Note that this is a
# reference to the array stored in wp_c, not a copy.
bands_at_3 = wp_c[3]
ex = bands_at_3['daa']
ex

array([[[0, 0, 0, 0],
        [0, 0, 0, 0],
        [0, 0, 0, 0],
        [0, 0, 1, 1]],

       [[2, 2, 0, 0],
        [0, 0, 0, 0],
        [2, 0, 0, 0],
        [0, 0, 1, 1]],

       [[2, 2, 0, 0],
        [0, 0, 0, 0],
        [2, 0, 0, 0],
        [0, 0, 0, 1]],

       [[2, 2, 0, 0],
        [2, 0, 0, 0],
        [2, 0, 0, 0],
        [0, 0, 0, 0]]], dtype=int32)

In [344]:
# Candidate label values: 1 up to the largest value found in ex.
range(1, ex.max() + 1)

range(1, 2)

In [339]:
#Create a mask for component 1.  Do this for each number in ex.
# The mask is built as a NEW array: ex was taken directly from
# wp_c[3]['daa'], so zeroing entries of ex in place would also erase the
# labels of every other component inside wp_c.
ex = (ex == 1).astype(ex.dtype)
ex

array([[[0, 0, 0, 0],
        [0, 0, 0, 0],
        [0, 0, 0, 0],
        [0, 0, 1, 1]],

       [[0, 0, 0, 0],
        [0, 0, 0, 0],
        [0, 0, 0, 0],
        [0, 0, 1, 1]],

       [[0, 0, 0, 0],
        [0, 0, 0, 0],
        [0, 0, 0, 0],
        [0, 0, 0, 1]],

       [[0, 0, 0, 0],
        [0, 0, 0, 0],
        [0, 0, 0, 0],
        [0, 0, 0, 0]]], dtype=int32)

In [342]:
# Element-wise (Schur) product of the original DWT band with the 0/1 mask:
# coefficients inside the mask are kept, everything else becomes zero.
wp[3]['daa'] * ex

array([[[ 0.        ,  0.        ,  0.        ,  0.        ],
        [ 0.        ,  0.        ,  0.        ,  0.        ],
        [ 0.        ,  0.        ,  0.        ,  0.        ],
        [ 0.        ,  0.        , -0.01367188, -2.24023438]],

       [[-0.        , -0.        ,  0.        ,  0.        ],
        [ 0.        ,  0.        ,  0.        ,  0.        ],
        [-0.        ,  0.        ,  0.        ,  0.        ],
        [ 0.        ,  0.        ,  0.0234375 ,  2.51367188]],

       [[-0.        , -0.        ,  0.        ,  0.        ],
        [-0.        ,  0.        ,  0.        ,  0.        ],
        [-0.        , -0.        ,  0.        ,  0.        ],
        [ 0.        ,  0.        ,  0.        ,  0.015625  ]],

       [[ 0.        ,  0.        ,  0.        ,  0.        ],
        [ 0.        ,  0.        ,  0.        ,  0.        ],
        [ 0.        ,  0.        ,  0.        ,  0.        ],
        [ 0.        ,  0.        ,  0.        ,  0.        ]]])

# Trash

In [231]:
#Create the bins
#bin_list = []
#for i in list(range(d)):
#    dim_min = math.floor(min(X[:,i]))
#    dim_max = math.ceil(max(X[:,i]))
#    dim_bin = np.linspace(start=dim_min, stop=dim_max, num=n_bins)
#    bin_list.append(dim_bin)
#    
#bin_list = np.stack(bin_list, axis=0)

In [232]:
#Quantize data into the bins
#data_quant = []
#index_key = []
#for b in list(range(d)):
#    inds = np.digitize(X[:,b], bin_list[b])
#    index_key.append(inds)
#    data_quant.append(np.bincount(inds, minlength = n_bins))
#
#index_key = np.stack(index_key,axis=0)
#data_quant = np.stack(data_quant, axis=0)