In [1]:
import pickle
import math
import numpy as np
import torch
import matplotlib.pyplot as plt
import higra as hg
%matplotlib inline
import time

from IPython import embed

In [3]:
device = torch.device('cuda')
print(f'Using device={device}')

Using device=cuda


In [4]:
def hac_gpu_single(X, _MAX_DIST=10, verbose=False, device=device):
    # Initialization
    D = X.size(1)
    # Take the upper triangular and mask the other values with a large number
    Y = _MAX_DIST*torch.ones(D,D, device=device).tril() + X.triu(1)
    parents = torch.arange(D+(D-1), device=device)
    parent_to_idx = torch.arange(D+(D-1), device=device)
    idx_to_parent = torch.arange(D, device=device)
    values, indices = torch.min(Y, dim=1)
    
    if verbose:
        print('Initialization:')
        print('Y:', Y)
        print('\tparents:', parents)
        print('\tparent_to_idx:', parent_to_idx)
        print('\tidx_to_parent:', idx_to_parent)
        print('\tminima (values):', values)
        print('\tminima (indices):', indices)
        print()
    
    ####################################
    
    max_node = D-1
    if verbose:
        print('Starting algorithm:')
    for i in range(D-1):
        max_node += 1
        min_minima_idx = torch.argmin(values).item()

        # Merge the index of the minimum value of minimums across rows with the index of the minimum value in its row
        merge_idx_1 = min_minima_idx
        merge_idx_2 = indices[merge_idx_1].item()

        # Find highest-altitude clusters corresponding to the merge indices
        parent_1 = idx_to_parent[parent_to_idx[idx_to_parent[merge_idx_1]]]
        parent_2 = idx_to_parent[parent_to_idx[idx_to_parent[merge_idx_2]]]

        if verbose:
            print(f'    #{i} Merging:',(merge_idx_1, merge_idx_2),'i.e.', (parent_1.item(), parent_2.item()), 
                  '=>', max_node)

        # Add parent for the clusters being merged
        parents[parent_1] = max_node
        parents[parent_2] = max_node

        # Update mappings
        idx_to_parent[merge_idx_1] = max_node
        parent_to_idx[max_node] = merge_idx_1

        # Update the matrix with merged values for cluster similarities
        max_dist_mask = Y == _MAX_DIST
        new_merge_idx_1_values = torch.min(torch.min(Y[merge_idx_1, :], Y[:, merge_idx_2]), 
                                           torch.min(Y[:, merge_idx_1], Y[merge_idx_2, :]))
        Y[:, merge_idx_1] = new_merge_idx_1_values
        Y[merge_idx_1, :] = new_merge_idx_1_values
        Y[max_dist_mask] = _MAX_DIST
        Y[:, merge_idx_2] = _MAX_DIST
        Y[merge_idx_2, :] = _MAX_DIST

        # Update nearest neighbour trackers
        values[merge_idx_2] = _MAX_DIST
        indices[indices == merge_idx_2] = merge_idx_1
        new_min_idx = torch.argmin(Y[merge_idx_1, merge_idx_1+1:]) + (merge_idx_1 + 1)
        values[min_minima_idx] = Y[merge_idx_1, new_min_idx]
        indices[min_minima_idx] = new_min_idx

        if verbose:
            print('Y:', Y)
            print('\tminima (values):', values)
            print('\tminima (indices):', indices)
            print('\tparents:', parents)
            print('\tparent_to_idx:', parent_to_idx)
            print('\tidx_to_parent:', idx_to_parent)
            print()
    
    return parents

In [6]:
def get_parents_from_higra(X, linkage):
    _g = hg.UndirectedGraph(D)
    _g.add_edges(torch.triu_indices(D,D,1).numpy()[0], torch.triu_indices(D,D,1).numpy()[1])
    _data = X.cpu()[torch.triu_indices(D,D,1)[0],torch.triu_indices(D,D,1)[1]].numpy()
    hg_func = {
        'single': hg.binary_partition_tree_single_linkage,
        'average': hg.binary_partition_tree_average_linkage,
    }
    _hg_hac = hg_func[linkage](_g, _data)
    return _hg_hac[0].parents()

In [184]:
# torch.manual_seed(15)

# Construct a random, symmetric matrix of values between 0 and 1
D = 4000
X = torch.rand((D,D), device=device)
X = X.triu(1) + X.triu(1).T + torch.eye(D, device=device)
print(X)

tensor([[1.0000, 0.5314, 0.7572,  ..., 0.5863, 0.6819, 0.4844],
        [0.5314, 1.0000, 0.4148,  ..., 0.2384, 0.0924, 0.4560],
        [0.7572, 0.4148, 1.0000,  ..., 0.2976, 0.1460, 0.8615],
        ...,
        [0.5863, 0.2384, 0.2976,  ..., 1.0000, 0.5498, 0.2335],
        [0.6819, 0.0924, 0.1460,  ..., 0.5498, 1.0000, 0.8720],
        [0.4844, 0.4560, 0.8615,  ..., 0.2335, 0.8720, 1.0000]],
       device='cuda:0')


In [183]:
# Test suite to verify accuracy of solution

for D in range(10,101,10):
    # D = 5
    X = torch.rand((D,D), device=device)
    X = X.triu(1) + X.triu(1).T + torch.eye(D, device=device)

    # Get HAC parents from GPU code
    _parents = hac_gpu_single(X, verbose=False)
#     print('GPU HAC parents:', _parents.tolist())

    # Get HAC parents from Higra
    _hg_parents = get_parents_from_higra(X, linkage='single')
#     print('Higra HAC parents:', list(_hg_parents))

    assert np.array_equal(_parents.tolist(), list(_hg_parents))

#### Measure performance difference

In [None]:
# Ours: Single-linkage

# %%timeit

torch.cuda.synchronize()
torch.cuda.synchronize()

a = time.perf_counter()

_parents = hac_gpu(X, verbose=False)

b = time.perf_counter()
print('{:.02e}s'.format(b - a))

In [None]:
# Higra: Single-linkage

# %%timeit

a = time.perf_counter()

_hg_parents = get_parents_from_higra(X, linkage='single')

b = time.perf_counter()
print('{:.02e}s'.format(b - a))

In [70]:
# STABLE; v1

def hac_gpu_avg(X, _MAX_DIST=10, verbose=False, device=device):
    # Initialization
    D = X.size(1)
    # Take the upper triangular and mask the other values with a large number
    Y = _MAX_DIST*torch.ones(D,D, device='cuda').tril() + X.triu(1)
    parents = torch.arange(D+(D-1), device=device)
    parent_to_idx = torch.arange(D+(D-1), device=device)
    idx_to_parent = torch.arange(D, device=device)
    cluster_sizes = torch.ones(D+(D-1), device=device)
    values, indices = torch.min(Y, dim=1)
    
    if verbose:
        print('Initialization:')
        print('Y:', Y)
        print('\tparents:', parents)
        print('\tparent_to_idx:', parent_to_idx)
        print('\tidx_to_parent:', idx_to_parent)
        print('\tminima (values):', values)
        print('\tminima (indices):', indices)
        print('\tcluster_sizes:', cluster_sizes)
        print()
    
    ####################################
    
    max_node = D-1
    if verbose:
        print('Starting algorithm:')
    for i in range(D-1):
        max_node += 1
        min_minima_idx = torch.argmin(values).item()

        # Merge the index of the minimum value of minimums across rows with the index of the minimum value in its row
        merge_idx_1 = min_minima_idx
        merge_idx_2 = indices[merge_idx_1].item()

        # Find highest-altitude clusters corresponding to the merge indices
        parent_1 = idx_to_parent[parent_to_idx[idx_to_parent[merge_idx_1]]].item()
        parent_2 = idx_to_parent[parent_to_idx[idx_to_parent[merge_idx_2]]].item()

        if verbose:
            print(f'    #{i} Merging:',(merge_idx_1, merge_idx_2),'i.e.', (parent_1, parent_2), '=>', max_node)

        # Add parent for the clusters being merged
        parents[parent_1] = max_node
        parents[parent_2] = max_node

        # Update mappings
        idx_to_parent[merge_idx_1] = max_node
        parent_to_idx[max_node] = merge_idx_1

        # Update the matrix with merged values for cluster similarities
        max_dist_mask = Y == _MAX_DIST
        new_cluster_size = cluster_sizes[parent_1] + cluster_sizes[parent_2]
        cluster_sizes[max_node] = new_cluster_size
        new_merge_idx_1_values = (torch.min(Y[merge_idx_1, :], Y[:, merge_idx_1]) * cluster_sizes[parent_1] + \
                                  torch.min(Y[:, merge_idx_2], Y[merge_idx_2, :]) * cluster_sizes[parent_2]) / \
                                    new_cluster_size
        Y[:, merge_idx_1] = new_merge_idx_1_values
        Y[merge_idx_1, :] = new_merge_idx_1_values
        Y[max_dist_mask] = _MAX_DIST
        Y[:, merge_idx_2] = _MAX_DIST
        Y[merge_idx_2, :] = _MAX_DIST

        # Update nearest neighbour trackers
        values[merge_idx_2] = _MAX_DIST
        
        max_dist_mask = values == _MAX_DIST
        values, indices = torch.min(Y, dim=1)
        values[max_dist_mask] = _MAX_DIST

        if verbose:
            print('Y:', Y)
            print('\tminima (values):', values)
            print('\tminima (indices):', indices)
            print('\tparents:', parents)
            print('\tparent_to_idx:', parent_to_idx)
            print('\tidx_to_parent:', idx_to_parent)
            print('\tcluster_sizes:', cluster_sizes)
            print()
    
    return parents

In [76]:
# Construct a random, symmetric matrix of values between 0 and 1
D = 4000
X = torch.rand((D,D), device=device)
X = X.triu(1) + X.triu(1).T + torch.eye(D, device=device)
print(X)

tensor([[1.0000, 0.8396, 0.3640,  ..., 0.9660, 0.3892, 0.4684],
        [0.8396, 1.0000, 0.1113,  ..., 0.4993, 0.6405, 0.4826],
        [0.3640, 0.1113, 1.0000,  ..., 0.8844, 0.7767, 0.3587],
        ...,
        [0.9660, 0.4993, 0.8844,  ..., 1.0000, 0.7781, 0.7506],
        [0.3892, 0.6405, 0.7767,  ..., 0.7781, 1.0000, 0.9799],
        [0.4684, 0.4826, 0.3587,  ..., 0.7506, 0.9799, 1.0000]],
       device='cuda:0')


In [75]:
# Get HAC parents from GPU code
_parents = hac_gpu_avg(X, verbose=False, device='cpu')
print('GPU HAC parents:', _parents.tolist())

# Get HAC parents from Higra
_hg_parents = get_parents_from_higra(X, linkage='average')
print('Higra HAC parents:', list(_hg_parents))

assert np.array_equal(_parents.tolist(),list(_hg_parents))

GPU HAC parents: [6, 8, 6, 7, 8, 7, 9, 9, 10, 10, 10]
Higra HAC parents: [6, 8, 6, 7, 8, 7, 9, 9, 10, 10, 10]


In [193]:
# Test suite

for D in range(10,101,10):
    X = torch.rand((D,D), device=device)
    X = X.triu(1) + X.triu(1).T + torch.eye(D, device=device)

    # Get HAC parents from GPU code
    _parents = hac_gpu_avg(X, verbose=False)
#     print('GPU HAC parents:', _parents.tolist())

    # Get HAC parents from Higra
    _hg_parents = get_parents_from_higra(X, linkage='average')
#     print('Higra HAC parents:', list(_hg_parents))

    assert np.array_equal(_parents.tolist(), list(_hg_parents))

#### Measure performance difference

In [78]:
# Ours: Average-linkage

# %%timeit

torch.cuda.synchronize()
torch.cuda.synchronize()

a = time.perf_counter()

_parents = hac_gpu_avg(X, verbose=False, device='cpu')

b = time.perf_counter()
print('{:.02e}s'.format(b - a))

8.46e+00s


In [192]:
# Higra: Average-linkage

# %%timeit

a = time.perf_counter()

_hg_parents = get_parents_from_higra(X, linkage='average')

b = time.perf_counter()
print('{:.02e}s'.format(b - a))

3.86e+01s


In [151]:
# Direct hac-cut rounding

def avg_hac_cut(X, weights, _MAX_DIST=10, verbose=False, device='cpu', use_similarities=False):
    # Initialization
    D = X.size(1)
    parents = torch.arange(D+(D-1))
    parent_to_idx = torch.arange(D+(D-1))
    idx_to_parent = torch.arange(D)
    cluster_sizes = torch.ones(D+(D-1))
    
    energy = torch.zeros(D+(D-1), device=device)
    clustering = torch.zeros((D+(D-1), D))
    clustering[torch.arange(D),torch.arange(D)] = torch.arange(1,D+1, dtype=clustering.dtype)
    round_matrix = torch.eye(D, device=device)
    
    # Take the upper triangular and mask the other values with a large number
    Y = _MAX_DIST*torch.ones(D,D, device=device).tril() + (-1 if use_similarities else 1) * X.triu(1)
    # Compute the dissimilarity minima per row
    values, indices = torch.min(Y, dim=1)
    
    if verbose:
        print('Initialization:')
        print('Y:', Y)
        print('\tparents:', parents)
        print('\tparent_to_idx:', parent_to_idx)
        print('\tidx_to_parent:', idx_to_parent)
        print('\tminima (values):', values)
        print('\tminima (indices):', indices)
        print('\tcluster_sizes:', cluster_sizes)
        print()
    
    ####################################
    
    max_node = D-1
    if verbose:
        print('Starting algorithm:')
    for i in range(D-1):
        max_node += 1
        min_minima_idx = torch.argmin(values).item()

        # Merge the index of the minimum value of minimums across rows with the index of the minimum value in its row
        merge_idx_1 = min_minima_idx
        merge_idx_2 = indices[merge_idx_1].item()

        # Find highest-altitude clusters corresponding to the merge indices
        parent_1 = idx_to_parent[parent_to_idx[idx_to_parent[merge_idx_1]]].item()
        parent_2 = idx_to_parent[parent_to_idx[idx_to_parent[merge_idx_2]]].item()

        if verbose:
            print(f'    #{i} Merging:',(merge_idx_1, merge_idx_2),'i.e.', (parent_1, parent_2), '=>', max_node)

        # Add parent for the clusters being merged
        parents[parent_1] = max_node
        parents[parent_2] = max_node

        # Update mappings
        idx_to_parent[merge_idx_1] = max_node
        parent_to_idx[max_node] = merge_idx_1

        # Update the matrix with merged values for cluster similarities
        max_dist_mask = Y == _MAX_DIST
        new_cluster_size = cluster_sizes[parent_1] + cluster_sizes[parent_2]
        cluster_sizes[max_node] = new_cluster_size
        new_merge_idx_1_values = (torch.min(Y[merge_idx_1, :], Y[:, merge_idx_1]) * cluster_sizes[parent_1] + \
                                  torch.min(Y[:, merge_idx_2], Y[merge_idx_2, :]) * cluster_sizes[parent_2]) / \
                                    new_cluster_size
        Y[:, merge_idx_1] = new_merge_idx_1_values
        Y[merge_idx_1, :] = new_merge_idx_1_values
        Y[max_dist_mask] = _MAX_DIST
        Y[:, merge_idx_2] = _MAX_DIST
        Y[merge_idx_2, :] = _MAX_DIST

        # Update nearest neighbour trackers
        values[merge_idx_2] = _MAX_DIST
        
        max_dist_mask = values == _MAX_DIST
        values, indices = torch.min(Y, dim=1)
        values[max_dist_mask] = _MAX_DIST
        
        # Energy calculations
        clustering[max_node] = clustering[parent_1] + clustering[parent_2]
        leaf_indices = torch.where(clustering[max_node])[0]
        leaf_edges = torch.meshgrid(leaf_indices, leaf_indices)
        energy[max_node] = energy[parent_1] + energy[parent_2]
        merge_energy = torch.sum(weights[leaf_edges])
        if merge_energy >= energy[max_node]:
            energy[max_node] = merge_energy
            clustering[max_node][clustering[max_node] > 0] = max_node
            round_matrix[leaf_edges] = 1
        
        if verbose:
            print('Y:', Y)
            print('\tminima (values):', values)
            print('\tminima (indices):', indices)
            print('\tparents:', parents)
            print('\tparent_to_idx:', parent_to_idx)
            print('\tidx_to_parent:', idx_to_parent)
            print('\tcluster_sizes:', cluster_sizes)
            print('\tclustering (current):', clustering[max_node])
            print('round_matrix:')
            print(round_matrix)
            print()
    
    return round_matrix, clustering[-1], parents

In [132]:
# Construct a random, symmetric matrix of values between 0 and 1
D = 100
X = torch.rand((D,D), device=device)
X = X.triu(1) + X.triu(1).T + torch.eye(D, device=device)
print(X)

W = torch.rand((D,D), device=device) * 2 - 1
W = W.triu(1) + torch.zeros((D,D), device=device)
print(W)

tensor([[1.0000, 0.6269, 0.0217,  ..., 0.8420, 0.4147, 0.8465],
        [0.6269, 1.0000, 0.4173,  ..., 0.8729, 0.4535, 0.3830],
        [0.0217, 0.4173, 1.0000,  ..., 0.5806, 0.1579, 0.1000],
        ...,
        [0.8420, 0.8729, 0.5806,  ..., 1.0000, 0.8355, 0.5138],
        [0.4147, 0.4535, 0.1579,  ..., 0.8355, 1.0000, 0.5435],
        [0.8465, 0.3830, 0.1000,  ..., 0.5138, 0.5435, 1.0000]],
       device='cuda:0')
tensor([[ 0.0000,  0.4299, -0.2313,  ..., -0.3950,  0.4839,  0.7758],
        [ 0.0000,  0.0000, -0.3569,  ...,  0.0936, -0.4037,  0.6479],
        [ 0.0000,  0.0000,  0.0000,  ..., -0.0942,  0.3481,  0.7604],
        ...,
        [ 0.0000,  0.0000,  0.0000,  ...,  0.0000,  0.8543,  0.5286],
        [ 0.0000,  0.0000,  0.0000,  ...,  0.0000,  0.0000,  0.7540],
        [ 0.0000,  0.0000,  0.0000,  ...,  0.0000,  0.0000,  0.0000]],
       device='cuda:0')


In [160]:
# Ours: Average-linkage

torch.cuda.synchronize()
torch.cuda.synchronize()

a = time.perf_counter()

result = avg_hac_cut(X, W, verbose=False, device='cuda', use_similarities=True)

b = time.perf_counter()
print('{:.02e}s'.format(b - a))

7.65e-02s


In [165]:
np.array_equal(get_parents_from_higra(-X, linkage='average'), result[2])

True

In [163]:
result

(tensor([[1., 0., 1.,  ..., 1., 1., 0.],
         [0., 1., 0.,  ..., 0., 0., 0.],
         [1., 0., 1.,  ..., 1., 1., 0.],
         ...,
         [1., 0., 1.,  ..., 1., 1., 0.],
         [1., 0., 1.,  ..., 1., 1., 0.],
         [0., 0., 0.,  ..., 0., 0., 1.]], device='cuda:0'),
 tensor([196., 165., 196., 196., 196., 171., 154., 196., 196., 196., 154., 196.,
         196., 196., 111., 196., 144., 196., 196., 101.,  21., 196., 196., 135.,
         154., 171., 196., 101., 165., 196., 171.,  32., 163., 196., 196., 196.,
         196., 163., 121., 111., 121., 160.,  43., 196., 196., 171., 196., 144.,
         165., 171., 196., 196., 196., 196., 196., 165., 196., 196., 196., 196.,
         165., 170., 196.,  64., 170.,  66.,  67., 105., 196., 196., 170., 163.,
         196.,  74., 170., 196., 196.,  78.,  79., 196., 163.,  82., 160., 154.,
         196.,  86., 196., 165., 160., 196.,  91., 144., 196., 105.,  95.,  96.,
         196., 196., 196., 135.]),
 tensor([151, 138, 157, 116, 127, 110,