### SoftKMeans Examples

In [None]:
!pip install torch-kmeans

In [1]:
# imports
import numpy as np
import torch
from sklearn.datasets import make_blobs
from torch_kmeans import SoftKMeans


In [2]:
# function to generate some clustering data
def get_data(bs: int = 1,
             n: int = 20,
             d: int = 2,
             k: int = 4,
             different_k: bool = False,
             k_lims = (2, 5),
             add_noise: bool = True,
             fp_dtype = torch.float32,
             seed: int = 42):
    torch.manual_seed(seed)
    if different_k:
        a, b = k_lims
        k = torch.randint(low=a, high=b, size=(bs,)).long()
    else:
        k = torch.empty(bs).fill_(k).long()

    # generate pseudo clustering data
    x, y = [], []
    for i, k_ in enumerate(k.numpy()):
        x_, y_ = make_blobs(n_samples=n, centers=k_, n_features=d, random_state=seed+i)
        x.append(x_)
        y.append(y_)
    x = torch.from_numpy(np.stack(x, axis=0))
    y = torch.from_numpy(np.stack(y, axis=0))
    if add_noise:
        x += torch.randn(x.size())

    return x.to(fp_dtype), y, k


In [3]:
# create some data (BS, N, D)
# i.e. 1 instance with N=20 points and D=2 features
BS = 1
K = 4
x, y, _ = get_data(bs=BS, n=20, d=2, k=K, different_k=False)


In [4]:
model = SoftKMeans()
result = model(x, k=K)
# the labels are inferred via argmax
print(result.labels)
# the soft (fuzzy) labels are given in the soft assignment tensor
print(result.soft_assignment)

Full batch converged at iteration 13/100 with center shifts = tensor([7.0482e-06]).
tensor([[2, 2, 3, 2, 1, 0, 2, 1, 3, 1, 0, 3, 0, 3, 0, 2, 0, 1, 3, 1]])
tensor([[[4.9242e-01, 6.6634e-04, 4.9243e-01, 1.4482e-02],
         [4.9830e-01, 2.1706e-03, 4.9830e-01, 1.2309e-03],
         [1.2966e-03, 8.5425e-05, 1.2967e-03, 9.9732e-01],
         [4.9823e-01, 1.5513e-03, 4.9823e-01, 1.9898e-03],
         [1.3596e-02, 9.7274e-01, 1.3595e-02, 7.3166e-05],
         [4.7792e-01, 4.4000e-02, 4.7791e-01, 1.7134e-04],
         [4.9830e-01, 1.7766e-03, 4.9831e-01, 1.6194e-03],
         [4.2583e-03, 9.9136e-01, 4.2580e-03, 1.2031e-04],
         [1.9370e-03, 7.6187e-05, 1.9371e-03, 9.9605e-01],
         [5.3002e-03, 9.8930e-01, 5.2999e-03, 1.0367e-04],
         [4.8609e-01, 2.7626e-02, 4.8608e-01, 1.9741e-04],
         [4.0914e-03, 7.2767e-05, 4.0916e-03, 9.9174e-01],
         [4.9468e-01, 1.0342e-02, 4.9467e-01, 3.1007e-04],
         [4.6491e-04, 2.1149e-04, 4.6493e-04, 9.9886e-01],
         [4.9521e-0

One nice feature of the SoftKMeans implementation is,
that it is completely differentiable w.r.t. its inputs
which enables its use in a neural network model.
See [https://papers.nips.cc/paper/2019/hash/8bd39eae38511daad6152e84545e504d-Abstract.html](https://papers.nips.cc/paper/2019/hash/8bd39eae38511daad6152e84545e504d-Abstract.html)
for more infos.

In [5]:
# enable gradient propagation on input
x1 = x.clone()
x1.requires_grad = True
print(x1.requires_grad)

result = model(x1, k=K)
out = result.soft_assignment
print(out.requires_grad)
# compute loss
loss_fn = torch.nn.CrossEntropyLoss()
loss = loss_fn(
    out.permute(0, 2, 1),   # expects (BS, C, N)
    y
)
loss.backward()
print(x1.grad)

True
Full batch converged at iteration 13/100 with center shifts = tensor([7.0482e-06]).
True
tensor([[[ 2.4434e-04,  2.3269e-05],
         [ 1.1727e-04,  6.1593e-05],
         [-1.5717e-05,  6.4270e-05],
         [ 1.6618e-04,  6.9853e-05],
         [ 3.1087e-04, -5.6229e-04],
         [ 1.0758e-04,  1.8572e-04],
         [ 1.0508e-04,  4.8672e-05],
         [ 1.0294e-04, -1.0992e-04],
         [-7.6657e-06,  2.2855e-05],
         [ 1.3412e-04, -1.5893e-04],
         [ 1.0319e-04,  1.5037e-04],
         [ 1.8031e-05, -3.6327e-05],
         [ 8.5447e-05,  8.8168e-05],
         [ 5.6830e-06,  1.0594e-04],
         [ 8.6794e-05,  8.6030e-05],
         [ 1.6743e-04,  3.5864e-05],
         [ 8.8507e-05,  1.3848e-04],
         [ 1.1673e-04, -1.2019e-04],
         [-1.6715e-06,  8.6334e-05],
         [ 1.8456e-04, -2.5920e-04]]])
