In [1]:
import torch
from vector_quantize_pytorch import VectorQuantize

vq = VectorQuantize(
    dim = 256,
    codebook_size = 512,     # codebook size
    decay = 0.8,             # the exponential moving average decay, lower means the dictionary will change faster
    commitment_weight = 1.   # the weight on the commitment loss
)

x = torch.randn(1, 1024, 256)
quantized, indices, commit_loss = vq(x) # (1, 1024, 256), (1, 1024), (1)

In [2]:
vq.codebook.shape


tensor([[-4.2240e+02,  2.0489e+02, -2.8715e+02,  ...,  5.2907e+02,
         -9.7509e+01,  3.7039e+02],
        [-1.2278e-01, -8.9855e-01,  3.0815e-01,  ..., -9.4204e-01,
         -1.7013e+00, -1.0491e+00],
        [ 3.1769e-01, -4.2162e-01, -7.4232e-01,  ..., -4.9752e-01,
          1.3512e+00, -1.7010e+00],
        ...,
        [ 6.9604e-02,  1.6588e+00,  2.1905e-02,  ..., -1.7449e+00,
         -3.1762e-01,  1.2220e-01],
        [-3.9274e-01, -3.9676e-01,  7.9910e-01,  ...,  9.4429e-02,
         -1.8215e-01, -9.2676e-02],
        [ 8.1358e-02,  3.0969e-01,  1.3196e-02,  ...,  9.8286e-01,
          7.9515e-01,  9.7848e-02]])

In [6]:
import torch
from vector_quantize_pytorch import ResidualVQ

residual_vq = ResidualVQ(
    dim = 256,
    num_quantizers = 8,      # specify number of quantizers
    codebook_size = 1024,    # codebook size
)

x = torch.randn(1, 1024, 256)

quantized, indices, commit_loss = residual_vq(x)

# (1, 1024, 256), (1, 1024, 8), (1, 8)
# (batch, seq, dim), (batch, seq, quantizer), (batch, quantizer)

# if you need all the codes across the quantization layers, just pass return_all_codes = True

quantized, indices, commit_loss, all_codes = residual_vq(x, return_all_codes = True)

# *_, (8, 1, 1024, 256)
# all_codes - (quantizer, batch, seq, dim)

In [14]:
quantized.shape


torch.Size([1, 1024, 256])

In [26]:
import torch
from vector_quantize_pytorch import LFQ

# you can specify either dim or codebook_size
# if both specified, will be validated against each other

quantizer = LFQ(
    codebook_size = 65536,      # codebook size, must be a power of 2
    dim = 16,                   # this is the input feature dimension, defaults to log2(codebook_size) if not defined
    entropy_loss_weight = 0.1,  # how much weight to place on entropy loss
    diversity_gamma = 1.        # within entropy loss, how much weight to give to diversity of codes, taken from https://arxiv.org/abs/1911.05894
)



In [None]:
image_feats = torch.randn(1, 16, 32, 32)

quantized, indices, entropy_aux_loss = quantizer(image_feats, inv_temperature=100.)  # you may want to experiment with temperature

# (1, 16, 32, 32), (1, 32, 32), (1,)

assert image_feats.shape == quantized.shape
assert (quantized == quantizer.indices_to_codes(indices)).all()

In [None]:
image_feats = torch.randn(1, 16, 32, 32)

quantized, indices, entropy_aux_loss = quantizer(image_feats, inv_temperature=100.)  # you may want to experiment with temperature

# (1, 16, 32, 32), (1, 32, 32), (1,)

assert image_feats.shape == quantized.shape
assert (quantized == quantizer.indices_to_codes(indices)).all()

In [21]:
seq = torch.randn(1, 512, 16)
quantized, *_ = quantizer(seq)

assert seq.shape == quantized.shape

# video_feats = torch.randn(1, 16, 10, 32, 32)
# quantized, *_ = quantizer(video_feats)

# assert video_feats.shape == quantized.shape

In [25]:
quantized

tensor([[[ 1.,  1., -1.,  ..., -1., -1.,  1.],
         [-1.,  1., -1.,  ...,  1., -1., -1.],
         [ 1., -1.,  1.,  ..., -1., -1.,  1.],
         ...,
         [-1.,  1.,  1.,  ...,  1., -1., -1.],
         [-1.,  1.,  1.,  ...,  1.,  1., -1.],
         [ 1., -1.,  1.,  ...,  1.,  1.,  1.]]])

In [None]:
import torch
from vector_quantize_pytorch import LFQ

quantizer = LFQ(
    codebook_size = 4096,
    dim = 16,
    num_codebooks = 4  # 4 codebooks, total codebook dimension is log2(4096) * 4
)

image_feats = torch.randn(1, 16, 32, 32)

quantized, indices, entropy_aux_loss = quantizer(image_feats)

# (1, 16, 32, 32), (1, 32, 32, 4), (1,)

assert image_feats.shape == quantized.shape
assert (quantized == quantizer.indices_to_codes(indices)).all()

In [4]:
import torch
from vector_quantize_pytorch import latent_quantization

# you can specify either dim or codebook_size
# if both specified, will be validated against each other

quantizer = latent_quantization(
    levels = [5, 5, 8],      # number of levels per codebook dimension
    dim = 16,                   # input dim
    commitment_loss_weight=0.1,  
    quantization_loss_weight=0.1,
)
seq = torch.randn(1, 32, 16)
quantized, *_ = quantizer(seq)

assert seq.shape == quantized.shape

# video_feats = torch.randn(1, 16, 10, 32, 32)
# quantized, *_ = quantizer(video_feats)

# assert video_feats.shape == quantized.shape

# image_feats = torch.randn(1, 16, 32, 32)

# quantized, indices, loss = quantizer(image_feats)

# # (1, 16, 32, 32), (1, 32, 32), (1,)

# assert image_feats.shape == quantized.shape
# assert (quantized == quantizer.indices_to_codes(indices)).all()

TypeError: 'module' object is not callable

In [7]:
import torch
from vector_quantize_pytorch import latent_quantization

In [8]:
import torch
from vector_quantize_pytorch import latent_quantization

levels = [4, 8, 16]
dim = 9
num_codebooks = 3

model = latent_quantization(levels, dim, num_codebooks=num_codebooks)

input_tensor = torch.randn(2, 3, dim)
output_tensor, indices, loss = model(input_tensor)

assert output_tensor.shape == input_tensor.shape
assert indices.shape == (2, 3, num_codebooks)
assert loss.item() >= 0

TypeError: 'module' object is not callable