In [1]:
import torch
import torchaudio
import matplotlib.pyplot as plt
import matplotlib

In [2]:
import sys
sys.path.append('../')
from src.encodec.model import EncodecModel
from src.encodec.modules.seanet import SEANetEncoder, SEANetDecoder
from src.encodec.quantization.vq import ResidualVectorQuantizer

In [3]:
# Let's say we have a batch of audio, each 10 seconds each
batch = torch.randn(20, 1, 16000*10)

In [4]:
# Create the model
dim = 128
encoder, decoder, vq = SEANetEncoder(dimension=dim), SEANetDecoder(dimension=dim), ResidualVectorQuantizer(dim)
Model = EncodecModel(
    encoder=encoder,
    decoder=decoder,
    quantizer=vq,
    target_bandwidths=[6.0, 8.0, 10.0, 12.0, 14.0, 16.0],
    sample_rate=16000,
    channels=1
)

In [5]:
# Put batch and model on GPU
batch = batch.cuda()
Model = Model.cuda()

In [6]:
Model(batch)

tensor([[[-0.0789, -0.0704, -0.0792,  ..., -0.0790, -0.0896, -0.0600]],

        [[-0.0789, -0.0704, -0.0792,  ..., -0.0790, -0.0896, -0.0600]],

        [[-0.0789, -0.0704, -0.0792,  ..., -0.0790, -0.0896, -0.0600]],

        ...,

        [[-0.0789, -0.0704, -0.0792,  ..., -0.0790, -0.0896, -0.0600]],

        [[-0.0789, -0.0704, -0.0792,  ..., -0.0790, -0.0896, -0.0600]],

        [[-0.0789, -0.0704, -0.0792,  ..., -0.0790, -0.0896, -0.0600]]],
       device='cuda:0', grad_fn=<SliceBackward0>)

In [7]:
# Adam optimizer
optimizer = torch.optim.Adam(Model.parameters(), lr=1e-4)

In [8]:
# Do a single pass and loss step to see gpu memory usage
optimizer.zero_grad()
loss = Model(batch).mean()
loss.backward()
optimizer.step()

In [10]:
# Get memory usage
print(f"{torch.cuda.memory_allocated() / 1e9} GB")

9.257893376 GB
