In [1]:
import sys

# Make the parent directory importable so the project modules that live one
# level up can be imported by the next cell. The membership check keeps the
# cell idempotent: re-running it no longer appends a duplicate ".." entry.
if ".." not in sys.path:
    sys.path.append("..")

In [2]:
# import "dataset" (located in the parent folder)
import dataset
import models
import config
from torch import nn
# import summary
import torch
from torchsummary import summary

  from .autonotebook import tqdm as notebook_tqdm
2023-08-01 11:55:12.892580: I tensorflow/core/util/port.cc:110] oneDNN custom operations are on. You may see slightly different numerical results due to floating-point round-off errors from different computation orders. To turn them off, set the environment variable `TF_ENABLE_ONEDNN_OPTS=0`.
2023-08-01 11:55:12.933989: I tensorflow/core/platform/cpu_feature_guard.cc:182] This TensorFlow binary is optimized to use available CPU instructions in performance-critical operations.
To enable the following instructions: AVX2 AVX512F AVX512_VNNI FMA, in other operations, rebuild TensorFlow with the appropriate compiler flags.


In [3]:
# Pull the first batch out of the project dataloader; it unpacks into a
# spectrogram and its caption (the caption is not used in this section).
dataloader = dataset.get_dataloader()
sample_iter = iter(dataloader)
spectrogram, caption = next(sample_iter)

# Move the spectrogram onto the device selected in config.DEVICE.
spectrogram = spectrogram.to(config.DEVICE)

torch.Size([1, 513, 431])


In [4]:
# Encode the sampled spectrogram with the project's VAE wrapper.
vae = models.VAE()

# encode() returns an object exposing `latent_dist`; its mode() is used as the
# "real" latent (no sampling call is made here).
posterior = vae.encode(spectrogram).latent_dist
encodings_real = posterior.mode()

# Show the shape of the real latent.
print(encodings_real.shape)

torch.Size([1, 8, 128, 107])


In [5]:
# Build the generator and produce one fake latent from the fixed noise.
ngpu = torch.cuda.device_count()

# Instantiate the generator around the VAE, then move it to config.DEVICE.
netG = models.Generator(vae, ngpu)
netG = netG.to(config.DEVICE)

# Wrap in DataParallel only when more than one CUDA device is visible.
if ngpu > 1:
    gpu_ids = list(range(ngpu))
    netG = nn.DataParallel(netG, gpu_ids)

# Re-initialise the weights through the project's init hook (presumably
# mean=0, stdev=0.02 -- confirm in models._weights_init).
# NOTE(review): netG is constructed from `vae`; if Generator registers the VAE
# as a submodule, apply() visits those layers too -- confirm the hook does not
# overwrite pretrained weights.
netG.apply(models._weights_init)

# Forward the fixed noise, cut the result out of the autograd graph, and keep
# a CPU copy.
fake = netG(config.FIXED_NOISE).detach()
fake = fake.cpu()

# Show the shape of the generated latent.
print(fake.shape)

torch.Size([1, 8, 128, 107])


In [6]:
# Build the discriminator and print a layer-by-layer summary of it.
netD = models.Discriminator(ngpu)
netD = netD.to(config.DEVICE)

# Wrap in DataParallel only when more than one CUDA device is visible.
if ngpu > 1:
    netD = nn.DataParallel(netD, list(range(ngpu)))

# Re-initialise the weights through the project's init hook (presumably
# mean=0, stdev=0.02 -- confirm in models._weights_init).
netD.apply(models._weights_init)

# Summarise the model for a single input whose shape matches the latent
# printed earlier, minus the batch dimension.
summary(netD, (8, 128, 107), batch_size=1)

# Count only the parameters that take part in gradient updates.
trainable_count = sum(
    param.numel() for param in netD.parameters() if param.requires_grad
)
print(f"Number of trainable parameters: {trainable_count}")

----------------------------------------------------------------
        Layer (type)               Output Shape         Param #
            Conv2d-1            [1, 64, 64, 54]           4,608
       BatchNorm2d-2            [1, 64, 64, 54]             128
         LeakyReLU-3            [1, 64, 64, 54]               0
            Conv2d-4           [1, 128, 32, 27]          73,728
       BatchNorm2d-5           [1, 128, 32, 27]             256
         LeakyReLU-6           [1, 128, 32, 27]               0
            Conv2d-7           [1, 256, 16, 14]         294,912
       BatchNorm2d-8           [1, 256, 16, 14]             512
         LeakyReLU-9           [1, 256, 16, 14]               0
           Conv2d-10               [1, 1, 8, 7]           2,304
          Sigmoid-11               [1, 1, 8, 7]               0
          Flatten-12                    [1, 56]               0
           Linear-13                     [1, 1]              57
          Sigmoid-14                   

In [8]:
# Score the real and the generated latents with the discriminator.
real_result = netD(encodings_real)

# `fake` was copied to the CPU when it was created, while netD was moved to
# config.DEVICE. Feeding a CPU tensor to a model on another device raises a
# device-mismatch error on a fresh single-device run, so move it to the
# model's device first (a no-op when config.DEVICE is already the CPU).
fake_result = netD(fake.to(config.DEVICE))

# print the results
print(f"Real result: {real_result}")
print(f"Fake result: {fake_result}")

Real result: tensor([[0.4088]], device='cuda:0', grad_fn=<SigmoidBackward0>)
Fake result: tensor([[0.3835]], device='cuda:0', grad_fn=<SigmoidBackward0>)
