In [1]:
import torch
import torch.utils.data
from torch import nn, optim
from torch.nn import functional as F
from torchvision import datasets, transforms
from torchvision.utils import save_image
import data_loader
import numpy as np
import sample_to_chords as s2c


### VAE definition

In [2]:
# Use the first CUDA GPU when PyTorch can see one; otherwise fall back to the CPU.
dev = "cuda:0" if torch.cuda.is_available() else "cpu"
print('using', dev)
device = torch.device(dev)
class VAE(nn.Module):
    """Fully connected variational auto-encoder over fixed-length chord chunks.

    One input is a grid of ``N_CHORDS`` rows, each row holding
    ``N_PITCH * N_MAIN_QUALITY + N_EXTRA_QUALITY`` values (16 x 39 with the
    constants below). The grid is flattened, squeezed through one hidden
    layer into a ``SIZE_LATENT``-dimensional Gaussian latent, and decoded
    back to a grid of the same shape with every value in (0, 1).

    NOTE(review): the per-chord layout is presumably pitch x main-quality
    indicators followed by extra-quality indicators -- confirm against
    ``data_loader``.
    """

    # Dimensions of one chunk.
    N_CHORDS = 16
    N_PITCH = 12
    N_MAIN_QUALITY = 3
    N_EXTRA_QUALITY = 3

    # Layer widths.
    SIZE_HIDDEN = 400
    SIZE_LATENT = 40

    def __init__(self):
        super().__init__()
        chord_width = self.N_PITCH * self.N_MAIN_QUALITY + self.N_EXTRA_QUALITY
        flat_size = self.N_CHORDS * chord_width

        # Encoder: flattened chunk -> hidden -> two parallel latent heads.
        self.fc1 = nn.Linear(flat_size, self.SIZE_HIDDEN)
        self.fc21 = nn.Linear(self.SIZE_HIDDEN, self.SIZE_LATENT)
        self.fc22 = nn.Linear(self.SIZE_HIDDEN, self.SIZE_LATENT)

        # Decoder: latent -> hidden -> flattened chunk.
        self.fc3 = nn.Linear(self.SIZE_LATENT, self.SIZE_HIDDEN)
        self.fc4 = nn.Linear(self.SIZE_HIDDEN, flat_size)

    def encode(self, x):
        """Map flattened chunks to the pair ``(fc21(h), fc22(h))``.

        ``forward`` treats the pair as the mean and log-variance of the
        latent Gaussian.
        """
        hidden = F.relu(self.fc1(x))
        latent_mean = self.fc21(hidden)
        latent_logvar = self.fc22(hidden)
        return latent_mean, latent_logvar

    def reparameterize(self, mu, logvar):
        """Draw ``mu + eps * exp(0.5 * logvar)`` with ``eps`` standard normal.

        Writing the sample this way keeps it differentiable with respect to
        ``mu`` and ``logvar``.
        """
        sigma = (0.5 * logvar).exp()
        noise = torch.randn_like(sigma)
        return mu + noise * sigma

    def decode(self, z):
        """Turn latent vectors into ``(batch, N_CHORDS, chord_width)`` grids in (0, 1)."""
        chord_width = self.N_PITCH * self.N_MAIN_QUALITY + self.N_EXTRA_QUALITY
        hidden = F.relu(self.fc3(z))
        grid = self.fc4(hidden).view(-1, self.N_CHORDS, chord_width)
        return torch.sigmoid(grid)

    def forward(self, x):
        """Encode, sample and decode; returns ``(reconstruction, mu, logvar)``.

        ``x`` is reshaped to rows of the flattened chunk size, so a single
        un-batched chunk is handled as a batch of one.
        """
        chord_width = self.N_PITCH * self.N_MAIN_QUALITY + self.N_EXTRA_QUALITY
        flat_input = x.view(-1, self.N_CHORDS * chord_width)
        mu, logvar = self.encode(flat_input)
        latent = self.reparameterize(mu, logvar)
        return self.decode(latent), mu, logvar

using cpu


In [3]:
# Reconstruction + KL divergence losses summed over all elements and batch
def loss_function(recon_x, x, mu, logvar, beta=1.0):
    """Return the beta-weighted VAE loss ``BCE + beta * KLD``.

    Args:
        recon_x: decoder output with values in [0, 1].
        x: target tensor holding the same number of elements as ``recon_x``
            (the two shapes may differ, e.g. one flattened and one not).
        mu: latent means produced by the encoder.
        logvar: latent log-variances produced by the encoder.
        beta: weight applied to the KL term; 1.0 gives the unweighted sum.

    Returns:
        Scalar tensor: binary cross-entropy summed over every element plus
        ``beta`` times the KL divergence summed over every latent dimension.
    """
    # Flatten both tensors rather than viewing them as rows of a hard-coded
    # 16*(12*3 + 3) width: the summed loss does not depend on the row layout,
    # and this keeps working if the chunk dimensions change or a tensor is
    # non-contiguous.
    BCE = F.binary_cross_entropy(recon_x.reshape(-1), x.reshape(-1), reduction='sum')

    # KL(N(mu, sigma^2) || N(0, 1)) = -0.5 * sum(1 + log(sigma^2) - mu^2 - sigma^2)
    KLD = -0.5 * torch.sum(1 + logvar - mu.pow(2) - logvar.exp())

    return BCE + beta * KLD

In [4]:
def train(epoch):
    """Run one optimisation pass over every batch of ``realbook_dataset``.

    Uses the notebook globals ``model``, ``optimizer``, ``device``,
    ``realbook_dataset``, ``epochs``, ``log_interval`` and ``Nchunks``.

    Args:
        epoch: index of the current epoch. The KL term of the loss is
            weighted by ``epoch / epochs``, so its influence grows as
            training progresses.

    Prints a progress line every ``log_interval`` batches and the loss
    averaged over ``Nchunks`` at the end of the pass.
    """
    model.train()
    kl_weight = epoch / epochs
    running_loss = 0

    for step, batch in enumerate(realbook_dataset):
        batch = batch.to(device)
        optimizer.zero_grad()

        reconstruction, mu, logvar = model(batch)
        batch_loss = loss_function(reconstruction, batch, mu, logvar, kl_weight)
        batch_loss.backward()
        optimizer.step()

        loss_value = batch_loss.item()
        running_loss += loss_value

        if step % log_interval == 0:
            n_in_batch = len(batch)
            print('Train Epoch: {} [{}/{} ({:.0f}%)]\tLoss: {:.6f}'.format(
                epoch, step * n_in_batch, Nchunks,
                100. * step * n_in_batch / Nchunks,
                loss_value / n_in_batch))

    print('====> Epoch: {} Average loss: {:.4f}'.format(
          epoch, running_loss / Nchunks))

In [5]:
# Training hyper-parameters, read as globals by train() and the cells below.
# Number of passes over the dataset; also the denominator of the KL weight
# (epoch / epochs) computed in train().
epochs = 10
# Number of chunks per mini-batch when the dataset is split.
batch_size = 128
# A progress line is printed every `log_interval` batches.
log_interval = 100

In [6]:
# Load the whole dataset once and remember how many chunks it holds;
# train() divides the accumulated loss by Nchunks.
realbook_chunks = data_loader.import_dataset()
Nchunks = len(realbook_chunks)

# Cut the data along dim 0 into consecutive mini-batches of `batch_size`
# (the last one may be shorter). train() iterates over this tuple directly.
realbook_dataset = torch.split(realbook_chunks, batch_size, dim=0)

# Fresh model on the selected device, optimised with Adam.
model = VAE().to(device)
optimizer = optim.Adam(model.parameters(), lr=1e-3)

Dataset loaded !


### Load model

In [7]:
# Restore previously saved weights into the freshly built model.
# map_location=device remaps the stored tensors onto the device selected for
# this session, so a checkpoint written on a GPU machine still loads on a
# CPU-only one (and vice versa).
# NOTE: torch.load unpickles the file -- only load checkpoints you trust.
model.load_state_dict(torch.load("./model_realbook.pt", map_location=device))

<All keys matched successfully>

### Train model

In [7]:
# Make sure the model is on the selected device before training starts.
model.to(device)
# Epochs are numbered 1..epochs, so the KL weight computed in train()
# (epoch / epochs) ramps linearly and reaches 1.0 on the final epoch.
for epoch in range(1, epochs + 1):
    train(epoch)

====> Epoch: 1 Average loss: 38.4286
====> Epoch: 2 Average loss: 22.4146
====> Epoch: 3 Average loss: 23.4053
====> Epoch: 4 Average loss: 25.9134


====> Epoch: 5 Average loss: 28.5836
====> Epoch: 6 Average loss: 31.2386
====> Epoch: 7 Average loss: 33.7572
====> Epoch: 8 Average loss: 36.1858
====> Epoch: 9 Average loss: 38.4631


====> Epoch: 10 Average loss: 40.6060


In [8]:
# Persist only the weights (state_dict); this overwrites the same file that
# the "Load model" cell reads.
torch.save(model.state_dict(), "./model_realbook.pt")

### Test

In [13]:
# Reconstruct one chunk from the first mini-batch and print it next to the
# original for a visual comparison.
index_test = 68
test_sample = realbook_dataset[0][index_test]

print("Vérité")  # ground truth
print(test_sample.shape)
print(data_loader.tensor_to_chunk(test_sample))

print()
print("Par VAE")  # reconstruction produced by the VAE
# Inference is done on the CPU: move the model there and feed it a CPU copy of
# the sample so both are guaranteed to be on the same device.
model.to(torch.device("cpu"))
# No gradients are needed here; no_grad avoids building the autograd graph.
# The forward pass still samples the latent, so the output can vary between runs.
with torch.no_grad():
    recons_test, _, _ = model(test_sample.to("cpu"))
print(data_loader.tensor_to_chunk(recons_test[0]))

Vérité
torch.Size([16, 39])
['A:min:min7', 'A:min:min7', 'A:min:min7', 'A:min:min7', 'E:min:min7', 'E:min:min7', 'E:min:min7', 'E:min:min7', 'E:min:min7', 'E:min:min7', 'E:min:min7', 'E:min:min7', 'F:maj:maj7', 'F:maj:maj7', 'F:maj:maj7', 'F:maj:maj7']

Par VAE
['A:min:min7', 'A:min:min7', 'A:min:min7', 'A:min:min7', 'E:min:min7', 'E:min:min7', 'E:min:min7', 'E:min:min7', 'E:min:min7', 'E:min:min7', 'E:min:min7', 'A:maj:min7', 'F:maj:min7', 'F:maj:min7', 'F:maj:maj7', 'F:maj:maj7']


### Create

In [18]:
# Generate a new chunk by decoding a random point of the latent space.
# Take the latent size from the model class so the two cannot drift apart.
N_LATENT = VAE.SIZE_LATENT
with torch.no_grad():
    # Draw the noise on whatever device the model currently lives on, so this
    # cell does not depend on an earlier cell having moved the model to the CPU.
    latent = torch.randn(1, N_LATENT, device=next(model.parameters()).device)
    # decode() returns a batch of one; bring it to the CPU and drop the batch axis.
    sample = model.decode(latent).cpu()[0]
print(sample.shape)
data_loader.tensor_to_chunk(sample)

torch.Size([16, 39])


['A:min:N',
 'A:min:min7',
 'A:min:min7',
 'A:min:min7',
 'A:min:min7',
 'A:min:min7',
 'A:min:min7',
 'A:min:min7',
 'A:min:min7',
 'D:min:min7',
 'B:maj:min7',
 'G:min:min7',
 'C:maj:min7',
 'C:maj:N',
 'C:maj:N',
 'C:maj:N']