In [None]:
import torch
import torch.nn as nn
import torch.nn.functional as F
from torch.autograd import Variable
import PIL

import torchvision
from torchvision import datasets
from torchvision import transforms
from torchvision.utils import save_image
from torchsummary import summary

from pushover import notify
from utils import makegif
from random import randint

from IPython.display import Image
from IPython.core.display import Image, display

import pretty_midi
import glob
import pickle

from keras.layers import Lambda, Input, Dense
from keras.models import Model
from keras.losses import mse, binary_crossentropy
from keras.utils import plot_model
from keras import backend as K

import numpy as np
import matplotlib.pyplot as plt
import argparse
import os
import matplotlib.cm as cm

%load_ext autoreload
%autoreload 2

In [None]:
# Device configuration
device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
device

In [None]:
bs = 32 # batchsize

In [None]:
def get_filename(filepath):
    base = os.path.basename(filepath)
    return os.path.splitext(base)[0]

In [None]:
def get_piano_rolls():
    pr = []
    for file in glob.glob("../type0/*.midi"):
        pm = pretty_midi.PrettyMIDI(file) #takes a midi file and converts to pretty_midi
        instr = pm.instruments #splits into list of instruments
        print("Processing ", file)
        for instrument in instr:
            name = pretty_midi.program_to_instrument_name(instrument.program)
            if name == "Acoustic Grand Piano": #only take the piano track
                piano_roll = instrument.get_piano_roll() #get the piano roll, which is a np.ndarray
                pr.append(piano_roll)  
                roll = torch.from_numpy(piano_roll)
#                 roll = (piano_roll[:,:]>0).astype(int)
#                 filename = get_filename(file)
#                 export_piano_rolls(roll, filename, output_t_size=128)
                return [roll]
    return pr #list of piano rolls (np.ndarray matrices)
piano_rolls = get_piano_rolls()

In [None]:
# # Load Data
# dataset = datasets.ImageFolder(root='trainings/roll_imgs_partial', transform=transforms.Compose([
# #     transforms.Resize(64),
#     transforms.ToTensor(), 
# ]))
# dataloader = torch.utils.data.DataLoader(dataset, batch_size=bs, shuffle=True)
# len(dataset.imgs), len(dataloader)
# # size of input = 3 x 128 x 128


In [46]:
# print(dataset[1][0].shape)
HSIZE = 2048 #9216 # 1024
ZDIM = 32

In [47]:
class Flatten(nn.Module):
    def forward(self, input):
#         print("flatten: ", input.shape)
        return input.view(input.size(0), -1)

In [48]:
class UnFlatten(nn.Module):
    def forward(self, input, size=HSIZE):
        return input.view(input.size(0), size, 1, 1)

In [62]:
class VAE(nn.Module):
    def __init__(self, image_channels=1, h_dim=HSIZE, z_dim=ZDIM):
        super(VAE, self).__init__()
        self.encoder = nn.Sequential(
            nn.Conv2d(image_channels, 32, kernel_size=4, stride=2), # -> [32, 32, 31, 31] 63
            nn.ReLU(),
            nn.Conv2d(32, 64, kernel_size=4, stride=2), # -> [32, 64, 14, 14] 31
            nn.ReLU(),
            nn.Conv2d(64, 128, kernel_size=4, stride=2), # -> [32, 128, 6, 6] 14
            nn.ReLU(),
            nn.Conv2d(128, 256, kernel_size=4, stride=2), # -> [32, 256, 2, 2] 6
            nn.ReLU(), 
            nn.Conv2d(256, 512, kernel_size=4, stride=2), # -> Null -> [32, 512, 2, 2]  -> [1, 512, 2, 2]
            nn.ReLU(), 
            Flatten() # -> [32, 1024]  -> [32, 2048] -> [1, 2048]
        )
        
        self.fc1 = nn.Linear(h_dim, z_dim)
        self.fc2 = nn.Linear(h_dim, z_dim)
        self.fc3 = nn.Linear(z_dim, h_dim)
        
        self.decoder = nn.Sequential(
            UnFlatten(), 
            nn.ConvTranspose2d(h_dim, 256 , kernel_size=5, stride=2),
            nn.ReLU(),
            nn.ConvTranspose2d(256, 128, kernel_size=5, stride=2),
            nn.ReLU(),
            nn.ConvTranspose2d(128, 64, kernel_size=5, stride=2),
            nn.ReLU(),
            nn.ConvTranspose2d(64, 32, kernel_size=6, stride=2),
            nn.ReLU(),
            nn.ConvTranspose2d(32, image_channels, kernel_size=6, stride=2),
            nn.Sigmoid(),
        )
#         self.decoder = nn.Sequential(
#             UnFlatten(),
#             nn.ConvTranspose2d(h_dim, 128, kernel_size=5, stride=2),
#             nn.ReLU(),
#             nn.ConvTranspose2d(128, 64, kernel_size=5, stride=2),
#             nn.ReLU(),
#             nn.ConvTranspose2d(64, 32, kernel_size=6, stride=2),
#             nn.ReLU(),
#             nn.ConvTranspose2d(32, image_channels, kernel_size=6, stride=2),
#             nn.Sigmoid(),
#         )
        
    def reparameterize(self, mu, logvar):
        std = logvar.mul(0.5).exp_()
        # return torch.normal(mu, std)
        esp = torch.randn(*mu.size())
        z = mu + std * esp
        return z
    
    def bottleneck(self, h):
        mu, logvar = self.fc1(h), self.fc2(h)
#         print("bottle: ",mu.shape, logvar.shape)
        z = self.reparameterize(mu, logvar)
        return z, mu, logvar

    def encode(self, x):
        print("======== Encode ========", x.shape)
        h = self.encoder(x)
        print("enc(x): ", h.shape)
        z, mu, logvar = self.bottleneck(h)
        print("z.shape: ", z.shape)
        return z, mu, logvar

    def decode(self, z):
        print("======== Decode ========", z.shape)
        z = self.fc3(z)
        print("fc3(z).shape: ", z.shape)
        z = self.decoder(z)
        print("decode(fc3(z)).shape: ", z.shape)
        return z

    def forward(self, x):
        z, mu, logvar = self.encode(x)
#         print(z.shape)
        z = self.decode(z)
#         print(z.shape, mu.shape, logvar.shape)
        return z, mu, logvar

In [63]:
vae = VAE(image_channels=1).to(device)
# model.load_state_dict(torch.load('vae.torch', map_location='cpu'))

In [64]:
optimizer = torch.optim.Adam(vae.parameters(), lr=1e-3)

In [65]:
def loss_fn(recon_x, x, mu, logvar):
    BCE = F.binary_cross_entropy(recon_x, x, size_average=False)
    # BCE = F.mse_loss(recon_x, x, size_average=False)

    # see Appendix B from VAE paper:
    # Kingma and Welling. Auto-Encoding Variational Bayes. ICLR, 2014
    # 0.5 * sum(1 + log(sigma^2) - mu^2 - sigma^2)
    KLD = -0.5 * torch.mean(1 + logvar - mu.pow(2) - logvar.exp())

    return BCE + KLD, BCE, KLD

In [66]:
epochs = 1

In [68]:
for epoch in range(epochs):
    for idx, images in enumerate(piano_rolls):
        img = images[:, :128].type(torch.FloatTensor).view(1, 1, 128, 128)
#         print(img.shape, img[0][0][0][0])
        recon_images, mu, logvar = vae(img)
        
        loss, bce, kld = loss_fn(recon_images, img, mu, logvar)
        optimizer.zero_grad()
        loss.backward()
        optimizer.step()

#         to_print = "Epoch[{}/{}] Loss: {:.3f} {:.3f} {:.3f}".format(epoch+1, 
#                                 epochs, loss.data[0]/bs, bce.data[0]/bs, kld.data[0]/bs)
        to_print = "Epoch[{}/{}] Loss: {:.3f} {:.3f} {:.3f}".format(epoch+1, 
                                epochs, loss.data/bs, bce.data/bs, kld.data/bs)
    print(to_print)

# notify to android when finished training
notify(to_print, priority=1)



RuntimeError: Expected 4-dimensional input for 4-dimensional weight [32, 1, 4, 4], but got 3-dimensional input of size [1, 128, 128] instead

In [25]:
torch.save(vae.state_dict(), 'models/vae.torch-alb-nmats-{}-epochs_{}'.format(len(dataset.imgs), epochs))

NameError: name 'dataset' is not defined