## To use Google Drive, run the next two cells (you don't need them in a .py file)

In [2]:
# Colab only: make Google Drive visible inside the notebook's filesystem.
from google.colab import drive

DRIVE_MOUNT_POINT = '/content/gdrive/'
drive.mount(DRIVE_MOUNT_POINT)

Drive already mounted at /content/gdrive/; to attempt to forcibly remount, call drive.mount("/content/gdrive/", force_remount=True).


## Path to your data on Google Drive

In [3]:
%cd /content/gdrive/My Drive/HW5

/content/gdrive/My Drive/HW5


In [4]:
from __future__ import print_function, division, absolute_import, unicode_literals
import six
import os
import numpy as np
import Data
import torch
import torchvision.transforms as transforms
import torch.nn as nn
#import argparse       #you don't need it in Colab
import torch.nn.functional as F

import matplotlib.pyplot as plt
from tqdm import tqdm, trange

In [17]:
class ModelSingleStep(torch.nn.Module):
    """Feed-forward mask estimator that treats every spectrogram frame independently.

    An encoder compresses one magnitude frame and a decoder expands it back to
    one value per frequency bin, squashed by a sigmoid. `process` multiplies that
    output with the input magnitude, i.e. the network output acts as a mask.

    Args:
        blockSize: block size used to build the spectrogram. The frame width is
            taken to be ``blockSize // 2 + 1`` bins (2049 for blockSize 4096)
            -- TODO confirm against the spectrogram transform in `Data`.
    """

    def __init__(self, blockSize):
        super(ModelSingleStep, self).__init__()
        self.blockSize = blockSize

        # Width of one frame, i.e. the input and output size of the network.
        nBins = blockSize // 2 + 1

        ###################################
        # layers
        ###################################
        # The attribute names `encoder` / `decoder` become state_dict key prefixes,
        # so renaming them would invalidate previously saved checkpoints.
        self.encoder = torch.nn.Sequential(
            torch.nn.Linear(nBins, 1000),
            torch.nn.LeakyReLU(),
            torch.nn.Linear(1000, 400),
            torch.nn.LeakyReLU(),
        )
        self.decoder = torch.nn.Sequential(
            torch.nn.Linear(400, 1000),
            torch.nn.LeakyReLU(),
            torch.nn.Linear(1000, nBins),
            # keeps every output inside (0, 1) so it can be applied as a mask
            torch.nn.Sigmoid(),
        )
        ###################################

        self.initParams()

    def initParams(self):
        """Apply Xavier-normal initialisation to every parameter with more than one dimension."""
        for param in self.parameters():
            # biases are 1-D and keep their default initialisation
            if len(param.shape) > 1:
                torch.nn.init.xavier_normal_(param)

    def encode(self, x):
        """Map frames of shape (nFrames, nBins) to the hidden code of shape (nFrames, 400)."""
        h = self.encoder(x)
        return h

    def decode(self, h):
        """Map a hidden code of shape (nFrames, 400) to a mask of shape (nFrames, nBins)."""
        o = self.decoder(h)
        return o

    def forward(self, x):
        """Run the encoder followed by the decoder and return the mask for `x`."""
        # glue the encoder and the decoder together
        h = self.encode(x)
        o = self.decode(h)
        return o

    def process(self, magnitude):
        """Apply the model to a whole magnitude spectrogram at run time.

        Args:
            magnitude: 2-D numpy array indexed as [bin, frame].

        Returns:
            A new numpy array with the shape and dtype of `magnitude`, holding
            ``magnitude * mask``. The input array is not modified.
        """
        # Follow the parameters' device and dtype so a model that lives on the GPU,
        # or a float64 spectrogram, does not trigger a device/dtype mismatch.
        reference = next(self.parameters())
        with torch.no_grad():
            # Frames are independent, so all of them are evaluated as one batch.
            frames = torch.as_tensor(magnitude, dtype=reference.dtype, device=reference.device).t()
            mask = self.forward(frames).t().cpu().numpy()
        result = magnitude * mask
        return result.astype(magnitude.dtype, copy=False)

In [6]:
def validate(model, dataloader, lossFunction=None):
    """Compute the average loss of `model` over `dataloader` without updating it.

    Each batch is expected to be a dict with 'mixture' and 'vocal' tensors
    indexed as [batch, bin, frame] (the layout the training loop reads). Every
    frame is passed through the model, the output is applied to the mixture as
    a mask, and the masked mixture is compared with the vocal target.

    Args:
        model: the network to evaluate; called on tensors of shape (nFrames, nBins).
        dataloader: iterable yielding the batches described above.
        lossFunction: callable ``(estimate, target) -> scalar tensor`` that is
            assumed to average over its inputs. Defaults to mean squared error
            -- NOTE(review): keep this identical to the loss used for training.

    Returns:
        The frame-weighted mean loss as a float, or ``inf`` when the dataloader
        yields no frames (so an empty validation set can never look like the
        best model seen so far).
    """
    if lossFunction is None:
        lossFunction = torch.nn.functional.mse_loss

    # Evaluate on whatever device the model lives on; fall back to the CPU for a
    # model without parameters.
    firstParam = next(model.parameters(), None)
    device = firstParam.device if firstParam is not None else torch.device("cpu")

    totalLoss = 0.0
    totalFrames = 0

    model.eval()
    try:
        with torch.no_grad():
            # Each time fetch a batch of samples from the dataloader
            for sample in dataloader:
                mixture = sample['mixture'].to(device)
                target = sample['vocal'].to(device)

                # [batch, bin, frame] -> [batch * frame, bin]: one row per frame
                winLen = mixture.shape[1]
                mixtureFrames = mixture.permute(0, 2, 1).reshape(-1, winLen)
                targetFrames = target.permute(0, 2, 1).reshape(-1, winLen)

                estimateFrames = model(mixtureFrames) * mixtureFrames

                # weight by frame count so a smaller final batch is not over-represented
                nFrames = mixtureFrames.shape[0]
                totalLoss += lossFunction(estimateFrames, targetFrames).item() * nFrames
                totalFrames += nFrames
    finally:
        # hand the model back in training mode even if a batch raised
        model.train()

    if totalFrames == 0:
        return float("inf")
    return totalLoss / totalFrames

In [7]:
def saveFigure(result, target, mixture):
    """Draw three stacked log-magnitude panels and write them to "result_feedforward.png".

    Args:
        result: 2-D array-like plotted as given in the 'estimated' panel.
        target: 3-D tensor; its first entry along dim 0 fills the 'vocal' panel.
        mixture: 3-D tensor; its first entry along dim 0 fills the 'mixture' panel.

    The panels are drawn on the current pyplot figure, which is cleared again
    after the file has been written.
    """
    # identical colour range for all panels so they can be compared visually
    colorRange = dict(vmin=-300 / 20, vmax=10 / 20)

    # (title, producer); producers are evaluated lazily, one panel at a time
    panels = (
        ('estimated', lambda: result),
        ('vocal', lambda: target.cpu()[0, :, :].numpy()),
        ('mixture', lambda: mixture.cpu()[0, :, :].numpy()),
    )

    for row, (title, getMagnitude) in enumerate(panels, start=1):
        plt.subplot(3, 1, row)
        # the small offset keeps the logarithm finite where the magnitude is zero
        plt.pcolormesh(np.log(1e-4 + getMagnitude()), **colorRange)
        plt.title(title)

    plt.savefig("result_feedforward.png")
    plt.gcf().clear()

## This is the "\__main__" part of the script. The next two cells are needed only in Colab

In [8]:
class Arguments():
    """Run settings kept as plain attributes (the notebook's replacement for argparse)."""

    def __init__(self):
        # block size and hop size used when building the spectrogram
        self.blockSize, self.hopSize = 4096, 2048

        # number of audio files fetched per batch; lower it on an OOM error
        self.batchSize = 8

        # learning rate, default value is 0.0001
        self.lr = 1e-4

        # location of the dataset, adjust as needed,
        # e.g. "/content/gdrive/My Drive/HW5/DSD100"
        self.dataset = './DSD100'

        # set to 1 to restore weights from a previously trained model
        self.load = 0

        # checkpoint file that is read when `load` is 1
        self.checkpoint = 'savedModel_feedForward_best.pt'

        # seed for the random number generators
        self.seed = 555

In [9]:
# Build the settings object; the following cells read their configuration from `args`.
args = Arguments()

In [10]:
# Copy the run settings into the module-level names used by the following cells
blockSize, hopSize = args.blockSize, args.hopSize
batchSize = args.batchSize
PATH_DATASET = args.dataset

# presumably the audio sample rate in Hz -- TODO confirm against the Data module
fs = 32000

# lowest validation loss seen so far; overwritten when a checkpoint is restored
minValLoss = np.inf

# Seed torch and numpy from the same value, for reproducibility
torch.manual_seed(args.seed)
np.random.seed(args.seed)

# Ask cuDNN for deterministic behaviour and switch off its benchmark mode
torch.backends.cudnn.benchmark = False
torch.backends.cudnn.deterministic = True

In [11]:
# Preprocessing applied to every training sample, in the order listed
trainSteps = [
    # augmentation: random rescaling of the training data
    Data.Transforms.Rescale(0.8, 1.2),
    # augmentation: random shift of the beginning, because training always works on chunks
    Data.Transforms.RandomShift(fs * 30),
    # raw audio -> magnitude spectrogram
    Data.Transforms.MakeMagnitudeSpectrum(hopSize=hopSize, blockSize=blockSize),
    # shuffle all frames of a song for training the single-frame model;
    # NB: remove this step when training a temporal sequence model
    Data.Transforms.ShuffleFrameOrder(),
]
transformTrain = transforms.Compose(trainSteps)

# Preprocessing for the validation data: no augmentation/regularization
# techniques are needed here, only the conversion from raw audio to a spectrogram
transformVal = transforms.Compose(
    [Data.Transforms.MakeMagnitudeSpectrum(hopSize=hopSize, blockSize=blockSize)]
)

# Datasets for training and validation; every sample that is loaded goes through the
# preprocessing pipeline given by `transform`.
# Workers restart after each epoch, which takes a lot of time. repetition=8 repeats the
# dataset 8 times to reduce that waiting time, so 1 epoch here equals 8 passes over the
# data. For the validation data there is no point in repeating the dataset.
datasetTrain = Data.DSD100Dataset(PATH_DATASET, split='Train', mono=True, transform=transformTrain, repetition=8)
datasetValid = Data.DSD100Dataset(PATH_DATASET, split='Valid', mono=True, transform=transformVal, repetition=1)

# num_workers is the number of worker processes used to prefetch data (reduce it on an
# OOM error). Asking for more workers than the process has CPUs makes DataLoader warn
# that loading may become slow or freeze, so the count is capped at what is available.
if hasattr(os, 'sched_getaffinity'):
    availableCpus = len(os.sched_getaffinity(0))
else:
    # platforms without CPU-affinity support; cpu_count() may return None
    availableCpus = os.cpu_count() or 1
numWorkersTrain = min(4, availableCpus)

# initialize the data loaders
dataloaderTrain = torch.utils.data.DataLoader(datasetTrain, batch_size=batchSize, shuffle=True,
                                              num_workers=numWorkersTrain, collate_fn=Data.collate_fn)
dataloaderValid = torch.utils.data.DataLoader(datasetValid, batch_size=10, shuffle=False, num_workers=0,
                                              collate_fn=Data.collate_fn)

  cpuset_checked))


In [12]:
# initialize the Model
# (blockSize was copied from args.blockSize in the configuration cell)
model = ModelSingleStep(blockSize)

In [13]:
# To resume from a previously saved model, set args.load to 1.
if args.load == 1:
    # map_location='cpu': the model has not been moved to a device yet, and a
    # checkpoint written on a GPU machine could otherwise not be read on a CPU-only one.
    # NOTE(review): torch.load unpickles the file -- only restore checkpoints you trust.
    checkpoint = torch.load(args.checkpoint, map_location='cpu')
    # carry over the best validation loss so the "best" checkpoint is not overwritten
    # by a worse model after resuming
    minValLoss = checkpoint['minValLoss']
    model.load_state_dict(checkpoint['state_dict'])

In [14]:
# Use the GPU when CUDA is available, otherwise stay on the CPU
deviceName = "cuda" if torch.cuda.is_available() else "cpu"
device = torch.device(deviceName)

# Move the model's parameters to that device (as the last expression of the
# cell, this also prints the model summary)
model.to(device)

ModelSingleStep(
  (encoder): Sequential(
    (0): Linear(in_features=2049, out_features=1000, bias=True)
    (1): LeakyReLU(negative_slope=0.01)
    (2): Linear(in_features=1000, out_features=400, bias=True)
    (3): LeakyReLU(negative_slope=0.01)
  )
  (decoder): Sequential(
    (0): Linear(in_features=400, out_features=1000, bias=True)
    (1): LeakyReLU(negative_slope=0.01)
    (2): Linear(in_features=1000, out_features=2049, bias=True)
    (3): Sigmoid()
  )
)

## The next cell will not work unless you have defined the layers of the model

In [15]:
# initialize the optimizer for the parameters:
# Adam over all parameters of `model`, with the learning rate taken from args.lr
optimizer = torch.optim.Adam(model.parameters(), lr=args.lr)

In [None]:
model.train(mode=True)

# Exponentially smoothed training loss shown in the progress bar;
# a negative value means "no batch seen yet".
lossMovingAveraged = -1
# weight of the newest batch in the smoothed loss
lossSmoothing = 0.01

####################################
# The main loop of training
####################################
for epoc in range(2):
    # estimate of the most recent batch, kept for the debugging plot below
    lastEstimate = None

    with tqdm(dataloaderTrain) as t:
        # Each time fetch a batch of samples from the dataloader
        for sample in t:
            # Clear the accumulated gradient each time before performing optimizer.step()
            model.zero_grad()

            # read the input and the fitting target into the device
            mixture = sample['mixture'].to(device)
            target = sample['vocal'].to(device)

            seqLen = mixture.shape[2]
            winLen = mixture.shape[1]
            currentBatchSize = mixture.shape[0]

            # The model works on single frames: [batch, bin, frame] -> [batch * frame, bin]
            mixtureFrames = mixture.permute(0, 2, 1).reshape(-1, winLen)
            targetFrames = target.permute(0, 2, 1).reshape(-1, winLen)

            # The network output is applied to the mixture as a mask, the same way
            # ModelSingleStep.process uses it at run time.
            estimateFrames = model(mixtureFrames) * mixtureFrames

            # NOTE(review): mean squared error is an assumption; use the same loss
            # in validate() if you change it here.
            loss = F.mse_loss(estimateFrames, targetFrames)
            loss.backward()
            optimizer.step()

            lastEstimate = estimateFrames.detach()

            # store the smoothed loss
            lossValue = loss.item()
            if lossMovingAveraged < 0:
                lossMovingAveraged = lossValue
            else:
                lossMovingAveraged = (1 - lossSmoothing) * lossMovingAveraged + lossSmoothing * lossValue
            # this is used to set a description in the tqdm progress bar
            t.set_description(f"epoc : {epoc}, loss {lossMovingAveraged}")

        # plot the first one in the batch for debugging purposes
        # (skipped when the dataloader produced no batch at all)
        if lastEstimate is not None:
            # back to [batch, frame, bin], take the first item, and transpose to [bin, frame]
            result = lastEstimate.reshape(currentBatchSize, seqLen, winLen)[0].t().cpu().numpy()
            saveFigure(result, target, mixture)

    # create a checkpoint of the current state of training
    checkpoint = {
        'state_dict': model.state_dict(),
        'minValLoss': minValLoss,
    }
    # save the last checkpoint
    torch.save(checkpoint, 'savedModel_feedForward_last.pt')

    #### Calculate validation loss
    valLoss = validate(model, dataloaderValid)
    print(f"validation Loss = {valLoss:.4f}")

    if valLoss < minValLoss:
        minValLoss = valLoss
        # then save checkpoint
        checkpoint = {
            'state_dict': model.state_dict(),
            'minValLoss': minValLoss,
        }
        torch.save(checkpoint, 'savedModel_feedForward_best.pt')



  cpuset_checked))
  0%|          | 0/30 [00:04<?, ?it/s]
