## Setup
### Project setup

In [1254]:
# One-time environment bootstrap, guarded by the `run_init` flag.
# NOTE(review): `run_init` is read here but only assigned in later cells of this
# notebook, so on a fresh kernel this cell appears to need one of those cells
# to be run first — confirm.
if run_init:
    # Upgrade pip inside the kernel's environment.
    %pip install -U pip
    # Clone the project repository unless its directory already exists.
    !if  [ ! -d "deep-learning-project" ] ; then git clone https://github.com/albertsgarde/deep-learning-project.git; fi
    # Discard local changes in the checkout and pull the latest commits.
    !cd deep-learning-project && git reset --hard && git pull
    # Run the project's setup script, passing the checkout directory as its argument.
    !source deep-learning-project/setup.sh deep-learning-project
# Clear the flag so re-running this cell does not repeat the setup.
run_init = False

In [1255]:
# Set the flag that makes the project-setup cell perform its initialization.
run_init = True

In [1256]:
# Clear the flag so the project-setup cell skips its initialization.
run_init = False

### Imports

In [1257]:
import numpy as np
import itertools
import torch
import torch.nn as nn
import torch.nn.functional as nn_func
import torch.optim as optim
from torch.autograd import Variable

import audio_samples_py as aus

### Device setup

In [1258]:
# Use the GPU when CUDA is available, otherwise fall back to the CPU.
use_cuda = torch.cuda.is_available()
device = torch.device("cuda") if use_cuda else torch.device("cpu")
print("Running GPU." if use_cuda else "No GPU available.")

Running GPU.


## Data
### Parameters

In [1259]:
# Number of samples per generated data point (passed as `num_samples` to the
# data parameters); also used as the network's input length.
SAMPLE_LENGTH = 256

### Generation

In [1260]:
import warnings

class AudioDataSet(torch.utils.data.Dataset):
    """Map-style dataset whose items are generated on demand.

    Nothing is stored: each item is produced by calling
    ``generate_at_index`` on the supplied data parameters.
    """

    def __init__(self, parameters: aus.DataParameters):
        """Keep a reference to the parameters used to generate data points."""
        self.parameters = parameters

    def __len__(self):
        """Report the largest signed 64-bit integer as the dataset length."""
        return np.iinfo(np.int64).max

    def __getitem__(self, index):
        """Generate the data point at ``index``.

        Returns:
            A ``(samples, target)`` pair: the data point's samples as returned
            by ``get_samples()``, and its frequency map wrapped in a tensor
            with two leading singleton axes (shape ``(1, 1)`` assuming
            ``get_frequency_map()`` returns a scalar — TODO confirm).
        """
        generated = self.parameters.generate_at_index(index)
        samples = generated.get_samples()
        target = torch.tensor([generated.get_frequency_map()]).unsqueeze(0)
        return samples, target



In [1261]:

# Keyword arguments shared by the training and validation loaders.
data_loader_params = dict(batch_size=64)

# Base data description: SAMPLE_LENGTH samples per data point with a sine component added.
parameters = aus.DataParameters(num_samples=SAMPLE_LENGTH)
parameters = parameters.add_sine((0.5, 0.75))

# Training and validation use different seed offsets (0 and 1),
# presumably so the two generated streams differ — TODO confirm.
training_parameters = parameters.with_seed_offset(0)
training_generator = aus.DataGenerator(training_parameters)
training_loader = torch.utils.data.DataLoader(
    AudioDataSet(training_parameters),
    **data_loader_params,
)

validation_parameters = parameters.with_seed_offset(1)
validation_generator = aus.DataGenerator(validation_parameters)
validation_loader = torch.utils.data.DataLoader(
    AudioDataSet(validation_parameters),
    **data_loader_params,
)




## Neural Network

### Network Parameters

In [1262]:
# --- Convolutional stack ---
# Only the first `conv_depth` entries of each per-layer list are used by the
# network; longer lists are permitted by the validation cell.
conv_depth = 2
channels = [8, 8, 8, 8]
kernel_sizes = [5, 5, 5, 5]
strides = [1] * conv_depth
# Padding of (k - 1) / 2 on each side, so a stride-1 convolution with an odd
# kernel keeps the signal length unchanged.
paddings = [(kernel_size - 1) // 2 for kernel_size in kernel_sizes]
poolings = [2, 2, 2, 2]
conv_batch_norms = [False] * conv_depth
conv_dropouts = [0.0] * conv_depth


# --- Fully connected stack ---
lin_depth = 2
features = [256, 128]
lin_batch_norms = [False] * lin_depth
lin_dropouts = [0.0] * lin_depth

### Parameter validation and processing

In [1263]:
# --- Convolutional stack ---
# Every per-layer list must provide a value for each of the `conv_depth`
# layers; extra trailing entries are allowed and ignored by the network.
for list_name, values in (
    ("channels", channels),
    ("kernel_sizes", kernel_sizes),
    ("strides", strides),
    ("paddings", paddings),
    ("poolings", poolings),
    ("conv_batch_norms", conv_batch_norms),
    ("conv_dropouts", conv_dropouts),
):
    assert len(values) >= conv_depth, f"{list_name} must have at least conv_depth ({conv_depth}) entries."
for kernel_size in kernel_sizes:
    assert kernel_size % 2 == 1, "Only odd kernel sizes are supported."
for dropout in conv_dropouts:
    assert 0 <= dropout <= 1, "Dropout must be between 0 and 1."

# Track the signal length through each conv + pool stage.
conv_size = []
input_size = SAMPLE_LENGTH
for i in range(conv_depth):
    # Samples lost to the convolution itself (zero when padding == (kernel - 1) / 2).
    conv_dim_reduction = kernel_sizes[i] - 1 - paddings[i] * 2
    remaining_size = input_size - conv_dim_reduction
    downsampling = strides[i] * poolings[i]
    # Requiring exact divisibility keeps the computed size equal to what the
    # strided convolution followed by max pooling actually produces.
    assert remaining_size % downsampling == 0, (
        f"Conv layer {i}: size {remaining_size} is not divisible by stride*pooling ({downsampling})."
    )
    conv_size.append(remaining_size // downsampling)
    input_size = conv_size[i]
    print("Conv layer {} has output size {} and {} channels.".format(i, conv_size[i], channels[i]))

# Number of features after flattening the conv output. Use the channel count of
# the last layer that is actually built (index conv_depth - 1), not the last
# list entry: `channels` may be longer than `conv_depth`. With no conv layers
# the network flattens the raw single-channel input.
conv_output_channels = channels[conv_depth - 1] if conv_depth > 0 else 1
conv_output_size = input_size * conv_output_channels


# --- Fully connected stack ---
for list_name, values in (
    ("features", features),
    ("lin_batch_norms", lin_batch_norms),
    ("lin_dropouts", lin_dropouts),
):
    assert len(values) >= lin_depth, f"{list_name} must have at least lin_depth ({lin_depth}) entries."
for dropout in lin_dropouts:
    assert 0 <= dropout <= 1, "Dropout must be between 0 and 1."
for i in range(lin_depth):
    print("Lin layer {} has output size {}.".format(i, features[i]))

Conv layer 0 has output size 128 and 8 channels.
Conv layer 1 has output size 64 and 8 channels.
Lin layer 0 has output size 256.
Lin layer 1 has output size 128.


### Network

In [1264]:
class Net(nn.Module):
    """1-D convolutional regressor built from the notebook-level hyperparameters.

    The layout is read from module-level settings when the model is constructed:
    ``conv_depth`` convolution stages, ``lin_depth`` fully connected stages and
    a final linear layer with a single output feature.
    """

    def __init__(self):
        super().__init__()

        # Convolution stages: Conv1d, MaxPool1d, optional BatchNorm1d, Dropout.
        # The first stage takes a single input channel.
        self.convs = nn.ModuleList()
        prev_channels = 1
        for layer in range(conv_depth):
            out_channels = channels[layer]
            stage = [
                nn.Conv1d(
                    prev_channels,
                    out_channels,
                    kernel_sizes[layer],
                    stride=strides[layer],
                    padding=paddings[layer],
                ),
                nn.MaxPool1d(poolings[layer]),
                nn.BatchNorm1d(out_channels) if conv_batch_norms[layer] else nn.Identity(),
                nn.Dropout(p=conv_dropouts[layer]),
            ]
            self.convs.append(nn.ModuleList(stage))
            prev_channels = out_channels

        # Fully connected stages: Linear, optional BatchNorm1d, Dropout.
        # The first stage consumes the flattened convolution output.
        self.lins = nn.ModuleList()
        prev_features = conv_output_size
        for layer in range(lin_depth):
            out_features = features[layer]
            stage = [
                nn.Linear(prev_features, out_features),
                nn.BatchNorm1d(out_features) if lin_batch_norms[layer] else nn.Identity(),
                nn.Dropout(p=lin_dropouts[layer]),
            ]
            self.lins.append(nn.ModuleList(stage))
            prev_features = out_features

        # Output head producing one value per example.
        self.lin_out = nn.Linear(prev_features, 1)

    def forward(self, x):
        """Run the network on ``x`` and return the prediction.

        A channel axis is inserted at position 1 before the convolutions, and
        an extra axis is inserted at position 1 of the final linear output.
        """
        x = x.unsqueeze(1)

        # conv -> pool -> ReLU -> batch norm (or identity) -> dropout
        for conv, pool, batchnorm, dropout in self.convs:
            x = dropout(batchnorm(nn_func.relu(pool(conv(x)))))

        # Collapse every axis after the batch axis into one feature axis.
        x = x.flatten(1)

        # linear -> ReLU -> batch norm (or identity) -> dropout
        for lin, batchnorm, dropout in self.lins:
            x = dropout(batchnorm(nn_func.relu(lin(x))))

        prediction = self.lin_out(x)
        return prediction.unsqueeze(1)

# Build the model, place it on the selected device and show its layer structure.
net = Net().to(device)
print(net)

Net(
  (convs): ModuleList(
    (0): ModuleList(
      (0): Conv1d(1, 8, kernel_size=(5,), stride=(1,), padding=(2,))
      (1): MaxPool1d(kernel_size=2, stride=2, padding=0, dilation=1, ceil_mode=False)
      (2): Identity()
      (3): Dropout(p=0.0, inplace=False)
    )
    (1): ModuleList(
      (0): Conv1d(8, 8, kernel_size=(5,), stride=(1,), padding=(2,))
      (1): MaxPool1d(kernel_size=2, stride=2, padding=0, dilation=1, ceil_mode=False)
      (2): Identity()
      (3): Dropout(p=0.0, inplace=False)
    )
  )
  (lins): ModuleList(
    (0): ModuleList(
      (0): Linear(in_features=512, out_features=256, bias=True)
      (1): Identity()
      (2): Dropout(p=0.0, inplace=False)
    )
    (1): ModuleList(
      (0): Linear(in_features=256, out_features=128, bias=True)
      (1): Identity()
      (2): Dropout(p=0.0, inplace=False)
    )
  )
  (lin_out): Linear(in_features=128, out_features=1, bias=True)
)


## Training

In [1265]:
# Optimizer step size (passed as `lr` to the optimizer).
LEARNING_RATE = 1e-2
# Weight-decay coefficient (passed as `weight_decay` to the optimizer).
WEIGHT_DECAY = 1e-5

In [1266]:
# Mean squared error between the network output and the target.
criterion = nn.MSELoss()  

# AdamW applies *decoupled* weight decay: the decay is applied directly to the
# weights rather than added to the gradient, so it is not the same as adding
# an L2 penalty to the loss.
optimizer = optim.AdamW(net.parameters(), lr=LEARNING_RATE, weight_decay=WEIGHT_DECAY) 


In [1267]:
def to_torch(x):
    """Convert a NumPy array to a tensor on the notebook's active device.

    Args:
        x: NumPy array to convert. The dtype is preserved.

    Returns:
        A tensor on ``device``. On the CPU the tensor shares memory with ``x``.
    """
    # `Variable` has been a deprecated no-op wrapper since PyTorch 0.4, so a
    # plain tensor is used. `.to(device)` matches how the rest of the notebook
    # moves data instead of branching on `use_cuda`.
    return torch.from_numpy(x).to(device)

def test_net(net, validation_loader, criterion):
    was_training = net.training
    net.eval()
    num_iterations = 100
    total_loss = 0
    for data_point, frequency in itertools.islice(validation_loader, num_iterations):
        data_point = data_point.to(device)
        frequency = frequency.to(device)
        output = net(data_point)
        total_loss += criterion(output, frequency)
    net.train(mode=was_training)
    return total_loss/num_iterations

def manual_test(net, validation_generator, num_iterations=5):
    """Print target and predicted frequencies for a few generated data points.

    The network is switched to evaluation mode while predicting and its
    previous training/eval mode is restored afterwards, even if an error occurs.

    Args:
        net: Model to inspect.
        validation_generator: Object whose ``next_n(n)`` yields data points
            providing ``get_samples()`` and ``get_frequency_map()``.
        num_iterations: Number of data points to print.
    """
    was_training = net.training
    net.eval()
    try:
        # Inference only: skip autograd bookkeeping.
        with torch.no_grad():
            for data_point in validation_generator.next_n(num_iterations):
                # Add a leading batch axis of size one.
                samples = to_torch(data_point.get_samples()).unsqueeze(0)
                freq_map = to_torch(np.array([data_point.get_frequency_map()])).unsqueeze(0)
                output = net(samples)
                # Both values are passed through the notebook-level
                # `parameters.map_to_frequency` before printing.
                print("Frequency: ", parameters.map_to_frequency(freq_map.item()), " Output: ", parameters.map_to_frequency(output.item()))
    finally:
        net.train(mode=was_training)



In [1268]:
# Number of training batches to run, and how often (in batches) to report the validation loss.
NUM_BATCHES = 2000
EVAL_EVERY = 200


# Print a few predictions from the network before this training run.
manual_test(net, validation_generator)

net.train()

# islice caps the loop at NUM_BATCHES batches from the training loader.
for i, (data_point, frequency) in enumerate(itertools.islice(training_loader, NUM_BATCHES)):
    # Report the validation loss before the update for this batch is applied.
    if i%EVAL_EVERY == 0:
        print(f"Loss at iteration {i}: {test_net(net, validation_loader, criterion)}")
    data_point = data_point.to(device)
    frequency = frequency.to(device)
    output = net(data_point)
    loss = criterion(output, frequency)

    # Clear gradients from the previous step, backpropagate, then update the weights.
    optimizer.zero_grad()
    loss.backward()
    optimizer.step()

# Final validation loss and a few predictions after training.
print(f"Loss at iteration {NUM_BATCHES}: {test_net(net, validation_loader, criterion)}")
manual_test(net, validation_generator)
    

Frequency:  21.806589126586914  Output:  542.9698486328125
Frequency:  7476.90869140625  Output:  550.0506591796875
Frequency:  50.53485107421875  Output:  550.6035766601562
Frequency:  10745.4326171875  Output:  552.8925170898438
Frequency:  18996.27734375  Output:  560.2656860351562
Loss at iteration 0: 0.33942902088165283
Loss at iteration 200: 0.0011569223133847117
Loss at iteration 400: 0.0005094261723570526
Loss at iteration 600: 0.0002821552916429937
Loss at iteration 800: 0.0002784880925901234
Loss at iteration 1000: 0.0004958484205417335
Loss at iteration 1200: 0.0001786417851690203
Loss at iteration 1400: 0.0001682821603026241
Loss at iteration 1600: 0.0005671387771144509
Loss at iteration 1800: 0.00015976777649484575
Loss at iteration 2000: 0.00011729608377208933
Frequency:  260.94561767578125  Output:  263.640869140625
Frequency:  15659.8359375  Output:  15760.86328125
Frequency:  5092.42333984375  Output:  5210.7314453125
Frequency:  193.81607055664062  Output:  192.347946