## Setup
### Project setup

In [886]:
# One-off environment bootstrap; the body is skipped unless `run_init` is truthy.
# NOTE(review): `run_init` is not assigned anywhere above this cell - the two
# cells below set it - so on a fresh kernel this cell depends on one of them
# having been executed first.
if run_init:
    # Upgrade pip inside the kernel's environment.
    %pip install -U pip
    # Clone the project repository only when the checkout directory is missing.
    !if  [ ! -d "deep-learning-project" ] ; then git clone https://github.com/albertsgarde/deep-learning-project.git; fi
    # Throw away local changes in the checkout and pull the latest commits.
    !cd deep-learning-project && git reset --hard && git pull
    # Run the repository's setup script, passing the checkout directory as its argument.
    !source deep-learning-project/setup.sh deep-learning-project
# Clear the flag so that re-running this cell does not repeat the setup.
run_init = False

In [887]:
# Manual toggle: arms the `if run_init:` branch of the project-setup cell for its next execution.
run_init = True

In [888]:
# Manual toggle: disarms the `if run_init:` branch of the project-setup cell.
run_init = False

### Imports

In [889]:
import numpy as np
import itertools
import torch
import torch.nn as nn
import torch.nn.functional as nn_func
import torch.optim as optim
from torch.autograd import Variable

import audio_samples_py as aus

### Device setup

In [890]:
# Prefer the GPU whenever PyTorch reports a usable CUDA device; otherwise use the CPU.
use_cuda = torch.cuda.is_available()
if use_cuda:
    device = torch.device("cuda")
    print("Running GPU.")
else:
    device = torch.device("cpu")
    print("No GPU available.")

Running GPU.


## Data
### Parameters

In [891]:
# Number of samples per generated data point: passed as `num_samples` to
# aus.DataParameters and used as the input length when sizing the network.
SAMPLE_LENGTH = 256

### Generation

In [892]:
import warnings

class AudioDataSet(torch.utils.data.Dataset):
    """Map-style dataset whose items are generated on demand from their index.

    Nothing is stored: every lookup asks `parameters` to generate the data
    point for the requested index.
    """

    def __init__(self, parameters: aus.DataParameters):
        """Keep the generation parameters used to produce every item."""
        self.parameters = parameters

    def __len__(self):
        """Report the largest int64 value, making the dataset effectively unbounded."""
        return np.iinfo(np.int64).max

    def __getitem__(self, index):
        """Return `(samples, target)` for the data point generated at `index`.

        `samples` is whatever `get_samples()` returns, passed through unchanged.
        `target` wraps the frequency-map value in a one-element tensor with an
        extra leading dimension added.
        """
        point = self.parameters.generate_at_index(index)
        samples = point.get_samples()
        target = torch.unsqueeze(torch.tensor([point.get_frequency_map()]), 0)
        return samples, target



In [893]:

# Keyword arguments shared by the training and validation DataLoaders.
data_loader_params = dict(batch_size=16)

# Base generation recipe: SAMPLE_LENGTH samples per data point plus one sine component.
# NOTE(review): the (0.5, 0.75) tuple is handed to add_sine as-is; its meaning
# is defined by audio_samples_py - confirm against that library.
parameters = aus.DataParameters(num_samples=SAMPLE_LENGTH).add_sine((0.5, 0.75))

# Training data: seed offset 0, exposed both as a generator and as a batched loader.
training_parameters = parameters.with_seed_offset(0)
training_generator = aus.DataGenerator(training_parameters)
training_loader = torch.utils.data.DataLoader(
    AudioDataSet(training_parameters),
    **data_loader_params,
)

# Validation data: same recipe with seed offset 1.
validation_parameters = parameters.with_seed_offset(1)
validation_generator = aus.DataGenerator(validation_parameters)
validation_loader = torch.utils.data.DataLoader(
    AudioDataSet(validation_parameters),
    **data_loader_params,
)




## Neural Network

In [894]:
# Hyperparameters of the convolutional stack; every list holds one entry per layer.
conv_depth = 2
channels = [8, 8]
kernel_size = [5, 5]
stride = [1] * conv_depth
# Half the kernel width (rounded down) on each side, which keeps the signal
# length unchanged for odd kernels at stride 1.
padding = [(width - 1) // 2 for width in kernel_size]
pooling = [1] * conv_depth



In [895]:
# Every per-layer hyperparameter list must have exactly one entry per conv layer.
assert len(channels) == conv_depth
assert len(kernel_size) == conv_depth
assert len(stride) == conv_depth
assert len(padding) == conv_depth
assert len(pooling) == conv_depth

# Track the signal length through the stack so the final linear layer can be sized.
conv_size = []
input_size = SAMPLE_LENGTH
for i in range(conv_depth):
    # Length lost to the kernel after accounting for padding on both sides.
    conv_dim_reduction = kernel_size[i] - 1 - padding[i] * 2
    # Conv1d output length (dilation 1):
    #   floor((L + 2*padding - (kernel_size - 1) - 1) / stride) + 1
    # Integer arithmetic keeps this exact for strides larger than 1.
    conv_length = (input_size - conv_dim_reduction - 1) // stride[i] + 1
    # Non-overlapping pooling divides the length, rounding down.
    conv_size.append(conv_length // pooling[i])
    input_size = conv_size[i]

# Number of features seen by the linear layer: final length times final channel count.
conv_output_size = conv_size[-1] * channels[-1]

print(conv_output_size)

2048


In [896]:
class Net(nn.Module):
    """1-D convolutional regressor: a stack of Conv1d+ReLU layers and a linear head.

    The layer configuration is read from the notebook-level hyperparameters
    (`conv_depth`, `channels`, `kernel_size`, `stride`, `padding`) and the
    linear head expects `conv_output_size` flattened features.
    """

    def __init__(self):
        super().__init__()

        # Channel count entering each layer: 1 for the raw signal, then the
        # previous layer's output channels.
        layer_channels = [1, *channels]
        self.convs = nn.ModuleList([
            nn.Conv1d(
                in_channels=layer_channels[i],
                out_channels=layer_channels[i + 1],
                kernel_size=kernel_size[i],
                stride=stride[i],
                padding=padding[i],
            )
            for i in range(conv_depth)
        ])

        self.lin_out = nn.Linear(in_features=conv_output_size, out_features=1)

    def forward(self, x):
        """Map a batch of signals to one value each, returned with two trailing singleton dims."""
        # Insert the channel dimension expected by Conv1d.
        features = x.unsqueeze(1)

        for conv in self.convs:
            features = nn_func.relu(conv(features))

        # Collapse channels and length into one feature vector per batch element.
        flat = features.flatten(1)
        return self.lin_out(flat).unsqueeze(1)

# Build the model and place it on the device selected in the device-setup cell
# (CUDA when available, otherwise the CPU).
net = Net().to(device)
print(net)

Net(
  (convs): ModuleList(
    (0): Conv1d(1, 8, kernel_size=(5,), stride=(1,), padding=(2,))
    (1): Conv1d(8, 8, kernel_size=(5,), stride=(1,), padding=(2,))
  )
  (lin_out): Linear(in_features=2048, out_features=1, bias=True)
)


## Training

In [897]:
# Step size handed to the AdamW optimizer as `lr`.
LEARNING_RATE = 1e-3
# Coefficient handed to the AdamW optimizer as `weight_decay`.
WEIGHT_DECAY = 1e-5

In [898]:
# Mean-squared error between the network output and the target tensor.
criterion = nn.MSELoss()  

# AdamW applies *decoupled* weight decay: the parameters are shrunk directly
# at each step, which is not equivalent to adding an L2 penalty to the loss.
optimizer = optim.AdamW(net.parameters(), lr=LEARNING_RATE, weight_decay=WEIGHT_DECAY) 


In [899]:
def to_torch(x):
    """Turn a NumPy array into a tensor, moved to the GPU when `use_cuda` is set.

    Args:
        x: NumPy array to convert. On the CPU path the returned tensor shares
            memory with `x` (behaviour of `torch.from_numpy`).

    Returns:
        A `torch.Tensor` with the same dtype and shape as `x`.
    """
    # `torch.autograd.Variable` has been a deprecated no-op wrapper since
    # tensors and variables were merged, so the tensor is used directly.
    tensor = torch.from_numpy(x)
    return tensor.cuda() if use_cuda else tensor

def test_net(net, validation_loader, criterion):
    was_training = net.training
    net.eval()
    num_iterations = 100
    total_loss = 0
    for data_point, frequency in itertools.islice(validation_loader, num_iterations):
        data_point = data_point.to(device)
        frequency = frequency.to(device)
        output = net(data_point)
        total_loss += criterion(output, frequency)
    net.train(mode=was_training)
    return total_loss/num_iterations

def manual_test(net, validation_generator, num_iterations=5):
    """Print target and predicted frequency for a few generated data points.

    Args:
        net: Model to query. It is switched to eval mode for the duration of
            the call and put back in its previous mode afterwards.
        validation_generator: Object whose `next_n(k)` yields `k` data points
            exposing `get_samples()` and `get_frequency_map()`.
        num_iterations: Number of data points to print.

    Both the target and the network output are passed through the
    notebook-level `parameters.map_to_frequency` before printing.
    """
    was_training = net.training
    net.eval()
    try:
        # Pure inference: no autograd graph is needed.
        with torch.no_grad():
            for data_point in validation_generator.next_n(num_iterations):
                # Add a leading batch dimension of size one.
                samples = to_torch(data_point.get_samples()).unsqueeze(0)
                freq_map = to_torch(np.array([data_point.get_frequency_map()])).unsqueeze(0)
                output = net(samples)
                print("Frequency: ", parameters.map_to_frequency(freq_map.item()), " Output: ", parameters.map_to_frequency(output.item()))
    finally:
        # Restore the caller's train/eval mode even if a prediction fails.
        net.train(mode=was_training)



In [900]:
# Total number of optimisation steps, and how often the validation loss is reported.
NUM_BATCHES = 2000
EVAL_EVERY = 200

# Show a few predictions of the network before any training step.
manual_test(net, validation_generator)

net.train()

training_batches = itertools.islice(training_loader, NUM_BATCHES)
for i, (data_point, frequency) in enumerate(training_batches):
    # Report the validation loss periodically, including before the first update.
    if i % EVAL_EVERY == 0:
        validation_loss = test_net(net, validation_loader, criterion)
        print(f"Loss at iteration {i}: {validation_loss}")

    data_point, frequency = data_point.to(device), frequency.to(device)

    # One optimisation step: clear old gradients, forward, backward, update.
    optimizer.zero_grad()
    output = net(data_point)
    loss = criterion(output, frequency)
    loss.backward()
    optimizer.step()

# Final validation loss and predictions after the last update.
validation_loss = test_net(net, validation_loader, criterion)
print(f"Loss at iteration {NUM_BATCHES}: {validation_loss}")
manual_test(net, validation_generator)
    

Frequency:  21.806589126586914  Output:  580.6998291015625
Frequency:  7476.90869140625  Output:  415.2980651855469
Frequency:  50.53485107421875  Output:  584.01318359375
Frequency:  10745.4326171875  Output:  593.9097290039062
Frequency:  18996.27734375  Output:  428.7369079589844
Loss at iteration 0: 0.34553542733192444
Loss at iteration 200: 0.005130981560796499
Loss at iteration 400: 0.0028716498054564
Loss at iteration 600: 0.0018242570804432034
Loss at iteration 800: 0.001974422251805663
Loss at iteration 1000: 0.0011648256331682205
Loss at iteration 1200: 0.0007377301226370037
Loss at iteration 1400: 0.0008496105438098311
Loss at iteration 1600: 0.0006910804659128189
Loss at iteration 1800: 0.0007181748514994979
Loss at iteration 2000: 0.0007483682129532099
Frequency:  260.94561767578125  Output:  274.1214294433594
Frequency:  15659.8359375  Output:  13606.0205078125
Frequency:  5092.42333984375  Output:  4870.1962890625
Frequency:  193.81607055664062  Output:  167.282455444335