# Setup

Set the working directory to the `guitarist-net` project root so that its modules can be imported.

In [1]:
# Change into the project directory so the project-local imports in later cells resolve.
# NOTE(review): hard-coded absolute path (looks like a Colab Google Drive mount) — adjust when running elsewhere.
%cd /content/drive/Othercomputers/My MacBook Pro/guitarist-net

/content/drive/Othercomputers/My MacBook Pro/guitarist-net


install requirements

In [2]:
!pip install -r requirements.txt &> /dev/null

imports

In [3]:
import numpy as np
import torch
import torch.nn as nn
import matplotlib.pyplot as plt
from plot_listen.listen import play
from plot_listen.plot import plot
import librosa.display
from globals import *
from ddsp.mlp import MLP
from ddsp.ddsp_decoder import DDSPDecoder

# Test MLP

In [4]:
# Instantiate the MLP with no constructor arguments (i.e. whatever defaults it defines).
mlp = MLP()

In [5]:
# Print the module's layer structure as a quick sanity check.
print(mlp)

MLP(
  (model): Sequential(
    (0): Linear(in_features=1, out_features=512, bias=True)
    (1): LayerNorm((512,), eps=1e-05, elementwise_affine=True)
    (2): ReLU()
    (3): Linear(in_features=512, out_features=512, bias=True)
    (4): LayerNorm((512,), eps=1e-05, elementwise_affine=True)
    (5): ReLU()
    (6): Linear(in_features=512, out_features=512, bias=True)
    (7): LayerNorm((512,), eps=1e-05, elementwise_affine=True)
    (8): ReLU()
  )
)


# Test Decoder

In [6]:
# Instantiate the decoder with no constructor arguments (i.e. whatever defaults it defines).
decoder = DDSPDecoder()

In [7]:
# Print the decoder's submodule structure as a quick sanity check.
print(decoder)

DDSPDecoder(
  (mlp_f0): MLP(
    (model): Sequential(
      (0): Linear(in_features=1, out_features=512, bias=True)
      (1): LayerNorm((512,), eps=1e-05, elementwise_affine=True)
      (2): ReLU()
      (3): Linear(in_features=512, out_features=512, bias=True)
      (4): LayerNorm((512,), eps=1e-05, elementwise_affine=True)
      (5): ReLU()
      (6): Linear(in_features=512, out_features=512, bias=True)
      (7): LayerNorm((512,), eps=1e-05, elementwise_affine=True)
      (8): ReLU()
    )
  )
  (mlp_loudness): MLP(
    (model): Sequential(
      (0): Linear(in_features=1, out_features=512, bias=True)
      (1): LayerNorm((512,), eps=1e-05, elementwise_affine=True)
      (2): ReLU()
      (3): Linear(in_features=512, out_features=512, bias=True)
      (4): LayerNorm((512,), eps=1e-05, elementwise_affine=True)
      (5): ReLU()
      (6): Linear(in_features=512, out_features=512, bias=True)
      (7): LayerNorm((512,), eps=1e-05, elementwise_affine=True)
      (8): ReLU()
    )
  )

In [8]:
from torchsummary import summary

In [9]:
# Layer-by-layer summary of the decoder. One input shape is given per forward
# argument (two inputs of 1000 elements each); the batch dimension is omitted
# here and is reported as -1 in the printed output shapes.
summary(decoder, [(1000,), (1000,)])

in decoder forward
before gru
after gru
after final mlp
after dense layers
after softmax
after store in dict
----------------------------------------------------------------
        Layer (type)               Output Shape         Param #
            Linear-1            [-1, 1000, 512]           1,024
         LayerNorm-2            [-1, 1000, 512]           1,024
              ReLU-3            [-1, 1000, 512]               0
            Linear-4            [-1, 1000, 512]         262,656
         LayerNorm-5            [-1, 1000, 512]           1,024
              ReLU-6            [-1, 1000, 512]               0
            Linear-7            [-1, 1000, 512]         262,656
         LayerNorm-8            [-1, 1000, 512]           1,024
              ReLU-9            [-1, 1000, 512]               0
              MLP-10            [-1, 1000, 512]               0
           Linear-11            [-1, 1000, 512]           1,024
        LayerNorm-12            [-1, 1000, 512]           

Try sending dummy input through the decoder.

In [10]:
# All-zero placeholder inputs: 32 rows of 1000 values each.
# The loudness tensor is a separate tensor with the same shape and dtype as f0.
dummy_f0 = torch.zeros(32, 1000)
dummy_loudness = torch.zeros_like(dummy_f0)

In [11]:
# Forward pass on the dummy inputs. The result `p` is indexed by string keys
# in later cells ('f0', 'overall_amplitude', 'harm_distr').
p = decoder(dummy_f0, dummy_loudness)


in decoder forward
before gru
after gru
after final mlp
after dense layers
after softmax
after store in dict


In [12]:
# Report the shape of each decoder output, one per line, in a fixed order.
for key in ('f0', 'overall_amplitude', 'harm_distr'):
    print(p[key].shape)

torch.Size([32, 1000])
torch.Size([32, 1000])
torch.Size([32, 1000, 128])


# Feed into noise + harmonic synths

In [13]:
from ddsp.filtered_noise import FilteredNoise
from ddsp.harmonic_oscillator import HarmonicOscillator

In [14]:
# Build both synthesizers with no constructor arguments (noise first, then harmonic).
noise_synth, harm_synth = FilteredNoise(), HarmonicOscillator()

In [15]:
# Run the filtered-noise synth on the decoder output `p`.
noise_audio = noise_synth(p)

In [16]:
# Listen to the first item of the noise output.
# detach() drops the autograd graph and cpu() moves the data to host memory, so
# the NumPy conversion also works if the tensors live on an accelerator
# (cpu() is a no-op for tensors that are already on the CPU).
play(noise_audio.detach().cpu().numpy()[0])

In [None]:
# Run the harmonic oscillator synth on the decoder output `p`.
# NOTE(review): this cell has no execution count in the saved notebook, so it
# has not been run here — its behaviour is unverified.
harmonic_audio = harm_synth(p)