<a href="https://colab.research.google.com/github/magenta/ddsp/blob/main/ddsp/colab/tutorials/2_processor_group.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>


##### Copyright 2021 Google LLC.

Licensed under the Apache License, Version 2.0 (the "License");





In [None]:
# Copyright 2021 Google LLC. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
#     http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
# ==============================================================================

# DDSP ProcessorGroup

This notebook demonstrates the use of a `ProcessorGroup()` as an alternative to stringing signal `Processors()` together in python. 

The main advantage of using a ProcessorGroup is that the entire signal processing chain can be specified in a `.gin` file, as a Directed Acyclic Graph (DAG), removing the need to write code in python for every configuration of processors.


In this tutorial we're going to synthesize some audio from these example controls in three different ways.

* With processors and python control flow
* With a ProcessorGroup DAG (via list)
* With a ProcessorGroup DAG (via gin)



In [None]:
#@title Install DDSP

#@markdown Install ddsp in a conda environment with Python 3.9 for compatibility.

!rm -rf /content/miniconda
!curl -L https://repo.anaconda.com/miniconda/Miniconda3-py39_23.11.0-2-Linux-x86_64.sh -o miniconda.sh
!chmod +x miniconda.sh
!sh miniconda.sh -b -p /content/miniconda
!/content/miniconda/bin/pip install tensorflow==2.11 tensorflow-probability==0.19.0 tensorflow-datasets==4.9.0 ddsp==3.7.0
print('\nDone installing DDSP in conda environment!')

In [None]:
#@title Import display helpers

import warnings
# Suppress every Python warning raised in this kernel from here on.
warnings.filterwarnings("ignore")

import base64
import io

import numpy as np
import matplotlib.pyplot as plt
from IPython import display
from scipy.io import wavfile
from scipy import signal as scipy_signal

# Audio sample rate shared by the notebook: it is the default rate play()
# writes into the WAV data, the `fs` passed to the STFT in specplot(), and the
# value the example inputs use to size the impulse response.
sample_rate = 16000


def play(array_of_floats, sample_rate=sample_rate):
  """Play audio in colab using HTML5 audio widget.

  Args:
    array_of_floats: Audio samples as a list, array, or any object that
      `np.asarray` can convert. A 2-D input is treated as a batch and only its
      first row is played. Samples are expected to lie in [-1, 1]; values
      outside that range are clipped.
    sample_rate: Sample rate passed to the WAV writer.
  """
  samples = np.asarray(array_of_floats)
  if len(samples.shape) == 2:
    samples = samples[0]
  normalizer = float(np.iinfo(np.int16).max)
  # Clip before the cast: out-of-range floats do not saturate when converted
  # to int16, they overflow and play back as loud wrap-around artifacts.
  array_of_ints = (np.clip(samples, -1.0, 1.0) * normalizer).astype(np.int16)
  # The context manager releases the buffer even if writing the WAV fails.
  with io.BytesIO() as memfile:
    wavfile.write(memfile, sample_rate, array_of_ints)
    encoded_wav = base64.b64encode(memfile.getvalue()).decode('ascii')
  html = """<audio controls>
              <source controls src="data:audio/wav;base64,{base64_wavfile}"
              type="audio/wav" />
              Your browser does not support the audio element.
            </audio>"""
  html = html.format(base64_wavfile=encoded_wav)
  display.display(display.HTML(html))


def specplot(audio, vmin=-5, vmax=1, rotate=True, size=512 + 256):
  """Show a log-magnitude STFT spectrogram of `audio` with matplotlib.

  Args:
    audio: Samples as a list or array. For a 2-D input only the first row is
      plotted.
    vmin: Lower limit of the color scale, in log10 magnitude.
    vmax: Upper limit of the color scale, in log10 magnitude.
    rotate: If True, flip the image vertically before drawing it.
    size: STFT segment length in samples; consecutive segments overlap by
      three quarters of this length.
  """
  samples = np.array(audio) if isinstance(audio, list) else audio
  if len(samples.shape) == 2:
    samples = samples[0]
  segment_overlap = size * 3 // 4
  # Only the complex STFT matrix is needed; the frequency and time axes that
  # stft() also returns are discarded.
  _, _, stft_matrix = scipy_signal.stft(
      samples, fs=sample_rate, nperseg=size, noverlap=segment_overlap)
  # The small offset keeps log10 finite where the magnitude is zero.
  logmag = np.log10(np.abs(stft_matrix) + 1e-7)
  image = np.flipud(logmag) if rotate else logmag
  plt.matshow(image, vmin=vmin, vmax=vmax, cmap=plt.cm.magma, aspect='auto')
  plt.xticks([])
  plt.yticks([])
  plt.xlabel('Time')
  plt.ylabel('Frequency')


print('Helpers imported!')

# Example processor inputs

Some signals to be used in the rest of the notebook.

In [None]:
import os

# Constants
n_frames = 1000
hop_size = 64
n_samples = n_frames * hop_size


#### Harmonic controls
# Amplitude [batch, n_frames, 1].
amps = np.linspace(0.5, -5.0, n_frames)[np.newaxis, :, np.newaxis]

# Harmonic Distribution [batch, n_frames, n_harmonics].
n_harmonics = 20
harmonic_distribution = np.ones([n_frames, 1]) * np.linspace(1.0, -1.0, n_harmonics)[np.newaxis, :]
for i in range(n_harmonics):
  # The ramp must have n_frames points (not a literal 1000) so the assignment
  # keeps working when n_frames is changed.
  harmonic_distribution[:, i] = 1.0 - np.linspace(i * 0.09, 2.0, n_frames)
  # Odd-indexed harmonics are held at a constant -3 instead of the ramp.
  if i % 2 != 0:
    harmonic_distribution[:, i] = -3
harmonic_distribution = harmonic_distribution[np.newaxis, :, :]

# Fundamental frequency in Hz [batch, n_frames, 1].
f0_hz = np.linspace(300.0, 200.0, n_frames)[np.newaxis, :, np.newaxis]


### Filtered Noise controls
# Magnitudes [batch, n_frames, n_magnitudes].
n_filter_banks = 20
magnitudes = np.linspace(-1.0, -4.0, n_filter_banks)[np.newaxis, np.newaxis, :]
# Broadcasting against amps gives every filter bank the same decay over time.
magnitudes = magnitudes + amps


### Reverb controls
n_fade_in = 16 * 10
ir_size = int(sample_rate * 2)
n_fade_out = ir_size - n_fade_in

# Seeded generator so the noise impulse response, and therefore every audio
# example derived from it, is identical on each run of the notebook.
ir_rng = np.random.default_rng(0)
ir = 0.01 * ir_rng.standard_normal(ir_size)
# Linear fade-in over the first n_fade_in samples, exponential decay after.
ir[:n_fade_in] *= np.linspace(0.0, 1.0, n_fade_in)
ir[n_fade_in:] *= np.exp(np.linspace(0.0, -5.0, n_fade_out))
ir = ir[np.newaxis, :]


inputs = {
    'amps': amps,
    'harmonic_distribution': harmonic_distribution,
    'f0_hz': f0_hz,
    'magnitudes': magnitudes,
    'ir': ir,
}
inputs = {k: v.astype(np.float32) for k, v in inputs.items()}

# Save inputs for conda scripts
os.makedirs('/content/pg_inputs', exist_ok=True)
for k, v in inputs.items():
  np.save(f'/content/pg_inputs/{k}.npy', v)
print('Inputs saved.')

In [None]:
# Plot the inputs
time = np.linspace(0, n_samples / sample_rate, n_frames)
second_ticks = [0, 1, 2, 3, 4]

# One entry per panel: (subplot position, x values, y values, title).
# An x of None means the curve is plotted against its sample index and the
# x ticks are left at matplotlib's defaults.
panels = [
    (231, time, amps[0, :, 0], 'Amp Input'),
    (232, time, harmonic_distribution[0], 'Harmonic Input'),
    (233, time, f0_hz[0, :, 0], 'Fundamental Frequency'),
    (234, None, ir[0], 'Impulse Response'),
    (235, time, magnitudes[0], 'Noise Magnitudes'),
]

plt.figure(figsize=(18, 8))
for position, x_values, y_values, panel_title in panels:
  plt.subplot(position)
  if x_values is None:
    plt.plot(y_values)
  else:
    plt.plot(x_values, y_values)
    plt.xticks(second_ticks)
  plt.title(panel_title)


# Processors

You can generate a signal by stringing Processors together in Python, as you would with any other differentiable modules.

In [None]:
#@title Run Python processor chain

# SCRIPT holds the source of a standalone program. It is not executed in this
# kernel: the cell writes it to /content/pg_python_chain.py and runs it with
# the interpreter at /content/miniconda/bin/python. Data is exchanged through
# .npy files: the script loads the five arrays under /content/pg_inputs and
# saves its result to /content/pg_outputs/audio_python_chain.npy.
SCRIPT = r'''
import os
import warnings
warnings.filterwarnings("ignore")

import numpy as np
import ddsp

output_dir = '/content/pg_outputs'
os.makedirs(output_dir, exist_ok=True)

n_frames = 1000
hop_size = 64
n_samples = n_frames * hop_size

# Load inputs
inputs = {}
for k in ['amps', 'harmonic_distribution', 'f0_hz', 'magnitudes', 'ir']:
    inputs[k] = np.load(f'/content/pg_inputs/{k}.npy')

# Python processor chain
harmonic = ddsp.synths.Harmonic(n_samples=n_samples)
noise = ddsp.synths.FilteredNoise(n_samples=n_samples, initial_bias=0)
reverb = ddsp.effects.Reverb()

audio_harmonic = harmonic(inputs['amps'],
                          inputs['harmonic_distribution'],
                          inputs['f0_hz'])
audio_noise = noise(inputs['magnitudes'])
audio_dry = audio_harmonic + audio_noise
audio_out = reverb(inputs['ir'], audio_dry)

audio_np = audio_out.numpy() if hasattr(audio_out, 'numpy') else np.array(audio_out)
np.save(os.path.join(output_dir, 'audio_python_chain.npy'), audio_np)
print('Python chain audio saved.')  
'''

# Write the script to disk so the conda interpreter can run it.
with open('/content/pg_python_chain.py', 'w') as f:
  f.write(SCRIPT)

# PYTHONPATH and PYTHONHOME are unset before launching the script, presumably
# so the kernel's Python settings do not leak into the conda interpreter.
!unset PYTHONPATH PYTHONHOME && /content/miniconda/bin/python /content/pg_python_chain.py

In [None]:
# Load the audio that /content/pg_python_chain.py saved, then play it and
# plot its spectrogram.
audio_python = np.load('/content/pg_outputs/audio_python_chain.npy')
play(audio_python)
specplot(audio_python)

# ProcessorGroup

A ProcessorGroup is a Directed Acyclic Graph (DAG) of Processors.

You can specify the DAG as a list of tuples `dag = [(processor, ['input1', 'input2', ...]), ...]`, where each tuple is a pair of processor and that processor's inputs respectively.

The output signal of any processor can be referenced as an input to a different processor by the string `'processor_name/signal'` where processor_name is the name of the processor at construction.

The ProcessorGroup takes a dictionary of inputs, whose keys are referenced as inputs in the DAG.


In [None]:
# List the keys of the `inputs` dictionary; the DAG refers to these inputs by
# the same names.
print('Input keys:', list(inputs.keys()))

In [None]:
#@title Run ProcessorGroup DAG

#@markdown This runs the ProcessorGroup with the DAG definition and saves
#@markdown all intermediate signals and controls for inspection.

# SCRIPT holds the source of a standalone program. It is not executed in this
# kernel: the cell writes it to /content/pg_dag.py and runs it with the
# interpreter at /content/miniconda/bin/python. The script loads the five
# arrays under /content/pg_inputs and saves four arrays under
# /content/pg_outputs: audio_dag.npy, dag_out_signal.npy,
# dag_noise_signal.npy and dag_harmonic_distribution.npy.
SCRIPT = r'''
import os
import warnings
warnings.filterwarnings("ignore")

import numpy as np
import ddsp

output_dir = '/content/pg_outputs'
os.makedirs(output_dir, exist_ok=True)

n_frames = 1000
hop_size = 64
n_samples = n_frames * hop_size

# Load inputs
inputs = {}
for k in ['amps', 'harmonic_distribution', 'f0_hz', 'magnitudes', 'ir']:
    inputs[k] = np.load(f'/content/pg_inputs/{k}.npy')

# Build DAG
harmonic = ddsp.synths.Harmonic(n_samples=n_samples, name='harmonic')
noise = ddsp.synths.FilteredNoise(n_samples=n_samples, name='noise', initial_bias=0.0)
reverb = ddsp.effects.Reverb(name='reverb')
add = ddsp.processors.Add(name='add')

dag = [
  (harmonic, ['amps', 'harmonic_distribution', 'f0_hz']),
  (noise, ['magnitudes']),
  (add, ['harmonic/signal', 'noise/signal']),
  (reverb, ['ir', 'add/signal'])
]

processor_group = ddsp.processors.ProcessorGroup(dag=dag)
audio_out = processor_group(inputs)

audio_np = audio_out.numpy() if hasattr(audio_out, 'numpy') else np.array(audio_out)
np.save(os.path.join(output_dir, 'audio_dag.npy'), audio_np)

# Get intermediate signals
outputs = processor_group.get_controls(inputs)

# Save the output signal from get_controls
out_signal = outputs['out']['signal']
out_np = out_signal.numpy() if hasattr(out_signal, 'numpy') else np.array(out_signal)
np.save(os.path.join(output_dir, 'dag_out_signal.npy'), out_np)

# Save noise signal
noise_signal = outputs['noise']['signal']
noise_np = noise_signal.numpy() if hasattr(noise_signal, 'numpy') else np.array(noise_signal)
np.save(os.path.join(output_dir, 'dag_noise_signal.npy'), noise_np)

# Save harmonic distribution controls
hd = outputs['harmonic']['controls']['harmonic_distribution']
hd_np = hd.numpy() if hasattr(hd, 'numpy') else np.array(hd)
np.save(os.path.join(output_dir, 'dag_harmonic_distribution.npy'), hd_np)

# Print outputs summary
for k, v in outputs.items():
    print(f"'{k}':", type(v).__name__)

print('\nDAG audio and intermediate signals saved.')
'''

# Write the script to disk so the conda interpreter can run it.
with open('/content/pg_dag.py', 'w') as f:
  f.write(SCRIPT)

# PYTHONPATH and PYTHONHOME are unset before launching the script, presumably
# so the kernel's Python settings do not leak into the conda interpreter.
!unset PYTHONPATH PYTHONHOME && /content/miniconda/bin/python /content/pg_dag.py

In [None]:
# ProcessorGroup audio: load the array that /content/pg_dag.py saved from
# calling the ProcessorGroup, then play it and plot its spectrogram.
audio_dag = np.load('/content/pg_outputs/audio_dag.npy')
play(audio_dag)
specplot(audio_dag)

In [None]:
# The out signal from get_controls: /content/pg_dag.py saved
# outputs['out']['signal'] to this file; only its shape is printed here.
out_signal = np.load('/content/pg_outputs/dag_out_signal.npy')
print('out/signal shape:', out_signal.shape)

The processor group also offers all the intermediate signals and control tensors for inspection.

In [None]:
# Intermediate signal of the 'noise' processor, saved by /content/pg_dag.py
# from outputs['noise']['signal']; play it and plot its spectrogram.
noise_audio = np.load('/content/pg_outputs/dag_noise_signal.npy')
play(noise_audio)
specplot(noise_audio)

In [None]:
# The 'harmonic_distribution' control of the 'harmonic' processor, saved by
# /content/pg_dag.py. The first element along the leading axis is rotated by
# 90 degrees and drawn as an image; `_ =` keeps the return value of matshow
# from being echoed as cell output.
harmonic_distribution_np = np.load('/content/pg_outputs/dag_harmonic_distribution.npy')
_ = plt.matshow(np.rot90(harmonic_distribution_np[0, :, :]), aspect='auto')

# Configuration with Gin

The main advantage of a ProcessorGroup is that it can be defined with Gin, allowing flexible configurations without having to write new python code for every DAG of processors.

In the example below, we pretend that an external `.gin` file has already been written; here its contents are simply held in a string.

In [None]:
#@title Run ProcessorGroup with Gin config

# SCRIPT holds the source of a standalone program. It is not executed in this
# kernel: the cell writes it to /content/pg_gin.py and runs it with the
# interpreter at /content/miniconda/bin/python. The script loads the five
# arrays under /content/pg_inputs, parses the gin configuration embedded in
# it, constructs a ProcessorGroup with no arguments, and saves the result to
# /content/pg_outputs/audio_gin.npy.
SCRIPT = r'''
import os
import warnings
warnings.filterwarnings("ignore")

import numpy as np
import ddsp
import gin

output_dir = '/content/pg_outputs'
os.makedirs(output_dir, exist_ok=True)

n_frames = 1000
hop_size = 64
n_samples = n_frames * hop_size

# Load inputs
inputs = {}
for k in ['amps', 'harmonic_distribution', 'f0_hz', 'magnitudes', 'ir']:
    inputs[k] = np.load(f'/content/pg_inputs/{k}.npy')

# Gin config string
gin_file_string = """
import ddsp

processors.ProcessorGroup.dag = [
  (@Harmonic(), ['amps', 'harmonic_distribution', 'f0_hz']),
  (@FilteredNoise(), ['magnitudes']),
  (@Add(), ['noise/signal', 'harmonic/signal']),
  (@Reverb(), ['ir', 'add/signal'])
]

Harmonic.name = 'harmonic'
FilteredNoise.name = 'noise'
processors.Add.name = 'add'

Harmonic.n_samples = 64000
FilteredNoise.n_samples = 64000
FilteredNoise.initial_bias = 0.0
"""

with gin.unlock_config():
    gin.parse_config(gin_file_string)

processor_group = ddsp.processors.ProcessorGroup()
audio_out = processor_group(inputs)

audio_np = audio_out.numpy() if hasattr(audio_out, 'numpy') else np.array(audio_out)
np.save(os.path.join(output_dir, 'audio_gin.npy'), audio_np)
print('Gin config audio saved.')
'''

# Write the script to disk so the conda interpreter can run it.
with open('/content/pg_gin.py', 'w') as f:
  f.write(SCRIPT)

# PYTHONPATH and PYTHONHOME are unset before launching the script, presumably
# so the kernel's Python settings do not leak into the conda interpreter.
!unset PYTHONPATH PYTHONHOME && /content/miniconda/bin/python /content/pg_gin.py

Now, after parsing the gin file, the ProcessorGroup will be configured on construction.

In [None]:
# Load the audio that /content/pg_gin.py saved, then play it and plot its
# spectrogram.
audio_gin = np.load('/content/pg_outputs/audio_gin.npy')
play(audio_gin)
specplot(audio_gin)