In [None]:
import sys
import glob
import numpy as np
import soundfile as sf
import pymixconsole as pymc
import IPython.display as ipd

# pymixconsole 

pymixconsole is a lightweight Python library for implementing and controlling a headless multitrack mixing console. It aims to provide a simple interface that can be controlled programmatically, allowing for more control compared to scripting in pre-existing DAWs, and is easily extensible to allow for advanced behaviour.

## Example audio

Before we get started let's first load a simple mono recording of an electric guitar that we will use for testing.

In [None]:
# Load the example guitar recording. `sf.read` returns the samples and the
# file's sample rate; they are kept as `x` and `fs` for the cells that follow.
x, fs = sf.read('../signals/e_gtr_short.wav')
print(f"Loaded {x.shape[0]} samples at fs={fs}")

## Processor

The lowest level object that makes up a mixing console is the `Processor`. A `Processor` object generally has a set of `Parameter` objects that define how the device can be configured and also contains a `process()` method that takes in a block of mono or stereo audio and produces an output. One of the simplest processors is the `Gain` processor, which can be used as shown below. First we create an instance of the processor and then we adjust its gain value to be -12.0 dB.

In [None]:
# Build the processor at the sample rate of the loaded file (`fs`) instead of a
# hard-coded 44100, so the two cannot drift apart if the recording changes.
gain = pymc.processors.Gain(block_size=512, sample_rate=fs)
gain.parameters.gain.value = -12.0  # dB

Next we want to use this processor to process our audio. To do this we will iterate over our audio file, each time passing a new chunk of 512 samples to the gain processor, storing the output into a new array with the same size as the input audio stream.

In [None]:
# Start from zeros rather than np.empty: only whole 512-sample blocks are
# processed below, so a trailing partial block would otherwise keep whatever
# uninitialised memory np.empty handed back and be played as noise.
y = np.zeros(shape=(x.shape[0],))

for n in range(x.shape[0] // 512):
    start = n * 512
    stop = start + 512
    y[start:stop] = gain.process(x[start:stop])

In [None]:
# Show a player for the input, then one for the processed output.
# normalize=False is passed so the players do not rescale either signal.
ipd.display(ipd.Audio(data=x, rate=fs, normalize=False))
ipd.display(ipd.Audio(data=y, rate=fs, normalize=False))

Gain isn't too interesting so let's try something a bit cooler like Reverb.

In [None]:
# Use the sample rate of the loaded file instead of assuming 44100.
reverb = pymc.processors.Reverb(block_size=512, sample_rate=fs)
reverb.parameters.dry_mix.value = 0.3
reverb.parameters.wet_mix.value = 0.03
reverb.parameters.room_size.value = 0.7

# the output of the reverb is always stereo
# Zero-initialised so the trailing partial block (never processed below) is
# silence; with np.empty it would hold arbitrary values that also distort the
# normalisation applied by the player.
y = np.zeros(shape=(x.shape[0], 2))

for n in range(x.shape[0] // 512):
    start = n * 512
    stop = start + 512
    y[start:stop] = reverb.process(x[start:stop])

# y is (samples, 2); it is transposed before being handed to the player
ipd.display(ipd.Audio(data=x, rate=fs, normalize=True))
ipd.display(ipd.Audio(data=y.T, rate=fs, normalize=True))

## Parameter

I may have lied slightly, the `Parameter` class is slightly lower than the `Processor` class and every processor contains a list of `Parameters`. These parameters help to define the operation of the processor and provide an interface for the user to change its behaviour. 

To see all the parameters for a processor just call the print function on a processor's `ParameterList` object.

In [None]:
# Print the reverb's parameter list to inspect its parameters.
print(reverb.parameters)

You'll notice that every parameter has a name (string) as well as a current `value`. It also has a kind designation which is basically the parameter's type. It also has a default value so that it can be reset and a range that defines its min and max values. In the case of the string kind there is a list of possible options instead. We saw in the above examples how to modify the `value` of any parameter, but the most useful behaviour here is the `randomize()` method.

We can actually call the `randomize()` method for the reverb processor and it will go through and individually call the `randomize()` method for each of its parameters. We see below that the values have been randomized. What we don't see is that something special is going on behind the scenes with regards to how each parameter gets randomized. To keep it short, when a `Parameter` is defined the creator can specify whether they want to sample new values from a uniform distribution or from a Gaussian (with defined mean and stddev). We won't get into this more now.

In [None]:
# Randomize the reverb, then print its parameters again to see the new values.
reverb.randomize()
print(reverb.parameters)

Also see what happens if we try to change a parameter value to something beyond its valid range.

In [None]:
# Disabled by default: uncomment the next line to try assigning a value
# outside the parameter's valid range.
#reverb.parameters.width.value = 1.1

## Channel

The next level is the `Channel`. A Channel contains a number of processors, and when a block of data is passed to a channel it will apply all of its processors to that data in series. There are actually three kinds of processors in a channel: pre-processors, core-processors, and post-processors. The reason for the distinction is that during the `randomize()` process for a channel this ensures that only the core-processors can have their order shuffled. The pre and post-processors will always retain the order they are initialized to. This is needed for example for things like the `Panner`, which we want to ALWAYS be the last element in the signal chain for the channel. 

In the following example we will instantiate a new channel and then use it to process our audio signal. 

By default the structure of a channel will be the following:

**pre-processors**:  pre-gain -> polarity-inverter

**core-processors**: equaliser -> compressor

**post-processors**: post-gain -> panner

In [None]:
# Run the channel at the sample rate of the loaded file (`fs`) rather than a
# hard-coded 44100, matching the processors created later in this notebook.
channel = pymc.channel.Channel(block_size=512, sample_rate=fs)

We will apply our processing on the guitar signal from before in the exact same way, except this time we will call the `process()` method of the channel. Before we apply the processing with the default settings, let's change the equaliser and compressor (by default they will do no processing).

In [None]:
# first let's grab references to the equaliser and compressor, looked up by
# the names "eq" and "compressor"
eq   = channel.processors.get("eq")
comp = channel.processors.get("compressor")

# now we can adjust the equaliser to filter out the low end:
# a low shelf at 420 Hz cut by 24 dB
eq.parameters.low_shelf_freq.value = 420.0 # Hz
eq.parameters.low_shelf_gain.value = -24.0 # dB

# and dial in the compressor threshold and ratio
comp.parameters.threshold.value = -32.0 # dB
comp.parameters.ratio.value     =  10.0

In [None]:
# the output of the channel is always stereo
# Zero-initialised so the trailing partial block (never processed below) is
# silence instead of the uninitialised memory np.empty would leave behind.
y = np.zeros(shape=(x.shape[0], 2))

for n in range(x.shape[0] // 512):
    start = n * 512
    stop = start + 512
    y[start:stop] = channel.process(x[start:stop])

# y is (samples, 2); it is transposed before being handed to the player
ipd.display(ipd.Audio(data=x, rate=fs, normalize=True))
ipd.display(ipd.Audio(data=y.T, rate=fs, normalize=True))

If you want to add another processor to a channel it's very easy. 

For example, let's try adding our reverb processor from earlier to this channel, right after the compressor.

In [None]:
# Add the `reverb` processor created earlier to this channel's processors.
# NOTE(review): where it lands in the chain is decided by add() — confirm it is
# placed after the compressor as the text above states.
channel.processors.add(reverb)

In [None]:
# the output of the channel is always stereo
# Zero-initialised so the trailing partial block (never processed below) is
# silence instead of the uninitialised memory np.empty would leave behind.
y = np.zeros(shape=(x.shape[0], 2))

for n in range(x.shape[0] // 512):
    start = n * 512
    stop = start + 512
    y[start:stop] = channel.process(x[start:stop])

# y is (samples, 2); it is transposed before being handed to the player
ipd.display(ipd.Audio(data=x, rate=fs, normalize=True))
ipd.display(ipd.Audio(data=y.T, rate=fs, normalize=True))

## Console

Finally, we have made it to the topmost level. Here we take all of the components from before and combine them to build an entire mixing console, complete with channels, busses, processors, and all. 

When we create a console we will need to supply a `block_size` and `sample_rate` just like with all of the previous objects. But, in addition we will need to ask for a certain number of channels. This will create a new channel for each one with the default settings. This will also create two busses (one for reverb, and one for delay), as well as a special bus, the master, which has an equaliser and compressor. Before we get too far into the details let's create a console and try to process some audio.

Now this part is a bit trickier since we need to create an array of mono input signals with the shape `[samples, tracks]`. Eventually this will be wrapped up into the `Multitrack` class which is still a work in progress.

In [None]:
# sorted() makes the file -> channel assignment reproducible between runs
# (glob returns paths in arbitrary order).
mfiles = sorted(glob.glob("../signals/multitrack/*.wav"))

block_size = 16384

# counters
ch_idx     = 0
n_samples  = 0
n_channels = 0

# First pass: read only the headers to size the array. `info.frames` is the
# exact sample count, avoiding the float round-trip through duration * samplerate.
for mfile in mfiles:
    info = sf.info(mfile)
    n_samples = max(n_samples, info.frames)
    n_channels += info.channels

# One column per mono channel; shorter files stay zero-padded at the end.
z = np.zeros(shape=(n_samples, n_channels))

# Second pass: copy every channel of every file into its own column.
for mfile in mfiles:

    # always_2d=True yields (frames, channels) even for mono files, so mono,
    # stereo and wider files are all handled by the same slice assignment.
    d, fs = sf.read(mfile, always_2d=True)
    n_frames, n_file_channels = d.shape

    # Writing into a slice replaces the explicit padding step, which was one
    # sample short and padded the channel axis of stereo data as well.
    z[:n_frames, ch_idx:ch_idx + n_file_channels] = d
    ch_idx += n_file_channels

print(z.shape, fs)

Now that we have our array of multitrack data let's create a console to process it.

In [None]:
# Create a console with one channel per column of `z`, using the block size
# and the sample rate (`fs`) set in the multitrack loading cell.
console = pymc.Console(block_size=block_size, sample_rate=fs, num_channels=n_channels)

And we will follow the same process as before iterating over each block of the input. In the future the `Multitrack` object will make this a bit simpler. Note that this can take a bit of time since we are essentially performing the same process as "bouncing" or rendering the full track in the DAW. This still should be quite a bit faster than realtime, and we will get an improvement if we use a larger `block_size`.

In [None]:
# Stereo mix buffer; rows that are never written below remain zero.
y = np.zeros(shape=(n_samples, 2))

# Step through the input one whole block at a time; a trailing partial block
# is not processed.
for n, start in enumerate(range(0, (n_samples//block_size) * block_size, block_size)):
    stop = start + block_size

    # progress indicator, rewritten in place on a single line
    sys.stdout.write(f"* {n:3d}/{n_samples//block_size:3d}\r")
    sys.stdout.flush()

    y[start:stop,:] = console.process_block(z[start:stop,:])

# y is (samples, 2); it is transposed before being handed to the player
ipd.display(ipd.Audio(data=y.T, rate=fs, normalize=True))

In [None]:
import base64
def show_svg(svg):
    """Return an `ipd.HTML` object wrapping an <img> tag whose src is `svg`."""
    img_tag = f"""<img src="{svg}"></img>"""
    return ipd.HTML(img_tag)

This mix is pretty boring since all settings are at the default setting and the FX busses are turned off. So to make a more interesting mix we are going to want to change some parameters around. Since the console is large (16 channels of mono audio), it would be nice to visualize the console. We can do this with the `render_diagram()` method. This will give us a full look at the console set up and the parameter settings for every processor in the console. This is quite large so you might want to open in another window and take a closer look.

In [None]:
# Randomize the console, render its diagram, and embed the SVG in the output.
# NOTE(review): './pymixconsole_diagram.svg' is presumably the file written by
# render_diagram() — confirm the path.
console.randomize()
console.render_diagram()
show_svg('./pymixconsole_diagram.svg')

Now we could go into each channel and each processor to set some values to make our mix more interesting, but let's take the easy way out and just call the console level `randomize()` method which will randomize the entirety of the console (but in a relatively smart way). Then we can process the audio tracks again and listen to the new output. 

In [None]:
# Draw a fresh random configuration for the whole console.
console.randomize()

# Re-render the mix into the existing `y` buffer, one whole block at a time;
# a trailing partial block is not processed.
for n, start in enumerate(range(0, (n_samples//block_size) * block_size, block_size)):
    stop = start + block_size

    # progress indicator, rewritten in place on a single line
    sys.stdout.write(f"* {n:3d}/{n_samples//block_size:3d}\r")
    sys.stdout.flush()

    y[start:stop,:] = console.process_block(z[start:stop,:])

# y is (samples, 2); it is transposed before being handed to the player
ipd.display(ipd.Audio(data=y.T, rate=fs, normalize=True))

Not a good mix by any means but we can hear some different elements, effects, and panning now.

## Serialize

The final important method to talk about is the `serialize()` method. This is a special method that can be called at any level that packages up all the details about how the console is configured. This serves a special purpose, since we want to build a deep learning model that learns how to perform the linear and nonlinear transformations of a mixing console. This method then produces a vector of all the parameter settings with some nice features. Let's first look at the result of doing this at the lower level of the `Processor`. 

In [None]:
# Serialize the equaliser's parameters with the default options and print the result.
print(eq.parameters.serialize())

We can see that this gives a dictionary of all the current values of each parameter in this processor. This is nice but not exactly what we want. We can also send in keyword arguments to change how this happens.

In [None]:
# Same serialization, now requesting normalized values and one-hot encoding.
print(eq.parameters.serialize(normalize=True, one_hot_encode=True))

Now this has normalized all the values between 0-1 from min to max. Additionally it should one-hot-encode any string parameters that can take on a few different values. To get a better example of what this looks like let's try it for an entire channel now.

In [None]:
# Serialize the whole channel with normalized values and one-hot encoding.
print(channel.serialize(normalize=True, one_hot_encode=True))

To get this ready for something like a model trained in PyTorch we can use a function like this to convert it into a single array with values that range from 0 to 1.

In [None]:
def vectorize_params(serialized_params):
    """Flatten a nested serialized-parameter mapping into one float32 vector.

    Args:
        serialized_params: Mapping of processor type -> list of
            {processor name -> {parameter name -> value}} dicts. A value is
            either a scalar or an array-like (ndarray, list or tuple), e.g. a
            one-hot encoding.

    Returns:
        1-D ``np.float32`` array with every value in iteration order;
        array-like values are flattened in place into the vector.
    """
    vector = []

    # Only the values matter at each level, so the keys are not unpacked.
    for processors in serialized_params.values():
        for processor in processors:
            for parameters in processor.values():
                for parameter in parameters.values():
                    if isinstance(parameter, (np.ndarray, list, tuple)):
                        # ravel() flattens any rank, so a nested or
                        # multi-dimensional encoding cannot make the final
                        # array ragged.
                        vector.extend(np.ravel(parameter).tolist())
                    else:
                        vector.append(parameter)

    return np.array(vector).astype(np.float32)

In [None]:
# Serialize the channel and flatten the result into a single float32 vector.
print(vectorize_params(channel.serialize(normalize=True, one_hot_encode=True)))

In [None]:
def apply_processor(x, processor, stereo=False, normalize=True):
    """Run `x` through `processor` block by block and display both signals.

    Args:
        x: Input samples, sliced along the first axis in blocks of
            `processor.block_size`.
        processor: Object exposing `block_size` and `process(block)`.
        stereo: If True the output buffer has shape (len(x), 2), otherwise
            (len(x),).
        normalize: Forwarded to `ipd.Audio` for both players.

    Returns:
        None. Two audio players (input, then output) are displayed.

    Note:
        The playback rate is read from the notebook-level variable `fs`.
    """
    # Zero-initialise the output: only whole blocks are processed, so with
    # np.empty a trailing partial block would hold arbitrary memory that is
    # both played back and fed into the player's normalisation.
    if stereo:
        y = np.zeros(shape=(x.shape[0], 2))
    else:
        y = np.zeros(shape=(x.shape[0],))

    block = processor.block_size

    for n in range(x.shape[0] // block):
        start = n * block
        stop = start + block
        y[start:stop] = processor.process(x[start:stop])

    # y is transposed before being handed to the player (a no-op for 1-D output)
    ipd.display(ipd.Audio(data=x, rate=fs, normalize=normalize))
    ipd.display(ipd.Audio(data=y.T, rate=fs, normalize=normalize))
    

## Processors Overview

In this section we will briefly cover each processor and run it over our electric guitar sample from before.

### Gain

As we saw before this is the most simple control over the level of the signal.

In [None]:
# Gain processor at the file's sample rate with its gain parameter set to 12.0.
gain = pymc.processors.Gain(block_size=512, sample_rate=fs)
gain.parameters.gain.value = 12.0

print(gain.parameters)

# A copy of x is passed in; normalize=False is forwarded to both audio players.
apply_processor(x.copy(), gain, normalize=False)

### Polarity Inverter

The polarity inverter is a simple processor that flips the polarity of the input signal. While this may not seem very useful it is fairly important when you have multiple microphones, as it can be used to correct phase issues.

In [None]:
# Polarity inverter with its `invert` parameter switched on.
inverter = pymc.processors.PolarityInverter(block_size=512, sample_rate=fs)
inverter.parameters.invert.value = True

print(inverter.parameters)

# Process a copy of the guitar signal and display input and output players.
apply_processor(x.copy(), inverter)

### Panner



In [None]:
panner = pymc.processors.Panner(block_size=512, sample_rate=fs)
# pan it all the way to the left
panner.parameters.pan.value = 0.0 

print(panner.parameters)

# stereo=True makes apply_processor allocate a two-column output buffer
apply_processor(x.copy(), panner, stereo=True)

### Equaliser


In [None]:
# Fresh equaliser (rebinds `eq`, previously the channel's equaliser).
eq = pymc.processors.Equaliser(block_size=512, sample_rate=fs)
# low shelf settings
eq.parameters.low_shelf_freq.value  = 120
eq.parameters.low_shelf_gain.value  = 12.0
# first band settings (frequency, gain, Q)
eq.parameters.first_band_freq.value = 800.0
eq.parameters.first_band_gain.value = -20.0
eq.parameters.first_band_q.value    =  0.3
# high shelf settings
eq.parameters.high_shelf_freq.value = 8000
eq.parameters.high_shelf_gain.value = 12.0

print(eq.parameters)

# stereo=False: apply_processor allocates a one-dimensional output buffer
apply_processor(x.copy(), eq, stereo=False)

### Compressor


In [None]:
# Compressor left at the parameter values it is constructed with.
compressor = pymc.processors.Compressor(block_size=512, sample_rate=fs)

print(compressor.parameters)

# stereo=False: apply_processor allocates a one-dimensional output buffer
apply_processor(x.copy(), compressor, stereo=False)

### Distortion

In [None]:
# Distortion processor with its `factor` parameter set to 9.0.
distortion = pymc.processors.Distortion(block_size=512, sample_rate=fs)
distortion.parameters.factor.value = 9.0

print(distortion.parameters)

# The input is scaled by 2.0 before processing — presumably to drive the
# distortion harder; TODO confirm intent.
apply_processor(2.0 * x.copy(), distortion, stereo=False)

### Delay


In [None]:
# Delay processor with wet mix, delay and feedback set explicitly.
delay = pymc.processors.Delay(block_size=512, sample_rate=fs)
delay.parameters.wet_mix.value = 0.3
# NOTE(review): the unit of `delay` is not visible here (samples?) — confirm
delay.parameters.delay.value = 20000
delay.parameters.feedback.value = 0.6

print(delay.parameters)

# stereo=True makes apply_processor allocate a two-column output buffer
apply_processor(x.copy(), delay, stereo=True)

### Reverb

In [None]:
# Fresh reverb (rebinds `reverb`) with every parameter below set explicitly.
reverb = pymc.processors.Reverb(block_size=512, sample_rate=fs)
reverb.parameters.damping.value = 1.0
reverb.parameters.dry_mix.value = 0.8
reverb.parameters.wet_mix.value = 0.15
reverb.parameters.room_size.value = 0.6
reverb.parameters.width.value = 1.0

print(reverb.parameters)

# stereo=True makes apply_processor allocate a two-column output buffer
apply_processor(x.copy(), reverb, stereo=True)