In [None]:
#default_exp transforms

# Audio Transforms
> Data transformations made specifically for audio.

In [None]:
#export
from fastai2.basics import *
import librosa
import torchaudio

In [None]:
from IPython.display import Audio
import matplotlib.pyplot as plt
import seaborn as sns

# Normalize Audio

In [None]:
#export
class NormalizeAudio(Transform):
    "Rescale a NumPy audio signal with `librosa.util.normalize`, using that function's default settings."
    def encodes(self, sig:ndarray):
        # Only an ndarray-annotated overload is defined for this transform.
        normalized = librosa.util.normalize(sig)
        return normalized

# MuLawEncoding

In [None]:
#export
class MuLawEncoding(Transform):
    "Mu-law quantize a waveform tensor and one-hot encode the result; `decodes` reverses both steps."
    order = 11
    def __init__(self, precision:int=256):
        # `precision` is used both as the number of mu-law quantization levels
        # and as the number of one-hot classes, so the two always agree.
        self.OneHot = partial(torch.nn.functional.one_hot, num_classes=precision)
        self.MuLawEnc = torchaudio.transforms.MuLawEncoding(precision)
        self.MuLawDec = torchaudio.transforms.MuLawDecoding(precision)
    def encodes(self, sig:Tensor):
        # Quantize to integer class indices, then expand them to one-hot vectors.
        # `one_hot` appends the class axis as the LAST dimension: (*, precision).
        mulaw = self.MuLawEnc(sig)
        return self.OneHot(mulaw)
    def decodes(self, enc:Tensor):
        # Collapse the class axis produced by `encodes` (the last one) back to indices.
        # The previous `dim=-2` reduced over the sample axis instead, so a round trip
        # through encodes/decodes did not recover the mu-law indices.
        # NOTE(review): if callers feed channel-first model output (.., precision, T)
        # here, they must move the class axis last before decoding - confirm.
        mulaw = torch.argmax(enc, dim=-1)
        return self.MuLawDec(mulaw)

# Frequency Order

In [None]:
#export
def get_pitch(S, sr=44100):
    """Estimate the tuning of spectrogram `S[0]` with `librosa.estimate_tuning`.

    `S` is indexed with `[0]` before being handed to librosa, so it must carry a
    leading axis (presumably a singleton channel axis - TODO confirm with callers).
    `sr` is the sample rate passed to librosa; it defaults to 44100, the value that
    was previously hard-coded, so existing single-argument calls behave as before.
    Returns whatever `librosa.estimate_tuning` returns for that spectrogram.
    """
    return librosa.estimate_tuning(S=S[0], sr=sr)

def frequency_order(specs, attempt=0):
    """Return the items of `specs` as an `L`, sorted ascending by their `get_pitch` value.

    Items with equal pitch keep their original relative order (the sort is stable).
    `attempt` is accepted but not used by this function.
    """
    pitches = L(specs).map(get_pitch)
    # Pair every pitch with its spectrogram and rank the pairs on the pitch alone,
    # so the spectrograms themselves are never compared.
    ranked = sorted(zip(pitches, specs), key=lambda pair: pair[0])
    return L([spec for _, spec in ranked])

# 2D STFT

In [None]:
#export
class Stft2d(ItemTransform):
    "Turn raw audio into log-scaled STFT magnitude arrays; a list of signals becomes one stacked array."
    # Number of leading frequency rows kept from the STFT, and number of zero frames
    # appended on the time axis. These were hard-coded inside `encodes`; as class
    # attributes they keep the same defaults but can be overridden in a subclass.
    n_bins = 1024
    pad_frames = 8
    def encodes(self, audio):
        # Single-signal overload: complex STFT -> crop rows -> pad frames -> log2(1+|.|).
        spec = librosa.stft(audio)
        # The two-pair pad width means `spec` is treated as 2-D (rows, frames);
        # only the end of the second axis is padded (np.pad's default constant mode).
        spec = np.pad(spec[:self.n_bins,:], ((0,0),(0,self.pad_frames)))
        spec = np.log2(1+np.abs(spec))
        # Add a leading axis of size 1 so per-signal results can be concatenated on it.
        return np.expand_dims(spec, axis=0)
    def encodes(self, audios:list):
        # List overload: encode each signal, reorder via `frequency_order`,
        # then join along the leading axis added by the single-signal overload.
        specs = [self.encodes(audio) for audio in audios]
        specs = frequency_order(specs)
        return np.concatenate(specs, axis=0)

# Export

In [None]:
#hide
from nbdev.export import notebook2script
# Run nbdev's notebook-to-script conversion for this notebook only; it reports the converted file name.
notebook2script(fname="Transforms.ipynb")

Converted Transforms.ipynb.
