In [1]:
import import_ipynb
# Pull the ERB band helpers out of the sibling bandERB notebook; the cell output
# below confirms the module is loaded from bandERB.ipynb.
try:
    from bandERB import ERBBand, ERB_pro_matrix
except:
    # NOTE(review): the fallback repeats the exact same import, so it only helps
    # if a second attempt can succeed where the first one failed — presumably a
    # retry workaround for the notebook importer. Confirm, and narrow the bare
    # `except` to the exception actually seen if that is the case.
    from bandERB import ERBBand, ERB_pro_matrix

importing Jupyter notebook from bandERB.ipynb




importing Jupyter notebook from params.ipynb


In [2]:
def as_complex(x):
    """Return `x` as a complex tensor.

    Inputs whose dtype is already complex64 or complex128 are handed back
    untouched. Any other input is treated as a packed real tensor: the
    second-to-last and last entries of its final axis become the real and
    imaginary parts of the result.
    """
    already_complex = x.dtype == tf.complex64 or x.dtype == tf.complex128
    if already_complex:
        return x

    real_part = x[..., -2]
    imag_part = x[..., -1]
    return tf.complex(real_part, imag_part, name='as_complex')

def as_real(x):
    """Return `x` as a real tensor with a trailing (real, imag) axis.

    complex64/complex128 inputs are unpacked into their real and imaginary
    parts, which are joined on a new last axis of size 2. Inputs of any other
    dtype are returned unchanged.
    """
    is_complex = x.dtype == tf.complex64 or x.dtype == tf.complex128
    if not is_complex:
        return x

    real_plane = tf.expand_dims(tf.math.real(x), axis=-1)
    imag_plane = tf.expand_dims(tf.math.imag(x), axis=-1)
    return tf.concat([real_plane, imag_plane], axis=-1, name='as_real')

In [3]:
import tensorflow as tf
import tensorflow.keras as keras

import numpy as np
import random
import math

# import os
# os.environ["CUDA_DEVICE_ORDER"] = "PCI_BUS_ID"   # see issue #152
# os.environ["CUDA_VISIBLE_DEVICES"] = ""
# tf.executing_eagerly()

In [4]:
from params import model_params
# Build the shared settings object from config.ini. `p` is read as a
# module-level global by the functions below that do not create their own
# copy (df_operations, synthesis_frame), so this cell must run before they
# are called.
p = model_params('config.ini')

In [5]:
def mask_operations(spec_inputs, mask, beta=0.02, training=True, return_mask=False, _min=0.0, cutoff=0.0):
    """Apply an ERB-band gain mask to a spectrogram.

    The per-band gains are expanded to per-frequency-bin gains with the inverse
    ERB matrix and multiplied onto `spec_inputs`. All post-processing stays in
    TensorFlow ops so the function also works on symbolic / graph tensors.

    Args:
        spec_inputs: real spectrogram, [B, T, F, 2] per the shape notes below.
        mask: real ERB gains, [B, T, Fe] per the shape notes below.
        beta: strength of the post-filter; only used when `training` is False.
        training: when False the post-filter (and optionally `cutoff`) is applied.
        return_mask: also return the per-bin gains that were applied.
        _min: lower bound for the per-bin gains; 0.0 disables the clamp.
        cutoff: inference only. When > 0 and the mean per-bin gain (over the
            whole tensor) is below it, all gains are scaled by 0.01.

    Returns:
        The masked spectrogram, or `(spec, freq_bins_masks)` if `return_mask`.
    """
    # spec (real) [B, T, F, 2], F: freq_bins
    # mask (real): [B, T, Fe], Fe: erb_bins
    p = model_params('config.ini')
    ERBB = ERBBand(N=p.nb_erb, high_lim=p.sr//2, NFFT=p.fft_size)
    iERB_Matrix = tf.convert_to_tensor(ERB_pro_matrix(ERBB, NFFT=p.fft_size, mode=1).T, dtype=tf.float32) # inverse ERB convert matrix

    if not training:
        mask_sin = mask * tf.math.sin(math.pi * mask / 2)
        # Clamp the denominator away from zero before dividing.
        mask_pf = (1 + beta) * mask / (1 + beta * (mask/(tf.where(mask_sin<1e-12,1e-12,mask_sin)))**2)
    else:
        mask_pf = mask

    freq_bins_masks = mask_pf @ iERB_Matrix
    # Trailing axis of size 1 so the gain broadcasts over the (real, imag) axis.
    freq_bins_masks = tf.expand_dims(freq_bins_masks, axis=-1)

    if _min != 0.0:
        # tf.maximum (not np.maximum) keeps the mask a tensor.
        freq_bins_masks = tf.maximum(freq_bins_masks, tf.cast(_min, freq_bins_masks.dtype))

    if not training and cutoff > 0.0:
        # Select the attenuation with tf.where instead of a Python `if` on a
        # tensor value, which is not possible outside eager execution.
        below_cutoff = tf.reduce_mean(freq_bins_masks) < cutoff
        gate = tf.where(below_cutoff,
                        tf.constant(0.01, dtype=freq_bins_masks.dtype),
                        tf.constant(1.0, dtype=freq_bins_masks.dtype))
        freq_bins_masks = freq_bins_masks * gate

    spec = tf.multiply(spec_inputs, freq_bins_masks) # mask gain

    if return_mask:
        return spec, freq_bins_masks
    return spec

In [6]:
def df_operations(spec, coefs, alpha, training=True):
    """Filter the lowest `p.nb_df` bins of `spec` with complex taps and blend.

    Each windowed value is complex-multiplied with its tap from `coefs`, the
    products are summed over the tap axis (axis 2), and the result is mixed
    with the unfiltered low band as `alpha * filtered + (1 - alpha) * spec`.
    Bins from `p.nb_df` upwards are appended unchanged.

    With `training=True` the low band is padded along axis -3 via `spec_pad`
    and unfolded into sliding windows of `p.df_order` frames. With
    `training=False` the low band only gets a new leading axis and is used as
    the window directly (assumes the caller passes exactly the frames of one
    filter window — TODO confirm); the blend then uses the single frame that
    sits `p.df_lookahead` frames before the end of `spec`.

    Reads the module-level settings object `p`.
    """
    # spec (real) [B, T, F, 2], O: df_order
    # coefs (real) [B, T, O, F, 2]
    # alpha (real) [B, T, 1]
    low_band = spec[:, :, :p.nb_df, :]

    if training:
        low_band = spec_pad(low_band, p.df_order, p.df_lookahead, dim=-3)
        re_plane, im_plane = tf.split(low_band, [1, 1], axis=-1)

        def sliding_windows(plane):
            # One patch of df_order consecutive frames per output frame; the
            # patch contents land on the last axis.
            patches = tf.image.extract_patches(images=plane, sizes=[1, p.df_order, 1, 1],
                                               strides=[1, 1, 1, 1], rates=[1, 1, 1, 1],
                                               padding='VALID')
            return tf.expand_dims(patches, axis=-1)

        windows = tf.concat([sliding_windows(re_plane), sliding_windows(im_plane)], axis=-1)
        # Swap axes 2 and 3 so the window (tap) axis precedes the bin axis,
        # matching the layout of `coefs`.
        windows = tf.transpose(windows, perm=[0, 1, 3, 2, 4])
    else:
        windows = tf.expand_dims(low_band, axis=0)

    win_re, win_im = windows[..., 0], windows[..., 1]
    tap_re, tap_im = coefs[..., 0], coefs[..., 1]

    # Complex product (win_re + j*win_im) * (tap_re + j*tap_im).
    # Real part
    filt_re = win_re * tap_re - win_im * tap_im
    # Imaginary part
    filt_im = win_im * tap_re + win_re * tap_im

    filtered = tf.concat([tf.expand_dims(filt_re, axis=-1),
                          tf.expand_dims(filt_im, axis=-1)], axis=-1)
    # Sum the per-tap contributions.
    filtered = tf.reduce_sum(filtered, axis=2)
    alpha = tf.expand_dims(alpha, axis=-1)

    if training:
        ref_low = spec[..., :p.nb_df, :]
        ref_high = spec[..., p.nb_df:, :]
    else:
        # Single reference frame, `df_lookahead` frames before the end. A stop
        # of -0 would give an empty slice, so lookahead 0 uses an open end.
        first = -p.df_lookahead - 1
        last = -p.df_lookahead if p.df_lookahead != 0 else None
        ref_low = spec[..., first:last, :p.nb_df, :]
        ref_high = spec[..., first:last, p.nb_df:, :]

    spec_out = tf.multiply(filtered, alpha) + tf.multiply(ref_low, (1-alpha))
    spec_out = tf.concat((spec_out, ref_high), axis=-2)

    return spec_out

In [7]:
def df_operations_fast(spec, coefs, alpha):
    """Single-tap variant of the deep-filter step.

    Only the tap at position `-df_lookahead - 1` of the tap axis of `coefs` is
    used. It is complex-multiplied with the lowest `p.nb_df` bins of `spec`,
    and the result is blended with the unfiltered low band as
    `alpha * filtered + (1 - alpha) * spec`. Bins from `p.nb_df` upwards are
    appended unchanged.

    Args:
        spec: real spectrogram, [B, T, F, 2] per the shape notes below.
        coefs: real filter taps, [B, T, O, F, 2] per the shape notes below.
        alpha: real blend weight, [B, T, 1] per the shape notes below.

    Returns:
        Tensor shaped like `spec` with the low band filtered.
    """
    # spec (real) [B, T, F, 2], O: df_order
    # coefs (real) [B, T, O, F, 2]
    # alpha (real) [B, T, 1]
    p = model_params('config.ini')

    # With df_lookahead == 0 a stop of `-0` is just 0 and `-1:0` is an empty
    # slice, so the stop is left open in that case.
    first = -p.df_lookahead - 1
    last = -p.df_lookahead if p.df_lookahead != 0 else None
    coefs = coefs[:, :, first:last, :, :]

    # Give the spectrum an explicit tap axis of size 1 so it lines up with
    # `coefs` axis by axis. Without it the two operands only broadcast to the
    # intended shape when B == T == 1.
    low_band = tf.expand_dims(spec[..., :p.nb_df, :], axis=-3)

    # Real part
    spec_real = low_band[..., 0] * coefs[..., 0] - low_band[..., 1] * coefs[..., 1]
    # Imaginary part
    spec_image = low_band[..., 1] * coefs[..., 0] + low_band[..., 0] * coefs[..., 1]

    spec_real = tf.expand_dims(spec_real, axis=-1)
    spec_image = tf.expand_dims(spec_image, axis=-1)
    spec_f = tf.concat([spec_real, spec_image], axis=-1)

    # Collapse the size-1 tap axis.
    spec_f = tf.reduce_sum(spec_f, axis=2)

    alpha = tf.expand_dims(alpha, axis=-1)

    spec_out = tf.multiply(spec_f, alpha) + tf.multiply(spec[..., :p.nb_df, :], (1-alpha))
    spec_out = tf.concat((spec_out, spec[..., p.nb_df:, :]), axis=-2)

    return spec_out

In [8]:
def spec_pad(x, window_size: int, lookahead: int, dim: int = 0):
    pad = [0] * len(x.get_shape()) * 2
    if dim >= 0:
        pad[(tf.rank(x) - dim - 1) * 2] = window_size - lookahead - 1
        pad[(tf.rank(x) - dim - 1) * 2 + 1] = lookahead
    else:
        pad[(-dim - 1) * 2] = window_size - lookahead - 1
        pad[(-dim - 1) * 2 + 1] = lookahead
        
    if len(pad)==2:
        padding = ((0, 0), (0, 0), (0, 0),(pad[-2], pad[-1]))
    elif len(pad)==4:
        padding = ((0, 0), (0, 0), (pad[-2], pad[-1]), (pad[-4],pad[-3]))
    elif len(pad)==6:
        padding = ((0, 0), (pad[-2], pad[-1]), (pad[-4],pad[-3]), (pad[-6],pad[-5]))
    else:
        padding = ((pad[-2], pad[-1]), (pad[-4],pad[-3]), (pad[-6],pad[-5]), (pad[-8],pad[-7]))
    return tf.pad(x, padding)

In [9]:
from tensorflow.keras.layers import Lambda, Flatten

In [10]:
def synthesis_frame(x):
    """Turn spectral frames back into a waveform by windowed overlap-add.

    Steps: convert `x` to complex (see `as_complex`), divide by
    `p.fft_size ** -0.5`, inverse real FFT of length `p.fft_size`, multiply
    every frame by a Vorbis window, then overlap-add with step `p.hop_size`.

    The window is broadcast over the leading axes, so the number of frames is
    taken from `x` itself rather than from the configured clip length.

    Reads the module-level settings object `p`.

    Args:
        x: spectral frames, complex or packed real (last axis = real, imag);
           presumably [B, T, F] / [B, T, F, 2] — TODO confirm against callers.

    Returns:
        The overlap-added signal from `tf.signal.overlap_and_add`.
    """
    win = tf.signal.vorbis_window(window_length = p.fft_size)
    # [1, 1, fft_size]: broadcasts against any batch size and frame count.
    win = tf.reshape(win, (1, 1, p.fft_size))

    # Dividing by fft_size ** -0.5 scales the spectrum by sqrt(fft_size);
    # presumably this undoes a normalisation applied at analysis time — verify.
    x_complex = as_complex(x) / (p.fft_size ** -0.5)

    X_ifft = Lambda(lambda v: tf.signal.irfft(tf.cast(v, dtype=tf.complex64), fft_length = tf.constant([p.fft_size], dtype=tf.int32)))(x_complex)

    x_win = tf.multiply(X_ifft, win)

    out = tf.signal.overlap_and_add(x_win, frame_step = p.hop_size)

    return out