Skip to content

Commit

Permalink
Merge pull request #11 from danielhkl/master
Browse files Browse the repository at this point in the history
get_power and setup
  • Loading branch information
LukasDrude committed Jul 24, 2018
2 parents 0747e82 + 4e3a010 commit 77a1a26
Show file tree
Hide file tree
Showing 7 changed files with 248 additions and 181 deletions.
8 changes: 4 additions & 4 deletions examples/WPE_Numpy_offline.ipynb
Original file line number Diff line number Diff line change
Expand Up @@ -101,7 +101,7 @@
"sampling_rate = 16000\n",
"delay = 3\n",
"iterations = 5\n",
"K = 10"
"taps = 10"
]
},
{
Expand Down Expand Up @@ -158,7 +158,7 @@
"metadata": {},
"outputs": [],
"source": [
"Z = wpe(Y, iterations=iterations, mode='full').transpose(1, 2, 0)\n",
"Z = wpe(Y, iterations=iterations, statistics_mode='full').transpose(1, 2, 0)\n",
"z = istft(Z, size=stft_options['size'], shift=stft_options['shift'])\n",
"IPython.display.Audio(z[0], rate=sampling_rate)"
]
Expand Down Expand Up @@ -190,9 +190,9 @@
],
"metadata": {
"kernelspec": {
"display_name": "Python 3",
"display_name": "py36",
"language": "python",
"name": "python3"
"name": "py36"
},
"language_info": {
"codemirror_mode": {
Expand Down
14 changes: 7 additions & 7 deletions examples/WPE_Numpy_online.ipynb
Original file line number Diff line number Diff line change
Expand Up @@ -63,8 +63,8 @@
"channels = 8\n",
"sampling_rate = 16000\n",
"delay = 3\n",
"alpha=0.99\n",
"K = 10\n",
"alpha=0.9999\n",
"taps = 10\n",
"frequency_bins = stft_options['size'] // 2 + 1"
]
},
Expand Down Expand Up @@ -112,8 +112,8 @@
"T, _, _ = Y.shape\n",
"\n",
"def aquire_framebuffer():\n",
" buffer = list(Y[:K+delay+1, :, :])\n",
" for t in range(K+delay+1, T):\n",
" buffer = list(Y[:taps+delay+1, :, :])\n",
" for t in range(taps+delay+1, T):\n",
" yield np.array(buffer)\n",
" buffer.append(Y[t, :, :])\n",
" buffer.pop(0)"
Expand All @@ -136,11 +136,11 @@
"outputs": [],
"source": [
"Z_list = []\n",
"Q = np.stack([np.identity(channels * K) for a in range(frequency_bins)])\n",
"G = np.zeros((frequency_bins, channels * K, channels))\n",
"Q = np.stack([np.identity(channels * taps) for a in range(frequency_bins)])\n",
"G = np.zeros((frequency_bins, channels * taps, channels))\n",
"\n",
"for Y_step in tqdm(aquire_framebuffer()):\n",
" Z, Q, G = online_wpe_step(Y_step, get_power_online(Y_step), Q, G, alpha=alpha, K=K, delay=delay)\n",
" Z, Q, G = online_wpe_step(Y_step, get_power_online(Y_step.transpose(1, 2, 0)), Q, G, alpha=alpha, taps=taps, delay=delay)\n",
" Z_list.append(Z)\n",
"\n",
"Z_stacked = np.stack(Z_list)\n",
Expand Down
9 changes: 5 additions & 4 deletions examples/WPE_Tensorflow_offline.ipynb
Original file line number Diff line number Diff line change
Expand Up @@ -110,7 +110,7 @@
"sampling_rate = 16000\n",
"delay = 3\n",
"iterations = 5\n",
"K = 10"
"taps = 10"
]
},
{
Expand Down Expand Up @@ -167,9 +167,10 @@
"metadata": {},
"outputs": [],
"source": [
"from nara_wpe.tf_wpe import get_power\n",
"with tf.Session()as session:\n",
" Y_tf = tf.placeholder(tf.complex128, shape=(None, None, None))\n",
" Z_tf = wpe(Y_tf, iterations=iterations)\n",
" Z_tf = wpe(Y_tf, taps=taps, iterations=iterations)\n",
" Z = session.run(Z_tf, {Y_tf: Y})\n",
"z = istft(Z.transpose(1, 2, 0), size=stft_options['size'], shift=stft_options['shift'])\n",
"IPython.display.Audio(z[0], rate=sampling_rate)"
Expand Down Expand Up @@ -201,9 +202,9 @@
],
"metadata": {
"kernelspec": {
"display_name": "Python 3",
"display_name": "py36",
"language": "python",
"name": "python3"
"name": "py36"
},
"language_info": {
"codemirror_mode": {
Expand Down
105 changes: 20 additions & 85 deletions examples/WPE_Tensorflow_online.ipynb

Large diffs are not rendered by default.

93 changes: 78 additions & 15 deletions nara_wpe/tf_wpe.py
Original file line number Diff line number Diff line change
Expand Up @@ -57,43 +57,106 @@ def _slice(x):


def get_power_online(signal):
"""Calculates power over last two frames for `signal`
"""Calculates power for `signal`
Args:
signal (tf.Tensor): Single frequency signal with shape (T, F, D).
signal (tf.Tensor): Signal with shape (F, D, T).
Returns:
tf.Tensor: Inverse power with shape (F,)
tf.Tensor: Power with shape (F,)
"""
power_estimate = tf.real(signal) ** 2 + tf.imag(signal) ** 2
power_estimate += tf.pad(
power_estimate,
((1, 0), (0, 0), (0, 0))
)[:-1, :]
power_estimate /= 2
power_estimate = tf.reduce_mean(power_estimate, axis=(0, -1))
power_estimate = get_power(signal)
power_estimate = tf.reduce_mean(power_estimate, axis=-1)
return power_estimate


def get_power_inverse(signal, channel_axis=0):
def get_power_inverse(signal):
"""Calculates inverse power for `signal`
Args:
signal (tf.Tensor): Single frequency signal with shape (D, T).
channel_axis (int): Axis of the channel dimension. Will be averaged.
psd_context: context for power estimation
Returns:
tf.Tensor: Inverse power with shape (T,)
"""
power = tf.reduce_mean(
tf.real(signal) ** 2 + tf.imag(signal) ** 2, axis=channel_axis)
power = get_power(signal)
eps = 1e-10 * tf.reduce_max(power)
inverse_power = tf.reciprocal(tf.maximum(power, eps))
return inverse_power


def get_power(signal, axis=-2):
    """Return the squared magnitude of `signal`, averaged along `axis`.

    Args:
        signal (tf.Tensor): Complex-valued signal with shape (D, T) or
            (F, D, T).
        axis: Axis passed to `tf.reduce_mean`. With the default of -2 the
            second-to-last axis (D in the shapes above) is averaged out.

    Returns:
        tf.Tensor: Power with shape (T,) or (F, T) for the default `axis`.
    """
    real_part = tf.real(signal)
    imag_part = tf.imag(signal)
    # |z|^2 = Re(z)^2 + Im(z)^2, computed element-wise.
    squared_magnitude = real_part ** 2 + imag_part ** 2
    return tf.reduce_mean(squared_magnitude, axis=axis)


#def get_power(signal, psd_context=0):
# """
# Calculates power for single frequency signal.
# If psd_context is a tuple, its two values
# describe the left- and right-hand context.
#
# Args:
# signal: (D, T)
# psd_context: tuple or int
# """
# shape = tf.shape(signal)
# if len(signal.get_shape()) == 2:
# signal = tf.reshape(signal, (1, shape[0], shape[1]))
#
# power = tf.reduce_mean(
# tf.real(signal) ** 2 + tf.imag(signal) ** 2,
# axis=-2
# )
#
# if psd_context is not 0:
# if isinstance(psd_context, tuple):
# context = psd_context[0] + 1 + psd_context[1]
# else:
# context = 2 * psd_context + 1
# psd_context = (psd_context, psd_context)
#
# power = tf.pad(
# power,
# ((0, 0), (psd_context[0], psd_context[1])),
# mode='constant'
# )
# print(power)
# power = tf.nn.convolution(
# power,
# tf.ones(context),
# padding='VALID'
# )[psd_context[1]:-psd_context[0]]
#
# denom = tf.nn.convolution(
# tf.zeros_like(power) + 1.,
# tf.ones(context),
# padding='VALID'
# )[psd_context[1]:-psd_context[0]]
# print(power)
# power /= denom
#
# elif psd_context == 0:
# pass
# else:
# raise ValueError(psd_context)
#
# return tf.squeeze(power, axis=0)


def get_correlations(Y, inverse_power, taps, delay):
"""Calculates weighted correlations of a window of length taps
Expand Down
Loading

0 comments on commit 77a1a26

Please sign in to comment.