Merge pull request #11 from danielhkl/master

get_power and setup
fgnt · Jul 24, 2018 · 77a1a26
2 parents 0747e82 + 4e3a010
commit 77a1a26
Show file tree

Hide file tree

Showing 7 changed files with 248 additions and 181 deletions.
diff --git a/examples/WPE_Numpy_offline.ipynb b/examples/WPE_Numpy_offline.ipynb
@@ -101,7 +101,7 @@
     "sampling_rate = 16000\n",
     "delay = 3\n",
     "iterations = 5\n",
-    "K = 10"
+    "taps = 10"
    ]
   },
   {
@@ -158,7 +158,7 @@
    "metadata": {},
    "outputs": [],
    "source": [
-    "Z = wpe(Y, iterations=iterations, mode='full').transpose(1, 2, 0)\n",
+    "Z = wpe(Y, iterations=iterations, statistics_mode='full').transpose(1, 2, 0)\n",
     "z = istft(Z, size=stft_options['size'], shift=stft_options['shift'])\n",
     "IPython.display.Audio(z[0], rate=sampling_rate)"
    ]
@@ -190,9 +190,9 @@
  ],
  "metadata": {
   "kernelspec": {
-   "display_name": "Python 3",
+   "display_name": "py36",
    "language": "python",
-   "name": "python3"
+   "name": "py36"
   },
   "language_info": {
    "codemirror_mode": {

diff --git a/examples/WPE_Numpy_online.ipynb b/examples/WPE_Numpy_online.ipynb
@@ -63,8 +63,8 @@
     "channels = 8\n",
     "sampling_rate = 16000\n",
     "delay = 3\n",
-    "alpha=0.99\n",
-    "K = 10\n",
+    "alpha=0.9999\n",
+    "taps = 10\n",
     "frequency_bins = stft_options['size'] // 2 + 1"
    ]
   },
@@ -112,8 +112,8 @@
     "T, _, _ = Y.shape\n",
     "\n",
     "def aquire_framebuffer():\n",
-    "    buffer = list(Y[:K+delay+1, :, :])\n",
-    "    for t in range(K+delay+1, T):\n",
+    "    buffer = list(Y[:taps+delay+1, :, :])\n",
+    "    for t in range(taps+delay+1, T):\n",
     "        yield np.array(buffer)\n",
     "        buffer.append(Y[t, :, :])\n",
     "        buffer.pop(0)"
@@ -136,11 +136,11 @@
    "outputs": [],
    "source": [
     "Z_list = []\n",
-    "Q = np.stack([np.identity(channels * K) for a in range(frequency_bins)])\n",
-    "G = np.zeros((frequency_bins, channels * K, channels))\n",
+    "Q = np.stack([np.identity(channels * taps) for a in range(frequency_bins)])\n",
+    "G = np.zeros((frequency_bins, channels * taps, channels))\n",
     "\n",
     "for Y_step in tqdm(aquire_framebuffer()):\n",
-    "    Z, Q, G = online_wpe_step(Y_step, get_power_online(Y_step), Q, G, alpha=alpha, K=K, delay=delay)\n",
+    "    Z, Q, G = online_wpe_step(Y_step, get_power_online(Y_step.transpose(1, 2, 0)), Q, G, alpha=alpha, taps=taps, delay=delay)\n",
     "    Z_list.append(Z)\n",
     "\n",
     "Z_stacked = np.stack(Z_list)\n",

diff --git a/examples/WPE_Tensorflow_offline.ipynb b/examples/WPE_Tensorflow_offline.ipynb
@@ -110,7 +110,7 @@
     "sampling_rate = 16000\n",
     "delay = 3\n",
     "iterations = 5\n",
-    "K = 10"
+    "taps = 10"
    ]
   },
   {
@@ -167,9 +167,10 @@
    "metadata": {},
    "outputs": [],
    "source": [
+    "from nara_wpe.tf_wpe import get_power\n",
     "with tf.Session()as session:\n",
     "    Y_tf = tf.placeholder(tf.complex128, shape=(None, None, None))\n",
-    "    Z_tf = wpe(Y_tf, iterations=iterations)\n",
+    "    Z_tf = wpe(Y_tf, taps=taps, iterations=iterations)\n",
     "    Z = session.run(Z_tf, {Y_tf: Y})\n",
     "z = istft(Z.transpose(1, 2, 0), size=stft_options['size'], shift=stft_options['shift'])\n",
     "IPython.display.Audio(z[0], rate=sampling_rate)"
@@ -201,9 +202,9 @@
  ],
  "metadata": {
   "kernelspec": {
-   "display_name": "Python 3",
+   "display_name": "py36",
    "language": "python",
-   "name": "python3"
+   "name": "py36"
   },
   "language_info": {
    "codemirror_mode": {

diff --git a/examples/WPE_Tensorflow_online.ipynb b/examples/WPE_Tensorflow_online.ipynb
diff --git a/nara_wpe/tf_wpe.py b/nara_wpe/tf_wpe.py
@@ -57,43 +57,106 @@ def _slice(x):
 
 
 def get_power_online(signal):
-    """Calculates power over last to frames for `signal`
+    """Calculates power for `signal`
 
         Args:
-            signal (tf.Tensor): Single frequency signal with shape (T, F, D).
+            signal (tf.Tensor): Signal with shape (F, D, T).
 
         Returns:
-            tf.Tensor: Inverse power with shape (F,)
+            tf.Tensor: Power with shape (F,)
 
     """
-    power_estimate = tf.real(signal) ** 2 + tf.imag(signal) ** 2
-    power_estimate += tf.pad(
-        power_estimate,
-        ((1, 0), (0, 0), (0, 0))
-    )[:-1, :]
-    power_estimate /= 2
-    power_estimate = tf.reduce_mean(power_estimate, axis=(0, -1))
+    power_estimate = get_power(signal)
+    power_estimate = tf.reduce_mean(power_estimate, axis=-1)
     return power_estimate
 
 
-def get_power_inverse(signal, channel_axis=0):
+def get_power_inverse(signal):
     """Calculates inverse power for `signal`
 
     Args:
         signal (tf.Tensor): Single frequency signal with shape (D, T).
-        channel_axis (int): Axis of the channel dimension. Will be averaged.
-
+        psd_context: context for power estimation
     Returns:
         tf.Tensor: Inverse power with shape (T,)
 
     """
-    power = tf.reduce_mean(
-        tf.real(signal) ** 2 + tf.imag(signal) ** 2, axis=channel_axis)
+    power = get_power(signal)
     eps = 1e-10 * tf.reduce_max(power)
     inverse_power = tf.reciprocal(tf.maximum(power, eps))
     return inverse_power
 
 
+def get_power(signal, axis=-2):
+    """Calculates power for `signal`
+
+    Args:
+        signal (tf.Tensor): Single frequency signal with shape (D, T) or (F, D, T).
+        axis: reduce_mean axis
+    Returns:
+        tf.Tensor: Power with shape (T,) or (F, T)
+
+    """
+    power = tf.real(signal) ** 2 + tf.imag(signal) ** 2
+    power = tf.reduce_mean(power, axis=axis)
+
+    return power
+
+
+#def get_power(signal, psd_context=0):
+#    """
+#    Calculates power for single frequency signal.
+#    If psd_context is a tuple, its two values
+#    describe the left- and right-hand context.
+#
+#    Args:
+#        signal: (D, T)
+#        psd_context: tuple or int
+#    """
+#    shape = tf.shape(signal)
+#    if len(signal.get_shape()) == 2:
+#        signal = tf.reshape(signal, (1, shape[0], shape[1]))
+#
+#    power = tf.reduce_mean(
+#        tf.real(signal) ** 2 + tf.imag(signal) ** 2,
+#        axis=-2
+#    )
+#
+#    if psd_context is not 0:
+#        if isinstance(psd_context, tuple):
+#            context = psd_context[0] + 1 + psd_context[1]
+#        else:
+#            context = 2 * psd_context + 1
+#            psd_context = (psd_context, psd_context)
+#
+#        power = tf.pad(
+#            power,
+#            ((0, 0), (psd_context[0], psd_context[1])),
+#            mode='constant'
+#        )
+#        print(power)
+#        power = tf.nn.convolution(
+#            power,
+#            tf.ones(context),
+#            padding='VALID'
+#        )[psd_context[1]:-psd_context[0]]
+#
+#        denom = tf.nn.convolution(
+#            tf.zeros_like(power) + 1.,
+#            tf.ones(context),
+#            padding='VALID'
+#        )[psd_context[1]:-psd_context[0]]
+#        print(power)
+#        power /= denom
+#
+#    elif psd_context == 0:
+#        pass
+#    else:
+#        raise ValueError(psd_context)
+#
+#    return tf.squeeze(power, axis=0)
+
+
 def get_correlations(Y, inverse_power, taps, delay):
     """Calculates weighted correlations of a window of length taps