Merge pull request #145 from fraunhoferportugal/feature/spectral-renaming

Feature/spectral renaming
fraunhoferportugal · Apr 3, 2024 · c5b98ff · c5b98ff
2 parents ff61958 + d1880f2
commit c5b98ff
Show file tree

Hide file tree

Showing 7 changed files with 83 additions and 53 deletions.
diff --git a/.gitignore b/.gitignore
@@ -121,4 +121,9 @@ notebooks/CorrelationReport.html
 notebooks/orange_input_test.csv
 notebooks/orange_input_train.csv
 notebooks/UCI HAR Dataset.zip
+
 tests/datasets/cached_datasets/
+
+# Files downloaded from raw GitHub pages
+raw.githubusercontent.com/
+
diff --git a/tests/test_calc_features.py b/tests/test_calc_features.py
@@ -74,7 +74,7 @@ def test_input_list_window_multi_axis_multi(self):
         )
         np.testing.assert_array_equal(
             features0.shape,
-            (16, 552),
+            (16, 495),
         )
 
     def test_input_array_window_tosplit_axis_single(self):
@@ -91,7 +91,7 @@ def test_input_array_window_tosplit_axis_single(self):
 
         np.testing.assert_array_equal(
             features1.shape,
-            (16, 184),
+            (16, 165),
         )
 
     def test_input_series_window_tosplit_axis_single(self):
@@ -107,7 +107,7 @@ def test_input_series_window_tosplit_axis_single(self):
 
         np.testing.assert_array_equal(
             features2.shape,
-            (16, 184),
+            (16, 165),
         )
 
     def test_input_dataframe_window_single_axis_multi(self):
@@ -136,7 +136,7 @@ def test_input_array_window_single_axis_multi(self):
 
         np.testing.assert_array_equal(
             features4.shape,
-            (1, 1005),
+            (1, 333),
         )
 
     def test_input_series_window_single_axis_single(self):
@@ -182,7 +182,7 @@ def test_personal_features(self):
 
         np.testing.assert_array_equal(
             features7.shape,
-            (1, 393),
+            (1, 169),
         )
 
     def test_dataset_extractor(self):

diff --git a/tests/test_features.py b/tests/test_features.py
@@ -1345,23 +1345,23 @@ def test_power_bandwidth(self):
 
     def test_fft_mean_coeff(self):
         np.testing.assert_almost_equal(
-            fft_mean_coeff(const0, Fs, nfreq=10),
+            spectrogram_mean_coeff(const0, Fs, bins=10)["values"],
             (0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0),
         )
         np.testing.assert_almost_equal(
-            fft_mean_coeff(const1, Fs, nfreq=10),
+            spectrogram_mean_coeff(const1, Fs, bins=10)["values"],
             (0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0),
         )
         np.testing.assert_almost_equal(
-            fft_mean_coeff(constNeg, Fs, nfreq=10),
+            spectrogram_mean_coeff(constNeg, Fs, bins=10)["values"],
             (0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0),
         )
         np.testing.assert_almost_equal(
-            fft_mean_coeff(constF, Fs, nfreq=10),
+            spectrogram_mean_coeff(constF, Fs, bins=10)["values"],
             (0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0),
         )
         np.testing.assert_almost_equal(
-            fft_mean_coeff(lin, Fs, nfreq=10),
+            spectrogram_mean_coeff(lin, Fs, bins=10)["values"],
             (
                 0.00408221375370652,
                 0.29732082717207287,
@@ -1376,7 +1376,7 @@ def test_fft_mean_coeff(self):
             ),
         )
         np.testing.assert_almost_equal(
-            fft_mean_coeff(lin0, Fs, nfreq=10),
+            spectrogram_mean_coeff(lin0, Fs, bins=10)["values"],
             (
                 0.004523228535962903,
                 0.3294413597474491,
@@ -1391,7 +1391,7 @@ def test_fft_mean_coeff(self):
             ),
         )
         np.testing.assert_almost_equal(
-            fft_mean_coeff(wave, Fs, nfreq=10),
+            spectrogram_mean_coeff(wave, Fs, bins=10)["values"],
             (
                 2.0234880089914443e-06,
                 0.0001448004568848076,
@@ -1406,7 +1406,7 @@ def test_fft_mean_coeff(self):
             ),
         )
         np.testing.assert_almost_equal(
-            fft_mean_coeff(offsetWave, Fs, nfreq=10),
+            spectrogram_mean_coeff(offsetWave, Fs, bins=10)["values"],
             (
                 2.0234880089914642e-06,
                 0.00014480045688480763,
@@ -1421,7 +1421,7 @@ def test_fft_mean_coeff(self):
             ),
         )
         np.testing.assert_almost_equal(
-            fft_mean_coeff(noiseWave, Fs, nfreq=10),
+            spectrogram_mean_coeff(noiseWave, Fs, bins=10)["values"],
             (
                 3.2947755935395495e-06,
                 0.00014466702099241778,

diff --git a/tests/tests_tools/test_features.json b/tests/tests_tools/test_features.json
@@ -66,7 +66,7 @@
             "use": "yes"
         },
         "new_feature_with_tag": {
-            "complexity": "constant",
+            "complexity": "log",
             "description": "A new feature with a tag.",
             "function": "new_feature_with_tag",
             "parameters": "",
@@ -75,17 +75,6 @@
         }
     },
     "spectral": {
-        "FFT mean coefficient": {
-            "complexity": "constant",
-            "description": "Computes the mean value of each spectrogram frequency.",
-            "function": "tsfel.fft_mean_coeff",
-            "n_features": "nfreq",
-            "parameters": {
-                "fs": 100,
-                "nfreq": 256
-            },
-            "use": "yes"
-        },
         "Fundamental frequency": {
             "complexity": "log",
             "description": "Computes the fundamental frequency.",
@@ -300,6 +289,17 @@
             },
             "use": "yes"
         },
+        "Spectrogram mean coefficient": {
+            "complexity": "constant",
+            "description": "Calculates the average value for each frequency in the spectrogram over the entire duration of the signal.",
+            "function": "tsfel.spectrogram_mean_coeff",
+            "n_features": "bins",
+            "parameters": {
+                "bins": 32,
+                "fs": 100
+            },
+            "use": "yes"
+        },
         "Wavelet absolute mean": {
             "complexity": "linear",
             "description": "Computes CWT absolute mean value of each wavelet scale.",
@@ -554,7 +554,7 @@
             "use": "yes"
         },
         "new_feature_with_parameter": {
-            "complexity": "constant",
+            "complexity": "log",
             "description": "A new feature.",
             "function": "new_feature_with_parameter",
             "parameters": {

diff --git a/tsfel/feature_extraction/calc_features.py b/tsfel/feature_extraction/calc_features.py
@@ -519,19 +519,28 @@ def calc_window_features(
                 for ax in range(len(header_names)):
                     sig_ax = signal_window if single_axis else signal_window[:, ax]
                     eval_result_ax = locals()[func_total](sig_ax, **parameters_total)
+
                     # Function returns more than one element
                     if isinstance(eval_result_ax, tuple):
-                        if np.isnan(eval_result_ax[0]):
-                            eval_result_ax = np.zeros(len(eval_result_ax))
-                        for rr in range(len(eval_result_ax)):
-                            feature_results += [eval_result_ax[rr]]
-                            feature_names += [
-                                str(header_names[ax]) + "_" + feat + "_" + str(rr),
-                            ]
+                        eval_result_ax = (
+                            np.zeros(len(eval_result_ax)) if np.isnan(eval_result_ax[0]) else eval_result_ax
+                        )
+                        for rr, value in enumerate(eval_result_ax):
+                            feature_results.append(value)
+                            feature_names.append(f"{header_names[ax]}_{feat}_{rr}")
+
+                    elif isinstance(eval_result_ax, dict):
+                        names = eval_result_ax["names"]
+                        values = eval_result_ax["values"]
+                        eval_result_ax = np.zeros(len(values)) if np.isnan(values[0]) else eval_result_ax
+                        for name, value in zip(names, values):
+                            feature_results.append(value)
+                            feature_names.append(f"{header_names[ax]}_{feat}_{name}Hz")
                     else:
                         feature_results += [eval_result_ax]
                         feature_names += [str(header_names[ax]) + "_" + feat]
 
+
     features = pd.DataFrame(
         data=np.array(feature_results).reshape(1, len(feature_results)),
         columns=np.array(feature_names),

diff --git a/tsfel/feature_extraction/features.json b/tsfel/feature_extraction/features.json
@@ -1,14 +1,14 @@
 {
   "spectral": {
-    "FFT mean coefficient": {
+    "Spectrogram mean coefficient": {
       "complexity": "constant",
-      "description": "Computes the mean value of each spectrogram frequency.",
-      "function": "tsfel.fft_mean_coeff",
+      "description": "Calculates the average value for each frequency in the spectrogram over the entire duration of the signal.",
+      "function": "tsfel.spectrogram_mean_coeff",
       "parameters": {
         "fs": 100,
-        "nfreq": 256
+        "bins": 32
       },
-      "n_features": "nfreq",
+      "n_features": "bins",
       "use": "yes"
     },
     "Fundamental frequency": {

diff --git a/tsfel/feature_extraction/features.py b/tsfel/feature_extraction/features.py
@@ -1504,34 +1504,50 @@ def power_bandwidth(signal, fs):
 
 
 @set_domain("domain", "spectral")
-def fft_mean_coeff(signal, fs, nfreq=256):
-    """Computes the mean value of each spectrogram frequency.
+def spectrogram_mean_coeff(signal, fs, bins=32):
+    """Calculates the average power spectral density (PSD) for each frequency
+    throughout the entire signal duration provided by the spectrogram.
 
-    nfreq can not be higher than half signal length plus one.
-    When it does, it is automatically set to half signal length plus one.
+    The values represent the average power spectral density computed on frequency bins. The feature name refers to the
+    frequency bin where the PSD was taken. Each bin is ``fs`` / (``bins`` * 2 - 2) Hz wide. The method relies on
+    `scipy.signal.spectrogram`; except for ``nperseg`` and ``fs``, all other parameters are left at their defaults.
 
     Feature computational cost: 1
 
     Parameters
     ----------
-    signal : nd-array
-        Input from which fft mean coefficients are computed
+    signal : array_like
+        Input from which the spectrogram average power spectral density coefficients are computed.
     fs : float
-        Sampling frequency
-    nfreq : int
-        The number of frequencies
+        Sampling frequency of the ``signal``.
+    bins : int, optional
+        The number of frequency bins.
 
     Returns
     -------
-    nd-array
-        The mean value of each spectrogram frequency
+    dict
+        ``names``: bin frequencies rounded to two decimals, as ``str``; ``values``: PSD per bin averaged over the signal duration.
+
+    Notes
+    -----
+    The optimal number of frequency bins depends on the task at hand. Using a
+    higher number of bins with low sampling frequencies may result in excessive
+    frequency resolution and the loss of valuable coarse-grained information.
+    The default value should be suitable for most cases when working with the
+    default sampling frequency. The number of frequency bins must be modified
+    in the feature configuration file.
+
+    .. versionadded:: 0.1.7
     """
-    if nfreq > len(signal) // 2 + 1:
-        nfreq = len(signal) // 2 + 1
 
-    fmag_mean = scipy.signal.spectrogram(signal, fs, nperseg=nfreq * 2 - 2)[2].mean(1)
+    if bins > len(signal) // 2 + 1:
+        bins = len(signal) // 2 + 1
+
+    frequencies, _, Sxx = scipy.signal.spectrogram(signal, fs, nperseg=bins * 2 - 2)
+    Sxx_mean = Sxx.mean(1)
+    f_keys = np.round(frequencies, 2).astype(str)
 
-    return tuple(fmag_mean)
+    return {"names": f_keys, "values": Sxx_mean}
 
 
 @set_domain("domain", "spectral")