From 8c1d4d8d49d4af92d90d7d7fb5017177f8191be7 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Jo=C3=A3o=20Saraiva?= Date: Tue, 7 Feb 2023 09:46:13 +0000 Subject: [PATCH 01/47] Add derived biosignals: ACCMAG, IBI, RRI, HR --- src/ltbio/biosignals/modalities/ACC.py | 17 ++++++++++++- src/ltbio/biosignals/modalities/ECG.py | 11 +++++++-- src/ltbio/biosignals/modalities/HR.py | 27 +++++++++++++++------ src/ltbio/biosignals/modalities/PPG.py | 19 ++++++++++++++- src/ltbio/biosignals/modalities/__init__.py | 6 ++--- 5 files changed, 65 insertions(+), 15 deletions(-) diff --git a/src/ltbio/biosignals/modalities/ACC.py b/src/ltbio/biosignals/modalities/ACC.py index 6e6c323a..d1dba368 100644 --- a/src/ltbio/biosignals/modalities/ACC.py +++ b/src/ltbio/biosignals/modalities/ACC.py @@ -14,7 +14,7 @@ # =================================== -from ltbio.biosignals.modalities.Biosignal import Biosignal +from ltbio.biosignals.modalities.Biosignal import Biosignal, DerivedBiosignal from ltbio.biosignals.timeseries.Unit import G, Multiplier @@ -27,3 +27,18 @@ def __init__(self, timeseries, source=None, patient=None, acquisition_location=N def plot_summary(self, show: bool = True, save_to: str = None): pass + + +class ACCMAG(DerivedBiosignal): + + DEFAULT_UNIT = G(Multiplier._) + + def __init__(self, timeseries, source=None, patient=None, acquisition_location=None, name=None, original: ACC | None = None): + super().__init__(timeseries, source, patient, acquisition_location, name, original) + + @classmethod + def fromACC(cls): + pass + + def plot_summary(self, show: bool = True, save_to: str = None): + pass diff --git a/src/ltbio/biosignals/modalities/ECG.py b/src/ltbio/biosignals/modalities/ECG.py index 567df024..aec91950 100644 --- a/src/ltbio/biosignals/modalities/ECG.py +++ b/src/ltbio/biosignals/modalities/ECG.py @@ -495,7 +495,10 @@ def zhaoSQI(self, by_segment: bool = False): peaks1 = self.__r_indices(channel, hamilton_segmenter) peaks2 = self.__r_indices(channel, christov_segmenter) - res[channel_name] = [channel._apply_operation_and_return(ZZ2018, p1, p2, fs=channel.sampling_frequency, search_window=100, nseg=1024, mode='fuzzy') + def aux(signal, p1, p2, **kwargs): + return ZZ2018(signal, p1, p2, **kwargs) + + res[channel_name] = [channel._apply_operation_and_return(aux, fs=channel.sampling_frequency, search_window=100, nseg=1024, mode='fuzzy') for p1, p2 in zip(peaks1, peaks2)] if not by_segment: @@ -505,8 +508,12 @@ def zhaoSQI(self, by_segment: bool = False): class RRI(DerivedBiosignal): - def __init__(self, timeseries, source=None, patient=None, acquisition_location=None, name=None, original=None): + def __init__(self, timeseries, source=None, patient=None, acquisition_location=None, name=None, original: ECG | None = None): super().__init__(timeseries, source, patient, acquisition_location, name, original) + @classmethod + def fromECG(cls): + pass + def plot_summary(self, show: bool = True, save_to: str = None): pass diff --git a/src/ltbio/biosignals/modalities/HR.py b/src/ltbio/biosignals/modalities/HR.py index ce3bcc8c..de4520b1 100644 --- a/src/ltbio/biosignals/modalities/HR.py +++ b/src/ltbio/biosignals/modalities/HR.py @@ -14,15 +14,26 @@ # =================================== -from ltbio.biosignals.modalities.Biosignal import Biosignal -from ltbio.biosignals.modalities.ECG import ECG -from ltbio.biosignals.modalities.PPG import PPG +from ltbio.biosignals.modalities.Biosignal import DerivedBiosignal +from ltbio.biosignals.modalities.ECG import ECG, RRI +from ltbio.biosignals.modalities.PPG import PPG, 
IBI +from ltbio.biosignals.timeseries.Unit import BeatsPerMinute -class HR(Biosignal): - def plot_summary(self, show: bool = True, save_to: str = None): +class HR(DerivedBiosignal): + + DEFAULT_UNIT = BeatsPerMinute() + + def __init__(self, timeseries, source=None, patient=None, acquisition_location=None, name=None, original: RRI | IBI | ECG | PPG | None = None): + super(HR, self).__init__(timeseries, source, patient, acquisition_location, name, original) + + @classmethod + def fromRRI(cls): pass - def __init__(self, timeseries, source=None, patient=None, acquisition_location=None, name=None, original_signal:ECG|PPG=None): - super(HR, self).__init__(timeseries, source, patient, acquisition_location, name) - self.__original_signal = original_signal + @classmethod + def fromIBI(cls): + pass + + def plot_summary(self, show: bool = True, save_to: str = None): + pass diff --git a/src/ltbio/biosignals/modalities/PPG.py b/src/ltbio/biosignals/modalities/PPG.py index 1badd38c..93fc288f 100644 --- a/src/ltbio/biosignals/modalities/PPG.py +++ b/src/ltbio/biosignals/modalities/PPG.py @@ -14,7 +14,9 @@ # =================================== -from ltbio.biosignals.modalities.Biosignal import Biosignal +from ltbio.biosignals.modalities.Biosignal import Biosignal, DerivedBiosignal +from ltbio.biosignals.timeseries.Unit import Second + class PPG(Biosignal): @@ -25,3 +27,18 @@ def __init__(self, timeseries, source=None, patient=None, acquisition_location=N def plot_summary(self, show: bool = True, save_to: str = None): pass + + +class IBI(DerivedBiosignal): + + DEFAULT_UNIT = Second() + + def __init__(self, timeseries, source=None, patient=None, acquisition_location=None, name=None, original: PPG | None = None): + super().__init__(timeseries, source, patient, acquisition_location, name, original) + + @classmethod + def fromPPG(cls): + pass + + def plot_summary(self, show: bool = True, save_to: str = None): + pass diff --git a/src/ltbio/biosignals/modalities/__init__.py b/src/ltbio/biosignals/modalities/__init__.py index bc1b71b7..7155f885 100644 --- a/src/ltbio/biosignals/modalities/__init__.py +++ b/src/ltbio/biosignals/modalities/__init__.py @@ -1,10 +1,10 @@ # Quick shortcuts to classes -from ltbio.biosignals.modalities.ACC import ACC -from ltbio.biosignals.modalities.ECG import ECG +from ltbio.biosignals.modalities.ACC import ACC, ACCMAG +from ltbio.biosignals.modalities.ECG import ECG, RRI from ltbio.biosignals.modalities.EDA import EDA from ltbio.biosignals.modalities.EEG import EEG from ltbio.biosignals.modalities.EMG import EMG from ltbio.biosignals.modalities.HR import HR -from ltbio.biosignals.modalities.PPG import PPG +from ltbio.biosignals.modalities.PPG import PPG, IBI from ltbio.biosignals.modalities.RESP import RESP from ltbio.biosignals.modalities.TEMP import TEMP From 1a58476a2a3a683df4812e5b128a431fdc2f0e79 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Jo=C3=A3o=20Saraiva?= Date: Sat, 11 Feb 2023 17:26:31 +0000 Subject: [PATCH 02/47] Fix bug in Segment._partition When called during Segmenter.apply, the _partition method did not consider the last complete fragment of the Segment, and so this would be lost. This was an issue when the no. samples was divisible by the given fragment's length. 
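
For illustration, a minimal sketch of the fixed indexing logic (simplified
to a plain list rather than a Segment, and assuming the number of samples
is divisible by the fragment length, which is the case this commit fixes):

    samples = list(range(12))                 # 12 samples
    individual_length, overlap_length = 4, 0  # fragments of 4, no overlap
    step = individual_length - overlap_length

    # Before: range(0, len(samples) - individual_length, step) yields the
    # starts 0 and 4 only, losing the last complete fragment [8, 9, 10, 11].
    # After: range(0, len(samples), step) yields the starts 0, 4 and 8.
    fragments = [samples[i: i + individual_length]
                 for i in range(0, len(samples), step)]
    assert fragments == [[0, 1, 2, 3], [4, 5, 6, 7], [8, 9, 10, 11]]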
--- src/ltbio/biosignals/timeseries/Timeseries.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/ltbio/biosignals/timeseries/Timeseries.py b/src/ltbio/biosignals/timeseries/Timeseries.py index befaa8ee..16e71d47 100644 --- a/src/ltbio/biosignals/timeseries/Timeseries.py +++ b/src/ltbio/biosignals/timeseries/Timeseries.py @@ -489,7 +489,7 @@ def _partition(self, individual_length: int, overlap_length: int = 0): res = [] step = individual_length - overlap_length - for i in range(0, len(self) - individual_length, step): + for i in range(0, len(self), step): trimmed_samples = self.__samples[i: i + individual_length] trimmed_raw_samples = self.__raw_samples[i: i + individual_length] res.append(self._new(samples=trimmed_samples, raw_samples=trimmed_raw_samples, From 7db9af78dfaba90889b942347f9df1053af06933 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Jo=C3=A3o=20Saraiva?= Date: Sun, 12 Feb 2023 13:56:55 +0000 Subject: [PATCH 03/47] Add some quality indexes for EDA --- src/ltbio/biosignals/modalities/EDA.py | 32 ++++++++++++++++++++++++++ 1 file changed, 32 insertions(+) diff --git a/src/ltbio/biosignals/modalities/EDA.py b/src/ltbio/biosignals/modalities/EDA.py index a1909fea..75b9d677 100644 --- a/src/ltbio/biosignals/modalities/EDA.py +++ b/src/ltbio/biosignals/modalities/EDA.py @@ -13,6 +13,10 @@ # Last Updated: 07/07/2022 # =================================== +from datetime import timedelta + +from datetimerange import DateTimeRange +from numpy import mean from ltbio.biosignals.modalities.Biosignal import Biosignal from ltbio.biosignals.timeseries.Unit import Volt, Multiplier @@ -27,3 +31,31 @@ def __init__(self, timeseries, source=None, patient=None, acquisition_location=N def plot_summary(self, show: bool = True, save_to: str = None): pass + + @property + def preview(self): + """Returns 2 minutes of the middle of the signal.""" + domain = self.domain + middle_of_domain: DateTimeRange = domain[len(domain) // 2] + middle = middle_of_domain.start_datetime + (middle_of_domain.timedelta / 2) + try: + return self[middle - timedelta(seconds=2): middle + timedelta(minutes=2)] + except IndexError: + raise AssertionError( + f"The middle segment of {self.name} from {self.patient_code} does not have at least 5 seconds to return a preview.") + + @staticmethod + def racSQI(samples): + """ + Rate of Amplitude change (RAC) + It is recomended to be analysed in windows of 2 seconds. + """ + max_, min_ = max(samples), min(samples) + amplitude = max_ - min_ + return abs(amplitude / max_) + + def acceptable_quality(self): # -> Timeline + """ + Suggested by Böttcher et al. Scientific Reports, 2022, for wearable wrist EDA. 
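+        Criteria, as implemented in the return expression below: mean
+        amplitude above 0.05 and rate of amplitude change (RAC) below 0.2,
+        evaluated in 2-second windows.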
+ """ + return self.when(lambda x: mean(x) > 0.05 and EDA.racSQI(x) < 0.2, window=timedelta(seconds=2)) From 26816e0340fb2f370e71923f995daacb9d86f88d Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Jo=C3=A3o=20Saraiva?= Date: Sun, 12 Feb 2023 13:57:03 +0000 Subject: [PATCH 04/47] Add some quality indexes for HR --- src/ltbio/biosignals/modalities/HR.py | 6 ++++++ 1 file changed, 6 insertions(+) diff --git a/src/ltbio/biosignals/modalities/HR.py b/src/ltbio/biosignals/modalities/HR.py index de4520b1..ae28401a 100644 --- a/src/ltbio/biosignals/modalities/HR.py +++ b/src/ltbio/biosignals/modalities/HR.py @@ -37,3 +37,9 @@ def fromIBI(cls): def plot_summary(self, show: bool = True, save_to: str = None): pass + + def acceptable_quality(self): # -> Timeline + """ + Acceptable physiological values + """ + return self.when(lambda x: 40 <= x <= 200) # between 40-200 bpm From 5f79c034baca6d2b207e6411899e3b492c79f8df Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Jo=C3=A3o=20Saraiva?= Date: Sun, 12 Feb 2023 13:57:13 +0000 Subject: [PATCH 05/47] Add some quality indexes for PPG --- src/ltbio/biosignals/modalities/PPG.py | 32 ++++++++++++++++++++++++++ 1 file changed, 32 insertions(+) diff --git a/src/ltbio/biosignals/modalities/PPG.py b/src/ltbio/biosignals/modalities/PPG.py index 93fc288f..7129d713 100644 --- a/src/ltbio/biosignals/modalities/PPG.py +++ b/src/ltbio/biosignals/modalities/PPG.py @@ -13,6 +13,10 @@ # Last Updated: 09/07/2022 # =================================== +from datetime import timedelta + +import numpy as np +from scipy.signal import welch from ltbio.biosignals.modalities.Biosignal import Biosignal, DerivedBiosignal from ltbio.biosignals.timeseries.Unit import Second @@ -28,6 +32,34 @@ def __init__(self, timeseries, source=None, patient=None, acquisition_location=N def plot_summary(self, show: bool = True, save_to: str = None): pass + def acceptable_quality(self): # -> Timeline + """ + Suggested for wearable wrist PPG by: + - Glasstetter et al. MDPI Sensors, 21, 2021 + - Böttcher et al. 
Scientific Reports, 2022 + """ + + sfreq = self.sampling_frequency + nperseg = int(4 * self.sampling_frequency) # 4 s window + fmin = 0.1 # Hz + fmax = 5 # Hz + + def spectral_entropy(x, sfreq, nperseg, fmin, fmax): + if len(x) < nperseg: # if segment smaller than 4s + nperseg = len(x) + noverlap = int(0.9375 * nperseg) # if nperseg = 4s, then 3.75 s of overlap + f, psd = welch(x, sfreq, nperseg=nperseg, noverlap=noverlap) + idx_min = np.argmin(np.abs(f - fmin)) + idx_max = np.argmin(np.abs(f - fmax)) + psd = psd[idx_min:idx_max] + psd /= np.sum(psd) # normalize the PSD + entropy = -np.sum(psd * np.log2(psd)) + N = idx_max - idx_min + entropy_norm = entropy / np.log2(N) + return entropy_norm + + return self.when(lambda x: spectral_entropy(x, sfreq, nperseg, fmin, fmax) < 0.8, window=timedelta(seconds=4)) + class IBI(DerivedBiosignal): From 0320e2ae8800007af4a3171deb9c0885a097662e Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Jo=C3=A3o=20Saraiva?= Date: Sun, 12 Feb 2023 13:57:23 +0000 Subject: [PATCH 06/47] Add some quality indexes for TEMP --- src/ltbio/biosignals/modalities/TEMP.py | 32 +++++++++++++++++++++++++ 1 file changed, 32 insertions(+) diff --git a/src/ltbio/biosignals/modalities/TEMP.py b/src/ltbio/biosignals/modalities/TEMP.py index 3652a379..838c14c4 100644 --- a/src/ltbio/biosignals/modalities/TEMP.py +++ b/src/ltbio/biosignals/modalities/TEMP.py @@ -13,6 +13,10 @@ # Last Updated: 09/07/2022 # =================================== +from datetime import timedelta + +from datetimerange import DateTimeRange +from numpy import mean from ltbio.biosignals.modalities.Biosignal import Biosignal from ltbio.biosignals.timeseries.Unit import DegreeCelsius, Multiplier @@ -27,3 +31,31 @@ def __init__(self, timeseries, source=None, patient=None, acquisition_location=N def plot_summary(self, show: bool = True, save_to: str = None): pass + + @property + def preview(self): + """Returns 2 minutes of the middle of the signal.""" + domain = self.domain + middle_of_domain: DateTimeRange = domain[len(domain) // 2] + middle = middle_of_domain.start_datetime + (middle_of_domain.timedelta / 2) + try: + return self[middle - timedelta(seconds=2): middle + timedelta(minutes=2)] + except IndexError: + raise AssertionError( + f"The middle segment of {self.name} from {self.patient_code} does not have at least 5 seconds to return a preview.") + + @staticmethod + def racSQI(samples): + """ + Rate of Amplitude change (RAC) + It is recomended to be analysed in windows of 2 seconds. + """ + max_, min_ = max(samples), min(samples) + amplitude = max_ - min_ + return abs(amplitude / max_) + + def acceptable_quality(self): # -> Timeline + """ + Suggested by Böttcher et al. Scientific Reports, 2022, for wearable wrist TEMP. 
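+        Criteria, as implemented in the return expression below: mean
+        temperature between 25 and 40 degrees Celsius and rate of amplitude
+        change (RAC) below 0.2, evaluated in 2-second windows.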
+ """ + return self.when(lambda x: 25 < mean(x) < 40 and TEMP.racSQI(x) < 0.2, window=timedelta(seconds=2)) From 6b7dc0c843bf3f83c9f56c8582621a144cf7bcf1 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Jo=C3=A3o=20Saraiva?= Date: Sun, 12 Feb 2023 13:58:04 +0000 Subject: [PATCH 07/47] Add Timeline --- src/ltbio/biosignals/timeseries/Timeline.py | 175 ++++++++++++++++++++ 1 file changed, 175 insertions(+) create mode 100644 src/ltbio/biosignals/timeseries/Timeline.py diff --git a/src/ltbio/biosignals/timeseries/Timeline.py b/src/ltbio/biosignals/timeseries/Timeline.py new file mode 100644 index 00000000..41657518 --- /dev/null +++ b/src/ltbio/biosignals/timeseries/Timeline.py @@ -0,0 +1,175 @@ +# -- encoding: utf-8 -- + +# =================================== + +# IT - LongTermBiosignals + +# Package: src/ltbio/biosignals/timeseries +# Module: Timeline +# Description: + +# Contributors: João Saraiva +# Created: 08/02/2023 + +# =================================== +from datetime import datetime, timedelta +from typing import Sequence + +import matplotlib.pyplot as plt +from datetimerange import DateTimeRange +from matplotlib import cm +from matplotlib.dates import date2num +from matplotlib.lines import Line2D +from matplotlib.patches import Rectangle + + +class Timeline(): + + class Group(): + + def __init__(self, intervals: Sequence[DateTimeRange] = [], points: Sequence[datetime] = [], name: str = None, color_hex: str = None): + self.intervals = list(intervals) + self.points = list(points) + self.name = name + self.color_hex = color_hex + + def __repr__(self): + res = '' + if len(self.intervals) > 1: + res += ' U '.join(['[' + str(interval) + '[' for interval in self.intervals]) + if len(self.points) > 1: + res += '\nand the following tiempoints:\n' + res += ', '.join(['[' + str(point) + '[' for point in self.points]) + return res + + @property + def initial_datetime(self) -> datetime: + return min([interval.start_datetime for interval in self.intervals] + self.points) + + @property + def final_datetime(self) -> datetime: + return max([interval.end_datetime for interval in self.intervals] + self.points) + + @property + def duration(self) -> timedelta: + return sum([interval.timedelta for interval in self.intervals], timedelta()) + + @property + def has_only_intervals(self) -> bool: + return len(self.intervals) > 0 and len(self.points) == 0 + + @property + def has_only_points(self) -> bool: + return len(self.intervals) == 0 and len(self.points) > 0 + + def _as_index(self) -> tuple: + if self.has_only_intervals: + return tuple([slice(interval.start_datetime, interval.end_datetime) for interval in self.intervals]) + if self.has_only_points: + return tuple(self.points) + return None + + def __init__(self, *groups: Group, name: str = None): + self.groups = list(groups) + self.__name = name + + @property + def name(self): + return self.__name if self.__name else "No Name" + + @name.setter + def name(self, name: str): + self.__name = name + + def __repr__(self): + if len(self.groups) == 1: + return repr(self.groups[0]) + else: + res = '' + for g in self.groups: + res += f'\nGroup {g}\n' + res += repr(g) + return res + + def __and__(self, other): + if isinstance(other, Timeline): + groups = [] + groups += self.groups + groups += other.groups + group_names = [g.name for g in groups] + if len(set(group_names)) != len(group_names): + raise NameError('Cannot join Timelines with groups with the same names.') + return Timeline(*groups, name = self.name + " and " + other.name) + + @property + def 
initial_datetime(self) -> datetime: + return min([g.initial_datetime for g in self.groups]) + + @property + def final_datetime(self) -> datetime: + return max([g.final_datetime for g in self.groups]) + + @property + def duration(self) -> timedelta: + if len(self.groups) == 1: + return self.groups[0].duration + else: + return NotImplementedError() + + @property + def is_index(self) -> bool: + """ + Returns whether or not this can serve as an index to a Biosignal. + A Timeline can be an index when: + - It only contains one interval or a union of intervals (serves as a subdomain) + - It only contains one point or a set of points (serves as set of objects) + """ + return len(self.groups) == 1 and (self.groups[0].has_only_intervals ^ self.groups[0].has_only_points) + + def _as_index(self) -> tuple | None: + if self.is_index: + return self.groups[0]._as_index() + + def plot(self, show:bool=True, save_to:str=None): + fig = plt.figure(figsize=(len(self.groups)*10, len(self.groups)*2)) + ax = plt.gca() + legend_elements = [] + + cmap = cm.get_cmap('tab20b') + for y, g in enumerate(self.groups): + color = g.color_hex + if color is None: + color = cmap(y/len(self.groups)) + + for interval in g.intervals: + start = date2num(interval.start_datetime) + end = date2num(interval.end_datetime) + rect = Rectangle((start, y + 0.4), end - start, 0.4, facecolor=color, alpha=0.5) + ax.add_patch(rect) + + for point in g.points: + ax.scatter(date2num(point), y + 0.95, color=color, alpha=0.5, marker = 'o', markersize=10) + + if len(self.groups) > 1: + legend_elements.append(Line2D([0], [0], marker='o', color=color, label=g.name, markerfacecolor='g', markersize=10)) + + ax.set_xlim(date2num(self.initial_datetime), date2num(self.final_datetime)) + ax.set_ylim(0, len(self.groups)) + ax.get_yaxis().set_visible(False) + for pos in ['right', 'top', 'left']: + plt.gca().spines[pos].set_visible(False) + ax.xaxis_date() + fig.autofmt_xdate() + + if len(self.groups) > 1: + ax.legend(handles=legend_elements, loc='center') + + if self.name: + fig.suptitle(self.name, fontsize=11) + fig.tight_layout() + if save_to is not None: + fig.savefig(save_to) + plt.show() if show else plt.close() + + def _repr_png_(self): + self.plot() From a29c76bdeaf7ae71b26dd4c6995934fbd19208df Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Jo=C3=A3o=20Saraiva?= Date: Sun, 12 Feb 2023 14:41:12 +0000 Subject: [PATCH 08/47] Remove segment copies on making new Segment and Timeseries --- src/ltbio/biosignals/timeseries/Timeseries.py | 9 ++++----- 1 file changed, 4 insertions(+), 5 deletions(-) diff --git a/src/ltbio/biosignals/timeseries/Timeseries.py b/src/ltbio/biosignals/timeseries/Timeseries.py index 16e71d47..0aa2b057 100644 --- a/src/ltbio/biosignals/timeseries/Timeseries.py +++ b/src/ltbio/biosignals/timeseries/Timeseries.py @@ -442,11 +442,11 @@ def _new(self, samples: array = None, initial_datetime: datetime = None, samplin :return: A new Segment with the given fields changed. All other contents shall remain the same. 
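         (As of this commit, unspecified fields are passed by reference
         rather than copied; see the changed lines below.)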
:rtype: Segment """ - samples = self.__samples.copy() if samples is None else samples # copy + samples = self.__samples if samples is None else samples initial_datetime = self.__initial_datetime if initial_datetime is None else initial_datetime - sampling_frequency = self.__sampling_frequency.__copy__() if sampling_frequency is None else sampling_frequency + sampling_frequency = self.__sampling_frequency if sampling_frequency is None else sampling_frequency is_filtered = self.__is_filtered if is_filtered is None else is_filtered - raw_samples = self.__raw_samples if raw_samples is None else raw_samples # no copy + raw_samples = self.__raw_samples if raw_samples is None else raw_samples new = type(self)(samples, initial_datetime, sampling_frequency, is_filtered) new._Segment__raw_samples = raw_samples @@ -1202,8 +1202,7 @@ def __new(self, segments: List[__Segment] = None, sampling_frequency: float = No """ initial_datetime = self.initial_datetime if segments is None else segments[0].initial_datetime - segments = [seg.__copy__() for seg in - self.__segments] if segments is None else segments # Uses shortcut in __init__ + segments = self.__segments if segments is None else segments # Uses shortcut in __init__ sampling_frequency = self.__sampling_frequency if sampling_frequency is None else sampling_frequency if isinstance( sampling_frequency, Frequency) else Frequency(sampling_frequency) From e827943ef882bbc8953062881e362100395167af Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Jo=C3=A3o=20Saraiva?= Date: Sun, 12 Feb 2023 14:41:45 +0000 Subject: [PATCH 09/47] Fix tests regarding previous commit 1a58476a --- src/ltbio/pipeline/PipelineUnit.py | 2 +- tests/ml/datasets/test_DatasetAugmentationTechnique.py | 2 +- tests/ml/datasets/test_SegmentToSegmentDataset.py | 2 +- tests/processing/test_Segmenter.py | 8 ++++---- 4 files changed, 7 insertions(+), 7 deletions(-) diff --git a/src/ltbio/pipeline/PipelineUnit.py b/src/ltbio/pipeline/PipelineUnit.py index fc68b1e3..281c7c2c 100644 --- a/src/ltbio/pipeline/PipelineUnit.py +++ b/src/ltbio/pipeline/PipelineUnit.py @@ -434,7 +434,7 @@ def _PipelineUnitsUnion__delegate(self, separate_inputs: Iterable) -> list: return separate_outputs def _PipelineUnitsUnion__pack(self, previous_packet: Packet, current_output) -> Packet: - if Packet.TIMESERIES_LABEL in current_output and len(self.__original_ts_labels) == len(current_output[Packet.TIMESERIES_LABEL]): + if isinstance(current_output, dict) and Packet.TIMESERIES_LABEL in current_output and len(self.__original_ts_labels) == len(current_output[Packet.TIMESERIES_LABEL]): return PipelineUnit._pack_with_original_ts_labels(previous_packet, current_output, self.current_unit, self.__original_ts_labels) else: return PipelineUnit._pack_separate_outputs(previous_packet, current_output, self.current_unit, self.__original_ts_labels) diff --git a/tests/ml/datasets/test_DatasetAugmentationTechnique.py b/tests/ml/datasets/test_DatasetAugmentationTechnique.py index 07fd7bdc..c66e7016 100644 --- a/tests/ml/datasets/test_DatasetAugmentationTechnique.py +++ b/tests/ml/datasets/test_DatasetAugmentationTechnique.py @@ -28,7 +28,7 @@ def setUpClass(cls): for name, channel in cls.eda._Biosignal__timeseries.items(): cls.eda._Biosignal__timeseries[name] = segmenter.apply(channel) - cls.dataset_length = 20 * 60 / 2 - 1 # = 600 - 1 = 599 segments + cls.dataset_length = 20 * 60 / 2 # = 600 segments cls.object_segment_length = cls.temp.sampling_frequency * 2 cls.target_segment_length = cls.acc.sampling_frequency * 2 diff --git 
a/tests/ml/datasets/test_SegmentToSegmentDataset.py b/tests/ml/datasets/test_SegmentToSegmentDataset.py index a7fd313f..5d65c892 100644 --- a/tests/ml/datasets/test_SegmentToSegmentDataset.py +++ b/tests/ml/datasets/test_SegmentToSegmentDataset.py @@ -36,7 +36,7 @@ def setUpClass(cls): for name, channel in cls.eda._Biosignal__timeseries.items(): cls.eda._Biosignal__timeseries[name] = segmenter.apply(channel) - cls.dataset_length = 20 * 60 / 2 - 1 # = 600 - 1 = 599 segments + cls.dataset_length = 20 * 60 / 2 # = 600 segments cls.object_segment_length = cls.temp.sampling_frequency * 2 cls.target_segment_length = cls.acc.sampling_frequency * 2 diff --git a/tests/processing/test_Segmenter.py b/tests/processing/test_Segmenter.py index 7b28b32f..e6df42ea 100644 --- a/tests/processing/test_Segmenter.py +++ b/tests/processing/test_Segmenter.py @@ -37,8 +37,8 @@ def test_segment_without_overlap(self): x_segmented = segmenter.apply(self.x) y_segmented = segmenter.apply(self.y) - self.assertEqual(len(x_segmented), 649998) - self.assertEqual(len(y_segmented), 649998) + self.assertEqual(len(x_segmented), 650000) + self.assertEqual(len(y_segmented), 650000) segment_length = int(0.01*self.sf) # each segment should have 3 samples @@ -58,8 +58,8 @@ def test_segment_with_overlap(self): self.assertTrue(isinstance(x_segmented, OverlappingTimeseries)) self.assertTrue(isinstance(y_segmented, OverlappingTimeseries)) - self.assertEqual(len(x_segmented), 974997) - self.assertEqual(len(y_segmented), 974997) + self.assertEqual(len(x_segmented), 974999) + self.assertEqual(len(y_segmented), 974999) segment_length = int(0.01*self.sf) # each segment should have 3 samples n_samples_overlap = int(0.003*self.sf) # 1 sample of overlap From 1eb77cd666ffa02667865946ab04ef4dfe9ae800 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Jo=C3=A3o=20Saraiva?= Date: Sun, 12 Feb 2023 14:42:30 +0000 Subject: [PATCH 10/47] Fix visibility of 'aux_date' from E4 --- src/ltbio/biosignals/sources/E4.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/ltbio/biosignals/sources/E4.py b/src/ltbio/biosignals/sources/E4.py index 50163d5f..44164a39 100644 --- a/src/ltbio/biosignals/sources/E4.py +++ b/src/ltbio/biosignals/sources/E4.py @@ -39,7 +39,7 @@ def __repr__(self): return "Empatica E4 - Epilepsy Wristband" @staticmethod - def __aux_date(date): + def _aux_date(date): """ Receives a string that contains a unix timestamp in UTC Returns a datetime after convertion """ From 9a75e1228ab4bafa7d561e3719e2a71355e155ce Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Jo=C3=A3o=20Saraiva?= Date: Sun, 12 Feb 2023 14:45:30 +0000 Subject: [PATCH 11/47] Add Biosignal.when and shortcuts for binary logical operations --- src/ltbio/biosignals/modalities/Biosignal.py | 61 +++++++++- src/ltbio/biosignals/timeseries/Timeseries.py | 113 ++++++++++++++++-- 2 files changed, 164 insertions(+), 10 deletions(-) diff --git a/src/ltbio/biosignals/modalities/Biosignal.py b/src/ltbio/biosignals/modalities/Biosignal.py index 621e5e93..262b3d21 100644 --- a/src/ltbio/biosignals/modalities/Biosignal.py +++ b/src/ltbio/biosignals/modalities/Biosignal.py @@ -21,7 +21,7 @@ from math import ceil from shutil import rmtree from tempfile import mkdtemp -from typing import Dict, Tuple, Collection, Set, ClassVar +from typing import Dict, Tuple, Collection, Set, ClassVar, Callable import matplotlib.pyplot as plt import numpy as np @@ -38,6 +38,7 @@ from ltbio.clinical.conditions.MedicalCondition import MedicalCondition from ltbio.processing.noises.Noise import Noise from .. 
import timeseries +from ..timeseries.Timeline import Timeline class Biosignal(ABC): @@ -119,7 +120,7 @@ def __copy__(self): return type(self)({ts: self.__timeseries[ts].__copy__() for ts in self.__timeseries}, self.__source, self.__patient, self.__acquisition_location, str(self.__name)) def _new(self, timeseries: Dict[str|BodyLocation, timeseries.Timeseries] | str | Tuple[datetime] = None, source:BiosignalSource.__subclasses__()=None, patient:Patient=None, acquisition_location:BodyLocation=None, name:str=None, events:Collection[Event]=None, added_noise=None): - timeseries = {ts: self.__timeseries[ts].__copy__() for ts in self.__timeseries} if timeseries is None else timeseries # copy + timeseries = {ts: self.__timeseries[ts] for ts in self.__timeseries} if timeseries is None else timeseries # copy source = self.__source if source is None else source # no copy patient = self.__patient if patient is None else patient # no copy acquisition_location = self.__acquisition_location if acquisition_location is None else acquisition_location # no copy @@ -194,6 +195,10 @@ def preview(self): except IndexError: raise AssertionError(f"The middle segment of {self.name} from {self.patient_code} does not have at least 5 seconds to return a preview.") + def when(self, condition: Callable, window: timedelta = None): + return Timeline(*[Timeline.Group(channel._when(condition, window), name=channel_name) for channel_name, channel in self], + name=self.name + " when '" + condition.__name__ + "' is True" + f" (in windows of {window})" if window else "") + def __getitem__(self, item): '''The built-in slicing and indexing operations.''' @@ -746,6 +751,58 @@ def __rshift__(self, other): return self._new(timeseries=res_timeseries, source=source, patient=patient, acquisition_location=acquisition_location, name=name, events=events) + # =================================== + # Binary Logic using Time and Conditions + + def __lt__(self, other): + if isinstance(other, Biosignal): + return self.final_datetime < other.initial_datetime + else: + res = self.when(lambda x: x < other) + res.name(self.name + ' < ' + str(other)) + return res + + def __le__(self, other): + if isinstance(other, Biosignal): + return self.final_datetime <= other.initial_datetime + else: + res = self.when(lambda x: x <= other) + res.name(self.name + ' >= ' + str(other)) + return res + + def __gt__(self, other): + if isinstance(other, Biosignal): + return self.initial_datetime > other.final_datetime + else: + res = self.when(lambda x: x > other) + res.name(self.name + ' >= ' + str(other)) + return res + + def __ge__(self, other): + if isinstance(other, Biosignal): + return self.initial_datetime >= other.final_datetime + else: + res = self.when(lambda x: x >= other) + res.name(self.name + ' >= ' + str(other)) + return res + + def __eq__(self, other): + if isinstance(other, Biosignal): + return self.initial_datetime == other.initial_datetime and self.final_datetime == other.final_datetime + else: + res = self.when(lambda x: x == other) + res.name(self.name + ' >= ' + str(other)) + return res + + def __ne__(self, other): + if isinstance(other, Biosignal): + return not self.__eq__(other) + else: + res = self.when(lambda x: x != other) + res.name(self.name + ' >= ' + str(other)) + return res + + ######## Events def set_channel_name(self, current:str|BodyLocation, new:str|BodyLocation): if current in self.__timeseries.keys(): diff --git a/src/ltbio/biosignals/timeseries/Timeseries.py b/src/ltbio/biosignals/timeseries/Timeseries.py index 0aa2b057..75c74be6 
100644 --- a/src/ltbio/biosignals/timeseries/Timeseries.py +++ b/src/ltbio/biosignals/timeseries/Timeseries.py @@ -19,7 +19,7 @@ from math import ceil from os.path import join from tempfile import mkstemp -from typing import List, Iterable, Collection, Dict, Tuple, Callable +from typing import List, Iterable, Collection, Dict, Tuple, Callable, Sequence import matplotlib.pyplot as plt import numpy as np @@ -329,28 +329,46 @@ def min(self): return np.min(self.__samples) # =================================== - # Binary Logic using Time + # Binary Logic using Time and Conditions def __lt__(self, other): """A Segment comes before other Segment if its end is less than the other's start.""" - return self.final_datetime < other.initial_datetime + if isinstance(other, Timeseries._Timeseries__Segment): + return self.final_datetime < other.initial_datetime + else: + return tuple(self.__when(self.__samples < other)) def __le__(self, other): - return self.final_datetime <= other.initial_datetime + if isinstance(other, Timeseries._Timeseries__Segment): + return self.final_datetime <= other.initial_datetime + else: + return tuple(self.__when(self.__samples <= other)) def __gt__(self, other): """A Segment comes after other Segment if its start is greater than the other's end.""" - return self.initial_datetime > other.final_datetime + if isinstance(other, Timeseries._Timeseries__Segment): + return self.initial_datetime > other.final_datetime + else: + return tuple(self.__when(self.__samples > other)) def __ge__(self, other): - return self.initial_datetime >= other.final_datetime + if isinstance(other, Timeseries._Timeseries__Segment): + return self.initial_datetime >= other.final_datetime + else: + return tuple(self.__when(self.__samples >= other)) def __eq__(self, other): """A Segment corresponds to the same time period than other Segment if their start and end are equal.""" - return self.initial_datetime == other.initial_datetime and self.final_datetime == other.final_datetime + if isinstance(other, Timeseries._Timeseries__Segment): + return self.initial_datetime == other.initial_datetime and self.final_datetime == other.final_datetime + else: + return tuple(self.__when(self.__samples == other)) def __ne__(self, other): - return not self.__eq__(other) + if isinstance(other, Timeseries._Timeseries__Segment): + return not self.__eq__(other) + else: + return tuple(self.__when(self.__samples != other)) def overlaps(self, other): """A Segment overlaps other Segment if its end comes after the other's start, or its start comes before the others' end, or vice versa.""" @@ -363,6 +381,38 @@ def adjacent(self, other): """Returns True if the Segments' start or end touch.""" return self.final_datetime == other.initial_datetime or self.initial_datetime == other.final_datetime + def __when(self, condition): + intervals = [] + true_interval = False + start, end = None, None + + for i, x in enumerate(condition): + if x: + if not true_interval: # not open + true_interval = True # then open + start = i + else: + if true_interval: # is open + true_interval = False + end = i + intervals.append((start, end)) # close interval + + if true_interval: # is open + intervals.append((start, i+1)) # then close + + return intervals + + def _when(self, condition, window_length: int = 1): + assert window_length > 0 + if window_length == 1: + evaluated = [condition(x) for x in self.__samples] + else: + evaluated = [] + for i in range(0, len(self.__samples), window_length): + x = self.__samples[i: i+window_length] + evaluated += 
[condition(x), ] * len(x) + return self.__when(evaluated) + # =================================== # INTERNAL USAGE - Accept Methods @@ -849,6 +899,53 @@ def __rshift__(self, other): raise TypeError("Trying to concatenate an object of type {}. Expected type: Timeseries.".format(type(other))) + # =================================== + # Binary Logic using Time and Conditions + + def __lt__(self, other): + if isinstance(other, Timeseries): + return self.final_datetime < other.initial_datetime + else: + return self._indices_to_timepoints([seg < other for seg in self.__segments]) + + def __le__(self, other): + if isinstance(other, Timeseries): + return self.final_datetime <= other.initial_datetime + else: + return self._indices_to_timepoints(np.concatenate([seg <= other for seg in self.__segments]), by_segment=False) + + def __gt__(self, other): + if isinstance(other, Timeseries): + return self.initial_datetime > other.final_datetime + else: + return self._indices_to_timepoints([seg > other for seg in self.__segments]) + + def __ge__(self, other): + if isinstance(other, Timeseries): + return self.initial_datetime >= other.final_datetime + else: + return self._indices_to_timepoints(np.concatenate([seg >= other for seg in self.__segments]), by_segment=False) + + def __eq__(self, other): + if isinstance(other, Timeseries): + return self.initial_datetime == other.initial_datetime and self.final_datetime == other.final_datetime + else: + return self._indices_to_timepoints(np.concatenate([seg == other for seg in self.__segments]), by_segment=False) + + def __ne__(self, other): + if isinstance(other, Timeseries): + return not self.__eq__(other) + else: + return self._indices_to_timepoints(np.concatenate([seg != other for seg in self.__segments]), by_segment=False) + + def _when(self, condition, window: timedelta): + if window is not None: + window_length = int(window.total_seconds() * self.__sampling_frequency) + x = [seg._when(condition, window_length) for seg in self.__segments] + else: + x = [seg._when(condition) for seg in self.__segments] + return self._indices_to_timepoints(x, by_segment=False) + # =================================== # Methods From 983e2ddd21385556a981be17528d7b7818354e1f Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Jo=C3=A3o=20Saraiva?= Date: Sun, 12 Feb 2023 14:46:07 +0000 Subject: [PATCH 12/47] Add Biosignal indexing with Timeline --- src/ltbio/biosignals/modalities/Biosignal.py | 39 ++++++++++++-------- 1 file changed, 24 insertions(+), 15 deletions(-) diff --git a/src/ltbio/biosignals/modalities/Biosignal.py b/src/ltbio/biosignals/modalities/Biosignal.py index 262b3d21..7b91fea7 100644 --- a/src/ltbio/biosignals/modalities/Biosignal.py +++ b/src/ltbio/biosignals/modalities/Biosignal.py @@ -351,23 +351,10 @@ def __get_events_with_padding(event_name, padding_before=timedelta(seconds=0), p return new - if isinstance(item, tuple): - if len(self) == 1: - res = list() - for k in item: - if isinstance(k, datetime): - res.append(self.__timeseries[k]) - if isinstance(k, str): - try: - res.append(self.__timeseries[to_datetime(k)]) - except ParserError: - raise IndexError("String datetimes must be in a correct format.") - else: - raise IndexError("Index types not supported. 
Give a tuple of datetimes (can be in string format).") - return tuple(res) - else: + # Structure-related: Channels + if all(isinstance(k, (str, BodyLocation)) and k in self.channel_names for k in item): ts = {} events = set() for k in item: @@ -382,6 +369,28 @@ def __get_events_with_padding(event_name, padding_before=timedelta(seconds=0), p new = self._new(timeseries=ts, events=events) return new + # Time-related: Slices, Datetimes, Events, ... + else: + item = sorted(item) + res = None + for k in item: + if res is None: + res = self[item[0]] + else: + print(k) + res = res >> self[k] + + res.name = self.name + return res + + if isinstance(item, Timeline): + if item.is_index: + res = self[item._as_index()] + res.name += f" indexed by '{item.name}'" + return res + else: + return IndexError("This Timeline cannot serve as index, because it contains multiple groups of intervals or points.") + raise IndexError("Index types not supported. Give a datetime (can be in string format), a slice or a tuple of those.") From 37651ff0768fd6e39157c901fecc37e26f2a8158 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Jo=C3=A3o=20Saraiva?= Date: Sun, 12 Feb 2023 14:46:44 +0000 Subject: [PATCH 13/47] Fix Biosignal indexing with tuple --- src/ltbio/biosignals/modalities/Biosignal.py | 6 ------ 1 file changed, 6 deletions(-) diff --git a/src/ltbio/biosignals/modalities/Biosignal.py b/src/ltbio/biosignals/modalities/Biosignal.py index 7b91fea7..e1d748a3 100644 --- a/src/ltbio/biosignals/modalities/Biosignal.py +++ b/src/ltbio/biosignals/modalities/Biosignal.py @@ -358,12 +358,6 @@ def __get_events_with_padding(event_name, padding_before=timedelta(seconds=0), p ts = {} events = set() for k in item: - if isinstance(k, datetime): - raise IndexError("This Biosignal has multiple channels. Index the channel before indexing the datetimes.") - if isinstance(k, str) and (k not in self.channel_names): - raise IndexError("'{}' is not a channel of this Biosignal.".format(k)) - if not isinstance(k, str): - raise IndexError("Index types not supported. 
Give a tuple of channel names (in str).") ts[k] = self.__timeseries[k] events.update(set(self.__timeseries[k].events)) new = self._new(timeseries=ts, events=events) From 34682d18f29a9ff787c935ce81f9a724048136dd Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Jo=C3=A3o=20Saraiva?= Date: Sun, 12 Feb 2023 14:47:37 +0000 Subject: [PATCH 14/47] Fix Biosignal.doamin_timeline (temporarly) This feature is to be refactored to Biosignal.domain --- src/ltbio/biosignals/modalities/Biosignal.py | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/src/ltbio/biosignals/modalities/Biosignal.py b/src/ltbio/biosignals/modalities/Biosignal.py index e1d748a3..257a03d9 100644 --- a/src/ltbio/biosignals/modalities/Biosignal.py +++ b/src/ltbio/biosignals/modalities/Biosignal.py @@ -451,6 +451,10 @@ def domain(self) -> Tuple[DateTimeRange]: cumulative_intersection = channels[k].overlap(cumulative_intersection) return cumulative_intersection + @property + def domain_timeline(self) -> Timeline: # TODO: mmerge with domain + return Timeline(Timeline.Group(self.domain), name=self.name + ' Domain') + @property def subdomains(self) -> Tuple[DateTimeRange]: if len(self) == 1: From ca2ec85eaa980eaa46ad336fd3cc097f27629fe4 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Jo=C3=A3o=20Saraiva?= Date: Sun, 12 Feb 2023 14:48:23 +0000 Subject: [PATCH 15/47] Fix 'source' in binary Biosignal operations --- src/ltbio/biosignals/modalities/Biosignal.py | 28 ++++++++++++++------ 1 file changed, 20 insertions(+), 8 deletions(-) diff --git a/src/ltbio/biosignals/modalities/Biosignal.py b/src/ltbio/biosignals/modalities/Biosignal.py index 257a03d9..1e948df3 100644 --- a/src/ltbio/biosignals/modalities/Biosignal.py +++ b/src/ltbio/biosignals/modalities/Biosignal.py @@ -700,10 +700,16 @@ def __and__(self, other): name = f"{self.name} and {other.name}" acquisition_location = self.acquisition_location if self.acquisition_location == other.acquisition_location else None patient = self.__patient if self.patient_code == other.patient_code else None - source = type(self.source) if ((isinstance(self.source, ABCMeta) and isinstance(other.source, ABCMeta) - and self.source == other.source) or - (type(self.source) == type(other.source)) - ) else None + if isclass(self.source) and isclass(other.source): # Un-instatiated sources + if self.source == other.source: + source = self.__source + else: + source = None + else: + if type(self.source) == type(other.source) and self.source == other.source: + source = self.__source + else: + source = None # Join channels res_timeseries = {} @@ -742,10 +748,16 @@ def __rshift__(self, other): name = f"{self.name} >> {other.name}" acquisition_location = self.acquisition_location if self.acquisition_location == other.acquisition_location else None patient = self.__patient if self.patient_code == other.patient_code else None - source = type(self.source) if ((isinstance(self.source, ABCMeta) and isinstance(other.source, ABCMeta) - and self.source == other.source) or - (type(self.source) == type(other.source)) - ) else None + if isclass(self.source) and isclass(other.source): # Un-instatiated sources + if self.source == other.source: + source = self.__source + else: + source = None + else: + if type(self.source) == type(other.source) and self.source == other.source: + source = self.__source + else: + source = None # Perform concatenation res_timeseries = {} From 21e8784bfaeb4a99f1eb2a1bbea82272d2ffcafe Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Jo=C3=A3o=20Saraiva?= Date: Sun, 12 Feb 2023 14:49:38 +0000 Subject: [PATCH 16/47] Add 
Biosignal.acquisition_scores and onbody score example for E4 --- src/ltbio/biosignals/modalities/Biosignal.py | 28 ++++++++++++++ src/ltbio/biosignals/sources/E4.py | 39 ++++++++++++++++++-- 2 files changed, 64 insertions(+), 3 deletions(-) diff --git a/src/ltbio/biosignals/modalities/Biosignal.py b/src/ltbio/biosignals/modalities/Biosignal.py index 1e948df3..4d9bdd19 100644 --- a/src/ltbio/biosignals/modalities/Biosignal.py +++ b/src/ltbio/biosignals/modalities/Biosignal.py @@ -1228,6 +1228,34 @@ def fromNoise(cls, return cls(channels, name=name) + def acquisition_scores(self): + print(f"Acquisition scores for '{self.name}'") + completness_score = self.completeness_score() + print("Completness Score = " + ("%.2f" % (completness_score*100) if completness_score else "n.d.") + "%") + onbody_score = self.onbody_score() + print("On-body Score = " + ("%.2f" % (onbody_score*100) if onbody_score else "n.d.") + "%") + quality_score = self.quality_score(_onbody_duration=onbody_score*self.duration) + print("Quality Score = " + ("%.2f" % (quality_score*100) if quality_score else "n.d.") + "%") + + def completeness_score(self): + recorded_duration = self.duration + expected_duration = self.final_datetime - self.initial_datetime + return recorded_duration / expected_duration + + def onbody_score(self): + if hasattr(self.source, 'onbody'): # if the BiosignalSource defines an 'onbody' method, then this score exists, it's computed and returned + x = self.source.onbody(self) + if x: + return self.source.onbody(self).duration / self.duration + + def quality_score(self, _onbody_duration=None): + if _onbody_duration: + if hasattr(self, 'acceptable_quality'): # if the Biosignal modality defines an 'acceptable_quality' method, then this score exists, it's computed and returned + return self.acceptable_quality().duration / _onbody_duration + else: + if hasattr(self, 'acceptable_quality') and hasattr(self.source, 'onbody'): + return self.acceptable_quality().duration / self.source.onbody(self).duration + # =================================== # SERIALIZATION diff --git a/src/ltbio/biosignals/sources/E4.py b/src/ltbio/biosignals/sources/E4.py index 44164a39..140ec2bf 100644 --- a/src/ltbio/biosignals/sources/E4.py +++ b/src/ltbio/biosignals/sources/E4.py @@ -17,10 +17,11 @@ import csv from ast import literal_eval -from datetime import datetime +from datetime import datetime, timedelta from os import listdir, path, sep from os.path import isdir +import numpy as np from numpy import vstack from .. 
import timeseries @@ -85,7 +86,7 @@ def __read_file(file_path): channel_labels = (channel_labels, ) if len(a[0]) == 1 else ('x', 'y', 'z') # First row is the initial datetime - datetime = E4.__aux_date(a[0][0]) + datetime = E4._aux_date(a[0][0]) # Second row is sampling frequency sampling_frequency = float(a[1][0]) @@ -177,7 +178,7 @@ def _events(dir:str, file_key='tag'): # Events are named numerically for i in range(len(a)): n_events += 1 - res.append(timeseries.Event('event' + str(n_events), E4.__aux_date(a[i][0]))) + res.append(timeseries.Event('event' + str(n_events), E4._aux_date(a[i][0]))) return res @staticmethod @@ -191,3 +192,35 @@ def _write(path:str, timeseries: dict): @staticmethod def _transfer(samples, to_unit): pass + + @staticmethod + def onbody(biosignal): + + window = timedelta(minutes=1) + + def condition_is_met_99_percent(x, condition): + count = np.count_nonzero(condition) + return count / len(x) >= 0.99 + + if type(biosignal) is modalities.ACC: + biosignal = biosignal['x'] + biosignal['y'] + biosignal['z'] # sum sample-by-sample the 3 axes + + def moving_std(x): + window_size = 10 * biosignal.sampling_frequency # 10 s moving standard deviation + cumsum = np.cumsum(x, dtype=float) + cumsum[window_size:] = cumsum[window_size:] - cumsum[:-window_size] + moving_averages = cumsum[window_size - 1:] / window_size + moving_sq_averages = np.cumsum(x ** 2, dtype=float) + moving_sq_averages[window_size:] = moving_sq_averages[window_size:] - moving_sq_averages[:-window_size] + moving_sq_averages = moving_sq_averages[window_size - 1:] / window_size + return np.sqrt(moving_sq_averages - moving_averages ** 2) + + return biosignal.when(lambda x: condition_is_met_99_percent(x, moving_std(x) > 0.2), window=window) + + if type(biosignal) is modalities.EDA: + return biosignal.when(lambda x: condition_is_met_99_percent(x, x > 0.05), window=window) + + if type(biosignal) is modalities.TEMP: + return biosignal.when(lambda x: condition_is_met_99_percent(x, 25 < x < 40), window=window) + + return None \ No newline at end of file From d7f668e162b3ab3a0e5218f4483904ba278c43fc Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Jo=C3=A3o=20Saraiva?= Date: Sun, 12 Feb 2023 14:50:36 +0000 Subject: [PATCH 17/47] Add DateTimeRange operability to Timeseries._indices_to_timepoints --- src/ltbio/biosignals/timeseries/Timeseries.py | 9 ++++++--- 1 file changed, 6 insertions(+), 3 deletions(-) diff --git a/src/ltbio/biosignals/timeseries/Timeseries.py b/src/ltbio/biosignals/timeseries/Timeseries.py index 75c74be6..2dd8dfd4 100644 --- a/src/ltbio/biosignals/timeseries/Timeseries.py +++ b/src/ltbio/biosignals/timeseries/Timeseries.py @@ -1127,11 +1127,14 @@ def __check_boundaries(self, datetime_or_range: datetime | DateTimeRange) -> Non raise IndexError( f"Interval given is outside of Timeseries domain, {' U '.join([f'[{subdomain.start_datetime}, {subdomain.end_datetime}[' for subdomain in self.domain])}.") - def _indices_to_timepoints(self, indices: list[list[int]], by_segment=False) -> tuple[datetime] | tuple[list[datetime]]: + def _indices_to_timepoints(self, indices: Sequence[Sequence[int]] | Sequence[Sequence[Sequence[int]]], by_segment=False) -> Sequence[datetime] | Sequence[Sequence[datetime]] | Sequence[DateTimeRange] | Sequence[Sequence[DateTimeRange]]: all_timepoints = [] for index, segment in zip(indices, self.__segments): - timepoints = divide(index, self.__sampling_frequency) # Transform to timepoints - x = [segment.initial_datetime + timedelta(seconds=tp) for tp in timepoints] + timepoints = 
divide(index, self.__sampling_frequency) # Transform to seconds + if isinstance(timepoints, ndarray) and len(timepoints.shape) == 2 and timepoints.shape[1] == 2: # Intervals + x = [DateTimeRange(segment.initial_datetime + timedelta(seconds=tp[0]), segment.initial_datetime + timedelta(seconds=tp[1])) for tp in timepoints] + else: # Timepoints + x = [segment.initial_datetime + timedelta(seconds=tp) for tp in timepoints] if by_segment: all_timepoints.append(x) # Append as list else: From 84e0166569f8aeb22fe3354972647b4098bb9f95 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Jo=C3=A3o=20Saraiva?= Date: Sun, 12 Feb 2023 14:51:54 +0000 Subject: [PATCH 18/47] Remove compression on saving Biosignals (v. 2023.0) --- src/ltbio/biosignals/modalities/Biosignal.py | 20 ++++++++++++++------ 1 file changed, 14 insertions(+), 6 deletions(-) diff --git a/src/ltbio/biosignals/modalities/Biosignal.py b/src/ltbio/biosignals/modalities/Biosignal.py index 4d9bdd19..0e47ef3b 100644 --- a/src/ltbio/biosignals/modalities/Biosignal.py +++ b/src/ltbio/biosignals/modalities/Biosignal.py @@ -1295,9 +1295,8 @@ def save(self, save_to:str): channel._memory_map(temp_dir) # Write - from bz2 import BZ2File from _pickle import dump - with BZ2File(save_to, 'w') as f: + with open(save_to, 'wb') as f: dump(self, f) # Clean up memory maps @@ -1309,11 +1308,20 @@ def load(cls, filepath:str): if not filepath.endswith(Biosignal.EXTENSION): raise IOError("Only .biosignal files are allowed.") - # Read - from bz2 import BZ2File from _pickle import load - data = BZ2File(filepath, 'rb') - return load(data) + from _pickle import UnpicklingError + + # Read + try: # Versions >= 2023.0: + f = open(filepath, 'rb') + biosignal = load(f) + except UnpicklingError as e: # Versions 2022.0, 2022.1 and 2022.2: + from bz2 import BZ2File + # print("Loading...\nNote: Loading a version older than 2023.0 takes significantly more time. 
It is suggested you save this Biosignal again, so you can have it in the newest fastest format.") + f = BZ2File(filepath, 'rb') + biosignal = load(f) + f.close() + return biosignal class DerivedBiosignal(Biosignal): From c8ea213e18bdf47bc1812ec2d16c4dfa08789fa4 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Jo=C3=A3o=20Saraiva?= Date: Tue, 7 Mar 2023 12:50:11 +0000 Subject: [PATCH 19/47] Add union and intersection methods to Timeline --- src/ltbio/biosignals/timeseries/Timeline.py | 125 ++++++++++++++++++-- 1 file changed, 118 insertions(+), 7 deletions(-) diff --git a/src/ltbio/biosignals/timeseries/Timeline.py b/src/ltbio/biosignals/timeseries/Timeline.py index 41657518..088ad0cf 100644 --- a/src/ltbio/biosignals/timeseries/Timeline.py +++ b/src/ltbio/biosignals/timeseries/Timeline.py @@ -13,7 +13,8 @@ # =================================== from datetime import datetime, timedelta -from typing import Sequence +from functools import reduce +from typing import Sequence, List import matplotlib.pyplot as plt from datetimerange import DateTimeRange @@ -35,11 +36,17 @@ def __init__(self, intervals: Sequence[DateTimeRange] = [], points: Sequence[dat def __repr__(self): res = '' - if len(self.intervals) > 1: - res += ' U '.join(['[' + str(interval) + '[' for interval in self.intervals]) - if len(self.points) > 1: - res += '\nand the following tiempoints:\n' - res += ', '.join(['[' + str(point) + '[' for point in self.points]) + if 0 < len(self.intervals): + if len(self.intervals) < 10: + res += ' U '.join(['[' + str(interval) + '[' for interval in self.intervals]) + else: + res += f'{len(self.intervals)} intervals with {self.duration} of total duration' + if 0 < len(self.points): + if len(self.points) < 10: + res += '\nand the following timepoints:\n' + res += ', '.join(['[' + str(point) + '[' for point in self.points]) + else: + res += f'\nand {len(self.points)} timepoints.\n' return res @property @@ -64,7 +71,7 @@ def has_only_points(self) -> bool: def _as_index(self) -> tuple: if self.has_only_intervals: - return tuple([slice(interval.start_datetime, interval.end_datetime) for interval in self.intervals]) + return tuple(self.intervals) if self.has_only_points: return tuple(self.points) return None @@ -109,6 +116,14 @@ def initial_datetime(self) -> datetime: def final_datetime(self) -> datetime: return max([g.final_datetime for g in self.groups]) + @property + def has_single_group(self) -> bool: + return len(self.groups) == 1 + + @property + def single_group(self) -> Group: + return self.groups[0] if self.has_single_group else None + @property def duration(self) -> timedelta: if len(self.groups) == 1: @@ -173,3 +188,99 @@ def plot(self, show:bool=True, save_to:str=None): def _repr_png_(self): self.plot() + + @classmethod + def union(cls, *timelines): + # Check input + if not all(isinstance(tl, Timeline) for tl in timelines): + raise TypeError("Give objects Timeline to Timeline.union.") + if len(timelines) < 2: + raise ValueError("Give at least 2 Timelines to compute their union.") + + # Get sets of intervals of each Timeline + tl_intervals = [] + for i, tl in enumerate(timelines): + if tl.has_single_group and tl.single_group.has_only_intervals: + tl_intervals.append(tl.single_group.intervals) + else: + raise AssertionError(f"The {i+1}th Timeline does not have a single group with only intervals.") + + # Binary function + def union_of_two_timelines(intervals1: List[DateTimeRange], intervals2: List[DateTimeRange]): + intervals = intervals1 + intervals2 + intervals.sort(key=lambda x: x.start_datetime) + 
union = [intervals[0]] + for i in range(1, len(intervals)): + if union[-1].end_datetime >= intervals[i].start_datetime: + union[-1].set_end_datetime(max(union[-1].end_datetime, intervals[i].end_datetime)) + else: + union.append(intervals[i]) + return union + + res_intervals = reduce(union_of_two_timelines, tl_intervals) + return Timeline(Timeline.Group(res_intervals), name=f"Union of " + ', '.join(tl.name for tl in timelines)) + + @classmethod + def intersection(cls, *timelines): + # Check input + if not all(isinstance(tl, Timeline) for tl in timelines): + raise TypeError("Give objects Timeline to Timeline.union.") + if len(timelines) < 2: + raise ValueError("Give at least 2 Timelines to compute their union.") + + # Get sets of intervals of each Timeline + tl_intervals = [] + for i, tl in enumerate(timelines): + if tl.has_single_group and tl.single_group.has_only_intervals: + tl_intervals.append(tl.single_group.intervals) + else: + raise AssertionError(f"The {i + 1}th Timeline does not have a single group with only intervals.") + + # Binary function + def intersection_of_two_timelines(intervals1: List[DateTimeRange], intervals2: List[DateTimeRange]): + intervals1.sort(key=lambda x: x.start) + intervals2.sort(key=lambda x: x.start) + + intersection = [] + i, j = 0, 0 + while i < len(intervals1) and j < len(intervals2): + if intervals1[i].end_datetime <= intervals2[j].start_datetime: + i += 1 + elif intervals2[j].end_datetime <= intervals1[i].start_datetime: + j += 1 + else: + start = max(intervals1[i].start_datetime, intervals2[j].start_datetime) + end = min(intervals1[i].end_datetime, intervals2[j].end_datetime) + intersection.append(DateTimeRange(start, end)) + if intervals1[i].end_datetime <= intervals2[j].end_datetime: + i += 1 + else: + j += 1 + + return intersection + + res_intervals = reduce(intersection_of_two_timelines, tl_intervals) + return Timeline(Timeline.Group(res_intervals), name=f"Intersection of " + ', '.join(tl.name for tl in timelines)) + + EXTENSION = '.timeline' + + def save(self, save_to: str): + # Check extension + if not save_to.endswith(Timeline.EXTENSION): + save_to += Biosignal.EXTENSION + # Write + from _pickle import dump + with open(save_to, 'wb') as f: + dump(self, f) + + @classmethod + def load(cls, filepath: str): + # Check extension + if not filepath.endswith(Timeline.EXTENSION): + raise IOError("Only .timeline files are allowed.") + + # Read + from _pickle import load + with open(filepath, 'rb') as f: + timeline = load(f) + return timeline From 5467c505f333b6ad00c91b4ff4d8c174a861d0df Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Jo=C3=A3o=20Saraiva?= Date: Tue, 7 Mar 2023 12:50:55 +0000 Subject: [PATCH 20/47] Fix semantic bug in E4.onbody --- src/ltbio/biosignals/sources/E4.py | 18 ++++++++++++------ 1 file changed, 12 insertions(+), 6 deletions(-) diff --git a/src/ltbio/biosignals/sources/E4.py b/src/ltbio/biosignals/sources/E4.py index 140ec2bf..8c93ad79 100644 --- a/src/ltbio/biosignals/sources/E4.py +++ b/src/ltbio/biosignals/sources/E4.py @@ -198,15 +198,15 @@ def onbody(biosignal): window = timedelta(minutes=1) - def condition_is_met_99_percent(x, condition): + def condition_is_met_1_percent(x, condition): count = np.count_nonzero(condition) - return count / len(x) >= 0.99 + return count / len(x) >= 0.01 if type(biosignal) is modalities.ACC: biosignal = biosignal['x'] + biosignal['y'] + biosignal['z'] # sum sample-by-sample the 3 axes + window_size = int(10 * biosignal.sampling_frequency) # 10 s moving standard deviation def moving_std(x): - window_size 
= 10 * biosignal.sampling_frequency # 10 s moving standard deviation cumsum = np.cumsum(x, dtype=float) cumsum[window_size:] = cumsum[window_size:] - cumsum[:-window_size] moving_averages = cumsum[window_size - 1:] / window_size @@ -215,12 +215,18 @@ def moving_std(x): moving_sq_averages = moving_sq_averages[window_size - 1:] / window_size return np.sqrt(moving_sq_averages - moving_averages ** 2) - return biosignal.when(lambda x: condition_is_met_99_percent(x, moving_std(x) > 0.2), window=window) + x = biosignal.when(lambda x: condition_is_met_1_percent(x, moving_std(x) > 0.2), window=window) + x.name = biosignal.name + " Onbody Domain" + return x if type(biosignal) is modalities.EDA: - return biosignal.when(lambda x: condition_is_met_99_percent(x, x > 0.05), window=window) + x = biosignal.when(lambda x: condition_is_met_1_percent(x, x > 0.05), window=window) + x.name = biosignal.name + " Onbody Domain" + return x if type(biosignal) is modalities.TEMP: - return biosignal.when(lambda x: condition_is_met_99_percent(x, 25 < x < 40), window=window) + x = biosignal.when(lambda x: condition_is_met_1_percent(x, (x > 25) & (x < 40)), window=window) + x.name = biosignal.name + " Onbody Domain" + return x return None \ No newline at end of file From 5fa6871e5152813c2520927a84eb70b6500d4a7b Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Jo=C3=A3o=20Saraiva?= Date: Mon, 27 Mar 2023 11:35:51 +0100 Subject: [PATCH 21/47] Refactoring modules (1) --- src/ltbio/biosignals/__init__.py | 1606 ++++++++++++++++- src/ltbio/biosignals/derived.py | 96 + .../{modalities/ECG.py => modalities.py} | 184 +- src/ltbio/biosignals/modalities/ACC.py | 44 - src/ltbio/biosignals/modalities/Biosignal.py | 1339 -------------- src/ltbio/biosignals/modalities/EDA.py | 61 - src/ltbio/biosignals/modalities/EEG.py | 29 - src/ltbio/biosignals/modalities/EMG.py | 29 - src/ltbio/biosignals/modalities/HR.py | 45 - .../modalities/MultimodalBiosignal.py | 104 -- src/ltbio/biosignals/modalities/PPG.py | 76 - src/ltbio/biosignals/modalities/RESP.py | 29 - src/ltbio/biosignals/modalities/TEMP.py | 61 - src/ltbio/biosignals/modalities/__init__.py | 10 - src/ltbio/biosignals/sources.py | 1311 ++++++++++++++ .../biosignals/sources/BiosignalSource.py | 94 - src/ltbio/biosignals/sources/Bitalino.py | 272 --- src/ltbio/biosignals/sources/E4.py | 232 --- src/ltbio/biosignals/sources/HEM.py | 109 -- src/ltbio/biosignals/sources/HSM.py | 90 - src/ltbio/biosignals/sources/MITDB.py | 134 -- src/ltbio/biosignals/sources/Multisource.py | 45 - src/ltbio/biosignals/sources/Seer.py | 127 -- src/ltbio/biosignals/sources/Sense.py | 388 ---- src/ltbio/biosignals/sources/__init__.py | 8 - .../Timeseries.py => timeseries.py} | 483 ++++- src/ltbio/biosignals/timeseries/Event.py | 156 -- src/ltbio/biosignals/timeseries/Frequency.py | 37 - src/ltbio/biosignals/timeseries/Timeline.py | 286 --- src/ltbio/biosignals/timeseries/__init__.py | 3 - .../{timeseries/Unit.py => units.py} | 0 31 files changed, 3607 insertions(+), 3881 deletions(-) create mode 100644 src/ltbio/biosignals/derived.py rename src/ltbio/biosignals/{modalities/ECG.py => modalities.py} (78%) delete mode 100644 src/ltbio/biosignals/modalities/ACC.py delete mode 100644 src/ltbio/biosignals/modalities/Biosignal.py delete mode 100644 src/ltbio/biosignals/modalities/EDA.py delete mode 100644 src/ltbio/biosignals/modalities/EEG.py delete mode 100644 src/ltbio/biosignals/modalities/EMG.py delete mode 100644 src/ltbio/biosignals/modalities/HR.py delete mode 100644 
src/ltbio/biosignals/modalities/MultimodalBiosignal.py delete mode 100644 src/ltbio/biosignals/modalities/PPG.py delete mode 100644 src/ltbio/biosignals/modalities/RESP.py delete mode 100644 src/ltbio/biosignals/modalities/TEMP.py delete mode 100644 src/ltbio/biosignals/modalities/__init__.py create mode 100644 src/ltbio/biosignals/sources.py delete mode 100644 src/ltbio/biosignals/sources/BiosignalSource.py delete mode 100644 src/ltbio/biosignals/sources/Bitalino.py delete mode 100644 src/ltbio/biosignals/sources/E4.py delete mode 100644 src/ltbio/biosignals/sources/HEM.py delete mode 100644 src/ltbio/biosignals/sources/HSM.py delete mode 100644 src/ltbio/biosignals/sources/MITDB.py delete mode 100644 src/ltbio/biosignals/sources/Multisource.py delete mode 100644 src/ltbio/biosignals/sources/Seer.py delete mode 100644 src/ltbio/biosignals/sources/Sense.py delete mode 100644 src/ltbio/biosignals/sources/__init__.py rename src/ltbio/biosignals/{timeseries/Timeseries.py => timeseries.py} (81%) delete mode 100644 src/ltbio/biosignals/timeseries/Event.py delete mode 100644 src/ltbio/biosignals/timeseries/Frequency.py delete mode 100644 src/ltbio/biosignals/timeseries/Timeline.py delete mode 100644 src/ltbio/biosignals/timeseries/__init__.py rename src/ltbio/biosignals/{timeseries/Unit.py => units.py} (100%) diff --git a/src/ltbio/biosignals/__init__.py b/src/ltbio/biosignals/__init__.py index 3ffc8bab..1c46359c 100644 --- a/src/ltbio/biosignals/__init__.py +++ b/src/ltbio/biosignals/__init__.py @@ -1,11 +1,1602 @@ -from typing import Collection +# -- encoding: utf-8 -- -from matplotlib import pyplot as plt +# =================================== + +# IT - LongTermBiosignals + +# Package: biosignals +# Module: init +# Description: Essential classes for .biosignals package: Biosignal, MultimodalBiosignal and Event + +# Contributors: João Saraiva +# Created: 07/03/2023 + +# =================================== + +import logging +from abc import ABC, abstractmethod, ABCMeta +from copy import deepcopy +from datetime import datetime, timedelta +from inspect import isclass, signature +from logging import warning +from math import ceil +from shutil import rmtree +from tempfile import mkdtemp +from typing import Dict, Tuple, Collection, Set, ClassVar, Callable + +import matplotlib.pyplot as plt +import numpy as np +from datetimerange import DateTimeRange +from dateutil.parser import parse as to_datetime +from numpy import ndarray +from pandas import DataFrame from scipy.signal import correlate -from .modalities.Biosignal import Biosignal -from .timeseries import Timeseries, OverlappingTimeseries, Event -from .timeseries.Unit import Unitless +from .sources import __BiosignalSource as BS +from .timeseries import Timeseries, Timeline +from .units import Unitless +# from ...processing.filters.Filter import Filter +from ..clinical.BodyLocation import BodyLocation +from ..clinical.Patient import Patient +from ..clinical.conditions.MedicalCondition import MedicalCondition +from ..processing.noises.Noise import Noise + + +class Event(): + __SERIALVERSION: int = 1 + + def __init__(self, name: str, onset: datetime | str = None, offset: datetime | str = None): + if onset is None and offset is None: # at least one + raise AssertionError("At least an onset or an offset must be given to create an Event.") + self.__onset = to_datetime(onset) if isinstance(onset, str) else onset + self.__offset = to_datetime(offset) if isinstance(offset, str) else offset + if onset is not None and offset is not None and offset < onset: 
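+            # e.g., Event('seizure', onset='2023-01-01 10:30:00', offset='2023-01-01 10:00:00') is rejected here (hypothetical values)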
+            raise AssertionError(f"In Event '{name}', the offset cannot come before the onset.")
+        self.__name = name
+
+    @property
+    def has_onset(self) -> bool:
+        return self.__onset is not None
+
+    @property
+    def has_offset(self) -> bool:
+        return self.__offset is not None
+
+    @property
+    def onset(self) -> datetime:
+        if self.has_onset:
+            return self.__onset
+        else:
+            raise AttributeError(f"Event {self.name} has no onset.")
+
+    @onset.setter
+    def onset(self, value: datetime):
+        self.__onset = value
+
+    @property
+    def offset(self) -> datetime:
+        if self.has_offset:
+            return self.__offset
+        else:
+            raise AttributeError(f"Event {self.name} has no offset.")
+
+    @offset.setter
+    def offset(self, value: datetime):
+        self.__offset = value
+
+    @property
+    def duration(self) -> timedelta:
+        if self.__onset is None:
+            raise AttributeError(f"Event '{self.name}' has no duration, because it has no onset.")
+        if self.__offset is None:
+            raise AttributeError(f"Event '{self.name}' has no duration, because it has no offset.")
+        return self.__offset - self.__onset
+
+    @property
+    def domain(self) -> DateTimeRange:
+        if self.__onset is None:
+            raise AttributeError(f"Event '{self.name}' has no domain, because it has no onset.")
+        if self.__offset is None:
+            raise AttributeError(f"Event '{self.name}' has no domain, because it has no offset.")
+        return DateTimeRange(self.__onset, self.__offset)
+
+    @property
+    def name(self) -> str:
+        return self.__name
+
+    def domain_with_padding(self, before: timedelta = timedelta(seconds=0), after: timedelta = timedelta(seconds=0)):
+        """
+        The Event domain with a padding before, after, or both. Negative paddings go back in time; positive paddings go forward in time.
+        :param before: Padding before the onset, if defined; otherwise before the offset.
+        :param after: Padding after the offset, if defined; otherwise after the onset.
+        :return: DateTimeRange of the padded domain.
+        """
+
+        if not isinstance(before, timedelta) or not isinstance(after, timedelta):
+            raise TypeError("Paddings 'before' and 'after' must be timedelta objects.")
+
+        # event [start, end[
+        start = self.__onset if self.__onset is not None else self.__offset
+        end = self.__offset if self.__offset is not None else self.__onset
+
+        # padded event [start + before, end + after[
+        start, end = start + before, end + after
+
+        return DateTimeRange(start, end)
+
+    def __repr__(self):
+        if self.__offset is None:
+            return self.__name + ': Starts at ' + self.__onset.strftime("%d %b, %H:%M:%S")
+        elif self.__onset is None:
+            return self.__name + ': Ends at ' + self.__offset.strftime("%d %b, %H:%M:%S")
+        else:
+            return self.__name + ': [' + self.__onset.strftime("%d %b, %H:%M:%S") + '; ' + self.__offset.strftime("%d %b, %H:%M:%S") + ']'
+
+    def __hash__(self):
+        return hash((self.__name, self.__onset, self.__offset))
+
+    def __eq__(self, other):
+        return self.__name == other.name and self.__onset == other._Event__onset and self.__offset == other._Event__offset
+
+    def __ne__(self, other):
+        return not self == other
+
+    def __lt__(self, other): # An Event comes before another Event if it ends (or starts, if it has no offset) before the other starts (or ends, if it has no onset).
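+        # For example (hypothetical values): an Event ending at 10:00 is < an Event starting at 10:05.
+        # Events with only an onset or only an offset are compared by whichever bound they have.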
+ after = other._Event__onset if other._Event__onset is not None else other._Event__offset + before = self.__offset if self.__offset is not None else self.__onset + return before < after + + def __le__(self, other): + return self < other or self == other + + def __gt__(self, other): + return not self < other + + def __ge__(self, other): + return self > other or self == other + + def __getstate__(self): + """ + 1: name (str) + 2: onset (datetime) + 3: offset (datetime) + 4: other... (dict) + """ + other_attributes = self.__dict__.copy() + del other_attributes['_Event__name'], other_attributes['_Event__onset'], other_attributes['_Event__offset'] + return (self.__SERIALVERSION, self.__name, self.__onset, self.__offset) if len(other_attributes) == 0 \ + else (self.__SERIALVERSION, self.__name, self.__onset, self.__offset, other_attributes) + + def __setstate__(self, state): + if state[0] == 1: + self.__name, self.__onset, self.__offset = state[1], state[2], state[3] + if len(state) == 5: + self.__dict__.update(state[4]) + else: + raise IOError(f'Version of {self.__class__.__name__} object not supported. Serialized version: {state[0]};' + f'Supported versions: 1.') + + +# =================================== +# Base Class 'Biosignal' and 'MultimodalBiosignal' +# =================================== + +class Biosignal(ABC): + """ + A Biosignal is a set of channels (Timeseries), each of which with samples measuring a biological variable. + It may be associated with a source, a patient, and a body location. It can also have a name. + It has an initial and final datetime. Its length is its number of channels. + It can be resampled, filtered, and concatenated to other Biosignals. + Amplitude and spectrum plots can be displayed and saved. + """ + + __SERIALVERSION: int = 2 + + def __init__(self, + timeseries: Dict[str | BodyLocation, timeseries.Timeseries] | str | Tuple[datetime], + source: BS.__subclasses__() = None, + patient: Patient = None, + acquisition_location: BodyLocation = None, + name: str = None, + **options): + + # Save BS, if given + if source and not isinstance(source, tuple(BS.__subclasses__())): + raise TypeError("Source must be a BS.") + self.__source = source + + # Create some empty properites + self.__patient = None + self.__acquisition_location = None + self.__name = None + self.__associated_events = {} + self.__added_noise = None + + # Option 1: 'timeseries' is a string path -> Read samples from files + if isinstance(timeseries, str): + filepath = timeseries + if source is None: + raise ValueError("To read a Biosignal from files, specify a BS in 'source'.") + else: + # BS can give the samples (required) and many other optional metadata. + # It's the BS that decides what it gives, depending on what it can get. 
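+                # For example, a minimal BS subclass might return:
+                #     {'timeseries': {...}, 'patient': None, 'acquisition_location': None, 'events': None, 'name': None}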
+ + # Get all data that the source can read: + data = self.__source._get(filepath, type(self), **options) + + # Unwrap data: + # 'timeseries': dictionary of Timeseries (required) + # 'patient': Patient + # 'acquisition_location': BodyLocation + # 'events': tuple of Events + # 'name': string + self.__timeseries = data['timeseries'] + if data['patient'] is not None: + self.__patient = data['patient'] + if data['acquisition_location'] is not None: + self.__acquisition_location = data['acquisition_location'] + if data['events'] is not None: + self.associate(data['events']) + if data['name'] is not None: + self.__name = data['name'] + + # Option 2: 'timeseries' is a dictionary {chanel name: Timeseries} -> Save directly + if isinstance(timeseries, dict): + # Check if all keys are strings or BodyLocation + for key in timeseries.keys(): + if not isinstance(key, str) and not isinstance(key, BodyLocation): + raise TypeError("All keys in 'timeseries' must be strings or BodyLocation.") + # Check if all values are Timeseries + for ts in timeseries.values(): + if not isinstance(ts, Timeseries): + raise TypeError("All values in 'timeseries' must be Timeseries.") + + # Save dictionary of Timeseries + self.__timeseries = timeseries + + # Check if Timeseries come with Events associated + for ts in timeseries.values(): + for event in ts.events: + if event.name in self.__associated_events and self.__associated_events[event.name] != event: + raise AssertionError("There are different Events with the same name among the Timeseries given.") + else: + self.__associated_events[event.name] = event + + # If user gives metadata, override what was given by the source: + if patient is not None: + self.__patient = patient + if acquisition_location is not None: + self.__acquisition_location = acquisition_location + if name is not None: + self.__name = name + + def __copy__(self): + return type(self)({ts: self.__timeseries[ts].__copy__() for ts in self.__timeseries}, self.__source, self.__patient, + self.__acquisition_location, str(self.__name)) + + def _new(self, timeseries: Dict[str | BodyLocation, timeseries.Timeseries] | str | Tuple[datetime] = None, + source: BS.__subclasses__() = None, patient: Patient = None, acquisition_location: BodyLocation = None, name: str = None, + events: Collection[Event] = None, added_noise=None): + timeseries = {ts: self.__timeseries[ts] for ts in self.__timeseries} if timeseries is None else timeseries # copy + source = self.__source if source is None else source # no copy + patient = self.__patient if patient is None else patient # no copy + acquisition_location = self.__acquisition_location if acquisition_location is None else acquisition_location # no copy + name = str(self.__name) if name is None else name # copy + + new = type(self)(timeseries, source, patient, acquisition_location, name) + + # Associate events; no need to copy + events = self.__associated_events if events is None else events + events = events.values() if isinstance(events, dict) else events + # Check if some event can be associated + logging.disable(logging.WARNING) # if outside the domain of every channel -> no problem; the Event will not be associated + new.associate(events) + logging.disable(logging.NOTSET) # undo supress warnings + + # Associate added noise reference: + if added_noise is not None: + new._Biosignal__added_noise = added_noise + + return new + + def _apply_operation_and_new(self, operation, + source: BS.__subclasses__() = None, patient: Patient = None, + acquisition_location: BodyLocation = None, 
name: str = None, events: Collection[Event] = None, + **kwargs): + new_channels = {} + for channel_name in self.channel_names: + new_channels[channel_name] = self.__timeseries[channel_name]._apply_operation_and_new(operation, **kwargs) + return self._new(new_channels, source=source, patient=patient, acquisition_location=acquisition_location, + name=name, events=events) + + def _apply_operation_and_return(self, operation, **kwargs): + pass # TODO + + @property + def has_single_channel(self) -> bool: + return len(self) == 1 + + def _get_channel(self, channel_name: str | BodyLocation) -> timeseries.Timeseries: + if channel_name in self.channel_names: + return self.__timeseries[channel_name] + else: + raise AttributeError(f"No channel named '{channel_name}'.") + + def _get_single_channel(self) -> tuple[str | BodyLocation, timeseries.Timeseries]: + """ + :return: channel_name, channel + """ + if not self.has_single_channel: + raise AttributeError(f"This Biosignal does not have a single channel. It has multiple channels.") + return tuple(self.__timeseries.items())[0] + + def get_event(self, name: str) -> Event: + if name in self.__associated_events: + return self.__associated_events[name] + from_conditions = self.__get_events_from_medical_conditions() + if name in from_conditions: + return from_conditions[name] + else: + raise NameError(f"No Event named '{name}' associated to the Biosignal or its paitent's conditions.") + + @property + def preview(self): + """Returns 5 seconds of the middle of the signal.""" + domain = self.domain + middle_of_domain: DateTimeRange = domain[len(domain) // 2] + middle = middle_of_domain.start_datetime + (middle_of_domain.timedelta / 2) + try: + return self[middle - timedelta(seconds=2): middle + timedelta(seconds=3)] + except IndexError: + raise AssertionError( + f"The middle segment of {self.name} from {self.patient_code} does not have at least 5 seconds to return a preview.") + + def when(self, condition: Callable, window: timedelta = None): + if len(signature(condition).parameters) > 1: + assert set(signature(condition).parameters) + sf = self.sampling_frequency # that all channels have the same sampling frequnecy + window = 1 if window is None else int(window * sf) + intervals = [] + for i in range(len(self._n_segments)): # gives error if not all channles have the same domain + x = self._vblock(i) + evaluated = [] + for i in range(0, len(x[0]), window): + y = x[:, i: i + window] + evaluated += [y] * window + intervals.append(timeseries.Timeseries._Timeseries__Segment._Segment__when(evaluated)) + intervals = self.__timeseries[0]._indices_to_timepoints(intervals) + return Timeline(*[Timeline.Group(channel._when(condition, window), name=channel_name) for channel_name, channel in self], + name=self.name + " when '" + condition.__name__ + "' is True" + f" (in windows of {window})" if window else "") + + else: + return Timeline(*[Timeline.Group(channel._when(condition, window), name=channel_name) for channel_name, channel in self], + name=self.name + " when '" + condition.__name__ + "' is True" + f" (in windows of {window})" if window else "") + + def __getitem__(self, item): + '''The built-in slicing and indexing operations.''' + + if isinstance(item, datetime): + if len(self) != 1: + raise IndexError("This Biosignal has multiple channels. 
Index the channel before indexing the datetime.") + return tuple(self.__timeseries.values())[0][item] + + if isinstance(item, (str, BodyLocation)): + if item in self.channel_names: + if len(self) == 1: + raise IndexError("This Biosignal only has 1 channel. Index only the datetimes.") + ts = {item: self.__timeseries[item].__copy__(), } + return self._new(timeseries=ts) + + elif item in self.__associated_events or item in self.__get_events_from_medical_conditions(): + if item in self.__associated_events: # Internal own Events + event = self.__associated_events[item] + else: # Events associated to MedicalConditions + event = self.__get_events_from_medical_conditions()[item] + + if event.has_onset and event.has_offset: + return self[DateTimeRange(event.onset, event.offset)] + elif event.has_onset: + return self[event.onset] + elif event.has_offset: + return self[event.offset] + + else: + try: + self.__timeseries[to_datetime(item)] + except: + raise IndexError("Datetime in incorrect format or '{}' is not a channel nor an event of this Biosignal.".format(item)) + + def __get_events_with_padding(event_name, padding_before=timedelta(seconds=0), padding_after=timedelta(seconds=0), + exclude_event=False): + # Get Event object + if event_name in self.__associated_events: + event = self.__associated_events[event_name] + elif event_name in self.__get_events_from_medical_conditions(): + event = self.__get_events_from_medical_conditions()[event_name] + else: + raise IndexError(f"No Event named '{event_name}' associated to this Biosignal.") + + if isinstance(padding_before, datetime) and isinstance(padding_after, datetime) and exclude_event: + if event.has_onset and event.has_offset: + return self[DateTimeRange(padding_before, event.onset)] >> self[ + DateTimeRange(event.offset + timedelta(seconds=1 / self.sampling_frequency), + padding_after)] # FIXME: Sampling frequency might not be the same for all channels! + else: + raise IndexError(f"Event {event_name} is a point in time, not an event with a duration.") + + # Convert specific datetimes to timedeltas; is this inneficient? + if isinstance(padding_before, datetime): + if event.has_onset: + padding_before = event.onset - padding_before + elif event.has_offset: + padding_before = event.offset - padding_before + if exclude_event: + padding_after = - event.duration + if isinstance(padding_after, datetime): + if event.has_offset: + padding_after = padding_after - event.offset + elif event.has_onset: + padding_after = padding_after - event.onset + if exclude_event: + padding_before = - event.duration + + # Index + if event.has_onset and event.has_offset: + return self[DateTimeRange(event.onset - padding_before, event.offset + padding_after)] + elif event.has_onset: + return self[DateTimeRange(event.onset - padding_before, event.onset + padding_after)] + elif event.has_offset: + return self[DateTimeRange(event.offset - padding_before, event.offset + padding_after)] + + if isinstance(item, slice): + + # Everything but event + if isinstance(item.stop, str) and item.start is None and item.step is None: + if not item.stop.startswith('-'): + raise ValueError( + "Indexing a Biosignal like x[:'event':] is equivalent to having its entire domain. 
Did you mean x[:'-event':]?") + return __get_events_with_padding(item.stop[1:], padding_before=self.initial_datetime, padding_after=self.final_datetime, + exclude_event=True) + + # Everything before event + if isinstance(item.stop, str) and item.start is None: + event_name, exclude_event = item.stop, False + if event_name.startswith('-'): + event_name, exclude_event = event_name[1:], True + return __get_events_with_padding(event_name, padding_before=self.initial_datetime, exclude_event=exclude_event) + + # Everything after event + if isinstance(item.start, str) and item.stop is None: + event_name, exclude_event = item.start, False + if event_name.startswith('-'): + event_name, exclude_event = event_name[1:], True + return __get_events_with_padding(event_name, padding_after=self.final_datetime, exclude_event=exclude_event) + + # Event with padding + if isinstance(item.start, (timedelta, int)) and isinstance(item.step, (timedelta, int)) and isinstance(item.stop, str): + start = timedelta(seconds=item.start) if isinstance(item.start, int) else item.start # shortcut for seconds + step = timedelta(seconds=item.step) if isinstance(item.step, int) else item.step # shortcut for seconds + return __get_events_with_padding(item.stop, padding_before=start, padding_after=step) + elif isinstance(item.start, (timedelta, int)) and isinstance(item.stop, str): + start = timedelta(seconds=item.start) if isinstance(item.start, int) else item.start # shortcut for seconds + return __get_events_with_padding(item.stop, padding_before=start) + elif isinstance(item.start, str) and isinstance(item.stop, (timedelta, int)): + stop = timedelta(seconds=item.stop) if isinstance(item.stop, int) else item.stop # shortcut for seconds + return __get_events_with_padding(item.start, padding_after=stop) + + # Index by datetime + if isinstance(item.start, datetime) and isinstance(item.stop, datetime) and item.stop < item.start: + raise IndexError("Given final datetime comes before the given initial datetime.") + + if self.has_single_channel: # one channel + channel_name = tuple(self.__timeseries.keys())[0] + channel = self.__timeseries[channel_name] + return self._new(timeseries={channel_name: channel[item]}) # FIXME: Why aren't events being updated here? (See below) + + else: # multiple channels + ts = {} + events = set() + for k in self.channel_names: + ts[k] = self.__timeseries[k][item] + # Events outside the new domain get discarded, hence collecting the ones that remained + events.update(set(self.__timeseries[k].events)) # FIXME: (See Above) Like in here! + new = self._new(timeseries=ts, events=events) + return new + + if isinstance(item, DateTimeRange): # Pass item directly to each channel + ts = {} + events = set() + for k in self.channel_names: + res = self.__timeseries[k][item] + if res is not None: + ts[k] = res + # Events outside the new domain get discarded, hence collecting the ones that remained + events.update(set(self.__timeseries[k].events)) + + if len(ts) == 0: + raise IndexError(f"Event is outside every channel's domain.") + + new = self._new(timeseries=ts, events=events) + return new + + if isinstance(item, tuple): + + # Structure-related: Channels + if all(isinstance(k, (str, BodyLocation)) and k in self.channel_names for k in item): + ts = {} + events = set() + for k in item: + ts[k] = self.__timeseries[k] + events.update(set(self.__timeseries[k].events)) + new = self._new(timeseries=ts, events=events) + return new + + # Time-related: Slices, Datetimes, Events, ... 
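+            # e.g., x[datetime(2023, 1, 1, 10), datetime(2023, 1, 1, 11)] returns both time points
+            # of every channel (hypothetical values), as long as none of the items is a channel name.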
+            else:
+                if isinstance(item[0], DateTimeRange):
+                    item = sorted(item, key=lambda x: x.start_datetime)
+                else:
+                    item = sorted(item)
+
+                return self._new({channel_name: channel[tuple(item)] for channel_name, channel in self})
+
+        if isinstance(item, Timeline):
+            if item.is_index:
+                res = self[item._as_index()]
+                res.name += f" indexed by '{item.name}'"
+                return res
+            else:
+                raise IndexError("This Timeline cannot serve as index, because it contains multiple groups of intervals or points.")
+
+        raise IndexError("Index types not supported. Give a datetime (can be in string format), a slice or a tuple of those.")
+
+    @property
+    def channel_names(self) -> set[str | BodyLocation]:
+        '''Returns a set with the labels associated to every channel.'''
+        return set(self.__timeseries.keys())
+
+    @property
+    def name(self):
+        '''Returns the associated name, or 'No Name' if none was provided.'''
+        return self.__name if self.__name is not None else "No Name"
+
+    @name.setter
+    def name(self, name: str):
+        self.__name = name
+
+    @property
+    def patient(self) -> Patient:
+        """Returns the associated patient, if any."""
+        return self.__patient
+
+    @property
+    def patient_code(self):  # TODO: Delete method
+        """Deprecated: use .patient.code instead."""
+        return self.__patient.code if self.__patient is not None else 'n.d.'
+
+    @property
+    def patient_conditions(self) -> Set[MedicalCondition]:  # TODO: Delete method
+        """Deprecated: use .patient.conditions instead."""
+        return self.__patient.conditions if self.__patient is not None else set()
+
+    @property
+    def acquisition_location(self):
+        '''Returns the associated acquisition location, or None if none was provided.'''
+        return self.__acquisition_location
+
+    @property
+    def source(self) -> BS:
+        '''Returns the BS from where the data was read, or None if none was specified.'''
+        return self.__source
+
+    @property
+    def type(self) -> ClassVar:
+        '''Returns the biosignal modality class. E.g.: ECG, EMG, EDA, ...'''
+        return type(self)
+
+    @property
+    def initial_datetime(self) -> datetime:
+        '''Returns the initial datetime of the channel that starts the earliest.'''
+        return min([ts.initial_datetime for ts in self.__timeseries.values()])
+
+    @property
+    def final_datetime(self) -> datetime:
+        '''Returns the final datetime of the channel that ends the latest.'''
+        return max([ts.final_datetime for ts in self.__timeseries.values()])
+
+    @property
+    def domain(self) -> Tuple[DateTimeRange]:
+        if len(self) == 1:
+            return tuple(self.__timeseries.values())[0].domain
+        else:
+            channels = tuple(self.__timeseries.values())
+            cumulative_intersection: Tuple[DateTimeRange]
+            for k in range(1, len(self)):
+                if k == 1:
+                    cumulative_intersection = channels[k].overlap(channels[k - 1])
+                else:
+                    cumulative_intersection = channels[k].overlap(cumulative_intersection)
+            return cumulative_intersection
+
+    @property
+    def domain_timeline(self) -> Timeline:  # TODO: merge with domain
+        return Timeline(Timeline.Group(self.domain), name=self.name + ' Domain')
+
+    @property
+    def subdomains(self) -> Tuple[DateTimeRange]:
+        if len(self) == 1:
+            return tuple(self.__timeseries.values())[0].subdomains
+        else:
+            raise NotImplementedError()
+
+    def _vblock(self, i: int):
+        """
+        Returns a block of temporally aligned segments, vertically stacked across all channels.
+        Note: This assumes all channels are segmented in the same way, i.e., have exactly the same set of subdomains.
+        
:param i: The block index
+        :return: ndarray of vertically stacked segments
+        """
+        N = self._n_segments
+        if isinstance(N, int):
+            if i < N:
+                return np.vstack([channel[i] for channel in self.__timeseries.values()])
+            else:
+                raise IndexError(f"This Biosignal has only {N} blocks.")
+        else:
+            raise AssertionError("Not all channels are segmented in the same way, hence blocks cannot be created.")
+
+    def _block_subdomain(self, i: int) -> DateTimeRange:
+        if len(self) == 1:
+            return tuple(self.__timeseries.values())[0]._block_subdomain(i)
+        else:
+            raise NotImplementedError()
+
+    @property
+    def _n_segments(self) -> int | dict:
+        """
+        Returns the number of segments of each Timeseries.
+        :rtype: dict, with the number of segments labelled by channel name; or int, if they are all the same
+        """
+        n_segments = {}
+        last_n = None
+        all_equal = True
+        for channel_name, channel in self.__timeseries.items():
+            x = channel.n_segments
+            n_segments[channel_name] = x
+            if last_n is not None and last_n != x:
+                all_equal = False
+            last_n = x
+        if all_equal:
+            return last_n
+        else:
+            return n_segments
+
+    @property
+    def duration(self):
+        common_duration = tuple(self.__timeseries.values())[0].duration
+        for _, channel in self:
+            if channel.duration != common_duration:
+                raise AssertionError("Not all channels have the same duration.")
+        return common_duration
+
+    def __get_events_from_medical_conditions(self):
+        res = {}
+        for condition in self.patient_conditions:
+            res.update(condition._get_events())
+        return res
+
+    @property
+    def events(self):
+        '''Tuple of associated Events, ordered by datetime.'''
+        return tuple(sorted(list(self.__associated_events.values()) + list(self.__get_events_from_medical_conditions().values())))
+
+    @property
+    def sampling_frequency(self) -> float:
+        '''Returns the sampling frequency of every channel (if equal), or raises an error if they are not equal.'''
+        if len(self) == 1:
+            return tuple(self.__timeseries.values())[0].sampling_frequency
+        else:
+            common_sf = None
+            for _, channel in self:
+                if common_sf is None:
+                    common_sf = channel.sampling_frequency
+                elif channel.sampling_frequency != common_sf:
+                    raise AttributeError("Biosignal contains 2+ channels, not necessarily all with the same sampling frequency.")
+            return common_sf
+
+    @property
+    def added_noise(self):
+        '''Returns a reference to the noisy component, if the Biosignal was created with added noise; otherwise raises AttributeError.'''
+        if self.__added_noise is not None:
+            return self.__added_noise
+        else:
+            raise AttributeError("No noise was added to this Biosignal.")
+
+    def __len__(self):
+        '''Returns the number of channels.'''
+        return len(self.__timeseries)
+
+    def __repr__(self):
+        '''Returns a textual description of the Biosignal.'''
+        res = "Name: {}\nType: {}\nLocation: {}\nNumber of Channels: {}\nChannels: {}\nUseful Duration: {}\nSource: {}\n".format(
+            self.name,
+            self.type.__name__,
+            self.acquisition_location,
+            len(self),
+            ''.join([(x + ', ') for x in self.channel_names]),
+            self.duration,
+            self.source.__str__(None) if isinstance(self.source, ABCMeta) else str(self.source))
+
+        if len(self.__associated_events) != 0:
+            res += "Events:\n"
+            for event in sorted(self.__associated_events.values()):
+                res += '- ' + str(event) + '\n'
+        events_from_medical_conditions = dict(sorted(self.__get_events_from_medical_conditions().items(), key=lambda item: item[1]))
+        if len(events_from_medical_conditions) != 0:
+            res += "Events associated to Medical Conditions:\n"
+            for key, event in 
events_from_medical_conditions.items():
+                res += f"- {key}:\n{event}\n"
+        return res
+
+    def _to_dict(self) -> Dict[str | BodyLocation, timeseries.Timeseries]:  # TODO: Delete method
+        return deepcopy(self.__timeseries)
+
+    def _to_array(self) -> ndarray:
+        """
+        Converts Biosignal to a NumPy ndarray.
+        :return: C x M x N array, where C is the number of channels, M the number of segments of each, and N their length.
+        :rtype: numpy.ndarray
+        """
+        x = [channel._to_array() for channel in self.__timeseries.values()]
+        return np.stack(x)
+
+    def to_dataframe(self) -> DataFrame:
+        pass
+
+    def __iter__(self):
+        return self.__timeseries.items().__iter__()
+
+    def __contains__(self, item):
+        if isinstance(item, str):
+            if item in self.__timeseries.keys():  # if channel exists
+                return True
+            if item in self.__associated_events:  # if Event occurs
+                return True
+            events_from_conditions = self.__get_events_from_medical_conditions()
+            for label, event in events_from_conditions.items():
+                if item == label and event.domain in self:
+                    return True
+            return False
+        elif isinstance(item, (datetime, DateTimeRange)):
+            for _, channel in self:
+                if item in channel:  # if at least one channel defines this point in time
+                    return True
+            return False
+        else:
+            raise TypeError(f'Cannot apply this operation with {type(item)}.')
+
+    def __mul__(self, other):
+        if isinstance(other, (float, int)):
+            suffix = f' (dilated by {str(other)})' if other > 1 else f' (compressed by {str(other)})'
+            return self._apply_operation_and_new(lambda x: x * other, name=self.name + suffix)
+
+    def __sub__(self, other):
+        return self + (other * -1)
+
+    def __neg__(self):
+        return self * -1
+
+    def __add__(self, other):
+        """
+        If 'other' is a float or int:
+            Adds the constant to every channel; a translation of the signal.
+        If 'other' is a Biosignal:
+            Adds the two sample-by-sample, if they have the same domain.
+        Notes:
+            - If the two Biosignals have two distinct acquisition locations, they will be lost.
+            - If the two Biosignals have two distinct sources, they will be lost.
+            - If the two Biosignals have two distinct patients, they will be lost.
+        Raises:
+            - TypeError if the Biosignals are not of the same type.
+            - ArithmeticError if the Biosignals do not have the same domain.
+        """
+
+        if isinstance(other, (float, int)):
+            return self._apply_operation_and_new(lambda x: x + other, name=self.name + f' (shifted up by {str(other)})')
+
+        if isinstance(other, Biosignal):
+            # Check errors
+            if self.type != other.type:
+                while True:
+                    answer = input(
+                        f"Trying to add an {self.type.__name__} with an {other.type.__name__}. Do you mean to add templates of the second as noise? 
(y/n)") + if answer.lower() in ('y', 'n'): + if answer.lower() == 'y': + return Biosignal.withAdditiveNoise(self, other) + else: + raise TypeError("Cannot add a {0} to a {1} if not as noise.".format(other.type.__name__, self.type.__name__)) + + if (not self.has_single_channel or not other.has_single_channel) and self.channel_names != other.channel_names: + raise ArithmeticError( + "Biosignals to add must have the same number of channels and the same channel names.") # unless each only has one channel + if self.domain != other.domain: + raise ArithmeticError("Biosignals to add must have the same domains.") + + # Prepare common metadata + name = f"{self.name} + {other.name}" + acquisition_location = self.acquisition_location if self.acquisition_location == other.acquisition_location else None + patient = self.__patient if self.patient_code == other.patient_code else None + if isclass(self.source) and isclass(other.source): # Un-instatiated sources + if self.source == other.source: + source = self.__source + else: + source = None + else: + if type(self.source) == type(other.source) and self.source == other.source: + source = self.__source + else: + source = None + + # Perform addition + res_timeseries = {} + if self.has_single_channel and other.has_single_channel: + ch_name1, ch1 = self._get_single_channel() + ch_name2, ch2 = self._get_single_channel() + res_timeseries[f'{ch_name1}+{ch_name2}'] = ch1 + ch2 + else: + for channel_name in self.channel_names: + res_timeseries[channel_name] = self._to_dict()[channel_name] + other._to_dict()[channel_name] + + # Union of Events + events = set(self.__associated_events.values()).union(set(other._Biosignal__associated_events.values())) + + return self._new(timeseries=res_timeseries, source=source, patient=patient, acquisition_location=acquisition_location, + name=name, events=events) + + raise TypeError(f"Addition operation not valid with Biosignal and object of type {type(other)}.") + + def __and__(self, other): + """ + Joins the channels of two Biosignals of the same type, if they do not have the same set of channel names. + Notes: + - If the two Biosignals have two distinct acquisition locations, they will be lost. + - If the two Biosignals have two distinct sources, they will be lost. + - If the two Biosignals have the distict patients, they will be lost. + Raises: + - TypeError if Biosignals are not of the same type. + - ArithmeticError if both Biosignals have any channel name in common. 
+ """ + + # Check errors + if not isinstance(other, Biosignal): + raise TypeError(f"Operation join channels is not valid with object of type {type(other)}.") + if self.type != other.type: + raise TypeError("Cannot join a {0} to a {1}".format(other.type.__name__, self.type.__name__)) + if len(self.channel_names.intersection(other.channel_names)) != 0: + raise ArithmeticError("Channels to join cannot have the same names.") + + # Prepare common metadata + name = f"{self.name} and {other.name}" + acquisition_location = self.acquisition_location if self.acquisition_location == other.acquisition_location else None + patient = self.__patient if self.patient_code == other.patient_code else None + if isclass(self.source) and isclass(other.source): # Un-instatiated sources + if self.source == other.source: + source = self.__source + else: + source = None + else: + if type(self.source) == type(other.source) and self.source == other.source: + source = self.__source + else: + source = None + + # Join channels + res_timeseries = {} + res_timeseries.update(self._to_dict()) + res_timeseries.update(other._to_dict()) + + # Union of Events + events = set(self.__associated_events.values()).union(set(other._Biosignal__associated_events.values())) + + return self._new(timeseries=res_timeseries, source=source, patient=patient, acquisition_location=acquisition_location, name=name, + events=events) + + def __rshift__(self, other): + """ + Temporally concatenates two Biosignal, if they have the same set of channel names. + Notes: + - If the two Biosignals have two distinct acquisition locations, they will be lost. + - If the two Biosignals have two distinct sources, they will be lost. + - If the two Biosignals have the distict patients, they will be lost. + Raises: + - TypeError if Biosignals are not of the same type. + - ArithmeticError if both Biosignals do not have the same channel names. + - ArithmeticError if the second comes before the first. 
+ """ + + # Check errors + if not isinstance(other, Biosignal): + raise TypeError(f"Operation join channels is not valid with object of type {type(other)}.") + if self.type != other.type: + raise TypeError("Cannot join a {0} to a {1}".format(other.type.__name__, self.type.__name__)) + if self.channel_names != other.channel_names: + raise ArithmeticError("Biosignals to concatenate must have the same channel names.") + if other.initial_datetime < self.final_datetime: + raise ArithmeticError("The second Biosignal comes before (in time) the first Biosignal.") + + # Prepare common metadata + name = f"{self.name} >> {other.name}" + acquisition_location = self.acquisition_location if self.acquisition_location == other.acquisition_location else None + patient = self.__patient if self.patient_code == other.patient_code else None + if isclass(self.source) and isclass(other.source): # Un-instatiated sources + if self.source == other.source: + source = self.__source + else: + source = None + else: + if type(self.source) == type(other.source) and self.source == other.source: + source = self.__source + else: + source = None + + # Perform concatenation + res_timeseries = {} + for channel_name in self.channel_names: + res_timeseries[channel_name] = self._get_channel(channel_name) >> other._get_channel(channel_name) + + # Union of Events + events = set(self.__associated_events.values()).union(set(other._Biosignal__associated_events.values())) + + return self._new(timeseries=res_timeseries, source=source, patient=patient, acquisition_location=acquisition_location, name=name, + events=events) + + # =================================== + # Binary Logic using Time and Conditions + + def __lt__(self, other): + if isinstance(other, Biosignal): + return self.final_datetime < other.initial_datetime + else: + res = self.when(lambda x: x < other) + res.name(self.name + ' < ' + str(other)) + return res + + def __le__(self, other): + if isinstance(other, Biosignal): + return self.final_datetime <= other.initial_datetime + else: + res = self.when(lambda x: x <= other) + res.name(self.name + ' >= ' + str(other)) + return res + + def __gt__(self, other): + if isinstance(other, Biosignal): + return self.initial_datetime > other.final_datetime + else: + res = self.when(lambda x: x > other) + res.name(self.name + ' >= ' + str(other)) + return res + + def __ge__(self, other): + if isinstance(other, Biosignal): + return self.initial_datetime >= other.final_datetime + else: + res = self.when(lambda x: x >= other) + res.name(self.name + ' >= ' + str(other)) + return res + + def __eq__(self, other): + if isinstance(other, Biosignal): + return self.initial_datetime == other.initial_datetime and self.final_datetime == other.final_datetime + else: + res = self.when(lambda x: x == other) + res.name(self.name + ' >= ' + str(other)) + return res + + def __ne__(self, other): + if isinstance(other, Biosignal): + return not self.__eq__(other) + else: + res = self.when(lambda x: x != other) + res.name(self.name + ' >= ' + str(other)) + return res + + ######## Events + + def set_channel_name(self, current: str | BodyLocation, new: str | BodyLocation): + if current in self.__timeseries.keys(): + self.__timeseries[new] = self.__timeseries[current] + del self.__timeseries[current] + else: + raise AttributeError(f"Channel named '{current}' does not exist.") + + def set_event_name(self, current: str, new: str): + if current in self.__associated_events.keys(): + event = self.__associated_events[current] + self.__associated_events[new] = 
Event(new, event._Event__onset, event._Event__offset) + del self.__associated_events[current] + else: + raise AttributeError(f"Event named '{current}' is not associated.") + + def delete_events(self): + for _, channel in self: + channel.delete_events() + self.__associated_events = {} + + def filter(self, filter_design) -> int: + ''' + Filters every channel with to the given filter_design. + + @param filter_design: A Filter object specifying the designed filter to be applied. + @return: 0 if the filtering is applied successfully. + @rtype: int + ''' + for channel in self.__timeseries.values(): + channel._accept_filtering(filter_design) + return 0 + + def undo_filters(self): + ''' + Restores the raw samples of every channel, eliminating the action of any applied filter. + ''' + for channel in self.__timeseries.values(): + channel._undo_filters() + + def resample(self, frequency: float): + ''' + Resamples every channel to the new sampling frequency given, using Fourier method. + @param frequency: New sampling frequency (in Hertz). + ''' + for channel in self.__timeseries.values(): + channel._resample(frequency) + + def __draw_plot(self, timeseries_plotting_method, title, xlabel, ylabel, grid_on: bool, show: bool = True, save_to: str = None): + ''' + Draws a base plot to display every channel in a subplot. It is independent of the content that is plotted. + + @param timeseries_plotting_method: The method to be called in Timeseries, that defines what content to plot. + @param title: What the content is about. The Biosignal's name and patient code will be added. + @param xlabel: Label for the horizontal axis. + @param ylabel: Label for the vertical axis. + @param grid_on: True if grid in to be drawn or not; False otherwise. + @param show: True if plot is to be immediately displayed; False otherwise. + @param save_to: A path to save the plot as an image file; If none is provided, it is not saved. + @return: + ''' + fig = plt.figure(figsize=(13, 2.5 * len(self))) + + all_events = self.events + all_onsets = [e.onset for e in all_events if e.has_onset] + all_offsets = [e.offset for e in all_events if e.has_offset] + all_vlines = all_onsets + all_offsets + + for i, channel_name in zip(range(len(self)), self.channel_names): + channel = self.__timeseries[channel_name] + ax = plt.subplot(len(self), 1, i + 1, title=channel_name) + ax.title.set_size(10) + ax.margins(x=0) + ax.set_xlabel(xlabel, fontsize=8, rotation=0, loc="right") + ax.set_ylabel(ylabel, fontsize=8, rotation=90, loc="top") + plt.xticks(fontsize=9) + plt.yticks(fontsize=9) + if grid_on: + ax.grid() + timeseries_plotting_method(self=channel) + + _vlines = [int((t - channel.initial_datetime).total_seconds() * channel.sampling_frequency) for t in all_vlines if t in channel] + plt.vlines(_vlines, ymin=channel.min(), ymax=channel.max(), colors='red') + + fig.suptitle((title + ' ' if title is not None else '') + self.name + ' from patient ' + str(self.patient_code), fontsize=11) + fig.tight_layout() + if save_to is not None: + fig.savefig(save_to) + plt.show() if show else plt.close() + + # return fig + + def plot_spectrum(self, show: bool = True, save_to: str = None): + ''' + Plots the Bode plot of every channel. + @param show: True if plot is to be immediately displayed; False otherwise. + @param save_to: A path to save the plot as an image file; If none is provided, it is not saved. 
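+        Example:
+            biosignal.plot_spectrum(show=False, save_to='spectrum.png')  # save the plot without displaying it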
+ ''' + self.__draw_plot(timeseries.Timeseries._plot_spectrum, 'Power Spectrum of', 'Frequency (Hz)', 'Power (dB)', True, show, save_to) + + def plot(self, show: bool = True, save_to: str = None): + ''' + Plots the amplitude in time of every channel. + @param show: True if plot is to be immediately displayed; False otherwise. + @param save_to: A path to save the plot as an image file; If none is provided, it is not saved. + ''' + return self.__draw_plot(timeseries.Timeseries._plot, None, 'Time', 'Amplitude (n.d.)', False, show, save_to) + + @abstractmethod + def plot_summary(self, show: bool = True, save_to: str = None): + ''' + Plots a summary of relevant aspects of common analysis of the Biosignal. + ''' + pass # Implemented in each type + + def apply_operation(self, operation, **kwargs): + for channel in self.__timeseries.values(): + channel._apply_operation(operation, **kwargs) + + def apply_and_new(self, operation, **kwargs): + for channel in self.__timeseries.values(): + channel._apply_operation_and_new(operation, **kwargs) + + def invert(self, channel_label: str = None): + inversion = lambda x: -1 * x + if channel_label is None: # apply to all channels + self.apply_operation(inversion) + else: # apply only to one channel + self.__timeseries[channel_label]._apply_operation(inversion) + + def associate(self, events: Event | Collection[Event] | Dict[str, Event]): + ''' + Associates an Event to all Timeseries. + Events have names that serve as keys. If keys are given, + i.e. if 'events' is a dict, then the Event names are overridden. + @param events: One or multiple Event objects. + @rtype: None + ''' + + def __add_event(event: Event): + n_channels_associated = 0 + for _, channel in self: + try: + channel.associate(event) + n_channels_associated += 1 + except ValueError: + pass + if n_channels_associated > 0: # If at least one association was possible + self.__associated_events[event.name] = event + else: + warning(f"Event '{event.name}' was not associated, because it is outside of every channel's domain.") + + if isinstance(events, Event): + __add_event(events) + elif isinstance(events, dict): + for event_key in events: + event = events[event_key] + __add_event(Event(event_key, event._Event__onset, event._Event__offset)) # rename with given key + else: + for event in events: + __add_event(event) + + def disassociate(self, event_name: str): + ''' + Disassociates an Event from all Timeseries. + @param event_name: The name of the Event to be removed. + @rtype: None + ''' + if event_name in self.__associated_events: + for _, channel in self: + try: + channel.disassociate(event_name) + except NameError: + pass + del self.__associated_events[event_name] + else: + raise NameError(f"There's no Event '{event_name}' associated to this Biosignal.") + + @classmethod + def withAdditiveNoise(cls, original, noise, name: str = None): + """ + Creates a new Biosignal from 'original' with added 'noise'. + + :param original: (Biosignal) The original Biosignal to be contaminated with noise. + :param noise: (Noise | Timeseries | Biosignal) The noise to add to the original Biosignal. + :param name: (str) The name to associate to the resulting Biosignal. + + When 'noise' is a Noise: + - A trench of noise, with the duration of the channel, will be generated to be added to each channel. + - 'noise' should be configured with the same sampling frequency has the channels. 
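+            Example (hypothetical objects, both at the same sampling frequency):
+                noisy_ecg = ECG.withAdditiveNoise(original=ecg, noise=powerline_noise, name='ECG + powerline noise')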
+ + When 'noise' is a Biosignal: + When it has the same set of channels as 'original', sampled at the same frequency: + - Each noisy channel will be added to the corresponding channel of 'original', in a template-wise manner. + When it has a unique channel: + - That noisy channel will be added to every channel of 'original', in a template-wise manner. + - That noisy channel should have the same sampling frequency has every channel of 'original'. + - If 'noise' has multiple segments, they are concatenated to make a hyper-template. + - Exception: in the case where both Timeseries having the same domain, the noisy samples will be added in a + segment-wise manner. + + When 'noise' is a Timeseries sampled at the same frequency of 'original': + - Its samples will be added to every channel of 'original', in a template-wise manner. + - If 'noise' has multiple segments, they are concatenated to make a hyper-template. + - Exception: in the case where both Timeseries having the same domain, the noisy samples will be added in a + segment-wise manner. + - 'noise' should have been sampled at the same frequency as 'original'. + + What is "template-wise manner"? + - If the template segment is longer than any original segment, the template segment will be trimmed accordingly. + - If the template segment is shorter than any original segment, the template will repeated in time. + - If the two segments are of equal length, they are added as they are. + + :return: A Biosignal with the same properties as the 'original', but with noise added to the samples of every channel. + :rtype: Biosignal subclass + """ + + if not isinstance(original, Biosignal): + raise TypeError(f"Parameter 'original' must be of type Biosignal; but {type(original)} was given.") + + if not isinstance(noise, (Noise, timeseries.Timeseries, Biosignal)): + raise TypeError(f"Parameter 'noise' must be of types Noise, Timeseries or Biosignal; but {type(noise)} was given.") + + if name is not None and not isinstance(name, str): + raise TypeError( + f"Parameter 'name' must be of type str; but {type(name)} was given.") + + def __add_template_noise(samples: ndarray, template: ndarray): + # Case A + if len(samples) < len(template): + _template = template[:len(samples)] # cut where it is enough + return samples + _template # add values + # Case B + elif len(samples) > len(template): + _template = np.tile(template, ceil(len(samples) / len(template))) # repeat full-pattern + _template = _template[:len(samples)] # cut where it is enough + return samples + _template # add values + # Case C + else: # equal lengths + return samples + template # add values + + def __noisy_timeseries(original: timeseries.Timeseries, noise: timeseries.Timeseries) -> timeseries.Timeseries: + # Case 1: Segment-wise + if original.domain == noise.domain: + template = [noise.samples, ] if noise.is_contiguous else noise.samples + return original._apply_operation_and_new(__add_template_noise, template=template, + iterate_over_each_segment_key='template') + # Case 2: Template-wise + elif noise.is_contiguous: + template = noise.samples + return original._apply_operation_and_new(__add_template_noise, template=template) + # Case 3: Template-wise, with hyper-template + else: + template = np.concatenate(noise.samples) # concatenate as a hyper-template + return original._apply_operation_and_new(__add_template_noise, template=template) + + noisy_channels = {} + + # Case Noise + if isinstance(noise, Noise): + for channel_name in original.channel_names: + channel = 
original._get_channel(channel_name) + if channel.sampling_frequency == noise.sampling_frequency: + template = noise[channel.duration] + noisy_channels[channel_name] = channel._apply_operation_and_new(__add_template_noise, template=template) + else: + raise AssertionError( + f"Noise does not have the same sampling frequency as channel '{channel_name}' of 'original'." + f"Suggestion: Resample one of them first.") + + # Case Timeseries + elif isinstance(noise, timeseries.Timeseries): + for channel_name in original.channel_names: + channel = original._get_channel(channel_name) + if channel.units != noise.units and channel.units != None and channel.units != Unitless and noise.units != None and noise.units != Unitless: + raise AssertionError( + f"Noise does not have the same units as channel '{channel_name}' of 'original'." + f"Suggestion: If possible, convert one of them first or drop units.") + if channel.sampling_frequency == noise.sampling_frequency: + noisy_channel = __noisy_timeseries(channel, noise) + noisy_channels[channel_name] = noisy_channel + else: + raise AssertionError( + f"Noise does not have the same sampling frequency as channel '{channel_name}' of 'original'." + f"Suggestion: Resample one of them first.") + + + elif isinstance(noise, Biosignal): + # Case Biosignal channel-wise + if original.channel_names == noise.channel_names: + for channel_name in original.channel_names: + original_channel = original._get_channel(channel_name) + noise_channel = noise._get_channel(channel_name) + if original_channel.units != noise_channel.units and original_channel.units != None and original_channel.units != Unitless and noise_channel.units != None and noise_channel.units != Unitless: + raise AssertionError( + f"Noise does not have the same units as channel '{channel_name}' of 'original'." + f"Suggestion: If possible, convert one of them first or drop units.") + if original_channel.sampling_frequency == noise_channel.sampling_frequency: + noisy_channel = __noisy_timeseries(original_channel, noise_channel) + noisy_channels[channel_name] = noisy_channel + else: + raise AssertionError(f"Channels '{channel_name}' do not have the same sampling frequency in 'original' and 'noise'." + f"Suggestion: Resample one of them first.") + + # Case Biosignal unique channel + elif len(noise) == 1: + _, x = tuple(iter(noise))[0] + for channel_name in original.channel_names: + channel = original._get_channel(channel_name) + if channel.units != x.units and channel.units != None and channel.units != Unitless and x.units != None and x.units != Unitless: + raise AssertionError( + f"Noise does not have the same units as channel '{channel_name}' of 'original'." + f"Suggestion: If possible, convert one of them first or drop units.") + if channel.sampling_frequency == x.sampling_frequency: + noisy_channel = __noisy_timeseries(channel, x) + noisy_channels[channel_name] = noisy_channel + else: + raise AssertionError(f"Noise does not have the same sampling frequency as channel '{channel_name}' of 'original'." 
+        elif isinstance(noise, Biosignal):
+            # Case Biosignal, channel-wise
+            if original.channel_names == noise.channel_names:
+                for channel_name in original.channel_names:
+                    original_channel = original._get_channel(channel_name)
+                    noise_channel = noise._get_channel(channel_name)
+                    if original_channel.units != noise_channel.units and original_channel.units is not None \
+                            and original_channel.units != Unitless and noise_channel.units is not None \
+                            and noise_channel.units != Unitless:
+                        raise AssertionError(f"Noise does not have the same units as channel '{channel_name}' of 'original'. "
+                                             f"Suggestion: If possible, convert one of them first or drop units.")
+                    if original_channel.sampling_frequency == noise_channel.sampling_frequency:
+                        noisy_channels[channel_name] = __noisy_timeseries(original_channel, noise_channel)
+                    else:
+                        raise AssertionError(f"Channels '{channel_name}' do not have the same sampling frequency in 'original' and 'noise'. "
+                                             f"Suggestion: Resample one of them first.")
+
+            # Case Biosignal, unique channel
+            elif len(noise) == 1:
+                _, noise_channel = tuple(iter(noise))[0]
+                for channel_name in original.channel_names:
+                    channel = original._get_channel(channel_name)
+                    if channel.units != noise_channel.units and channel.units is not None and channel.units != Unitless \
+                            and noise_channel.units is not None and noise_channel.units != Unitless:
+                        raise AssertionError(f"Noise does not have the same units as channel '{channel_name}' of 'original'. "
+                                             f"Suggestion: If possible, convert one of them first or drop units.")
+                    if channel.sampling_frequency == noise_channel.sampling_frequency:
+                        noisy_channels[channel_name] = __noisy_timeseries(channel, noise_channel)
+                    else:
+                        raise AssertionError(f"Noise does not have the same sampling frequency as channel '{channel_name}' of 'original'. "
+                                             f"Suggestion: Resample one of them first.")
+
+            else:
+                raise ArithmeticError("Noise should have 1 channel only (to be added to every channel of 'original') "
+                                      "or the same channels as 'original' (for each to be added to the corresponding channel of 'original').")
+
+        # Keep the original's Events, plus the noise's Events when the noise is a Biosignal
+        events = set(original.__associated_events.values())
+        if isinstance(noise, Biosignal):
+            events = events.union(set(noise._Biosignal__associated_events.values()))
+
+        return original._new(timeseries=noisy_channels, name=name if name is not None else 'Noisy ' + original.name,
+                             events=events, added_noise=noise)
+
+    def restructure_domain(self, time_intervals: tuple[DateTimeRange]):
+        domain = self.domain
+        if len(domain) >= len(time_intervals):
+            for _, channel in self:
+                # 1. Concatenate segments
+                channel._concatenate_segments()
+                # 2. Partition according to the new domain
+                channel._partition(time_intervals)
+        else:
+            raise NotImplementedError("Not yet implemented.")
+
+    def undo_segmentation(self, time_intervals: tuple[DateTimeRange]):
+        for _, channel in self:
+            channel._merge(time_intervals)
+
+    def tag(self, tags: str | tuple[str]):
+        """
+        Marks all channels with a tag. Useful to mark machine learning targets.
+        :param tags: The label or labels to tag the channels with.
+        :return: None
+        """
+        if isinstance(tags, str):
+            for _, channel in self:
+                channel.tag(tags)
+        elif isinstance(tags, tuple) and all(isinstance(x, str) for x in tags):
+            for x in tags:
+                for _, channel in self:
+                    channel.tag(x)
+        else:
+            raise TypeError("Give one or multiple string labels to tag the channels.")
+
+    @classmethod
+    def fromNoise(cls,
+                  noises: Noise | Dict[str | BodyLocation, Noise],
+                  time_intervals: DateTimeRange | tuple[DateTimeRange],
+                  name: str = None):
+        """
+        Creates a Biosignal from a noise source.
+
+        :param noises:
+            - If a Noise object is given, the Biosignal will have 1 channel for the specified time interval.
+            - If a dictionary of Noise objects is given, the Biosignal will have multiple channels, with different
+              generated samples, for the specified time interval, named after the dictionary keys.
+
+        :param time_intervals: Interval [x, y[ where x will be the initial date and time of every channel, and y will be
+            the final date and time of every channel; or a union of such intervals, in case a tuple is given.
+
+        :param name: The name to be associated to the Biosignal. Optional.
+
+        :return: Biosignal subclass
+        """
+
+        if not isinstance(time_intervals, DateTimeRange) and not (isinstance(time_intervals, tuple) and
+                                                                  all(isinstance(x, DateTimeRange) for x in time_intervals)):
+            raise TypeError(f"Parameter 'time_intervals' should be of type DateTimeRange or a tuple of them.")
+
+        if isinstance(time_intervals, tuple) and len(time_intervals) == 1:
+            time_intervals = time_intervals[0]
+
+        channels = {}
+
+        if isinstance(noises, Noise):
+            if isinstance(time_intervals, DateTimeRange):
+                samples = noises[time_intervals.timedelta]
+                channels[noises.name] = timeseries.Timeseries(samples, time_intervals.start_datetime, noises.sampling_frequency,
+                                                              units=Unitless(), name=noises.name)
+            else:
+                segments = {x.start_datetime: noises[x.timedelta] for x in time_intervals}
+                channels[noises.name] = timeseries.Timeseries.withDiscontiguousSegments(segments, noises.sampling_frequency,
+                                                                                        units=Unitless(), name=noises.name)
+
+        elif isinstance(noises, dict):
+            if isinstance(time_intervals, DateTimeRange):
+                for channel_name, noise in noises.items():
+                    samples = noise[time_intervals.timedelta]
+                    channels[channel_name] = timeseries.Timeseries(samples, time_intervals.start_datetime, noise.sampling_frequency,
+                                                                   units=Unitless(), name=noise.name + f" : {channel_name}")
+            else:
+                for channel_name, noise in noises.items():
+                    segments = {x.start_datetime: noise[x.timedelta] for x in time_intervals}
+                    channels[channel_name] = timeseries.Timeseries.withDiscontiguousSegments(segments, noise.sampling_frequency,
+                                                                                             units=Unitless(),
+                                                                                             name=noise.name + f" : {channel_name}")
+
+        return cls(channels, name=name)
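+
+    # Usage sketch for fromNoise (all names and values below are illustrative, not part of this patch):
+    #   white = Noise(...)  # a generator configured at, e.g., 128 Hz
+    #   interval = DateTimeRange(datetime(2023, 3, 7, 9, 0), datetime(2023, 3, 7, 9, 5))
+    #   synthetic = ECG.fromNoise({'V5': white, 'V6': white}, interval, name='5 min of synthetic noise')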
+
+    def acquisition_scores(self):
+        print(f"Acquisition scores for '{self.name}'")
+        completeness_score = self.completeness_score()
+        print("Completeness Score = " + ("%.2f" % (completeness_score * 100) + "%" if completeness_score else "n.d."))
+        onbody_score = self.onbody_score()
+        print("On-body Score = " + ("%.2f" % (onbody_score * 100) + "%" if onbody_score else "n.d."))
+        quality_score = self.quality_score(_onbody_duration=onbody_score * self.duration if onbody_score else self.duration)
+        print("Quality Score = " + ("%.2f" % (quality_score * 100) + "%" if quality_score else "n.d."))
+
+    def completeness_score(self):
+        recorded_duration = self.duration
+        expected_duration = self.final_datetime - self.initial_datetime
+        return recorded_duration / expected_duration
+
+    def onbody_score(self):
+        if hasattr(self.source, 'onbody'):  # if the BiosignalSource defines an 'onbody' method, this score exists and is computed
+            x = self.source.onbody(self)
+            if x:
+                return x.duration / self.duration
+
+    def quality_score(self, _onbody_duration=None):
+        if _onbody_duration:
+            if hasattr(self, 'acceptable_quality'):  # if the modality defines 'acceptable_quality', this score exists and is computed
+                return self.acceptable_quality().duration / _onbody_duration
+        else:
+            if hasattr(self, 'acceptable_quality') and hasattr(self.source, 'onbody'):
+                return self.acceptable_quality().duration / self.source.onbody(self).duration
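+
+    # Worked example of the scores above (illustrative values): in a session expected to last
+    # 100 minutes in which only 90 minutes of samples were recorded, completeness_score() == 0.9;
+    # if the source's onbody() reports the device was worn for 81 of those 90 recorded minutes,
+    # onbody_score() == 0.9 as well.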
+    # ===================================
+    # SERIALIZATION
+
+    def __getstate__(self):
+        """
+        1: __name (str)
+        2: __source (BS subclass (instantiated or not))
+        3: __patient (Patient)
+        4: __acquisition_location (BodyLocation)
+        5: __associated_events (tuple)
+        6: __timeseries (dict)
+        """
+        return (self.__SERIALVERSION, self.__name, self.__source, self.__patient, self.__acquisition_location,
+                tuple(self.__associated_events.values()), self.__timeseries)
+
+    def __setstate__(self, state):
+        if state[0] in (1, 2):
+            self.__name, self.__source, self.__patient, self.__acquisition_location = state[1:5]
+            self.__timeseries = state[6]
+            self.__associated_events = {}
+            self.associate(state[5])
+        else:
+            raise IOError(
+                f'Version of {self.__class__.__name__} object not supported. Serialized version: {state[0]}; '
+                f'Supported versions: 1 and 2.')
+
+    EXTENSION = '.biosignal'
+
+    def save(self, save_to: str):
+        # Check extension
+        if not save_to.endswith(Biosignal.EXTENSION):
+            save_to += Biosignal.EXTENSION
+
+        # Make memory maps
+        temp_dir = mkdtemp(prefix='ltbio.')
+        for _, channel in self:
+            channel._memory_map(temp_dir)
+
+        # Write
+        from _pickle import dump
+        with open(save_to, 'wb') as f:
+            dump(self, f)
+
+        # Clean up memory maps
+        rmtree(temp_dir)
+
+    @classmethod
+    def load(cls, filepath: str):
+        # Check extension
+        if not filepath.endswith(Biosignal.EXTENSION):
+            raise IOError("Only .biosignal files are allowed.")
+
+        from _pickle import load
+        from _pickle import UnpicklingError
+
+        # Read
+        try:  # Versions >= 2023.0:
+            f = open(filepath, 'rb')
+            biosignal = load(f)
+        except UnpicklingError as e:  # Versions 2022.0, 2022.1 and 2022.2:
+            from bz2 import BZ2File
+            print("Loading...\nNote: Loading a version older than 2023.0 takes significantly more time. "
+                  "It is suggested you save this Biosignal again, so you can have it in the newer, faster format.")
+            f = BZ2File(filepath, 'rb')
+            biosignal = load(f)
+        f.close()
+        return biosignal
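+
+    # Usage sketch for save/load (the file name is illustrative, not part of this patch):
+    #   ecg.save('session1')                       # writes 'session1.biosignal'
+    #   same_ecg = ECG.load('session1.biosignal')  # reads it back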
+
+
+class MultimodalBiosignal(Biosignal):
+
+    def __init__(self, **biosignals):
+
+        timeseries = {}
+        # sources = {}
+        patient = None
+        # locations = {}
+        name = "Union of"
+        events = {}
+
+        for label, biosignal in biosignals.items():
+            if patient is None:
+                patient = biosignal._Biosignal__patient
+            elif patient != biosignal._Biosignal__patient:
+                raise ValueError("When joining Biosignals, they all must be from the same Patient.")
+
+            for channel_label, ts in biosignal._to_dict().items():
+                timeseries[label + ':' + channel_label] = ts  # Join Timeseries in a single dictionary
+
+            # sources[label] = biosignal.source  # Join sources
+
+            # if biosignal.acquisition_location is not None:
+            #     locations[label] = biosignal.acquisition_location
+
+            name += f" '{biosignal.name}'," if biosignal.name != "No Name" else f" '{label}',"
+
+            for event in biosignal.events:
+                if event.name in events and events[event.name] != event:
+                    raise ValueError("There are two event names associated to different onsets/offsets in this set of Biosignals.")
+                else:
+                    events[event.name] = event
+
+        super(MultimodalBiosignal, self).__init__(timeseries, None, patient, None, name[:-1])
+        self.associate(events)
+        self.__biosignals = biosignals
+
+        if len(self.type) == 1:
+            raise TypeError("Cannot create a Multimodal Biosignal of just 1 modality.")
+
+    @property
+    def type(self):
+        return {biosignal.type for biosignal in self.__biosignals.values()}
+
+    @property
+    def source(self) -> Set[BS]:
+        return {biosignal.source for biosignal in self.__biosignals.values()}
+
+    @property
+    def acquisition_location(self) -> Set[BodyLocation]:
+        return {biosignal.acquisition_location for biosignal in self.__biosignals.values()}
+
+    def __getitem__(self, item):
+        if isinstance(item, tuple):
+            if len(item) == 2:
+                biosignal = self.__biosignals[item[0]]
+                return biosignal[item[1]]
+
+        elif isinstance(item, str) and item in self.__biosignals.keys():
+            return self.__biosignals[item]
+
+        raise IndexError("Indexing a Multimodal Biosignal should have two arguments, like multisignal['ecg', 'V5'], "
+                         "where 'ecg' is the Biosignal to address and 'V5' is the channel to get.")
+
+    def __contains__(self, item):
+        if isinstance(item, str) and item in self.__biosignals.keys():
+            return True
+        if isinstance(item, Biosignal) and item in self.__biosignals.values():
+            return True
+        return super(MultimodalBiosignal, self).__contains__(item)
+
+    def __str__(self):
+        '''Returns a textual description of the MultimodalBiosignal.'''
+        res = f"MultimodalBiosignal containing {len(self.__biosignals)} Biosignals:\n"
+        for i, biosignal in enumerate(self.__biosignals.values()):
+            res += "({})\n{}".format(i, str(biosignal))
+        return res
+
+    def plot_summary(self, show: bool = True, save_to: str = None):
+        raise TypeError("Functionality not available for Multimodal Biosignals.")
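+
+    # Usage sketch (illustrative names, not part of this patch): a MultimodalBiosignal is keyed by
+    # the labels given at construction, and a (label, channel) pair addresses one channel:
+    #   multisignal = MultimodalBiosignal(ecg=my_ecg, eda=my_eda)
+    #   multisignal['ecg']         # the whole ECG
+    #   multisignal['ecg', 'V5']   # only its 'V5' channel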
 
 
 def plot_comparison(biosignals: Collection[Biosignal], show: bool = True, save_to: str = None):
@@ -23,7 +1614,6 @@ def plot_comparison(biosignals: Collection[Biosignal], show: bool = True, save_t
         if item.channel_names != channel_names:
             raise AssertionError("The set of channel names of all Biosignals must be the same for comparison.")
 
-
     fig = plt.figure(figsize=(13, 6))
 
     for i, channel_name in zip(range(len(channel_names)), channel_names):
@@ -74,7 +1664,7 @@ def cross_correlation(biosignal1: Biosignal, biosignal2: Biosignal):
     ts1: Timeseries = biosignal1._get_channel(biosignal1.channel_names.pop())
     ts2: Timeseries = biosignal2._get_channel(biosignal2.channel_names.pop())
 
-    #correlations = correlate(ts1.samples, ts2.samples, mode='full', method='direct')
+    # correlations = correlate(ts1.samples, ts2.samples, mode='full', method='direct')
     if ts1.is_contiguous:
         iterate_over_each_segment_key = None
     else:
@@ -83,6 +1673,6 @@ def cross_correlation(biosignal1: Biosignal, biosignal2: Biosignal):
     correlation = ts1._apply_operation_and_new(correlate, units=Unitless(),
                                                name=f'Cross-Correlation between {biosignal1.name} and {biosignal2.name}',
                                                in2=ts2.samples, iterate_over_each_segment_key=iterate_over_each_segment_key,
-                                               )#mode='full', method='direct')
+                                               )  # mode='full', method='direct')
 
     return correlation
diff --git a/src/ltbio/biosignals/derived.py b/src/ltbio/biosignals/derived.py
new file mode 100644
index 00000000..0addcfbf
--- /dev/null
+++ b/src/ltbio/biosignals/derived.py
@@ -0,0 +1,96 @@
+# -*- encoding: utf-8 -*-
+
+# ===================================
+
+# IT - LongTermBiosignals
+
+# Package: src/ltbio/biosignals
+# Module: derived
+# Description: Derived biosignal modalities (ACCMAG, RRI, HR, IBI), extracted from original Biosignals.
+
+# Contributors: João Saraiva
+# Created: 07/03/2023
+
+# ===================================
+
+# NOTE (editorial): the imports below are an assumption, added so this new module is
+# self-contained; adjust the paths if the surrounding refactor places these names elsewhere.
+# The 'annotations' import makes type hints lazy, letting HR reference IBI before it is defined.
+from __future__ import annotations
+
+from ltbio.biosignals.modalities import ACC, ECG, PPG, Biosignal
+from ltbio.biosignals.timeseries.Unit import G, Multiplier, BeatsPerMinute, Second
+
+
+class DerivedBiosignal(Biosignal):
+    """
+    A DerivedBiosignal is a set of Timeseries of some feature extracted from an original Biosignal.
+    Such a feature is useful to manipulate like any other Biosignal.
+    """
+
+    def __init__(self, timeseries, source=None, patient=None, acquisition_location=None, name=None, original: Biosignal = None):
+        if original is not None:
+            super().__init__(timeseries, original.source, original._Biosignal__patient, original.acquisition_location, original.name)
+        else:
+            super().__init__(timeseries, source, patient, acquisition_location, name)
+
+        self.original = original  # Save a reference to the original Biosignal
+
+
+class ACCMAG(DerivedBiosignal):
+
+    DEFAULT_UNIT = G(Multiplier._)
+
+    def __init__(self, timeseries, source=None, patient=None, acquisition_location=None, name=None, original: ACC | None = None):
+        super().__init__(timeseries, source, patient, acquisition_location, name, original)
+
+    @classmethod
+    def fromACC(cls):
+        pass
+
+    def plot_summary(self, show: bool = True, save_to: str = None):
+        pass
+
+
+class RRI(DerivedBiosignal):
+
+    def __init__(self, timeseries, source=None, patient=None, acquisition_location=None, name=None, original: ECG | None = None):
+        super().__init__(timeseries, source, patient, acquisition_location, name, original)
+
+    @classmethod
+    def fromECG(cls):
+        pass
+
+    def plot_summary(self, show: bool = True, save_to: str = None):
+        pass
+
+
+class HR(DerivedBiosignal):
+
+    DEFAULT_UNIT = BeatsPerMinute()
+
+    def __init__(self, timeseries, source=None, patient=None, acquisition_location=None, name=None, original: RRI | IBI | ECG | PPG | None = None):
+        super(HR, self).__init__(timeseries, source, patient, acquisition_location, name, original)
+
+    @classmethod
+    def fromRRI(cls):
+        pass
+
+    @classmethod
+    def fromIBI(cls):
+        pass
+
+    def plot_summary(self, show: bool = True, save_to: str = None):
+        pass
+
+    def acceptable_quality(self):  # -> Timeline
+        """
+        Acceptable physiological values
+        """
+        return self.when(lambda x: 40 <= x <= 200)  # between 40 and 200 bpm
+
+
+class IBI(DerivedBiosignal):
+
+    DEFAULT_UNIT = Second()
+
+    def __init__(self, timeseries, source=None, patient=None, acquisition_location=None, name=None, original: PPG | None = None):
+        super().__init__(timeseries, source, patient, acquisition_location, name, original)
+
+    @classmethod
+    def fromPPG(cls):
+        pass
+
+    def plot_summary(self, show: bool = True, save_to: str = None):
+        pass
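+
+# Usage sketch (illustrative; the from* constructors above are still stubs in this patch):
+#   rri = RRI({'rri': some_timeseries}, original=my_ecg)  # keeps a reference to the source ECG
+#   rri.original is my_ecg                                # True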
diff --git a/src/ltbio/biosignals/modalities/ECG.py b/src/ltbio/biosignals/modalities.py
similarity index 78%
rename from src/ltbio/biosignals/modalities/ECG.py
rename to src/ltbio/biosignals/modalities.py
index aec91950..7179b02d 100644
--- a/src/ltbio/biosignals/modalities/ECG.py
+++ b/src/ltbio/biosignals/modalities.py
@@ -1,19 +1,22 @@
 # -*- encoding: utf-8 -*-
 
 # ===================================
 
 # IT - LongTermBiosignals
 
-# Package: biosignals
-# Module: ECG
-# Description: Class ECG, a type of Biosignal named Electrocardiogram.
+# Package: src/ltbio/biosignals
+# Module: modalities
+# Description: All Biosignal modality classes: mechanical, electrical, optical and others.
 
-# Contributors: João Saraiva, Mariana Abreu, Rafael Silva
+# Contributors: João Saraiva, Mariana Abreu
 # Created: 12/05/2022
-# Last Updated: 10/08/2022
+# Last Updated: 07/03/2023
 
 # ===================================
 
+from ltbio.biosignals.modalities.Biosignal import Biosignal, DerivedBiosignal
+from ltbio.biosignals.timeseries.Unit import *
+
 from datetime import timedelta
 from statistics import mean
 from typing import Callable
@@ -32,7 +35,38 @@
 from ltbio.biosignals.timeseries.Unit import Volt, Multiplier, BeatsPerMinute, Second
 
+# ===================================
+# Mechanical Modalities
+# ===================================
+
+class ACC(Biosignal):
+
+    DEFAULT_UNIT = G(Multiplier._)
+
+    def __init__(self, timeseries, source=None, patient=None, acquisition_location=None, name=None):
+        super(ACC, self).__init__(timeseries, source, patient, acquisition_location, name)
+
+    def plot_summary(self, show: bool = True, save_to: str = None):
+        pass
+
+
+class RESP(Biosignal):
+
+    DEFAULT_UNIT = Volt(Multiplier.m)
+
+    def __init__(self, timeseries, source=None, patient=None, acquisition_location=None, name=None):
+        super(RESP, self).__init__(timeseries, source, patient, acquisition_location, name)
+
+    def plot_summary(self, show: bool = True, save_to: str = None):
+        pass
+
+
+# ===================================
+# Electrical Modalities
+# ===================================
+
 class ECG(Biosignal):
+
+    DEFAULT_UNIT = Volt(Multiplier.m)
 
     def __init__(self, timeseries, source=None, patient=None, acquisition_location=None, name=None):
@@ -505,15 +539,143 @@ def aux(signal, p1, p2, **kwargs):
 
             res[channel_name] = average(array(res[channel_name]), weights=list(map(lambda subdomain: subdomain.timedelta.total_seconds(), channel.domain)))
 
+class EDA(Biosignal):
+
+    DEFAULT_UNIT = Volt(Multiplier.m)
+
+    def __init__(self, timeseries, source=None, patient=None, acquisition_location=None, name=None):
+        super(EDA, self).__init__(timeseries, source, patient, acquisition_location, name)
+
+    def plot_summary(self, show: bool = True, save_to: str = None):
+        pass
+
+    @property
+    def preview(self):
+        """Returns 2 minutes of the middle of the signal."""
+        domain = self.domain
+        middle_of_domain: DateTimeRange = domain[len(domain) // 2]
+        middle = middle_of_domain.start_datetime + (middle_of_domain.timedelta / 2)
+        try:
+            return self[middle - timedelta(seconds=2): middle + timedelta(minutes=2)]
+        except IndexError:
+            raise AssertionError(
+                f"The middle segment of {self.name} from {self.patient_code} is not long enough to return a 2-minute preview.")
+
+    @staticmethod
+    def racSQI(samples):
+        """
+        Computes the Rate of Amplitude Change (RAC) of the given samples.
+        It is recommended to be analysed in windows of 2 seconds.
+        """
+        max_, min_ = max(samples), min(samples)
+        amplitude = max_ - min_
+        return abs(amplitude / max_)
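+
+    # Worked example (illustrative): a 2-second window whose samples span max 2.0 and min 1.6
+    # has amplitude 0.4, so RAC = |0.4 / 2.0| = 0.2, right at the acceptance boundary
+    # (the rule below requires RAC < 0.2).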
+
+    def acceptable_quality(self):  # -> Timeline
+        """
+        Suggested by Böttcher et al., Scientific Reports, 2022, for wearable wrist EDA.
+        """
+        return self.when(lambda x: mean(x) > 0.05 and EDA.racSQI(x) < 0.2, window=timedelta(seconds=2))
+
 
-class RRI(DerivedBiosignal):
+class EEG(Biosignal):
 
-    def __init__(self, timeseries, source=None, patient=None, acquisition_location=None, name=None, original: ECG | None = None):
-        super().__init__(timeseries, source, patient, acquisition_location, name, original)
+    DEFAULT_UNIT = Volt(Multiplier.m)
+
+    def __init__(self, timeseries, source=None, patient=None, acquisition_location=None, name=None):
+        super(EEG, self).__init__(timeseries, source, patient, acquisition_location, name)
 
-    @classmethod
-    def fromECG(cls):
+    def plot_summary(self, show: bool = True, save_to: str = None):
         pass
 
+
+class EMG(Biosignal):
+
+    DEFAULT_UNIT = Volt(Multiplier.m)
+
+    def __init__(self, timeseries, source=None, patient=None, acquisition_location=None, name=None):
+        super(EMG, self).__init__(timeseries, source, patient, acquisition_location, name)
+
+    def plot_summary(self, show: bool = True, save_to: str = None):
         pass
+
+
+# ===================================
+# Optical Modalities
+# ===================================
+
+class PPG(Biosignal):
+
+    DEFAULT_UNIT = None
+
+    def __init__(self, timeseries, source=None, patient=None, acquisition_location=None, name=None, **options):
+        super(PPG, self).__init__(timeseries, source, patient, acquisition_location, name, **options)
+
+    def plot_summary(self, show: bool = True, save_to: str = None):
+        pass
+
+    def acceptable_quality(self):  # -> Timeline
+        """
+        Suggested for wearable wrist PPG by:
+            - Glasstetter et al., MDPI Sensors, 21, 2021
+            - Böttcher et al., Scientific Reports, 2022
+        """
+
+        sfreq = self.sampling_frequency
+        nperseg = int(4 * self.sampling_frequency)  # 4 s window
+        fmin = 0.1  # Hz
+        fmax = 5  # Hz
+
+        def spectral_entropy(x, sfreq, nperseg, fmin, fmax):
+            if len(x) < nperseg:  # if the segment is smaller than 4 s
+                nperseg = len(x)
+            noverlap = int(0.9375 * nperseg)  # if nperseg = 4 s, then 3.75 s of overlap
+            f, psd = welch(x, sfreq, nperseg=nperseg, noverlap=noverlap)
+            idx_min = np.argmin(np.abs(f - fmin))
+            idx_max = np.argmin(np.abs(f - fmax))
+            psd = psd[idx_min:idx_max]
+            psd /= np.sum(psd)  # normalise the PSD
+            entropy = -np.sum(psd * np.log2(psd))
+            N = idx_max - idx_min
+            entropy_norm = entropy / np.log2(N)
+            return entropy_norm
+
+        return self.when(lambda x: spectral_entropy(x, sfreq, nperseg, fmin, fmax) < 0.8, window=timedelta(seconds=4))
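+
+# Note on the spectral-entropy rule above (editorial, illustrative): broadband noise spreads power
+# evenly across the 0.1-5 Hz band, pushing the normalised entropy towards 1, so the window is
+# rejected; a clean pulse wave concentrates power at the heart rate and its harmonics, keeping
+# the entropy well below the 0.8 threshold.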
+
+
+class TEMP(Biosignal):
+
+    DEFAULT_UNIT = DegreeCelsius(Multiplier._)
+
+    def __init__(self, timeseries, source=None, patient=None, acquisition_location=None, name=None):
+        super(TEMP, self).__init__(timeseries, source, patient, acquisition_location, name)
+
+    def plot_summary(self, show: bool = True, save_to: str = None):
+        pass
+
+    @property
+    def preview(self):
+        """Returns 2 minutes of the middle of the signal."""
+        domain = self.domain
+        middle_of_domain: DateTimeRange = domain[len(domain) // 2]
+        middle = middle_of_domain.start_datetime + (middle_of_domain.timedelta / 2)
+        try:
+            return self[middle - timedelta(seconds=2): middle + timedelta(minutes=2)]
+        except IndexError:
+            raise AssertionError(
+                f"The middle segment of {self.name} from {self.patient_code} is not long enough to return a 2-minute preview.")
+
+    @staticmethod
+    def racSQI(samples):
+        """
+        Computes the Rate of Amplitude Change (RAC) of the given samples.
+        It is recommended to be analysed in windows of 2 seconds.
+        """
+        max_, min_ = max(samples), min(samples)
+        amplitude = max_ - min_
+        return abs(amplitude / max_)
+
+    def acceptable_quality(self):  # -> Timeline
+        """
+        Suggested by Böttcher et al., Scientific Reports, 2022, for wearable wrist TEMP.
+        """
+        return self.when(lambda x: 25 < mean(x) < 40 and TEMP.racSQI(x) < 0.2, window=timedelta(seconds=2))
diff --git a/src/ltbio/biosignals/modalities/ACC.py b/src/ltbio/biosignals/modalities/ACC.py
deleted file mode 100644
index d1dba368..00000000
--- a/src/ltbio/biosignals/modalities/ACC.py
+++ /dev/null
@@ -1,44 +0,0 @@
-# -*- encoding: utf-8 -*-
-
-# ===================================
-
-# IT - LongTermBiosignals
-
-# Package: biosignals
-# Module: ACC
-# Description: Class ACC, a type of Biosignal named Accelerometer.
-
-# Contributors: João Saraiva, Mariana Abreu
-# Created: 12/05/2022
-# Last Updated: 07/07/2022
-
-# ===================================
-
-from ltbio.biosignals.modalities.Biosignal import Biosignal, DerivedBiosignal
-from ltbio.biosignals.timeseries.Unit import G, Multiplier
-
-
-class ACC(Biosignal):
-
-    DEFAULT_UNIT = G(Multiplier._)
-
-    def __init__(self, timeseries, source=None, patient=None, acquisition_location=None, name=None):
-        super(ACC, self).__init__(timeseries, source, patient, acquisition_location, name)
-
-    def plot_summary(self, show: bool = True, save_to: str = None):
-        pass
-
-
-class ACCMAG(DerivedBiosignal):
-
-    DEFAULT_UNIT = G(Multiplier._)
-
-    def __init__(self, timeseries, source=None, patient=None, acquisition_location=None, name=None, original: ACC | None = None):
-        super().__init__(timeseries, source, patient, acquisition_location, name, original)
-
-    @classmethod
-    def fromACC(cls):
-        pass
-
-    def plot_summary(self, show: bool = True, save_to: str = None):
-        pass
diff --git a/src/ltbio/biosignals/modalities/Biosignal.py b/src/ltbio/biosignals/modalities/Biosignal.py
deleted file mode 100644
index 0e47ef3b..00000000
--- a/src/ltbio/biosignals/modalities/Biosignal.py
+++ /dev/null
@@ -1,1339 +0,0 @@
-# -*- encoding: utf-8 -*-
-
-# ===================================
-
-# IT - LongTermBiosignals
-
-# Package: biosignals
-# Module: Biosignal
-# Description: The base class holding all data related to a biosignal and its channels.
-
-# Contributors: João Saraiva, Mariana Abreu
-# Last Updated: 26/01/2023
-
-# ===================================
-
-
-from abc import ABC, abstractmethod, ABCMeta
-from copy import deepcopy
-from datetime import datetime, timedelta
-from inspect import isclass
-from math import ceil
-from shutil import rmtree
-from tempfile import mkdtemp
-from typing import Dict, Tuple, Collection, Set, ClassVar, Callable
-
-import matplotlib.pyplot as plt
-import numpy as np
-from datetimerange import DateTimeRange
-from dateutil.parser import parse as to_datetime, ParserError
-from numpy import ndarray
-
-from ltbio.biosignals.sources.BiosignalSource import BiosignalSource
-from ltbio.biosignals.timeseries.Event import Event
-from ltbio.biosignals.timeseries.Unit import Unitless
-# from ...processing.filters.Filter import Filter
-from ltbio.clinical.BodyLocation import BodyLocation
-from ltbio.clinical.Patient import Patient
-from ltbio.clinical.conditions.MedicalCondition import MedicalCondition
-from ltbio.processing.noises.Noise import Noise
-from .. import timeseries
-from ..timeseries.Timeline import Timeline
-
-
-class Biosignal(ABC):
-    """
-    A Biosignal is a set of channels (Timeseries), each of which with samples measuring a biological variable.
- It may be associated with a source, a patient, and a body location. It can also have a name. - It has an initial and final datetime. Its length is its number of channels. - It can be resampled, filtered, and concatenated to other Biosignals. - Amplitude and spectrum plots can be displayed and saved. - """ - - __SERIALVERSION: int = 2 - - def __init__(self, timeseries: Dict[str|BodyLocation, timeseries.Timeseries] | str | Tuple[datetime], source:BiosignalSource.__subclasses__()=None, patient:Patient=None, acquisition_location:BodyLocation=None, name:str=None, **options): - - # Save BiosignalSource, if given - self.__source = source - - # Create some empty properites - self.__patient = None - self.__acquisition_location = None - self.__name = None - self.__associated_events = {} - self.__added_noise = None - - # Populate property timeseries - # Option 1: timeseries is a filepath -> Read samples from file - if isinstance(timeseries, str): - filepath = timeseries - if source is None: - raise ValueError("To read a biosignal from a file, specify a BiosignalSource in 'source'.") - else: - # BiosignalSource can give the samples (required) and many other optional metadata. - # It's the BiosignalSource that decides what it gives, depending on what it can read. - - # Get all data that the source can read: - data = self.__source._get(filepath, type(self), **options) - - # Unwrap data: - # 'timeseries': dictionary of Timeseries (required) - # 'patient': Patient - # 'acquisition_location': BodyLocation - # 'events': tuple of Events - # 'name': string - self.__timeseries = data['timeseries'] - if data['patient'] is not None: - self.__patient = data['patient'] - if data['acquisition_location'] is not None: - self.__acquisition_location = data['acquisition_location'] - if data['events'] is not None: - self.associate(data['events']) - if data['name'] is not None: - self.__name = data['name'] - - # Option 2: timeseries is a time interval -> Fetch from database - if isinstance(timeseries, datetime): - pass # TODO - - # Option 3: timeseries is dictionary {chanel name: Timeseries} -> Save directly - if isinstance(timeseries, dict): - self.__timeseries = timeseries - # Check if Timeseries come with Events associated - for ts in timeseries.values(): - for event in ts.events: - if event.name in self.__associated_events and self.__associated_events[event.name] != event: - raise AssertionError("There are different Events with the same name among the Timeseries given.") - else: - self.__associated_events[event.name] = event - - # If user gives metadata, override what was given by the source: - if patient is not None: - self.__patient = patient - if acquisition_location is not None: - self.__acquisition_location = acquisition_location - if name is not None: - self.__name = name - - def __copy__(self): - return type(self)({ts: self.__timeseries[ts].__copy__() for ts in self.__timeseries}, self.__source, self.__patient, self.__acquisition_location, str(self.__name)) - - def _new(self, timeseries: Dict[str|BodyLocation, timeseries.Timeseries] | str | Tuple[datetime] = None, source:BiosignalSource.__subclasses__()=None, patient:Patient=None, acquisition_location:BodyLocation=None, name:str=None, events:Collection[Event]=None, added_noise=None): - timeseries = {ts: self.__timeseries[ts] for ts in self.__timeseries} if timeseries is None else timeseries # copy - source = self.__source if source is None else source # no copy - patient = self.__patient if patient is None else patient # no copy - acquisition_location = 
self.__acquisition_location if acquisition_location is None else acquisition_location # no copy - name = str(self.__name) if name is None else name # copy - - new = type(self)(timeseries, source, patient, acquisition_location, name) - - # Associate events; no need to copy - events = self.__associated_events if events is None else events - events = events.values() if isinstance(events, dict) else events - # Check if some event can be associated - for event in events: - try: - new.associate(event) - except ValueError: # outside the domain of every channel - pass # no problem; the Event will not be associated - - # Associate added noise reference: - if added_noise is not None: - new._Biosignal__added_noise = added_noise - - return new - - def _apply_operation_and_new(self, operation, - source:BiosignalSource.__subclasses__()=None, patient:Patient=None, - acquisition_location:BodyLocation=None, name:str=None, events:Collection[Event]=None, - **kwargs): - new_channels = {} - for channel_name in self.channel_names: - new_channels[channel_name] = self.__timeseries[channel_name]._apply_operation_and_new(operation, **kwargs) - return self._new(new_channels, source=source, patient=patient, acquisition_location=acquisition_location, - name=name, events=events) - - def _apply_operation_and_return(self, operation, **kwargs): - pass # TODO - - @property - def __has_single_channel(self) -> bool: - return len(self) == 1 - - def _get_channel(self, channel_name:str|BodyLocation) -> timeseries.Timeseries: - if channel_name in self.channel_names: - return self.__timeseries[channel_name] - else: - raise AttributeError(f"No channel named '{channel_name}'.") - - def _get_single_channel(self) -> tuple[str|BodyLocation, timeseries.Timeseries]: - """ - :return: channel_name, channel - """ - if not self.__has_single_channel: - raise AttributeError(f"This Biosignal does not have a single channel. It has multiple channels.") - return tuple(self.__timeseries.items())[0] - - def get_event(self, name: str) -> Event: - if name in self.__associated_events: - return self.__associated_events[name] - from_conditions = self.__get_events_from_medical_conditions() - if name in from_conditions: - return from_conditions[name] - else: - raise NameError(f"No Event named '{name}' associated to the Biosignal or its paitent's conditions.") - - @property - def preview(self): - """Returns 5 seconds of the middle of the signal.""" - domain = self.domain - middle_of_domain:DateTimeRange = domain[len(domain)//2] - middle = middle_of_domain.start_datetime + (middle_of_domain.timedelta / 2) - try: - return self[middle - timedelta(seconds=2) : middle + timedelta(seconds=3)] - except IndexError: - raise AssertionError(f"The middle segment of {self.name} from {self.patient_code} does not have at least 5 seconds to return a preview.") - - def when(self, condition: Callable, window: timedelta = None): - return Timeline(*[Timeline.Group(channel._when(condition, window), name=channel_name) for channel_name, channel in self], - name=self.name + " when '" + condition.__name__ + "' is True" + f" (in windows of {window})" if window else "") - - def __getitem__(self, item): - '''The built-in slicing and indexing operations.''' - - if isinstance(item, datetime): - if len(self) != 1: - raise IndexError("This Biosignal has multiple channels. 
Index the channel before indexing the datetime.") - return tuple(self.__timeseries.values())[0][item] - - if isinstance(item, (str, BodyLocation)): - if item in self.channel_names: - if len(self) == 1: - raise IndexError("This Biosignal only has 1 channel. Index only the datetimes.") - ts = {item: self.__timeseries[item].__copy__(), } - return self._new(timeseries=ts) - - elif item in self.__associated_events or item in self.__get_events_from_medical_conditions(): - if item in self.__associated_events: # Internal own Events - event = self.__associated_events[item] - else: # Events associated to MedicalConditions - event = self.__get_events_from_medical_conditions()[item] - - if event.has_onset and event.has_offset: - return self[DateTimeRange(event.onset, event.offset)] - elif event.has_onset: - return self[event.onset] - elif event.has_offset: - return self[event.offset] - - else: - try: - self.__timeseries[to_datetime(item)] - except: - raise IndexError("Datetime in incorrect format or '{}' is not a channel nor an event of this Biosignal.".format(item)) - - def __get_events_with_padding(event_name, padding_before=timedelta(seconds=0), padding_after=timedelta(seconds=0), exclude_event = False): - # Get Event object - if event_name in self.__associated_events: - event = self.__associated_events[event_name] - elif event_name in self.__get_events_from_medical_conditions(): - event = self.__get_events_from_medical_conditions()[event_name] - else: - raise IndexError(f"No Event named '{event_name}' associated to this Biosignal.") - - if isinstance(padding_before, datetime) and isinstance(padding_after, datetime) and exclude_event: - if event.has_onset and event.has_offset: - return self[DateTimeRange(padding_before, event.onset)] >> self[DateTimeRange(event.offset + timedelta(seconds=1/self.sampling_frequency), padding_after)] # FIXME: Sampling frequency might not be the same for all channels! - else: - raise IndexError(f"Event {event_name} is a point in time, not an event with a duration.") - - # Convert specific datetimes to timedeltas; is this inneficient? - if isinstance(padding_before, datetime): - if event.has_onset: - padding_before = event.onset - padding_before - elif event.has_offset: - padding_before = event.offset - padding_before - if exclude_event: - padding_after = - event.duration - if isinstance(padding_after, datetime): - if event.has_offset: - padding_after = padding_after - event.offset - elif event.has_onset: - padding_after = padding_after - event.onset - if exclude_event: - padding_before = - event.duration - - # Index - if event.has_onset and event.has_offset: - return self[DateTimeRange(event.onset - padding_before, event.offset + padding_after)] - elif event.has_onset: - return self[DateTimeRange(event.onset - padding_before, event.onset + padding_after)] - elif event.has_offset: - return self[DateTimeRange(event.offset - padding_before, event.offset + padding_after)] - - if isinstance(item, slice): - - # Everything but event - if isinstance(item.stop, str) and item.start is None and item.step is None: - if not item.stop.startswith('-'): - raise ValueError( - "Indexing a Biosignal like x[:'event':] is equivalent to having its entire domain. 
Did you mean x[:'-event':]?") - return __get_events_with_padding(item.stop[1:], padding_before=self.initial_datetime, padding_after=self.final_datetime, - exclude_event=True) - - # Everything before event - if isinstance(item.stop, str) and item.start is None: - event_name, exclude_event = item.stop, False - if event_name.startswith('-'): - event_name, exclude_event = event_name[1:], True - return __get_events_with_padding(event_name, padding_before=self.initial_datetime, exclude_event=exclude_event) - - # Everything after event - if isinstance(item.start, str) and item.stop is None: - event_name, exclude_event = item.start, False - if event_name.startswith('-'): - event_name, exclude_event = event_name[1:], True - return __get_events_with_padding(event_name, padding_after=self.final_datetime, exclude_event=exclude_event) - - # Event with padding - if isinstance(item.start, (timedelta, int)) and isinstance(item.step, (timedelta, int)) and isinstance(item.stop, str): - start = timedelta(seconds=item.start) if isinstance(item.start, int) else item.start # shortcut for seconds - step = timedelta(seconds=item.step) if isinstance(item.step, int) else item.step # shortcut for seconds - return __get_events_with_padding(item.stop, padding_before=start, padding_after=step) - elif isinstance(item.start, (timedelta, int)) and isinstance(item.stop, str): - start = timedelta(seconds=item.start) if isinstance(item.start, int) else item.start # shortcut for seconds - return __get_events_with_padding(item.stop, padding_before=start) - elif isinstance(item.start, str) and isinstance(item.stop, (timedelta, int)): - stop = timedelta(seconds=item.stop) if isinstance(item.stop, int) else item.stop # shortcut for seconds - return __get_events_with_padding(item.start, padding_after=stop) - - # Index by datetime - if isinstance(item.start, datetime) and isinstance(item.stop, datetime) and item.stop < item.start: - raise IndexError("Given final datetime comes before the given initial datetime.") - - if self.__has_single_channel: # one channel - channel_name = tuple(self.__timeseries.keys())[0] - channel = self.__timeseries[channel_name] - return self._new(timeseries={channel_name: channel[item]}) - - else: # multiple channels - ts = {} - events = set() - for k in self.channel_names: - ts[k] = self.__timeseries[k][item] - # Events outside the new domain get discarded, hence collecting the ones that remained - events.update(set(self.__timeseries[k].events)) - new = self._new(timeseries=ts, events=events) - return new - - if isinstance(item, DateTimeRange): # Pass item directly to each channel - ts = {} - events = set() - for k in self.channel_names: - res = self.__timeseries[k][item] - if res is not None: - ts[k] = res - # Events outside the new domain get discarded, hence collecting the ones that remained - events.update(set(self.__timeseries[k].events)) - - if len(ts) == 0: - raise IndexError(f"Event is outside every channel's domain.") - - new = self._new(timeseries=ts, events=events) - - - try: # to associate events, if they are inside the domain - new.associate(events) - except ValueError: - pass - - - return new - - if isinstance(item, tuple): - - # Structure-related: Channels - if all(isinstance(k, (str, BodyLocation)) and k in self.channel_names for k in item): - ts = {} - events = set() - for k in item: - ts[k] = self.__timeseries[k] - events.update(set(self.__timeseries[k].events)) - new = self._new(timeseries=ts, events=events) - return new - - # Time-related: Slices, Datetimes, Events, ... 
- else: - item = sorted(item) - res = None - for k in item: - if res is None: - res = self[item[0]] - else: - print(k) - res = res >> self[k] - - res.name = self.name - return res - - if isinstance(item, Timeline): - if item.is_index: - res = self[item._as_index()] - res.name += f" indexed by '{item.name}'" - return res - else: - return IndexError("This Timeline cannot serve as index, because it contains multiple groups of intervals or points.") - - raise IndexError("Index types not supported. Give a datetime (can be in string format), a slice or a tuple of those.") - - - @property - def channel_names(self) -> set[str | BodyLocation]: - '''Returns a tuple with the labels associated to every channel.''' - return set(self.__timeseries.keys()) - - @property - def name(self): - '''Returns the associated name, or 'No Name' if none was provided.''' - return self.__name if self.__name != None else "No Name" - - @name.setter - def name(self, name:str): - self.__name = name - - @property - def patient_code(self): - '''Returns the code of the associated Patient, or 'n.d.' if none was provided.''' - return self.__patient.code if self.__patient != None else 'n.d.' - - @property - def patient_conditions(self) -> Set[MedicalCondition]: - '''Returns the set of medical conditions of the associated Patient, or None if no Patient was associated.''' - return self.__patient.conditions if self.__patient != None else set() - - @property - def acquisition_location(self): - '''Returns the associated acquisition location, or None if none was provided.''' - return self.__acquisition_location - - @property - def source(self) -> BiosignalSource: - '''Returns the BiosignalSource from where the data was read, or None if was not specified.''' - return self.__source - - @property - def type(self) -> ClassVar: - '''Returns the biosignal modality class. E.g.: ECG, EMG, EDA, ...''' - return type(self) - - @property - def initial_datetime(self) -> datetime: - '''Returns the initial datetime of the channel that starts the earliest.''' - return min([ts.initial_datetime for ts in self.__timeseries.values()]) - - @property - def final_datetime(self) -> datetime: - '''Returns the final datetime of the channel that ends the latest.''' - return max([ts.final_datetime for ts in self.__timeseries.values()]) - - @property - def domain(self) -> Tuple[DateTimeRange]: - if len(self) == 1: - return tuple(self.__timeseries.values())[0].domain - else: - channels = tuple(self.__timeseries.values()) - cumulative_intersection:Tuple[DateTimeRange] - for k in range(1, len(self)): - if k == 1: - cumulative_intersection = channels[k].overlap(channels[k-1]) - else: - cumulative_intersection = channels[k].overlap(cumulative_intersection) - return cumulative_intersection - - @property - def domain_timeline(self) -> Timeline: # TODO: mmerge with domain - return Timeline(Timeline.Group(self.domain), name=self.name + ' Domain') - - @property - def subdomains(self) -> Tuple[DateTimeRange]: - if len(self) == 1: - return tuple(self.__timeseries.values())[0].subdomains - else: - raise NotImplementedError() - - def _vblock(self, i: int): - """ - Returns a block of timelly allined segments, vertially alligned for all channels. - Note: This assumes all channels are segmented in the same way, i.e., have exactly the same set of subdomains. 
- :param i: The block index - :return: ndarray of vertical stacked segmetns - """ - N = self._n_segments - if isinstance(N, int): - if i < N: - return np.vstack([channel[i] for channel in self.__timeseries.values()]) - else: - IndexError(f"This Biosignal as only {N} blocks.") - else: - raise AssertionError("Not all channels are segmented in the same way, hence blocks cannot be created.") - - def _block_subdomain(self, i: int) -> DateTimeRange: - if len(self) == 1: - return tuple(self.__timeseries.values())[0]._block_subdomain(i) - else: - raise NotImplementedError() - - @property - def _n_segments(self) -> int | dict: - """ - Returns the number of segments of each Timeseries. - :rtype: dict, with the number of segments labelled by channel name; or int if they are all the same - """ - n_segments = {} - last_n = None - all_equal = True - for channel_name, channel in self.__timeseries.items(): - x = channel.n_segments - last_n = x - n_segments[channel_name] = x - if last_n is not None and last_n != x: - all_equal = False - if all_equal: - return last_n - else: - return n_segments - - @property - def duration(self): - common_duration = tuple(self.__timeseries.values())[0].duration - for _, channel in self: - if channel.duration != common_duration: - raise AssertionError("Not all channels have the same duration.") - return common_duration - - def __get_events_from_medical_conditions(self): - res = {} - for condition in self.patient_conditions: - res.update(condition._get_events()) - return res - - @property - def events(self): - '''Tuple of associated Events, ordered by datetime.''' - return tuple(sorted(list(self.__associated_events.values()) + list(self.__get_events_from_medical_conditions().values()))) - - @property - def sampling_frequency(self) -> float: - '''Returns the sampling frequency of every channel (if equal), or raises an error if they are not equal.''' - if len(self) == 1: - return tuple(self.__timeseries.values())[0].sampling_frequency - else: - common_sf = None - for _, channel in self: - if common_sf is None: - common_sf = channel.sampling_frequency - elif channel.sampling_frequency != common_sf: - raise AttributeError("Biosignal contains 2+ channels, all not necessarly with the same sampling frequency.") - return common_sf - - @property - def added_noise(self): - '''Returns a reference to the noisy component, if the Biosignal was created with added noise; else the property does not exist.''' - if self.__added_noise is not None: - return self.__added_noise - else: - raise AttributeError("No noise was added to this Biosignal.") - - def __len__(self): - '''Returns the number of channels.''' - return len(self.__timeseries) - - def __repr__(self): - '''Returns a textual description of the Biosignal.''' - res = "Name: {}\nType: {}\nLocation: {}\nNumber of Channels: {}\nChannels: {}\nUseful Duration: {}\nSource: {}\n".format( - self.name, - self.type.__name__, - self.acquisition_location, - len(self), - ''.join([(x + ', ') for x in self.channel_names]), - self.duration, - self.source.__str__(None) if isinstance(self.source, ABCMeta) else str(self.source)) - - if len(self.__associated_events) != 0: - res += "Events:\n" - for event in sorted(self.__associated_events.values()): - res += '- ' + str(event) + '\n' - events_from_medical_conditions = dict(sorted(self.__get_events_from_medical_conditions().items(), key=lambda item: item[1])) - if len(events_from_medical_conditions) != 0: - res += "Events associated to Medical Conditions:\n" - for key, event in 
events_from_medical_conditions.items(): - res += f"- {key}:\n{event}\n" - return res - - def _to_dict(self) -> Dict[str|BodyLocation, timeseries.Timeseries]: - return deepcopy(self.__timeseries) - - def _to_array(self) -> ndarray: - """ - Converts Biosignal to a NumPy ndarray. - :return: A C x M x N array, where C is the number of channels, M the number of segments of each, and N their length. - :rtype: list[numpy.ndarray] - """ - x = [channel._to_array() for channel in self.__timeseries.values()] - return np.stack(x) - - def __iter__(self): - return self.__timeseries.items().__iter__() - - def __contains__(self, item): - if isinstance(item, str): - if item in self.__timeseries.keys(): # if channel exists - return True - if item in self.__associated_events: # if Event occurs - return True - events_from_consitions = self.__get_events_from_medical_conditions() - for label, event in events_from_consitions: - if item == label and event.domain in self: - return True - return False - elif isinstance(item, (datetime, DateTimeRange)): - for _, channel in self: - if item in channel: # if at least one channel defines this point in time - return True - return False - else: - raise TypeError(f'Cannot apply this operation with {type(item)}.') - - def __mul__(self, other): - if isinstance(other, (float, int)): - suffix = f' (dilated up by {str(other)})' if other > 1 else f' (compressed up by {str(other)})' - return self._apply_operation_and_new(lambda x: x*other, name=self.name + suffix) - - def __sub__(self, other): - return self + (other * -1) - - def __neg__(self): - return self * -1 - - def __add__(self, other): - """ - If a float or int: - Add constant to every channel. Translation of the signal. - If Biosignal: - Adds both sample-by-sample, if they have the same domain. - Notes: - - If the two Biosignals have two distinct acquisition locations, they will be lost. - - If the two Biosignals have two distinct sources, they will be lost. - - If the two Biosignals have the distict patients, they will be lost. - Raises: - - TypeError if Biosignals are not of the same type. - - ArithmeticError if Biosignals do not have the same domain. - """ - - if isinstance(other, (float, int)): - return self._apply_operation_and_new(lambda x: x+other, name=self.name + f' (shifted up by) {str(other)}') - - if isinstance(other, Biosignal): - # Check errors - if self.type != other.type: - while True: - answer = input(f"Trying to add an {self.type.__name__} with an {other.type.__name__}. Do you mean to add templeates of the second as noise? 
(y/n)") - if answer.lower() in ('y', 'n'): - if answer.lower() == 'y': - return Biosignal.withAdditiveNoise(self, other) - else: - raise TypeError("Cannot add a {0} to a {1} if not as noise.".format(other.type.__name__, self.type.__name__)) - if self.channel_names != other.channel_names: - raise ArithmeticError("Biosignals to add must have the same channel names.") - if self.domain != other.domain: - raise ArithmeticError("Biosignals to add must have the same domains.") - - # Prepare common metadata - name = f"{self.name} + {other.name}" - acquisition_location = self.acquisition_location if self.acquisition_location == other.acquisition_location else None - patient = self.__patient if self.patient_code == other.patient_code else None - if isclass(self.source) and isclass(other.source): # Un-instatiated sources - if self.source == other.source: - source = self.__source - else: - source = None - else: - if type(self.source) == type(other.source) and self.source == other.source: - source = self.__source - else: - source = None - - # Perform addition - res_timeseries = {} - for channel_name in self.channel_names: - res_timeseries[channel_name] = self._to_dict()[channel_name] + other._to_dict()[channel_name] - - # Union of Events - events = set(self.events).union(set(other.events)) - - return self._new(timeseries=res_timeseries, source=source, patient=patient, acquisition_location=acquisition_location, name=name, events=events) - - raise TypeError(f"Addition operation not valid with Biosignal and object of type {type(other)}.") - - def __and__(self, other): - """ - Joins the channels of two Biosignals of the same type, if they do not have the same set of channel names. - Notes: - - If the two Biosignals have two distinct acquisition locations, they will be lost. - - If the two Biosignals have two distinct sources, they will be lost. - - If the two Biosignals have the distict patients, they will be lost. - Raises: - - TypeError if Biosignals are not of the same type. - - ArithmeticError if both Biosignals have any channel name in common. - """ - - # Check errors - if not isinstance(other, Biosignal): - raise TypeError(f"Operation join channels is not valid with object of type {type(other)}.") - if self.type != other.type: - raise TypeError("Cannot join a {0} to a {1}".format(other.type.__name__, self.type.__name__)) - if len(self.channel_names.intersection(other.channel_names)) != 0: - raise ArithmeticError("Channels to join cannot have the same names.") - - # Prepare common metadata - name = f"{self.name} and {other.name}" - acquisition_location = self.acquisition_location if self.acquisition_location == other.acquisition_location else None - patient = self.__patient if self.patient_code == other.patient_code else None - if isclass(self.source) and isclass(other.source): # Un-instatiated sources - if self.source == other.source: - source = self.__source - else: - source = None - else: - if type(self.source) == type(other.source) and self.source == other.source: - source = self.__source - else: - source = None - - # Join channels - res_timeseries = {} - res_timeseries.update(self._to_dict()) - res_timeseries.update(other._to_dict()) - - # Union of Events - events = set(self.events).union(set(other.events)) - - return self._new(timeseries=res_timeseries, source=source, patient=patient, acquisition_location=acquisition_location, name=name, events=events) - - def __rshift__(self, other): - """ - Temporally concatenates two Biosignal, if they have the same set of channel names. 
- Notes: - - If the two Biosignals have two distinct acquisition locations, they will be lost. - - If the two Biosignals have two distinct sources, they will be lost. - - If the two Biosignals have the distict patients, they will be lost. - Raises: - - TypeError if Biosignals are not of the same type. - - ArithmeticError if both Biosignals do not have the same channel names. - - ArithmeticError if the second comes before the first. - """ - - # Check errors - if not isinstance(other, Biosignal): - raise TypeError(f"Operation join channels is not valid with object of type {type(other)}.") - if self.type != other.type: - raise TypeError("Cannot join a {0} to a {1}".format(other.type.__name__, self.type.__name__)) - if self.channel_names != other.channel_names: - raise ArithmeticError("Biosignals to concatenate must have the same channel names.") - if other.initial_datetime < self.final_datetime: - raise ArithmeticError("The second Biosignal comes before (in time) the first Biosignal.") - - # Prepare common metadata - name = f"{self.name} >> {other.name}" - acquisition_location = self.acquisition_location if self.acquisition_location == other.acquisition_location else None - patient = self.__patient if self.patient_code == other.patient_code else None - if isclass(self.source) and isclass(other.source): # Un-instatiated sources - if self.source == other.source: - source = self.__source - else: - source = None - else: - if type(self.source) == type(other.source) and self.source == other.source: - source = self.__source - else: - source = None - - # Perform concatenation - res_timeseries = {} - for channel_name in self.channel_names: - res_timeseries[channel_name] = self._to_dict()[channel_name] >> other._to_dict()[channel_name] - - # Union of Events - events = set(self.__associated_events).union(set(other._Biosignal__associated_events)) - - return self._new(timeseries=res_timeseries, source=source, patient=patient, acquisition_location=acquisition_location, name=name, - events=events) - - # =================================== - # Binary Logic using Time and Conditions - - def __lt__(self, other): - if isinstance(other, Biosignal): - return self.final_datetime < other.initial_datetime - else: - res = self.when(lambda x: x < other) - res.name(self.name + ' < ' + str(other)) - return res - - def __le__(self, other): - if isinstance(other, Biosignal): - return self.final_datetime <= other.initial_datetime - else: - res = self.when(lambda x: x <= other) - res.name(self.name + ' >= ' + str(other)) - return res - - def __gt__(self, other): - if isinstance(other, Biosignal): - return self.initial_datetime > other.final_datetime - else: - res = self.when(lambda x: x > other) - res.name(self.name + ' >= ' + str(other)) - return res - - def __ge__(self, other): - if isinstance(other, Biosignal): - return self.initial_datetime >= other.final_datetime - else: - res = self.when(lambda x: x >= other) - res.name(self.name + ' >= ' + str(other)) - return res - - def __eq__(self, other): - if isinstance(other, Biosignal): - return self.initial_datetime == other.initial_datetime and self.final_datetime == other.final_datetime - else: - res = self.when(lambda x: x == other) - res.name(self.name + ' >= ' + str(other)) - return res - - def __ne__(self, other): - if isinstance(other, Biosignal): - return not self.__eq__(other) - else: - res = self.when(lambda x: x != other) - res.name(self.name + ' >= ' + str(other)) - return res - - ######## Events - - def set_channel_name(self, current:str|BodyLocation, 
new:str|BodyLocation): - if current in self.__timeseries.keys(): - self.__timeseries[new] = self.__timeseries[current] - del self.__timeseries[current] - else: - raise AttributeError(f"Channel named '{current}' does not exist.") - - def set_event_name(self, current:str, new:str): - if current in self.__associated_events.keys(): - event = self.__associated_events[current] - self.__associated_events[new] = Event(new, event._Event__onset, event._Event__offset) - del self.__associated_events[current] - else: - raise AttributeError(f"Event named '{current}' is not associated.") - - def delete_events(self): - for _, channel in self: - channel.delete_events() - self.__associated_events = {} - - def filter(self, filter_design) -> int: - ''' - Filters every channel with to the given filter_design. - - @param filter_design: A Filter object specifying the designed filter to be applied. - @return: 0 if the filtering is applied successfully. - @rtype: int - ''' - for channel in self.__timeseries.values(): - channel._accept_filtering(filter_design) - return 0 - - def undo_filters(self): - ''' - Restores the raw samples of every channel, eliminating the action of any applied filter. - ''' - for channel in self.__timeseries.values(): - channel._undo_filters() - - def resample(self, frequency:float): - ''' - Resamples every channel to the new sampling frequency given, using Fourier method. - @param frequency: New sampling frequency (in Hertz). - ''' - for channel in self.__timeseries.values(): - channel._resample(frequency) - - def __draw_plot(self, timeseries_plotting_method, title, xlabel, ylabel, grid_on:bool, show:bool=True, save_to:str=None): - ''' - Draws a base plot to display every channel in a subplot. It is independent of the content that is plotted. - - @param timeseries_plotting_method: The method to be called in Timeseries, that defines what content to plot. - @param title: What the content is about. The Biosignal's name and patient code will be added. - @param xlabel: Label for the horizontal axis. - @param ylabel: Label for the vertical axis. - @param grid_on: True if grid in to be drawn or not; False otherwise. - @param show: True if plot is to be immediately displayed; False otherwise. - @param save_to: A path to save the plot as an image file; If none is provided, it is not saved. 
-        @return: None
-        '''
-        fig = plt.figure(figsize=(13, 2.5*len(self)))
-
-        all_events = self.events
-        all_onsets = [e.onset for e in all_events if e.has_onset]
-        all_offsets = [e.offset for e in all_events if e.has_offset]
-        all_vlines = all_onsets+all_offsets
-
-        for i, channel_name in zip(range(len(self)), self.channel_names):
-            channel = self.__timeseries[channel_name]
-            ax = plt.subplot(len(self), 1, i+1, title=channel_name)
-            ax.title.set_size(10)
-            ax.margins(x=0)
-            ax.set_xlabel(xlabel, fontsize=8, rotation=0, loc="right")
-            ax.set_ylabel(ylabel, fontsize=8, rotation=90, loc="top")
-            plt.xticks(fontsize=9)
-            plt.yticks(fontsize=9)
-            if grid_on:
-                ax.grid()
-            timeseries_plotting_method(self=channel)
-
-            _vlines = [int((t - channel.initial_datetime).total_seconds() * channel.sampling_frequency) for t in all_vlines if t in channel]
-            plt.vlines(_vlines, ymin=channel.min(), ymax=channel.max(), colors='red')
-
-        fig.suptitle((title + ' ' if title is not None else '') + self.name + ' from patient ' + str(self.patient_code), fontsize=11)
-        fig.tight_layout()
-        if save_to is not None:
-            fig.savefig(save_to)
-        plt.show() if show else plt.close()
-
-        #return fig
-
-    def plot_spectrum(self, show:bool=True, save_to:str=None):
-        '''
-        Plots the power spectrum of every channel.
-        @param show: True if the plot is to be immediately displayed; False otherwise.
-        @param save_to: A path to save the plot as an image file; if none is provided, it is not saved.
-        '''
-        self.__draw_plot(timeseries.Timeseries._plot_spectrum, 'Power Spectrum of', 'Frequency (Hz)', 'Power (dB)', True, show, save_to)
-
-    def plot(self, show:bool=True, save_to:str=None):
-        '''
-        Plots the amplitude over time of every channel.
-        @param show: True if the plot is to be immediately displayed; False otherwise.
-        @param save_to: A path to save the plot as an image file; if none is provided, it is not saved.
-        '''
-        return self.__draw_plot(timeseries.Timeseries._plot, None, 'Time', 'Amplitude (n.d.)', False, show, save_to)
-
-    @abstractmethod
-    def plot_summary(self, show:bool=True, save_to:str=None):
-        '''
-        Plots a summary of relevant aspects of common analysis of the Biosignal.
-        '''
-        pass  # Implemented in each type
-
-    def apply_operation(self, operation, **kwargs):
-        for channel in self.__timeseries.values():
-            channel._apply_operation(operation, **kwargs)
-
-    def invert(self, channel_label:str=None):
-        inversion = lambda x: -1*x
-        if channel_label is None:  # apply to all channels
-            self.apply_operation(inversion)
-        else:  # apply only to one channel
-            self.__timeseries[channel_label]._apply_operation(inversion)
-
-    def associate(self, events: Event | Collection[Event] | Dict[str, Event]):
-        '''
-        Associates an Event with all Timeseries.
-        Events have names that serve as keys. If keys are given,
-        i.e. if 'events' is a dict, then the Event names are overridden.
-        @param events: One or multiple Event objects.
-        @rtype: None
-        '''
-
-        def __add_event(event: Event):
-            n_channels_associated = 0
-            for _, channel in self:
-                try:
-                    channel.associate(event)
-                    n_channels_associated += 1
-                except ValueError:
-                    pass
-            if n_channels_associated > 0:  # If at least one association was possible
-                self.__associated_events[event.name] = event
-            else:
-                raise ValueError(f"Event '{event.name}' is outside of every channel's domain.")
-
-        if isinstance(events, Event):
-            __add_event(events)
-        elif isinstance(events, dict):
-            for event_key in events:
-                event = events[event_key]
-                __add_event(Event(event_key, event._Event__onset, event._Event__offset))  # rename with given key
-        else:
-            for event in events:
-                __add_event(event)
-
-    def disassociate(self, event_name:str):
-        '''
-        Disassociates an Event from all Timeseries.
-        @param event_name: The name of the Event to be removed.
-        @rtype: None
-        '''
-        if event_name in self.__associated_events:
-            for _, channel in self:
-                try:
-                    channel.disassociate(event_name)
-                except NameError:
-                    pass
-            del self.__associated_events[event_name]
-        else:
-            raise NameError(f"There's no Event '{event_name}' associated to this Biosignal.")
-
-    @classmethod
-    def withAdditiveNoise(cls, original, noise, name:str = None):
-        """
-        Creates a new Biosignal from 'original' with added 'noise'.
-
-        :param original: (Biosignal) The original Biosignal to be contaminated with noise.
-        :param noise: (Noise | Timeseries | Biosignal) The noise to add to the original Biosignal.
-        :param name: (str) The name to associate to the resulting Biosignal.
-
-        When 'noise' is a Noise:
-        - A stretch of noise, with the duration of the channel, will be generated to be added to each channel.
-        - 'noise' should be configured with the same sampling frequency as the channels.
-
-        When 'noise' is a Biosignal:
-        When it has the same set of channels as 'original', sampled at the same frequency:
-        - Each noisy channel will be added to the corresponding channel of 'original', in a template-wise manner.
-        When it has a unique channel:
-        - That noisy channel will be added to every channel of 'original', in a template-wise manner.
-        - That noisy channel should have the same sampling frequency as every channel of 'original'.
-        - If 'noise' has multiple segments, they are concatenated to make a hyper-template.
-        - Exception: in the case where both Timeseries have the same domain, the noisy samples will be added in a
-        segment-wise manner.
-
-        When 'noise' is a Timeseries sampled at the same frequency as 'original':
-        - Its samples will be added to every channel of 'original', in a template-wise manner.
-        - If 'noise' has multiple segments, they are concatenated to make a hyper-template.
-        - Exception: in the case where both Timeseries have the same domain, the noisy samples will be added in a
-        segment-wise manner.
-        - 'noise' should have been sampled at the same frequency as 'original'.
-
-        What is "template-wise manner"?
-        - If the template segment is longer than any original segment, the template segment will be trimmed accordingly.
-        - If the template segment is shorter than any original segment, the template will be repeated in time.
-        - If the two segments are of equal length, they are added as they are.
-
-        :return: A Biosignal with the same properties as 'original', but with noise added to the samples of every channel.
- :rtype: Biosignal subclass - """ - - if not isinstance(original, Biosignal): - raise TypeError(f"Parameter 'original' must be of type Biosignal; but {type(original)} was given.") - - if not isinstance(noise, (Noise, timeseries.Timeseries, Biosignal)): - raise TypeError(f"Parameter 'noise' must be of types Noise, Timeseries or Biosignal; but {type(noise)} was given.") - - if name is not None and not isinstance(name, str): - raise TypeError( - f"Parameter 'name' must be of type str; but {type(name)} was given.") - - def __add_template_noise(samples: ndarray, template: ndarray): - # Case A - if len(samples) < len(template): - _template = template[:len(samples)] # cut where it is enough - return samples + _template # add values - # Case B - elif len(samples) > len(template): - _template = np.tile(template, ceil(len(samples)/len(template))) # repeat full-pattern - _template = _template[:len(samples)] # cut where it is enough - return samples + _template # add values - # Case C - else: # equal lengths - return samples + template # add values - - def __noisy_timeseries(original:timeseries.Timeseries, noise:timeseries.Timeseries) -> timeseries.Timeseries: - # Case 1: Segment-wise - if original.domain == noise.domain: - template = [noise.samples, ] if noise.is_contiguous else noise.samples - return original._apply_operation_and_new(__add_template_noise, template=template, - iterate_over_each_segment_key='template') - # Case 2: Template-wise - elif noise.is_contiguous: - template = noise.samples - return original._apply_operation_and_new(__add_template_noise, template=template) - # Case 3: Template-wise, with hyper-template - else: - template = np.concatenate(noise.samples) # concatenate as a hyper-template - return original._apply_operation_and_new(__add_template_noise, template=template) - - noisy_channels = {} - - # Case Noise - if isinstance(noise, Noise): - for channel_name in original.channel_names: - channel = original._get_channel(channel_name) - if channel.sampling_frequency == noise.sampling_frequency: - template = noise[channel.duration] - noisy_channels[channel_name] = channel._apply_operation_and_new(__add_template_noise, template=template) - else: - raise AssertionError( - f"Noise does not have the same sampling frequency as channel '{channel_name}' of 'original'." - f"Suggestion: Resample one of them first.") - - # Case Timeseries - elif isinstance(noise, timeseries.Timeseries): - for channel_name in original.channel_names: - channel = original._get_channel(channel_name) - if channel.units != noise.units and channel.units != None and channel.units != Unitless and noise.units != None and noise.units != Unitless: - raise AssertionError( - f"Noise does not have the same units as channel '{channel_name}' of 'original'." - f"Suggestion: If possible, convert one of them first or drop units.") - if channel.sampling_frequency == noise.sampling_frequency: - noisy_channel = __noisy_timeseries(channel, noise) - noisy_channels[channel_name] = noisy_channel - else: - raise AssertionError( - f"Noise does not have the same sampling frequency as channel '{channel_name}' of 'original'." 
- f"Suggestion: Resample one of them first.") - - - elif isinstance(noise, Biosignal): - # Case Biosignal channel-wise - if original.channel_names == noise.channel_names: - for channel_name in original.channel_names: - original_channel = original._get_channel(channel_name) - noise_channel = noise._get_channel(channel_name) - if original_channel.units != noise_channel.units and original_channel.units != None and original_channel.units != Unitless and noise_channel.units != None and noise_channel.units != Unitless: - raise AssertionError( - f"Noise does not have the same units as channel '{channel_name}' of 'original'." - f"Suggestion: If possible, convert one of them first or drop units.") - if original_channel.sampling_frequency == noise_channel.sampling_frequency: - noisy_channel = __noisy_timeseries(original_channel, noise_channel) - noisy_channels[channel_name] = noisy_channel - else: - raise AssertionError(f"Channels '{channel_name}' do not have the same sampling frequency in 'original' and 'noise'." - f"Suggestion: Resample one of them first.") - - # Case Biosignal unique channel - elif len(noise) == 1: - _, x = tuple(iter(noise))[0] - for channel_name in original.channel_names: - channel = original._get_channel(channel_name) - if channel.units != x.units and channel.units != None and channel.units != Unitless and x.units != None and x.units != Unitless: - raise AssertionError( - f"Noise does not have the same units as channel '{channel_name}' of 'original'." - f"Suggestion: If possible, convert one of them first or drop units.") - if channel.sampling_frequency == x.sampling_frequency: - noisy_channel = __noisy_timeseries(channel, x) - noisy_channels[channel_name] = noisy_channel - else: - raise AssertionError(f"Noise does not have the same sampling frequency as channel '{channel_name}' of 'original'." - f"Suggestion: Resample one of them first.") - - else: - raise ArithmeticError("Noise should have 1 channel only (to be added to every channel of 'original') " - "or the same channels as 'original' (for each to be added to the corresponding channel of 'original'.") - - events = set.union(set(original.events), set(noise.events)) if isinstance(noise, (Biosignal, timeseries.Timeseries)) else None - - return original._new(timeseries = noisy_channels, name = name if name is not None else 'Noisy ' + original.name, - events = events, added_noise=noise) - - def restructure_domain(self, time_intervals:tuple[DateTimeRange]): - domain = self.domain - if len(domain) >= len(time_intervals): - for _, channel in self: - # 1. Concatenate segments - channel._concatenate_segments() - # 2. Partition according to new domain - channel._partition(time_intervals) - else: - NotImplementedError("Not yet implemented.") - - def tag(self, tags: str | tuple[str]): - """ - Mark all channels with a tag. Useful to mark machine learning targets. - :param tags: The label or labels to tag the channels. - :return: None - """ - if isinstance(tags, str): - for _, channel in self: - channel.tag(tags) - elif isinstance(tags, tuple) and all(isinstance(x, str) for x in tags): - for x in tags: - for _, channel in self: - channel.tag(x) - else: - raise TypeError("Give one or multiple string labels to tag the channels.") - - @classmethod - def fromNoise(cls, - noises: Noise | Dict[str|BodyLocation, Noise], - time_intervals: DateTimeRange | tuple[DateTimeRange], - name: str = None): - """ - Creates a type of Biosignal from a noise source. 
-
-        :param noises:
-            - If a Noise object is given, the Biosignal will have 1 channel for the specified time interval.
-            - If a dictionary of Noise objects is given, the Biosignal will have multiple channels, with different
-            generated samples, for the specified time interval, named after the dictionary keys.
-
-        :param time_intervals: Interval [x, y[ where x will be the initial date and time of every channel, and y will be
-        the final date and time of every channel; or a union of intervals, in case a tuple is given.
-
-        :param name: The name to be associated to the Biosignal. Optional.
-
-        :return: Biosignal subclass
-        """
-
-        if not isinstance(time_intervals, DateTimeRange) and not (isinstance(time_intervals, tuple) and
-                                                                  all([isinstance(x, DateTimeRange) for x in time_intervals])):
-            raise TypeError(f"Parameter 'time_intervals' should be of type DateTimeRange or a tuple of them.")
-
-        if isinstance(time_intervals, tuple) and len(time_intervals) == 1:
-            time_intervals = time_intervals[0]
-
-        channels = {}
-
-        if isinstance(noises, Noise):
-            if isinstance(time_intervals, DateTimeRange):
-                samples = noises[time_intervals.timedelta]
-                channels[noises.name] = timeseries.Timeseries(samples, time_intervals.start_datetime, noises.sampling_frequency,
-                                                              units=Unitless(), name=noises.name)
-            else:
-                segments = {x.start_datetime: noises[x.timedelta] for x in time_intervals}
-                channels[noises.name] = timeseries.Timeseries.withDiscontiguousSegments(segments, noises.sampling_frequency,
-                                                                                        units=Unitless(), name=noises.name)
-
-        elif isinstance(noises, dict):
-            if isinstance(time_intervals, DateTimeRange):
-                for channel_name, noise in noises.items():
-                    samples = noise[time_intervals.timedelta]
-                    channels[channel_name] = timeseries.Timeseries(samples, time_intervals.start_datetime, noise.sampling_frequency,
-                                                                   units=Unitless(), name=noise.name + f" : {channel_name}")
-            else:
-                for channel_name, noise in noises.items():
-                    segments = {x.start_datetime: noise[x.timedelta] for x in time_intervals}
-                    channels[channel_name] = timeseries.Timeseries.withDiscontiguousSegments(segments, noise.sampling_frequency,
-                                                                                             units=Unitless(), name=noise.name + f" : {channel_name}")
-
-        return cls(channels, name=name)
-
-    def acquisition_scores(self):
-        print(f"Acquisition scores for '{self.name}'")
-        completeness_score = self.completeness_score()
-        print("Completeness Score = " + ("%.2f" % (completeness_score*100) if completeness_score else "n.d.") + "%")
-        onbody_score = self.onbody_score()
-        print("On-body Score = " + ("%.2f" % (onbody_score*100) if onbody_score else "n.d.") + "%")
-        quality_score = self.quality_score(_onbody_duration=onbody_score*self.duration if onbody_score else None)
-        print("Quality Score = " + ("%.2f" % (quality_score*100) if quality_score else "n.d.") + "%")
-
-    def completeness_score(self):
-        recorded_duration = self.duration
-        expected_duration = self.final_datetime - self.initial_datetime
-        return recorded_duration / expected_duration
-
-    def onbody_score(self):
-        if hasattr(self.source, 'onbody'):  # if the BiosignalSource defines an 'onbody' method, this score exists, so it is computed and returned
-            x = self.source.onbody(self)
-            if x:
-                return self.source.onbody(self).duration / self.duration
-
-    def quality_score(self, _onbody_duration=None):
-        if _onbody_duration:
-            if hasattr(self, 'acceptable_quality'):  # if the Biosignal modality defines an 'acceptable_quality' method, this score exists, so it is computed and returned
-                return self.acceptable_quality().duration / _onbody_duration
-        else:
-            if hasattr(self,
'acceptable_quality') and hasattr(self.source, 'onbody'): - return self.acceptable_quality().duration / self.source.onbody(self).duration - - # =================================== - # SERIALIZATION - - def __getstate__(self): - """ - 1: __name (str) - 2: __source (BiosignalSource subclass (instantiated or not)) - 3: __patient (Patient) - 4: __acquisition_location (BodyLocation) - 5: __associated_events (tuple) - 6: __timeseries (dict) - """ - return (self.__SERIALVERSION, self.__name, self.__source, self.__patient, self.__acquisition_location, - tuple(self.__associated_events.values()), self.__timeseries) - - def __setstate__(self, state): - if state[0] in (1, 2): - self.__name, self.__source, self.__patient, self.__acquisition_location = state[1:5] - self.__timeseries = state[6] - self.__associated_events = {} - self.associate(state[5]) - else: - raise IOError( - f'Version of {self.__class__.__name__} object not supported. Serialized version: {state[0]};' - f'Supported versions: 1 and 2.') - - EXTENSION = '.biosignal' - - def save(self, save_to:str): - # Check extension - if not save_to.endswith(Biosignal.EXTENSION): - save_to += Biosignal.EXTENSION - - # Make memory maps - temp_dir = mkdtemp(prefix='ltbio.') - for _, channel in self: - channel._memory_map(temp_dir) - - # Write - from _pickle import dump - with open(save_to, 'wb') as f: - dump(self, f) - - # Clean up memory maps - rmtree(temp_dir) - - @classmethod - def load(cls, filepath:str): - # Check extension - if not filepath.endswith(Biosignal.EXTENSION): - raise IOError("Only .biosignal files are allowed.") - - from _pickle import load - from _pickle import UnpicklingError - - # Read - try: # Versions >= 2023.0: - f = open(filepath, 'rb') - biosignal = load(f) - except UnpicklingError as e: # Versions 2022.0, 2022.1 and 2022.2: - from bz2 import BZ2File - # print("Loading...\nNote: Loading a version older than 2023.0 takes significantly more time. It is suggested you save this Biosignal again, so you can have it in the newest fastest format.") - f = BZ2File(filepath, 'rb') - biosignal = load(f) - f.close() - return biosignal - - -class DerivedBiosignal(Biosignal): - """ - A DerivedBiosignal is a set of Timeseries of some extracted feature from an original Biosignal. - It is such a feature that it is useful to manipulate it as any other Biosignal. - """ - - def __init__(self, timeseries, source = None, patient = None, acquisition_location = None, name = None, original: Biosignal = None): - if original is not None: - super().__init__(timeseries, original.source, original._Biosignal__patient, original.acquisition_location, original.name) - else: - super().__init__(timeseries, source, patient, acquisition_location, name) - - self.original = original # Save reference diff --git a/src/ltbio/biosignals/modalities/EDA.py b/src/ltbio/biosignals/modalities/EDA.py deleted file mode 100644 index 75b9d677..00000000 --- a/src/ltbio/biosignals/modalities/EDA.py +++ /dev/null @@ -1,61 +0,0 @@ -# -*- encoding: utf-8 -*- - -# =================================== - -# IT - LongTermBiosignals - -# Package: biosignals -# Module: EDA -# Description: Class EDA, a type of Biosignal named Electrodermal Activity. 
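For clarity, a minimal sketch of the back-reference pattern that DerivedBiosignal introduces above; the Signal and Derived classes here are illustrative stand-ins, not part of this patch:

    class Signal:
        def __init__(self, samples, name):
            self.samples = samples
            self.name = name

    class Derived(Signal):
        def __init__(self, samples, name, original=None):
            # As in DerivedBiosignal, metadata comes from 'original' when one is given.
            super().__init__(samples, original.name if original is not None else name)
            self.original = original  # back-reference kept for provenance

    ecg = Signal([0.1, 0.5, 0.2], "chest ECG")
    rri = Derived([0.8, 0.82], None, original=ecg)
    assert rri.original is ecg and rri.name == "chest ECG"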
-
-# Contributors: João Saraiva, Mariana Abreu
-# Created: 12/05/2022
-# Last Updated: 07/07/2022
-
-# ===================================
-from datetime import timedelta
-
-from datetimerange import DateTimeRange
-from numpy import mean
-
-from ltbio.biosignals.modalities.Biosignal import Biosignal
-from ltbio.biosignals.timeseries.Unit import Volt, Multiplier
-
-
-class EDA(Biosignal):
-
-    DEFAULT_UNIT = Volt(Multiplier.m)
-
-    def __init__(self, timeseries, source=None, patient=None, acquisition_location=None, name=None):
-        super(EDA, self).__init__(timeseries, source, patient, acquisition_location, name)
-
-    def plot_summary(self, show: bool = True, save_to: str = None):
-        pass
-
-    @property
-    def preview(self):
-        """Returns 2 minutes of the middle of the signal."""
-        domain = self.domain
-        middle_of_domain: DateTimeRange = domain[len(domain) // 2]
-        middle = middle_of_domain.start_datetime + (middle_of_domain.timedelta / 2)
-        try:
-            return self[middle - timedelta(seconds=2): middle + timedelta(minutes=2)]
-        except IndexError:
-            raise AssertionError(
-                f"The middle segment of {self.name} from {self.patient_code} does not have at least 5 seconds to return a preview.")
-
-    @staticmethod
-    def racSQI(samples):
-        """
-        Rate of Amplitude Change (RAC).
-        It is recommended to be analysed in windows of 2 seconds.
-        """
-        max_, min_ = max(samples), min(samples)
-        amplitude = max_ - min_
-        return abs(amplitude / max_)
-
-    def acceptable_quality(self):  # -> Timeline
-        """
-        Suggested by Böttcher et al. Scientific Reports, 2022, for wearable wrist EDA.
-        """
-        return self.when(lambda x: mean(x) > 0.05 and EDA.racSQI(x) < 0.2, window=timedelta(seconds=2))
diff --git a/src/ltbio/biosignals/modalities/EEG.py b/src/ltbio/biosignals/modalities/EEG.py
deleted file mode 100644
index f45fef76..00000000
--- a/src/ltbio/biosignals/modalities/EEG.py
+++ /dev/null
@@ -1,29 +0,0 @@
-# -*- encoding: utf-8 -*-
-
-# ===================================
-
-# IT - LongTermBiosignals
-
-# Package: biosignals
-# Module: EEG
-# Description: Class EEG, a type of Biosignal named Electroencephalogram.
-
-# Contributors: João Saraiva, Mariana Abreu
-# Created: 12/05/2022
-# Last Updated: 07/07/2022
-
-# ===================================
-
-from ltbio.biosignals.modalities.Biosignal import Biosignal
-from ltbio.biosignals.timeseries.Unit import Volt, Multiplier
-
-
-class EEG(Biosignal):
-
-    DEFAULT_UNIT = Volt(Multiplier.m)
-
-    def __init__(self, timeseries, source=None, patient=None, acquisition_location=None, name=None):
-        super(EEG, self).__init__(timeseries, source, patient, acquisition_location, name)
-
-    def plot_summary(self, show: bool = True, save_to: str = None):
-        pass
diff --git a/src/ltbio/biosignals/modalities/EMG.py b/src/ltbio/biosignals/modalities/EMG.py
deleted file mode 100644
index 7f1462ce..00000000
--- a/src/ltbio/biosignals/modalities/EMG.py
+++ /dev/null
@@ -1,29 +0,0 @@
-# -*- encoding: utf-8 -*-
-
-# ===================================
-
-# IT - LongTermBiosignals
-
-# Package: biosignals
-# Module: EMG
-# Description: Class EMG, a type of Biosignal named Electromyogram.
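The RAC criterion defined in EDA.racSQI above can be checked in isolation; a short sketch on synthetic 2-second windows (the 4 Hz rate is an assumption, the 0.05 and 0.2 thresholds are the ones used above):

    import numpy as np

    def rac(window):
        amplitude = window.max() - window.min()
        return abs(amplitude / window.max())

    fs = 4  # Hz (assumed wearable EDA rate)
    signal = 1.0 + 0.01 * np.sin(np.linspace(0, 20, 40))  # 10 s of slowly-varying "EDA"
    windows = signal.reshape(-1, 2 * fs)                  # non-overlapping 2 s windows
    acceptable = [(w.mean() > 0.05) and (rac(w) < 0.2) for w in windows]
    print(acceptable)  # all True for this synthetic signal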
- -# Contributors: João Saraiva, Mariana Abreu -# Created: 12/05/2022 -# Last Updated: 07/07/2022 - -# =================================== - -from ltbio.biosignals.modalities.Biosignal import Biosignal -from ltbio.biosignals.timeseries.Unit import Volt, Multiplier - - -class EMG(Biosignal): - - DEFAULT_UNIT = Volt(Multiplier.m) - - def __init__(self, timeseries, source=None, patient=None, acquisition_location=None, name=None): - super(EMG, self).__init__(timeseries, source, patient, acquisition_location, name) - - def plot_summary(self, show: bool = True, save_to: str = None): - pass diff --git a/src/ltbio/biosignals/modalities/HR.py b/src/ltbio/biosignals/modalities/HR.py deleted file mode 100644 index ae28401a..00000000 --- a/src/ltbio/biosignals/modalities/HR.py +++ /dev/null @@ -1,45 +0,0 @@ -# -*- encoding: utf-8 -*- - -# =================================== - -# IT - LongTermBiosignals - -# Package: biosignals -# Module: HR -# Description: Class HR, a pseudo-type of Biosignal named Heart Rate. - -# Contributors: João Saraiva, Mariana Abreu -# Created: 02/06/2022 -# Last Updated: 16/07/2022 - -# =================================== - -from ltbio.biosignals.modalities.Biosignal import DerivedBiosignal -from ltbio.biosignals.modalities.ECG import ECG, RRI -from ltbio.biosignals.modalities.PPG import PPG, IBI -from ltbio.biosignals.timeseries.Unit import BeatsPerMinute - - -class HR(DerivedBiosignal): - - DEFAULT_UNIT = BeatsPerMinute() - - def __init__(self, timeseries, source=None, patient=None, acquisition_location=None, name=None, original: RRI | IBI | ECG | PPG | None = None): - super(HR, self).__init__(timeseries, source, patient, acquisition_location, name, original) - - @classmethod - def fromRRI(cls): - pass - - @classmethod - def fromIBI(cls): - pass - - def plot_summary(self, show: bool = True, save_to: str = None): - pass - - def acceptable_quality(self): # -> Timeline - """ - Acceptable physiological values - """ - return self.when(lambda x: 40 <= x <= 200) # between 40-200 bpm diff --git a/src/ltbio/biosignals/modalities/MultimodalBiosignal.py b/src/ltbio/biosignals/modalities/MultimodalBiosignal.py deleted file mode 100644 index ae8cce34..00000000 --- a/src/ltbio/biosignals/modalities/MultimodalBiosignal.py +++ /dev/null @@ -1,104 +0,0 @@ -# -*- encoding: utf-8 -*- - -# =================================== - -# IT - LongTermBiosignals - -# Package: biosignals -# Module: MultimodalBiosignal -# Description: Class MultimodalBiosignal that can hold multiple modalities of Biosignals. 
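HR.fromRRI is left as a stub above; a possible shape for that conversion, from R-R intervals in seconds to instantaneous heart rate in bpm (hypothetical helper, not the library's API):

    import numpy as np

    def hr_from_rri(rri_seconds):
        rri = np.asarray(rri_seconds, dtype=float)
        return 60.0 / rri  # bpm

    hr = hr_from_rri([0.8, 0.75, 1.0])   # -> [75., 80., 60.]
    mask = (hr >= 40) & (hr <= 200)      # same 40-200 bpm rule as acceptable_quality
    print(hr, mask)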
-
-# Contributors: João Saraiva, Mariana Abreu
-# Created: 08/07/2022
-
-# ===================================
-
-from typing import Set
-
-from ltbio.biosignals.modalities.Biosignal import Biosignal
-from ltbio.biosignals.sources.BiosignalSource import BiosignalSource
-from ltbio.clinical.BodyLocation import BodyLocation
-
-
-class MultimodalBiosignal(Biosignal):
-
-    def __init__(self, **biosignals):
-
-        timeseries = {}
-        #sources = {}
-        patient = None
-        #locations = {}
-        name = "Union of"
-        events = {}
-
-        for label, biosignal in biosignals.items():
-            if patient is None:
-                patient = biosignal._Biosignal__patient
-            elif patient != biosignal._Biosignal__patient:
-                raise ValueError("When joining Biosignals, they all must be from the same Patient.")
-
-            for channel_label, ts in biosignal._to_dict().items():
-                timeseries[label+':'+channel_label] = ts  # Join Timeseries in a single dictionary
-
-            #sources[label] = biosignal.source  # Join sources
-
-            #if biosignal.acquisition_location is not None:
-            #    locations[label] = biosignal.acquisition_location
-
-            name += f" '{biosignal.name}'," if biosignal.name != "No Name" else f" '{label}',"
-
-            for event in biosignal.events:
-                if event.name in events and events[event.name] != event:
-                    raise ValueError("There are two event names associated to different onsets/offsets in this set of Biosignals.")
-                else:
-                    events[event.name] = event
-
-        super(MultimodalBiosignal, self).__init__(timeseries, None, patient, None, name[:-1])
-        self.associate(events)
-        self.__biosignals = biosignals
-
-        if len(self.type) == 1:
-            raise TypeError("Cannot create a Multimodal Biosignal of just 1 modality.")
-
-    @property
-    def type(self):
-        return {biosignal.type for biosignal in self.__biosignals.values()}
-
-    @property
-    def source(self) -> Set[BiosignalSource]:
-        return {biosignal.source for biosignal in self.__biosignals.values()}
-
-    @property
-    def acquisition_location(self) -> Set[BodyLocation]:
-        return {biosignal.acquisition_location for biosignal in self.__biosignals.values()}
-
-    def __getitem__(self, item):
-        if isinstance(item, tuple):
-            if len(item) == 2:
-                biosignal = self.__biosignals[item[0]]
-                return biosignal[item[1]]
-
-        elif isinstance(item, str) and item in self.__biosignals.keys():
-            return self.__biosignals[item]
-
-        raise IndexError("Indexing a Multimodal Biosignal should have two arguments, like multisignal['ecg'][V5], "
-                         "where 'ecg' is the Biosignal to address and 'V5' is the channel to get.")
-
-    def __contains__(self, item):
-        if isinstance(item, str) and item in self.__biosignals.keys():
-            return True
-        if isinstance(item, Biosignal) and item in self.__biosignals.values():
-            return True
-
-        return super(MultimodalBiosignal, self).__contains__(item)
-
-    def __str__(self):
-        '''Returns a textual description of the MultimodalBiosignal.'''
-        res = f"MultimodalBiosignal containing {len(self.__biosignals)} Biosignals:\n"
-        for i, biosignal in enumerate(self.__biosignals):
-            res += "({})\n{}".format(i, str(biosignal))
-        return res
-
-    def plot_summary(self, show: bool = True, save_to: str = None):
-        raise TypeError("Functionality not available for Multimodal Biosignals.")
-
diff --git a/src/ltbio/biosignals/modalities/PPG.py b/src/ltbio/biosignals/modalities/PPG.py
deleted file mode 100644
index 7129d713..00000000
--- a/src/ltbio/biosignals/modalities/PPG.py
+++ /dev/null
@@ -1,76 +0,0 @@
-# -*- encoding: utf-8 -*-
-
-# ===================================
-
-# IT - LongTermBiosignals
-
-# Package: biosignals
-# Module: PPG
-# Description: Class PPG, a type of Biosignal
named Photoplethysmogram. - -# Contributors: João Saraiva, Mariana Abreu -# Created: 12/05/2022 -# Last Updated: 09/07/2022 - -# =================================== -from datetime import timedelta - -import numpy as np -from scipy.signal import welch - -from ltbio.biosignals.modalities.Biosignal import Biosignal, DerivedBiosignal -from ltbio.biosignals.timeseries.Unit import Second - - -class PPG(Biosignal): - - DEFAULT_UNIT = None - - def __init__(self, timeseries, source=None, patient=None, acquisition_location=None, name=None, **options): - super(PPG, self).__init__(timeseries, source, patient, acquisition_location, name, **options) - - def plot_summary(self, show: bool = True, save_to: str = None): - pass - - def acceptable_quality(self): # -> Timeline - """ - Suggested for wearable wrist PPG by: - - Glasstetter et al. MDPI Sensors, 21, 2021 - - Böttcher et al. Scientific Reports, 2022 - """ - - sfreq = self.sampling_frequency - nperseg = int(4 * self.sampling_frequency) # 4 s window - fmin = 0.1 # Hz - fmax = 5 # Hz - - def spectral_entropy(x, sfreq, nperseg, fmin, fmax): - if len(x) < nperseg: # if segment smaller than 4s - nperseg = len(x) - noverlap = int(0.9375 * nperseg) # if nperseg = 4s, then 3.75 s of overlap - f, psd = welch(x, sfreq, nperseg=nperseg, noverlap=noverlap) - idx_min = np.argmin(np.abs(f - fmin)) - idx_max = np.argmin(np.abs(f - fmax)) - psd = psd[idx_min:idx_max] - psd /= np.sum(psd) # normalize the PSD - entropy = -np.sum(psd * np.log2(psd)) - N = idx_max - idx_min - entropy_norm = entropy / np.log2(N) - return entropy_norm - - return self.when(lambda x: spectral_entropy(x, sfreq, nperseg, fmin, fmax) < 0.8, window=timedelta(seconds=4)) - - -class IBI(DerivedBiosignal): - - DEFAULT_UNIT = Second() - - def __init__(self, timeseries, source=None, patient=None, acquisition_location=None, name=None, original: PPG | None = None): - super().__init__(timeseries, source, patient, acquisition_location, name, original) - - @classmethod - def fromPPG(cls): - pass - - def plot_summary(self, show: bool = True, save_to: str = None): - pass diff --git a/src/ltbio/biosignals/modalities/RESP.py b/src/ltbio/biosignals/modalities/RESP.py deleted file mode 100644 index a4a852c4..00000000 --- a/src/ltbio/biosignals/modalities/RESP.py +++ /dev/null @@ -1,29 +0,0 @@ -# -*- encoding: utf-8 -*- - -# =================================== - -# IT - LongTermBiosignals - -# Package: biosignals -# Module: RESP -# Description: Class RESP, a type of Biosignal named Respiration. 
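The spectral-entropy SQI used in PPG.acceptable_quality above can be exercised standalone; a sketch on 4 s of synthetic data (the 64 Hz rate is an assumption; window, overlap, band and the 0.8 threshold mirror the code above):

    import numpy as np
    from scipy.signal import welch

    def spectral_entropy(x, sfreq, fmin=0.1, fmax=5.0):
        nperseg = min(len(x), int(4 * sfreq))
        f, psd = welch(x, sfreq, nperseg=nperseg, noverlap=int(0.9375 * nperseg))
        band = psd[np.argmin(np.abs(f - fmin)):np.argmin(np.abs(f - fmax))]
        band = band / band.sum()                      # normalize the in-band PSD
        return -np.sum(band * np.log2(band)) / np.log2(len(band))

    fs = 64
    t = np.arange(0, 4, 1 / fs)
    clean = np.sin(2 * np.pi * 1.2 * t)               # ~72 bpm pulse-like wave
    noisy = np.random.default_rng(0).normal(size=t.size)
    print(spectral_entropy(clean, fs) < 0.8)          # True: concentrated spectrum
    print(spectral_entropy(noisy, fs) < 0.8)          # False: near-flat spectrum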
-
-# Contributors: João Saraiva, Mariana Abreu
-# Created: 12/05/2022
-# Last Updated: 29/06/2022
-
-# ===================================
-
-from ltbio.biosignals.modalities.Biosignal import Biosignal
-from ltbio.biosignals.timeseries.Unit import Volt, Multiplier
-
-
-class RESP(Biosignal):
-
-    DEFAULT_UNIT = Volt(Multiplier.m)
-
-    def __init__(self, timeseries, source=None, patient=None, acquisition_location=None, name=None):
-        super(RESP, self).__init__(timeseries, source, patient, acquisition_location, name)
-
-    def plot_summary(self, show:bool=True, save_to:str=None):
-        pass
\ No newline at end of file
diff --git a/src/ltbio/biosignals/modalities/TEMP.py b/src/ltbio/biosignals/modalities/TEMP.py
deleted file mode 100644
index 838c14c4..00000000
--- a/src/ltbio/biosignals/modalities/TEMP.py
+++ /dev/null
@@ -1,61 +0,0 @@
-# -*- encoding: utf-8 -*-
-
-# ===================================
-
-# IT - LongTermBiosignals
-
-# Package: biosignals
-# Module: TEMP
-# Description: Class TEMP, a type of Biosignal named Temperature.
-
-# Contributors: João Saraiva, Mariana Abreu
-# Created: 15/06/2022
-# Last Updated: 09/07/2022
-
-# ===================================
-from datetime import timedelta
-
-from datetimerange import DateTimeRange
-from numpy import mean
-
-from ltbio.biosignals.modalities.Biosignal import Biosignal
-from ltbio.biosignals.timeseries.Unit import DegreeCelsius, Multiplier
-
-
-class TEMP(Biosignal):
-
-    DEFAULT_UNIT = DegreeCelsius(Multiplier._)
-
-    def __init__(self, timeseries, source=None, patient=None, acquisition_location=None, name=None):
-        super(TEMP, self).__init__(timeseries, source, patient, acquisition_location, name)
-
-    def plot_summary(self, show: bool = True, save_to: str = None):
-        pass
-
-    @property
-    def preview(self):
-        """Returns 2 minutes of the middle of the signal."""
-        domain = self.domain
-        middle_of_domain: DateTimeRange = domain[len(domain) // 2]
-        middle = middle_of_domain.start_datetime + (middle_of_domain.timedelta / 2)
-        try:
-            return self[middle - timedelta(seconds=2): middle + timedelta(minutes=2)]
-        except IndexError:
-            raise AssertionError(
-                f"The middle segment of {self.name} from {self.patient_code} does not have at least 5 seconds to return a preview.")
-
-    @staticmethod
-    def racSQI(samples):
-        """
-        Rate of Amplitude Change (RAC).
-        It is recommended to be analysed in windows of 2 seconds.
-        """
-        max_, min_ = max(samples), min(samples)
-        amplitude = max_ - min_
-        return abs(amplitude / max_)
-
-    def acceptable_quality(self):  # -> Timeline
-        """
-        Suggested by Böttcher et al. Scientific Reports, 2022, for wearable wrist TEMP.
- """ - return self.when(lambda x: 25 < mean(x) < 40 and TEMP.racSQI(x) < 0.2, window=timedelta(seconds=2)) diff --git a/src/ltbio/biosignals/modalities/__init__.py b/src/ltbio/biosignals/modalities/__init__.py deleted file mode 100644 index 7155f885..00000000 --- a/src/ltbio/biosignals/modalities/__init__.py +++ /dev/null @@ -1,10 +0,0 @@ -# Quick shortcuts to classes -from ltbio.biosignals.modalities.ACC import ACC, ACCMAG -from ltbio.biosignals.modalities.ECG import ECG, RRI -from ltbio.biosignals.modalities.EDA import EDA -from ltbio.biosignals.modalities.EEG import EEG -from ltbio.biosignals.modalities.EMG import EMG -from ltbio.biosignals.modalities.HR import HR -from ltbio.biosignals.modalities.PPG import PPG, IBI -from ltbio.biosignals.modalities.RESP import RESP -from ltbio.biosignals.modalities.TEMP import TEMP diff --git a/src/ltbio/biosignals/sources.py b/src/ltbio/biosignals/sources.py new file mode 100644 index 00000000..2a3502ef --- /dev/null +++ b/src/ltbio/biosignals/sources.py @@ -0,0 +1,1311 @@ +# -- encoding: utf-8 -- + +# =================================== + +# IT - LongTermBiosignals + +# Package: src/ltbio/biosignals +# Module: sources +# Description: + +# Contributors: João Saraiva, Mariana Abreu +# Created: 25/04/2022 +# Last Updated: 29/06/2022 + +# =================================== + +from abc import ABC, abstractmethod + +from . import Event +from numpy import array + + +class __BiosignalSource(ABC): + + __SERIALVERSION: int = 1 + + def __init__(self): + pass + + @abstractmethod + def __repr__(self): + pass + + def __eq__(self, other): + return type(self) == type(other) + + @staticmethod + @abstractmethod + def _timeseries(path:str, type, **options): + pass + + @staticmethod + def _events(path:str, **options) -> tuple[Event] | None: + return None # Override implementation is optional + + @staticmethod + @abstractmethod + def _write(path:str, timeseries:dict): + pass + + @staticmethod + @abstractmethod + def _transfer(samples:array, type) -> array: + pass + + @classmethod + def _get(cls, path:str, type, **options): + return { + 'timeseries': cls._timeseries(path, type, **options), + 'patient': cls._patient(path, **options), + 'acquisition_location': cls._acquisition_location(path, type, **options), + 'events': cls._events(path, **options), + 'name': cls._name(path, type, **options) + } + + @staticmethod + def _patient(path, **options): + return None # Override implementation is optional + + @staticmethod + def _acquisition_location(path, type, **options): + return None # Override implementation is optional + + @staticmethod + def _name(path, type, **options): + return None # Override implementation is optional + + def __getstate__(self): + """ + 1: other... (dict) + """ + other_attributes = self.__dict__.copy() + return (self.__SERIALVERSION, ) if len(other_attributes) == 0 else (self.__SERIALVERSION, other_attributes) + + def __setstate__(self, state): + if state[0] == 1: + if len(state) == 2: + self.__dict__.update(state[1]) + else: + raise IOError(f'Version of {self.__class__.__name__} object not supported. 
Serialized version: {state[0]};'
+                          f' Supported versions: 1.')
+
+
+# ===================================
+# Hospitals and Clinics
+# ===================================
+
+
+from neo import MicromedIO
+from numpy import array
+
+from ..sources.BiosignalSource import BiosignalSource
+
+
+class HEM(BiosignalSource):
+    '''This class represents the source of Hospital Egas Moniz (Lisboa, PT) and includes methods to read and write
+    biosignal files provided by them. Usually they are in the Micromed TRC format.'''
+
+    def __init__(self):
+        super().__init__()
+
+    def __repr__(self):
+        return "Hospital Egas Moniz"
+
+    @staticmethod
+    def __read_trc(list, metadata=False):
+        """
+        Return TRC file information, whether it is the values or the metadata, according to the boolean 'metadata'.
+        :param list: A pair (file path, sensor label).
+        :param metadata: True to return only the metadata; False to return the samples.
+        """
+        dirfile = list[0]
+        sensor = list[1]
+        # get trc data
+        seg_micromed = MicromedIO(dirfile)
+        hem_data = seg_micromed.read_segment()
+        hem_sig = hem_data.analogsignals[0]
+        ch_list = seg_micromed.header['signal_channels']['name']
+        # get channels that correspond to type (POL Ecg = type ecg)
+        find_idx = [hch for hch in range(len(ch_list)) if sensor.lower() in ch_list[hch].lower()]
+        # returns ch_list of interest, sampling frequency, initial datetime
+        if metadata:
+            return ch_list[find_idx], float(hem_sig.sampling_rate), hem_data.rec_datetime, hem_sig.units
+        # returns initial date and samples
+        print(ch_list[find_idx])
+        return array(hem_sig[:, find_idx].T), hem_data.rec_datetime, ch_list[find_idx]
+
+    @staticmethod
+    def _timeseries(dir, type, **options):
+        '''Reads multiple TRC files in the directory 'dir' and returns a Biosignal associated with a Patient.'''
+        # first a list is created with all the filenames that end in .trc and are inside the chosen dir
+        # this is a list of lists where the second column is the type of channel to extract
+        if type is modalities.ECG:
+            label = 'ecg'
+        all_files = sorted([[path.join(dir, file), label] for file in listdir(dir) if file.lower().endswith('.trc')])
+        # read the metadata from the first file in list all_files
+        channels, sfreq, start_datetime, units = HEM.__read_trc(all_files[0], metadata=True)
+        # run the trc read function for all files in list all_files
+        all_trc = list(map(HEM.__read_trc, all_files))
+        new_dict, first_time = {}, all_trc[0][1]
+        # TODO ADD UNITS TO TIMESERIES
+        for channel in channels:
+            last_start = all_trc[0][1]
+            segments = {last_start: all_trc[0][0][list(all_trc[0][2]).index(channel)]}
+            for at, trc_data in enumerate(all_trc[1:]):
+                if channel not in trc_data[2]:
+                    continue
+                ch = list(trc_data[2]).index(channel)
+                final_time = all_trc[at][1] + timedelta(seconds=len(all_trc[at][0][ch])/sfreq)
+                if trc_data[1] <= final_time:
+                    if (final_time - trc_data[1]) < timedelta(seconds=1):
+                        segments[last_start] = np.append(segments[last_start], trc_data[0][ch])
+                    else:
+                        continue
+                else:
+                    segments[trc_data[1]] = trc_data[0][ch]
+                    last_start = trc_data[1]
+
+            if len(segments) > 1:
+                new_timeseries = timeseries.Timeseries.withDiscontiguousSegments(segments, sampling_frequency=sfreq, name=channel)
+            else:
+                new_timeseries = timeseries.Timeseries(tuple(segments.values())[0], tuple(segments.keys())[0], sfreq, name=channel)
+            new_dict[channel] = new_timeseries
+
+        return new_dict
+
+    @staticmethod
+    def _write(path: str, timeseries: dict):
+        pass
+
+    @staticmethod
+    def _transfer(samples, to_unit):
+        pass
+
+
+from ..sources.BiosignalSource import BiosignalSource
+
+
+class
HSM(BiosignalSource): + '''This class represents the source of Hospital de Santa Maria (Lisboa, PT) and includes methods to read and write + biosignal files provided by them. Usually they are in the European EDF/EDF+ format.''' + + def __init__(self): + super().__init__() + + def __repr__(self): + return "Hospital de Santa Maria" + + @staticmethod + def __read_edf(list, metadata=False): + + """ + Reads one edf file + If metadata is True - returns list of channels and sampling frequency and initial datetime + Else return arrays one for each channel + """ + dirfile = list[0] + sensor = list[1] + # get edf data + hsm_data = read_raw_edf(dirfile) + # get channels that correspond to type (POL Ecg = type ecg) + channel_list = [hch for hch in hsm_data.ch_names if sensor.lower() in hch.lower()] + # initial datetime + if metadata: + return channel_list, hsm_data.info['sfreq'] + # structure of hsm_sig is two arrays, the 1st has one array for each channel and the 2nd is an int-time array + hsm_sig = hsm_data[channel_list] + + return hsm_sig[0], hsm_data.info['meas_date'].replace(tzinfo=None) + + @staticmethod + def _timeseries(dir, type, **options): + '''Reads multiple EDF/EDF+ files on the directory 'path' and returns a Biosignal associated with a Patient.''' + if type is modalities.ECG: + label = 'ecg' + if type is modalities.EMG: + label = 'emg' + if type is modalities.EEG: + label = 'eeg' + # first a list is created with all the filenames that end in .edf and are inside the chosen dir + # this is a list of lists where the second column is the type of channel to extract + all_files = sorted([[path.join(dir, file), label] for file in listdir(dir) if file.endswith('.edf')]) + # run the edf read function for all files in list all_files + channels, sfreq = HSM.__read_edf(all_files[0], metadata=True) + all_edf = list(map(HSM.__read_edf, all_files)) + new_dict = {} + for ch in range(len(channels)): + segments = {edf_data[1]: edf_data[0][ch] for edf_data in all_edf} + if len(segments) > 1: + new_timeseries = timeseries.Timeseries.withDiscontiguousSegments(segments, sampling_frequency=sfreq, name=channels[ch]) + else: + new_timeseries = timeseries.Timeseries(tuple(segments.values())[0], tuple(segments.keys())[0], sfreq, name=channels[ch]) + new_dict[channels[ch]] = new_timeseries + return new_dict + + @staticmethod + def _write(path: str, timeseries: dict): + pass + + @staticmethod + def _transfer(samples, to_unit): + pass + + +# =================================== +# General-purpose Devices +# =================================== + + +import configparser +from ast import literal_eval +from datetime import timedelta +from json import load +from os import listdir, path, access, R_OK +from os.path import getsize +from warnings import warn + +import numpy as np +from dateutil.parser import parse as to_datetime + +from .. import timeseries +from .. 
import modalities +from ..sources.BiosignalSource import BiosignalSource +from ltbio.clinical.BodyLocation import BodyLocation + + +class Sense(BiosignalSource): + + # Sense Defaults files use these keys: + MODALITIES = 'modalities' + CHANNEL_LABELS = 'labels' + BODY_LOCATION = 'location' + + # Sense csv data files use these keys: + KEY_CH_LABELS_IN_HEADER = 'Channel Labels' + KEY_HZ_IN_HEADER = 'Sampling rate (Hz)' + KEY_TIME_IN_HEADER = 'ISO 8601' + ANALOGUE_LABELS_FORMAT = 'AI{0}_raw' + + # These are needed to map channels to biosignal modalities + DEFAULTS_PATH: str + DEVICE_ID: str + + # Flag to deal with badly-formatted CSV files + BAD_FORMAT = False + + def __init__(self, device_id:str, defaults_path:str=None): + super().__init__() + self.__device_id = device_id + Sense.DEVICE_ID = device_id + if defaults_path is not None: + Sense.DEFAULTS_PATH = defaults_path + else: + if not path.exists('resources/config.ini'): + raise FileNotFoundError('No config.ini was found.') + try: + config = configparser.ConfigParser() + config.read('resources/config.ini') + Sense.DEFAULTS_PATH = config['DEFAULT']['Sense'] + print(f"Getting default mapping from {Sense.DEFAULTS_PATH}") + except IndexError: + raise KeyError("No defaults file indicated 'Sense' devices in config.ini.") + self.__defaults_path = defaults_path + + Sense.BAD_FORMAT = False + + def __repr__(self): + return "ScientISST Sense" + + + @staticmethod + def __aux_date(header): + """ Get starting time from header. """ + return to_datetime(header[Sense.KEY_TIME_IN_HEADER], ignoretz=True) + + @staticmethod + def __check_empty(len_, type=''): + """ Confirm if the length is acceptable and return the desired output. """ + if type == 'file_size': + if len_ <= 50: + return True + else: + if len_ < 1: + return True + return False + + @staticmethod + def __get_mapping(biosignal_type, channel_labels, modalities_available): + """ + Given a header, find all indexes that correspond to biosignal modality of interest. + It REQUIRES a default mapping to be specified in a JSON file, otherwise a mapping will be requested on the stdin and saved for future use. + + @param header: A list of strings corresponding to column names. + @param biosignal_type: Biosignal subclass indicating which modality is of interest. + @param defaults_path: The path to the JSON file containing the mapping in the correct syntax. + + @rtype: tuple + @return: A tuple with: + a) A dictionary with the indexes corresponding to the biosignal modality of interest mapped to a channel label. Optionally, it can have a key Sense.BODY_LOCATION mapped to some body location. + E.g.: {1: 'Label of channel 1', 3: 'Label of channel 3'} + b) A body location (in str) or None + """ + + mapping = {} + + if biosignal_type.__name__ in str(modalities_available): + for index in modalities_available[biosignal_type.__name__]: + # Map each analogue channel of interest to a label + mapping[index] = channel_labels[str(index)] + else: + raise IOError(f"There are no analogue channels associated with {biosignal_type.__name__}") + + return mapping + + @staticmethod + def __get_defaults(): + """ + Gets the default mapping of channels for a device. + + @return: A tuple with + a) modalities: A dictionary mapping biosignal modalities to column indexes; + b) channel_labels: A dictionary mapping each column index to a meaningful channel label; + c) body_location: A string associated with a body location. 
+ @rtype: tuple of size 3 + """ + + if not hasattr(Sense, 'DEVICE_ID'): + raise IOError("Unlike other BiosignalSource(s), Sense needs to be instantiated and a 'device_id' must be provided on instantiation.") + + # Check if file exists and it is readable + if path.isfile(Sense.DEFAULTS_PATH) and access(Sense.DEFAULTS_PATH, R_OK): + + # OPTION A: Use the mapping in the json file + with open(Sense.DEFAULTS_PATH, 'r') as json_file: + json_string = load(json_file) + + # Get mapping of modalities + if Sense.MODALITIES in json_string[Sense.DEVICE_ID]: + modalities = json_string[Sense.DEVICE_ID][Sense.MODALITIES] + else: + raise IOError(f"Key {Sense.MODALITIES} is mandatory for each device default mapping.") + + # Get mapping of channel labels, if any + if Sense.CHANNEL_LABELS in json_string[Sense.DEVICE_ID]: + channel_labels = json_string[Sense.DEVICE_ID][Sense.CHANNEL_LABELS] + else: + channel_labels = None + + # Get body location, if any + if Sense.BODY_LOCATION in json_string[Sense.DEVICE_ID]: + body_location = json_string[Sense.DEVICE_ID][Sense.BODY_LOCATION] + if body_location.startswith('BodyLocation.'): + body_location:BodyLocation = eval(body_location) + else: + body_location = None + + return modalities, channel_labels, body_location + + # File does not exist; creates one + else: + print("Either Sense defaults file is missing or it is not readable. Creating new defaults...") + # OPTION B: Ask and save a new mapping + json_string = {} + json_string[Sense.DEVICE_ID] = {} # Create a new object for a new device mapping + # B1. Input modalities + # B2. Input Channel labels + # B3. Input Body Location + # TODO: Use stdin to ask for default, save it, and return it + + @staticmethod + def __get_header(file_path): + """ + Auxiliary procedures to find the header (1st line) and column names (2nd line) of the file in the given path. + @param file_path: The path of the file to look for a header. + @return: A tuple with: + a) header: A dictionary with the header metadata. + b) column_names: A list of the column names. + @raise: + IOError: If the given file path does not exist. + """ + with open(file_path) as fh: + header = next(fh)[1:] # Read first line + header = literal_eval(header) # Get a dictionary of the header metadata + column_names = next(fh)[1:] # Read second line + column_names = column_names.split() # Get a list of the column names + return header, column_names + + @staticmethod + def __get_samples(file_path): + """ + Auxiliary procedures to find the samples (> 3rd line) of the file in the given path. + @param file_path: The path of the file to look for a header. + @return: A np.array of the data. + @raise: + IOError: If the given file path does not exist. + """ + with open(file_path) as fh: + # Dismiss header (it is in the first line) + header = next(fh)[1:] + next(fh) + # Get the remaining data, i.e., the samples + data = [line.strip().split() for line in fh] + try: + return np.array(data, float) + except ValueError: # In July 2022, it could occur that SENSE files could present Bad Format. + Sense.BAD_FORMAT = True + all_segments = [] + start_indices = [0, ] + # In that case, we need to separate each valid segment of samples. + correct_length = len(data[0]) # FIXME: Assuming first line is syntax-valid. Poor verification, though. + for i in range(len(data)): + if len(data[i]) != correct_length: # Bad syntax found + warn(f"File '{file_path}' has bad syntax on line {i}. 
This portion was dismissed.") + # Trim the end of data + for j in range(i-1, 0, -1): + if data[j][0] == '15': # Look for NSeq == 15 + all_segments.append(np.array(data[start_indices[-1]:j + 1], float)) # append "old" segment + break + # Trim the beginning of new segment + for j in range(i+1, len(data), 1): + if data[j][0] == '0': # Look for NSeq == 0 + start_indices.append(j) + break + + all_segments.append(np.array(data[start_indices[-1]:], float)) # append last "new" segment + return all_segments, start_indices + + + @staticmethod + def __read_file(file_path, type, channel_labels, modalities_available): + """ + Reads one csv file + Args: + list_ (list): contains the file path + metadata (bool): defines whether only metadata or actual timeseries values should be returned + sensor_idx (list): list of indexes that correspond to the columns of sensor to extract + sensor_names (list): list of names that correspond to the sensor label + ex: sensor='ECG', sensor_names=['ECG_chest'] + ex: sensor='ACC', options['location']='wrist', sensor_names=['ACCX_wrist','ACCY_wrist','ACCZ_wrist'] + device (str): device MacAddress, this is used to get the specific header, specially when using 2 devices + **options (dict): equal to _read arg + + @return: A tuple with: + a) sensor_data (np.array): 2-dimensional array of time over sensors columns. + b) date (datetime): initial datetime of samples. + d) sampling_frequency (float): The sampling frequency, in Hertz, of the read samples. + + @raise: + IOError: if sensor_names is empty, meaning no channels could be retrieved for chosen sensor + """ + + # STEP 1 + # Get header + header, column_names = Sense.__get_header(file_path) + + # STEP 2 + # Get all samples + all_samples = Sense.__get_samples(file_path) + + # STEP 3 + # Raise Error if file is empty + if not Sense.BAD_FORMAT and Sense.__check_empty(len(all_samples)): + raise IOError(f'Empty file: {file_path}.') + + # STEP 4 + # Get analogue channels of interest, mapped to labels, and a body location (if any associated) + mapping = Sense.__get_mapping(type, channel_labels, modalities_available) + + # STEP 5 + # Get initial date and sampling frequency + date = Sense.__aux_date(header) + sf = header[Sense.KEY_HZ_IN_HEADER] + + # STEP 6 + # Filtering only the samples of the channels of interest + if not Sense.BAD_FORMAT: + samples_of_interest = {} + for ix in mapping: + label = mapping[ix] + samples_of_interest[label] = all_samples[:, column_names.index(Sense.ANALOGUE_LABELS_FORMAT.format(str(ix)))] + # return dict, start date, sampling frequency + return samples_of_interest, date, sf + else: + samples_of_interest_by_segment, start_dates = [], [] + all_segments, start_indices = all_samples + for segment, start_index in zip(all_segments, start_indices): + start_dates.append(date + timedelta(seconds=start_index/sf)) + samples_of_interest = {} + for ix in mapping: + label = mapping[ix] + samples_of_interest[label] = segment[:, column_names.index(Sense.ANALOGUE_LABELS_FORMAT.format(str(ix)))] + samples_of_interest_by_segment.append(samples_of_interest) + # return segments, start dates, sampling frequency + return samples_of_interest_by_segment, start_dates, sf + + + @staticmethod + def _timeseries(dir, type, **options): + """Reads multiple csv files on the directory 'path' and returns a Biosignal associated with a Patient. 
+        @param dir (str): directory that contains Sense files in csv format
+        @param type (subclass of Biosignal): type of biosignal to extract; can be one of ECG, EDA, PPG, RESP, ACC and EMG
+        @param **options (dict):
+            defaults_path (str): path to the JSON file with the devices' default mappings; if not defined, the path in resources/config.ini is used
+            device_id (str): identifier of the device whose default mapping should be used
+
+        @return: A typical dictionary like {str: Timeseries}.
+
+        @raise:
+            IOError: If there are no Sense files in the given directory.
+            IOError: If Sense files have no header.
+        """
+
+        # STEP 0 - Get defaults
+        modalities_available, channel_labels, _ = Sense.__get_defaults()
+
+        # STEP 1 - Get files
+        # A list is created with all the filenames that end with '.csv' inside the given directory.
+        # E.g. [ file1.csv, file2.csv, ... ]
+        all_files = [path.join(dir, file) for file in listdir(dir) if file.endswith('.csv')]
+        if not all_files:
+            raise IOError(f"No files in {dir}.")
+
+        # STEP 2 - Convert channel labels to BodyLocations, if any
+        for position, label in channel_labels.items():
+            if label.startswith('BodyLocation.'):
+                channel_labels[position]:BodyLocation = eval(label)
+
+        # STEP 3 - Read files
+        # Get samples of analogue channels of interest from each file
+        data = []
+        for file in all_files:
+            if getsize(file) == 0:
+                warn(f"File '{file}' has 0 bytes. Its reading was dismissed.")
+                continue
+            what_is_read = Sense.__read_file(file, type, channel_labels, modalities_available)
+            if not Sense.BAD_FORMAT:
+                data.append(what_is_read)
+            else:
+                samples_of_interest_by_segment, start_dates, sf = what_is_read
+                for segment, start_date in zip(samples_of_interest_by_segment, start_dates):
+                    data.append((segment, start_date, sf))
+                Sense.BAD_FORMAT = False  # done dealing with a bad format
+
+        # E.g.: data[k] = samples_of_interest, start_date, sampling_frequency
+
+        # STEP 4 - Restructuring
+        # Listing all Segments of the same channel together, labelled with the same channel label.
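+        # For illustration (values assumed), this step turns
+        #     data = [({'ECG': arr1}, t1, 360.0), ({'ECG': arr2}, t2, 360.0)]
+        # into
+        #     segments = {'ECG': {t1: arr1, t2: arr2}}
+        # so that all dated segments of a channel end up together before being
+        # wrapped in a (possibly discontiguous) Timeseries below.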
+ res = {} + segments = {} + for samples, date, sf in data: + for channel in samples: + # instantiating or appending + if channel not in res: + segments[channel] = {date: samples[channel]} + else: + segments[channel][date] = samples[channel] + res[channel] = sf # save sampling frequency here to be used on the next loop + + # Encapsulating the list of Segments of the same channel in a Timeseries + for channel in segments: + if len(segments[channel]) > 1: + res[channel] = timeseries.Timeseries.withDiscontiguousSegments(segments[channel], sampling_frequency=res[channel]) + else: + res[channel] = timeseries.Timeseries(tuple(segments[channel].values())[0], tuple(segments[channel].keys())[0], sampling_frequency=res[channel]) + + return res + + @staticmethod + def _acquisition_location(path, type, **options): + _, _, bl = Sense.__get_defaults() + return bl + + @staticmethod + def _write(dir, timeseries): + pass # TODO + + @staticmethod + def _transfer(samples, to_unit): + pass + + + +class Bitalino(BiosignalSource): + def __init__(self): + super().__init__() + + def __repr__(self): + return "Bitalino" + + def __aux_date(header): + """ + Get starting time from header + """ + time_key = [key for key in header.keys() if 'time' in key][0] + try: + return to_datetime(header['date'].strip('\"') + ' ' + header[time_key].strip('\"')) + except Exception as e: + print(e) + + def __check_empty(len_, type=''): + """ + Confirm if the length is acceptable and return the desired output + """ + if type == 'file_size': + if len_ <= 50: + return True + else: + if len_ < 1: + return True + return False + + def __change_sens_list(sens, device, channels): + """ + Confirm if the list of sensors has only RAW as labels, and ask the user for new labels in that case. + """ + if list(set(sens)) == ['RAW']: + print(f'Please update sens according to the sensors used:') + analogs = channels[-len(sens):] + for se in range(len(sens)): + new_se = str(input(f'{device} -- {sens[se]} -- {analogs[se]}')).upper() + sens[se] = new_se + return sens + + def __analog_idx(header, sensor, **options): + """ + From a header choose analog sensor key idx that correspond to a specific sensor. + This also runs read json to save configurations to facilitate implementation + This function leads with several devices and it returns a list that may contain one or several integers + """ + sensor_idx, sensor_names, json_bool, chosen_device = [], [], False, '' + # if options and json key, get json to calculate + if options: + if 'json' in options.keys(): + json_bool = options['json'] + json_dir = options['json_dir'] if 'json_dir' in options.keys() \ + else path.join(getcwd(), 'bitalino.json') + len_ch = 0 + for device in header.keys(): + chosen_device = device + sens_id = '' + # iterate over each device + if json_bool: + sens, ch, location = Bitalino.__read_json(json_dir, header[device]) + else: + sens = header[device][str(input(f'What is the header key of sensor names? {header}\n ')).strip().lower()] + ch = header[device][str(input(f'What is the header key for analog channels? {header}\n ')).strip().lower()] + location = str(input(f'What is the body location of this device {device}? 
\n'))
+ sens = Bitalino.__change_sens_list(sens, device, ch)
+ analogs = ch[-len(sens):]
+
+ if sensor in str(sens):
+ # add other column devices as offset to the column to retrieve
+ location_bool = True
+ if 'location' in options.keys():
+ if location.lower() not in options['location'].lower():
+ location_bool = False
+ sens_id = [lab + '_' + location for lab in sens if sensor in lab.upper() and location_bool]
+ sensor_idx += [len_ch + ch.index(analogs[sens.index(sid.split('_')[0])]) for sid in sens_id]
+ if sens_id:
+ chosen_device = device
+ len_ch = len(ch)
+ sensor_names += sens_id
+
+ return sensor_idx, sensor_names, chosen_device
+
+ def __read_json(dir_, header):
+ # check if the bitalino json exists and return the channels, labels and location
+ if path.isfile(dir_) and access(dir_, R_OK): # the file exists and is readable
+ with open(dir_, 'r') as json_file:
+ json_string = load(json_file)
+ else:
+ print("Either file is missing or is not readable, creating file...")
+ json_string = {}
+ if 'device connection' in header.keys():
+ device = header['device connection']
+ else:
+ device = input('Enter device id (string): ')
+ if device not in json_string.keys():
+ json_string[device] = {}
+
+ for key in ['column', 'label', 'firmware version', 'device', 'resolution', 'channels', 'sensor', 'location']:
+ if key not in json_string[device].keys():
+ if key in header.keys():
+ json_string[device][key] = header[key]
+ else:
+ print(header['device connection'], header['label'])
+ new_info = str(input(f'{key}: ')).lower()
+ json_string[device][key] = new_info
+ if key == 'label':
+ sens = Bitalino.__change_sens_list(json_string[device]['label'], device, header['column'])
+ json_string[device][key] = sens
+ with open(dir_, 'w') as db_file:
+ dump(json_string, db_file, indent=2)
+ return json_string[device]['label'], json_string[device]['column'], json_string[device]['location']
+
+ @staticmethod
+ def __read_metadata(dirfile, sensor, **options):
+ """
+ Read metadata of a single file
+ Args:
+ dirfile (str): contains the file path
+ sensor (str): contains the sensor label to look for
+ Returns:
+ sensor_idx (list), sensor_names (list), device (str), header (dict)
+ **options (dict): equal to _read arg
+ """
+ # size of the bitalino file
+ file_size = path.getsize(dirfile)
+ if file_size <= 50:
+ return [], [], '', {} # too small to contain a header; the caller keeps searching
+
+ with open(dirfile) as fh:
+ next(fh)
+ header = next(fh)[2:]
+ next(fh)
+
+ header = ast.literal_eval(header)
+ sensor_idx, sensor_names, device = Bitalino.__analog_idx(header, sensor, **options)
+ return sensor_idx, sensor_names, device, header[device]
+
+ # @staticmethod
+ def __read_bit(dirfile, sensor, sensor_idx=[], sensor_names=[], device='', **options):
+ """
+ Reads one Bitalino TXT file
+ Args:
+ dirfile (str): contains the file path
+ sensor (str): contains the sensor label to look for
+ sensor_idx (list): list of indexes that correspond to the columns of sensor to extract
+ sensor_names (list): list of names that correspond to the sensor label
+ ex: sensor='ECG', sensor_names=['ECG_chest']
+ ex: sensor='ACC', options['location']='wrist', sensor_names=['ACCX_wrist','ACCY_wrist','ACCZ_wrist']
+ device (str): device MacAddress, used to get the specific header, especially when using 2 devices
+ **options (dict): equal to _read arg
+
+ Returns:
+ sensor_data (array): 2-dimensional array of samples (rows) by sensor columns
+ date (datetime): initial datetime of the array
+
+ Raises:
+ IOError: if sensor_names is empty, meaning no channels could be retrieved for chosen sensor
+ """
+ # size of the bitalino file
+ file_size = path.getsize(dirfile)
+ if file_size <= 50:
+ return None # too small to contain a valid acquisition
+ with open(dirfile) as fh:
+ next(fh)
+ header = next(fh)[2:]
+ next(fh)
+ # signal
+ data = np.array([line.strip().split() for line in fh], float)
+ # if file is empty, return
+ if Bitalino.__check_empty(len(data)):
+ return None
+
+ header = ast.literal_eval(header)
+ if len(sensor_names) > 0:
+ sensor_data = data[:, sensor_idx]
+ date = Bitalino.__aux_date(header[device])
+ return sensor_data, date
+ else:
+ raise IOError(f"Sensor {sensor} was not found in this acquisition, please insert another")
+
+ @staticmethod
+ def _timeseries(dir, type, startkey='A20', **options):
+ """Reads multiple Bitalino TXT files on the directory 'dir' and returns a Biosignal associated with a Patient.
+ Args:
+ dir (str): directory that contains Bitalino files in txt format
+ type (Biosignal): type of biosignal to extract; can be one of ECG, EDA, PPG, RESP, ACC and EMG
+ startkey (str): default is A20; the key that appears in all Bitalino file names to extract from the directory
+ **options (dict): only the keys json, json_dir and location are evaluated.
+ options[json] (bool): if the user wants to use a json to save and load bitalino configurations
+ options[json_dir] (str): directory to json file. If not defined, a default will be set automatically
+ options[location] (str): if given, only the devices with that body location will be retrieved
+
+ Returns:
+ dict: A dictionary where keys are the sensors associated to the Biosignal with a Timeseries to each key
+
+ Raises:
+ IOError: if the Biosignal is not one of the ones mentioned
+ IOError: if the list of bitalino files from dir returns empty
+ IOError: if header is still empty after going through all Bitalino files
+ """
+ options = {'json': True, 'json_dir': 'bitalino.json', **options} # defaults; caller options take precedence
+ sensor = 'ECG' if type is modalities.ECG else 'EDA' if type is modalities.EDA else 'PPG' if type is modalities.PPG else 'ACC' if type is modalities.ACC else 'PZT' if type is modalities.RESP else 'EMG' if type is modalities.EMG else ''
+ if sensor == '':
+ raise IOError(f'Type {type} does not have label associated, please insert one')
+ # first a list is created with all the filenames inside the chosen dir that contain startkey
+ all_files = sorted([path.join(dir, file) for file in listdir(dir) if startkey in file])
+ # get header and sensor positions by running the bitalino files until a header is found
+ if not all_files:
+ raise IOError(f'No files in dir="{dir}" that start with {startkey}')
+ header, h = {}, 0
+ while len(header) < 1 and h < len(all_files):
+ ch_idx, channels, device, header = Bitalino.__read_metadata(all_files[h], sensor, **options)
+ h += 1
+ if header == {}:
+ raise IOError(f'The files in {dir} did not contain a valid Bitalino header for type {sensor}')
+ new_dict = {}
+ segments = [Bitalino.__read_bit(file, sensor=sensor, sensor_idx=ch_idx, sensor_names=channels,
+ device=device, **options) for file in all_files[h - 1:]]
+ for ch, channel in enumerate(channels):
+ samples = {segment[1]: segment[0][:, ch] for segment in segments if segment}
+ if len(samples) > 1:
+ new_timeseries = timeseries.Timeseries.withDiscontiguousSegments(samples, sampling_frequency=header['sampling rate'],
+ name=channels[ch])
+ else:
+ new_timeseries = timeseries.Timeseries(tuple(samples.values())[0], tuple(samples.keys())[0], header['sampling rate'],
+ name=channels[ch])
+ new_dict[channel] = new_timeseries
+ return new_dict
+
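A hypothetical usage sketch of the reader above. The directory, the import paths and the body location are assumptions, and _timeseries is normally invoked through the Biosignal machinery rather than called directly:

    from ltbio.biosignals.sources import Bitalino  # exact import path assumed
    from ltbio.biosignals import modalities

    channels = Bitalino._timeseries(
        '/data/patient01',         # hypothetical directory of A20*.txt files
        modalities.ECG,
        startkey='A20',
        json=True,                 # reuse a saved device configuration...
        json_dir='bitalino.json',  # ...from this JSON file
        location='chest',          # keep only devices worn at this location
    )
    for label, ts in channels.items():
        print(label, ts)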
+ @staticmethod
+ def _write(dir, timeseries):
+ '''Writes multiple TXT files on the directory 'dir' so they can be opened in Opensignals.'''
+ # TODO
+
+ @staticmethod
+ def _transfer(samples, to_unit):
+ pass
+
+ # -*- encoding: utf-8 -*-
+
+ # ===================================
+
+ # IT - LongTermBiosignals
+
+ # Package: biosignals
+ # Module: E4
+ # Description: Class E4, a type of BiosignalSource, with static procedures to read and write datafiles from
+ # an Empatica E4 wristband.
+
+ # Contributors: João Saraiva, Mariana Abreu
+ # Created: 15/06/2022
+ # Last Updated: 22/07/2022
+
+ # ===================================
+
+ class E4(BiosignalSource):
+ '''This class represents the source of an Empatica E4 wristband and includes methods to read and write
+ biosignal files provided by it. Usually they are in .csv format.'''
+
+ def __init__(self):
+ super().__init__()
+
+ def __repr__(self):
+ return "Empatica E4 - Epilepsy Wristband"
+
+ @staticmethod
+ def _aux_date(date):
+ """ Receives a string that contains a unix timestamp in UTC
+ Returns a datetime after conversion
+ """
+
+ ts = float(date)
+ return datetime.utcfromtimestamp(ts)
+
+ @staticmethod
+ def __get_header(file_path):
+ """
+ Auxiliary procedure to find the initial datetime (1st line) and sampling frequency (2nd line) of the file in the given path.
+ @param file_path: The path of the file to look for a header.
+ @return: A tuple with:
+ a) header: A dictionary with the header metadata.
+ b) column_names: A list of the column names.
+ @raise:
+ IOError: If the given file path does not exist.
+ """
+ with open(file_path) as fh:
+ header = next(fh)[1:] # Read first line
+ header = literal_eval(header) # Get a dictionary of the header metadata
+ column_names = next(fh)[1:] # Read second line
+ column_names = column_names.split() # Get a list of the column names
+ return header, column_names
+
+ @staticmethod
+ def __read_file(file_path):
+ """
+ Reads one csv file.
+ @param: file_path (str) path to one csv file
+ @return: A tuple with:
+ a) A dictionary with arrays of samples associated with channel labels (like {'label': [...], })
+ b) The initial datetime (in datetime)
+ c) The sampling frequency (in float)
+
+ """
+ with open(file_path, 'r') as f:
+ reader = csv.reader(f, dialect=csv.excel_tab, delimiter=',')
+ a = list(reader)
+
+ # Channel label comes from the file name, or (x, y, z) in case of ACC
+ channel_labels = file_path.split(sep)[-1].split('.csv')[0].lower()
+ channel_labels = (channel_labels,) if len(a[0]) == 1 else ('x', 'y', 'z')
+
+ # First row is the initial datetime
+ initial_datetime = E4._aux_date(a[0][0])
+
+ # Second row is the sampling frequency
+ sampling_frequency = float(a[1][0])
+
+ # From the third row on are the sample values
+ samples = vstack(a[2:]).astype('float32').T
+
+ return {label: samples[i] for i, label in enumerate(channel_labels)}, initial_datetime, sampling_frequency
+
+ @staticmethod
+ def _timeseries(dir, type, **options):
+ '''
+ Reads multiple CSV files on multiple subdirectories of 'dir' and returns a Biosignal associated with a Patient.
+ Args:
+ dir (str): directory that contains subdirectories of E4 files in csv format
+ type (Biosignal): type of biosignal to extract; can be one of HR, EDA, PPG, ACC and TEMP
+ '''
+ sensor = 'EDA' if type is modalities.EDA else 'BVP' if type is modalities.PPG else 'ACC' if type is modalities.ACC else 'HR' if type is modalities.HR else 'TEMP' \
+ if type is modalities.TEMP else ''
+ if sensor == '':
+ raise IOError(f'Type {type} does not have label associated, please insert one')
+
+ # STEP 1
+ # Get list of subdirectories
+ all_subdirectories = list([path.join(dir, d) for d in listdir(dir)])
+
+ res = {}
+ segments = {}
+ # STEP 2
+ # Get list of files of interest, i.e., the ones corresponding to the modality of interest
+ for subdir in all_subdirectories:
+ if isdir(subdir):
+ files_of_interest = [path.join(subdir, file) for file in listdir(subdir) if sensor in file]
+ if not files_of_interest:
+ raise IOError(f'Files were not found in path {subdir} for {sensor=} ')
+ file = files_of_interest[0]
+
+ # STEP 3
+ # Read each file
+ samples, start_date, sf = E4.__read_file(file)
+
+ # STEP 4 - Restructuring
+ # Listing all Segments of the same channel together, labelled with the same channel label.
+ for channel_label in samples:
+ # instantiating or appending
+ if channel_label not in res:
+ segments[channel_label] = {start_date: samples[channel_label]}
+ else:
+ segments[channel_label][start_date] = samples[channel_label]
+ res[channel_label] = sf # save sampling frequency here to be used on the next loop
+
+ # Encapsulating the list of Segments of the same channel in a Timeseries
+ for channel in segments:
+ if len(segments[channel]) > 1:
+ res[channel] = timeseries.Timeseries.withDiscontiguousSegments(segments[channel], sampling_frequency=res[channel])
+ else:
+ res[channel] = timeseries.Timeseries(tuple(segments[channel].values())[0], tuple(segments[channel].keys())[0],
+ sampling_frequency=res[channel])
+
+ return res
+
+ @staticmethod
+ def _events(dir: str, file_key='tag'):
+ """ Extracts onsets from the tags file
+ First we check if a tags file exists in the directory. Then it is opened and parsed into a list "a".
+ Each date in "a" is converted from a unix timestamp string to a datetime, using the _aux_date function.
+ Returns: A List of Event objects.
+ """ + + # STEP 1 + # Get list of subdirectories + all_subdirectories = list([path.join(dir, d) for d in listdir(dir)]) + + # STEP 2 + # Get tag file + res = [] + n_events = 0 # counter of events + for subdir in all_subdirectories: + if isdir(subdir): + onsets_file = [path.join(subdir, file) for file in listdir(subdir) if file_key in file] + if not onsets_file: + raise IOError(f"No tag file was found in path '{subdir}'.") + if len(onsets_file) > 1: + raise IOError(f'{len(onsets_file)} tag files were found, rather than just 1.') + else: + # STEP 3 + # Get onsets + with open(onsets_file[0], 'r') as f: + reader = csv.reader(f, dialect=csv.excel_tab) + a = list(reader) + # Events are named numerically + for i in range(len(a)): + n_events += 1 + res.append(timeseries.Event('event' + str(n_events), E4._aux_date(a[i][0]))) + return res + + @staticmethod + def _fetch(source_dir='', type=None, patient_code=None): + pass + + @staticmethod + def _write(path: str, timeseries: dict): + pass + + @staticmethod + def _transfer(samples, to_unit): + pass + + @staticmethod + def onbody(biosignal): + + window = timedelta(minutes=1) + + def condition_is_met_1_percent(x, condition): + count = np.count_nonzero(condition) + return count / len(x) >= 0.01 + + if type(biosignal) is modalities.ACC: + biosignal = biosignal['x'] + biosignal['y'] + biosignal['z'] # sum sample-by-sample the 3 axes + window_size = int(10 * biosignal.sampling_frequency) # 10 s moving standard deviation + + def moving_std(x): + cumsum = np.cumsum(x, dtype=float) + cumsum[window_size:] = cumsum[window_size:] - cumsum[:-window_size] + moving_averages = cumsum[window_size - 1:] / window_size + moving_sq_averages = np.cumsum(x ** 2, dtype=float) + moving_sq_averages[window_size:] = moving_sq_averages[window_size:] - moving_sq_averages[:-window_size] + moving_sq_averages = moving_sq_averages[window_size - 1:] / window_size + return np.sqrt(moving_sq_averages - moving_averages ** 2) + + x = biosignal.when(lambda x: condition_is_met_1_percent(x, moving_std(x) > 0.2), window=window) + x.name = biosignal.name + " Onbody Domain" + return x + + if type(biosignal) is modalities.EDA: + x = biosignal.when(lambda x: condition_is_met_1_percent(x, x > 0.05), window=window) + x.name = biosignal.name + " Onbody Domain" + return x + + if type(biosignal) is modalities.TEMP: + x = biosignal.when(lambda x: condition_is_met_1_percent(x, (x > 25) & (x < 40)), window=window) + x.name = biosignal.name + " Onbody Domain" + return x + + return None + +# =================================== +# Public Databases +# =================================== + + class MITDB(BiosignalSource): + '''This class represents the source of MIT-BIH Arrhythmia Database and includes methods to read and write + biosignal files provided by them. 
Usually they are in .dat format.'''
+
+ def __init__(self):
+ super().__init__()
+
+ def __repr__(self):
+ return "MIT-BIH Arrhythmia Database"
+
+ def __aux_date(header):
+ """
+ Get starting time from header
+ """
+ time_key = [key for key in header.keys() if 'time' in key][0]
+ time_date = [key for key in header.keys() if 'date' in key][0]
+ try:
+ return to_datetime(header[time_date].strip('\"') + ' ' + header[time_key].strip('\"'))
+ except Exception:
+ print(f'Date is {header[time_date]} and Time is {header[time_key]} so the default will be used')
+ print('Default start date: 2000-1-1 00:00:00')
+ return datetime(2000, 1, 1, 0, 0, 0)
+
+ @staticmethod
+ def __read_dat(dirfile, metadata=False):
+
+ """
+ Reads one dat file
+ param: dirfile (str) path to one file that ends in dat
+ If metadata is True - returns the list of channels, the sampling frequency and the units
+ Else returns the signal array and the initial datetime
+ """
+
+ # read the .dat record
+ signal, fields = wfdb.rdsamp(dirfile)
+ # get channels
+ channel_list = fields['sig_name']
+ if metadata:
+ return channel_list, fields['fs'], fields['units']
+ # structure of signal is one column array per channel
+ return signal, MITDB.__aux_date(fields)
+
+ @staticmethod
+ def _timeseries(dir, type, **options):
+ '''Reads multiple .dat files on the directory 'dir' and returns a Biosignal associated with a Patient.
+ Args:
+ dir (str): directory that contains MIT-BIH files in .dat format
+ type (Biosignal): type of biosignal to extract; must be ECG
+ '''
+ if type != modalities.ECG:
+ raise IOError(f'Type {type} must be ECG')
+ # first a list is created with all the filenames that end in .dat and are inside the chosen dir
+ all_files = sorted(list(set([path.join(dir, di.split('.')[0]) for di in sorted(listdir(dir)) if di.endswith('dat')])))
+
+ # run the dat read function for all files in list all_files
+ channels, sfreq, units = MITDB.__read_dat(all_files[0], metadata=True)
+
+ all_dat = list(map(MITDB.__read_dat, all_files))
+ new_dict = {}
+ for ch in range(len(channels)):
+ segments = {dat_data[1]: dat_data[0][:, ch] for dat_data in all_dat}
+ unit = Volt(Multiplier.m) if 'mV' in units[ch] else None
+ name = BodyLocation.MLII if channels[ch].strip() == 'MLII' else BodyLocation.V5 if channels[ch].strip() == 'V5' else \
+ channels[ch]
+ if len(segments) > 1:
+ new_timeseries = timeseries.Timeseries.withDiscontiguousSegments(segments, sampling_frequency=sfreq, name=channels[ch],
+ units=unit)
+ else:
+ new_timeseries = timeseries.Timeseries(tuple(segments.values())[0], tuple(segments.keys())[0], sfreq, name=channels[ch],
+ units=unit)
+ new_dict[channels[ch]] = new_timeseries
+
+ return new_dict
+
+ @staticmethod
+ def _fetch(type=None, patient_code=None):
+ """ Fetch one patient from the database
+ Args:
+ patient_code (int): number of patient to select
+ """
+ # Transform patient code to the patient folder name
+ if not patient_code:
+ raise IOError('Please give a patient code (int)')
+
+ temp_dir = '.cache'
+ if not path.isdir(temp_dir):
+ makedirs(temp_dir)
+ downloaded_file = wget.download('https://physionet.org/content/mitdb/1.0.0/' + str(patient_code) + '.dat', out=temp_dir)
+ if downloaded_file != '':
+ print(f'{downloaded_file=}')
+ files = MITDB._timeseries(downloaded_file, type)
+ return files
+ else:
+ raise IOError(f'No patient was found {patient_code=}')
+
+ @staticmethod
+ def _write(path: str, timeseries: dict):
+ pass
+
+ @staticmethod
+ def _transfer(samples, to_unit):
+ pass
+
+ class Seer(BiosignalSource):
+ '''This class represents the source of Seer Epilepsy Database and includes methods to read and write
+ biosignal files provided by them. Usually they are in .edf format.'''
+
+ def __init__(self):
+ super().__init__()
+
+ def __repr__(self):
+ return "Seer Epilepsy Database"
+
+ @staticmethod
+ def __read_file(dirfile, metadata=False):
+ """
+ Reads one EDF file
+ param: dirfile (str) path to one .edf file
+ If metadata is True - returns the list of channels and the sampling frequency
+ Else returns the signal arrays (one per channel) and the initial datetime
+ """
+ # get edf data
+ edf = read_raw_edf(dirfile)
+ # get channels that correspond to type (HR = type HR)
+ channel_list = edf.ch_names
+ # initial datetime
+ if metadata:
+ return channel_list, edf.info['sfreq'], None
+ # structure of signal is one array per channel
+ signal = edf.get_data()
+ date = edf.info['meas_date'].replace(tzinfo=None)
+ edf.close()
+ return signal, date
+
+ @staticmethod
+ def _timeseries(dir, type, **options):
+ '''Reads multiple EDF/EDF+ files on the directory 'dir' and returns a Biosignal associated with a Patient.
+ Args:
+ dir (str): directory that contains Seer files in EDF format
+ type (Biosignal): type of biosignal to extract; can be one of ECG, EDA, PPG, RESP, ACC, EMG and HR
+ '''
+ sensor = 'ECG' if type is modalities.ECG else 'EDA' if type is modalities.EDA else 'PPG' if type is modalities.PPG else 'ACC' if type is modalities.ACC \
+ else 'PZT' if type is modalities.RESP else 'EMG' if type is modalities.EMG else 'HR' if type is modalities.HR else ''
+ if sensor == '':
+ raise IOError(f'Type {type} does not have label associated, please insert one')
+ # first a list is created with all the filenames inside the chosen dir that match the sensor label
+ all_files = sorted(list(set([path.join(dir, di) for di in sorted(listdir(dir)) if sensor in di.upper()])))
+ # devices example "Byteflies, Empatica"
+ devices = set([file.split(' - ')[-1] for file in all_files])
+ # run the read function for all files in list all_files
+ new_dict = {}
+ for device in devices:
+ # select only device files
+ device_files = [file for file in all_files if device in file]
+ channels, sfreq, units = Seer.__read_file(device_files[0], metadata=True)
+ all_edf = list(map(Seer.__read_file, device_files))
+ for ch in range(len(channels)):
+ segments = {edf_data[1]: edf_data[0][ch] for edf_data in all_edf}
+ unit = units
+ name = f'{channels[ch]} from {device.split("-")[0]}'
+ dict_key = f'{device.split("-")[0]}-{channels[ch].upper()}' if len(devices) > 1 else channels[ch].upper()
+ if len(segments) > 1:
+ new_timeseries = timeseries.Timeseries.withDiscontiguousSegments(segments, sampling_frequency=sfreq, name=name,
+ units=unit)
+ else:
+ new_timeseries = timeseries.Timeseries(tuple(segments.values())[0], tuple(segments.keys())[0], sfreq, name=name,
+ units=unit)
+ new_dict[dict_key] = new_timeseries
+
+ return new_dict
+
+ @staticmethod
+ def _fetch(source_dir='', type=None, patient_code=None):
+ """ Fetch one patient from the database
+ Args:
+ patient_code (int): number of patient to select
+ """
+ # Transform patient code to the patient folder name
+ if not patient_code:
+ raise IOError('Please give a patient code (int)')
+ if source_dir == '':
+ raise 
IOError('Please give patients location') + list_patients = listdir(source_dir) + selected_patient = [pat for pat in list_patients if str(patient_code) in pat] + if len(selected_patient) == 1: + print(f'{selected_patient=}') + path_ = path.join(source_dir, selected_patient[0]) + files = Seer._timeseries(path_, type) + return files + elif len(selected_patient) > 1: + raise IOError(f'More than one patient found {selected_patient=}') + else: + raise IOError(f'No patient was found {selected_patient=}') + + @staticmethod + def _write(path: str, timeseries: dict): + pass + + @staticmethod + def _transfer(samples, to_unit): + pass + diff --git a/src/ltbio/biosignals/sources/BiosignalSource.py b/src/ltbio/biosignals/sources/BiosignalSource.py deleted file mode 100644 index 7664e9a5..00000000 --- a/src/ltbio/biosignals/sources/BiosignalSource.py +++ /dev/null @@ -1,94 +0,0 @@ -# -*- encoding: utf-8 -*- - -# =================================== - -# IT - LongTermBiosignals - -# Package: biosignals -# Module: BiosignalSource -# Description: Abstract class BiosignalSource, with static procedures to ease the reading and writting files from any -# source (hospitals, devices, datasets ...). - -# Contributors: João Saraiva, Mariana Abreu -# Created: 25/04/2022 -# Last Updated: 29/06/2022 - -# =================================== - -from abc import ABC, abstractmethod -from typing import Collection - -from numpy import array - -from ltbio.biosignals.timeseries.Event import Event - - -class BiosignalSource(ABC): - - __SERIALVERSION: int = 1 - - def __init__(self): - pass - - @abstractmethod - def __repr__(self): - pass - - def __eq__(self, other): - return type(self) == type(other) - - @staticmethod - @abstractmethod - def _timeseries(path:str, type, **options): - pass - - @staticmethod - def _events(path:str, **options) -> tuple[Event] | None: - return None # Override implementation is optional - - @staticmethod - @abstractmethod - def _write(path:str, timeseries:dict): - pass - - @staticmethod - @abstractmethod - def _transfer(samples:array, type) -> array: - pass - - @classmethod - def _get(cls, path:str, type, **options): - return { - 'timeseries': cls._timeseries(path, type, **options), - 'patient': cls._patient(path, **options), - 'acquisition_location': cls._acquisition_location(path, type, **options), - 'events': cls._events(path, **options), - 'name': cls._name(path, type, **options) - } - - @staticmethod - def _patient(path, **options): - return None # Override implementation is optional - - @staticmethod - def _acquisition_location(path, type, **options): - return None # Override implementation is optional - - @staticmethod - def _name(path, type, **options): - return None # Override implementation is optional - - def __getstate__(self): - """ - 1: other... (dict) - """ - other_attributes = self.__dict__.copy() - return (self.__SERIALVERSION, ) if len(other_attributes) == 0 else (self.__SERIALVERSION, other_attributes) - - def __setstate__(self, state): - if state[0] == 1: - if len(state) == 2: - self.__dict__.update(state[1]) - else: - raise IOError(f'Version of {self.__class__.__name__} object not supported. 
Serialized version: {state[0]};' - f'Supported versions: 1.') diff --git a/src/ltbio/biosignals/sources/Bitalino.py b/src/ltbio/biosignals/sources/Bitalino.py deleted file mode 100644 index 73d21e68..00000000 --- a/src/ltbio/biosignals/sources/Bitalino.py +++ /dev/null @@ -1,272 +0,0 @@ -# -*- encoding: utf-8 -*- - -# =================================== - -# IT - LongTermBiosignals - -# Package: biosignals -# Module: Bitalino -# Description: Class Bitalino, a type of BiosignalSource, with static procedures to read and write datafiles from -# any Bitalino device. - -# Contributors: João Saraiva, Mariana Abreu -# Created: 25/04/2022 -# Last Updated: 22/07/2022 - -# =================================== - -import ast -from json import load, dump -from os import listdir, path, getcwd, access, R_OK - -import numpy as np -from dateutil.parser import parse as to_datetime - -from .. import timeseries -from .. import modalities -from ..sources.BiosignalSource import BiosignalSource - - -class Bitalino(BiosignalSource): - def __init__(self): - super().__init__() - - def __repr__(self): - return "Bitalino" - - def __aux_date(header): - """ - Get starting time from header - """ - time_key = [key for key in header.keys() if 'time' in key][0] - try: - return to_datetime(header['date'].strip('\"') + ' ' + header[time_key].strip('\"')) - except Exception as e: - print(e) - - def __check_empty(len_, type=''): - """ - Confirm if the length is acceptable and return the desired output - """ - if type == 'file_size': - if len_ <= 50: - return True - else: - if len_ < 1: - return True - return False - - def __change_sens_list(sens, device, channels): - """ - Confirm if the list of sensors has only RAW as labels, and ask the user for new labels in that case. - """ - if list(set(sens)) == ['RAW']: - print(f'Please update sens according to the sensors used:') - analogs = channels[-len(sens):] - for se in range(len(sens)): - new_se = str(input(f'{device} -- {sens[se]} -- {analogs[se]}')).upper() - sens[se] = new_se - return sens - - def __analog_idx(header, sensor, **options): - """ - From a header choose analog sensor key idx that correspond to a specific sensor. - This also runs read json to save configurations to facilitate implementation - This function leads with several devices and it returns a list that may contain one or several integers - """ - sensor_idx, sensor_names, json_bool, chosen_device = [], [], False, '' - # if options and json key, get json to calculate - if options: - if 'json' in options.keys(): - json_bool = options['json'] - json_dir = options['json_dir'] if 'json_dir' in options.keys() \ - else path.join(getcwd(), 'bitalino.json') - len_ch = 0 - for device in header.keys(): - chosen_device = device - sens_id = '' - # iterate over each device - if json_bool: - sens, ch, location = Bitalino.__read_json(json_dir, header[device]) - else: - sens = header[device][str(input(f'What is the header key of sensor names? {header}\n ')).strip().lower()] - ch = header[device][str(input(f'What is the header key for analog channels? {header}\n ')).strip().lower()] - location = str(input(f'What is the body location of this device {device}? 
\n')) - sens = Bitalino.__change_sens_list(sens, device, ch) - analogs = ch[-len(sens):] - - if sensor in str(sens): - # add other column devices as offset to the column to retrieve - location_bool = True - if 'location' in options.keys(): - if location.lower() not in options['location'].lower(): - location_bool = False - sens_id = [lab + '_' + location for lab in sens if sensor in lab.upper() and location_bool] - sensor_idx += [len_ch + ch.index(analogs[sens.index(sid.split('_')[0])]) for sid in sens_id] - if sens_id != '': - chosen_device = device - len_ch = len(ch) - sensor_names += sens_id - - return sensor_idx, sensor_names, chosen_device - - def __read_json(dir_, header): - # check if bitalino json exists and returns the channels and labels and location - if path.isfile(dir_) and access(dir_, - R_OK): - # checks if file exists - with open(dir_, 'r') as json_file: - json_string = load(json_file) - else: - print("Either file is missing or is not readable, creating file...") - json_string = {} - if 'device connection' in header.keys(): - device = header['device connection'] - else: - device = input('Enter device id (string): ') - if device not in json_string.keys(): - json_string[device] = {} - - for key in ['column', 'label', 'firmware version', 'device', 'resolution', 'channels', 'sensor', 'location']: - if key not in json_string[device].keys(): - if key in header.keys(): - json_string[device][key] = header[key] - else: - print(header['device connection'], header['label']) - new_info = str(input(f'{key}: ')).lower() - json_string[device][key] = new_info - if key == 'label': - sens = Bitalino.__change_sens_list(json_string[device]['label'], device, header['column']) - json_string[device][key] = sens - with open(dir_, 'w') as db_file: - dump(json_string, db_file, indent=2) - return json_string[device]['label'], json_string[device]['column'], json_string[device]['location'] - - @staticmethod - def __read_metadata(dirfile, sensor, **options): - """ - Read metadata of a single file - Args: - dirfile (str): contains the file path - sensor (str): contains the sensor label to look for - Returns: - sensor_idx (list), sensor_names (list), device (str), header (dict) - **options (dict): equal to _read arg - """ - # size of bitalino file - file_size = path.getsize(dirfile) - if file_size <= 50: - return {} - - with open(dirfile) as fh: - next(fh) - header = next(fh)[2:] - next(fh) - - header = ast.literal_eval(header) - sensor_idx, sensor_names, device = Bitalino.__analog_idx(header, sensor, **options) - return sensor_idx, sensor_names, device, header[device] - - # @staticmethod - def __read_bit(dirfile, sensor, sensor_idx=[], sensor_names=[], device='', **options): - """ - Reads one edf file - Args: - dirfile (str): contains the file path - sensor (str): contains the sensor label to look for - sensor_idx (list): list of indexes that correspond to the columns of sensor to extract - sensor_names (list): list of names that correspond to the sensor label - ex: sensor='ECG', sensor_names=['ECG_chest'] - ex: sensor='ACC', options['location']='wrist', sensor_names=['ACCX_wrist','ACCY_wrist','ACCZ_wrist'] - device (str): device MacAddress, this is used to get the specific header, specially when using 2 devices - **options (dict): equal to _read arg - - Returns: - sensor_data (array): 2-dimensional array of time over sensors columns - date (datetime): initial datetime of array - - Raises: - IOError: if sensor_names is empty, meaning no channels could be retrieved for chosen sensor - """ - # size of 
bitalino file - file_size = path.getsize(dirfile) - if file_size <= 50: - return '', [] - with open(dirfile) as fh: - next(fh) - header = next(fh)[2:] - next(fh) - # signal - data = np.array([line.strip().split() for line in fh], float) - # if file is empty, return - if Bitalino.__check_empty(len(data)): - return None - - header = ast.literal_eval(header) - if len(sensor_names) > 0: - sensor_data = data[:, sensor_idx] - date = Bitalino.__aux_date(header[device]) - print(date) - return sensor_data, date - else: - raise IOError(f"Sensor {sensor} was not found in this acquisition, please insert another") - - @staticmethod - def _timeseries(dir, type, startkey='A20', **options): - """Reads multiple EDF/EDF+ files on the directory 'path' and returns a Biosignal associated with a Patient. - Args: - dir (str): directory that contains bitalino files in txt format - type (Biosignal): type of biosignal to extract can be one of ECG, EDA, PPG, RESP, ACC and EMG - startkey (str): default is A20. the key that appears in all bitalino file names to extract from directory - **options (dict): only the keys json, json_dir and location are being evaluated. - options[json] (bool): if the user wants to use a json to save and load bitalino configurations - options[json_dir] (str): directory to json file. If not defined, a default will be set automatically - options[location] (str): if given, only the devices with that body location will be retrieved - - Returns: - dict: A dictionary where keys are the sensors associated to the Biosignal with a Timeseries to each key - - Raises: - IOError: if the Biosignal is not one of the ones mentioned - IOError: if the list of bitalino files from dir returns empty - IOError: if header is still empty after going through all Bitalino files - """ - options = {'json_bool': True, 'json_dir': 'bitalino.json'} - sensor = 'ECG' if type is modalities.ECG else 'EDA' if type is modalities.EDA else 'PPG' if type is modalities.PPG else 'ACC' if type is modalities.ACC else 'PZT' if type is modalities.RESP else 'EMG' if type is modalities.EMG else '' - if sensor == '': - raise IOError(f'Type {type} does not have label associated, please insert one') - # first a list is created with all the filenames that end in .edf and are inside the chosen dir - # this is a list of lists where the second column is the type of channel to extract - all_files = sorted([path.join(dir, file) for file in listdir(dir) if startkey in file]) - # get header and sensor positions by running the bitalino files until a header is found - if not all_files: - raise IOError(f'No files in dir="{dir}" that start with {startkey}') - header, h = {}, 0 - while len(header) < 1: - ch_idx, channels, device, header = Bitalino.__read_metadata(all_files[h], sensor, **options) - h += 1 - if header == {}: - raise IOError(f'The files in {dir} did not contain a bitalino type {header}') - new_dict = {} - segments = [Bitalino.__read_bit(file, sensor=sensor, sensor_idx=ch_idx, sensor_names=channels, - device=device, **options) for file in all_files[h-1:]] - for ch, channel in enumerate(channels): - - samples = {segment[1]: segment[0][:, ch] for segment in segments if segment} - if len(samples) > 1: - new_timeseries = timeseries.Timeseries.withDiscontiguousSegments(samples, sampling_frequency=header['sampling rate'], - name=channels[ch]) - else: - new_timeseries = timeseries.Timeseries(tuple(samples.values())[0], tuple(samples.keys())[0], header['sampling rate'], - name=channels[ch]) - new_dict[channel] = new_timeseries - return new_dict - - 
@staticmethod - def _write(dir, timeseries): - '''Writes multiple TXT files on the directory 'path' so they can be opened in Opensignals.''' - # TODO - - @staticmethod - def _transfer(samples, to_unit): - pass diff --git a/src/ltbio/biosignals/sources/E4.py b/src/ltbio/biosignals/sources/E4.py deleted file mode 100644 index 8c93ad79..00000000 --- a/src/ltbio/biosignals/sources/E4.py +++ /dev/null @@ -1,232 +0,0 @@ -# -*- encoding: utf-8 -*- - -# =================================== - -# IT - LongTermBiosignals - -# Package: biosignals -# Module: E4 -# Description: Class E4, a type of BiosignalSource, with static procedures to read and write datafiles from -# an Empatica E4 wristband. - -# Contributors: João Saraiva, Mariana Abreu -# Created: 15/06/2022 -# Last Updated: 22/07/2022 - -# =================================== - -import csv -from ast import literal_eval -from datetime import datetime, timedelta -from os import listdir, path, sep -from os.path import isdir - -import numpy as np -from numpy import vstack - -from .. import timeseries -from .. import modalities -from ..sources.BiosignalSource import BiosignalSource - - -class E4(BiosignalSource): - '''This class represents the source of Seer Epilepsy Database and includes methods to read and write - biosignal files provided by them. Usually they are in .edf format.''' - - def __init__(self): - super().__init__() - - def __repr__(self): - return "Empatica E4 - Epilepsy Wristband" - - @staticmethod - def _aux_date(date): - """ Receives a string that contains a unix timestamp in UTC - Returns a datetime after convertion - """ - - ts = float(date) - return datetime.utcfromtimestamp(ts) - - @staticmethod - def __get_header(file_path): - """ - Auxiliary procedures to find the initial datetimes (1st line) and sampling frequencies (2nd line) of the file in the given path. - @param file_path: The path of the file to look for a header. - @return: A tuple with: - a) channel_labels: A dictionary with the header metadata. - b) column_names: A list of the column names. - @raise: - IOError: If the given file path does not exist. - """ - with open(file_path) as fh: - header = next(fh)[1:] # Read first line - header = literal_eval(header) # Get a dictionary of the header metadata - column_names = next(fh)[1:] # Read second line - column_names = column_names.split() # Get a list of the column names - return header, column_names - - @staticmethod - def __read_file(file_path): - """ - Reads one csv file. 
- @param: file_path (str) path to one csv file - @return: A tuple with: - a) A dictionary with arrays of samples associated with channel labels (like {'label': [...], }) - b) The initial datetime (in datetime) - c) The sampling frequency (in float) - - """ - with open(file_path, 'r') as f: - reader = csv.reader(f, dialect=csv.excel_tab, delimiter=',') - a = list(reader) - - # Channel label comes from the file name, or (x, y, z) in case of ACC - channel_labels = file_path.split(sep)[-1].split('.csv')[0].lower() - channel_labels = (channel_labels, ) if len(a[0]) == 1 else ('x', 'y', 'z') - - # First row is the initial datetime - datetime = E4._aux_date(a[0][0]) - - # Second row is sampling frequency - sampling_frequency = float(a[1][0]) - - # Form third row and on are the sample values - samples = vstack(a[2:]).astype('float32').T - - return {label: samples[i] for i, label in enumerate(channel_labels)}, datetime, sampling_frequency - - @staticmethod - def _timeseries(dir, type, **options): - ''' - Reads multiple CSV files on multiple subdirectories of 'path' and returns a Biosignal associated with a Patient. - Args: - dir (str): directory that contains subdirectories of E4 files in csv format - type (Biosignal): type of biosignal to extract can be one of HR, EDA, PPG and ACC - ''' - sensor = 'EDA' if type is modalities.EDA else 'BVP' if type is modalities.PPG else 'ACC' if type is modalities.ACC else 'HR' if type is modalities.HR else 'TEMP' \ - if type is modalities.TEMP else '' - if sensor == '': - raise IOError(f'Type {type} does not have label associated, please insert one') - - # STEP 1 - # Get list of subdirectories - all_subdirectories = list([path.join(dir, d) for d in listdir(dir)]) - - res = {} - segments = {} - # STEP 2 - # Get list of files of interest, i.e., the ones corresponding to the modality of interest - for subdir in all_subdirectories: - if isdir(subdir): - file = list([path.join(subdir, file) for file in listdir(subdir) if sensor in file])[0] - if not file: - raise IOError(f'Files were not found in path {subdir} for {sensor=} ') - - # STEP 3 - # Read each file - samples, datetime, sf = E4.__read_file(file) - - # STEP 4 - Restructuring - # Listing all Segments of the same channel together, labelled to the same channel label. - for channel_label in samples: - # instantiating or appending - if channel_label not in res: - segments[channel_label] = {datetime: samples[channel_label]} - else: - segments[channel_label][datetime] = samples[channel_label] - res[channel_label] = sf # save sampling frequency here to be used on the next loop - - # Encapsulating the list of Segments of the same channel in a Timeseries - for channel in segments: - if len(segments[channel]) > 1: - res[channel] = timeseries.Timeseries.withDiscontiguousSegments(segments[channel], sampling_frequency=res[channel]) - else: - res[channel] = timeseries.Timeseries(tuple(segments[channel].values())[0], tuple(segments[channel].keys())[0], sampling_frequency=res[channel]) - - return res - - @staticmethod - def _events(dir:str, file_key='tag'): - """ Extracts onsets from tags file - First we check if a tags file exists in directory. Then it will be opened and passed as a list "a". - Each date in a will be transformed from unix timestamp str to datetime using aux_date function. - Returns: A List of Event objects. 
- """ - - # STEP 1 - # Get list of subdirectories - all_subdirectories = list([path.join(dir, d) for d in listdir(dir)]) - - # STEP 2 - # Get tag file - res = [] - n_events = 0 # counter of events - for subdir in all_subdirectories: - if isdir(subdir): - onsets_file = [path.join(subdir, file) for file in listdir(subdir) if file_key in file] - if not onsets_file: - raise IOError(f"No tag file was found in path '{subdir}'.") - if len(onsets_file) > 1: - raise IOError(f'{len(onsets_file)} tag files were found, rather than just 1.') - else: - # STEP 3 - # Get onsets - with open(onsets_file[0], 'r') as f: - reader = csv.reader(f, dialect=csv.excel_tab) - a = list(reader) - # Events are named numerically - for i in range(len(a)): - n_events += 1 - res.append(timeseries.Event('event' + str(n_events), E4._aux_date(a[i][0]))) - return res - - @staticmethod - def _fetch(source_dir='', type=None, patient_code=None): - pass - - @staticmethod - def _write(path:str, timeseries: dict): - pass - - @staticmethod - def _transfer(samples, to_unit): - pass - - @staticmethod - def onbody(biosignal): - - window = timedelta(minutes=1) - - def condition_is_met_1_percent(x, condition): - count = np.count_nonzero(condition) - return count / len(x) >= 0.01 - - if type(biosignal) is modalities.ACC: - biosignal = biosignal['x'] + biosignal['y'] + biosignal['z'] # sum sample-by-sample the 3 axes - window_size = int(10 * biosignal.sampling_frequency) # 10 s moving standard deviation - - def moving_std(x): - cumsum = np.cumsum(x, dtype=float) - cumsum[window_size:] = cumsum[window_size:] - cumsum[:-window_size] - moving_averages = cumsum[window_size - 1:] / window_size - moving_sq_averages = np.cumsum(x ** 2, dtype=float) - moving_sq_averages[window_size:] = moving_sq_averages[window_size:] - moving_sq_averages[:-window_size] - moving_sq_averages = moving_sq_averages[window_size - 1:] / window_size - return np.sqrt(moving_sq_averages - moving_averages ** 2) - - x = biosignal.when(lambda x: condition_is_met_1_percent(x, moving_std(x) > 0.2), window=window) - x.name = biosignal.name + " Onbody Domain" - return x - - if type(biosignal) is modalities.EDA: - x = biosignal.when(lambda x: condition_is_met_1_percent(x, x > 0.05), window=window) - x.name = biosignal.name + " Onbody Domain" - return x - - if type(biosignal) is modalities.TEMP: - x = biosignal.when(lambda x: condition_is_met_1_percent(x, (x > 25) & (x < 40)), window=window) - x.name = biosignal.name + " Onbody Domain" - return x - - return None \ No newline at end of file diff --git a/src/ltbio/biosignals/sources/HEM.py b/src/ltbio/biosignals/sources/HEM.py deleted file mode 100644 index 8d963d87..00000000 --- a/src/ltbio/biosignals/sources/HEM.py +++ /dev/null @@ -1,109 +0,0 @@ -# -*- encoding: utf-8 -*- - -# =================================== - -# IT - LongTermBiosignals - -# Package: biosignals -# Module: HEM -# Description: Class HEM, a type of BiosignalSource, with static procedures to read and write datafiles from -# Hospital Egas Moniz, Portugal. - -# Contributors: João Saraiva, Mariana Abreu -# Created: 25/04/2022 -# Last Updated: 22/07/2022 - -# =================================== -from datetime import timedelta -from os import listdir, path - -import numpy as np -from neo import MicromedIO -from numpy import array - -from .. import timeseries -from .. 
import modalities -from ..sources.BiosignalSource import BiosignalSource - - -class HEM(BiosignalSource): - '''This class represents the source of Hospital de Santa Maria (Lisboa, PT) and includes methods to read and write - biosignal files provided by them. Usually they are in the European EDF/EDF+ format.''' - - def __init__(self): - super().__init__() - - def __repr__(self): - return "Hospital Egas Moniz" - - @staticmethod - def __read_trc(list, metadata=False): - """ - Return trc file information, whether it is the values or the metadata, according to boolean metadata - :param list - :param metadata - - """ - dirfile = list[0] - sensor = list[1] - # get edf data - seg_micromed = MicromedIO(dirfile) - hem_data = seg_micromed.read_segment() - hem_sig = hem_data.analogsignals[0] - ch_list = seg_micromed.header['signal_channels']['name'] - # get channels that correspond to type (POL Ecg = type ecg) - find_idx = [hch for hch in range(len(ch_list)) if sensor.lower() in ch_list[hch].lower()] - # returns ch_list of interest, sampling frequency, initial datetime - if metadata: - return ch_list[find_idx], float(hem_sig.sampling_rate), hem_data.rec_datetime, hem_sig.units - # returns initial date and samples - print(ch_list[find_idx]) - return array(hem_sig[:, find_idx].T), hem_data.rec_datetime, ch_list[find_idx] - - @staticmethod - def _timeseries(dir, type, **options): - '''Reads multiple EDF/EDF+ files on the directory 'path' and returns a Biosignal associated with a Patient.''' - # first a list is created with all the filenames that end in .edf and are inside the chosen dir - # this is a list of lists where the second column is the type of channel to extract - if type is modalities.ECG: - label = 'ecg' - all_files = sorted([[path.join(dir, file), label] for file in listdir(dir) if file.lower().endswith('.trc')]) - # run the edf read function for all files in list all_files - channels, sfreq, start_datetime, units = HEM.__read_trc(all_files[0], metadata=True) - all_trc = list(map(HEM.__read_trc, all_files)) - # run the trc read function for all files in list all_files - new_dict, first_time = {}, all_trc[0][1] - # TODO ADD UNITS TO TIMESERIES - for channel in channels: - last_start = all_trc[0][1] - segments = {last_start: all_trc[0][0][list(all_trc[0][2]).index(channel)]} - for at, trc_data in enumerate(all_trc[1:]): - if channel not in trc_data[2]: - continue - ch = list(trc_data[2]).index(channel) - final_time = all_trc[at][1] + timedelta(seconds=len(all_trc[at][0][ch])/sfreq) - if trc_data[1] <= final_time: - if (final_time - trc_data[1]) < timedelta(seconds=1): - segments[last_start] = np.append(segments[last_start], trc_data[0][ch]) - else: - continue - print('here') - else: - segments[trc_data[1]] = trc_data[0][ch] - last_start = trc_data[1] - - if len(segments) > 1: - new_timeseries = timeseries.Timeseries.withDiscontiguousSegments(segments, sampling_frequency=sfreq, name=channels[ch]) - else: - new_timeseries = timeseries.Timeseries(tuple(segments.values())[0], tuple(segments.keys())[0], sfreq, name=channels[ch]) - new_dict[channels[ch]] = new_timeseries - - return new_dict - - @staticmethod - def _write(path: str, timeseries: dict): - pass - - @staticmethod - def _transfer(samples, to_unit): - pass diff --git a/src/ltbio/biosignals/sources/HSM.py b/src/ltbio/biosignals/sources/HSM.py deleted file mode 100644 index eb0819ed..00000000 --- a/src/ltbio/biosignals/sources/HSM.py +++ /dev/null @@ -1,90 +0,0 @@ -# -*- encoding: utf-8 -*- - -# =================================== - -# IT - 
LongTermBiosignals - -# Package: biosignals -# Module: HSM -# Description: Class HSM, a type of BiosignalSource, with static procedures to read and write datafiles from -# Hospital de Santa Maria, Portugal. - -# Contributors: João Saraiva, Mariana Abreu -# Created: 25/04/2022 -# Last Updated: 22/07/2022 - -# =================================== - -from os import listdir, path - -from mne.io import read_raw_edf - -from .. import timeseries -from .. import modalities -from ..sources.BiosignalSource import BiosignalSource - - -class HSM(BiosignalSource): - '''This class represents the source of Hospital de Santa Maria (Lisboa, PT) and includes methods to read and write - biosignal files provided by them. Usually they are in the European EDF/EDF+ format.''' - - def __init__(self): - super().__init__() - - def __repr__(self): - return "Hospital de Santa Maria" - - @staticmethod - def __read_edf(list, metadata=False): - - """ - Reads one edf file - If metadata is True - returns list of channels and sampling frequency and initial datetime - Else return arrays one for each channel - """ - dirfile = list[0] - sensor = list[1] - # get edf data - hsm_data = read_raw_edf(dirfile) - # get channels that correspond to type (POL Ecg = type ecg) - channel_list = [hch for hch in hsm_data.ch_names if sensor.lower() in hch.lower()] - # initial datetime - if metadata: - return channel_list, hsm_data.info['sfreq'] - # structure of hsm_sig is two arrays, the 1st has one array for each channel and the 2nd is an int-time array - hsm_sig = hsm_data[channel_list] - - return hsm_sig[0], hsm_data.info['meas_date'].replace(tzinfo=None) - - @staticmethod - def _timeseries(dir, type, **options): - '''Reads multiple EDF/EDF+ files on the directory 'path' and returns a Biosignal associated with a Patient.''' - if type is modalities.ECG: - label = 'ecg' - if type is modalities.EMG: - label = 'emg' - if type is modalities.EEG: - label = 'eeg' - # first a list is created with all the filenames that end in .edf and are inside the chosen dir - # this is a list of lists where the second column is the type of channel to extract - all_files = sorted([[path.join(dir, file), label] for file in listdir(dir) if file.endswith('.edf')]) - # run the edf read function for all files in list all_files - channels, sfreq = HSM.__read_edf(all_files[0], metadata=True) - all_edf = list(map(HSM.__read_edf, all_files)) - new_dict = {} - for ch in range(len(channels)): - segments = {edf_data[1]: edf_data[0][ch] for edf_data in all_edf} - if len(segments) > 1: - new_timeseries = timeseries.Timeseries.withDiscontiguousSegments(segments, sampling_frequency=sfreq, name=channels[ch]) - else: - new_timeseries = timeseries.Timeseries(tuple(segments.values())[0], tuple(segments.keys())[0], sfreq, name=channels[ch]) - new_dict[channels[ch]] = new_timeseries - return new_dict - - @staticmethod - def _write(path:str, timeseries: dict): - pass - - @staticmethod - def _transfer(samples, to_unit): - pass diff --git a/src/ltbio/biosignals/sources/MITDB.py b/src/ltbio/biosignals/sources/MITDB.py deleted file mode 100644 index 2ab5652d..00000000 --- a/src/ltbio/biosignals/sources/MITDB.py +++ /dev/null @@ -1,134 +0,0 @@ -# -*- encoding: utf-8 -*- - -# =================================== - -# IT - LongTermBiosignals - -# Package: biosignals -# Module: MITDB -# Description: Class MITDB, a type of BiosignalSource, with static procedures to read and write datafiles from the -# MIT-BIH Arrhythmia dataset at https://physionet.org/content/mitdb/1.0.0/. 
- -# Contributors: João Saraiva, Mariana Abreu -# Created: 31/05/2022 -# Last Updated: 22/07/2022 - -# =================================== - -from datetime import datetime -from os import listdir, path, makedirs - -import wfdb -import wget -from dateutil.parser import parse as to_datetime - -from .. import timeseries -from .. import modalities -from ..sources.BiosignalSource import BiosignalSource -from ltbio.clinical.BodyLocation import BodyLocation -from ltbio.biosignals.timeseries.Unit import * - - -class MITDB(BiosignalSource): - '''This class represents the source of MIT-BIH Arrhythmia Database and includes methods to read and write - biosignal files provided by them. Usually they are in .dat format.''' - - def __init__(self): - super().__init__() - - def __repr__(self): - return "MIT-BIH Arrhythmia Database" - - def __aux_date(header): - """ - Get starting time from header - """ - time_key = [key for key in header.keys() if 'time' in key][0] - time_date = [key for key in header.keys() if 'date' in key][0] - try: - return to_datetime(header[time_date].strip('\"') + ' ' + header[time_key].strip('\"')) - except Exception as e: - print(f'Date is {header[time_date]} and Time is {header[time_key]} so the default will be used') - print('Default start date: 2000-1-1 00:00:00') - return datetime(2000, 1, 1, 00, 00, 00) - - @staticmethod - def __read_dat(dirfile, metadata=False): - - """ - Reads one dat file - param: dirfile (str) path to one file that ends in dat - param: sensor (str) name of the channel to extract (ex: ECG) - If metadata is True - returns list of channels and sampling frequency and initial datetime - Else return arrays one for each channel - """ - - # get edf data - signal, fields = wfdb.rdsamp(dirfile) - # get channels - channel_list = fields['sig_name'] - if metadata: - return channel_list, fields['fs'], fields['units'] - # structure of signal is two arrays, one array for each channel - return signal, MITDB.__aux_date(fields) - - @staticmethod - def _timeseries(dir, type, **options): - '''Reads multiple EDF/EDF+ files on the directory 'path' and returns a Biosignal associated with a Patient. 
- Args: - dir (str): directory that contains bitalino files in txt format - type (Biosignal): type of biosignal to extract can be one of ECG, EDA, PPG, RESP, ACC and EMG - ''' - if type != modalities.ECG: - raise IOError(f'Type {type} must be ECG') - # first a list is created with all the filenames that end in .dat and are inside the chosen dir - all_files = sorted(list(set([path.join(dir, di.split('.')[0]) for di in sorted(listdir(dir)) if di.endswith('dat')]))) - - # run the dat read function for all files in list all_files - channels, sfreq, units = MITDB.__read_dat(all_files[0], metadata=True) - - all_edf = list(map(MITDB.__read_dat, all_files)) - new_dict = {} - for ch in range(len(channels)): - segments = {edf_data[1]: edf_data[0][:, ch] for edf_data in all_edf} - unit = Volt(Multiplier.m) if 'mV' in units[ch] else None - name = BodyLocation.MLII if channels[ch].strip() == 'MLII' else BodyLocation.V5 if channels[ch].strip() == 'V5' else channels[ch] - if len(segments) > 1: - new_timeseries = timeseries.Timeseries.withDiscontiguousSegments(segments, sampling_frequency=sfreq, name=channels[ch], units=unit) - else: - new_timeseries = timeseries.Timeseries(tuple(segments.values())[0], tuple(segments.keys())[0], sfreq, name=channels[ch], units=unit) - new_dict[channels[ch]] = new_timeseries - - return new_dict - - @staticmethod - def _fetch(type=None, patient_code=None): - """ Fetch one patient from the database - Args: - patient_code (int): number of patient to select - """ - # Transform patient code to the patient folder name - if not patient_code: - raise IOError('Please give a patient code (int)') - - temp_dir = '.cache' - if not path.isdir(temp_dir): - makedirs(temp_dir) - temp_dir = wget.download('https://physionet.org/content/mitdb/1.0.0/'+str(patient_code)+'.dat', out=temp_dir) - if temp_dir != '': - print(f'{temp_dir=}') - files = MITDB._timeseries(temp_dir, type) - return files - elif len(temp_dir) == '': - raise IOError(f'No patient was found {patient_code=}') - - @staticmethod - def _write(path:str, timeseries: dict): - pass - - @staticmethod - def _transfer(samples, to_unit): - pass - - def _write(path:str, timeseries: dict): - pass diff --git a/src/ltbio/biosignals/sources/Multisource.py b/src/ltbio/biosignals/sources/Multisource.py deleted file mode 100644 index 66cedd82..00000000 --- a/src/ltbio/biosignals/sources/Multisource.py +++ /dev/null @@ -1,45 +0,0 @@ -# -*- encoding: utf-8 -*- - -# =================================== - -# IT - LongTermBiosignals - -# Package: biosignals -# Module: Multisource.py -# Description: Class Multisource.py, a collection of BiosignalSources. - -# Contributors: João Saraiva -# Created: 08/07/2022 - -# NOT READY. UNDER DEVELOPMENT. 
-
-# ===================================
-
-from typing import Dict
-
-from ltbio.biosignals.sources.BiosignalSource import BiosignalSource
-
-
-class Multisource(BiosignalSource):
-    def __init__(self, **sources: Dict[str, BiosignalSource]):
-        super().__init__()
-        self.sources = sources
-
-    def __repr__(self):
-        res = "Multisource: "
-        for source in self.sources:
-            res += str(source) + ', '
-        return res[:-2]
-
-    @staticmethod
-    def _timeseries(dir, type, **options):
-        pass
-
-    @staticmethod
-    def _write(dir, timeseries):
-        '''Writes multiple TXT files on the directory 'path' so they can be opened in Opensignals.'''
-        # TODO
-
-    @staticmethod
-    def _transfer(samples, to_unit):
-        pass
diff --git a/src/ltbio/biosignals/sources/Seer.py b/src/ltbio/biosignals/sources/Seer.py
deleted file mode 100644
index 96d9a990..00000000
--- a/src/ltbio/biosignals/sources/Seer.py
+++ /dev/null
@@ -1,127 +0,0 @@
-# -*- encoding: utf-8 -*-
-
-# ===================================
-
-# IT - LongTermBiosignals
-
-# Package: biosignals
-# Module: Seer
-# Description: Class Seer, a type of BiosignalSource, with static procedures to read and write datafiles from the
-# Seer dataset at https://seermedical.com.
-
-# Contributors: Mariana Abreu
-# Created: 02/06/2022
-# Last Updated: 22/07/2022
-
-# ===================================
-
-from os import listdir, path
-
-from mne.io import read_raw_edf
-
-from .. import timeseries
-from .. import modalities
-from ..sources.BiosignalSource import BiosignalSource
-
-
-class Seer(BiosignalSource):
-    '''This class represents the source of the Seer Epilepsy Database and includes methods to read and write
-    biosignal files provided by it. Usually they are in .edf format.'''
-
-    def __init__(self):
-        super().__init__()
-
-    def __repr__(self):
-        return "Seer Epilepsy Database"
-
-    @staticmethod
-    def __read_file(dirfile, metadata=False):
-        """
-        Reads one EDF file.
-        param: dirfile (str) path to one file that ends in .edf
-        If metadata is True, returns the list of channels and the sampling frequency.
-        Else, returns one array per channel and the initial datetime.
-        """
-        # get edf data
-        edf = read_raw_edf(dirfile)
-        # get channels that correspond to type (HR = type HR)
-        channel_list = edf.ch_names
-        # initial datetime
-        if metadata:
-            return channel_list, edf.info['sfreq'], None
-        # structure of signal is one array per channel
-        signal = edf.get_data()
-        date = edf.info['meas_date'].replace(tzinfo=None)
-        edf.close()
-        return signal, date
-
-    @staticmethod
-    def _timeseries(dir, type, **options):
-        '''Reads multiple EDF/EDF+ files in the directory 'dir' and returns a Biosignal associated with a Patient.
-        Args:
-            dir (str): directory that contains Seer files in EDF format
-            type (Biosignal): type of biosignal to extract; can be one of ECG, EDA, PPG, RESP, ACC, EMG and HR
-        '''
-        sensor = 'ECG' if type is modalities.ECG else 'EDA' if type is modalities.EDA else 'PPG' if type is modalities.PPG else 'ACC' if type is modalities.ACC \
-            else 'PZT' if type is modalities.RESP else 'EMG' if type is modalities.EMG else 'HR' if type is modalities.HR else ''
-        if sensor == '':
-            raise IOError(f'Type {type} does not have a label associated; please insert one')
-        # first a list is created with all the filenames that contain the sensor label and are inside the chosen dir
-        all_files = sorted(list(set([path.join(dir, di) for di in sorted(listdir(dir)) if sensor in di.upper()])))
-        # devices example: "Byteflies, Empatica"
-        devices = set([file.split(' - ')[-1] for file in all_files])
-        # run the edf read function for all files in list all_files
-        new_dict = {}
-        for device in devices:
-            # select only this device's files
-            device_files = [file for file in all_files if device in file]
-            channels, sfreq, units = Seer.__read_file(device_files[0], metadata=True)
-            all_edf = list(map(Seer.__read_file, device_files))
-            for ch in range(len(channels)):
-                segments = {edf_data[1]: edf_data[0][ch] for edf_data in all_edf}
-                unit = units
-                name = f'{channels[ch]} from {device.split("-")[0]}'
-                dict_key = f'{device.split("-")[0]}-{channels[ch].upper()}' if len(devices) > 1 else channels[ch].upper()
-                if len(segments) > 1:
-                    new_timeseries = timeseries.Timeseries.withDiscontiguousSegments(segments, sampling_frequency=sfreq, name=name, units=unit)
-                else:
-                    new_timeseries = timeseries.Timeseries(tuple(segments.values())[0], tuple(segments.keys())[0], sfreq, name=name, units=unit)
-                new_dict[dict_key] = new_timeseries
-
-        return new_dict
-
-    @staticmethod
-    def _fetch(source_dir='', type=None, patient_code=None):
-        """ Fetch one patient from the database.
-        Args:
-            patient_code (int): number of the patient to select
-        """
-        # Transform patient code to the patient folder name
-        if not patient_code:
-            raise IOError('Please give a patient code (int)')
-        if source_dir == '':
-            raise IOError('Please give the patients location')
-        list_patients = listdir(source_dir)
-        selected_patient = [pat for pat in list_patients if str(patient_code) in pat]
-        if len(selected_patient) == 1:
-            print(f'{selected_patient=}')
-            path_ = path.join(source_dir, selected_patient[0])
-            files = Seer._timeseries(path_, type)
-            return files
-        elif len(selected_patient) > 1:
-            raise IOError(f'More than one patient was found {selected_patient=}')
-        else:
-            raise IOError(f'No patient was found {selected_patient=}')
-
-    @staticmethod
-    def _write(path: str, timeseries: dict):
-        pass
-
-    @staticmethod
-    def _transfer(samples, to_unit):
-        pass
-
-
-# path_ = 'C:\\Users\\Mariana\\OneDrive - Universidade de Lisboa\\PreEpiseizures\\BD-SEER'
-# files = Seer._fetch(path_, type=EMG, patient_code="172")
diff --git a/src/ltbio/biosignals/sources/Sense.py b/src/ltbio/biosignals/sources/Sense.py
deleted file mode 100644
index cf62fdee..00000000
--- a/src/ltbio/biosignals/sources/Sense.py
+++ /dev/null
@@ -1,388 +0,0 @@
-# -*- encoding: utf-8 -*-
-
-# ===================================
-
-# IT - LongTermBiosignals
-
-# Package: biosignals
-# Module: Sense
-# Description: Class Sense, a type of BiosignalSource, with static procedures to read and write datafiles from
-# any ScientISST Sense device.
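Returning to the modality-to-label chain in `Seer._timeseries` above: a dictionary lookup is harder to mistype than a long conditional chain. A minimal sketch, assuming the same `modalities` classes:

```python
# Sketch only: an explicit mapping from modality class to Seer file label.
SENSOR_LABELS = {
    modalities.ECG: 'ECG', modalities.EDA: 'EDA', modalities.PPG: 'PPG',
    modalities.ACC: 'ACC', modalities.RESP: 'PZT', modalities.EMG: 'EMG',
    modalities.HR: 'HR',
}
sensor = SENSOR_LABELS.get(type, '')  # '' when the modality has no label
```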
- -# Contributors: Mariana Abreu, João Saraiva -# Created: 20/06/2022 -# Last Updated: 22/07/2022 - -# =================================== - -import configparser -from ast import literal_eval -from datetime import timedelta -from json import load -from os import listdir, path, access, R_OK -from os.path import getsize -from warnings import warn - -import numpy as np -from dateutil.parser import parse as to_datetime - -from .. import timeseries -from .. import modalities -from ..sources.BiosignalSource import BiosignalSource -from ltbio.clinical.BodyLocation import BodyLocation - - -class Sense(BiosignalSource): - - # Sense Defaults files use these keys: - MODALITIES = 'modalities' - CHANNEL_LABELS = 'labels' - BODY_LOCATION = 'location' - - # Sense csv data files use these keys: - KEY_CH_LABELS_IN_HEADER = 'Channel Labels' - KEY_HZ_IN_HEADER = 'Sampling rate (Hz)' - KEY_TIME_IN_HEADER = 'ISO 8601' - ANALOGUE_LABELS_FORMAT = 'AI{0}_raw' - - # These are needed to map channels to biosignal modalities - DEFAULTS_PATH: str - DEVICE_ID: str - - # Flag to deal with badly-formatted CSV files - BAD_FORMAT = False - - def __init__(self, device_id:str, defaults_path:str=None): - super().__init__() - self.__device_id = device_id - Sense.DEVICE_ID = device_id - if defaults_path is not None: - Sense.DEFAULTS_PATH = defaults_path - else: - if not path.exists('resources/config.ini'): - raise FileNotFoundError('No config.ini was found.') - try: - config = configparser.ConfigParser() - config.read('resources/config.ini') - Sense.DEFAULTS_PATH = config['DEFAULT']['Sense'] - print(f"Getting default mapping from {Sense.DEFAULTS_PATH}") - except IndexError: - raise KeyError("No defaults file indicated 'Sense' devices in config.ini.") - self.__defaults_path = defaults_path - - Sense.BAD_FORMAT = False - - def __repr__(self): - return "ScientISST Sense" - - - @staticmethod - def __aux_date(header): - """ Get starting time from header. """ - return to_datetime(header[Sense.KEY_TIME_IN_HEADER], ignoretz=True) - - @staticmethod - def __check_empty(len_, type=''): - """ Confirm if the length is acceptable and return the desired output. """ - if type == 'file_size': - if len_ <= 50: - return True - else: - if len_ < 1: - return True - return False - - @staticmethod - def __get_mapping(biosignal_type, channel_labels, modalities_available): - """ - Given a header, find all indexes that correspond to biosignal modality of interest. - It REQUIRES a default mapping to be specified in a JSON file, otherwise a mapping will be requested on the stdin and saved for future use. - - @param header: A list of strings corresponding to column names. - @param biosignal_type: Biosignal subclass indicating which modality is of interest. - @param defaults_path: The path to the JSON file containing the mapping in the correct syntax. - - @rtype: tuple - @return: A tuple with: - a) A dictionary with the indexes corresponding to the biosignal modality of interest mapped to a channel label. Optionally, it can have a key Sense.BODY_LOCATION mapped to some body location. 
- E.g.: {1: 'Label of channel 1', 3: 'Label of channel 3'} - b) A body location (in str) or None - """ - - mapping = {} - - if biosignal_type.__name__ in str(modalities_available): - for index in modalities_available[biosignal_type.__name__]: - # Map each analogue channel of interest to a label - mapping[index] = channel_labels[str(index)] - else: - raise IOError(f"There are no analogue channels associated with {biosignal_type.__name__}") - - return mapping - - @staticmethod - def __get_defaults(): - """ - Gets the default mapping of channels for a device. - - @return: A tuple with - a) modalities: A dictionary mapping biosignal modalities to column indexes; - b) channel_labels: A dictionary mapping each column index to a meaningful channel label; - c) body_location: A string associated with a body location. - @rtype: tuple of size 3 - """ - - if not hasattr(Sense, 'DEVICE_ID'): - raise IOError("Unlike other BiosignalSource(s), Sense needs to be instantiated and a 'device_id' must be provided on instantiation.") - - # Check if file exists and it is readable - if path.isfile(Sense.DEFAULTS_PATH) and access(Sense.DEFAULTS_PATH, R_OK): - - # OPTION A: Use the mapping in the json file - with open(Sense.DEFAULTS_PATH, 'r') as json_file: - json_string = load(json_file) - - # Get mapping of modalities - if Sense.MODALITIES in json_string[Sense.DEVICE_ID]: - modalities = json_string[Sense.DEVICE_ID][Sense.MODALITIES] - else: - raise IOError(f"Key {Sense.MODALITIES} is mandatory for each device default mapping.") - - # Get mapping of channel labels, if any - if Sense.CHANNEL_LABELS in json_string[Sense.DEVICE_ID]: - channel_labels = json_string[Sense.DEVICE_ID][Sense.CHANNEL_LABELS] - else: - channel_labels = None - - # Get body location, if any - if Sense.BODY_LOCATION in json_string[Sense.DEVICE_ID]: - body_location = json_string[Sense.DEVICE_ID][Sense.BODY_LOCATION] - if body_location.startswith('BodyLocation.'): - body_location:BodyLocation = eval(body_location) - else: - body_location = None - - return modalities, channel_labels, body_location - - # File does not exist; creates one - else: - print("Either Sense defaults file is missing or it is not readable. Creating new defaults...") - # OPTION B: Ask and save a new mapping - json_string = {} - json_string[Sense.DEVICE_ID] = {} # Create a new object for a new device mapping - # B1. Input modalities - # B2. Input Channel labels - # B3. Input Body Location - # TODO: Use stdin to ask for default, save it, and return it - - @staticmethod - def __get_header(file_path): - """ - Auxiliary procedures to find the header (1st line) and column names (2nd line) of the file in the given path. - @param file_path: The path of the file to look for a header. - @return: A tuple with: - a) header: A dictionary with the header metadata. - b) column_names: A list of the column names. - @raise: - IOError: If the given file path does not exist. - """ - with open(file_path) as fh: - header = next(fh)[1:] # Read first line - header = literal_eval(header) # Get a dictionary of the header metadata - column_names = next(fh)[1:] # Read second line - column_names = column_names.split() # Get a list of the column names - return header, column_names - - @staticmethod - def __get_samples(file_path): - """ - Auxiliary procedures to find the samples (> 3rd line) of the file in the given path. - @param file_path: The path of the file to look for a header. - @return: A np.array of the data. - @raise: - IOError: If the given file path does not exist. 
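The defaults file consumed by `__get_defaults` is not shown in this patch; from the lookups above, it would have the following shape. Everything in this sketch (device id, indices, labels) is invented for illustration:

```python
# Sketch only: the structure expected in the JSON at Sense.DEFAULTS_PATH,
# written here as the equivalent Python dict.
defaults = {
    "my_sense_device": {                                # keyed by Sense.DEVICE_ID
        "modalities": {"ECG": [1], "ACC": [2, 3, 4]},   # modality name -> analogue channel indices
        "labels": {"1": "BodyLocation.CHEST",           # str(index) -> channel label
                   "2": "x", "3": "y", "4": "z"},
        "location": "BodyLocation.CHEST",               # optional body location
    }
}
```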
- """ - with open(file_path) as fh: - # Dismiss header (it is in the first line) - header = next(fh)[1:] - next(fh) - # Get the remaining data, i.e., the samples - data = [line.strip().split() for line in fh] - try: - return np.array(data, float) - except ValueError: # In July 2022, it could occur that SENSE files could present Bad Format. - Sense.BAD_FORMAT = True - all_segments = [] - start_indices = [0, ] - # In that case, we need to separate each valid segment of samples. - correct_length = len(data[0]) # FIXME: Assuming first line is syntax-valid. Poor verification, though. - for i in range(len(data)): - if len(data[i]) != correct_length: # Bad syntax found - warn(f"File '{file_path}' has bad syntax on line {i}. This portion was dismissed.") - # Trim the end of data - for j in range(i-1, 0, -1): - if data[j][0] == '15': # Look for NSeq == 15 - all_segments.append(np.array(data[start_indices[-1]:j + 1], float)) # append "old" segment - break - # Trim the beginning of new segment - for j in range(i+1, len(data), 1): - if data[j][0] == '0': # Look for NSeq == 0 - start_indices.append(j) - break - - all_segments.append(np.array(data[start_indices[-1]:], float)) # append last "new" segment - return all_segments, start_indices - - - @staticmethod - def __read_file(file_path, type, channel_labels, modalities_available): - """ - Reads one csv file - Args: - list_ (list): contains the file path - metadata (bool): defines whether only metadata or actual timeseries values should be returned - sensor_idx (list): list of indexes that correspond to the columns of sensor to extract - sensor_names (list): list of names that correspond to the sensor label - ex: sensor='ECG', sensor_names=['ECG_chest'] - ex: sensor='ACC', options['location']='wrist', sensor_names=['ACCX_wrist','ACCY_wrist','ACCZ_wrist'] - device (str): device MacAddress, this is used to get the specific header, specially when using 2 devices - **options (dict): equal to _read arg - - @return: A tuple with: - a) sensor_data (np.array): 2-dimensional array of time over sensors columns. - b) date (datetime): initial datetime of samples. - d) sampling_frequency (float): The sampling frequency, in Hertz, of the read samples. 
- - @raise: - IOError: if sensor_names is empty, meaning no channels could be retrieved for chosen sensor - """ - - # STEP 1 - # Get header - header, column_names = Sense.__get_header(file_path) - - # STEP 2 - # Get all samples - all_samples = Sense.__get_samples(file_path) - - # STEP 3 - # Raise Error if file is empty - if not Sense.BAD_FORMAT and Sense.__check_empty(len(all_samples)): - raise IOError(f'Empty file: {file_path}.') - - # STEP 4 - # Get analogue channels of interest, mapped to labels, and a body location (if any associated) - mapping = Sense.__get_mapping(type, channel_labels, modalities_available) - - # STEP 5 - # Get initial date and sampling frequency - date = Sense.__aux_date(header) - sf = header[Sense.KEY_HZ_IN_HEADER] - - # STEP 6 - # Filtering only the samples of the channels of interest - if not Sense.BAD_FORMAT: - samples_of_interest = {} - for ix in mapping: - label = mapping[ix] - samples_of_interest[label] = all_samples[:, column_names.index(Sense.ANALOGUE_LABELS_FORMAT.format(str(ix)))] - # return dict, start date, sampling frequency - return samples_of_interest, date, sf - else: - samples_of_interest_by_segment, start_dates = [], [] - all_segments, start_indices = all_samples - for segment, start_index in zip(all_segments, start_indices): - start_dates.append(date + timedelta(seconds=start_index/sf)) - samples_of_interest = {} - for ix in mapping: - label = mapping[ix] - samples_of_interest[label] = segment[:, column_names.index(Sense.ANALOGUE_LABELS_FORMAT.format(str(ix)))] - samples_of_interest_by_segment.append(samples_of_interest) - # return segments, start dates, sampling frequency - return samples_of_interest_by_segment, start_dates, sf - - - @staticmethod - def _timeseries(dir, type, **options): - """Reads multiple csv files on the directory 'path' and returns a Biosignal associated with a Patient. - @param dir (str): directory that contains Sense files in csv format - @param type (subclass of Biosignal): type of biosignal to extract can be one of ECG, EDA, PPG, RESP, ACC and EMG - @param **options (dict): - defaults_path (str): if the user wants to use a json to save and load bitalino configurations - device_id (str): directory to json file. If not defined, a default will be set automatically - - @return: A typical dictionary like {str: Timeseries}. - - @raise: - IOError: If there are no Sense files in the given directory. - IOError: If Sense files have no header. - """ - - # STEP 0 - Get defaults - modalities_available, channel_labels, _ = Sense.__get_defaults() - - # STEP 1 - Get files - # A list is created with all the filenames that end with '.csv' inside the given directory. - # E.g. [ file1.csv, file.2.csv, ... ] - all_files = [path.join(dir, file) for file in listdir(dir) if file.endswith('.csv')] - if not all_files: - raise IOError(f"No files in {dir}.") - - # STEP 2 - Convert channel labels to BodyLocations, if any - for position, label in channel_labels.items(): - if label.startswith('BodyLocation.'): - channel_labels[position]:BodyLocation = eval(label) - - # STEP 3 - Read files - # Get samples of analogue channels of interest from each file - data = [] - for file in all_files: - if getsize(file) == 0: - warn(f"File '{file}' has 0 bytes. 
Its reading was dismissed.") - continue - what_is_read = Sense.__read_file(file, type, channel_labels, modalities_available) - if not Sense.BAD_FORMAT: - data.append(what_is_read) - else: - samples_of_interest_by_segment, start_dates, sf = what_is_read - for segment, start_date in zip(samples_of_interest_by_segment, start_dates): - data.append((segment, start_date, sf)) - Sense.BAD_FORMAT = False # done dealing with a bad format - - # E.g.: data[k] = samples_of_interest, start_date, sampling_frequency - - # STEP 4 - Restructuring - # Listing all Segments of the same channel together, labelled to the same channel label. - res = {} - segments = {} - for samples, date, sf in data: - for channel in samples: - # instantiating or appending - if channel not in res: - segments[channel] = {date: samples[channel]} - else: - segments[channel][date] = samples[channel] - res[channel] = sf # save sampling frequency here to be used on the next loop - - # Encapsulating the list of Segments of the same channel in a Timeseries - for channel in segments: - if len(segments[channel]) > 1: - res[channel] = timeseries.Timeseries.withDiscontiguousSegments(segments[channel], sampling_frequency=res[channel]) - else: - res[channel] = timeseries.Timeseries(tuple(segments[channel].values())[0], tuple(segments[channel].keys())[0], sampling_frequency=res[channel]) - - return res - - @staticmethod - def _acquisition_location(path, type, **options): - _, _, bl = Sense.__get_defaults() - return bl - - @staticmethod - def _write(dir, timeseries): - pass # TODO - - @staticmethod - def _transfer(samples, to_unit): - pass - diff --git a/src/ltbio/biosignals/sources/__init__.py b/src/ltbio/biosignals/sources/__init__.py deleted file mode 100644 index 29dd11f0..00000000 --- a/src/ltbio/biosignals/sources/__init__.py +++ /dev/null @@ -1,8 +0,0 @@ -# Quick shortcuts to classes -from ltbio.biosignals.sources.Bitalino import Bitalino -from ltbio.biosignals.sources.E4 import E4 -from ltbio.biosignals.sources.HEM import HEM -from ltbio.biosignals.sources.HSM import HSM -from ltbio.biosignals.sources.MITDB import MITDB -from ltbio.biosignals.sources.Seer import Seer -from ltbio.biosignals.sources.Sense import Sense diff --git a/src/ltbio/biosignals/timeseries/Timeseries.py b/src/ltbio/biosignals/timeseries.py similarity index 81% rename from src/ltbio/biosignals/timeseries/Timeseries.py rename to src/ltbio/biosignals/timeseries.py index 2dd8dfd4..ba3a66f1 100644 --- a/src/ltbio/biosignals/timeseries/Timeseries.py +++ b/src/ltbio/biosignals/timeseries.py @@ -1,40 +1,332 @@ -# -*- encoding: utf-8 -*- +# -- encoding: utf-8 -- # =================================== # IT - LongTermBiosignals -# Package: biosignals -# Module: Timeseries -# Description: Class Timeseries, which mathematically conceptualizes timeseries and their behaviour. -# Class OverlappingTimeseries, a special kind of Timeseries for signal processing purposes. 
+# Package: src/ltbio/biosignals +# Module: timeseries +# Description: -# Contributors: João Saraiva, Mariana Abreu +# Contributors: João Saraiva # Created: 20/04/2022 -# Last Updated: 22/07/2022 +# Last Updated: 07/03/2023 # =================================== -from datetime import datetime, timedelta + from math import ceil from os.path import join from tempfile import mkstemp -from typing import List, Iterable, Collection, Dict, Tuple, Callable, Sequence +from typing import Iterable, Collection, Dict, Tuple, Callable -import matplotlib.pyplot as plt import numpy as np from biosppy.signals.tools import power_spectrum -from datetimerange import DateTimeRange from dateutil.parser import parse as to_datetime +from ..biosignals import Event +from units import Unit from numpy import array, append, ndarray, divide, concatenate, tile, memmap from scipy.signal import resample -from ltbio.biosignals.timeseries.Event import Event -from ltbio.biosignals.timeseries.Frequency import Frequency -from ltbio.biosignals.timeseries.Unit import Unit + +class Frequency(float): + + def __init__(self, value:float): + self.value = float(value) + + def __str__(self): + return str(self.value) + ' Hz' + + def __repr__(self): + return self.__str__() + + def __eq__(self, other): + if isinstance(other, float): + return other == self.value + elif isinstance(other, Frequency): + return other.value == self.value + + def __float__(self): + return self.value + + def __copy__(self): + return Frequency(self.value) + + +from datetime import datetime, timedelta +from functools import reduce +from typing import Sequence, List + +import matplotlib.pyplot as plt +from datetimerange import DateTimeRange +from matplotlib import cm +from matplotlib.dates import date2num +from matplotlib.lines import Line2D +from matplotlib.patches import Rectangle + + +class Timeline(): + + class Group(): + + def __init__(self, intervals: Sequence[DateTimeRange] = [], points: Sequence[datetime] = [], name: str = None, color_hex: str = None): + self.intervals = list(intervals) + self.points = list(points) + self.name = name + self.color_hex = color_hex + + def __repr__(self): + res = '' + if 0 < len(self.intervals): + if len(self.intervals) < 10: + res += ' U '.join(['[' + str(interval) + '[' for interval in self.intervals]) + else: + res += f'{len(self.intervals)} intervals with {self.duration} of total duration' + if 0 < len(self.points): + if len(self.points) < 10: + res += '\nand the following timepoints:\n' + res += ', '.join(['[' + str(point) + '[' for point in self.points]) + else: + res += f'\nand {len(self.points)} timepoints.\n' + return res + + @property + def initial_datetime(self) -> datetime: + return min([interval.start_datetime for interval in self.intervals] + self.points) + + @property + def final_datetime(self) -> datetime: + return max([interval.end_datetime for interval in self.intervals] + self.points) + + @property + def duration(self) -> timedelta: + return sum([interval.timedelta for interval in self.intervals], timedelta()) + + @property + def has_only_intervals(self) -> bool: + return len(self.intervals) > 0 and len(self.points) == 0 + + @property + def has_only_points(self) -> bool: + return len(self.intervals) == 0 and len(self.points) > 0 + + def _as_index(self) -> tuple: + if self.has_only_intervals: + return tuple(self.intervals) + if self.has_only_points: + return tuple(self.points) + return None + + def __init__(self, *groups: Group, name: str = None): + self.groups = list(groups) + self.__name = name + + @property 
+ def name(self): + return self.__name if self.__name else "No Name" + + @name.setter + def name(self, name: str): + self.__name = name + + def __repr__(self): + if len(self.groups) == 1: + return repr(self.groups[0]) + else: + res = '' + for g in self.groups: + res += f'\nGroup {g}\n' + res += repr(g) + return res + + def __and__(self, other): + if isinstance(other, Timeline): + groups = [] + groups += self.groups + groups += other.groups + group_names = [g.name for g in groups] + if len(set(group_names)) != len(group_names): + raise NameError('Cannot join Timelines with groups with the same names.') + return Timeline(*groups, name = self.name + " and " + other.name) + + @property + def initial_datetime(self) -> datetime: + return min([g.initial_datetime for g in self.groups]) + + @property + def final_datetime(self) -> datetime: + return max([g.final_datetime for g in self.groups]) + + @property + def has_single_group(self) -> bool: + return len(self.groups) == 1 + + @property + def single_group(self) -> Group: + return self.groups[0] if self.has_single_group else None + + @property + def duration(self) -> timedelta: + if len(self.groups) == 1: + return self.groups[0].duration + else: + return NotImplementedError() + + @property + def is_index(self) -> bool: + """ + Returns whether or not this can serve as an index to a Biosignal. + A Timeline can be an index when: + - It only contains one interval or a union of intervals (serves as a subdomain) + - It only contains one point or a set of points (serves as set of objects) + """ + return len(self.groups) == 1 and (self.groups[0].has_only_intervals ^ self.groups[0].has_only_points) + + def _as_index(self) -> tuple | None: + if self.is_index: + return self.groups[0]._as_index() + + def plot(self, show:bool=True, save_to:str=None): + fig = plt.figure(figsize=(len(self.groups)*10, len(self.groups)*2)) + ax = plt.gca() + legend_elements = [] + + cmap = cm.get_cmap('tab20b') + for y, g in enumerate(self.groups): + color = g.color_hex + if color is None: + color = cmap(y/len(self.groups)) + + for interval in g.intervals: + start = date2num(interval.start_datetime) + end = date2num(interval.end_datetime) + rect = Rectangle((start, y + 0.4), end - start, 0.4, facecolor=color, alpha=0.5) + ax.add_patch(rect) + + for point in g.points: + ax.scatter(date2num(point), y + 0.95, color=color, alpha=0.5, marker = 'o', markersize=10) + + if len(self.groups) > 1: + legend_elements.append(Line2D([0], [0], marker='o', color=color, label=g.name, markerfacecolor='g', markersize=10)) + + ax.set_xlim(date2num(self.initial_datetime), date2num(self.final_datetime)) + ax.set_ylim(0, len(self.groups)) + ax.get_yaxis().set_visible(False) + for pos in ['right', 'top', 'left']: + plt.gca().spines[pos].set_visible(False) + ax.xaxis_date() + fig.autofmt_xdate() + + if len(self.groups) > 1: + ax.legend(handles=legend_elements, loc='center') + + if self.name: + fig.suptitle(self.name, fontsize=11) + fig.tight_layout() + if save_to is not None: + fig.savefig(save_to) + plt.show() if show else plt.close() + + def _repr_png_(self): + self.plot() + + @classmethod + def union(cls, *timelines): + # Check input + if not all(isinstance(tl, Timeline) for tl in timelines): + raise TypeError("Give objects Timeline to Timeline.union.") + if len(timelines) < 2: + raise ValueError("Give at least 2 Timelines to compute their union.") + + # Get sets of intervals of each Timeline + tl_intervals = [] + for i, tl in enumerate(timelines): + if tl.has_single_group and 
tl.single_group.has_only_intervals:
+                tl_intervals.append(tl.single_group.intervals)
+            else:
+                raise AssertionError(f"The {i+1}th Timeline does not have a single group with only intervals.")
+
+        # Binary function
+        def union_of_two_timelines(intervals1: List[DateTimeRange], intervals2: List[DateTimeRange]):
+            intervals = intervals1 + intervals2
+            intervals.sort(key=lambda x: x.start_datetime)
+            union = [intervals[0]]
+            for i in range(1, len(intervals)):
+                if union[-1].end_datetime >= intervals[i].start_datetime:
+                    union[-1].set_end_datetime(max(union[-1].end_datetime, intervals[i].end_datetime))
+                else:
+                    union.append(intervals[i])
+            return union
+
+        res_intervals = reduce(union_of_two_timelines, tl_intervals)
+        return Timeline(Timeline.Group(res_intervals), name="Union of " + ', '.join(tl.name for tl in timelines))
+
+    @classmethod
+    def intersection(cls, *timelines):
+        # Check input
+        if not all(isinstance(tl, Timeline) for tl in timelines):
+            raise TypeError("Give objects Timeline to Timeline.intersection.")
+        if len(timelines) < 2:
+            raise ValueError("Give at least 2 Timelines to compute their intersection.")
+
+        # Get sets of intervals of each Timeline
+        tl_intervals = []
+        for i, tl in enumerate(timelines):
+            if tl.has_single_group and tl.single_group.has_only_intervals:
+                tl_intervals.append(tl.single_group.intervals)
+            else:
+                raise AssertionError(f"The {i + 1}th Timeline does not have a single group with only intervals.")
+
+        # Binary function
+        def intersection_of_two_timelines(intervals1: List[DateTimeRange], intervals2: List[DateTimeRange]):
+            intervals1.sort(key=lambda x: x.start_datetime)
+            intervals2.sort(key=lambda x: x.start_datetime)
+
+            intersection = []
+            i, j = 0, 0
+            while i < len(intervals1) and j < len(intervals2):
+                if intervals1[i].end_datetime <= intervals2[j].start_datetime:
+                    i += 1
+                elif intervals2[j].end_datetime <= intervals1[i].start_datetime:
+                    j += 1
+                else:
+                    start = max(intervals1[i].start_datetime, intervals2[j].start_datetime)
+                    end = min(intervals1[i].end_datetime, intervals2[j].end_datetime)
+                    intersection.append(DateTimeRange(start, end))
+                    if intervals1[i].end_datetime <= intervals2[j].end_datetime:
+                        i += 1
+                    else:
+                        j += 1
+
+            return intersection
+
+        res_intervals = reduce(intersection_of_two_timelines, tl_intervals)
+        return Timeline(Timeline.Group(res_intervals), name="Intersection of " + ', '.join(tl.name for tl in timelines))
+
+    EXTENSION = '.timeline'
+
+    def save(self, save_to: str):
+        # Check extension
+        if not save_to.endswith(Timeline.EXTENSION):
+            save_to += Timeline.EXTENSION
+        # Write
+        from _pickle import dump
+        with open(save_to, 'wb') as f:
+            dump(self, f)
+
+    @classmethod
+    def load(cls, filepath: str):
+        # Check extension
+        if not filepath.endswith(Timeline.EXTENSION):
+            raise IOError("Only .timeline files are allowed.")
+
+        # Read
+        from _pickle import load
+        with open(filepath, 'rb') as f:
+            timeline = load(f)
+        return timeline

-#from ltbio.processing.filters.Filter import Filter

 class Timeseries():
     """
@@ -306,18 +598,25 @@ def __contains__(self, item):  # Operand 'in' === belongs to
         # A Segment contains other Segment if its start is less than the other's and its end is greater than the other's.
         return self.initial_datetime < item.initial_datetime and self.final_datetime > item.final_datetime

-    def __getitem__(self, position):
+    def __getitem__(self, item):
         '''The built-in slicing and indexing (segment[x:y]) operations.'''
-        if isinstance(position, (int, tuple)):
-            return self.__samples[position]
-        elif isinstance(position, slice):
-            if position.start is None:
+        if isinstance(item, tuple):
+            return [self[k] for k in item]
+        if isinstance(item, int):
+            return self.__samples[item]
+        elif isinstance(item, slice):
+            if item.start is None:
                 new_initial_datetime = self.__initial_datetime
             else:
                 new_initial_datetime = self.__initial_datetime + timedelta(
-                    seconds=position.start / self.__sampling_frequency.value)
-            return self._new(samples=self.__samples[position], initial_datetime=new_initial_datetime,
-                             raw_samples=self.__raw_samples[position])
+                    seconds=item.start / self.__sampling_frequency.value)
+            return self._new(samples=self.__samples[item], initial_datetime=new_initial_datetime,
+                             raw_samples=self.__raw_samples[item])
+
+    def sliding_window(self, window_length: int):
+        assert window_length > 0
+        for i in range(0, len(self.__samples), window_length):
+            yield self.__samples[i: i + window_length]

     # ===================================
     # Amplitude methods
@@ -381,7 +680,8 @@ def adjacent(self, other):
         """Returns True if the Segments' start or end touch."""
         return self.final_datetime == other.initial_datetime or self.initial_datetime == other.final_datetime

-    def __when(self, condition):
+    @staticmethod
+    def __when(condition):
         intervals = []
         true_interval = False
         start, end = None, None
@@ -547,6 +847,22 @@ def _partition(self, individual_length: int, overlap_length: int = 0):

         return res

+    @classmethod
+    def _merge(cls, *segments):
+        """
+        It is assumed that `segments` are ordered in time and all have the same sampling frequency.
+        """
+        if len(segments) == 1:
+            return segments[0]
+        else:
+            samples = concatenate([seg.samples for seg in segments])
+            initial_datetime = segments[0].initial_datetime
+            sampling_frequency = segments[0]._Segment__sampling_frequency
+            return Timeseries._Timeseries__Segment(samples, initial_datetime, sampling_frequency)
+
     # ===================================
     # SERIALIZATION

@@ -806,6 +1122,36 @@ def __contains__(self, item):

     def __getitem__(self, item):
         '''The built-in slicing and indexing ([x:y]) operations.'''
+
+        if isinstance(item, tuple):
+            if isinstance(item[0], (datetime, str)):
+                res = list()
+                for timepoint in item:
+                    if isinstance(timepoint, datetime):
+                        res.append(self.__get_sample(timepoint))
+                    elif isinstance(timepoint, str):
+                        res.append(self.__get_sample(to_datetime(timepoint)))
+                    else:
+                        raise IndexError("Index types not supported. Give a tuple of datetimes (can be in string format).")
+                return tuple(res)
+
+            if isinstance(item[0], DateTimeRange):  # This is not publicly documented. Only Biosignal sends a tuple of DateTimeRanges, when it is dealing with Timelines.
+            segments = []
+            for interval in item:
+                try:
+                    x = self.__get_samples(interval.start_datetime, interval.end_datetime)
+                    if x is None:
+                        raise AssertionError(f"x is None for interval {interval}")
+                    segments += x
+                except IndexError:  # one interval was outside of boundaries
+                    pass  # there's no problem
+            if len(segments) == 0:
+                raise IndexError("All intervals given are outside of the Timeseries domain.")
+            return self.__new(segments)
+
         if isinstance(item, int):
             return self.__segments[item].samples

@@ -818,28 +1164,14 @@ def __getitem__(self, item):
         if isinstance(item, slice):
             if item.step is not None:
                 raise IndexError("Indexing with step is not allowed for Timeseries. Try resampling it first.")
-            initial = to_datetime(item.start) if isinstance(item.start,
-                                                            str) else self.initial_datetime if item.start is None else item.start
-            final = to_datetime(item.stop) if isinstance(item.stop,
-                                                         str) else self.final_datetime if item.stop is None else item.stop
+            initial = to_datetime(item.start) if isinstance(item.start, str) else self.initial_datetime if item.start is None else item.start
+            final = to_datetime(item.stop) if isinstance(item.stop, str) else self.final_datetime if item.stop is None else item.stop
             if isinstance(initial, datetime) and isinstance(final, datetime):
                 return self.__new(segments=self.__get_samples(initial, final))
             else:
                 raise IndexError("Index types not supported. Give a slice of datetimes (can be in string format).")

-        if isinstance(item, tuple):
-            res = list()
-            for timepoint in item:
-                if isinstance(timepoint, datetime):
-                    res.append(self.__get_sample(timepoint))
-                elif isinstance(timepoint, str):
-                    res.append(self.__get_sample(to_datetime(timepoint)))
-                else:
-                    raise IndexError("Index types not supported. Give a tuple of datetimes (can be in string format).")
-            return tuple(res)
-
-        if isinstance(item,
-                      DateTimeRange):  # This is not publicly documented. Only Biosignal sends DateTimeRanges, when it is dealing with Events.
+        if isinstance(item, DateTimeRange):  # This is not publicly documented. Only Biosignal sends DateTimeRanges, when it is dealing with Events.
             # First, trim the start and end limits of the interval.
start, end = None, None for subdomain in self.domain: # ordered subdomains @@ -1079,12 +1411,11 @@ def __get_sample(self, datetime: datetime) -> float: def __get_samples(self, initial_datetime: datetime, final_datetime: datetime) -> List[__Segment]: '''Returns the samples between the given initial and end datetimes.''' - self.__check_boundaries(initial_datetime) - self.__check_boundaries(final_datetime) + self.__check_boundaries(DateTimeRange(initial_datetime, final_datetime)) res_segments = [] for i in range(len(self.__segments)): # finding the first Segment segment = self.__segments[i] - if segment.initial_datetime <= initial_datetime <= segment.final_datetime: + if segment.initial_datetime <= initial_datetime <= segment.final_datetime or segment.initial_datetime <= final_datetime <= segment.final_datetime: if final_datetime <= segment.final_datetime: trimmed_segment = segment[int(( initial_datetime - segment.initial_datetime).total_seconds() * self.sampling_frequency):int( @@ -1107,25 +1438,43 @@ def __get_samples(self, initial_datetime: datetime, final_datetime: datetime) -> trimmed_segment = segment[:] res_segments.append(trimmed_segment) + return res_segments + def __check_boundaries(self, datetime_or_range: datetime | DateTimeRange) -> None: intersects = False if isinstance(datetime_or_range, datetime): - for subdomain in self.domain: - if datetime_or_range in subdomain: + if datetime_or_range < self.initial_datetime: + raise IndexError(f"Datetime given, {datetime_or_range}, is outside of Timeseries domain, which starts at {self.initial_datetime}.") + if datetime_or_range > self.final_datetime: + raise IndexError(f"Datetime given, {datetime_or_range}, is outside of Timeseries domain, which precisely ends at {self.final_datetime}.") + + domain = self.domain + for i, subdomain in enumerate(domain): + if datetime_or_range in subdomain: # success case intersects = True break - if not intersects: - raise IndexError( - f"Datetime given is outside of Timeseries domain, {' U '.join([f'[{subdomain.start_datetime}, {subdomain.end_datetime}[' for subdomain in self.domain])}.") + if datetime_or_range < subdomain.start_datetime: # already passed + raise IndexError("Datetime given is outside of Timeseries domain. 
" + f"Timeseries is defined in [{domain[i-1].start_datetime}, {domain[i-1].end_datetime}[ " + f"and in [{subdomain.start_datetime}, {subdomain.end_datetime}[, " + f"but not at {datetime_or_range}.") elif isinstance(datetime_or_range, DateTimeRange): - for subdomain in self.domain: - if subdomain.is_intersection(datetime_or_range) and datetime_or_range.start_datetime != subdomain.end_datetime: + if datetime_or_range.end_datetime < self.initial_datetime: + raise IndexError(f"Interval given, {datetime_or_range}, is outside of Timeseries domain, which starts at {self.initial_datetime}.") + if datetime_or_range.start_datetime >= self.final_datetime: + raise IndexError(f"Interval given, {datetime_or_range}, is outside of Timeseries domain, which precisely ends at {self.final_datetime}.") + + domain = self.domain + for i, subdomain in enumerate(domain): + if subdomain.is_intersection(datetime_or_range) and datetime_or_range.start_datetime != subdomain.end_datetime: # success case intersects = True break - if not intersects: - raise IndexError( - f"Interval given is outside of Timeseries domain, {' U '.join([f'[{subdomain.start_datetime}, {subdomain.end_datetime}[' for subdomain in self.domain])}.") + if datetime_or_range.end_datetime < subdomain.start_datetime: # already passed + raise IndexError("Interval given is outside of Timeseries domain. " + f"Timeseries is defined in [{domain[i-1].start_datetime}, {domain[i-1].end_datetime}[ " + f"and in [{subdomain.start_datetime}, {subdomain.end_datetime}[, " + f"but not in {datetime_or_range}.") def _indices_to_timepoints(self, indices: Sequence[Sequence[int]] | Sequence[Sequence[Sequence[int]]], by_segment=False) -> Sequence[datetime] | Sequence[Sequence[datetime]] | Sequence[DateTimeRange] | Sequence[Sequence[DateTimeRange]]: all_timepoints = [] @@ -1526,6 +1875,32 @@ def _partition(self, time_intervals:tuple[DateTimeRange]): def _delete_segments(self, selection_function: Callable[[ndarray], bool]): self.__segments = list(filter(lambda seg: selection_function(seg.samples), self.__segments)) + def _merge(self, time_intervals:tuple[DateTimeRange]): + res_segments = [] + begin_search = 0 + for t in time_intervals: + start, end = None, None + for i in range(begin_search, len(self.__segments)): + seg = self.__segments[i] + if seg.initial_datetime >= t.start_datetime: + if not start: + start = i + else: + pass + if seg.final_datetime > t.end_datetime: + if start: + end = i + to_merge = self.__segments[start: end + 1] + if len(to_merge) > 0: + res_segments.append(Timeseries.__Segment._merge(*to_merge)) + begin_search = end + 1 + break + else: + pass + + + + # =================================== # SERIALIZATION diff --git a/src/ltbio/biosignals/timeseries/Event.py b/src/ltbio/biosignals/timeseries/Event.py deleted file mode 100644 index a7d9af07..00000000 --- a/src/ltbio/biosignals/timeseries/Event.py +++ /dev/null @@ -1,156 +0,0 @@ -# -*- encoding: utf-8 -*- - -# =================================== - -# IT - LongTermBiosignals - -# Package: biosignals -# Module: Event -# Description: Class Event, which is a point in time with some meaning associated. 
- -# Contributors: João Saraiva -# Created: 26/06/2022 -# Last Updated: 11/07/2022 - -# =================================== - -from datetime import datetime, timedelta - -from datetimerange import DateTimeRange -from dateutil.parser import parse as to_datetime - - -class Event(): - - __SERIALVERSION: int = 1 - - def __init__(self, name:str, onset:datetime|str=None, offset:datetime|str=None): - if onset is None and offset is None: # at least one - raise AssertionError("At least an onset or an offset must be given to create an Event.") - self.__onset = to_datetime(onset) if isinstance(onset, str) else onset - self.__offset = to_datetime(offset) if isinstance(offset, str) else offset - if onset is not None and offset is not None and offset < onset: - raise AssertionError(f"In Event '{name}', the offset cannot come before the onset.") - self.__name = name - - @property - def has_onset(self) -> bool: - return self.__onset != None - - @property - def has_offset(self) -> bool: - return self.__offset != None - - @property - def onset(self) -> datetime: - if self.has_onset: - return self.__onset - else: - raise AttributeError(f"Event {self.name} has no onset.") - - @onset.setter - def onset(self, datetime: datetime): - self.__onset = datetime - - @property - def offset(self) -> datetime: - if self.has_offset: - return self.__offset - else: - raise AttributeError(f"Event {self.name} has no offset.") - - @offset.setter - def offset(self, datetime: datetime): - self.__offset = datetime - - @property - def duration(self) -> timedelta: - if self.__onset is None: - raise AttributeError(f"Event has no duration, only an {self.name} has no offset.") - if self.__offset is None: - raise AttributeError(f"Event has no duration, only an {self.name} has no onset.") - return self.__offset - self.__onset - - @property - def domain(self) -> DateTimeRange: - if self.__onset is None: - raise AttributeError(f"Event has no duration, only an {self.name} has no offset.") - if self.__offset is None: - raise AttributeError(f"Event has no duration, only an {self.name} has no onset.") - return DateTimeRange(self.__onset, self.__offset) - - @property - def name(self) -> str: - return self.__name - - def domain_with_padding(self, before: timedelta = timedelta(seconds=0), after: timedelta = timedelta(seconds=0)): - """ - The Event domain with before, after, or both paddings. Negative paddings go back in time; positive paddings go forward in time. - :param before: Padding before onset if defined, or offset otherwised. - :param after: Padding after offset if defined, or onset otherwised. - :return: DateTimeRange of the padded domain. - """ - - if not isinstance(before, timedelta) or not isinstance(after, timedelta): - raise TypeError('At least one padding (before or after) is necessary. 
Also, they should be timedelta objects.') - - # return: event [start, end[ - start = self.__onset if self.__onset is not None else self.__offset - end = self.__offset if self.__offset is not None else self.__onset - - # return: event [start + before, end + after[ - start, end = start + before, end + after - - return DateTimeRange(start, end) - - def __repr__(self): - if self.__offset is None: - return self.__name + ': Starts at ' + self.__onset.strftime("%d %b, %H:%M:%S") - elif self.__onset is None: - return self.__name + ': Ends at ' + self.__offset.strftime("%d %b, %H:%M:%S") - else: - return self.__name + ': [' + self.__onset.strftime("%d %b, %H:%M:%S") + '; ' + self.__offset.strftime("%d %b, %H:%M:%S") + ']' - - def __hash__(self): - return hash((self.__name, self.__onset, self.__offset)) - - def __eq__(self, other): - return self.__name == other.name and self.__onset == other._Event__onset and self.__offset == other._Event__offset - - def __ne__(self, other): - return not self == other - - def __lt__(self, other): # A Segment comes before other Segment if its end is less than the other's start. - after = other._Event__onset if other._Event__onset is not None else other._Event__offset - before = self.__offset if self.__offset is not None else self.__onset - return before < after - - def __le__(self, other): - return self < other or self == other - - def __gt__(self, other): - return not self < other - - def __ge__(self, other): - return self > other or self == other - - def __getstate__(self): - """ - 1: name (str) - 2: onset (datetime) - 3: offset (datetime) - 4: other... (dict) - """ - other_attributes = self.__dict__.copy() - del other_attributes['_Event__name'], other_attributes['_Event__onset'], other_attributes['_Event__offset'] - return (self.__SERIALVERSION, self.__name, self.__onset, self.__offset) if len(other_attributes) == 0 \ - else (self.__SERIALVERSION, self.__name, self.__onset, self.__offset, other_attributes) - - def __setstate__(self, state): - if state[0] == 1: - self.__name, self.__onset, self.__offset = state[1], state[2], state[3] - if len(state) == 5: - self.__dict__.update(state[4]) - else: - raise IOError(f'Version of {self.__class__.__name__} object not supported. Serialized version: {state[0]};' - f'Supported versions: 1.') diff --git a/src/ltbio/biosignals/timeseries/Frequency.py b/src/ltbio/biosignals/timeseries/Frequency.py deleted file mode 100644 index d3759fa8..00000000 --- a/src/ltbio/biosignals/timeseries/Frequency.py +++ /dev/null @@ -1,37 +0,0 @@ -# -*- encoding: utf-8 -*- - -# =================================== - -# IT - LongTermBiosignals - -# Package: biosignals -# Module: Frequency -# Description: Class Frequency, a float representing frequencies in Hertz. 
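One caveat for the `Frequency` class described below (and for its new copy inside `timeseries.py`): a class that defines `__eq__` without `__hash__` gets `__hash__` set to `None`, so its instances cannot be used in sets or as dictionary keys. A minimal sketch of the usual remedy, assuming hashable frequencies are wanted:

```python
# Sketch only: keep float's hash alongside the custom equality.
class Frequency(float):

    def __init__(self, value: float):
        self.value = float(value)

    def __eq__(self, other):
        # Simplified: raises for non-numeric operands instead of returning None.
        return self.value == float(other)

    __hash__ = float.__hash__
```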
- -# Contributors: João Saraiva -# Created: 22/07/2022 - -# =================================== - -class Frequency(float): - - def __init__(self, value:float): - self.value = float(value) - - def __str__(self): - return str(self.value) + ' Hz' - - def __repr__(self): - return self.__str__() - - def __eq__(self, other): - if isinstance(other, float): - return other == self.value - elif isinstance(other, Frequency): - return other.value == self.value - - def __float__(self): - return self.value - - def __copy__(self): - return Frequency(self.value) diff --git a/src/ltbio/biosignals/timeseries/Timeline.py b/src/ltbio/biosignals/timeseries/Timeline.py deleted file mode 100644 index 088ad0cf..00000000 --- a/src/ltbio/biosignals/timeseries/Timeline.py +++ /dev/null @@ -1,286 +0,0 @@ -# -- encoding: utf-8 -- - -# =================================== - -# IT - LongTermBiosignals - -# Package: src/ltbio/biosignals/timeseries -# Module: Timeline -# Description: - -# Contributors: João Saraiva -# Created: 08/02/2023 - -# =================================== -from datetime import datetime, timedelta -from functools import reduce -from typing import Sequence, List - -import matplotlib.pyplot as plt -from datetimerange import DateTimeRange -from matplotlib import cm -from matplotlib.dates import date2num -from matplotlib.lines import Line2D -from matplotlib.patches import Rectangle - - -class Timeline(): - - class Group(): - - def __init__(self, intervals: Sequence[DateTimeRange] = [], points: Sequence[datetime] = [], name: str = None, color_hex: str = None): - self.intervals = list(intervals) - self.points = list(points) - self.name = name - self.color_hex = color_hex - - def __repr__(self): - res = '' - if 0 < len(self.intervals): - if len(self.intervals) < 10: - res += ' U '.join(['[' + str(interval) + '[' for interval in self.intervals]) - else: - res += f'{len(self.intervals)} intervals with {self.duration} of total duration' - if 0 < len(self.points): - if len(self.points) < 10: - res += '\nand the following timepoints:\n' - res += ', '.join(['[' + str(point) + '[' for point in self.points]) - else: - res += f'\nand {len(self.points)} timepoints.\n' - return res - - @property - def initial_datetime(self) -> datetime: - return min([interval.start_datetime for interval in self.intervals] + self.points) - - @property - def final_datetime(self) -> datetime: - return max([interval.end_datetime for interval in self.intervals] + self.points) - - @property - def duration(self) -> timedelta: - return sum([interval.timedelta for interval in self.intervals], timedelta()) - - @property - def has_only_intervals(self) -> bool: - return len(self.intervals) > 0 and len(self.points) == 0 - - @property - def has_only_points(self) -> bool: - return len(self.intervals) == 0 and len(self.points) > 0 - - def _as_index(self) -> tuple: - if self.has_only_intervals: - return tuple(self.intervals) - if self.has_only_points: - return tuple(self.points) - return None - - def __init__(self, *groups: Group, name: str = None): - self.groups = list(groups) - self.__name = name - - @property - def name(self): - return self.__name if self.__name else "No Name" - - @name.setter - def name(self, name: str): - self.__name = name - - def __repr__(self): - if len(self.groups) == 1: - return repr(self.groups[0]) - else: - res = '' - for g in self.groups: - res += f'\nGroup {g}\n' - res += repr(g) - return res - - def __and__(self, other): - if isinstance(other, Timeline): - groups = [] - groups += self.groups - groups += other.groups - 
group_names = [g.name for g in groups] - if len(set(group_names)) != len(group_names): - raise NameError('Cannot join Timelines with groups with the same names.') - return Timeline(*groups, name = self.name + " and " + other.name) - - @property - def initial_datetime(self) -> datetime: - return min([g.initial_datetime for g in self.groups]) - - @property - def final_datetime(self) -> datetime: - return max([g.final_datetime for g in self.groups]) - - @property - def has_single_group(self) -> bool: - return len(self.groups) == 1 - - @property - def single_group(self) -> Group: - return self.groups[0] if self.has_single_group else None - - @property - def duration(self) -> timedelta: - if len(self.groups) == 1: - return self.groups[0].duration - else: - return NotImplementedError() - - @property - def is_index(self) -> bool: - """ - Returns whether or not this can serve as an index to a Biosignal. - A Timeline can be an index when: - - It only contains one interval or a union of intervals (serves as a subdomain) - - It only contains one point or a set of points (serves as set of objects) - """ - return len(self.groups) == 1 and (self.groups[0].has_only_intervals ^ self.groups[0].has_only_points) - - def _as_index(self) -> tuple | None: - if self.is_index: - return self.groups[0]._as_index() - - def plot(self, show:bool=True, save_to:str=None): - fig = plt.figure(figsize=(len(self.groups)*10, len(self.groups)*2)) - ax = plt.gca() - legend_elements = [] - - cmap = cm.get_cmap('tab20b') - for y, g in enumerate(self.groups): - color = g.color_hex - if color is None: - color = cmap(y/len(self.groups)) - - for interval in g.intervals: - start = date2num(interval.start_datetime) - end = date2num(interval.end_datetime) - rect = Rectangle((start, y + 0.4), end - start, 0.4, facecolor=color, alpha=0.5) - ax.add_patch(rect) - - for point in g.points: - ax.scatter(date2num(point), y + 0.95, color=color, alpha=0.5, marker = 'o', markersize=10) - - if len(self.groups) > 1: - legend_elements.append(Line2D([0], [0], marker='o', color=color, label=g.name, markerfacecolor='g', markersize=10)) - - ax.set_xlim(date2num(self.initial_datetime), date2num(self.final_datetime)) - ax.set_ylim(0, len(self.groups)) - ax.get_yaxis().set_visible(False) - for pos in ['right', 'top', 'left']: - plt.gca().spines[pos].set_visible(False) - ax.xaxis_date() - fig.autofmt_xdate() - - if len(self.groups) > 1: - ax.legend(handles=legend_elements, loc='center') - - if self.name: - fig.suptitle(self.name, fontsize=11) - fig.tight_layout() - if save_to is not None: - fig.savefig(save_to) - plt.show() if show else plt.close() - - def _repr_png_(self): - self.plot() - - @classmethod - def union(cls, *timelines): - # Check input - if not all(isinstance(tl, Timeline) for tl in timelines): - raise TypeError("Give objects Timeline to Timeline.union.") - if len(timelines) < 2: - raise ValueError("Give at least 2 Timelines to compute their union.") - - # Get sets of intervals of each Timeline - tl_intervals = [] - for i, tl in enumerate(timelines): - if tl.has_single_group and tl.single_group.has_only_intervals: - tl_intervals.append(tl.single_group.intervals) - else: - raise AssertionError(f"The {i+1}th Timeline does not have a single group with only intervals.") - - # Binary function - def union_of_two_timelines(intervals1: List[DateTimeRange], intervals2: List[DateTimeRange]): - intervals = intervals1 + intervals2 - intervals.sort(key=lambda x: x.start_datetime) - union = [intervals[0]] - for i in range(1, len(intervals)): - if 
union[-1].end_datetime >= intervals[i].start_datetime: - union[-1].set_end_datetime(max(union[-1].end_datetime, intervals[i].end_datetime)) - else: - union.append(intervals[i]) - return union - - res_intervals = reduce(union_of_two_timelines, tl_intervals) - return Timeline(Timeline.Group(res_intervals), name=f"Union of " + ', '.join(tl.name for tl in timelines)) - - @classmethod - def intersection(cls, *timelines): - # Check input - if not all(isinstance(tl, Timeline) for tl in timelines): - raise TypeError("Give objects Timeline to Timeline.union.") - if len(timelines) < 2: - raise ValueError("Give at least 2 Timelines to compute their union.") - - # Get sets of intervals of each Timeline - tl_intervals = [] - for i, tl in enumerate(timelines): - if tl.has_single_group and tl.single_group.has_only_intervals: - tl_intervals.append(tl.single_group.intervals) - else: - raise AssertionError(f"The {i + 1}th Timeline does not have a single group with only intervals.") - - # Binary function - def intersection_of_two_timelines(intervals1: List[DateTimeRange], intervals2: List[DateTimeRange]): - intervals1.sort(key=lambda x: x.start) - intervals2.sort(key=lambda x: x.start) - - intersection = [] - i, j = 0, 0 - while i < len(intervals1) and j < len(intervals2): - if intervals1[i].end_datetime <= intervals2[j].start_datetime: - i += 1 - elif intervals2[j].end_datetime <= intervals1[i].start_datetime: - j += 1 - else: - start = max(intervals1[i].start_datetime, intervals2[j].start_datetime) - end = min(intervals1[i].end_datetime, intervals2[j].end_datetime) - intersection.append(DateTimeRange(start, end)) - if intervals1[i].end_datetime <= intervals2[j].end_datetime: - i += 1 - else: - j += 1 - - return intersection - - res_intervals = reduce(intersection_of_two_timelines, tl_intervals) - return Timeline(Timeline.Group(res_intervals), name=f"Intersection of " + ', '.join(tl.name for tl in timelines)) - - EXTENSION = '.timeline' - - def save(self, save_to: str): - # Check extension - if not save_to.endswith(Timeline.EXTENSION): - save_to += Biosignal.EXTENSION - # Write - from _pickle import dump - with open(save_to, 'wb') as f: - dump(self, f) - - @classmethod - def load(cls, filepath: str): - # Check extension - if not filepath.endswith(Timeline.EXTENSION): - raise IOError("Only .timeline files are allowed.") - - # Read - from _pickle import load - with open(filepath, 'rb') as f: - timeline = load(f) - return timeline diff --git a/src/ltbio/biosignals/timeseries/__init__.py b/src/ltbio/biosignals/timeseries/__init__.py deleted file mode 100644 index 938c98f5..00000000 --- a/src/ltbio/biosignals/timeseries/__init__.py +++ /dev/null @@ -1,3 +0,0 @@ -from .Timeseries import Timeseries, OverlappingTimeseries -from .Event import Event -from .Frequency import Frequency diff --git a/src/ltbio/biosignals/timeseries/Unit.py b/src/ltbio/biosignals/units.py similarity index 100% rename from src/ltbio/biosignals/timeseries/Unit.py rename to src/ltbio/biosignals/units.py From 6a5dba322f3a9f9fc4aeee81219c39ee76825ab5 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Jo=C3=A3o=20Saraiva?= Date: Mon, 27 Mar 2023 11:36:04 +0100 Subject: [PATCH 22/47] Add BOXEN option --- src/ltbio/processing/filters/TimeDomainFilter.py | 1 + 1 file changed, 1 insertion(+) diff --git a/src/ltbio/processing/filters/TimeDomainFilter.py b/src/ltbio/processing/filters/TimeDomainFilter.py index 321960f1..4e9fd627 100644 --- a/src/ltbio/processing/filters/TimeDomainFilter.py +++ b/src/ltbio/processing/filters/TimeDomainFilter.py @@ -31,6 +31,7 @@ 
From 500a40c6d18f343666d91a186505d6b421527e27 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Jo=C3=A3o=20Saraiva?= Date: Mon, 27 Mar 2023 11:36:23 +0100 Subject: [PATCH 23/47] Update docs and changelog --- docs/changelog/pythonversions/index.md | 3 +-- docs/changelog/serialversions/Biosignal.md | 2 +- .../serialversions/BiosignalSource.md | 6 +++--- docs/changelog/serialversions/index.rst | 4 ++-- docs/learn/basic/ltbio101.md | 18 +++++++++--------- docs/learn/basic/properties.md | 4 ++-- 6 files changed, 18 insertions(+), 19 deletions(-) diff --git a/docs/changelog/pythonversions/index.md b/docs/changelog/pythonversions/index.md index b222cae2..ac18c3b0 100644 --- a/docs/changelog/pythonversions/index.md +++ b/docs/changelog/pythonversions/index.md @@ -79,11 +79,10 @@ _Released: 31-05-2022 | Created: 01-02-2022 | **Not Public**_ ### Added -* Abstract classes `Biosignal` and `BiosignalSource`, and some concrete implementations in the sub-packages +* Abstract classes `Biosignal` and `__BiosignalSource`, and some concrete implementations in the sub-packages `modalities` and `sources`, respectively. * Classes `Timeseries`, `Segment`, `Unit`, `Event`. * Package `clinical`: classes `Patient`, `BodyLocation`, `MedicalCondition`, `Medication`, and `SurgicalProcedure`. * Package `processing`: classes `Segmenter` and `Filter`. * Package `features`: classes `FeatureExtractor` and `FeatureSelector`. * Package `ml`: classes `SupervisedModel`, `SupervisedTrainConditions`, `SupervisingTrainer`, `SupervisedTrainResults`. - diff --git a/docs/changelog/serialversions/Biosignal.md b/docs/changelog/serialversions/Biosignal.md index 8aa39595..531c0da0 100644 --- a/docs/changelog/serialversions/Biosignal.md +++ b/docs/changelog/serialversions/Biosignal.md @@ -10,7 +10,7 @@ _Date Created: 01-06-2022_ * `SERIALVERSION` equals 1. * `name` is a `str` with the value of the biosignal's `__name` attribute. -* `source` is a `BiosignalSource` class, or the state of a `BiosignalSource` object, based on the value of the biosignal's `__source` attribute. +* `source` is a `__BiosignalSource` class, or the state of a `__BiosignalSource` object, based on the value of the biosignal's `__source` attribute. * `patient` is the state of the `Patient` referenced in the biosignal's `__patient` attribute. * `acquisition_location` is a `BodyLocation` with the value of the biosignal's `__acquisition_location` attribute. * `associated_events` is a tuple of the states of all `Event`s referenced in the biosignal's `__associated_events` attribute. diff --git a/docs/changelog/serialversions/BiosignalSource.md b/docs/changelog/serialversions/BiosignalSource.md index e1a57698..873c90e6 100644 --- a/docs/changelog/serialversions/BiosignalSource.md +++ b/docs/changelog/serialversions/BiosignalSource.md @@ -1,6 +1,6 @@ -# BiosignalSource +# __BiosignalSource -`BiosignalSource` is usually not instantiated as an object, so there are no states to serialize. +`__BiosignalSource` is usually not instantiated as an object, so there are no states to serialize. However, there are some sources that are instantiated, e.g., `Sense`, `Bitalino`. In these cases, the following serial versions apply. ## Serial Version 1 _Date Created: 01-06-2022_ ``` * `SERIALVERSION` equals 1. -* `others` is a dictionary of properties an instantiated `BiosignalSource` object may have. +* `others` is a dictionary of properties an instantiated `__BiosignalSource` object may have.
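The serial-version pages above and below enumerate the fields of the pickled state. A sketch of what the corresponding `__getstate__` plausibly returns for Serial Version 1; the field names follow the bullet list above, but the tuple layout is an assumption (the authoritative structure lives in the class itself, as the tip below notes):

```python
# Hypothetical reconstruction of a Biosignal's Serial Version 1 state.
def __getstate__(self):
    return (1,                            # SERIALVERSION
            self.__name,                  # name: str
            self.__source,                # __BiosignalSource class, or its state
            self.__patient,               # state of the Patient
            self.__acquisition_location,  # BodyLocation
            tuple(self.__associated_events))  # states of the associated Events
```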
diff --git a/docs/changelog/serialversions/index.rst b/docs/changelog/serialversions/index.rst index f39e0630..5ff8930e 100644 --- a/docs/changelog/serialversions/index.rst +++ b/docs/changelog/serialversions/index.rst @@ -8,7 +8,7 @@ Below you can check the current and past versions of the .biosignal files (first +------------------------------+------------+------------+------------+---------------------+ | :code:`Biosignal` | 1 | 1 | 1 | **2** | +------------------------------+------------+------------+------------+---------------------+ -| :code:`BiosignalSource` | 1 | 1 | 1 | 1 | +| :code:`__BiosignalSource` | 1 | 1 | 1 | 1 | +------------------------------+------------+------------+------------+---------------------+ | :code:`Timeseries` | 1 | **2** | 2 | 2 | +------------------------------+------------+------------+------------+---------------------+ @@ -37,7 +37,7 @@ Any Biosignal and associated objects are stateful, so that they can be serialize :maxdepth: 1 Biosignal - BiosignalSource + __BiosignalSource .. tip:: How this structure is created can be inspected in more detail in the methods :code:`__getstate__` and :code:`__setstate__` of each of these classes. diff --git a/docs/learn/basic/ltbio101.md b/docs/learn/basic/ltbio101.md index e498a416..d17d3ed0 100644 --- a/docs/learn/basic/ltbio101.md +++ b/docs/learn/basic/ltbio101.md @@ -35,16 +35,16 @@ For example, to instantiate an `ECG` from a file, one could use the following in ecg = ECG('pathToFile', HSM, name='My First Biosignal') ``` -In the above example, `HSM` is the `BiosignalSource` representing "Hospital de Santa Maria" 🏥, a hospital in Lisbon, Portugal. +In the above example, `HSM` is the `__BiosignalSource` representing "Hospital de Santa Maria" 🏥, a hospital in Lisbon, Portugal. This class _knows_ how to read biosignals from EDF files collected at HSM. -As depicted in the above Figure, there can be as many `BiosignalSource` subclasses as the user needs. The ones you see are just examples, +As depicted in the above Figure, there can be as many `__BiosignalSource` subclasses as the user needs. The ones you see are just examples, like [ScientISST](scientisst.com/sense) and [BITalino](https://www.pluxbiosignals.com/collections/bitalino) devices, public databases like -MITDB and Seer, and many others. A `BiosignalSource` is an entity with knowledge about where (devices, hospitals, databases, etc.) and how +MITDB and Seer, and many others. A `__BiosignalSource` is an entity with knowledge about where (devices, hospitals, databases, etc.) and how biosignals are acquired. It has static procedures to ease the reading of biosignals from files of that source, and the respective patient metadata, clinical records, and event annotations. These have their own classes as well, as shall be described ahead. -Other sources can be easily implemented by deriving `BiosignalSource`. This scalable property is of vital importance, since biosignal +Other sources can be easily implemented by deriving `__BiosignalSource`. This extensibility is of vital importance, since biosignal researchers get data from a large variety of sources that increases by the day. Hence, you have the possibility of working with data from -new sources only by creating your own `BiosignalSource`, therefore personalising the framework to your research needs. +new sources only by creating your own `__BiosignalSource`, therefore personalising the framework to your research needs.
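Since the paragraph above is the how-to ("creating your own `__BiosignalSource`"), here is a minimal sketch of such a subclass. Everything in it is assumed rather than confirmed API: the import path, the public name, and the `_timeseries` hook inferred from the "static procedures" description:

```python
from ltbio.biosignals.sources import BiosignalSource  # import path assumed

class MyLab(BiosignalSource):
    """A hypothetical source: biosignals acquired at your own lab."""

    def __str__(self):
        return 'My Lab'

    @staticmethod
    def _timeseries(path, type, **options):
        # Assumed hook: parse the files under 'path' and return a
        # {channel_name: Timeseries} mapping for the requested modality.
        ...
```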
------------ @@ -58,7 +58,7 @@ _Biosignals_ to modify themselves, without you having to remember their metadata **The Concept as a Class:** A `Biosignal` object is a non-empty set of channels measuring one biological or physiological variable. Each channel is represented by a `Timeseries` object (see UML above). Optionally, it may also have an associated `Patient`, an associated -`BodyLocation`, one ore more associated `Event`s, an associated `BiosignalSource`, and a name. All this metadata is introduced later; for now it's crucial you understand how +`BodyLocation`, one or more associated `Event`s, an associated `__BiosignalSource`, and a name. All this metadata is introduced later; for now it's crucial you understand how channels and samples are organised. ------------ @@ -96,7 +96,7 @@ endevours. If you want to have it all in one place regarding one biosignal (and with more properties: * 🤕 **Patient**: An object `Patient` where you can drop any information regarding the -patient that can be useful to process the biosignal. If reading from a file or fetching a database, `BiosignalSource` might fill this property +patient that can be useful to process the biosignal. If reading from a file or fetching a database, `__BiosignalSource` might fill this property automatically for you. * 🧍‍♀️ **Location**: An object `BodyLocation` to remember where the sensors were placed. @@ -128,7 +128,7 @@ You have three ways 🤯 of instantiating a `Biosignal`. ### Way 1: Instantiate from files -Give the path to the directory where the files are located and specify the source (`BiosignalSource`) from where the files come from: +Give the path to the directory where the files are located and specify the source (`__BiosignalSource`) from where the files come from: ``` biosignal = ECG("path_to_files", HSM) ``` @@ -139,7 +139,7 @@ you'll want to create your own source. ### Way 2: Instantiate from a Database [coming 🔜] -Give the patient code, the source (`BiosignalSource`) corresponding to a database, and the interval of time (in tuple) you want to fetch from the database: +Give the patient code, the source (`__BiosignalSource`) corresponding to a database, and the interval of time (as a tuple) you want to fetch from the database: ``` biosignal = ECG(('2022-01-01 16:00', '2022-01-01 17:30'), patient_code=101, source=HSM) ``` diff --git a/docs/learn/basic/properties.md b/docs/learn/basic/properties.md index 8e9ed002..5703e202 100644 --- a/docs/learn/basic/properties.md +++ b/docs/learn/basic/properties.md @@ -1,7 +1,7 @@ # Enrich your Biosignals You have noticed by now that ``Biosignal`` objects gather all information about a biosignal in a holistic way, and not just the data -samples. But ``Biosignal`` objects will only hold the information you give or that their `BiosignalSource` deducted. +samples. But ``Biosignal`` objects will only hold the information you give or that their `__BiosignalSource` deduced. ## Print to see me @@ -145,7 +145,7 @@ You can **get** any of the following properties of a `Biosignal`: * `channel_names` returns a set with the channel labels (in `string` or `BodyLocation`). * `sampling_frequency` returns the sampling frequency of every channel, if equal (in `float`). * `acquisition_location` returns the body location where the biosignal was acquired (in `BodyLocation`). -* `source` returns the source where the Biosignal was acquired: hospital, device, etc. (in `BiosignalSource`). +* `source` returns the source where the Biosignal was acquired: hospital, device, etc. (in `__BiosignalSource`). * `patient_code` returns the code of the patient to whom the biosignal belongs (in `int` or `string`). * `type` returns the biosignal modality (in any `Biosignal` subclass). * `initial_datetime` returns the initial datetime of the channel that starts the earliest (in `datetime`).
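A quick sketch of reading the properties listed above, where `ecg` stands for any `Biosignal` instantiated as shown earlier:

```python
print(ecg.channel_names)         # set of str or BodyLocation labels
print(ecg.sampling_frequency)    # float, provided all channels agree
print(ecg.acquisition_location)  # a BodyLocation
print(ecg.initial_datetime)      # datetime of the earliest-starting channel
```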
From 7308e56c0381606ca28c823ba7808be7960e5323 Mon Sep 17 00:00:00 2001 From: saraiva Date: Wed, 7 Jun 2023 11:04:06 +0200 Subject: [PATCH 24/47] Remove processing, features, ml, decision and pipeline packages --- src/ltbio/decision/BinaryDecision.py | 28 -- src/ltbio/decision/Decision.py | 26 -- src/ltbio/decision/DecisionMaker.py | 32 -- src/ltbio/decision/NAryDecision.py | 27 -- src/ltbio/decision/__init__.py | 4 - src/ltbio/features/FeatureExtractor.py | 53 --- src/ltbio/features/FeatureSelector.py | 44 -- src/ltbio/features/Features.py | 82 ---- src/ltbio/features/__init__.py | 5 - src/ltbio/ml/__init__.py | 0 src/ltbio/ml/datasets/BiosignalDataset.py | 251 ---------- .../ml/datasets/EventDetectionDataset.py | 230 --------- .../ml/datasets/SegmentToSegmentDataset.py | 136 ------ src/ltbio/ml/datasets/ValueToValueDataset.py | 105 ----- src/ltbio/ml/datasets/__init__.py | 3 - src/ltbio/ml/datasets/augmentation.py | 163 ------- src/ltbio/ml/metrics.py | 210 --------- .../supervised/SupervisedTrainConditions.py | 266 ----------- src/ltbio/ml/supervised/SupervisingTrainer.py | 117 ----- .../supervised/SupervisingTrainerReporter.py | 107 ----- src/ltbio/ml/supervised/__init__.py | 3 - .../ml/supervised/models/SkLearnModel.py | 155 ------ .../ml/supervised/models/SupervisedModel.py | 153 ------ src/ltbio/ml/supervised/models/TorchModel.py | 307 ------------ src/ltbio/ml/supervised/models/__init__.py | 3 - src/ltbio/ml/supervised/results.py | 56 --- src/ltbio/pipeline/GoTo.py | 27 -- src/ltbio/pipeline/Input.py | 32 -- src/ltbio/pipeline/Packet.py | 227 --------- src/ltbio/pipeline/Pipeline.py | 175 ------- src/ltbio/pipeline/PipelineUnit.py | 440 ------------------ src/ltbio/pipeline/__init__.py | 5 - src/ltbio/pipeline/reports.py | 142 ------ src/ltbio/processing/__init__.py | 1 - src/ltbio/processing/filters/Filter.py | 61 --- .../filters/FrequencyDomainFilter.py | 131 ------ .../processing/filters/TimeDomainFilter.py | 73 --- src/ltbio/processing/filters/__init__.py | 3 - src/ltbio/processing/formaters/Normalizer.py | 63 --- src/ltbio/processing/formaters/Segmenter.py | 70 --- src/ltbio/processing/formaters/__init__.py | 3 - src/ltbio/processing/noises/GaussianNoise.py | 34 -- src/ltbio/processing/noises/Noise.py | 140 ------ src/ltbio/processing/noises/__init__.py | 0 44 files changed, 4193 deletions(-) delete mode 100644 src/ltbio/decision/BinaryDecision.py delete mode 100644 src/ltbio/decision/Decision.py delete mode 100644 src/ltbio/decision/DecisionMaker.py delete mode 100644 src/ltbio/decision/NAryDecision.py delete mode 100644 src/ltbio/decision/__init__.py delete mode 100644 src/ltbio/features/FeatureExtractor.py delete mode 100644 src/ltbio/features/FeatureSelector.py delete mode 100644 src/ltbio/features/Features.py delete mode 100644 src/ltbio/features/__init__.py delete mode 100644 src/ltbio/ml/__init__.py delete mode 100644 src/ltbio/ml/datasets/BiosignalDataset.py delete mode 100644 src/ltbio/ml/datasets/EventDetectionDataset.py delete mode 100644 src/ltbio/ml/datasets/SegmentToSegmentDataset.py delete mode 100644 src/ltbio/ml/datasets/ValueToValueDataset.py delete
mode 100644 src/ltbio/ml/datasets/__init__.py delete mode 100644 src/ltbio/ml/datasets/augmentation.py delete mode 100644 src/ltbio/ml/metrics.py delete mode 100644 src/ltbio/ml/supervised/SupervisedTrainConditions.py delete mode 100644 src/ltbio/ml/supervised/SupervisingTrainer.py delete mode 100644 src/ltbio/ml/supervised/SupervisingTrainerReporter.py delete mode 100644 src/ltbio/ml/supervised/__init__.py delete mode 100644 src/ltbio/ml/supervised/models/SkLearnModel.py delete mode 100644 src/ltbio/ml/supervised/models/SupervisedModel.py delete mode 100644 src/ltbio/ml/supervised/models/TorchModel.py delete mode 100644 src/ltbio/ml/supervised/models/__init__.py delete mode 100644 src/ltbio/ml/supervised/results.py delete mode 100644 src/ltbio/pipeline/GoTo.py delete mode 100644 src/ltbio/pipeline/Input.py delete mode 100644 src/ltbio/pipeline/Packet.py delete mode 100644 src/ltbio/pipeline/Pipeline.py delete mode 100644 src/ltbio/pipeline/PipelineUnit.py delete mode 100644 src/ltbio/pipeline/__init__.py delete mode 100644 src/ltbio/pipeline/reports.py delete mode 100644 src/ltbio/processing/__init__.py delete mode 100644 src/ltbio/processing/filters/Filter.py delete mode 100644 src/ltbio/processing/filters/FrequencyDomainFilter.py delete mode 100644 src/ltbio/processing/filters/TimeDomainFilter.py delete mode 100644 src/ltbio/processing/filters/__init__.py delete mode 100644 src/ltbio/processing/formaters/Normalizer.py delete mode 100644 src/ltbio/processing/formaters/Segmenter.py delete mode 100644 src/ltbio/processing/formaters/__init__.py delete mode 100644 src/ltbio/processing/noises/GaussianNoise.py delete mode 100644 src/ltbio/processing/noises/Noise.py delete mode 100644 src/ltbio/processing/noises/__init__.py diff --git a/src/ltbio/decision/BinaryDecision.py b/src/ltbio/decision/BinaryDecision.py deleted file mode 100644 index 77a23d9a..00000000 --- a/src/ltbio/decision/BinaryDecision.py +++ /dev/null @@ -1,28 +0,0 @@ -# -*- encoding: utf-8 -*- - -# =================================== - -# IT - LongTermBiosignals - -# Package: decision -# Module: BinaryDecision -# Description: Class BinaryDecision, a type of Decision that returns a boolean on 'evaluate'. - -# Contributors: João Saraiva -# Created: 10/06/2022 - -# =================================== - -from typing import Callable - -from ltbio.biosignals import Timeseries -from ltbio.decision.Decision import Decision - - -class BinaryDecision(Decision): - - def __init__(self, decision_function:Callable[[Timeseries], bool], name=None): - super().__init__(decision_function, name) - - def evaluate(self, object:Timeseries) -> bool: - return self.decision_function(object) diff --git a/src/ltbio/decision/Decision.py b/src/ltbio/decision/Decision.py deleted file mode 100644 index 50ec3233..00000000 --- a/src/ltbio/decision/Decision.py +++ /dev/null @@ -1,26 +0,0 @@ -# -*- encoding: utf-8 -*- - -# =================================== - -# IT - LongTermBiosignals - -# Package: decision -# Module: Decision -# Description: Abstract class Decision, representing how decisions are made. 
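For context, a sketch of how the decision API being deleted here fit together, using only the signatures visible in this hunk and in `DecisionMaker` further below; the criterion and the `hr` Timeseries are assumptions:

```python
from ltbio.decision import BinaryDecision, DecisionMaker

# BinaryDecision wraps a Timeseries -> bool function (see the class above).
resting = BinaryDecision(lambda ts: True, name='Resting?')  # placeholder criterion

maker = DecisionMaker(resting)  # a SinglePipelineUnit (see DecisionMaker below)
answer = maker.apply(hr)        # delegates to BinaryDecision.evaluate(hr)
```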
- -# Contributors: João Saraiva -# Created: 10/06/2022 - -# =================================== -from abc import ABC, abstractmethod - - -class Decision(ABC): - - def __init__(self, decision_function, name:str): - self.decision_function = decision_function - self.name = name - - @abstractmethod - def evaluate(self, object): - pass diff --git a/src/ltbio/decision/DecisionMaker.py b/src/ltbio/decision/DecisionMaker.py deleted file mode 100644 index 19003c68..00000000 --- a/src/ltbio/decision/DecisionMaker.py +++ /dev/null @@ -1,32 +0,0 @@ -# -*- encoding: utf-8 -*- - -# =================================== - -# IT - LongTermBiosignals - -# Package: decision -# Module: DecisionMaker -# Description: Class DecisionMaker, a type of PipelineUnit that makes decisions. - -# Contributors: João Saraiva -# Created: 10/06/2022 - -# =================================== -from ltbio.biosignals import Timeseries -from ltbio.decision.Decision import Decision -from ltbio.pipeline.PipelineUnit import SinglePipelineUnit - -class DecisionMaker(SinglePipelineUnit): - - PIPELINE_INPUT_LABELS = {'timeseries': 'timeseries'} - PIPELINE_OUTPUT_LABELS = {'_': 'decision'} - ART_PATH = 'resources/pipeline_media/decision_maker.png' - - def __init__(self, decision: Decision, name: str = None): - super().__init__(name) - self.__decision = decision - - def apply(self, timeseries: Timeseries): - return self.__decision.evaluate(timeseries) - - diff --git a/src/ltbio/decision/NAryDecision.py b/src/ltbio/decision/NAryDecision.py deleted file mode 100644 index 076193fb..00000000 --- a/src/ltbio/decision/NAryDecision.py +++ /dev/null @@ -1,27 +0,0 @@ -# -*- encoding: utf-8 -*- - -# =================================== - -# IT - LongTermBiosignals - -# Package: decision -# Module: NAryDecision -# Description: Class NAryDecision, a type of Decision that returns an integer value on 'evaluate'. - -# Contributors: João Saraiva -# Created: 10/06/2022 - -# =================================== -from typing import Callable - -from ltbio.biosignals import Timeseries -from ltbio.decision.Decision import Decision - - -class NAryDecision(Decision): - - def __init__(self, decision_function:Callable[[Timeseries], int], name=None): - super().__init__(decision_function, name) - - def evaluate(self, object:Timeseries) -> int: - return self.decision_function(object) diff --git a/src/ltbio/decision/__init__.py b/src/ltbio/decision/__init__.py deleted file mode 100644 index 83bcda4d..00000000 --- a/src/ltbio/decision/__init__.py +++ /dev/null @@ -1,4 +0,0 @@ -# Quick shortcuts to classes -from ltbio.decision.BinaryDecision import BinaryDecision -from ltbio.decision.NAryDecision import NAryDecision -from ltbio.decision.DecisionMaker import DecisionMaker diff --git a/src/ltbio/features/FeatureExtractor.py b/src/ltbio/features/FeatureExtractor.py deleted file mode 100644 index 3e86e7e2..00000000 --- a/src/ltbio/features/FeatureExtractor.py +++ /dev/null @@ -1,53 +0,0 @@ -# -*- encoding: utf-8 -*- - -# =================================== - -# IT - LongTermBiosignals - -# Package: features -# Module: FeatureExtractor -# Description: Class FeatureExtractor, a type of PipelineUnit that extracts features from a Timeseries. 
- -# Contributors: João Saraiva -# Created: 03/06/2022 -# Last Updated: 22/07/2022 - -# =================================== - -from typing import Collection, Dict, Callable - -from ltbio.biosignals import Timeseries -from ltbio.biosignals.timeseries.Unit import Unitless -from ltbio.pipeline.PipelineUnit import SinglePipelineUnit - - -class FeatureExtractor(SinglePipelineUnit): - - PIPELINE_INPUT_LABELS = {'timeseries': 'timeseries'} - PIPELINE_OUTPUT_LABELS = {'features': 'timeseries'} - ART_PATH = 'resources/pipeline_media/feature_extractor.png' - - def __init__(self, feature_functions: Collection[Callable], name:str=None): - super().__init__(name) - self.__feature_functions = feature_functions - - def apply(self, timeseries:Timeseries) -> Dict[str, Timeseries]: - - if not timeseries.is_equally_segmented: # we're assuming all Segments have the same duration - raise AssertionError("Given Timeseries is not equally segmented.") - segment_duration = timeseries.segment_duration.total_seconds() - - features = {} - - for feature_function in self.__feature_functions: - extracted_values = timeseries._apply_operation_and_return(feature_function) - features[feature_function.__name__] = timeseries._new(segments_by_time = {timeseries.initial_datetime: extracted_values}, - sampling_frequency = 1/segment_duration, - units=Unitless(), - name = feature_function.__name__ + " - " + timeseries.name, - equally_segmented=True, - overlapping_segments=False, - rawsegments_by_time={timeseries.initial_datetime: extracted_values} - ) - - return features diff --git a/src/ltbio/features/FeatureSelector.py b/src/ltbio/features/FeatureSelector.py deleted file mode 100644 index 8f628b5d..00000000 --- a/src/ltbio/features/FeatureSelector.py +++ /dev/null @@ -1,44 +0,0 @@ -# -*- encoding: utf-8 -*- - -# =================================== - -# IT - LongTermBiosignals - -# Package: features -# Module: FeatureSelector -# Description: Class FeatureSelector, a type of PipelineUnit that selects features from collection of them. - -# Contributors: João Saraiva -# Created: 04/06/2022 -# Last Updated: 22/07/2022 - -# =================================== - -from typing import Callable, Dict - -from numpy import ndarray - -from ltbio.biosignals import Timeseries -from ltbio.pipeline.PipelineUnit import SinglePipelineUnit - - -class FeatureSelector(SinglePipelineUnit): - - PIPELINE_INPUT_LABELS = {'features': 'timeseries'} - PIPELINE_OUTPUT_LABELS = {'selected_features': 'timeseries'} - ART_PATH = 'resources/pipeline_media/feature_selector.png' - - def __init__(self, selection_function: Callable[[ndarray], bool], name:str=None): - super().__init__(name) - self.__selection_function = selection_function - - def apply(self, features:Dict[str, Timeseries]) -> Dict[str, Timeseries]: - assert isinstance(features, dict) - selected_features = {} - for feature_name in features: - ts = features[feature_name] - assert len(ts.segments) == 1 # Feature Timeseries should have only 1 Segment - if self.__selection_function(ts._to_array()[0]): - selected_features[feature_name] = ts - - return selected_features diff --git a/src/ltbio/features/Features.py b/src/ltbio/features/Features.py deleted file mode 100644 index f3fe7828..00000000 --- a/src/ltbio/features/Features.py +++ /dev/null @@ -1,82 +0,0 @@ -# -*- encoding: utf-8 -*- - -# =================================== - -# IT - LongTermBiosignals - -# Package: features -# Module: Features -# Description: Static procedures to extract features from sequences of samples, organized by classes. 
- -# Contributors: João Saraiva -# Created: 03/06/2022 -# Last Updated: 22/07/2022 - -# =================================== - -from abc import ABC - -import numpy as np -from numpy import ndarray - -from ltbio.biosignals import Timeseries - - -class Features(): - """ - Class that stores extracted features of a Timeseries. - """ - - def __init__(self, original_timeseries:Timeseries=None): - self.__original_timeseries = original_timeseries - self.__features = dict() - - @property - def original_timeseries(self) -> Timeseries: - return self.__original_timeseries - - def __setitem__(self, key:str, value:Timeseries): - self.__features[key] = value - - def __getitem__(self, key:str): - return self.__features[key] - - def __iter__(self): - return self.__features.__iter__() - - def __len__(self): - return len(self.__features) - - def to_dict(self): - return self.__features - - -class TimeFeatures(ABC): - """ - Class with implementation of extraction of of several time features. - """ - - @staticmethod - def mean(segment:ndarray) -> float: - return np.mean(segment) - - @staticmethod - def variance(segment:ndarray) -> float: - return np.var(segment) - - @staticmethod - def deviation(segment:ndarray) -> float: - return np.std(segment) - - -class HRVFeatures(ABC): - - @staticmethod - def r_indices(segment:ndarray) -> float: - pass - - @staticmethod - def hr(segment:ndarray) -> float: - pass - - diff --git a/src/ltbio/features/__init__.py b/src/ltbio/features/__init__.py deleted file mode 100644 index 9de2a463..00000000 --- a/src/ltbio/features/__init__.py +++ /dev/null @@ -1,5 +0,0 @@ -# Quick shortcuts to classes -from ltbio.features.FeatureExtractor import FeatureExtractor -from ltbio.features.FeatureSelector import FeatureSelector -from ltbio.features.Features import TimeFeatures -from ltbio.features.Features import HRVFeatures diff --git a/src/ltbio/ml/__init__.py b/src/ltbio/ml/__init__.py deleted file mode 100644 index e69de29b..00000000 diff --git a/src/ltbio/ml/datasets/BiosignalDataset.py b/src/ltbio/ml/datasets/BiosignalDataset.py deleted file mode 100644 index 2ddedbb7..00000000 --- a/src/ltbio/ml/datasets/BiosignalDataset.py +++ /dev/null @@ -1,251 +0,0 @@ -# -- encoding: utf-8 -- - -# =================================== - -# IT - LongTermBiosignals - -# Package: ml -# Module: MLDataset -# Description: - -# Contributors: João Saraiva and code adapted from PyTorch Documentation -# Created: 03/08/2022 -# Last Updated: 05/08/2022 - -# =================================== -from abc import ABC -from typing import Sequence, Iterable, Collection - -import torch -from numpy import ndarray, concatenate, array -from torch import Generator, randperm -from torch.utils.data.dataset import Dataset, ConcatDataset, Subset -from matplotlib import pyplot as plt - -from ltbio.biosignals.modalities.Biosignal import Biosignal -from ltbio.ml.datasets.augmentation import DatasetAugmentationTechnique - - -class BiosignalDataset(Dataset, ABC): - """ - An abstract class representing a dataset of Biosignals. - All subclasses should store the ordered list of objects and targets, respectively, in `__objects` and `__targets`. - Also, subclasses have to overwrite `__getitem__`, supporting fetching an example for a given key. An example is a - pair (object, target). 
- """ - - def __init__(self, name: str = None): - self.__biosignals = {} - self.__objects = None - self.__targets = None - self.name = name - - def __getitem__(self, index) -> tuple[ndarray, ndarray]: - o = self.__objects[index] - t = self.__targets[index] - return o, t - - def __len__(self): - """The number of examples in the dataset.""" - return len(self.__objects) - - @property - def all_examples(self) -> list[tuple[ndarray, ndarray]]: - """All examples in the dataset.""" - # Pairs each object to its target - return [(o, t) for o, t in zip(self.__objects, self.__targets)] - - @property - def all_objects(self) -> ndarray: - """All objects in the dataset.""" - return self.__objects.copy() - - @property - def all_targets(self) -> ndarray: - """All targets in the dataset.""" - return self.__targets.copy() - - @property - def biosignals(self) -> dict[str, Biosignal]: - """The Biosignals from which the dataset was populated.""" - if len(self.__biosignals) != 0: - return self.__biosignals - else: - raise AttributeError("Dataset was not populated with Biosignals.") - - @property - def object_timeseries_names(self): - return self.__object_timeseries_names - - @property - def target_timeseries_names(self): - return self.__target_timeseries_names - - - def split(self, subsetA_size: int, subsetB_size: int, randomly: bool): - if subsetA_size + subsetB_size != len(self): - raise ValueError("Sum of sizes does not equal the length of the input dataset.") - - if randomly: - indices = randperm(subsetA_size + subsetB_size, generator=Generator().manual_seed(42)).tolist() - subsetA = BiosignalSubset(self, indices[:subsetA_size]) - subsetB = BiosignalSubset(self, indices[subsetA_size:]) - - else: - subsetA = BiosignalSubset(self, range(subsetA_size)) - subsetB = BiosignalSubset(self, range(subsetA_size, subsetA_size + subsetB_size)) - - return subsetA, subsetB - - def __add__(self, other: 'BiosignalDataset') -> 'CohortDataset': - return CohortDataset([self, other]) - - def augment(self, techniques:Collection[DatasetAugmentationTechnique], how_many_times=1, show_example=False): - initial_n_examples = len(self) - new_objects, new_targets = [], [] - - for i in range(how_many_times): - for technique in techniques: - for o in self.__objects: - if len(o.shape) == 1: - new_objects.append(technique._apply(o)) - else: - new_objects.append([technique._apply(seg) for seg in o]) - for t in self.__targets: - if isinstance(t, ndarray): - if len(t.shape) == 1: - new_targets.append(t.__copy__()) - else: - new_targets.append([seg.__copy__() for seg in t]) - else: - new_targets.append(t) - - self.__objects = concatenate((self.__objects, array(new_objects))) - self.__targets = concatenate((self.__targets, array(new_targets))) - - print(f"Dataset augmented from {initial_n_examples} to {len(self)} examples.") - - return initial_n_examples, len(self) - - def plot_example_object(self, number: int = None): - if number is None: - example = self[len(self) // 2] # middle example - else: - example = self[number] - - plt.figure() - for ts in example[0]: # get the object only - plt.plot(ts) - plt.show() - - def redimension_to(self, dimensions: int): - if len(self._BiosignalDataset__objects.shape) == 3: - if dimensions == 2: - self._BiosignalDataset__objects = self._BiosignalDataset__objects[:, None, :, :] - self._BiosignalDataset__targets = self._BiosignalDataset__targets[:, None, :, :] - if dimensions == 1: - self._BiosignalDataset__objects = self._BiosignalDataset__objects[:, 0, :, :] - self._BiosignalDataset__targets = 
self._BiosignalDataset__targets[:, 0, :, :] - if len(self._BiosignalDataset__objects.shape) == 2: - if dimensions == 2: - self._BiosignalDataset__objects = self._BiosignalDataset__objects[:, None, :] - #self._BiosignalDataset__targets = self._BiosignalDataset__targets[:, None, :, None] - else: - raise NotImplementedError() - - def transfer_to_device(self, device): - if device == 'cpu': - self._BiosignalDataset__objects = self._BiosignalDataset__objects.cpu().detach().numpy() - self._BiosignalDataset__targets = self._BiosignalDataset__targets.cpu().detach().numpy() - else: - self._BiosignalDataset__objects = torch.Tensor(self._BiosignalDataset__objects).to(device=device, dtype=torch.float) - self._BiosignalDataset__targets = torch.Tensor(self._BiosignalDataset__targets).to(device=device, dtype=torch.float) - - def to_tensor(self): - self._BiosignalDataset__objects = torch.Tensor(self._BiosignalDataset__objects) - self._BiosignalDataset__targets = torch.Tensor(self._BiosignalDataset__targets).to(torch.long) - - def __repr__(self): - return f"Name: {self.name}" - - -class BiosignalSubset(Subset, BiosignalDataset): - - def __init__(self, dataset: BiosignalDataset, indices: Sequence[int]): - super().__init__(dataset=dataset, indices=indices) - self.name = dataset.name - self._BiosignalDataset__objects = dataset._BiosignalDataset__objects - self._BiosignalDataset__targets = dataset._BiosignalDataset__targets - - @property - def all_examples(self): - return tuple([self.dataset[i] for i in self.indices]) - - @property - def all_objects(self): - return tuple([self.dataset[i][0] for i in self.indices]) - - @property - def all_targets(self): - return tuple([self.dataset[i][1] for i in self.indices]) - - @property - def object_timeseries_names(self): - return self.dataset.object_timeseries_names - - @property - def target_timeseries_names(self): - return self.dataset.target_timeseries_names - - -class CohortDataset(ConcatDataset, BiosignalDataset): - - def __init__(self, datasets: Iterable[BiosignalDataset]): - super().__init__(datasets=datasets) - name = 'Cohort ' - try: - name += ', '.join([d.name for d in datasets]) - except TypeError: - try: - res = [] - for d in datasets: - common_patient_code = d.biosignals['object'][0].patient_code - if all([biosignal.patient_code == common_patient_code for biosignal in d.biosignals['object']]) and \ - all([biosignal.patient_code == common_patient_code for biosignal in d.biosignals['target']]): - res.append(common_patient_code) - name += ', '.join(res) - except AttributeError: - name = 'Cohort' - - self.name = name - - def __iter__(self): - return self.datasets.__iter__() - - @property - def all_examples(self): - return tuple([x for x in self]) - - @property - def all_objects(self): - return tuple([x[0] for x in self]) - - @property - def all_targets(self): - return tuple([x[1] for x in self]) - - @property - def object_timeseries_names(self): - return tuple([d.object_timeseries_names for d in self.datasets]) - - @property - def target_timeseries_names(self): - return tuple([d.target_timeseries_names for d in self.datasets]) - - def _get_output_biosignals(self, output_segments:tuple, res = [], i = 0) -> list[Biosignal]: - for d in self.datasets: - if isinstance(d, CohortDataset): - res.append(d._get_output_biosignals(output_segments[i:], res, i)) - else: - res.append(d._get_output_biosignals(output_segments[i:len(d)])) - i += len(d) diff --git a/src/ltbio/ml/datasets/EventDetectionDataset.py b/src/ltbio/ml/datasets/EventDetectionDataset.py deleted file mode 
100644 index 03c8cccf..00000000 --- a/src/ltbio/ml/datasets/EventDetectionDataset.py +++ /dev/null @@ -1,230 +0,0 @@ -# -- encoding: utf-8 -- - -# =================================== - -# IT - LongTermBiosignals - -# Package: ml -# Module: EfficientDataset -# Description: - -# Contributors: João Saraiva -# Created: 03/09/2022 - -# =================================== -import logging -import random -from datetime import timedelta -from math import ceil -from typing import overload, Collection - -import matplotlib.pyplot as plt -import numpy as np -import torch -from datetimerange import DateTimeRange -from matplotlib.dates import DateFormatter -from numpy import array -from torch import Tensor -from torchvision.transforms import Compose - -from ltbio.biosignals.modalities.Biosignal import Biosignal -from ltbio.ml.datasets.BiosignalDataset import BiosignalDataset -from ltbio.ml.datasets.augmentation import DatasetAugmentationTechnique - - -class EventDetectionDataset(BiosignalDataset): - - @overload - def __init__(self, - *objects: Biosignal, - event_names: str | tuple[str], - paddings: tuple[timedelta| int | None] = (None, None), - ignore_margins: tuple[timedelta| int | None] = (None, None), - name: str = None): ... - - def __init__(self, *objects, event_names, paddings=(None, None), ignore_margins=(None, None), exclude_event: bool = False, name=None): - super().__init__(name) - - # Check objects - self._BiosignalDataset__biosignals = objects - if any(not isinstance(o, Biosignal) for o in objects) or len(objects) == 0: - raise TypeError("Parameter 'objects' must be one or multiple Biosignals.") - - # Check channel names - self._BiosignalDataset__object_timeseries_names = objects[0].channel_names - if len(objects) > 1: - for biosignal in objects: - if biosignal.channel_names != self._BiosignalDataset__object_timeseries_names: - raise AssertionError("The Biosignals given must have the same channel names.") - - # Check Event names - self.__event_names = event_names - if isinstance(event_names, str): - event_names = (event_names, ) - elif not (isinstance(event_names, (tuple, list)) and all(isinstance(x, str) for x in event_names)): - raise TypeError("Parameter 'event_names' must be one or multiple strings.") - - # Check paddings and ignore-margins - paddings, ignore_margins = list(paddings), list(ignore_margins) - for x in (paddings, ignore_margins): - for i in (0, 1): - if isinstance(x[i], int): - x[i] = timedelta(seconds=x[i]) - elif x[i] is None: - x[i] = timedelta(seconds=0) - elif not isinstance(x[i], timedelta): - raise TypeError(f'Paddings and ignore-margins must be timedeltas, or integer numbers of seconds, or None if inexistent.') - - # Assert channels are segmented in the same way - if isinstance(objects[0]._n_segments, dict): - raise AssertionError("Not all channels of the given Biosignal are segmented in the same way.") - - # Prepare time intervals of each example - positive_intervals = [] - self.positive_biosignals, self.negative_biosignals = [], [] - self.positive_boundaries, self.negative_boundaries = [], [] - biosignal = objects[0] # use just the first as reference; assuming all other have the same domain - self.onsets = [] - # Positive objects - for e in event_names: - event = biosignal.get_event(e) - self.onsets.append(event.onset) - interval_to_index = event.domain_with_padding(*paddings) - if exclude_event and event.has_onset and event.has_offset: - interval_to_index -= event.duration - p = biosignal[interval_to_index] - positive_intervals.append(interval_to_index) - 
self.positive_biosignals.append(p) - self.positive_boundaries.append(p._n_segments) - self.n_positive_examples = sum(self.positive_boundaries) - # Negative objects - for i in range(len(positive_intervals)): - if i == 0: - n = biosignal[: positive_intervals[i].start_datetime - ignore_margins[1]] - else: - start, end = positive_intervals[i - 1].end_datetime + ignore_margins[1], positive_intervals[i].start_datetime - ignore_margins[0] - if end < start: - print('>> Skipping negative chunk. <<') - break # don't index - n = biosignal[start:end] - self.negative_biosignals.append(n) - self.negative_boundaries.append(n._n_segments) - - # also, add segments from the last event until the end - n = biosignal[positive_intervals[-1].end_datetime + ignore_margins[0]:] - self.negative_biosignals.append(n) - self.negative_boundaries.append(n._n_segments) - self.n_negative_examples = sum(self.negative_boundaries) - - # Initially, the datset is not augmented. - self.augmentation_factor = 1 - - pass - # Shuffling is responsability of the user - - def __len__(self): - return self.n_positive_examples + self.n_negative_examples - - def __get_from_item(self, item, domain=False) -> tuple[Tensor, Tensor] | tuple[DateTimeRange, Tensor, bool]: - transform = False - - if item < self.n_positive_examples: - if self.augmentation_factor > 1 and self.class_to_augment == 1: - if item >= self.n_real_positive_examples: - transform = True - augment_iteration = item // self.n_real_positive_examples - item -= self.n_real_positive_examples * augment_iteration - for b, boundary in enumerate(self.positive_boundaries): - if b != 0: - item -= self.positive_boundaries[b - 1] - if item < boundary: - # print(f'Retriving example from + Biosignal {b}, block {item}') - o = self.positive_biosignals[b]._vblock(item) if not domain else self.positive_biosignals[b]._block_subdomain(item) - # print('Shape:', o.shape) - break - t = 1 - else: - item -= self.n_positive_examples - for b, boundary in enumerate(self.negative_boundaries): - if b != 0: - item -= self.negative_boundaries[b - 1] - if item < boundary: - # print(f'Retriving example from - Biosignal {b}, block {item}') - o = self.negative_biosignals[b]._vblock(item) if not domain else self.negative_biosignals[b]._block_subdomain(item) - # print('Shape:', o.shape) - break - t = 0 - - if transform and self.class_to_augment == t and not domain: - # print(f"Transformed in interation {augment_iteration}") - o = self.augmentation_techniques(o) - - - if not domain: - # Pass to MPS backend device - o = torch.tensor(o, dtype=torch.float32).to('mps', non_blocking=False) - t = torch.tensor(t, dtype=torch.long).to('mps', non_blocking=False) - return (o, t) - else: - return (o, t, transform) - - - def __getitem__(self, item): - """ - :param item: Integer index - :return: A pair (object, target) - """ - return self.__get_from_item(item) - - def __repr__(self): - res = self.name if self.name is not None else 'Untitled Event Detection Dataset' - res += f"\nNegative Examples: {self.n_negative_examples} ({int(self.n_negative_examples/len(self)*100)}%)" - res += f"\nPositive Examples: {self.n_positive_examples} ({int(self.n_positive_examples/len(self)*100)}%)" - res += f"\nTotal: {len(self)}" - return res - - def draw_timeline(self, precision:float): - fig = plt.figure(figsize=(18, 2)) - ax = plt.subplot() - for i in range(0, len(self), int(1/precision)): - domain, t, augmented = self.__get_from_item(i, domain=True) - plt.scatter(x=domain.end_datetime, y=0 if not augmented else random.random()+0.2, c='green' 
if t == 1 else 'red', marker='*', alpha=0.4) - date_form = DateFormatter("%d, %H:%M") - ax.xaxis.set_major_formatter(date_form) - plt.yticks((0, 0.5+0.2), ('Real', 'Augmented')) - - # Onsets with vertical lines - plt.vlines(self.onsets, ymin=0, ymax=1.2, colors='black') - - plt.show() - - @property - def class_weights(self) -> tuple[float, float]: - weight_0 = self.n_negative_examples/len(self) - weight_1 = self.n_positive_examples/len(self) - return weight_0, weight_1 - - def balance_with_augmentation(self, *techniques: DatasetAugmentationTechnique): - # Save for later, to aplly at indexing time - self.augmentation_techniques = Compose(techniques) - - self.n_real_examples = len(self) - - # Define which class has less examples - if self.n_positive_examples < self.n_negative_examples: - self.class_to_augment = 1 - self.augmentation_factor += ceil(self.n_negative_examples / self.n_positive_examples) - self.n_real_positive_examples = self.n_positive_examples - self.n_positive_examples *= self.augmentation_factor - else: - self.class_to_augment = 0 - self.augmentation_factor += ceil(self.n_positive_examples / self.n_negative_examples) - self.n_real_negative_examples = self.n_negative_examples - self.n_negative_examples *= self.augmentation_factor - - # They will not be absolutly 50-50%, but the balancing is most likely reasonable. - - print(f"Dataset augmented from {self.n_real_examples} to {len(self)} examples.") - print(f"Class weigths: {self.class_weights}") - - return self.n_real_examples, len(self) diff --git a/src/ltbio/ml/datasets/SegmentToSegmentDataset.py b/src/ltbio/ml/datasets/SegmentToSegmentDataset.py deleted file mode 100644 index 5d9df9af..00000000 --- a/src/ltbio/ml/datasets/SegmentToSegmentDataset.py +++ /dev/null @@ -1,136 +0,0 @@ -# -- encoding: utf-8 -- - -# =================================== - -# IT - LongTermBiosignals - -# Package: ml -# Module: SegmentToSegmentDataset -# Description: - -# Contributors: João Saraiva -# Created: 24/07/2022 -# Last Updated: 05/08/2022 - -# =================================== -from typing import Collection, overload - -import numpy as np - -from ltbio.biosignals.modalities.Biosignal import Biosignal -from ltbio.biosignals.timeseries.Timeseries import Timeseries -from ltbio.ml.datasets.BiosignalDataset import BiosignalDataset - - -class SegmentToSegmentDataset(BiosignalDataset): - - @overload - def __init__(self, object: Collection[Biosignal], target: Collection[Biosignal], name: str = None): ... - @overload - def __init__(self, object: Collection[Timeseries], target: Collection[Timeseries], name: str = None): ... 
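For context, the `balance_with_augmentation` method above is the entry point for class balancing in an `EventDetectionDataset`; a sketch of the intended call pattern, where the `ecg` Biosignal and its 'seizure' event are assumptions (`Flip` and `Randomness` are defined in the augmentation module further below):

```python
from ltbio.ml.datasets import EventDetectionDataset
from ltbio.ml.datasets.augmentation import Flip, Randomness

dataset = EventDetectionDataset(ecg, event_names='seizure', paddings=(30, 30))
print(dataset.class_weights)  # usually skewed before balancing
dataset.balance_with_augmentation(Flip(0.5), Randomness(0.01))
```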
- - def __init__(self, object, target, name: str = None): - super().__init__(name) - - # Check object types - if isinstance(object, Collection) and all(isinstance(x, Biosignal) for x in object): - self._BiosignalDataset__biosignals['object'] = object - res = [] - self._BiosignalDataset__object_timeseries_names = [] - for biosignal in object: - for channel_name, channel in biosignal: - res.append(channel) - self._BiosignalDataset__object_timeseries_names.append(channel_name) # Save the order of the channels, by their names - object = res - - elif isinstance(object, Collection) and all(isinstance(x, Timeseries) for x in object): - self._BiosignalDataset__object_timeseries_names = tuple([timeseries.name for timeseries in object]) # Save the order of the Timeseries, by their names - else: - raise ValueError("Parameter 'object' needs to be a collection of Biosignals.") - - # Check duplicate object names: - self._BiosignalDataset__object_timeseries_names = tuple(self._BiosignalDataset__object_timeseries_names) - if len(self._BiosignalDataset__object_timeseries_names) != len(set(self._BiosignalDataset__object_timeseries_names)): - raise AssertionError("Not all object Timeseries given have distinct names. Give a unique name for each Timeseries.") - - # Check target types - if isinstance(target, Collection) and all(isinstance(x, Biosignal) for x in target): - self._BiosignalDataset__biosignals['target'] = target - res = [] - self._BiosignalDataset__target_timeseries_names = [] - for biosignal in target: - for channel_name, channel in biosignal: - res.append(channel) - self._BiosignalDataset__target_timeseries_names.append(channel_name) # Save the order of the channels, by their names - target = res - - elif isinstance(target, Collection) and all(isinstance(x, Timeseries) for x in target): - self._BiosignalDataset__target_timeseries_names = tuple([timeseries.name for timeseries in target]) # Save the order of the Timeseries, by their names - else: - raise ValueError("Parameter 'target' needs to be a collection of Biosignals.") - - # Check duplicate target names: - self._BiosignalDataset__target_timeseries_names = tuple(self._BiosignalDataset__target_timeseries_names) - if len(self._BiosignalDataset__target_timeseries_names) != len(set(self._BiosignalDataset__target_timeseries_names)): - raise AssertionError("Not all target Timeseries given have distinct names. 
Give a unique name for each Timeseries.") - - # Assert not empty - if len(object) == 0: - raise AssertionError("Given object cannot be an empty Collection.") - if len(target) == 0: - raise AssertionError("Given target cannot be an empty Collection.") - - # Save references to target Timeseries for later reconstruction - self.__target_timeseries = target - - # Assert all Timeseries have the same domain - objects_domain = object[0].domain - if any([x.domain != objects_domain for x in object]) or any([x.domain != objects_domain for x in target]): - pass#raise AssertionError("All Timeseries must have the same domain in a SegmentToSegmentDataset.") - - # Assert all Object Timeseries have the same sampling frequency - objects_sampling_frequency = object[0].sampling_frequency - if any([x.sampling_frequency != objects_sampling_frequency for x in object]): - raise AssertionError("All object Timeseries must have the same sampling frequency in a SegmentToSegmentDataset.") - - # Assert all Target Timeseries have the same sampling frequency - targets_sampling_frequency = target[0].sampling_frequency - if any([x.sampling_frequency != targets_sampling_frequency for x in target]): - raise AssertionError("All target Timeseries must have the same sampling frequency in a SegmentToSegmentDataset.") - - # Gets samples from each Segment of each Timeseries. - object_all_segments = np.array([timeseries._to_array() for timeseries in object]) - target_all_segments = np.array([timeseries._to_array() for timeseries in target]) - - # VStacks the segments of all Timeseries. Each item is a sample to be fed to the model. - self._BiosignalDataset__objects = object_all_segments.swapaxes(0, 1) - self._BiosignalDataset__targets = target_all_segments.swapaxes(0, 1) - - def _get_output_timeseries(self, output_segments:tuple) -> list[Timeseries]: - output_segments = np.array(output_segments) - output_segments = output_segments.swapaxes(1, 0) - - new_timeseries = [] - for samples, timeseries in zip(output_segments, self.__target_timeseries): - new_timeseries.append(timeseries._new_samples(samples_by_segment=samples)) - - return new_timeseries - - def _get_output_biosignals(self, output_segments:tuple) -> list[Biosignal]: - new_timeseries = self._get_output_timeseries(output_segments) - new_biosignals = [] - - # Match to correspondent Biosignals - # Assuming they were vertically stacked by the order they were passed - i = 0 - for target_biosignal in self._BiosignalDataset__biosignals['target']: - new_channels = {} - for channel_name, _ in target_biosignal: - new_channels[channel_name] = new_timeseries[i] - i += 1 - new_biosignals.append(target_biosignal._new(timeseries=new_channels, name='Output '+ target_biosignal.name)) - - assert i == len(new_timeseries) # all Timeseries in 'new_timeseries' were used - - return new_biosignals - diff --git a/src/ltbio/ml/datasets/ValueToValueDataset.py b/src/ltbio/ml/datasets/ValueToValueDataset.py deleted file mode 100644 index 17d8cd49..00000000 --- a/src/ltbio/ml/datasets/ValueToValueDataset.py +++ /dev/null @@ -1,105 +0,0 @@ -# -- encoding: utf-8 -- - -# =================================== - -# IT - LongTermBiosignals - -# Package: ml -# Module: ValueToValueDataset -# Description: - -# Contributors: João Saraiva -# Created: 24/07/2022 -# Last Updated: 05/08/2022 - -# =================================== -from typing import Collection, overload - -import numpy as np - -from ltbio.biosignals.modalities.Biosignal import Biosignal -from ltbio.biosignals.timeseries.Timeseries import Timeseries 
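The assertions above spell out the contract of these paired datasets: distinct Timeseries names and per-side uniform sampling frequencies. A construction sketch for `SegmentToSegmentDataset`, where the two equally segmented Biosignals are assumptions:

```python
from ltbio.ml.datasets import SegmentToSegmentDataset

# noisy_ecg / clean_ecg: Biosignals whose channels are segmented the same way (assumed)
dataset = SegmentToSegmentDataset(object=[noisy_ecg], target=[clean_ecg], name='ECG denoising')
x, y = dataset[0]  # one (object, target) pair of vertically stacked channel segments
```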
-from ltbio.ml.datasets.BiosignalDataset import BiosignalDataset - - -class ValueToValueDataset(BiosignalDataset): - - @overload - def __init__(self, object: Collection[Biosignal], target: Collection[Biosignal], name: str = None): ... - @overload - def __init__(self, object: Collection[Timeseries], target: Collection[Timeseries], name: str = None): ... - - def __init__(self, object, target, name: str = None): - super().__init__(name) - - # Check object types - if isinstance(object, Collection) and all(isinstance(x, Biosignal) for x in object): - self._BiosignalDataset__biosignals['object'] = object - res = [] - self._BiosignalDataset__object_timeseries_names = [] - for biosignal in object: - for channel_name, channel in biosignal: - res.append(channel) - self._BiosignalDataset__object_timeseries_names.append(channel_name) # Save the order of the channels, by their names - object = res - - elif isinstance(object, Collection) and all(isinstance(x, Timeseries) for x in object): - self._BiosignalDataset__object_timeseries_names = tuple([timeseries.name for timeseries in object]) # Save the order of the Timeseries, by their names - else: - raise ValueError("Parameter 'object' needs to be a collection of Biosignals or TImeseries.") - - # Check duplicate object names: - self._BiosignalDataset__object_timeseries_names = tuple(self._BiosignalDataset__object_timeseries_names) - if len(self._BiosignalDataset__object_timeseries_names) != len(set(self._BiosignalDataset__object_timeseries_names)): - raise AssertionError("Not all object Timeseries given have distinct names. Give a unique name for each Timeseries.") - - # Check target types - if isinstance(target, Collection) and all(isinstance(x, Biosignal) for x in target): - self._BiosignalDataset__biosignals['target'] = target - res = [] - self._BiosignalDataset__target_timeseries_names = [] - for biosignal in target: - for channel_name, channel in biosignal: - res.append(channel) - self._BiosignalDataset__target_timeseries_names.append(channel_name) # Save the order of the channels, by their names - target = res - - elif isinstance(target, Collection) and all(isinstance(x, Timeseries) for x in target): - self._BiosignalDataset__target_timeseries_names = tuple([timeseries.name for timeseries in target]) # Save the order of the Timeseries, by their names - else: - raise ValueError("Parameter 'target' needs to be a collection of Biosignals or Timeseries.") - - # Check duplicate target names: - self._BiosignalDataset__target_timeseries_names = tuple(self._BiosignalDataset__target_timeseries_names) - if len(self._BiosignalDataset__target_timeseries_names) != len(set(self._BiosignalDataset__target_timeseries_names)): - raise AssertionError("Not all target Timeseries given have distinct names. 
Give a unique name for each Timeseries.") - - # Assert not empty - if len(object) == 0: - raise AssertionError("Given object cannot be an empty Collection.") - if len(target) == 0: - raise AssertionError("Given target cannot be an empty Collection.") - - # Assert all Timeseries have the same domain - objects_domain = object[0].domain - if any([x.domain != objects_domain for x in object]) or any([x.domain != objects_domain for x in target]): - raise AssertionError("All Timeseries must have the same domain in a SegmentToSegmentDataset.") - - # Assert all Object Timeseries have the same sampling frequency - objects_sampling_frequency = object[0].sampling_frequency - if any([x.sampling_frequency != objects_sampling_frequency for x in object]): - raise AssertionError("All object Timeseries must have the same sampling frequency in a SegmentToSegmentDataset.") - - # Assert all Target Timeseries have the same sampling frequency - targets_sampling_frequency = target[0].sampling_frequency - if any([x.sampling_frequency != targets_sampling_frequency for x in target]): - raise AssertionError("All target Timeseries must have the same sampling frequency in a SegmentToSegmentDataset.") - - # Gets samples from each Segment of each Timeseries. - object_all_segments = np.array([timeseries._to_array() for timeseries in object]) - target_all_segments = np.array([timeseries._to_array() for timeseries in target]) - - # VStacks the segments of all Timeseries. Each item is a sample to be fed to the model. - self._BiosignalDataset__objects = object_all_segments.swapaxes(0, 1) - self._BiosignalDataset__targets = target_all_segments.swapaxes(0, 1) - diff --git a/src/ltbio/ml/datasets/__init__.py b/src/ltbio/ml/datasets/__init__.py deleted file mode 100644 index e8e54bf4..00000000 --- a/src/ltbio/ml/datasets/__init__.py +++ /dev/null @@ -1,3 +0,0 @@ -from ltbio.ml.datasets.SegmentToSegmentDataset import SegmentToSegmentDataset -from ltbio.ml.datasets.ValueToValueDataset import ValueToValueDataset -from ltbio.ml.datasets.EventDetectionDataset import EventDetectionDataset diff --git a/src/ltbio/ml/datasets/augmentation.py b/src/ltbio/ml/datasets/augmentation.py deleted file mode 100644 index 44938d58..00000000 --- a/src/ltbio/ml/datasets/augmentation.py +++ /dev/null @@ -1,163 +0,0 @@ -# -- encoding: utf-8 -- - -# =================================== - -# IT - LongTermBiosignals - -# Package: ml -# Module: augmentation -# Description: - -# Contributors: João Saraiva -# Created: 25/08/2022 - -# =================================== -import random -from abc import abstractmethod, ABC -from typing import Iterable - -from numpy import ndarray, array, roll, arange, sin, pi, linspace -from numpy.random import normal -from scipy.signal import square - -from ltbio.processing.filters import FrequencyDomainFilter, FrequencyResponse, BandType - - -class DatasetAugmentationTechnique(ABC): - def __init__(self, parameter): - self.parameter = parameter - - @abstractmethod - def _apply(self, example: ndarray): - pass - - def __call__(self, example): - """For PyTorch on-the-fly data augmentation.""" - return self._apply(example) - - -class Scale(DatasetAugmentationTechnique): - """ - Multiplies the signal by a random value between `minimum_magnitude` and 1. - Common values for `minimum_magnitude` are between [0.25, 1[. 
- """ - def __init__(self, magnitude): - super().__init__(magnitude) - - def _apply(self, example: ndarray): - return example * random.uniform(self.parameter, 1) - - -class Flip(DatasetAugmentationTechnique): - """ - Inverts the signal (* -1) with probability `probability`. - Values for `probability` must be between [0, 1]. - """ - def __init__(self, probability): - if 0 > probability > 1: - raise ValueError("Probabilty must be between 0 and 1.") - super().__init__(probability) - - def _apply(self, example: ndarray): - if random.random() < self.parameter: - return example * -1 - else: - return example - - -class Drop(DatasetAugmentationTechnique): - """ - Randomly makes missing samples (* 0) with probability `probability`. - Common values for `probability` are between [0, 0.4]. - Values for `probability` must be between [0, 1]. - """ - - def __init__(self, probability): - if 0 > probability > 1: - raise ValueError("Probabilty must be between 0 and 1.") - super().__init__(probability) - - def _apply(self, example: ndarray): - mask = array([0 if random.random() < self.parameter else 1 for i in range(len(example))]) - return example * mask - - -class Shift(DatasetAugmentationTechnique): - """ - Temporally shifts the signal by `displacement` * number of samples. - Direction (left or right) is chosen with equal probability. - Values for `displacement` must be between [0, 1]. - """ - - def __init__(self, displacement): - if 0 > displacement > 1: - raise ValueError("Displacement must be between 0 and 1, like a % porportion.") - super().__init__(displacement) - - def _apply(self, example: ndarray): - if random.random() < 0.5: # left - return roll(example, -int(self.parameter*len(example))) - else: # right - return roll(example, int(self.parameter*len(example))) - - -class Sine(DatasetAugmentationTechnique): - """ - Adds a sine curve to the signal with random frequency and amplitude `magnitude`. - Frequency is random between [0.001, 0.02]. - Common values for `magnitude` are between [0, 1]. - """ - - def __init__(self, magnitude): - super().__init__(magnitude) - - def _apply(self, example: ndarray): - frequency = 0.019 * random.random() + 0.001 - samples = arange(len(example)) - sinusoidal = self.parameter * sin(2 * pi * frequency * samples) - return example + sinusoidal - - -class SquarePulse(DatasetAugmentationTechnique): - """ - Adds square pulses to the signal with random frequency and amplitude `magnitude`. - Frequency is random between [0.001, 0.1]. - Common values for `magnitude` are between [0, 0.02]. - """ - - def __init__(self, magnitude): - super().__init__(magnitude) - - def _apply(self, example: ndarray): - frequency = 0.099 * random.random() + 0.001 - samples = arange(len(example)) - pulses = self.parameter * square(2 * pi * frequency * samples) - return example + pulses - - -class Randomness(DatasetAugmentationTechnique): - """ - Adds gaussian noise to the signal with amplitude `magnitude`. - Common values for `magnitude` are between [0, 0.02]. 
- """ - - def __init__(self, magnitude): - super().__init__(magnitude) - - def _apply(self, example: ndarray): - pulses = self.parameter * normal(0, 1, len(example)) - return example + pulses - - -""" -class Lowpass(DatasetAugmentationTechnique): - - def __init__(self, magnitude): - super().__init__(magnitude) - - def _apply(self, example: ndarray): - filter = FrequencyDomainFilter(FrequencyResponse.FIR, BandType.LOWPASS, self.parameter * 40, 20) - filter._visit() - pulses = self.parameter * normal(0, 1, len(example)) - return example + pulses -""" diff --git a/src/ltbio/ml/metrics.py b/src/ltbio/ml/metrics.py deleted file mode 100644 index 703a8182..00000000 --- a/src/ltbio/ml/metrics.py +++ /dev/null @@ -1,210 +0,0 @@ -# -- encoding: utf-8 -- - -# =================================== - -# IT - LongTermBiosignals - -# Package: ml -# Module: metrics -# Description: - -# Contributors: João Saraiva -# Created: 02/08/2022 - -# =================================== -from abc import ABC, abstractmethod - -import numpy as np - -from ltbio.biosignals.timeseries.Unit import Unit, Unitless, Decibels -from ltbio.ml.datasets.BiosignalDataset import BiosignalDataset - - -class Metric(ABC): - def __init__(self): - pass - - @property - @abstractmethod - def name(self) -> str: - pass - -class ValueMetric(Metric, ABC): - def __init__(self, value: float | int): - super().__init__() - self.__value = value - - @classmethod - def fromDatasetPredictions(cls, dataset:BiosignalDataset, predictions): - return cls(cls.compute_value(dataset, predictions)) - - @staticmethod - @abstractmethod - def compute_value(dataset:BiosignalDataset, predictions) -> float: - pass - - @property - def unit(self) -> Unit: - return Unitless() - - def __getitem__(self, item): - if isinstance(self.__value, dict): - return self.__value[item] - else: - raise TypeError("There are no multiple values in this metric.") - - def __float__(self): - if isinstance(self.__value, dict): - raise TypeError("This metric computed a value for each Timeseries. Index its name first.") - return float(self.__value) - - def __int__(self): - if isinstance(self.__value, dict): - raise TypeError("This metric computed a value for each Timeseries. 
Index its name first.") - return int(self.__value) - - def __str__(self): - if isinstance(self.__value, dict): - return self.name + ':\n\t' + '\n\t'.join([str(name) + ' = ' + str(value) + ' (' + str(self.unit) + ')' for name, value in self.__value.items()]) - else: - return self.name + ' = ' + str(self.__value) + ' (' + str(self.unit) +')' - - def __repr__(self): - return self.__str__() - -class PlotMetric(Metric, ABC): - def __init__(self, x, y): - super().__init__() - self.__x = x - self.__y = y - -class Sensitivity(ValueMetric): - """Sensitivity based on true and false positives and negatives.""" - - def __init__(self, value): - super().__init__(value) - - @property - def name(self): - return 'Sensitivity' - -class Specificity(ValueMetric): - """Specificity based on true and false positives and negatives.""" - - def __init__(self, value): - super().__init__(value) - - @property - def name(self): - return 'Specificity' - -class Precision(ValueMetric): - """Precision based on true and false positives and negatives.""" - - def __init__(self, value): - super().__init__(value) - - @property - def name(self): - return 'Precision' - -class Recall(ValueMetric): - """Recall based on true and false positives and negatives.""" - - def __init__(self, value): - super().__init__(value) - - @property - def name(self): - return 'Recall' - -class Accuracy(ValueMetric): - """Accuracy based on true and false positives and negatives.""" - def __init__(self, value): - super().__init__(value) - - @property - def name(self): - return 'Accuracy' - -class F1(ValueMetric): - """F1-score based on true and false positives and negatives.""" - def __init__(self, value): - super().__init__(value) - - @property - def name(self): - return 'F1-Score' - -class MSE(ValueMetric): - """Mean Squared Error.""" - @staticmethod - def compute_value(dataset, predictions): - average_mse = 0 - targets = dataset.all_targets - for target, prediction in zip(targets, predictions): - mse = (np.square(target - prediction)).mean(axis=1) - average_mse += mse - average_mse /= len(targets) - if np.shape(average_mse)[0] > 1: - return {ts_label: value for ts_label, value in zip(dataset.target_timeseries_names, tuple(average_mse))} - else: - return average_mse - - @property - def name(self): - return 'Mean Squared Error' - -class MAE(ValueMetric): - """Mean Absolute Error.""" - def __init__(self, value): - super().__init__(value) - - @property - def name(self): - return 'Mean Absolute Error' - -class SNR(ValueMetric): - """Signal-to-noise ratio.""" - def __init__(self, value): - super().__init__(value) - - @property - def name(self): - return 'Signal-to-noise ratio' - - @property - def unit(self) -> Unit: - return Decibels() - -class SNRI(ValueMetric): - """Signal-to-noise ratio improvement.""" - def __init__(self, value): - super().__init__(value) - - @property - def name(self): - return 'SNR Improvement' - - @property - def unit(self) -> Unit: - return Decibels() - - - - - - - - - - - - - - - - - - - diff --git a/src/ltbio/ml/supervised/SupervisedTrainConditions.py b/src/ltbio/ml/supervised/SupervisedTrainConditions.py deleted file mode 100644 index 6ba846e8..00000000 --- a/src/ltbio/ml/supervised/SupervisedTrainConditions.py +++ /dev/null @@ -1,266 +0,0 @@ -# -*- encoding: utf-8 -*- - -# =================================== - -# IT - LongTermBiosignals - -# Package: ml -# Module: SupervisedTrainConditions -# Description: Class SupervisedTrainConditions, that holds values of parameters to train a model in a specific manner. 
- -# Contributors: João Saraiva -# Created: 04/06/2022 -# Last Updated: 07/06/2022 - -# =================================== -from copy import deepcopy -from typing import Iterable, Collection - - -class SupervisedTrainConditions(): - def __init__(self, loss, - optimizer = None, - train_size:int = None, train_ratio:float = None, test_size:int = None, test_ratio:float = None, - validation_ratio:float = None, - epochs: int = None, learning_rate:float = None, batch_size:int = None, - shuffle:bool=False, epoch_shuffle:bool = False, - stop_at_deltaloss:float = None, patience:int = None, - **hyperparameters): - - # Mandatory conditions - - self.loss = loss - self.optimizer = optimizer - - # Versatile-mandatory conditions - - if train_size is not None: - if isinstance(train_size, int) and train_size >= 1: - self.train_size = train_size - else: - raise ValueError("Condition 'train_size' must be an integer >= 1.") - else: - self.train_size = None - - if test_size is not None: - if isinstance(test_size, int) and test_size >= 1: - self.test_size = test_size - else: - raise ValueError("Condition 'test_size' must be an integer >= 1.") - else: - self.test_size = None - - if train_ratio is not None: - if isinstance(train_ratio, float) and 0 < train_ratio < 1: - self.train_ratio = train_ratio - else: - raise ValueError("Condition 'train_ratio' must be between 0 and 1.") - else: - self.train_ratio = None - - if test_ratio is not None: - if isinstance(test_ratio, float) and 0 < test_ratio < 1: - self.test_ratio = test_ratio - else: - raise ValueError("Condition 'test_ratio' must be between 0 and 1.") - else: - self.test_ratio = None - - if train_size is None and test_size is None and train_ratio is None and test_ratio is None: - raise AssertionError("Specify at least 'train_size' or 'test_size' or 'train_ratio' or 'test_ratio'.") - - # Optional conditions - - if validation_ratio is not None: - if isinstance(validation_ratio, float) and 0 < validation_ratio < 1: - self.validation_ratio = validation_ratio - else: - raise ValueError("Condition 'validation_ratio' must be between 0 and 1.") - else: - self.validation_ratio = None - - if epochs is not None: - if isinstance(epochs, int) and epochs > 0: - self.epochs = epochs - else: - raise ValueError("Condition 'epochs' must be an integer >= 1.") - else: - self.epochs = None - - if batch_size is not None: - if isinstance(batch_size, int) and batch_size > 0: - self.batch_size = batch_size - else: - raise ValueError("Condition 'batch_size' must be an integer >= 1.") - else: - self.batch_size = None - - if learning_rate is not None: - if isinstance(learning_rate, float) and 0 < learning_rate < 1: - self.learning_rate = learning_rate - else: - raise ValueError("Condition 'learning_rate' must be between 0 and 1.") - else: - self.learning_rate = None - - if shuffle is not None: - if isinstance(shuffle, bool): - self.shuffle = shuffle - else: - raise TypeError("Condition 'shuffle' must be True or False.") - else: - self.shuffle = None - - if epoch_shuffle is not None: - if isinstance(epoch_shuffle, bool): - self.epoch_shuffle = epoch_shuffle - else: - raise TypeError("Condition 'epoch_shuffle' must be True or False.") - else: - self.epoch_shuffle = None - - if stop_at_deltaloss is not None: - if isinstance(stop_at_deltaloss, float): - self.stop_at_deltaloss = stop_at_deltaloss - else: - raise TypeError("Condition 'stop_at_deltaloss' must be a float.") - else: - self.stop_at_deltaloss = None - - if patience is not None: - if isinstance(patience, int) and patience > 0: - 
self.patience = patience - else: - raise TypeError("Condition 'patience' must be an integer > 0.") - else: - self.patience = None - - self.hyperparameters = hyperparameters - - @property - def _slots(self): - return { - 'optimizer': self.optimizer, - 'loss': self.loss, - 'train_size': self.train_size, - 'test_size': self.test_size, - 'train_ratio': self.train_ratio, - 'test_ratio': self.test_ratio, - 'validation_ratio': self.validation_ratio, - 'epochs': self.epochs, - 'batch_size': self.batch_size, - 'shuffle': self.shuffle, - 'epoch_shuffle': self.epoch_shuffle, - 'learning_rate': self.learning_rate, - 'stop_at_deltaloss': self.stop_at_deltaloss, - 'patience': self.patience, - } - - @staticmethod - def differences_between(sets_of_conditions: Collection) -> tuple[dict, ...]: - if not isinstance(sets_of_conditions, (list, tuple, set)) or not all(isinstance(_set, SupervisedTrainConditions) for _set in sets_of_conditions): - raise TypeError("Sets of conditions must be a collection of SupervisedTrainConditions") - - slot_keys = sets_of_conditions[0]._slots.keys() - differences = [{} for i in range(len(sets_of_conditions))] - - for key in slot_keys: - all_values = [_set._slots[key] for _set in sets_of_conditions] - for value in all_values: - if (key == 'optimizer' or key == 'loss') and not isinstance(value, str): - if hasattr(all_values[0], '__getstate__') and hasattr(value, '__getstate__'): - if all_values[0].__getstate__() != value.__getstate__(): - for i, x in enumerate(all_values): - differences[i][key] = x - else: - if all_values[0].__repr__() != value.__repr__(): - for i, x in enumerate(all_values): - differences[i][key] = x - else: - if value != all_values[0]: - for i, x in enumerate(all_values): - differences[i][key] = x - - # Find differences in hyperparameters - all_hyperparameters = [_set.hyperparameters for _set in sets_of_conditions] - if len(all_hyperparameters) > 1: - different_keys_found = [] - for i in range(len(all_hyperparameters)): - x = all_hyperparameters[i] - for j in range(i+1, len(all_hyperparameters)): - y = all_hyperparameters[j] - diff = set(x.items()) - set(y.items()) - if len(diff) > 0: - for d in diff: - different_keys_found.append(d[0]) - for key in different_keys_found: - for i, x in enumerate(all_hyperparameters): - differences[i][key] = x[key] if key in x else None - - return tuple(differences) - - def check_it_has(self, attributes:Iterable[str]): - slots = self._slots - for a in attributes: - if slots[a] == None: - raise ValueError(f"This type of model requires '{a}' to be defined in the given conditions.") - - def __str__(self): - res = f'Optimizer: {self.optimizer} | Loss Function: {self.loss}\n' - - if self.train_size is not None and self.test_size is not None: - res += f'Train Size = {self.train_size} | Test Size = {self.test_size}' - elif self.train_size is not None: - res += f'Train Size = {self.train_size}' - elif self.test_size is not None: - res += f'Test Size = {self.test_size}' - - if self.train_ratio is not None and self.test_ratio is not None: - res += f'Train Ratio = {self.train_ratio} | Test Ratio = {self.test_ratio}' - elif self.train_ratio is not None: - res += f'Train Ratio = {self.train_ratio}' - elif self.test_ratio is not None: - res += f'Test Ratio = {self.test_ratio}' - - if self.validation_ratio is not None: - res += f' | Validation Ratio = {self.validation_ratio}' - - res += '\n' - - other_optionals = [] - if self.epochs is not None: - other_optionals.append(f'Epochs = {self.epochs}') - if self.batch_size is not None: - 
other_optionals.append(f'Batch size = {self.batch_size}') - if self.shuffle is not None: - other_optionals.append(f'Shuffle: {self.shuffle}') - if self.epoch_shuffle is not None: - other_optionals.append(f'Shuffle in-Epoch: {self.epoch_shuffle}') - if self.learning_rate is not None: - other_optionals.append(f'Learning Rate = {self.learning_rate}') - - res += ' | '.join(other_optionals) - - res += '\nHyperparameters:\n' - res += ' | '.join([key + ' = ' + value for key, value in self.hyperparameters.items()]) - - return res - - def __copy__(self): - return self.__class__(**deepcopy(self._slots), **deepcopy(self.hyperparameters)) - - def __eq__(self, other): - if isinstance(other, SupervisedTrainConditions): - x_slots, y_slots = self._slots, other._slots - - optimizer, loss = True, True - if not isinstance(x_slots['optimizer'], str): - optimizer = x_slots['optimizer'].__repr__() == y_slots['optimizer'].__repr__() - del x_slots['optimizer'], y_slots['optimizer'] - if not isinstance(x_slots['loss'], str): - loss = x_slots['loss'].__repr__() == y_slots['loss'].__repr__() - del x_slots['loss'], y_slots['loss'] - - primitive_values = all([x_slots[label] == y_slots[label] for label in x_slots.keys()]) - - return primitive_values and optimizer and loss diff --git a/src/ltbio/ml/supervised/SupervisingTrainer.py b/src/ltbio/ml/supervised/SupervisingTrainer.py deleted file mode 100644 index 43d7bab8..00000000 --- a/src/ltbio/ml/supervised/SupervisingTrainer.py +++ /dev/null @@ -1,117 +0,0 @@ -# -*- encoding: utf-8 -*- - -# =================================== - -# IT - LongTermBiosignals - -# Package: ml -# Module: SupervisingTrainer -# Description: Class SupervisingTrainer, a type of PipelineUnit that trains supervised machine learning models. - -# Contributors: João Saraiva -# Created: 04/06/2022 -# Last Updated: 07/08/2022 - -# =================================== - -from typing import Collection - -from ltbio.biosignals import Timeseries -from ltbio.ml.datasets import SegmentToSegmentDataset -from ltbio.ml.datasets.BiosignalDataset import BiosignalDataset -from ltbio.ml.supervised.models import SupervisedModel as _SupervisedModel -from ltbio.ml.supervised.SupervisedTrainConditions import SupervisedTrainConditions -from ltbio.ml.supervised.SupervisingTrainerReporter import SupervisingTrainerReporter -from ltbio.pipeline.PipelineUnit import SinglePipelineUnit - - -class SupervisingTrainer(SinglePipelineUnit): - PIPELINE_INPUT_LABELS = {'dataset': ('timeseries', 'target')} - PIPELINE_OUTPUT_LABELS = {'results': 'results'} - ART_PATH = 'resources/pipeline_media/ml.png' - - def __init__(self, model: _SupervisedModel.SupervisedModel, - train_conditions: Collection[SupervisedTrainConditions], - evaluation_metrics: Collection = None, - name: str = None, save_report_to: str = None): - - super().__init__(name) - - if not isinstance(model, _SupervisedModel.SupervisedModel): - raise TypeError("Parameter 'model' must be an instance of SupervisedModel.") - self.__model = model - - if len(train_conditions) == 0: - raise AttributeError("Give at least one SupervisedTrainConditions to 'train_conditions'.") - if not isinstance(train_conditions, (tuple, list, set)) or not all( - isinstance(x, SupervisedTrainConditions) for x in train_conditions): - raise TypeError("Parameter 'train_conditions' must be a collection of SupervisedTrainConditions objects.") - self.train_conditions = train_conditions - - self.evaluation_metrics = evaluation_metrics - self.save_report_to = save_report_to - - self.reporter = 
SupervisingTrainerReporter() - self.reporter.declare_model_description(self.__model, **self.__model.non_trainable_parameters) - - def apply(self, dataset: BiosignalDataset, test_dataset: BiosignalDataset = None): - - if not isinstance(dataset, BiosignalDataset): - raise TypeError(f"A BiosignalDataset is expected. Instead a {type(dataset)} was given.") - - # Infer what is different between all sets of the train conditions - differences_in_conditions = SupervisedTrainConditions.differences_between(self.train_conditions) - - for i, set_of_conditions in enumerate(self.train_conditions): - if test_dataset is None: - # Train subdatset size - if set_of_conditions.train_size != None: - train_subsize = set_of_conditions.train_size - elif set_of_conditions.train_ratio != None: - train_subsize = int(set_of_conditions.train_ratio * len(dataset)) - else: - train_subsize = None - # Test subdatset size - if set_of_conditions.test_size != None: - test_subsize = set_of_conditions.test_size - elif set_of_conditions.test_ratio != None: - test_subsize = int(set_of_conditions.test_ratio * len(dataset)) - else: - test_subsize = None - # By inference - if train_subsize is None: - train_subsize = len(dataset) - test_subsize - if test_subsize is None: - test_subsize = len(dataset) - train_subsize - # SupervisedTrainConditions garantees that at least one of these four conditions is defined to make these computations. - - # Prepare the train and test datasets - train_dataset, test_dataset = dataset.split(train_subsize, test_subsize, set_of_conditions.shuffle is True) - else: - train_dataset = dataset - - # Train the model - train_results = self.__model.train(train_dataset, set_of_conditions) - - # Test the model - test_results = self.__model.test(test_dataset, self.evaluation_metrics) - - # Name each test result with what version number and differences in train conditions. - test_results.name = f"[V{self.__model.current_version}: " + ', '.join([f'{key} = {value}' for key, value in differences_in_conditions[i].items()]) + ']' - - # Report results - self.reporter.declare_training_session(set_of_conditions, train_results, test_results) - - if self.save_report_to is not None: - self.reporter.output_report('Supervising Trainer Report', self.save_report_to) - - return self.__model.best_version_results - - def _transform_input(self, object:tuple[Timeseries], target:tuple[Timeseries]) -> BiosignalDataset: - if len(target) == 1 and target[0].is_contiguous: - # dataset = SegmentToValueDataset() - pass # TODO - else: - dataset = SegmentToSegmentDataset(object=object, target=target) - - return dataset diff --git a/src/ltbio/ml/supervised/SupervisingTrainerReporter.py b/src/ltbio/ml/supervised/SupervisingTrainerReporter.py deleted file mode 100644 index 401c1bf1..00000000 --- a/src/ltbio/ml/supervised/SupervisingTrainerReporter.py +++ /dev/null @@ -1,107 +0,0 @@ -# -*- encoding: utf-8 -*- - -# =================================== - -# IT - LongTermBiosignals - -# Package: ml -# Module: SupervisedTrainReport -# Description: Class SupervisedTrainReport, produces a PDF report for a SupervisingTrainer. 
- -# Contributors: João Saraiva -# Created: 06/05/2022 -# Last Updated: 06/08/2022 - -# =================================== -import os - -from matplotlib import pyplot as plt - -from ltbio.ml.metrics import ValueMetric -from ltbio.ml.supervised.models import SupervisedModel -from ltbio.ml.supervised import SupervisedTrainConditions -from ltbio.ml.supervised.results import PredictionResults -from ltbio.ml.supervised.results import SupervisedTrainResults -from ltbio.pipeline.reports import Reporter - - -class SupervisingTrainerReporter(Reporter): - - def __init__(self, writer=None): - super().__init__(writer) - self.model: SupervisedModel = None - self.model_descriptors: dict = {} - self.training_conditions: list[SupervisedTrainConditions] = [] - self.train_results: list[SupervisedTrainResults] = [] - self.test_results: list[PredictionResults] = [] - - def body(self): - # Model Description - self.begin_subsection('MODEL DESCRIPTION') - self.add_text_block('Name: {0}'.format(self.model.name)) - self.add_text_block('Design class: {0}'.format(type(self.model.design).__name__)) - self.add_text_block("\t".join(['{0}={1}'.format(label, self.model_descriptors[label]) for label in self.model_descriptors])) - - # Experiments - for i, (conditions, train_results, test_results) in enumerate(zip(self.training_conditions, self.train_results, self.test_results)): - self.begin_subsection("EXPERIMENT {}".format(str(i+1))) - # Conditions - self.add_text_block(str(conditions)) - # Avg. Losses - if train_results.train_losses is not None: - self.add_text_block("Train Loss: {:.5f}".format(train_results.train_losses[-1])) - if train_results.validation_losses is not None: - self.add_text_block("Validation Loss: {:.5f}".format(train_results.validation_losses[-1])) - if test_results.loss is not None: - self.add_text_block("Avg. Test Loss: {:.5f}".format(test_results.loss)) - # Losses plot - self.__plot_train_and_test_loss(train_results.train_losses, train_results.validation_losses, './losses.png') - self.add_image_fullwidth('./losses.png') - os.remove('./losses.png') - # Other metrics - grid_filepaths: list[str] = [] - for metric in test_results.metrics: - if isinstance(metric, ValueMetric): - self.add_text_block(str(metric)) - else: #elif isinstance(metric, PlotMetric): - grid_filepaths.append(metric.filepath) - self.add_image_grid(tuple(grid_filepaths)) - - def declare_model_description(self, model: SupervisedModel, **descriptors): - self.model = model - self.model_descriptors = descriptors - - def declare_training_session(self, train_conditions:SupervisedTrainConditions, train_results: SupervisedTrainResults, test_results: PredictionResults): - self.training_conditions.append(train_conditions) - self.train_results.append(train_results) - self.test_results.append(test_results) - - def print_loss_plot(self, image_path: str): - self.writer.__break_line() - self.writer.image(image_path, w=self.writer.FULL_PIC_WIDTH, h=self.writer.FULL_PIC_HEIGHT) - - def print_small_plots(self, image_paths: str): - """ - Prints a grid of n lines and 2 columns. 
- """ - self.writer.__break_line() - for i, image_path in enumerate(image_paths): - if i % 2 == 0: - self.writer.image(image_path, w=self.writer.SMALL_PIC_WIDTH, h=self.writer.SMALL_PIC_HEIGHT) - else: - self.writer.image(image_path, w=self.writer.SMALL_PIC_WIDTH, h=self.writer.SMALL_PIC_HEIGHT, - x=self.writer.x + self.writer.SMALL_PIC_WIDTH + self.writer.SMALL_PIC_SEP, y=self.writer.y - self.writer.SMALL_PIC_HEIGHT) - - def __plot_train_and_test_loss(self, train_losses: list[float], validation_losses: list[float], save_to: str): - fig = plt.figure(figsize=(10, 5)) - plt.subplot(1, 1, 1) - plt.title("Loss over the Epochs") - plt.plot(range(1, len(train_losses)+1), train_losses, "b-", label="Train Loss") - if validation_losses is not None: - plt.plot(range(1, len(validation_losses)+1), validation_losses, "r-", label="Train Loss") - plt.legend(loc="upper right") - plt.xlabel("Epochs") - plt.ylabel("Loss") - fig.tight_layout() - fig.savefig(save_to) - plt.close() diff --git a/src/ltbio/ml/supervised/__init__.py b/src/ltbio/ml/supervised/__init__.py deleted file mode 100644 index 6fea3400..00000000 --- a/src/ltbio/ml/supervised/__init__.py +++ /dev/null @@ -1,3 +0,0 @@ -# Quick shortcuts to classes -from ltbio.ml.supervised.SupervisedTrainConditions import SupervisedTrainConditions -from ltbio.ml.supervised.SupervisingTrainer import SupervisingTrainer diff --git a/src/ltbio/ml/supervised/models/SkLearnModel.py b/src/ltbio/ml/supervised/models/SkLearnModel.py deleted file mode 100644 index 635d77e1..00000000 --- a/src/ltbio/ml/supervised/models/SkLearnModel.py +++ /dev/null @@ -1,155 +0,0 @@ -# -*- encoding: utf-8 -*- - -# =================================== - -# IT - LongTermBiosignals - -# Package: ml -# Module: SkLearnModel -# Description: Class SkLearnModel, that encapsulates the API of SKLearn supervised models. - -# Contributors: João Saraiva and code from https://scikit-learn.org/ -# Created: 05/06/2022 -# Last Updated: 25/06/2022 - -# =================================== - -from warnings import warn - -from matplotlib import pyplot as plt -from numpy import arange, argsort, array -from sklearn.base import is_classifier, is_regressor - -import ltbio.ml.supervised.models.SupervisedModel as _SupervisedModel -from ltbio.ml.supervised.results import PredictionResults -from ltbio.ml.supervised.results import SupervisedTrainResults - - -class SkLearnModel(_SupervisedModel.SupervisedModel): - - def __init__(self, design, name: str = None): - # Check design - if not (is_classifier(design) or is_regressor(design)): - raise ValueError("The design given is not a valid SkLearn classifier or regressor.") - - super().__init__(design, name) - - def __set_parameter_from_condition(self, parameter_label:str, conditions_label:str, value): - if parameter_label in self.__required_parameters: - if value is not None: - self._SupervisedModel__design.set_params(**{parameter_label: value}) - else: - warn(f"Omitted train condition '{conditions_label}' = {self._SupervisedModel__design.get_params()[parameter_label]} being used.") - else: - if value is not None: - warn(f"Train condition '{conditions_label}' given is not required for this model. 
Ignoring it.") - else: - pass - - def train(self, dataset, conditions): - # Call super for version control - super().train(dataset, conditions) - - # Set whichever model hyperparameters were defined - self._SupervisedModel__design.set_params(**conditions.hyperparameters) - - # Map some train conditions to model parameters - self.__required_parameters = self._SupervisedModel__design.get_params().keys() - self.__set_parameter_from_condition('max_iter', 'epochs', conditions.epochs) - self.__set_parameter_from_condition('loss', 'loss', conditions.loss) - self.__set_parameter_from_condition('tol', 'stop_at_deltaloss', conditions.stop_at_deltaloss) - self.__set_parameter_from_condition('n_iter_no_change', 'patience', conditions.patience) - self.__set_parameter_from_condition('solver', 'optimizer', conditions.optimizer) - self.__set_parameter_from_condition('?', 'shuffle', conditions.shuffle) - self.__set_parameter_from_condition('shuffle', 'epoch_shuffle', conditions.epoch_shuffle) - self.__set_parameter_from_condition('batch_size', 'batch_size', conditions.batch_size) - self.__set_parameter_from_condition('learning_rate_init', 'learning_rate', conditions.learning_rate) - self.__set_parameter_from_condition('validation_fraction', 'validation_ratio', conditions.validation_ratio) - self.__set_parameter_from_condition('?', 'test_ratio', conditions.test_ratio) - self.__set_parameter_from_condition('?', 'train_ratio', conditions.train_ratio) - self.__set_parameter_from_condition('?', 'test_size', conditions.test_size) - self.__set_parameter_from_condition('?', 'train_size', conditions.train_size) - - # Fits the model - self._SupervisedModel__design.fit(dataset.all_objects, dataset.all_targets) - - # Update version - self._SupervisedModel__update_current_version_state(self)#, epoch_concluded=int(self._SupervisedModel__design.n_iter_)) - - # Create results object - return SupervisedTrainResults(self._SupervisedModel__design.loss_, None, None) - - def test(self, dataset, evaluation_metrics = None, version = None): - # Call super for version control - super().test(dataset, evaluation_metrics, version) - # Make predictions about the objects - predictions = self._SupervisedModel__design.predict(dataset.all_objects) - # Create results object - return PredictionResults(self._SupervisedModel__design.loss_, dataset, predictions, evaluation_metrics) - - @property - def trained_parameters(self): - try: - return self._SupervisedModel__design.coef_, self._SupervisedModel__design.intercepts_ - except: - raise ReferenceError("Unfortunately cannot find the trained parameters, but the design internal state is functional.") - - @property - def non_trainable_parameters(self): - return self._SupervisedModel__design.get_params() - - def _SupervisedModel__set_state(self, state): - self._SupervisedModel__design.__setstate__(state) - - def _SupervisedModel__get_state(self): - return self._SupervisedModel__design.__getstate__() - - - - def __plot_timeseries_importance(self, show:bool=True, save_to:str=None): - """ - All code was adapted from https://scikit-learn.org/stable/auto_examples/ensemble/plot_gradient_boosting_regression.html#sphx-glr-auto-examples-ensemble-plot-gradient-boosting-regression-py - """ - # TImeseries importance - timeseries_labels = ('a', 'b', 'c', 'd', 'e', 'f', 'g', 'h', 'i', 'j') - feature_importance = self._SupervisedModel__design.feature_importances_ - sorted_idx = argsort(feature_importance) - pos = arange(sorted_idx.shape[0]) + 0.5 - fig = plt.figure(figsize=(6, 6)) - plt.subplot(1, 1, 1) - 
plt.barh(pos, feature_importance[sorted_idx], align="center") - plt.yticks(pos, array(timeseries_labels)[sorted_idx]) - plt.title("Timeseries Importance (MDI)") - - fig.tight_layout() - if save_to is not None: - fig.savefig(save_to) - if show: - plt.show() - print("Timeseries Importance plot was shown.") - else: - plt.close() - - def __plot_timeseries_permutation_importance(self, show:bool=True, save_to:str=None): - from sklearn.inspection import permutation_importance - result = permutation_importance(self._SupervisedModel__design, self.__last_results.object, self.__last_results.target, - n_repeats=10, random_state=42, n_jobs=2) - sorted_idx = result.importances_mean.argsort() - timeseries_labels = ('a', 'b', 'c', 'd', 'e', 'f', 'g', 'h', 'i', 'j') - fig = plt.figure(figsize=(6, 6)) - plt.subplot(1, 1, 1) - plt.boxplot( - result.importances[sorted_idx].T, - vert=False, - labels=array(timeseries_labels)[sorted_idx], - ) - plt.title("Timeseries Permutation Importance (test set)") - - fig.tight_layout() - if save_to is not None: - fig.savefig(save_to) - if show: - plt.show() - print("Timeseries Permutation Importance plot was shown.") - else: - plt.close() diff --git a/src/ltbio/ml/supervised/models/SupervisedModel.py b/src/ltbio/ml/supervised/models/SupervisedModel.py deleted file mode 100644 index b6c1f1bd..00000000 --- a/src/ltbio/ml/supervised/models/SupervisedModel.py +++ /dev/null @@ -1,153 +0,0 @@ -# -*- encoding: utf-8 -*- - -# =================================== - -# IT - LongTermBiosignals - -# Package: ml -# Module: SupervisedModel -# Description: Abstract Class SupervisedModel, representing a generic machine learning supervised model. - -# Contributors: João Saraiva -# Created: 31/05/2022 -# Last Updated: 07/08/2022 - -# =================================== -from _datetime import datetime -from abc import ABC, abstractmethod -from copy import copy -from inspect import isclass -from typing import Collection - -from ltbio.ml.datasets.BiosignalDataset import BiosignalDataset -from ltbio.ml.metrics import Metric -from ltbio.ml.supervised.results import PredictionResults -from ltbio.ml.supervised.results import SupervisedTrainResults -from ltbio.ml.supervised import SupervisedTrainConditions - - -class SupervisedModel(ABC): - """ - A generic machine learning supervised model. 
- """ - - class __Version: - def __init__(self, number, state=None, conditions=None): - self.number = number - self.created_on = datetime.now() - self.state = state - self.conditions = conditions - self.epoch = None - self.best_test_results = None - - def __init__(self, design, name:str=None): - self.__design = design - self.name = name - - self.__versions:list[SupervisedModel.__Version] = [] - self.__current_version = None - - self.verbose = True # by default - - # ==================================== - # Public API - - @property - def design(self): - return copy(self.__design) - - @property - def current_version(self) -> int: - if self.__current_version is not None: - return self.__current_version.number - else: - raise AttributeError("Model has never been trained.") - - @property - def versions(self) -> list[str]: - return [f'V{version.number} on {version.created_on}' for version in self.__versions] - - @property - def is_trained(self) -> bool: - return len(self.__versions) > 0 - - @property - @abstractmethod - def trained_parameters(self): - pass - - @property - @abstractmethod - def non_trainable_parameters(self): - pass - - @abstractmethod - def train(self, dataset:BiosignalDataset, conditions:SupervisedTrainConditions) -> SupervisedTrainResults: - # This is to be executed before the training session starts - self.__current_version = SupervisedModel.__Version(len(self.__versions) + 1, conditions=conditions.__copy__()) - self.__versions.append(self.__current_version) - - @abstractmethod - def test(self, dataset:BiosignalDataset, evaluation_metrics:Collection = None, version:int = None) -> PredictionResults: - # This is to be executed before the testing starts - if version is None: - if self.__current_version is None: - if len(self.__versions) == 0: - raise AssertionError("Model has never been trained.") - self.__set_to_version(self.__versions[-1]) - else: - pass # uses current version - else: - self.set_to_version(version) - - # Check types - for metric in evaluation_metrics: - if not isclass(metric) and metric.__base__ is not Metric: - raise TypeError("Give non instantiated evaluation metrics, i.e., types of Metric.") - - def set_to_version(self, version:int = None): - if version <= len(self.__versions): - self.__set_to_version(self.__versions[version - 1]) - else: - raise ValueError(f"There is no version number {version}. 
Check version numbers by accessing 'versions'.") - - @property - def best_version_results(self) -> PredictionResults: - if not self.is_trained: - raise AttributeError("Model was not trained yet, hence it has no results.") - if self.__versions[0].best_test_results is None: - raise AttributeError("Model was not tested yet, hence it has no test results.") - - best_results = self.__versions[0].best_test_results - - for version in self.__versions: - if version.best_test_results is not None and version.best_test_results.loss < best_results.loss: - best_results = version.best_test_results - - return best_results - - # ==================================== - # For Internal Usage - - def __set_to_version(self, version: __Version): - self.__set_state(version.state) - self.__current_version = version - - def __update_current_version_state(self, epoch_concluded:int = None): - self.__current_version.state = self.__get_state() - self.__current_version.epoch = epoch_concluded - - def __update_current_version_best_test_results(self, results: PredictionResults): - if self.__current_version.best_test_results is not None: - if results.loss < self.__current_version.best_test_results.loss: - self.__current_version.best_test_results = results - else: - self.__current_version.best_test_results = results - - @abstractmethod - def __set_state(self, state): - pass - - @abstractmethod - def __get_state(self): - pass diff --git a/src/ltbio/ml/supervised/models/TorchModel.py b/src/ltbio/ml/supervised/models/TorchModel.py deleted file mode 100644 index a193e6a4..00000000 --- a/src/ltbio/ml/supervised/models/TorchModel.py +++ /dev/null @@ -1,307 +0,0 @@ -# -- encoding: utf-8 -- - -# =================================== - -# IT - LongTermBiosignals - -# Package: ml -# Module: TorchModel -# Description: Class TorchModel, that encapsulates the API of PyTorch supervised models. - -# Contributors: João Saraiva and code from https://pytorch.org/tutorials/beginner/basics/optimization_tutorial -# Created: 24/07/2022 -# Last Updated: 07/08/2022 - -# =================================== -import gc -from pickle import dump - -import torch -import torchmetrics -from torch import float32 -from torch.nn.modules.loss import _Loss -from torch.optim.optimizer import Optimizer -from torch.utils.data.dataloader import DataLoader -from torchsummary import summary - -from ltbio.ml.datasets.BiosignalDataset import BiosignalDataset -from ltbio.ml.supervised.models.SupervisedModel import SupervisedModel -from ltbio.ml.supervised.results import PredictionResults -from ltbio.ml.supervised.results import SupervisedTrainResults - - -class TorchModel(SupervisedModel): - - DEVICE = torch.device('cpu') - - def __init__(self, design: torch.nn.Module, name: str = None): - if not isinstance(design, torch.nn.Module): - raise ValueError("The design given is not a valid PyTorch module. 
" - "Give a torch.nn.Module instance.") - - super().__init__(design, name) - - - # Check for CUDA (NVidea GPU) or MPS (Apple Sillicon) acceleration - try: - if torch.backends.mps.is_built(): - self.DEVICE = torch.device('mps') - self._SupervisedModel__design.to(self.DEVICE) - self._SupervisedModel__design.to(float32) - except: - pass - try: - if torch.cuda.is_available(): - self.DEVICE = torch.device('cuda') - self._SupervisedModel__design.to(self.DEVICE) - except: - pass - - def shapes_summary(self, dataset: BiosignalDataset): - example_shape = dataset[0][0].shape - self._SupervisedModel__design.to('cpu') - try: - summary(self._SupervisedModel__design, example_shape, device='cpu') - self._SupervisedModel__design.to(self.DEVICE) - finally: - self._SupervisedModel__design.to(self.DEVICE) - - def train(self, dataset, conditions, n_subprocesses: int = 0, track_memory: bool = False): - - def __train(dataloader) -> float: - size = len(dataloader.dataset) - num_batches = len(dataloader) - self._SupervisedModel__design.train() # Sets the module in training mode - sum_losses = 0. - for i, (batch_objects, batch_targets) in enumerate(dataloader): - #if track_memory: - # print_resident_set_size(f'before batch {i} processing') - #print('!!! batch_objects.shape =', batch_objects.shape) - #print('!!! batch_targets.shape =', batch_targets.shape) - conditions.optimizer.zero_grad() # Zero gradients for every batch - pred = self._SupervisedModel__design(batch_objects) # Make predictions for this batch - loss = conditions.loss(pred, batch_targets) # Compute loss - loss.backward() # Compute its gradients - conditions.optimizer.step() # Adjust learning weights - - if i % 10 == 0: - loss_value, current = loss.item(), i * len(batch_objects) - sum_losses += loss_value - if self.verbose: - print(f"loss: {loss_value:>7f} [{current:>5d}/{size:>5d}]") - - del batch_objects, batch_targets, loss, pred - gc.collect() - - #if self.verbose: - # print(f"Avg Train Loss: {sum_losses/(num_batches/10):>8f} \n") - - #if track_memory: - # print_resident_set_size('after epoch') - return loss_value # returns the last loss - - def __validate(dataloader: DataLoader) -> float: - size = len(dataloader.dataset) - num_batches = len(dataloader) - self._SupervisedModel__design.eval() # Sets the module in evaluation mode - loss_value, correct = 0., 0 - with torch.no_grad(): - for batch_objects, batch_targets in dataloader: - pred = self._SupervisedModel__design(batch_objects) - loss = conditions.loss(pred, batch_targets) - loss_value += loss.data.item() - correct += (pred.argmax(1) == batch_targets).type(torch.float).sum().item() - - del batch_objects, batch_targets, loss, pred - gc.collect() - - loss_value /= num_batches - correct /= size - - if self.verbose: - print(f"Avg Validation Loss: {loss_value:>8f} \n") - - return loss_value - - # Call super for version control - super().train(dataset, conditions) - - # Check it these optional conditions are defined - conditions.check_it_has(('optimizer', 'learning_rate', 'validation_ratio', 'batch_size', 'epochs')) - - # Check loss function - if not isinstance(conditions.loss, _Loss): - raise ValueError("The loss function given in 'conditions' is not a valid PyTorch loss function." - " Give an instance of one of the listed here: https://pytorch.org/docs/stable/nn.html#loss-functions") - - # Check optimizer algorithm - if not isinstance(conditions.optimizer, Optimizer): - raise ValueError("The optimizer algorithm given in 'conditions' is not a valid PyTorch optimizer." 
- " Give an instance of one of the listed here: https://pytorch.org/docs/stable/optim.html#algorithms") - - # Learning rate is a property of the optimizer - conditions.optimizer.lr = conditions.learning_rate - - # Divide dataset into 2 smaller train and validation datasets - validation_size = int(len(dataset) * conditions.validation_ratio) - train_size = len(dataset) - validation_size - train_dataset, validation_dataset = dataset.split(train_size, validation_size, conditions.shuffle is True) - - # Decide on shuffling between epochs - epoch_shuffle = False - if conditions.epoch_shuffle is True: # Shuffle in every epoch - epoch_shuffle = True - - # Create DataLoaders - train_dataloader = DataLoader(dataset=train_dataset, - batch_size=conditions.batch_size, shuffle=epoch_shuffle, - #pin_memory=True, #pin_memory_device=TorchModel.DEVICE.type, - num_workers=n_subprocesses, prefetch_factor=2, - drop_last=True) - - validation_dataloader = DataLoader(dataset=validation_dataset, - batch_size=conditions.batch_size, shuffle=epoch_shuffle, - #pin_memory=True, #pin_memory_device=TorchModel.DEVICE.type, - num_workers=n_subprocesses, prefetch_factor=2, - drop_last=True) - - scheduler = torch.optim.lr_scheduler.ReduceLROnPlateau(conditions.optimizer, mode='min', factor=0.1, patience=5) - - # Repeat the train-validate process for N epochs - train_losses, validation_losses = [], [] - try: - for t in range(conditions.epochs): - if self.verbose: - print(f"Epoch {t + 1}\n-------------------------------") - - # Train and validate - train_loss = __train(train_dataloader) - validation_loss = __validate(validation_dataloader) - scheduler.step(validation_loss) - train_losses.append(train_loss) - validation_losses.append(validation_loss) - - # Remember the smaller loss and save checkpoint - if t == 0: - best_loss = validation_loss # defines the first - count_loss_has_not_decreased = 0 - self._SupervisedModel__update_current_version_state(epoch_concluded=t + 1) - elif validation_loss < best_loss: - best_loss = validation_loss - self._SupervisedModel__update_current_version_state(epoch_concluded=t+1) - else: - count_loss_has_not_decreased +=1 - - if conditions.patience != None and count_loss_has_not_decreased == conditions.patience: - print(f'Early stopping at epoch {t}') - break - - print("Training finished") - - except KeyboardInterrupt: - print("Training Interrupted") - while True: - answer = input("Save Parameters? (y/n): ").lower() - if answer == 'y': - self._SupervisedModel__update_current_version_state(epoch_concluded=t+1) - print("Model and parameters saved.") - break - elif answer == 'n': - print("Session Terminated. Parameters not saved.") - break - else: - continue # asking - - # FIXME: This should be a PlotMetric (?) 
- """ - finally: - fig = plt.figure(figsize=(10, 5)) - plt.subplot(1, 1, 1) - plt.title("Loss over the Epochs") - plt.plot(range(1, len(train_losses) + 1), train_losses, "b-", label="Train Loss") - if validation_losses is not None: - plt.plot(range(1, len(validation_losses) + 1), validation_losses, "r-", label="Train Loss") - plt.legend(loc="upper right") - plt.xlabel("Epochs") - plt.ylabel("Loss") - fig.tight_layout() - plt.show() - plt.close() - """ - - return SupervisedTrainResults(train_losses, validation_losses) - - - def test(self, dataset, evaluation_metrics = (), version = None): - # Call super for version control - super().test(dataset, evaluation_metrics, version) - - # Get current conditions - conditions = self._SupervisedModel__current_version.conditions - - # Create dataset and dataloader - dataloader = DataLoader(dataset=dataset, batch_size=1, shuffle=False, - #pin_memory=True, - #pin_memory_device=TorchModel.DEVICE.type - ) - - f1 = torchmetrics.F1Score(average='weighted', num_classes=2) - # auc = torchmetrics.AUROC(average='weighted', num_classes=2) - - # Test by example - size = len(dataset) - num_batches = len(dataloader) - self._SupervisedModel__design.eval() # Sets the module in evaluation mode - test_loss = 0 - predictions = [] - with torch.no_grad(): - for batch_objects, batch_targets in dataloader: # for each batch - pred = self._SupervisedModel__design(batch_objects) - predictions.append(pred.cpu().detach().numpy().squeeze()) - test_loss += conditions.loss(pred, batch_targets).item() - # compute metrics - pred, batch_targets = pred.to('cpu'), batch_targets.to('cpu') - # FIXME: these shoud be ValueMetric(s) - #f1(pred, batch_targets) - #auc(pred, batch_targets) - - test_loss /= num_batches - - if self.verbose: - print(f"Test Error: Avg loss: {test_loss:>8f}") - #print(f"Test F1-Score: {f1.compute()}") - #print(f"Test AUC: {auc.compute()}") - - # FIXME: Remove these two lines below - #dataset.redimension_to(1) - #dataset.transfer_to_device('cpu') - - results = PredictionResults(test_loss, dataset, tuple(predictions), evaluation_metrics) - self._SupervisedModel__update_current_version_best_test_results(results) - return results - - @property - def trained_parameters(self): - if not self.is_trained: - raise ReferenceError("This model was not yet trained.") - return self._SupervisedModel__design.state_dict() - - @property - def non_trainable_parameters(self): - if not self.is_trained: - return {} - else: - return self._SupervisedModel__current_version.conditions.hyperparameters - - def _SupervisedModel__set_state(self, state): - self._SupervisedModel__design.load_state_dict(state) - - def _SupervisedModel__get_state(self): - return self._SupervisedModel__design.state_dict() - # Optimizer state_dict is inside conditions.optimizer, hence also saved in Version - - def save_design(self, path:str): - self._SupervisedModel__design.to('cpu') - with open(path, 'wb') as f: - dump(self._SupervisedModel__design, f) - self._SupervisedModel__design.to(self.DEVICE) diff --git a/src/ltbio/ml/supervised/models/__init__.py b/src/ltbio/ml/supervised/models/__init__.py deleted file mode 100644 index fe42a5ec..00000000 --- a/src/ltbio/ml/supervised/models/__init__.py +++ /dev/null @@ -1,3 +0,0 @@ -# Quick shortcuts to classes -from ltbio.ml.supervised.models.SkLearnModel import SkLearnModel -from ltbio.ml.supervised.models.TorchModel import TorchModel diff --git a/src/ltbio/ml/supervised/results.py b/src/ltbio/ml/supervised/results.py deleted file mode 100644 index 659784bf..00000000 --- 
a/src/ltbio/ml/supervised/results.py +++ /dev/null @@ -1,56 +0,0 @@ -# -- encoding: utf-8 -- - -# =================================== - -# IT - LongTermBiosignals - -# Package: ml -# Module: results -# Description: - -# Contributors: João Saraiva -# Created: 08/08/2022 - -# =================================== -from typing import Collection - -from ltbio.ml.datasets.BiosignalDataset import BiosignalDataset -from ltbio.ml.metrics import Metric - - -class SupervisedTrainResults(): - """Stores the results of a training session of a supervised ML model.""" - - def __init__(self, train_losses:list, validation_losses:list): - self.train_losses = train_losses - self.validation_losses = validation_losses - - -class PredictionResults(): - """Stores the results of predictions made with of a supervised ML model.""" - - def __init__(self, loss: float, test_dataset: BiosignalDataset, predictions:tuple, - evaluation_metrics: Collection[Metric] = None, name: str = None): - self.loss = loss - self.test_dataset = test_dataset - self.predictions = predictions - self.metrics = [metric.fromDatasetPredictions(test_dataset, predictions) for metric in evaluation_metrics] - self.name = name - - def __str__(self): - res = f'{self.name}\n' - res += f'Loss = {self.loss}\n' - for metric in self.metrics: - res += str(metric) + '\n' - return res - - def __repr__(self): - return self.__str__() - - @property - def biosignals(self): - return self.test_dataset._get_output_biosignals(self.predictions) - - @property - def timeseries(self): - return self.test_dataset._get_output_timeseries(self.predictions) diff --git a/src/ltbio/pipeline/GoTo.py b/src/ltbio/pipeline/GoTo.py deleted file mode 100644 index 7c7bba03..00000000 --- a/src/ltbio/pipeline/GoTo.py +++ /dev/null @@ -1,27 +0,0 @@ -# -*- encoding: utf-8 -*- - -# =================================== - -# IT - LongTermBiosignals - -# Package: pipeline -# Module: GoTo -# Description: Class GoTo, a type of PipelineUnit that introduces flow control. - -# Contributors: João Saraiva -# Created: 11/06/2022 -# Last Updated: 07/07/2022 - -# =================================== - -from .PipelineUnit import SinglePipelineUnit - - -class GoTo(SinglePipelineUnit): - def __init__(self, name=None): - super().__init__(name) - - ART_PATH = 'resources/pipeline_media/goto.png' - - def apply(self, step_number:int): - pass # TODO diff --git a/src/ltbio/pipeline/Input.py b/src/ltbio/pipeline/Input.py deleted file mode 100644 index 7288b364..00000000 --- a/src/ltbio/pipeline/Input.py +++ /dev/null @@ -1,32 +0,0 @@ -# -*- encoding: utf-8 -*- - -# =================================== - -# IT - LongTermBiosignals - -# Package: pipeline -# Module: Input -# Description: Class Input, a type of PipelineUnit that introduces new data to the flow. 
- -# Contributors: João Saraiva -# Created: 25/06/2022 -# Last Updated: 07/07/2022 - -# =================================== - -from ltbio.pipeline.PipelineUnit import SinglePipelineUnit - - -class Input(SinglePipelineUnit): - - PIPELINE_INPUT_LABELS = {} - PIPELINE_OUTPUT_LABELS = {'_': '_'} # the packet label is to be defined for each instance - ART_PATH = 'resources/pipeline_media/input.png' - - def __init__(self, label:str, data, name:str=None): - super().__init__(name) - self.PIPELINE_OUTPUT_LABELS['_'] = label - self.__data_to_add = data - - def apply(self): - return self.__data_to_add diff --git a/src/ltbio/pipeline/Packet.py b/src/ltbio/pipeline/Packet.py deleted file mode 100644 index 14e24572..00000000 --- a/src/ltbio/pipeline/Packet.py +++ /dev/null @@ -1,227 +0,0 @@ -# -*- encoding: utf-8 -*- - -# =================================== - -# IT - LongTermBiosignals - -# Package: pipeline -# Module: Packet -# Description: Class Packet, that holds and transports any content between Pipeline Units. - -# Contributors: João Saraiva -# Created: 12/06/2022 -# Last Updated: 07/07/2022 - -# =================================== - -from inspect import stack -from typing import Collection, Dict - -from ltbio.biosignals import Timeseries - - -class Packet(): - - TIMESERIES_LABEL = 'timeseries' - - def __init__(self, **load): - self.__load = load - - if Packet.TIMESERIES_LABEL in self.__load: - assert ((isinstance(self.__load[Packet.TIMESERIES_LABEL], Timeseries)) or (isinstance(self.__load[Packet.TIMESERIES_LABEL], dict) and all(isinstance(x, Timeseries) for x in self.__load[Packet.TIMESERIES_LABEL].values())) or (isinstance(self.__load[Packet.TIMESERIES_LABEL], Collection) and all(isinstance(x, Timeseries) for x in self.__load[Packet.TIMESERIES_LABEL]))) - # if a collection of Timeseries is given and it is not in a dictionary format, then it will be converted to one: - if not isinstance(self.__load[Packet.TIMESERIES_LABEL], Timeseries) and isinstance(self.__load[Packet.TIMESERIES_LABEL], Collection) and not isinstance(self.__load[Packet.TIMESERIES_LABEL], dict): - self.__load[Packet.TIMESERIES_LABEL] = {str(i): ts for i, ts in enumerate(self.__load[Packet.TIMESERIES_LABEL])} - - self.__who_packed = stack()[1][3] # FIX ME: this gets the function name that called this one; we want the object pointer - - def __getitem__(self, item:str): - return self.__load[item] - - @property - def __timeseries(self): - return self.__load[Packet.TIMESERIES_LABEL] - - @property - def has_timeseries(self) -> bool: - """ - Be very careful when using this checkers. - Correct use case: To know if there's any Timeseries in the Packet. - """ - return Packet.TIMESERIES_LABEL in self.__load - - @property - def has_timeseries_collection(self) -> bool: - """ - Be very careful when using this checkers. - Correct use case: To know if the timeseries, if any, in the Packet were packed/delivered collectively. - This holds True even if the collection only has 1 element; it's still a collection. - Incorrect use case: To know if there's a plurality of Timeseries. Use 'has_multiple_timeseries' instead. - """ - return self.has_timeseries and isinstance(self.__timeseries, dict) - - @property - def has_multiple_timeseries(self) -> bool: - """ - Be very careful when using this checkers. - Correct use case: To know if the Packet contains 2 or more Timeseries - """ - return self.has_timeseries_collection and len(self.__timeseries) > 1 - - @property - def has_single_timeseries(self) -> bool: - """ - Be very careful when using this checkers. 
- Correct use case: To know if the Packet contains 1 and only 1 Timeseries - Incorrect use case: To know if 'timeseries' is not a collection. Instead, use `!has_timeseries_collection`. - """ - return self.has_timeseries and \ - ( - isinstance(self.__timeseries, Timeseries) # could be alone ... - or - (isinstance(self.__timeseries, dict) and len(self.__timeseries) == 1) # or be the only one in dict - ) - - @property - def timeseries(self) -> Timeseries | Dict[str, Timeseries]: - """ - Get (all) Timeseries as they were packed, either alone or in collection. - """ - if self.has_timeseries_collection: - return self.__timeseries - elif self.has_single_timeseries: - return self.__timeseries - else: - raise AttributeError("There are no Timeseries in this Packet.") - - @property - def contents(self) -> dict: - return {key:type(self.__load[key]) for key in self.__load.keys()} - - def __str__(self): - '''Allows to print a Packet''' - contents = self.contents - res = 'Packet contains {} contents:\n'.format(len(contents)) - for key in contents: - res += '- ' + key + ' (' + contents[key].__name__ + ')\n' - return res - - @property - def who_packed(self): - return self.__who_packed - - def __len__(self): - return len(self.__load) - - def __contains__(self, item): - return item in self.__load - - def _to_dict(self): - return self.__load.copy() - - def _ungroup_timeseries(self, packet_labels:tuple[str]) -> tuple[tuple[Timeseries]]: - res = [] - all_not_timeseries = [] - - for label in packet_labels: - if label != Packet.TIMESERIES_LABEL: - found = [] - # Search on tags - for ts in self.__timeseries.values(): - if label in ts.tags: - found.append(ts) - all_not_timeseries.append(ts) - # Seach on load - if label in self.__load: - if isinstance(self.__load[label], Timeseries): - found.append(self.__load[label]) - all_not_timeseries.append(self.__load[label]) - elif isinstance(self.__load[label], dict): - found += list(self.__load[label].values()) - all_not_timeseries += list(self.__load[label].values()) - - assert len(found) > 0 # otherwise there is no point in this - res.append(tuple(found)) - - else: - res.append(Packet.TIMESERIES_LABEL) - - for i in range(len(res)): - if res[i] == Packet.TIMESERIES_LABEL: - ts_to_include = [] - for ts in self.__timeseries.values(): - if ts not in all_not_timeseries: - ts_to_include.append(ts) - assert len(ts_to_include) > 0 # otherwise there is no point in this - res[i] = tuple(ts_to_include) - - return tuple(res) - - - - @staticmethod - def join_packets(**packets): - """ - Receives multiple packets keyed by the prefix for each, in case there are conflicts in labels. - Returns 1 Packet. - """ - - if len(packets) == 1: - raise AssertionError("Give multiple Packets to join. 
Only 1 given.") - - seen_labels = set() - conflicting_labels = set() - seen_ts_labels = set() - conflicting_ts_labels = set() - - # Check for conflicting labels - for packet in packets.values(): - for label in packet.contents.keys(): - if label in seen_labels: - conflicting_labels.add(label) # mark as 'conflicting', if not already - seen_labels.add(label) # mask as 'seen', if not already - # Also, - # Check inside 'timeseries', if it's a collection - if label is Packet.TIMESERIES_LABEL and packet.has_timeseries_collection: - for ts_label in packet.timeseries.keys(): - if ts_label in seen_ts_labels: - conflicting_ts_labels.add(ts_label) # mark as 'conflicting', if not already - seen_ts_labels.add(ts_label) # mask as 'seen', if not already - - # Prepare load containers - timeseries = {} - load = {} - - # Deal with conflicting labels - for prefix, packet in packets.items(): - for label in packet.contents.keys(): - - # Timeseries - if label == Packet.TIMESERIES_LABEL: - if label in conflicting_labels: # 'timeseries' is a conflicting label - if not packet.has_timeseries_collection: # if not a Collection - timeseries[prefix] = packet.timeseries # just use unit prefix - else: # if a collection - for ts_label, ts in packet.timeseries.items(): # for each Timeseries - if ts_label in conflicting_ts_labels: # if its label is conflicting - timeseries[prefix+':'+ts_label] = ts # use unit prefix and that label - else: # if not - timeseries[ts_label] = ts # pass the Timeseries as it is - else: # if 'timeseries' is not a conflicting label, the specific labels might be - if packet.has_timeseries_collection: # if there's a collection of Timeseries - for ts_label, ts in packet.timeseries.items(): # for each Timeseries - if ts_label in conflicting_ts_labels: # if its label is conflicting - timeseries[prefix + ':' + ts_label] = ts # use unit prefix and that label - else: # if not - timeseries[ts_label] = ts # pass the Timeseries as it is - # Others - else: - if label in conflicting_labels: - load[prefix+':'+label] = packet[label] - else: - load[label] = packet[label] - - # Add timeseries to load - load[Packet.TIMESERIES_LABEL] = timeseries - - return Packet(**load) diff --git a/src/ltbio/pipeline/Pipeline.py b/src/ltbio/pipeline/Pipeline.py deleted file mode 100644 index c8296d2d..00000000 --- a/src/ltbio/pipeline/Pipeline.py +++ /dev/null @@ -1,175 +0,0 @@ -# -*- encoding: utf-8 -*- - -# =================================== - -# IT - LongTermBiosignals - -# Package: pipeline -# Module: Pipeline -# Description: Class Pipeline, representing a pipeline of steps to process Biosignals. 
diff --git a/src/ltbio/pipeline/Pipeline.py b/src/ltbio/pipeline/Pipeline.py
deleted file mode 100644
index c8296d2d..00000000
--- a/src/ltbio/pipeline/Pipeline.py
+++ /dev/null
@@ -1,175 +0,0 @@
-# -*- encoding: utf-8 -*-
-
-# ===================================
-
-# IT - LongTermBiosignals
-
-# Package: pipeline
-# Module: Pipeline
-# Description: Class Pipeline, representing a pipeline of steps to process Biosignals.
-
-# Contributors: João Saraiva
-# Created: 11/06/2022
-# Last Updated: 07/07/2022
-
-# ===================================
-
-from inspect import signature
-from typing import List, Collection
-
-from ltbio.biosignals.modalities.Biosignal import Biosignal
-from ltbio.pipeline.Input import Input
-from ltbio.pipeline.Packet import Packet
-from ltbio.pipeline.PipelineUnit import PipelineUnit, SinglePipelineUnit, PipelineUnitsUnion
-
-
-class Pipeline():
-
-    # Attributes
-    __steps: List[PipelineUnit]
-    __current_step: int
-    __biosignals: Collection[Biosignal]
-    __current_packet: Packet
-
-    def __init__(self, name: str = None):
-        self.name = name
-        self.__current_step = 0
-        self.__steps = []
-
-    @property
-    def current_step(self) -> int:
-        if self.__current_step > 0:
-            return self.__current_step
-        else:
-            raise AttributeError('Pipeline has not started yet.')
-
-    @property
-    def current_packet(self) -> Packet:
-        return self.__current_packet
-
-    def __len__(self):
-        return len(self.__steps)
-
-    def __repr__(self):
-        res = 'Pipeline' + (' ' + self.name if self.name is not None else '')
-        for i in range(len(self)):
-            res += f'\nStep {i+1}: ' + str(self.__steps[i])
-        return res
-
-    def add(self, unit: PipelineUnit):
-        #if len(self) > 0:
-        #    self.__check_completeness(unit)
-        self.__steps.append(unit)
-
-    def __rshift__(self, other):
-        '''
-        Defines the >> operator, the fastest shortcut to create a Pipeline.
-        '''
-        if isinstance(other, PipelineUnit):  # concatenate self.Pipeline + other.Unit = res.Pipeline
-            self.add(other)
-            return self
-        elif isinstance(other, Pipeline):  # concatenate self.Pipeline + other.Pipeline = res.Pipeline
-            pass
-        else:
-            raise TypeError(f'Cannot join a PipelineUnit with a {type(other)}.')
-
-    def load(self, biosignals: Biosignal | Collection[Biosignal]):
-        if isinstance(biosignals, Biosignal):
-            self.__biosignals = (biosignals, )
-        else:
-            self.__biosignals = biosignals
-
-    def next(self):
-        if self.__current_step == 0:  # if starting
-            self.__create_first_packet()
-
-        # Do next step
-        self.__current_packet = self.__steps[self.__current_step]._apply(self.__current_packet)
-        self.__current_step += 1
-
-        return self.__current_packet
-
-    def applyAll(self, biosignals: Biosignal | Collection[Biosignal]):
-        self.load(biosignals)
-        N_STEPS = len(self)
-        while self.__current_step < N_STEPS:
-            self.next()
-        return self.__unpack_last_packet()
-
-    def __call__(self, *biosignals: Biosignal):
-        res = []
-        for b in biosignals:
-            new_channels = self.applyAll(b)['timeseries']
-            res.append(b._new(new_channels))
-        return tuple(res) if len(res) > 1 else res[0]
-
-    def __create_first_packet(self):
-        assert self.__biosignals is not None  # Check if Biosignals were loaded
-        all_timeseries = {}
-        for biosignal in self.__biosignals:
-            for channel_name, channel in biosignal:
-                if channel_name in all_timeseries.keys():  # Ensure there are no repeated keys
-                    channel_name = biosignal.name + ' : ' + channel_name
-                    if channel_name in all_timeseries.keys():
-                        raise NameError("Cannot give Biosignals with the same name and with the same channel names. Suggestion: Change the names of the Biosignals so they are unique.")
-                all_timeseries[channel_name] = channel
-
-        self.__current_packet = Packet(timeseries=all_timeseries)
-
-    def __unpack_last_packet(self) -> Biosignal | Collection[Biosignal]:
-        return self.__current_packet._to_dict()
-
-    def __check_completeness(self, new_unit: PipelineUnit):
-        # Know what will be available up to this point
-        load_that_will_be_available = {}
-        for unit in self.__steps:
-            # Get output label and type
-            if isinstance(unit, SinglePipelineUnit):
-                output_label = tuple(unit.PIPELINE_OUTPUT_LABELS.values())[0]
-                output_type = signature(unit.apply).return_annotation
-                load_that_will_be_available[output_label] = output_type  # If it's the case, it replaces the type of equal labels, as it should
-            elif isinstance(unit, PipelineUnitsUnion):
-                output_labels = tuple(unit.PIPELINE_OUTPUT_LABELS.values())
-
-        # Know what the new unit needs
-        if isinstance(new_unit, SinglePipelineUnit):
-            new_unit_parameters = tuple(signature(new_unit.apply).parameters.values())
-        elif isinstance(new_unit, PipelineUnitsUnion):
-            new_unit_parameters = new_unit.all_input_parameters
-
-        # Check if it matches
-        for parameter in new_unit_parameters:
-            parameter_name = parameter.name
-            parameter_type = parameter.annotation
-            input_label = new_unit.PIPELINE_INPUT_LABELS[parameter_name]  # Map to the label in Packet
-
-            if input_label in load_that_will_be_available:
-                if isinstance(new_unit, SinglePipelineUnit):  # TODO: Currently, we're skipping verification of Union input and output types
-                    if isinstance(parameter_type, type(load_that_will_be_available[input_label])):
-                        continue
-                    else:
-                        raise AssertionError('Input type, {}, of the new unit does not match the output type, {}, of the last unit.'.format(
-                            parameter_type, load_that_will_be_available[input_label]))
-            else:
-                raise AssertionError('{} input label of the new unit does not match any output label of the last unit.'.format(
-                    input_label))
-
-    def plot_diagram(self, show: bool = True, save_to: str = None):
-        from diagrams import Diagram
-        from diagrams.custom import Custom
-
-        with Diagram(name="Pipeline" + ((" " + self.name) if self.name is not None else ""), direction='LR', show=show, filename=save_to):
-            blocks = []
-            input_unit = False
-            for unit in self.__steps:
-                blocks.append(Custom(str(unit), unit.ART_PATH))
-                if len(blocks) > 1:
-                    if isinstance(unit, Input):
-                        input_unit = True
-                    elif input_unit:
-                        blocks[-3] >> blocks[-1]
-                        blocks[-2] >> blocks[-1]
-                    else:
-                        blocks[-2] >> blocks[-1]
-
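For orientation, a minimal sketch of how this (here removed) Pipeline API chained units; the `my_ecg` Biosignal and the chosen unit parameters are hypothetical:

    from datetime import timedelta
    from ltbio.processing.filters import TimeDomainFilter, ConvolutionOperation
    from ltbio.processing.formaters import Segmenter

    # '>>' on a unit creates a Pipeline; each further '>>' appends one more step.
    pipeline = TimeDomainFilter(ConvolutionOperation.MEDIAN, window_length=timedelta(seconds=1)) \
               >> Segmenter(window_length=timedelta(seconds=5))
    contents = pipeline.applyAll(my_ecg)  # runs every step; returns the final Packet's contents as a dict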
diff --git a/src/ltbio/pipeline/PipelineUnit.py b/src/ltbio/pipeline/PipelineUnit.py
deleted file mode 100644
index 281c7c2c..00000000
--- a/src/ltbio/pipeline/PipelineUnit.py
+++ /dev/null
@@ -1,440 +0,0 @@
-# -*- encoding: utf-8 -*-
-
-# ===================================
-
-# IT - LongTermBiosignals
-
-# Package: pipeline
-# Module: PipelineUnit
-# Description: Classes PipelineUnit, SinglePipelineUnit, PipelineUnitsUnion, ApplyTogether, and ApplySeparately.
-
-# Contributors: João Saraiva
-# Created: 02/06/2022
-# Last Updated: 07/07/2022
-
-# ===================================
-
-from abc import ABC, abstractmethod
-from inspect import signature, Parameter
-from typing import Collection, Dict, Iterable, Tuple
-
-import ltbio.biosignals as biosignals
-from ltbio.pipeline.Packet import Packet
-
-
-class PipelineUnit(ABC):
-    """
-    Pipeline Units are the building blocks of Pipelines.
-    Following the Composite design pattern, a PipelineUnit is the abstract 'Component', so that Pipeline can deal with
-    SingleUnit and Union in the same way.
-
-    Subclasses
-    ------------
-    - SingleUnit: A single pipeline unit that actually acts on Timeseries. It's the 'Leaf' in the design pattern.
-    - Union: A collection of single units where the Pipeline branches to each of them. It's the 'Composite' in the
-    design pattern.
-
-    Abstract Method '_apply'
-    ------------
-    Acts as the 'operation' method in the design pattern, and it's implemented in each subclass.
-    It receives a Packet with the necessary inputs to apply the unit and returns a Packet with the relevant outputs.
-    """
-
-    def __init__(self, name: str):
-        self.name = name
-
-    @abstractmethod
-    def _apply(self, packet: Packet) -> Packet:
-        """
-        Receives a Packet with the necessary inputs to apply the unit and returns a Packet with the relevant outputs.
-        Acts as the 'operation' method in the composite design pattern.
-        """
-        pass
-
-    def __rshift__(self, other):
-        '''
-        Defines the >> operator, the fastest shortcut to create a Pipeline.
-        '''
-        from ltbio.pipeline.Pipeline import Pipeline
-        if isinstance(other, PipelineUnit):  # concatenate self.Unit + other.Unit = res.Pipeline
-            res = Pipeline()
-            res.add(self)
-            res.add(other)
-            return res
-        elif isinstance(other, Pipeline):  # concatenate self.Unit + other.Pipeline = res.Pipeline
-            pass
-        else:
-            raise TypeError(f'Cannot join a PipelineUnit with a {type(other)}.')
-
-    @staticmethod
-    def _unpack_separately(packet: Packet, unit) -> Tuple[Iterable[str], Iterable[Dict]]:
-        """
-        Auxiliary class procedure.
-        Receives a Packet and returns a Tuple of Iterables, (x, y), where:
-        - x are the original labels of each Timeseries in the receiving Packet;
-        - y are dictionaries with the necessary inputs, each with one Timeseries.
- """ - - # Get what this unit needs from the Packet - what_this_unit_needs = tuple(signature(unit.apply).parameters.values()) - - # Unpack from the Packet what is needed - common_input = {} - for parameter in what_this_unit_needs: - parameter_name = parameter.name - parameter_type = parameter.annotation - packet_label = unit.PIPELINE_INPUT_LABELS[parameter_name] # Map to the label in Packet - - if packet_label == Packet.TIMESERIES_LABEL : # Timeseries - separate_inputs = [] - original_ts_labels = [] - if packet.has_timeseries_collection: # Meaning there were discovered 1 or more Timeseries in a collection - for original_ts_label, ts in packet[packet_label].items(): - this_input = {label: content for label, content in common_input.items()} # Create copy of common content - if parameter_type is biosignals.Timeseries: # if apply only requires 1 Timeseries, rather than collection - this_input[parameter_name] = ts # Add the element of 1 Timeseries - else: - this_input[parameter_name] = {original_ts_label: ts} # Add only 1 the collection of 1 Timeseries - separate_inputs.append(this_input) # Save separate input - original_ts_labels.append(original_ts_label) - elif packet.has_single_timeseries: # Meaning just 1 Timeseries was found outside a collection - this_input = {label: content for label, content in common_input.items()} # Create copy of common content - this_input[parameter_name] = packet[packet_label] # Add the only Timeseries - separate_inputs.append(this_input) # Save separate input - else: - pass # There are no Timeseries - - return iter(original_ts_labels), iter(separate_inputs) - - else: # Others - common_input[parameter_name] = packet[packet_label] - return iter((packet_label, )), iter((common_input, )) - - @staticmethod - def _unpack_as_is(packet: Packet, unit) -> Dict: - """ - Auxiliary class procedures. - Receives a Packet and returns an input dictionaries with all necessary parameters - """ - - # Get what this unit needs from the Packet - what_this_unit_needs = tuple(signature(unit.apply).parameters.values()) - - # Unpack from the Packet what is needed - input = {} - for parameter in what_this_unit_needs: - parameter_name = parameter.name - parameter_type = parameter.annotation - packet_label = unit.PIPELINE_INPUT_LABELS[parameter_name] # Map to the label in Packet - - if isinstance(packet_label, tuple) and any(x == Packet.TIMESERIES_LABEL for x in packet_label): # Transformation needed - ungrouped = packet._ungroup_timeseries(packet_label) - content = unit._transform_input(*ungrouped) - else: - content = packet[packet_label] - - # Insert on input - if isinstance(content, dict) and parameter_type is biosignals.Timeseries: - assert len(content) == 1 - input[parameter_name] = tuple(content.values())[0] # arity match - elif not isinstance(content, dict) and packet_label == Packet.TIMESERIES_LABEL and parameter_type is not biosignals.Timeseries: - input[parameter_name] = {'_': content} # arity match - else: - input[parameter_name] = content # arity already matches - - return input - - @staticmethod - def _pack_as_is(previous_packet:Packet, current_output, unit) -> Packet: - """ - Receives the received Packet and the output dictionary of 'apply' and returns a new Packet with the union of all - contents. If some new content has the same label of a previous content, it will be replaced. 
- """ - load = previous_packet._to_dict() # start with the contents already in the previous packet - packet_label = tuple(unit.PIPELINE_OUTPUT_LABELS.values())[0] - load[packet_label] = current_output # replace or add - return Packet(**load) - - @staticmethod - def _pack_with_original_ts_labels(previous_packet:Packet, current_output:list, unit, original_ts_labels:list) -> Packet: - """ - Receives the received Packet, its original Timeseries labels, and the output dictionary of 'apply'. - It returns a new Packet with the union of all contents. - If some new content has the same label of a previous content, it will be replaced. - """ - load = previous_packet._to_dict() # start with the contents already in the previous packet - packet_label = tuple(unit.PIPELINE_OUTPUT_LABELS.values())[0] - - # Timeseries - timeseries = {} - if packet_label == Packet.TIMESERIES_LABEL: - for original_ts_label, ts in zip(original_ts_labels, current_output): - assert isinstance(ts, biosignals.Timeseries) # Assuming only 1 Timeseries was outputted in each application - timeseries[original_ts_label] = ts - load[Packet.TIMESERIES_LABEL] = timeseries - - # Others - else: - load[packet_label] = current_output # replace or add - - return Packet(**load) - - @staticmethod - def _pack_separate_outputs(previous_packet:Packet, separate_outputs:list, unit, original_ts_labels:list) -> Packet: - """ - Receives the received Packet, its original Timeseries labels, and a list of outputs, one per each time 'apply' was called. - It returns a new Packet with the union of all contents. - If some new content has the same label of a previous content, it will be replaced. - """ - load = previous_packet._to_dict() # start with the contents already in the previous packet - packet_label = tuple(unit.PIPELINE_OUTPUT_LABELS.values())[0] - - res = {} - for original_ts_label, output in zip(original_ts_labels, separate_outputs): - if isinstance(output, dict): - if len(separate_outputs) > 1: - for content_label, content in output.items(): - res[original_ts_label+':'+content_label] = content - else: # no need to associate to each original label, because there is just 1 output, it means there was just 1 input - for content_label, content in output.items(): - res[content_label] = content - else: - res[original_ts_label] = output - - load[packet_label] = res - - return Packet(**load) - - -class SinglePipelineUnit(PipelineUnit, ABC): - """ - A Single Pipeline Unit is any agent that can act (use, process or make changes) to a collection (usually of Timeseries). - Following the Command design pattern, a SingleUnit is the abstract 'Command', so that Pipeline can execute various - kinds of processing by calling the 'apply' method of each concrete unit. - - Subclasses - ------------ - E.g. Filter, Segmenter, FeatureExtractor, FeatureSelector, SupervisingTrainer, DecisionMaker - Any subclass that implements 'apply'. - - Abstract Method 'apply' - ------------ - Every subclass must define 'apply' and implement a concrete behaviour. - To map the parameters' names of 'apply' to the labels inside any arriving Packet, PIPELINE_INPUT_LABELS should be - defined. To map the outputs to the labels of the resulting Packet, PIPELINE_OUTPUT_LABELS should be defined. - - Labels - ------------ - PIPELINE_INPUT_LABELS - Maps every label of a needed input inside a Packet to the name of the corresponding 'apply' parameter. - PIPELINE_OUTPUT_LABELS - Maps every output name of 'apply' to a label to be saved inside a Packet. 
- """ - - # =============================================================== - # Subclass-specific -- Define: - - PIPELINE_INPUT_LABELS: Dict[str, str] # { apply parameter : packet label } - PIPELINE_OUTPUT_LABELS: Dict[str, str] # { apply output : packet label } - - def __init__(self, name:str=None): - super(SinglePipelineUnit, self).__init__(name) - - @abstractmethod - def apply(self, **kwargs): - pass - - # =============================================================== - # Framework below -- Do not alter: - - ART_PATH = 'resources/pipeline_media/nd.png' - - def __str__(self): - res = self.__class__.__name__ - res += ' ' + self.name if self.name is not None else '' - return res - - def _apply(self, packet:Packet) -> Packet: - if self.__requires_one_timeseries() and packet.has_timeseries_collection: - return ApplySeparately(self)._apply(packet) - else: - input = self.__unpack(packet) - output = self.__apply(input) - return self.__pack(packet, output) - - def __unpack(self, packet:Packet): - return PipelineUnit._unpack_as_is(packet, self) - - def __apply(self, input: Iterable): - return self.apply(**input) - - def __pack(self, previous_packet:Packet, current_output) -> Packet: - return PipelineUnit._pack_as_is(previous_packet, current_output, self) - - def __requires_one_timeseries(self) -> bool: - if Packet.TIMESERIES_LABEL in self.PIPELINE_INPUT_LABELS.values(): - what_this_unit_needs = tuple(signature(self.apply).parameters.values()) - for parameter in what_this_unit_needs: - if self.PIPELINE_INPUT_LABELS[parameter.name] == Packet.TIMESERIES_LABEL: - if parameter.annotation is biosignals.Timeseries: - return True - return False - -class PipelineUnitsUnion(PipelineUnit, ABC): - """ - A Union is a collection of single units where the Pipeline branches to each of them. - Following the Template Method design pattern, a Union is the abstract class, where '_apply' is the 'template' method. - - Subclasses - ------------ - - ApplyTogether: Runs all Timeseries together in a unique structure over each SingleUnit. - - ApplySeparately: Runs each Timeseries separately over each SingleUnit. - - Template Method '_apply' - ------------ - 1. Unpacks, 2. Delegates and 3. Packs. - Unpacking and packing is similar and independent of how application is delegated. - So, Step 2, '__delegate' should be defined in each subclass. - - Abstract Method '__delegate' - ------------ - This method should handle how each SingleUnit is applied to the Timeseries (when there are many) -- if together or - separately. - - Labels - ------------ - PIPELINE_INPUT_LABELS - Maps every label of a needed input inside a Packet to the parameter names of the corresponding 'apply' methods. - PIPELINE_OUTPUT_LABELS - Maps every output name of the 'apply' methods to a label to be saved inside a Packet. 
- """ - - PIPELINE_INPUT_LABELS: Dict[str, str] # { apply parameter : packet label } - PIPELINE_OUTPUT_LABELS: Dict[str, str] # { apply output : packet label } - - def __init__(self, units: SinglePipelineUnit | Collection[SinglePipelineUnit], name:str=None): - super(PipelineUnitsUnion, self).__init__(name) - - self.__units = [] - self.__current_unit = None - - if isinstance(units, SinglePipelineUnit): - self.__units.append(units) - self.PIPELINE_INPUT_LABELS = units.PIPELINE_INPUT_LABELS - self.PIPELINE_OUTPUT_LABELS = units.PIPELINE_OUTPUT_LABELS - elif isinstance(units, Collection) and not isinstance(units, dict): - self.PIPELINE_INPUT_LABELS = {} - self.PIPELINE_OUTPUT_LABELS = {} - for unit in units: - if isinstance(unit, SinglePipelineUnit): - if unit.name is not None: - self.__units.append(unit) - self.PIPELINE_INPUT_LABELS.update(unit.PIPELINE_INPUT_LABELS) - self.PIPELINE_OUTPUT_LABELS.update(unit.PIPELINE_OUTPUT_LABELS) - else: - raise AssertionError(f"Pipeline Unit of type {type(unit).__name__} must have a name if inside a Union, in order to resolve eventual conflicting labels.") - else: - raise TypeError(f"{unit.__class__} is not a unitary PipelineUnit.") - else: - raise TypeError(f"{units.__class__} is not one or multiple PipelineUnits.") - - @property - def current_unit(self): - return self.__current_unit - - @property - def all_input_parameters(self) -> Tuple[Parameter]: - res = [] # shouldn't this be a Set - for unit in self.__units: - res += list(signature(unit.apply).parameters.values()) - return tuple(res) - - def __str__(self): - return 'Union' + (': ' + self.name) if self.name is not None else '' - - def _apply(self, packet:Packet) -> Packet: - """ - Acts as the 'template' method in the template method design pattern. - """ - - # Assert that there is not a single Timeseries and a single unit - if len(self.__units) == 1 and packet.has_single_timeseries: - raise AssertionError(f"There's only 1 Timeseries arriving to Union {self.name} comprising only 1 PipelineUnit. There's no use case for this. Instead, try inserting the PipelineUnit directly to the Pipeline, without using Unions.") - - output_packets = [] - for unit in self.__units: - self.__current_unit = unit - input = self.__unpack(packet) - output = self.__delegate(input) - output_packets.append(self.__pack(packet, output)) - - return self.__return_packet(output_packets) - - @abstractmethod - def __unpack(self, packet: Packet): - pass - - @abstractmethod - def __delegate(self, input): - pass - - @abstractmethod - def __pack(self, previous_packet:Packet, current_output) -> Packet: - pass - - def __return_packet(self, output_packets:list) -> Packet: - if len(output_packets) == 1: - return output_packets[0] - else: - # There might exist some conflicts here, such as contents with the same label. - # To ensure resolution, units must have names, and previous labels will be prefixed by the unit name. - return Packet.join_packets(**{unit.name: packet for unit, packet in zip(self.__units, output_packets)}) - - -class ApplyTogether(PipelineUnitsUnion): - """ - An ApplyTogether is a collection of single units, to which each will be applied to all Timeseries at once. - Following the Template Method design pattern, this is a concrete class, where '__delegate' is implemented. 
- """ - - def __init__(self, units: SinglePipelineUnit | Collection[SinglePipelineUnit], name: str = None): - super(ApplyTogether, self).__init__(units, name) - - def _PipelineUnitsUnion__unpack(self, packet: Packet) -> dict: - unpacked = PipelineUnit._unpack_as_is(packet, self.current_unit) - return unpacked - - def _PipelineUnitsUnion__delegate(self, input: dict): - return self.current_unit.apply(**input) # Apply to all Timeseries together - - def _PipelineUnitsUnion__pack(self, previous_packet: Packet, current_output) -> Packet: - return PipelineUnit._pack_as_is(previous_packet, current_output, self.current_unit) - - -class ApplySeparately(PipelineUnitsUnion): - """ - An ApplySeparately is a collection of single units, to which each will be applied to one Timeseries at a time. - Following the Template Method design pattern, this is a concrete class, where '__delegate' is implemented. - """ - - def __init__(self, units: SinglePipelineUnit | Collection[SinglePipelineUnit], name: str = None): - super(ApplySeparately, self).__init__(units, name) - - def _PipelineUnitsUnion__unpack(self, packet: Packet) -> Iterable: - original_labels, separate_inputs = PipelineUnit._unpack_separately(packet, self.current_unit) - self.__original_ts_labels = original_labels - return separate_inputs - - def _PipelineUnitsUnion__delegate(self, separate_inputs: Iterable) -> list: - separate_outputs = [] - for input in separate_inputs: # If there was only 1 input (i.e. 1 Timeseries), this cycle runs only once, which is okay - output = self.current_unit.apply(**input) - separate_outputs.append(output) # Currently, Pipeline Units only output 1 object - - return separate_outputs - - def _PipelineUnitsUnion__pack(self, previous_packet: Packet, current_output) -> Packet: - if isinstance(current_output, dict) and Packet.TIMESERIES_LABEL in current_output and len(self.__original_ts_labels) == len(current_output[Packet.TIMESERIES_LABEL]): - return PipelineUnit._pack_with_original_ts_labels(previous_packet, current_output, self.current_unit, self.__original_ts_labels) - else: - return PipelineUnit._pack_separate_outputs(previous_packet, current_output, self.current_unit, self.__original_ts_labels) diff --git a/src/ltbio/pipeline/__init__.py b/src/ltbio/pipeline/__init__.py deleted file mode 100644 index a04ee240..00000000 --- a/src/ltbio/pipeline/__init__.py +++ /dev/null @@ -1,5 +0,0 @@ -# Quick shortcuts to classes -from ltbio.pipeline.GoTo import GoTo -from ltbio.pipeline.Input import Input -from ltbio.pipeline.Pipeline import Pipeline -from ltbio.pipeline.PipelineUnit import ApplySeparately, ApplyTogether diff --git a/src/ltbio/pipeline/reports.py b/src/ltbio/pipeline/reports.py deleted file mode 100644 index 502326fc..00000000 --- a/src/ltbio/pipeline/reports.py +++ /dev/null @@ -1,142 +0,0 @@ -# -- encoding: utf-8 -- - -# =================================== - -# IT - LongTermBiosignals - -# Package: pipeline -# Module: reports -# Description: - -# Contributors: João Saraiva -# Created: 06/08/2022 - -# =================================== -from abc import ABC, abstractmethod -from datetime import datetime - -from fpdf.fpdf import FPDF - - -class PDFWriter(FPDF): - def __init__(self): - super().__init__() - - # Page dimensions - self.MARGINS = 18 - self.PAGE_WIDTH = 210 - self.MARGINS * 2 - self.PAGE_HEIGHT = 297 - - # Full width picture dimensions - self.FULL_PIC_WIDTH = self.PAGE_WIDTH - self.FULL_PIC_HEIGHT = 0 # zero means whatever the image height is - - # Small picture dimensions - self.SMALL_PIC_SEP = 6 - 
self.SMALL_PIC_WIDTH = self.PAGE_WIDTH / 2 - self.SMALL_PIC_SEP / 2 - self.SMALL_PIC_HEIGHT = self.SMALL_PIC_WIDTH - - self.set_margins(self.MARGINS, self.MARGINS, self.MARGINS) - - def header(self): - self.set_font('Arial', 'B', 16) - self.cell(123, 3, self.title, 0, 0, 'L') - self.set_font('Arial', '', 10) - # self.cell(25, 3) - current_date = datetime.now().strftime("%d-%m-%Y") - current_time = datetime.now().strftime("%H:%M:%S") - self.multi_cell(50, 5, 'Date: {0}\nHour: {1}\nEngineer: João Saraiva'.format(current_date, current_time), align='R') - - self.ln(10) - - def footer(self): - # Page numbers in the footer - self.set_y(-15) - self.set_font('Arial', 'I', 8) - self.set_text_color(128) - self.cell(0, 10, 'Page ' + str(self.page_no()), 0, 0, 'C') - - def __break_line(self): - self.ln(5) - - # Addition of cells on demand: - - def add_section_cell(self, name: str): - self.ln(15) - self.set_font('Arial', 'B', 12) - self.cell(self.PAGE_WIDTH, 5, name, 0, 0, 'C') - self.set_font('Arial', '', 10) - self.ln(12) - - def add_subsection_cell(self, text: str): - self.ln(12) - self.set_font('Arial', 'B', 10) - self.cell(self.PAGE_WIDTH, 5, text.upper(), 0, 0, 'L') - self.set_font('Arial', '', 10) - self.ln(7) - - def add_log_cell(self, text: str): - self.__break_line() - self.set_font('Arial', '', 10) - self.set_fill_color(247, 247, 247) - self.set_text_color(60, 60, 60) - self.cell(self.PAGE_WIDTH, 5, '{0} '.format(datetime.now().strftime("%H:%M:%S")) + text, 0, 0, 'L', fill=True) - self.set_text_color(0, 0, 0) - self.x = self.l_margin - - def add_text_cell(self, text: str): - self.multi_cell(self.PAGE_WIDTH, 5, str(text)) - - def add_image_fullwidth_cell(self, filepath: str): - self.__break_line() - self.image(filepath, w=self.FULL_PIC_WIDTH, h=self.FULL_PIC_HEIGHT) - - def add_image_grid_cell(self, filepaths: tuple[str]): - self.__break_line() - for i, image_path in enumerate(filepaths): - if i % 2 == 0: - self.image(image_path, w=self.SMALL_PIC_WIDTH, h=self.SMALL_PIC_HEIGHT) - else: - self.image(image_path, w=self.SMALL_PIC_WIDTH, h=self.SMALL_PIC_HEIGHT, - x=self.x + self.SMALL_PIC_WIDTH + self.SMALL_PIC_SEP, - y=self.y - self.SMALL_PIC_HEIGHT) - - -class Reporter(ABC): - - def __init__(self, writer: PDFWriter = None): - if writer is not None: - self.writer = writer - else: - self.writer = PDFWriter() - - @abstractmethod - def body(self): - pass - - def set_title(self, title: str): - self.writer.title = title - - def begin_section(self, name: str): - self.writer.add_section_cell(name) - - def begin_subsection(self, name: str): - self.writer.add_subsection_cell(name) - - def add_text_block(self, text: str): - self.writer.add_text_cell(text) - - def add_log_block(self, text: str): - self.writer.add_log_cell(text) - - def add_image_fullwidth(self, filepath: str): - self.writer.add_image_fullwidth_cell(filepath) - - def add_image_grid(self, filepaths: tuple[str]): - self.writer.add_image_grid_cell(filepaths) - - def output_report(self, title: str, filepath: str): - self.set_title(title) - self.writer.add_page() - self.body() # write body - self.writer.output(filepath) diff --git a/src/ltbio/processing/__init__.py b/src/ltbio/processing/__init__.py deleted file mode 100644 index 583b42d4..00000000 --- a/src/ltbio/processing/__init__.py +++ /dev/null @@ -1 +0,0 @@ -# Quick shortcuts to classes diff --git a/src/ltbio/processing/filters/Filter.py b/src/ltbio/processing/filters/Filter.py deleted file mode 100644 index 3fb8cb25..00000000 --- a/src/ltbio/processing/filters/Filter.py +++ /dev/null @@ 
-1,61 +0,0 @@
-# -*- encoding: utf-8 -*-
-
-# ===================================
-
-# IT - LongTermBiosignals
-
-# Package: processing
-# Module: Filter
-# Description: Abstract class Filter, representing a generic filter design and the methods to apply it to samples.
-
-# Contributors: João Saraiva
-# Created: 18/05/2022
-# Last Updated: 19/05/2022
-
-# ===================================
-
-from abc import ABC, abstractmethod
-
-from numpy import array
-
-import ltbio.pipeline
-from ltbio.biosignals import Timeseries
-
-class Filter(ltbio.pipeline.PipelineUnit.SinglePipelineUnit, ABC):
-    """
-    It acts as the Visitor class in the Visitor Design Pattern.
-    """
-
-    PIPELINE_INPUT_LABELS = {'timeseries': 'timeseries'}
-    PIPELINE_OUTPUT_LABELS = {'timeseries': 'timeseries'}
-    ART_PATH = 'resources/pipeline_media/filter.png'
-
-    def __init__(self, name: str = None):
-        super().__init__(name)
-        self.name = name
-
-    @abstractmethod
-    def _setup(self, sampling_frequency: float):
-        """
-        Implement this method to be called before visits.
-        Generally, it derives some information from the sampling frequency of a Timeseries.
-        """
-        pass
-
-    @abstractmethod
-    def _visit(self, samples: array) -> array:
-        """
-        Applies the Filter to a sequence of samples.
-        It acts as the visit method of the Visitor Design Pattern.
-        Implement its behaviour in the Concrete Visitor classes.
-        """
-        pass
-
-    def apply(self, timeseries: Timeseries):
-        timeseries._accept_filtering(self)
-        return timeseries
-
-    def __call__(self, *biosignals):
-        for b in biosignals:
-            b.filter(self)
-        return biosignals[0] if len(biosignals) == 1 else biosignals
diff --git a/src/ltbio/processing/filters/FrequencyDomainFilter.py b/src/ltbio/processing/filters/FrequencyDomainFilter.py
deleted file mode 100644
index 65c1cd38..00000000
--- a/src/ltbio/processing/filters/FrequencyDomainFilter.py
+++ /dev/null
@@ -1,131 +0,0 @@
-# -*- encoding: utf-8 -*-
-
-# ===================================
-
-# IT - LongTermBiosignals
-
-# Package: processing
-# Module: FrequencyDomainFilter
-# Description: Class FrequencyDomainFilter, a type of Filter of the frequency-domain.
-#              Enumerations FrequencyResponse and BandType.
-
-# Contributors: João Saraiva
-# Created: 17/05/2022
-# Last Updated: 25/05/2022
-
-# ===================================
-
-from enum import unique, Enum
-from typing import Tuple
-
-from biosppy.plotting import plot_filter
-from biosppy.signals.tools import get_filter as get_coefficients, _filter_signal
-from numpy import array
-
-from .Filter import Filter
-
-
-@unique
-class FrequencyResponse(str, Enum):
-    FIR = 'Finite Impulse Response (FIR)'
-    BUTTER = 'IIR Butterworth'
-    CHEBY1 = 'IIR Chebyshev 1'
-    CHEBY2 = 'IIR Chebyshev 2'
-    ELLIP = 'IIR Elliptic'
-    BESSEL = 'IIR Bessel'
-
-
-@unique
-class BandType(str, Enum):
-    LOWPASS = 'Low-pass'
-    HIGHPASS = 'High-pass'
-    BANDPASS = 'Band-pass'
-    BANDSTOP = 'Band-stop'
-
-
-class FrequencyDomainFilter(Filter):
-    """
-    Describes the design of a digital frequency-domain filter and holds the ability to apply that filter to any array of samples.
-    It acts as a concrete visitor in the Visitor Design Pattern.
-
-    To instantiate, give:
-        - fresponse: The frequency response of the filter. Choose one from the FrequencyResponse enumeration.
-        - band_type: Whether it should pass low or high frequencies, or pass or reject a band of the samples' spectrum. Choose one from the BandType enumeration.
-        - order: The order of the filter (int).
-        - cutoff: The cutoff frequency at 3 dB (for lowpass and highpass) or a tuple of two cutoffs (for bandpass or bandstop) (in Hertz, float).
-    """
-
-    def __init__(self, fresponse: FrequencyResponse, band_type: BandType, cutoff: float | Tuple[float, float],
-                 order: int, name: str = None, **options):
-        # These properties can be changed as pleased:
-        super().__init__(name=name)
-        self.fresponse = fresponse
-        self.band_type = band_type
-        self.order = order
-        self.cutoff = cutoff
-        self.options = options
-        # These are private properties:
-        self.__b, self.__a = None, None
-
-    @property
-    def last_numerator_coefficients(self) -> array:
-        if self.__are_coefficients_computed():
-            return self.__b
-        else:
-            raise AttributeError('The H function coefficients depend on the sampling frequency. This filter has not been applied to any Biosignal yet, hence the coefficients have not been computed yet.')
-
-    @property
-    def last_denominator_coefficients(self) -> array:
-        if self.__are_coefficients_computed():
-            return self.__a
-        else:
-            raise AttributeError('The H function coefficients depend on the sampling frequency. This filter has not been applied to any Biosignal yet, hence the coefficients have not been computed yet.')
-
-    def _setup(self, sampling_frequency: float):
-        """
-        Computes the coefficients of the H function.
-        They are stored as 'b' and 'a', respectively the numerator and denominator coefficients.
-
-        :param sampling_frequency: The sampling frequency of what should be filtered.
-        """
-
-        # Digital filter coefficients (from Biosppy)
-        self.__b, self.__a = get_coefficients(ftype=self.fresponse.name.lower() if self.fresponse != FrequencyResponse.FIR else self.fresponse.name, band=self.band_type.name.lower(),
-                                              order=self.order,
-                                              frequency=self.cutoff, sampling_rate=sampling_frequency, **self.options)
-        self.__sampling_frequency_of_coefficients = sampling_frequency
-
-    def __are_coefficients_computed(self) -> bool:
-        """
-        :return: True if the coefficients have already been computed, and the Filter is ready to be applied.
-        """
-        return self.__b is not None and self.__a is not None
-
-    def _visit(self, samples: array) -> array:
-        """
-        Applies the Filter to a sequence of samples.
-        It acts as the concrete visit method of the Visitor Design Pattern.
-
-        :param samples: Sequence of samples to filter.
-        :return: The filtered sequence of samples.
-        """
-
-        x = _filter_signal(self.__b, self.__a, samples, check_phase=True)[0]
-        return x
-
-    def plot_bode(self, show: bool = True, save_to: str = None):
-        if self.__are_coefficients_computed():  # Plot with frequencies in Hz
-            # figure = plot_bode_in_Hz(self.__b, self.__a, sampling_rate=self.__sampling_frequency_of_coefficients)  FIXME: use this function so that b and a are not recomputed
-            # Temporary solution below:
-            sampling_frequency = self.__sampling_frequency_of_coefficients
-        else:  # TODO: Plot with normalized frequencies
-            raise RuntimeError("Apply this filter to a Biosignal before trying to Bode plot it. Plotting with normalized frequencies is not available yet.")
-
-        plot_filter(ftype=self.fresponse.name.lower() if self.fresponse != FrequencyResponse.FIR else self.fresponse.name,
-                    band=self.band_type.name.lower(),
-                    order=self.order,
-                    frequency=self.cutoff,
-                    sampling_rate=sampling_frequency,
-                    show=show,
-                    path=save_to,
-                    **self.options)
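A short usage sketch, under the assumption of a pre-loaded `my_ecg` Biosignal:

    from ltbio.processing.filters import FrequencyDomainFilter, FrequencyResponse, BandType

    design = FrequencyDomainFilter(FrequencyResponse.BUTTER, BandType.BANDPASS,
                                   cutoff=(0.5, 40.0), order=4)
    design(my_ecg)      # __call__ filters the Biosignal's channels in place
    design.plot_bode()  # valid only now, after 'b' and 'a' were computed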
diff --git a/src/ltbio/processing/filters/TimeDomainFilter.py b/src/ltbio/processing/filters/TimeDomainFilter.py
deleted file mode 100644
index 4e9fd627..00000000
--- a/src/ltbio/processing/filters/TimeDomainFilter.py
+++ /dev/null
@@ -1,73 +0,0 @@
-# -*- encoding: utf-8 -*-
-
-# ===================================
-
-# IT - LongTermBiosignals
-
-# Package: processing
-# Module: TimeDomainFilter
-# Description: Class TimeDomainFilter, a type of Filter of the time-domain.
-#              Enumeration ConvolutionOperation.
-
-# Contributors: João Saraiva
-# Created: 19/05/2022
-
-# ===================================
-
-from datetime import timedelta
-from enum import unique, Enum
-
-from biosppy.signals.tools import smoother as apply_convolution
-from numpy import array
-
-from .Filter import Filter
-
-
-@unique
-class ConvolutionOperation(str, Enum):
-    MEDIAN = 'Median'
-    HAMMING = 'Hamming'
-    HANN = 'Hann'
-    PARZEN = 'Parzen'
-    KAISER = 'Kaiser'
-    GAUSSIAN = 'Gaussian'
-    BOXZEN = 'Boxzen'
-
-
-class TimeDomainFilter(Filter):
-    """
-    Describes the design of a digital time-domain filter and holds the ability to apply that filter to any array of samples.
-    It acts as a concrete visitor in the Visitor Design Pattern.
-
-    To instantiate, give:
-        - operation: The operation to apply to each window. Choose one from the ConvolutionOperation enumeration.
-        - window_length: The length of the window (in timedelta).
-        - overlap_length: The length of the overlap between window slides (in timedelta). Default: 0 seconds.
-    """
-
-    def __init__(self, operation: ConvolutionOperation, window_length: timedelta,
-                 overlap_length: timedelta = timedelta(seconds=0), name: str = None, **options):
-        # These properties can be changed as pleased:
-        super().__init__(name=name)
-        self.operation = operation
-        self.window_length = window_length
-        self.overlap_length = overlap_length
-        self.options = options
-
-    def _setup(self, sampling_frequency: float):
-        self.__window_length_in_samples = int(self.window_length.total_seconds() * sampling_frequency)
-        self.__overlap_length_in_samples = int(self.overlap_length.total_seconds() * sampling_frequency)
-
-        if self.operation is ConvolutionOperation.MEDIAN and self.__window_length_in_samples % 2 == 0:  # median windows must have an odd length,
-            self.__window_length_in_samples += 1  # so make it odd
-
-    def _visit(self, samples: array) -> array:
-        """
-        Applies the Filter to a sequence of samples.
-        It acts as the concrete visit method of the Visitor Design Pattern.
-
-        :param samples: Sequence of samples to filter.
-        :return: The filtered sequence of samples.
- """ - - return apply_convolution(samples, kernel=self.operation.name.lower(), size=self.__window_length_in_samples)[0] diff --git a/src/ltbio/processing/filters/__init__.py b/src/ltbio/processing/filters/__init__.py deleted file mode 100644 index f6fae9bd..00000000 --- a/src/ltbio/processing/filters/__init__.py +++ /dev/null @@ -1,3 +0,0 @@ -# Quick shortcuts to classes -from ltbio.processing.filters.FrequencyDomainFilter import FrequencyDomainFilter, FrequencyResponse, BandType -from ltbio.processing.filters.TimeDomainFilter import TimeDomainFilter, ConvolutionOperation diff --git a/src/ltbio/processing/formaters/Normalizer.py b/src/ltbio/processing/formaters/Normalizer.py deleted file mode 100644 index 0b2e9a19..00000000 --- a/src/ltbio/processing/formaters/Normalizer.py +++ /dev/null @@ -1,63 +0,0 @@ -# -- encoding: utf-8 -- - -# =================================== - -# IT - LongTermBiosignals - -# Package: processing -# Module: Normalizer -# Description: - -# Contributors: João Saraiva -# Created: 26/07/2022 - -# =================================== -from numpy import ndarray, mean, std - -from ltbio.biosignals import Timeseries, Biosignal -from ltbio.pipeline.PipelineUnit import SinglePipelineUnit - -class Normalizer(SinglePipelineUnit): - """ - Pipeline Unit that normalizes Timeseries. - """ - - PIPELINE_INPUT_LABELS = {'timeseries': 'timeseries'} - PIPELINE_OUTPUT_LABELS = {'timeseries': 'timeseries'} - ART_PATH = 'resources/pipeline_media/segmenter.png' - - def __init__(self, method='mean', name: str = ''): - super().__init__(name) - if method != 'mean' and method != 'minmax': - raise ValueError("Normalizer 'method' should be either 'mean' (default) or 'minmax'.") - self.__method = method - - def apply(self, timeseries: Timeseries): - - if not isinstance(timeseries, Timeseries): - raise TypeError("Parameter 'timeseries' should be of type Timeseries.") - if len(timeseries) <= 0: - raise AssertionError("The given Timeseries has no samples. Give a non-empty Timeseries.") - - def __mean_normalization(samples: ndarray) -> ndarray: - samples -= mean(samples) - samples /= std(samples) - return samples - - def __min_max_normalization(samples: ndarray) -> ndarray: - return (samples - min(samples)) / (max(samples) - min(samples)) - - if self.__method == 'mean': - return timeseries._apply_operation_and_new(__mean_normalization) - else: - return timeseries._apply_operation_and_new(__min_max_normalization) - - def __call__(self, *biosignals): - res = [] - for b in biosignals: - if not isinstance(b, Biosignal): - raise TypeError(f"Parameter '{b}' should be of type Biosignal.") - new_channels = {name: self.apply(channel) for name, channel in b} - res.append(b._new(new_channels)) - - return tuple(res) if len(res) > 1 else res[0] diff --git a/src/ltbio/processing/formaters/Segmenter.py b/src/ltbio/processing/formaters/Segmenter.py deleted file mode 100644 index 05505112..00000000 --- a/src/ltbio/processing/formaters/Segmenter.py +++ /dev/null @@ -1,70 +0,0 @@ -# -*- encoding: utf-8 -*- - -# =================================== - -# IT - LongTermBiosignals - -# Package: processing -# Module: Segmenter -# Description: Class Segmenter, a type of PipelineUnit that segments Timeseries. 
-
-# Contributors: João Saraiva
-# Created: 01/06/2022
-# Last Updated: 22/07/2022
-
-# ===================================
-
-from datetime import timedelta
-
-from biosppy.signals.tools import windower
-
-import ltbio.biosignals as _bio
-from ltbio.pipeline.PipelineUnit import SinglePipelineUnit
-
-
-class Segmenter(SinglePipelineUnit):
-    """
-    This PipelineUnit can segment one Timeseries at a time.
-    """
-
-    PIPELINE_INPUT_LABELS = {'timeseries': 'timeseries'}
-    PIPELINE_OUTPUT_LABELS = {'timeseries': 'timeseries'}
-    ART_PATH = 'resources/pipeline_media/segmenter.png'
-
-    def __init__(self, window_length: timedelta, overlap_length: timedelta = timedelta(seconds=0), name=None):
-        super().__init__(name)
-        self.window_length = window_length
-        self.overlap_length = overlap_length
-
-    def apply(self, timeseries: _bio.Timeseries) -> _bio.Timeseries:
-        # Assert it only has one Segment, or that all Segments are adjacent
-
-        """ # FIXME: Uncomment this.
-        if len(timeseries.segments) > 0:
-            adjacent = True
-            for i in range(1, len(timeseries.segments)):
-                if not timeseries.segments[i-1].adjacent(timeseries.segments[i]):  # assert they're adjacent
-                    adjacent = False
-                    break
-
-            if not adjacent:
-                x = input(f"Segments of {timeseries.name} are not adjacent. Join them? (y/n) ").lower()
-                if x == 'y':
-                    pass  # go ahead
-                else:
-                    raise AssertionError('Framework does not support segmenting non-adjacent segments, unless you want to join them. Try indexing the time period of interest first.')
-        """
-
-        new = timeseries._equally_segment_and_new(self.window_length, self.overlap_length)
-        new.name = timeseries.name + " segmented " + str(self.window_length) + " +/- " + str(self.overlap_length)
-        return new
-
-    def __call__(self, *biosignals):
-        res = []
-        for b in biosignals:
-            if not isinstance(b, _bio.Biosignal):
-                raise TypeError(f"Parameter '{b}' should be of type Biosignal.")
-            new_channels = {name: self.apply(channel) for name, channel in b}
-            res.append(b._new(new_channels))
-
-        return tuple(res) if len(res) > 1 else res[0]
diff --git a/src/ltbio/processing/formaters/__init__.py b/src/ltbio/processing/formaters/__init__.py
deleted file mode 100644
index 84220db3..00000000
--- a/src/ltbio/processing/formaters/__init__.py
+++ /dev/null
@@ -1,3 +0,0 @@
-# Quick shortcuts to classes
-from ltbio.processing.formaters.Segmenter import Segmenter
-from ltbio.processing.formaters.Normalizer import Normalizer
diff --git a/src/ltbio/processing/noises/GaussianNoise.py b/src/ltbio/processing/noises/GaussianNoise.py
deleted file mode 100644
index a3fb1279..00000000
--- a/src/ltbio/processing/noises/GaussianNoise.py
+++ /dev/null
@@ -1,34 +0,0 @@
-# -- encoding: utf-8 --
-
-# ===================================
-
-# IT - LongTermBiosignals
-
-# Package: processing
-# Module: GaussianNoise
-# Description:
-
-# Contributors: João Saraiva
-# Created: 26/07/2022
-
-# ===================================
-
-from datetime import timedelta
-from math import ceil
-
-from numpy import ndarray
-from numpy.random import normal
-
-from ltbio.processing.noises.Noise import Noise
-
-
-class GaussianNoise(Noise):
-
-    def __init__(self, mean: float, deviation: float, sampling_frequency: float, name: str = None):
-        super().__init__(sampling_frequency, name)
-        self.__mean = mean
-        self.__deviation = deviation
-
-    def _Noise__generate_data(self, duration: timedelta) -> ndarray:
-        n_samples = int(duration.total_seconds() * self.sampling_frequency)
-        return normal(self.__mean, self.__deviation, n_samples)
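A hedged sketch of the intended use (`my_ppg` is a hypothetical Biosignal; `__add__` is defined on the Noise base class deleted in the next hunk):

    from datetime import timedelta
    from ltbio.processing.noises.GaussianNoise import GaussianNoise

    noise = GaussianNoise(mean=0.0, deviation=0.1, sampling_frequency=128.0)
    samples = noise[timedelta(seconds=30)]  # draws 30 s of Gaussian samples at 128 Hz
    noisy_ppg = noise + my_ppg              # Biosignal.withAdditiveNoise(original=my_ppg, noise=noise)
    noise.plot()                            # plots the samples generated last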
diff --git a/src/ltbio/processing/noises/Noise.py b/src/ltbio/processing/noises/Noise.py
deleted file mode 100644
index 3df7e508..00000000
--- a/src/ltbio/processing/noises/Noise.py
+++ /dev/null
@@ -1,140 +0,0 @@
-# -- encoding: utf-8 --
-
-# ===================================
-
-# IT - LongTermBiosignals
-
-# Package: src/ltbio/processing/noises
-# Module: Noise
-# Description:
-
-# Contributors: João Saraiva
-# Created: 26/07/2022
-
-# ===================================
-from abc import abstractmethod
-from datetime import timedelta
-
-from matplotlib import pyplot as plt
-from numpy import ndarray, array
-from scipy.signal import resample
-
-import ltbio.biosignals.modalities as modalities
-from ltbio.biosignals.timeseries.Frequency import Frequency
-
-
-class Noise():
-
-    def __init__(self, sampling_frequency: float, name: str = None):
-        self.__sampling_frequency = sampling_frequency if isinstance(sampling_frequency, Frequency) else Frequency(sampling_frequency)
-        self.__name = name
-        self.__last_samples = None
-
-    # ===================================
-    # Properties
-
-    @property
-    def samples(self) -> ndarray:
-        """The samples generated last, using indexing."""
-        if self.__last_samples is not None:
-            return self.__last_samples
-        else:
-            raise AttributeError("Samples were not yet generated. Generate samples using indexing.")
-
-    @property
-    def sampling_frequency(self) -> float:
-        """The frequency at which the samples were produced, in Hz."""
-        return self.__sampling_frequency.value
-
-    @property
-    def name(self):
-        """The name of the Noise, if any."""
-        return self.__name if self.__name is not None else "No Name"
-
-    @name.setter
-    def name(self, name: str):
-        """Set or reset a name for the Noise."""
-        self.__name = name
-
-    # ===================================
-    # Built-ins
-
-    def __getitem__(self, item) -> ndarray:
-        """
-        Gets noisy samples for the amount of time specified.
-        If the noise is stochastic, the same data is not guaranteed across calls.
-        """
-
-        if isinstance(item, int):  # in minutes
-            self.__last_samples = self.__generate_data(timedelta(minutes=item))
-            return self.__last_samples.copy()
-
-        if isinstance(item, timedelta):
-            self.__last_samples = self.__generate_data(item)
-            return self.__last_samples.copy()
-
-        raise IndexError(
-            "Index types not supported. Give a timedelta or an integer in minutes.")
-
-    def __add__(self, other):
-        """The built-in sum operation (+) adds this noise, in an additive way, to a Biosignal."""
-        if isinstance(other, modalities.Biosignal):
-            return modalities.Biosignal.withAdditiveNoise(original=other, noise=self)
-
-        raise TypeError("Trying to add noise to an object of type {}. 
Expected type: Biosignal.".format(type(other))) - - # =================================== - # Methods - - def resample(self, frequency: float): - """Resamples the noisy data to the frequency specified.""" - if self.__last_samples is not None: - self.__last_samples = resample(self.__last_samples, num = int(frequency * len(self.__last_samples) / self.__sampling_frequency)) - self.__sampling_frequency = frequency if isinstance(frequency, Frequency) else Frequency(frequency) - - # =================================== - # INTERNAL USAGE - Generate data - - @abstractmethod - def __generate_data(self, duration:timedelta) -> ndarray: - """Generates an array of noisy samples for the amount of time specified.""" - pass - - # =================================== - # INTERNAL USAGE - Plots - - def plot(self, show:bool=True, save_to:str=None): - """ - Plots the last generated samples or a 1-minute example of the noise relative amplitude. - @param show: True if plot is to be immediately displayed; False otherwise. - @param save_to: A path to save the plot as an image file; If none is provided, it is not saved. - """ - - if self.__last_samples is not None: - data = self.__last_samples - else: - data = self.__generate_data(timedelta(minutes=1)) # example - - fig = plt.figure() - ax = plt.subplot() - ax.title.set_size(8) - ax.margins(x=0) - ax.set_xlabel('Time (s)', fontsize=6, rotation=0, loc="right") - ax.set_ylabel('Relative Amplitude', fontsize=6, rotation=90, loc="top") - plt.xticks(fontsize=6) - plt.yticks(fontsize=6) - ax.grid() - - x, y = array(range(len(data)))/self.__sampling_frequency, data - plt.gca().plot(x, y, linewidth=0.5) - plt.tick_params(axis='x', direction='in') - - if self.__last_samples is not None: - fig.suptitle('Last used samples of Noise ' + self.name, fontsize=10) - else: - fig.suptitle('1-Minute Example of Noise ' + self.name, fontsize=10) - - fig.tight_layout() - if save_to is not None: - fig.savefig(save_to) - plt.show() if show else plt.close() diff --git a/src/ltbio/processing/noises/__init__.py b/src/ltbio/processing/noises/__init__.py deleted file mode 100644 index e69de29b..00000000 From ef87b4a85c4ce384e81d9a5f7490a4793d6d8d56 Mon Sep 17 00:00:00 2001 From: saraiva Date: Wed, 21 Jun 2023 19:58:17 +0100 Subject: [PATCH 25/47] Major refactor (1) --- docs/changelog/serialversions/Biosignal.md | 18 - .../serialversions/BiosignalSource.md | 16 - resources/config.ini | 2 - src/ltbio/__init__.py | 4 + src/ltbio/_core/__init__.py | 12 + src/ltbio/_core/exceptions.py | 109 + src/ltbio/_core/operations.py | 108 + src/ltbio/_core/serializations/edf.py | 237 ++ src/ltbio/_core/serializations/nparray.py | 130 ++ src/ltbio/_core/statistics.py | 14 + src/ltbio/biosignals/_Biosignal.py | 1616 +++++++++++++ src/ltbio/biosignals/_Biosignal.pyi | 248 ++ src/ltbio/biosignals/_BiosignalSource.py | 54 + src/ltbio/biosignals/_BiosignalSource.pyi | 68 + src/ltbio/biosignals/_Event.py | 139 ++ src/ltbio/biosignals/_Event.pyi | 55 + src/ltbio/biosignals/_Segment.py | 255 ++ src/ltbio/biosignals/_Segment.pyi | 104 + src/ltbio/biosignals/_Timeline.py | 459 ++++ src/ltbio/biosignals/_Timeline.pyi | 120 + src/ltbio/biosignals/_Timeseries.py | 810 +++++++ src/ltbio/biosignals/_Timeseries.pyi | 169 ++ src/ltbio/biosignals/__init__.py | 1611 +------------ src/ltbio/biosignals/__init__.pyi | 37 + src/ltbio/biosignals/derived.py | 96 - .../biosignals/derived_modalities/__init__.py | 11 + .../derived_modalities/__init__.pyi | 9 + .../biosignals/derived_modalities/motion.py | 28 + 
.../biosignals/derived_modalities/motion.pyi | 23 + .../biosignals/derived_modalities/pulse.py | 49 + .../biosignals/derived_modalities/pulse.pyi | 46 + src/ltbio/biosignals/modalities/_ACC.py | 18 + src/ltbio/biosignals/modalities/_ACC.pyi | 17 + .../{modalities.py => modalities/_ECG.py} | 218 +- src/ltbio/biosignals/modalities/_ECG.pyi | 51 + src/ltbio/biosignals/modalities/_EDA.py | 25 + src/ltbio/biosignals/modalities/_EDA.pyi | 17 + src/ltbio/biosignals/modalities/_EEG.py | 18 + src/ltbio/biosignals/modalities/_EEG.pyi | 17 + src/ltbio/biosignals/modalities/_EMG.py | 18 + src/ltbio/biosignals/modalities/_EMG.pyi | 17 + src/ltbio/biosignals/modalities/_PPG.py | 18 + src/ltbio/biosignals/modalities/_PPG.pyi | 17 + src/ltbio/biosignals/modalities/_RESP.py | 18 + src/ltbio/biosignals/modalities/_RESP.pyi | 17 + src/ltbio/biosignals/modalities/_TEMP.py | 18 + src/ltbio/biosignals/modalities/_TEMP.pyi | 17 + src/ltbio/biosignals/modalities/__init__.py | 20 + src/ltbio/biosignals/modalities/__init__.pyi | 22 + src/ltbio/biosignals/sources.py | 1311 ----------- src/ltbio/biosignals/sources/_BITalino.py | 272 +++ src/ltbio/biosignals/sources/_BITalino.pyi | 12 + src/ltbio/biosignals/sources/_E4.py | 193 ++ src/ltbio/biosignals/sources/_E4.pyi | 4 + src/ltbio/biosignals/sources/_HEM.py | 109 + src/ltbio/biosignals/sources/_HEM.pyi | 4 + src/ltbio/biosignals/sources/_HSM.py | 90 + src/ltbio/biosignals/sources/_HSM.pyi | 5 + src/ltbio/biosignals/sources/_MITDB.py | 134 ++ src/ltbio/biosignals/sources/_MITDB.pyi | 4 + src/ltbio/biosignals/sources/_Seer.py | 127 + src/ltbio/biosignals/sources/_Seer.pyi | 4 + src/ltbio/biosignals/sources/_Sense.py | 373 +++ src/ltbio/biosignals/sources/_Sense.pyi | 20 + src/ltbio/biosignals/sources/__init__.py | 22 + src/ltbio/biosignals/sources/__init__.pyi | 17 + src/ltbio/biosignals/statistics.py | 112 - src/ltbio/biosignals/timeseries.py | 2080 ----------------- src/ltbio/biosignals/units.py | 22 + src/ltbio/biosignals/units.pyi | 106 + src/ltbio/clinical/Patient.py | 15 + src/ltbio/clinical/conditions/Epilepsy.py | 2 +- src/ltbio/clinical/medications/Medication.py | 2 +- .../test_Biosignal.py | 17 +- tests/biosignals/Biosignal/test_builtins.py | 65 + .../Biosignal/test_get_properties.py | 90 + .../Biosignal/test_set_properties.py | 71 + tests/dependencies/__init__.py | 0 tests/dependencies/_import_from_source.py | 156 ++ tests/dependencies/test_import_from_source.py | 3 + tests/features/test_FeatureSelector.py | 2 +- tests/pipeline/test_Packet.py | 2 +- tests/pipeline/test_Pipeline.py | 4 +- tests/pipeline/test_PipelineUnitsUnion.py | 2 +- tests/pipeline/test_SinglePipelineUnit.py | 2 +- .../filters/test_FrequencyDomainFilter.py | 4 +- .../processing/formatters/test_Normalizer.py | 7 +- tests/processing/test_Segmenter.py | 6 +- tests/resources/biosignals.py | 127 + tests/resources/segments.py | 75 + tests/resources/timeseries.py | 105 + 91 files changed, 7534 insertions(+), 5464 deletions(-) create mode 100644 src/ltbio/_core/__init__.py create mode 100644 src/ltbio/_core/exceptions.py create mode 100644 src/ltbio/_core/operations.py create mode 100644 src/ltbio/_core/serializations/edf.py create mode 100644 src/ltbio/_core/serializations/nparray.py create mode 100644 src/ltbio/_core/statistics.py create mode 100644 src/ltbio/biosignals/_Biosignal.py create mode 100644 src/ltbio/biosignals/_Biosignal.pyi create mode 100644 src/ltbio/biosignals/_BiosignalSource.py create mode 100644 src/ltbio/biosignals/_BiosignalSource.pyi create mode 100644 
src/ltbio/biosignals/_Event.py create mode 100644 src/ltbio/biosignals/_Event.pyi create mode 100644 src/ltbio/biosignals/_Segment.py create mode 100644 src/ltbio/biosignals/_Segment.pyi create mode 100644 src/ltbio/biosignals/_Timeline.py create mode 100644 src/ltbio/biosignals/_Timeline.pyi create mode 100644 src/ltbio/biosignals/_Timeseries.py create mode 100644 src/ltbio/biosignals/_Timeseries.pyi create mode 100644 src/ltbio/biosignals/__init__.pyi delete mode 100644 src/ltbio/biosignals/derived.py create mode 100644 src/ltbio/biosignals/derived_modalities/__init__.py create mode 100644 src/ltbio/biosignals/derived_modalities/__init__.pyi create mode 100644 src/ltbio/biosignals/derived_modalities/motion.py create mode 100644 src/ltbio/biosignals/derived_modalities/motion.pyi create mode 100644 src/ltbio/biosignals/derived_modalities/pulse.py create mode 100644 src/ltbio/biosignals/derived_modalities/pulse.pyi create mode 100644 src/ltbio/biosignals/modalities/_ACC.py create mode 100644 src/ltbio/biosignals/modalities/_ACC.pyi rename src/ltbio/biosignals/{modalities.py => modalities/_ECG.py} (74%) create mode 100644 src/ltbio/biosignals/modalities/_ECG.pyi create mode 100644 src/ltbio/biosignals/modalities/_EDA.py create mode 100644 src/ltbio/biosignals/modalities/_EDA.pyi create mode 100644 src/ltbio/biosignals/modalities/_EEG.py create mode 100644 src/ltbio/biosignals/modalities/_EEG.pyi create mode 100644 src/ltbio/biosignals/modalities/_EMG.py create mode 100644 src/ltbio/biosignals/modalities/_EMG.pyi create mode 100644 src/ltbio/biosignals/modalities/_PPG.py create mode 100644 src/ltbio/biosignals/modalities/_PPG.pyi create mode 100644 src/ltbio/biosignals/modalities/_RESP.py create mode 100644 src/ltbio/biosignals/modalities/_RESP.pyi create mode 100644 src/ltbio/biosignals/modalities/_TEMP.py create mode 100644 src/ltbio/biosignals/modalities/_TEMP.pyi create mode 100644 src/ltbio/biosignals/modalities/__init__.py create mode 100644 src/ltbio/biosignals/modalities/__init__.pyi delete mode 100644 src/ltbio/biosignals/sources.py create mode 100644 src/ltbio/biosignals/sources/_BITalino.py create mode 100644 src/ltbio/biosignals/sources/_BITalino.pyi create mode 100644 src/ltbio/biosignals/sources/_E4.py create mode 100644 src/ltbio/biosignals/sources/_E4.pyi create mode 100644 src/ltbio/biosignals/sources/_HEM.py create mode 100644 src/ltbio/biosignals/sources/_HEM.pyi create mode 100644 src/ltbio/biosignals/sources/_HSM.py create mode 100644 src/ltbio/biosignals/sources/_HSM.pyi create mode 100644 src/ltbio/biosignals/sources/_MITDB.py create mode 100644 src/ltbio/biosignals/sources/_MITDB.pyi create mode 100644 src/ltbio/biosignals/sources/_Seer.py create mode 100644 src/ltbio/biosignals/sources/_Seer.pyi create mode 100644 src/ltbio/biosignals/sources/_Sense.py create mode 100644 src/ltbio/biosignals/sources/_Sense.pyi create mode 100644 src/ltbio/biosignals/sources/__init__.py create mode 100644 src/ltbio/biosignals/sources/__init__.pyi delete mode 100644 src/ltbio/biosignals/statistics.py delete mode 100644 src/ltbio/biosignals/timeseries.py create mode 100644 src/ltbio/biosignals/units.pyi rename tests/biosignals/{modalities => Biosignal}/test_Biosignal.py (96%) create mode 100644 tests/biosignals/Biosignal/test_builtins.py create mode 100644 tests/biosignals/Biosignal/test_get_properties.py create mode 100644 tests/biosignals/Biosignal/test_set_properties.py create mode 100644 tests/dependencies/__init__.py create mode 100644 tests/dependencies/_import_from_source.py 
create mode 100644 tests/dependencies/test_import_from_source.py create mode 100644 tests/resources/biosignals.py create mode 100644 tests/resources/segments.py create mode 100644 tests/resources/timeseries.py diff --git a/docs/changelog/serialversions/Biosignal.md b/docs/changelog/serialversions/Biosignal.md index 531c0da0..e69de29b 100644 --- a/docs/changelog/serialversions/Biosignal.md +++ b/docs/changelog/serialversions/Biosignal.md @@ -1,18 +0,0 @@ -# Biosignal - -## Serial Version 1 - -_Date Created: 01-06-2022_ - -``` -(SERIALVERSION, name, source, patient, acquisition_location, associated_events, timeseries) -``` - -* `SERIALVERSION` equals 1. -* `name` is a `str` with the value of the biosignal's `__name` attribute. -* `source` is a `__BiosignalSource` class, or the state of a `__BiosignalSource` object, based on the value of the biosignal's `__source` attribute. -* `patient` is the state of the `Patient` referenced in the biosignal's `__patient` attribute. -* `acquisition_location` is a `BodyLocation` with the value of the biosignal's `__acquisition_location` attribute. -* `associated_events` is a tuple of the states of all `Event`s' referenced in the biosignal's `__associated_events` attribute. -* `timeseries` is a dictionary of the states of all `Timeseries`s' referenced in the biosignal's `__timeseries` attribute. - diff --git a/docs/changelog/serialversions/BiosignalSource.md b/docs/changelog/serialversions/BiosignalSource.md index 873c90e6..e69de29b 100644 --- a/docs/changelog/serialversions/BiosignalSource.md +++ b/docs/changelog/serialversions/BiosignalSource.md @@ -1,16 +0,0 @@ -# __BiosignalSource - -`__BiosignalSource` is usually not instantiated as an object, so there are no states to serialize. -However, there are some sources that are instantiated, e.g., `Sense`, `Bitalino`. In these cases, the following serial versions apply. - -## Serial Version 1 - -_Date Created: 01-06-2022_ - -``` -(SERIALVERSION, others) -``` - -* `SERIALVERSION` equals 1. -* `others` is a dictionary of properties an instantiated `__BiosignalSource` object may have. 
-
diff --git a/resources/config.ini b/resources/config.ini
index 30b291e4..e69de29b 100644
--- a/resources/config.ini
+++ b/resources/config.ini
@@ -1,2 +0,0 @@
-[DEFAULT]
-Sense = /Users/saraiva/Desktop/LongTermBiosignals/resources/Sense_CSV_tests/sense_defaults.json
\ No newline at end of file
diff --git a/src/ltbio/__init__.py b/src/ltbio/__init__.py
index e69de29b..2d744367 100644
--- a/src/ltbio/__init__.py
+++ b/src/ltbio/__init__.py
@@ -0,0 +1,4 @@
+
+
+__all__ = ["biosignals", "clinical"]
+
diff --git a/src/ltbio/_core/__init__.py b/src/ltbio/_core/__init__.py
new file mode 100644
index 00000000..adf7f0db
--- /dev/null
+++ b/src/ltbio/_core/__init__.py
@@ -0,0 +1,12 @@
+# -- encoding: utf-8 --
+# ===================================
+# ScientISST LTBio | Long-Term Biosignals
+
+# Package:
+# Module:
+# Description:
+
+# Contributors: João Saraiva
+# Created:
+# Last Updated:
+# ===================================
diff --git a/src/ltbio/_core/exceptions.py b/src/ltbio/_core/exceptions.py
new file mode 100644
index 00000000..b942a279
--- /dev/null
+++ b/src/ltbio/_core/exceptions.py
@@ -0,0 +1,109 @@
+# -- encoding: utf-8 --
+# ===================================
+# ScientISST LTBio | Long-Term Biosignals
+from datetimerange import DateTimeRange
+
+#from ltbio.biosignals._Timeline import Timeline
+#from ltbio.biosignals._Timeseries import Timeseries
+#from ltbio.biosignals.units import Unit, Frequency
+#from ltbio.clinical import BodyLocation
+
+
+# Package:
+# Module:
+# Description:
+
+# Contributors: João Saraiva
+# Created:
+# Last Updated:
+# ===================================
+
+
+class IncompatibleTimeseriesError(Exception):
+    def __init__(self, why: str):
+        super().__init__(f"These Timeseries are incompatible because {why}")
+
+
+class DifferentSamplingFrequenciesError(IncompatibleTimeseriesError):
+    def __init__(self, *frequencies):
+        super().__init__(f"these different sampling frequencies were found: {', '.join(str(f) for f in frequencies)}. "
+                         f"Try to resample first.")
+
+
+class DifferentUnitsError(IncompatibleTimeseriesError):
+    def __init__(self, *units):
+        super().__init__(f"these different units were found: {', '.join(str(u) for u in units)}. "
+                         f"Try to convert first.")
+
+
+class DifferentDomainsError(IncompatibleTimeseriesError):
+    def __init__(self, *timelines):
+        note = "they have different domains: "
+        note += '; '.join([f"({i+1}): {domain}" for i, domain in enumerate(timelines)])
+        super().__init__(note)
+
+
+class IncompatibleBiosignalsError(Exception):
+    def __init__(self, why: str):
+        super().__init__(f"These Biosignals are incompatible because {why}")
+
+
+class DifferentPatientsError(IncompatibleBiosignalsError):
+    def __init__(self, first, second):
+        super().__init__(f"at least two different patients were found: {first} and {second}. "
+                         f"Try to drop the patients first.")
+
+class IncompatibleSegmentsError(Exception):
+    def __init__(self, why: str):
+        super().__init__(f"These Segments are incompatible because {why}")

+class DifferentLengthsError(Exception):
+    def __init__(self, first: int, second: int):
+        super().__init__(f"The first has length {first} and the second has length {second}.")
+
+
+class TimeError(Exception):
+    ...
+
+
+class ChannelsWithDifferentStartTimepointsError(TimeError):
+    def __init__(self, first_name, first_start, second_name, second_start, additional: str = ''):
+        super().__init__(f"{first_name} starts at {first_start} and {second_name} starts at {second_start}. " + additional)
+
+
+class OverlappingError(TimeError):
+    def __init__(self, what: str):
+        super().__init__(f"There is an overlap between {what}")
+
+
+class TimeseriesOverlappingError(OverlappingError):
+    def __init__(self, first, second, *overlap: DateTimeRange):
+        super().__init__(f"Timeseries {first} and Timeseries {second}"
+                         + (f" on {overlap}." if overlap else "."))
+
+
+class OperationError(Exception):
+    ...
+
+
+class UndoableOperationError(OperationError):
+    def __init__(self, operation, by_nature: bool):
+        note = f"Operation {operation} cannot be undone"
+        if by_nature:
+            note += " by nature, i.e. there is no mathematical way of reversing it or, at least, it's not implemented."
+        else:
+            note += ", most likely because this operation was what created this object."
+        super().__init__(note)
+
+
+class BiosignalError(Exception):
+    ...
+
+
+class ChannelNotFoundError(BiosignalError, IndexError, AttributeError):
+    def __init__(self, name):
+        super().__init__(f"There is no channel named '{name}'.")
+
+
+class EventNotFoundError(BiosignalError, IndexError, AttributeError):
+    def __init__(self, name: str):
+        super().__init__(f"There is no event named '{name}'.")
diff --git a/src/ltbio/_core/operations.py b/src/ltbio/_core/operations.py
new file mode 100644
index 00000000..ea01ecff
--- /dev/null
+++ b/src/ltbio/_core/operations.py
@@ -0,0 +1,108 @@
+# -- encoding: utf-8 --
+# ===================================
+# ScientISST LTBio | Long-Term Biosignals
+from abc import ABC, abstractmethod
+from datetime import datetime
+from typing import Any, ClassVar
+
+from .exceptions import UndoableOperationError
+
+
+# Package:
+# Module:
+# Description:
+
+# Contributors: João Saraiva
+# Created:
+# Last Updated:
+# ===================================
+
+
+class Operator(ABC):
+    NAME: str
+    DESCRIPTION: str
+    SHORT: str
+
+    def __init__(self, **parameters):
+        ...
+
+    def __call__(self, *args, **kwargs) -> Any:
+        result, _ = self._apply(*args, **kwargs)
+        return result
+
+    @abstractmethod
+    def _apply(self, *args, **kwargs) -> (Any, 'Operation'):
+        pass
+
+    def _undo(self, *args, **kwargs):
+        # Optional to override; the base version marks the Operator as irreversible
+        raise NotImplementedError()
+
+    @property
+    def is_reversible(self) -> bool:
+        # Check if "_undo" was overridden, i.e. if the base (not implemented) version is not in use
+        return type(self)._undo is not Operator._undo
+
+
+class Operation:
+    def __init__(self, operator: Operator, when: datetime, allow_undo: bool, *objects, **kwargs):
+        self.__operator = operator
+        self.__when = when
+        self.__objects = objects
+        self.__kwargs = kwargs
+        self.__allow_undo = allow_undo
+
+    # Getters
+    @property
+    def operator(self) -> Operator:
+        return self.__operator
+
+    @property
+    def when(self) -> datetime:
+        return self.__when
+
+    @property
+    def objects(self):
+        return self.__objects
+
+    @property
+    def is_undoable(self) -> bool:
+        return self.__allow_undo and self.__operator.is_reversible
+
+    def undo(self):
+        if not self.__allow_undo:
+            raise UndoableOperationError(self, by_nature=False)
+        if not self.__operator.is_reversible:
+            raise UndoableOperationError(self, by_nature=True)
+        return self.__operator._undo(*self.__objects, **self.__kwargs)
+
+    def __str__(self):
+        return str(self.__operator)
+
+    def __repr__(self):
+        return repr(self.__operator) + " performed in " + str(self.__when)
+
+
+class ArithmeticOperator(Operator, ABC): ...
+
+
+class BinaryOperator(Operator, ABC): ...
+
+
+class UnaryOperator(Operator, ABC): ...
+
+
+class Addition(ArithmeticOperator, BinaryOperator):
+    NAME = "Add"
+    DESCRIPTION = "Adds two Biosignals, Timeseries or Segments, sample by sample."
+    SHORT = "+"
+
+    def _apply(self, first, second):
+        # _apply must return the result together with an Operation record (see Operator._apply)
+        result = first + second
+        return result, Operation(self, datetime.now(), True, first, second)
+
+    def _undo(self, first, second):
+        return first - second
diff --git a/src/ltbio/_core/serializations/edf.py b/src/ltbio/_core/serializations/edf.py
new file mode 100644
index 00000000..71d23af6
--- /dev/null
+++ b/src/ltbio/_core/serializations/edf.py
@@ -0,0 +1,237 @@
+# -- encoding: utf-8 --
+# ===================================
+# ScientISST LTBio | Long-Term Biosignals
+#
+# Package: core
+# Module: serializations
+# Description: Procedures to read and write to several file formats.
+#
+# Contributors: João Saraiva
+# Created:
+# Last Updated:
+# ===================================
+from datetime import datetime, timedelta
+from typing import Sequence
+
+from dateutil.relativedelta import relativedelta
+from pyedflib import EdfReader, EdfWriter, FILETYPE_EDFPLUS
+
+from ..exceptions import ChannelsWithDifferentStartTimepointsError
+from ..serializations.nparray import from_array
+from ltbio.biosignals._Event import Event
+from ltbio.biosignals._Biosignal import Biosignal
+from ltbio.biosignals._Timeseries import Timeseries
+from ltbio.clinical import BodyLocation
+from ltbio.clinical.Patient import Sex, Patient
+
+
+def save_to_edf(biosignal: Biosignal, filepath: str):
+    """
+    Writes a Biosignal object to an EDF+ file.
+    Global start datetime = Start timepoint of all channels (must be the same!)
+    Channels Headers = Name, Units, Sampling Frequency, Max, Min.
+    Channels Data = As floats; interruptions and channels that start later filled with NaNs (just as to_array()).
+    Annotations = Events directly associated to the Biosignal and associated to the Patient (with second precision).
+    Patient-related data = Code, Name, Gender, approx. Birthdate, Additional notes; though the age at the time of recording is lost.
+    Equipment = BiosignalSource.
+    Recording Additional = Biosignal name and notes associated.
+
+    What information is lost?
+    - Biosignal acquisition location.
+    - Timeseries internal names.
+    - Precision in each channel start timepoint and start and end of interruptions.
+    - All other data associated to the Patient, such as medical conditions, medications, and procedures.
+    - Processing history.
+
+    :param biosignal: The Biosignal to be written.
+    :param filepath: The path to where to save the file.
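+
+    Example (editor's hedged sketch; 'ecg' is a hypothetical Biosignal):
+        save_to_edf(ecg, '/tmp/ecg.edf')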
+    :return: None
+    """
+
+    # Create a writer object
+    writer = EdfWriter(filepath, n_channels=biosignal.n_channels, file_type=FILETYPE_EDFPLUS)
+
+    # Metadata about the Patient
+    writer.setPatientCode(biosignal.patient.code)
+    writer.setPatientName(biosignal.patient.name)
+    writer.setGender(1 if biosignal.patient.sex is Sex.M else 0)
+    writer.setBirthdate(datetime.now() - relativedelta(years=biosignal.patient.age))
+    writer.setPatientAdditional(str(biosignal.patient.notes))
+
+    # Other metadata
+    writer.setEquipment(biosignal.source.__name__)
+    writer.setRecordingAdditional(str(biosignal.name) + " (saved with LTBio) | Notes: " + str(biosignal.notes))
+
+    # Global start timepoint
+    global_start = biosignal.start
+    writer.setStartdatetime(global_start)
+
+    # Channels
+    channels_metadata = []
+    channels_samples = []
+    for channel_name, channel in biosignal:
+        if channel.start != global_start:
+            raise ChannelsWithDifferentStartTimepointsError("In EDF+, all channels must have the same start.")
+        channels_metadata.append(
+            {
+                'label': channel_name,
+                'dimension': str(channel.units) if channel.units is not None else '',
+                'sample_rate': channel.sampling_frequency,
+                'physical_max': channel.max() if channel.units is not None else '',
+                'physical_min': channel.min() if channel.units is not None else '',
+                'digital_max': channel.max() if channel.units is None else '',
+                'digital_min': channel.min() if channel.units is None else '',
+                'transducer': '',
+                'prefilter': ''
+            }
+        )
+        channels_samples.append(channel.to_array())  # interruptions as NaNs
+    writer.setSignalHeaders(channels_metadata)
+    writer.writeSamples(channels_samples)
+
+    # Make annotations from Events
+    for event in biosignal.events:
+        timepoint_to_mark = event.onset if event.has_onset else event.offset
+        annotation = {
+            'onset_in_seconds': (timepoint_to_mark - global_start).total_seconds(),
+            'duration_in_seconds': event.duration.total_seconds() if event.has_onset and event.has_offset else 0,
+            'description': event.name + (' (offset)' if event.has_offset and not event.has_onset else ''),
+        }
+        writer.writeAnnotation(**annotation)
+
+    writer.close()
+    del writer
+
+
+class LTBioEDFReader:
+    def __init__(self, filepath: str):
+        self.__handler = EdfReader(filepath)
+
+    @property
+    def handler(self) -> EdfReader:
+        return self.__handler
+
+    @property
+    def is_edf_plus(self) -> bool:
+        return self.__handler.filetype == FILETYPE_EDFPLUS
+
+    @property
+    def was_saved_from_ltbio(self) -> bool:
+        return '(saved with LTBio)' in self.__handler.recording_additional
+
+    def to_biosignal(self) -> Biosignal:
+        if not self.was_saved_from_ltbio:
+            raise RuntimeError("This file was not saved with LTBio. If this call was made from a personalized "
+                               "BiosignalSource, you have to use the other methods individually to create and "
+                               "populate a Biosignal object. "
+                               "'to_biosignal' is an automatic reading method only available to EDF+ files "
+                               "that were previously saved with LTBio.")
+
+        # Get name and notes (same separator that save_to_edf writes)
+        name, notes = self.__handler.recording_additional.split(' (saved with LTBio) | Notes: ')
+
+        # Create a Biosignal object
+        biosignal = Biosignal(timeseries=self.read_timeseries(), source=eval(self.read_equipment()),
+                              patient=self.read_patient(), name=name)
+
+        # Associate notes
+        biosignal.notes += eval(notes)
+
+        # Associate Events
+        biosignal.associate(*self.read_events())
+
+        return biosignal
+
+    def read_patient(self) -> Patient:
+        code = self.__handler.patientcode
+        if code == '':
+            code = Patient.generate_random_code()
+        name = self.__handler.patientname
+        sex = self.__handler.gender
+        try:
+            sex = Sex.M if sex == 1 else Sex.F if sex == 0 else None
+        except:
+            sex = None
+        notes = self.__handler.patient_additional
+
+        patient = Patient(code, name, sex)
+        patient.add_note(notes)
+        return patient
+
+    def read_equipment(self) -> str:
+        return self.__handler.equipment
+
+    def read_timeseries(self) -> dict[str | BodyLocation, Timeseries]:
+        # Get global start
+        global_start = self.__handler.getStartdatetime()
+
+        # Get Timeseries
+        timeseries = {}
+        for i in range(self.__handler.signals_in_file):
+            metadata = self.__handler.getSignalHeader(i)
+            channel_name = metadata['label']
+            try:
+                channel_name = eval(channel_name)
+            except:
+                pass
+            sampling_frequency = metadata['sample_rate']
+            units = metadata['dimension']
+            if units == '':
+                units = None
+            else:
+                try:
+                    from ltbio.biosignals.units import Unit
+                    units = eval(units)
+                except:
+                    units = None
+
+            # Signals without physical units were written as digital values
+            samples = self.__handler.readSignal(i, digital=units is None)
+            timeseries[channel_name] = from_array(samples,
+                                                  start=global_start,
+                                                  sampling_frequency=sampling_frequency,
+                                                  units=units)
+        return timeseries
+
+    def read_events(self) -> Sequence[Event]:
+        annotations = self.__handler.readAnnotations()
+        global_start = self.__handler.getStartdatetime()
+        events = []
+        for n in range(self.__handler.annotations_in_file):
+            # Annotation onsets are in seconds relative to the global start
+            onset = global_start + timedelta(seconds=annotations[0][n])
+            duration = annotations[1][n]
+            name = annotations[2][n]
+            if name.endswith(' (offset)'):
+                name = name.replace(' (offset)', '')
+                event = Event(name=name, offset=onset)
+            else:
+                if duration == 0:
+                    event = Event(name=name, onset=onset)
+                else:
+                    event = Event(name=name, onset=onset, offset=onset + timedelta(seconds=duration))
+            events.append(event)
+        return events
+
+    def read_recording_additional_notes(self) -> str:
+        return self.__handler.recording_additional
+
+
+def load_from_edf(filepath: str) -> Biosignal:
+    """
+    Reads an EDF or EDF+ file into a Biosignal object.
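+    Example (editor's hedged sketch): biosignal = load_from_edf('/path/to/recording.edf')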
+ """ + reader = LTBioEDFReader(filepath) + if reader.was_saved_from_ltbio: + return reader.to_biosignal() + else: + # Get name and notes + recording_additional = reader.read_recording_additional_notes() + name = recording_additional[:Biosignal.MAX_NAME_LENGTH] + notes = recording_additional[Biosignal.MAX_NAME_LENGTH + 1:] + + # Create a Biosignal object + biosignal = Biosignal(timeseries=reader.read_timeseries(), source=eval(reader.read_equipment()), + patient=reader.read_patient(), name=name) + + # Associate notes + biosignal.notes += notes + + # Associate Events + biosignal.associate(*reader.read_events()) + + return biosignal diff --git a/src/ltbio/_core/serializations/nparray.py b/src/ltbio/_core/serializations/nparray.py new file mode 100644 index 00000000..473bf053 --- /dev/null +++ b/src/ltbio/_core/serializations/nparray.py @@ -0,0 +1,130 @@ +# -- encoding: utf-8 -- +# =================================== +# ScientISST LTBio | Long-Term Biosignals +# +# Package: +# Module: +# Description: +# +# Contributors: João Saraiva +# Created: +# Last Updated: +# =================================== +from datetime import datetime + +import numpy as np +from math import ceil +from multimethod import multimethod +from numpy import ndarray + +from ltbio.biosignals._Segment import Segment +from ltbio.biosignals._Biosignal import Biosignal +from ltbio.biosignals._Timeseries import Timeseries + + +@multimethod +def to_array(biosignal: Biosignal) -> ndarray: + """ + Converts Biosignal to a numpy array. + The initial datetime is that of the earliest channel. The final datetime is that of the latest channel. + When a channel is not defined, the value is NaN (e.g. interruptions, beginings, ends). + If the channels are not sampled at the same frequency, the highest sampling frequency is used, and the channels with lower sampling + frequency are resampled. + :return: A 2D numpy array. + """ + + # Get the maximum sampling frequency of the Biosignal + max_sf = max(channel.sampling_frequency for _, channel in biosignal) + + # Get the arrays of all channels + channels_as_arrays = [] + for i, (_, channel) in enumerate(biosignal): + if channel.sampling_frequency != max_sf: # Resample the channel, if necessary + channel._resample(max_sf) + # Convert channel to array + channels_as_arrays.append(channel.to_array()) + + # Get the length of the samples axes + n_samples = ceil((biosignal.final_datetime - biosignal.initial_datetime).total_seconds() * max_sf) + + # Create the array full of NaNs + res = np.full((len(biosignal), n_samples), np.nan) + + # Fill the array + for i, ((_, channel), channel_as_array) in enumerate(zip(biosignal, channels_as_arrays)): + # Get the index of the first position of this channel in the array + initial_ix = round((channel.initial_datetime - biosignal.initial_datetime).total_seconds() * max_sf) + # Broadcat samples to the array + res[i, initial_ix: initial_ix + len(channel_as_array)] = channel_as_array + + return res + + +@multimethod +def to_array(timeseries: Timeseries) -> ndarray: + """ + Converts a Timeseries into a numpy array. + If the Timeseries is composed of multiple Segments, the interruptions are filled with NaNs. + :return: A 1D numpy array. 
+ """ + res = np.array(timeseries.segments[0].samples) + for i in range(1, len(timeseries.segments)): + segment = timeseries.segments[i] + # get the time between the end of the current segment and the start of the next one + time_between_segments = timeseries.segments[i].initial_datetime - timeseries.segments[i - 1].final_datetime + # number of NaNs to fill the gap + n_nans = round(timeseries.sampling_frequency * time_between_segments.total_seconds()) + # fill the gap with NaNs + res = np.concatenate((res, [np.nan] * n_nans)) + # add the samples of the current segment + res = np.concatenate((res, segment.samples)) + return res + + +def _from_array_biosignal(array: ndarray, start: datetime, max_sampling_frequency: float, units=None) -> Biosignal: + """ + Creates a Biosignal from a 2D NumPy array, reversing the operation of to_array. + """ + timeseries = {f'ix{i}': _from_array_timeseries(channel, start, max_sampling_frequency, units) for i, channel in enumerate(array)} + return Biosignal(timeseries) + + +def _from_array_timeseries(array: ndarray, start: datetime, sampling_frequency: float, units=None) -> Timeseries: + """ + Creates a Timeseries from a 1D NumPy array, reversing the operation of to_array. + Example: [1 2 3 4 5 NaN NaN 6 7 8 9 NaN 10 11 12] -> will return a Timeseries with 3 segments: + - [1 2 3 4 5], + - [6 7 8 9], and + - [10 11 12]. + """ + + # Get the indices of the non-NaNs + non_nan_indices = np.where(~np.isnan(array))[0] + # Get the indices of the non-NaNs that are not followed by a NaN + non_nan_indices = non_nan_indices[np.where(np.diff(non_nan_indices) != 1)[0]] + # Make a list with (start, end) tuples of each segment + segment_indices = [(non_nan_indices[i], non_nan_indices[i + 1]) for i in range(len(non_nan_indices) - 1)] + + segments_by_time = {} + for start_ix, end_ix in segment_indices: + segments_by_time[start + (start_ix / sampling_frequency)] = Segment(array[start_ix: end_ix + 1]) + + return Timeseries(segments_by_time, sampling_frequency, units=units) + + +def from_array(array: ndarray, start: datetime, sampling_frequency: float, units=None) -> Biosignal | Timeseries: + """ + Creates a Biosignal (if 2D) or a Timeseries (if 1D) from a NumPy array. + Reverses the operation of to_array. + + :param array: The array to be converted. 
+    :return: Biosignal or Timeseries
+    """
+
+    if len(array.shape) == 2:
+        return _from_array_biosignal(array, start, sampling_frequency, units)
+    elif len(array.shape) == 1:
+        return _from_array_timeseries(array, start, sampling_frequency, units)
+    else:
+        raise ValueError(f"Invalid array shape: {array.shape}")
+
diff --git a/src/ltbio/_core/statistics.py b/src/ltbio/_core/statistics.py
new file mode 100644
index 00000000..ed9fa7d7
--- /dev/null
+++ b/src/ltbio/_core/statistics.py
@@ -0,0 +1,14 @@
+# -- encoding: utf-8 --
+# ===================================
+# ScientISST LTBio | Long-Term Biosignals
+
+# Package:
+# Module:
+# Description:
+
+# Contributors: João Saraiva
+# Created:
+# Last Updated:
+# ===================================
+
+
diff --git a/src/ltbio/biosignals/_Biosignal.py b/src/ltbio/biosignals/_Biosignal.py
new file mode 100644
index 00000000..7010ef74
--- /dev/null
+++ b/src/ltbio/biosignals/_Biosignal.py
@@ -0,0 +1,1616 @@
+# -- encoding: utf-8 --
+
+# ===================================
+
+# IT - LongTermBiosignals
+
+# Package: biosignals
+# Module: init
+# Description: Essential classes for the .biosignals package: Biosignal, MultimodalBiosignal and Event
+
+# Contributors: João Saraiva
+# Created: 07/03/2023
+
+# ===================================
+
+import logging
+from abc import ABC, ABCMeta
+from copy import deepcopy
+from dataclasses import dataclass
+from datetime import datetime, timedelta
+from inspect import isclass, signature
+from logging import warning
+from shutil import rmtree
+from tempfile import mkdtemp
+from typing import Dict, Tuple, Collection, Set, Callable, Type
+
+import matplotlib.pyplot as plt
+import numpy as np
+from datetimerange import DateTimeRange
+from dateutil.parser import parse as to_datetime
+from math import ceil
+from multimethod import multimethod
+from numpy import ndarray
+from pandas import DataFrame
+
+from ._BiosignalSource import BiosignalSource as BS
+from ._Event import Event
+from ._Timeline import Timeline
+from ._Timeseries import Timeseries
+# from ..processing.noises.Noise import Noise  # FIXME
+from ._Timeseries import Timeseries as Noise
+from .units import Unitless
+from .._core.exceptions import EventNotFoundError, ChannelNotFoundError
+# from ...processing.filters.Filter import Filter
+from ..clinical.BodyLocation import BodyLocation
+from ..clinical.Patient import Patient
+
+
+# ===================================
+# Base Class 'Biosignal' and 'MultimodalBiosignal'
+# ===================================
+
+@dataclass
+class Biosignal(ABC):
+    """
+    A Biosignal is a set of channels (Timeseries) measuring a biological variable.
+    It has start and end timepoints.
+    It may be associated with a source, a patient, and a body location. It can also have a name.
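+
+    Example (editor's hedged sketch; ECG is one concrete modality and 'ts' a hypothetical Timeseries):
+        ecg = ECG({'V5': ts}, name='Resting ECG')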
+ """ + + # =================================== + # INITIALIZERS + + def __check_and_set_attribute(self, attribute, value, type, error_label: str, optional: bool): + if optional: + if not isinstance(value, type | None): + raise TypeError(f"{error_label} must be a {type} or None.") + else: # mandatory + if value is None: + raise ValueError(f"{error_label} must not be None.") + if not isinstance(value, type): + raise TypeError(f"{error_label} must be a {type}.") + setattr(self, attribute, value) + + def __check_and_set_timeseries(self, timeseries, error_label: str): + # Check if all keys are strings or BodyLocation + for key in timeseries.keys(): + if not isinstance(key, str) and not isinstance(key, BodyLocation): + raise TypeError(f"All keys in {error_label} must be strings or BodyLocation.") + # Check if all values are Timeseries + for ts in timeseries.values(): + if not isinstance(ts, Timeseries): + raise TypeError(f"All values in {error_label} must be Timeseries.") + self.__timeseries = timeseries + + # A. Ad-hoc + @multimethod + def __init__(self, + timeseries: dict[str | BodyLocation, Timeseries], source: BS = None, patient: Patient = None, + acquisition_location: BodyLocation = None, name: str = None): + """ + Initializes a Biosignal from a dictionary of Timeseries. + Source is optional and won't influence the process. + """ + + # Set self.__timeseries + self.__check_and_set_timeseries(timeseries, "'timeseries'") + + # Check if Timeseries come with Events associated #FIXME + for ts in timeseries.values(): + for event in ts.events: + if event.name in self.__associated_events and self.__associated_events[event.name] != event: + raise AssertionError( + "There are different Events with the same name among the Timeseries given.") + else: + self.__associated_events[event.name] = event + + # Set other attributes + self.__check_and_set_attribute('__source', source, BS.__subclasses__(), "'source'", True) + self.__check_and_set_attribute('__patient', patient, Patient, "'patient'", True) + self.__check_and_set_attribute('__acquisition_location', acquisition_location, BodyLocation, + "'acquisition_location'", True) + self.__check_and_set_attribute('__name', name, str, "'name'", True) + + # B. From files + @multimethod + def __init__(self, path: str, source: BS = None, patient: Patient = None, acquisition_location: BodyLocation = None, + name: str = None): + """ + Initializes a Biosignal from files. + 'path' points to a directory organized in a way the given BiosignalSource understands and is capable of reading. + """ + + # Set source + self.__check_and_set_attribute('__source', source, BS.__subclasses__(), + "To read a Biosignal from files, the given 'source'", False) + + # BS can give the samples (required) and many other optional metadata. It's the BS that decides what it gives, + # depending on what it can get. Get all data that the source can read: + data = self.__source._read(path, type(self)) + + # Unwrap data: + # 'timeseries': dictionary of Timeseries (required) + # 'patient': Patient + # 'acquisition_location': BodyLocation + # 'name': string + # 'events': tuple of Events + # If user gives metadata, override what was read by the source. 
+        self.__timeseries = data['timeseries']
+        self.__check_and_set_attribute('__patient', data['patient'] if patient is None else patient, Patient,
+                                       "'patient'", True)
+        self.__check_and_set_attribute('__acquisition_location', data[
+            'acquisition_location'] if acquisition_location is None else acquisition_location, BodyLocation,
+                                       "'acquisition_location'", True)
+        self.__check_and_set_attribute('__name', data['name'] if name is None else name, str, "'name'", True)
+        if data['events'] is not None:
+            self.annotate(data['events'])
+
+    # ===================================
+    # SPECIAL INITIALIZERS
+    @classmethod
+    def from_template(cls):
+        pass
+
+    @classmethod
+    def with_additive_noise(cls, original, noise, name=None):
+        """
+        Creates a new Biosignal from 'original' with added 'noise'.
+
+        :param original: (Biosignal) The original Biosignal to be contaminated with noise.
+        :param noise: (Noise | Timeseries | Biosignal) The noise to add to the original Biosignal.
+        :param name: (str) The name to associate to the resulting Biosignal.
+
+        When 'noise' is a Noise:
+        - A trench of noise, with the duration of the channel, will be generated to be added to each channel.
+        - 'noise' should be configured with the same sampling frequency as the channels.
+
+        When 'noise' is a Biosignal:
+            When it has the same set of channels as 'original', sampled at the same frequency:
+            - Each noisy channel will be added to the corresponding channel of 'original', in a template-wise manner.
+            When it has a unique channel:
+            - That noisy channel will be added to every channel of 'original', in a template-wise manner.
+            - That noisy channel should have the same sampling frequency as every channel of 'original'.
+            - If 'noise' has multiple segments, they are concatenated to make a hyper-template.
+            - Exception: in the case where both Timeseries have the same domain, the noisy samples will be added in a
+              segment-wise manner.
+
+        When 'noise' is a Timeseries sampled at the same frequency as 'original':
+        - Its samples will be added to every channel of 'original', in a template-wise manner.
+        - If 'noise' has multiple segments, they are concatenated to make a hyper-template.
+        - Exception: in the case where both Timeseries have the same domain, the noisy samples will be added in a
+          segment-wise manner.
+        - 'noise' should have been sampled at the same frequency as 'original'.
+
+        What is the "template-wise manner"?
+        - If the template segment is longer than any original segment, the template segment will be trimmed accordingly.
+        - If the template segment is shorter than any original segment, the template will be repeated in time.
+        - If the two segments are of equal length, they are added as they are.
+
+        :return: A Biosignal with the same properties as the 'original', but with noise added to the samples of every channel.
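+
+        Example (editor's hedged sketch): a 1-second noise template added to a 10-second channel is tiled
+        ten times before the samples are summed; added to a 0.5-second channel, only the first half of the
+        template is used.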
+        :rtype: Biosignal subclass
+        """
+
+        if not isinstance(original, Biosignal):
+            raise TypeError(f"Parameter 'original' must be of type Biosignal; but {type(original)} was given.")
+
+        if not isinstance(noise, (Noise, Timeseries, Biosignal)):
+            raise TypeError(
+                f"Parameter 'noise' must be of types Noise, Timeseries or Biosignal; but {type(noise)} was given.")
+
+        if name is not None and not isinstance(name, str):
+            raise TypeError(
+                f"Parameter 'name' must be of type str; but {type(name)} was given.")
+
+        def __add_template_noise(samples: ndarray, template: ndarray):
+            # Case A: the template is longer than the segment
+            if len(samples) < len(template):
+                _template = template[:len(samples)]  # cut where it is enough
+                return samples + _template  # add values
+            # Case B: the segment is longer than the template
+            elif len(samples) > len(template):
+                _template = np.tile(template, ceil(len(samples) / len(template)))  # repeat full-pattern
+                _template = _template[:len(samples)]  # cut where it is enough
+                return samples + _template  # add values
+            # Case C
+            else:  # equal lengths
+                return samples + template  # add values
+
+        def __noisy_timeseries(original: Timeseries, noise: Timeseries) -> Timeseries:
+            # Case 1: Segment-wise
+            if original.domain == noise.domain:
+                template = [noise.samples, ] if noise.is_contiguous else noise.samples
+                return original._apply_operation_and_new(__add_template_noise, template=template,
+                                                         iterate_over_each_segment_key='template')
+            # Case 2: Template-wise
+            elif noise.is_contiguous:
+                template = noise.samples
+                return original._apply_operation_and_new(__add_template_noise, template=template)
+            # Case 3: Template-wise, with hyper-template
+            else:
+                template = np.concatenate(noise.samples)  # concatenate as a hyper-template
+                return original._apply_operation_and_new(__add_template_noise, template=template)
+
+        noisy_channels = {}
+
+        # Case Noise
+        if isinstance(noise, Noise):
+            for channel_name in original.channel_names:
+                channel = original._get_channel(channel_name)
+                if channel.sampling_frequency == noise.sampling_frequency:
+                    template = noise[channel.duration]
+                    noisy_channels[channel_name] = channel._apply_operation_and_new(__add_template_noise,
+                                                                                    template=template)
+                else:
+                    raise AssertionError(
+                        f"Noise does not have the same sampling frequency as channel '{channel_name}' of 'original'. "
+                        f"Suggestion: Resample one of them first.")
+
+        # Case Timeseries
+        elif isinstance(noise, Timeseries):
+            for channel_name in original.channel_names:
+                channel = original._get_channel(channel_name)
+                if channel.units != noise.units and channel.units is not None and channel.units != Unitless and noise.units is not None and noise.units != Unitless:
+                    raise AssertionError(
+                        f"Noise does not have the same units as channel '{channel_name}' of 'original'. "
+                        f"Suggestion: If possible, convert one of them first or drop units.")
+                if channel.sampling_frequency == noise.sampling_frequency:
+                    noisy_channel = __noisy_timeseries(channel, noise)
+                    noisy_channels[channel_name] = noisy_channel
+                else:
+                    raise AssertionError(
+                        f"Noise does not have the same sampling frequency as channel '{channel_name}' of 'original'. "
+                        f"Suggestion: Resample one of them first.")
+
+        elif isinstance(noise, Biosignal):
+            # Case Biosignal channel-wise
+            if original.channel_names == noise.channel_names:
+                for channel_name in original.channel_names:
+                    original_channel = original._get_channel(channel_name)
+                    noise_channel = noise._get_channel(channel_name)
+                    if original_channel.units != noise_channel.units and original_channel.units is not None and original_channel.units != Unitless and noise_channel.units is not None and noise_channel.units != Unitless:
+                        raise AssertionError(
+                            f"Noise does not have the same units as channel '{channel_name}' of 'original'. "
+                            f"Suggestion: If possible, convert one of them first or drop units.")
+                    if original_channel.sampling_frequency == noise_channel.sampling_frequency:
+                        noisy_channel = __noisy_timeseries(original_channel, noise_channel)
+                        noisy_channels[channel_name] = noisy_channel
+                    else:
+                        raise AssertionError(
+                            f"Channels '{channel_name}' do not have the same sampling frequency in 'original' and 'noise'. "
+                            f"Suggestion: Resample one of them first.")
+
+            # Case Biosignal unique channel
+            elif noise.has_single_channel:
+                _, x = tuple(iter(noise))[0]
+                for channel_name in original.channel_names:
+                    channel = original._get_channel(channel_name)
+                    if channel.units != x.units and channel.units is not None and channel.units != Unitless and x.units is not None and x.units != Unitless:
+                        raise AssertionError(
+                            f"Noise does not have the same units as channel '{channel_name}' of 'original'. "
+                            f"Suggestion: If possible, convert one of them first or drop units.")
+                    if channel.sampling_frequency == x.sampling_frequency:
+                        noisy_channel = __noisy_timeseries(channel, x)
+                        noisy_channels[channel_name] = noisy_channel
+                    else:
+                        raise AssertionError(
+                            f"Noise does not have the same sampling frequency as channel '{channel_name}' of 'original'. "
+                            f"Suggestion: Resample one of them first.")
+
+            else:
+                raise ArithmeticError("Noise should have 1 channel only (to be added to every channel of 'original') "
+                                      "or the same channels as 'original' (for each to be added to the corresponding channel of 'original').")
+
+        # Union of Events (only Biosignals carry associated events here)
+        events = set(original._Biosignal__associated_events.values())
+        if isinstance(noise, Biosignal):
+            events = events.union(set(noise._Biosignal__associated_events.values()))
+
+        return original._new(timeseries=noisy_channels, name=name if name is not None else 'Noisy ' + original.name,
+                             events=events, added_noise=noise)
+
+    @classmethod
+    def from_noise(cls, noises, time_intervals, name=None):
+        """
+        Creates a type of Biosignal from a noise source.
+
+        :param noises:
+            - If a Noise object is given, the Biosignal will have 1 channel for the specified time interval.
+            - If a dictionary of Noise objects is given, the Biosignal will have multiple channels, with different
+              generated samples, for the specified time interval, named after the dictionary keys.
+
+        :param time_intervals: Interval [x, y[ where x will be the initial date and time of every channel, and y will
+            be the final date and time of every channel; or a union of such intervals, in case a tuple is given.
+
+        :param name: The name to be associated to the Biosignal. Optional.
+
+        :return: Biosignal subclass
+        """
+
+        if not (isinstance(time_intervals, DateTimeRange) or (isinstance(time_intervals, tuple) and
+                all(isinstance(x, DateTimeRange) for x in time_intervals))):
+            raise TypeError(f"Parameter 'time_intervals' should be of type DateTimeRange or a tuple of them.")
+
+        if isinstance(time_intervals, tuple) and len(time_intervals) == 1:
+            time_intervals = time_intervals[0]
+
+        channels = {}
+
+        if isinstance(noises, Noise):
+            if isinstance(time_intervals, DateTimeRange):
+                samples = noises[time_intervals.timedelta]
+                channels[noises.name] = Timeseries(samples, time_intervals.start_datetime,
+                                                   noises.sampling_frequency,
+                                                   units=Unitless(), name=noises.name)
+            else:
+                segments = {x.start_datetime: noises[x.timedelta] for x in time_intervals}
+                channels[noises.name] = Timeseries.withDiscontiguousSegments(segments, noises.sampling_frequency,
                                                                             units=Unitless(), name=noises.name)
+
+        elif isinstance(noises, dict):
+            if isinstance(time_intervals, DateTimeRange):
+                for channel_name, noise in noises.items():
+                    samples = noise[time_intervals.timedelta]
+                    channels[channel_name] = Timeseries(samples, time_intervals.start_datetime,
+                                                        noise.sampling_frequency,
+                                                        units=Unitless(), name=noise.name + f" : {channel_name}")
+            else:
+                for channel_name, noise in noises.items():
+                    segments = {x.start_datetime: noise[x.timedelta] for x in time_intervals}
+                    channels[channel_name] = Timeseries.withDiscontiguousSegments(segments,
+                                                                                  noise.sampling_frequency,
+                                                                                  units=Unitless(),
+                                                                                  name=noise.name + f" : {channel_name}")
+
+        return cls(channels, name=name)
+
+    def _new(self, timeseries: Dict[str | BodyLocation, Timeseries] | str | Tuple[datetime] = None,
+             source: BS.__subclasses__() = None, patient: Patient = None, acquisition_location: BodyLocation = None,
+             name: str = None,
+             events: Collection[Event] = None, added_noise=None):
+        timeseries = {ts: self.__timeseries[ts] for ts in
+                      self.__timeseries} if timeseries is None else timeseries  # copy
+        source = self.__source if source is None else source  # no copy
+        patient = self.__patient if patient is None else patient  # no copy
+        acquisition_location = self.__acquisition_location if acquisition_location is None else acquisition_location  # no copy
+        if name is None:
+            name = str(self.__name) if self.__name is not None else None  # copy
+
+        new = type(self)(timeseries, source, patient, acquisition_location, name)
+
+        # Associate events; no need to copy
+        events = self.__associated_events if events is None else events
+        events = events.values() if isinstance(events, dict) else events
+        # Check if some event can be associated
+        logging.disable(
+            logging.WARNING)  # if outside the domain of every channel -> no problem; the Event will not be associated
+        new.annotate(events)
+        logging.disable(logging.NOTSET)  # undo suppressing warnings
+
+        # Associate added noise reference:
+        if added_noise is not None:
+            new._Biosignal__added_noise = added_noise
+
+        return new
+
+    def _apply_operation_and_new(self, operation,
+                                 source: BS.__subclasses__() = None, patient: Patient = None,
+                                 acquisition_location: BodyLocation = None, name: str = None,
+                                 events: Collection[Event] = None,
+                                 **kwargs):
+        new_channels = {}
+        for channel_name in self.channel_names:
+            new_channels[channel_name] = self.__timeseries[channel_name]._apply_operation_and_new(operation, **kwargs)
+        return self._new(new_channels, source=source, patient=patient, acquisition_location=acquisition_location,
+                         name=name, events=events)
+
+    def _apply_operation_and_return(self, operation, **kwargs):
+        pass  # TODO
+
+    # ===================================
+    # PROPERTIES (Booleans)
+    @property
+    def has_single_channel(self):
+        """Returns True if the Biosignal has only one channel, False otherwise."""
+        return self.n_channels == 1
+
+    # ===================================
+    # PROPERTIES (Getters)
+    @property
+    def name(self):
+        """Returns the associated name, or 'No Name' if none was provided."""
+        return self.__name if self.__name is not None else "No Name"
+
+    @property
+    def n_channels(self):
+        """Returns the number of channels of the Biosignal."""
+        return len(self.__timeseries)
+
+    @property
+    def channels(self):
+        """Returns the channels of the Biosignal."""
+        return set(self.__timeseries.values())
+
+    @property
+    def channel_names(self):
+        """Returns the set of names that allow to identify the channels."""
+        return set(self.__timeseries.keys())
+
+    @property
+    def patient(self):
+        """Returns the associated patient, or None if none is associated."""
+        return self.__patient
+
+    @property
+    def acquisition_location(self):
+        """Returns the associated acquisition location, or None if none is associated."""
+        return self.__acquisition_location
+
+    @property
+    def source(self):
+        """Returns the source from where the data was read, or None if none is associated."""
+        return self.__source
+
+    @property
+    def start(self):
+        """Returns the start timepoint of the channel that starts the earliest."""
+        return min([ts.start for ts in self.__timeseries.values()])
+
+    @property
+    def end(self):
+        """Returns the end timepoint of the channel that ends the latest."""
+        return max([ts.end for ts in self.__timeseries.values()])
+
+    @property
+    def domain(self):
+        """
+        Returns a Timeline with the domain of each channel, i.e. when the channel is defined, i.e. has recorded samples.
+        """
+        if self.n_channels == 1:
+            domain = tuple(self.__timeseries.values())[0].domain
+        else:
+            channels = tuple(self.__timeseries.values())
+            domain: Tuple[DateTimeRange]
+            for k in range(1, self.n_channels):
+                if k == 1:
+                    domain = channels[k].overlap(channels[k - 1])
+                else:
+                    domain = channels[k].overlap(domain)
+        return Timeline(Timeline.Group(domain), name=self.name + ' Domain')
+
+    @property
+    def duration(self):
+        """
+        Returns the useful duration of the Biosignal, i.e. when all channels are simultaneously defined.
+        This is mathematically computed by the intersection of all channels' domains.
+        """
+        return self.domain.duration
+
+    @property
+    def __events(self):
+        return self.__associated_events | self.__get_events_from_medical_conditions()
+
+    @property
+    def events(self):
+        """
+        Tuple of associated Events sorted by datetime.
+        This includes both the events directly associated to the Biosignal and the events associated to the patient.
+        """
+        return tuple(sorted(self.__events.values(), key=lambda e: e.datetime))
+
+    def __get_property_based_on_channels(self, property: Callable):
+        """
+        Returns a property of the Biosignal based on the value of that property in all its channels.
+        If all channels have the same value, returns that value.
+        If different, returns a dictionary with the value of each channel, indexed by the channel name.
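+
+        Example (editor's hedged sketch): queried for sampling frequency, channels {'V1': 100.0, 'V2': 100.0}
+        yield 100.0, whereas {'V1': 100.0, 'V2': 50.0} yield that dictionary itself.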
+ """ + property_value = {} + last_pv = None + all_equal = True + for channel_name, channel in self.__timeseries.items(): + x = property(channel) + last_pv = x + property_value[channel_name] = x + if last_pv is not None and last_pv != x: + all_equal = False + if all_equal: + return last_pv + else: + return property_value + + @property + def sampling_frequency(self) -> float | dict[str, BodyLocation: float]: + """ + Returns the sampling frequency of every channel. + If equal, returns one frequency. + If different, returns a dictionary with the sampling frequency of each channel, indexed by the channel name. + """ + return self.__get_property_based_on_channels(Timeseries.sampling_frequency) + + # PROPERTIES (Setters) + @name.setter + def name(self, name): + """Associates the given name.""" + self.__name = name + + @patient.setter + def patient(self, patient: Patient): + """Associates the given patient.""" + self.__patient = patient + + @acquisition_location.setter + def acquisition_location(self, acquisition_location: BodyLocation): + """Associates the given acquisition location.""" + self.__acquisition_location = acquisition_location + + # =================================== + # GETTERS AND SETTERS FOR MEMBERS AND ASSOCIATIONS + + def __getattr__(self, name): + """ + Returns the Timeseries representing the channel with the given name, or the Event with the given name. + """ + try: + return self.get_channel(name) + except ChannelNotFoundError: + try: + return self.get_event(name) + except EventNotFoundError: + raise AttributeError(f"There is no channel nor event named '{name}'.") + + def get_event(self, name): + """ + Returns the Event with the given name. + """ + if name in self.__events: + return self.__associated_events[name] + raise EventNotFoundError(name) + + def set_event_name(self, current, new): + """ + Changes the current name of an Event to the new name. + """ + if current in self.__associated_events.keys(): + event = self.__associated_events[current] + self.__associated_events[new] = Event(new, event._Event__onset, event._Event__offset) + del self.__associated_events[current] + else: + raise EventNotFoundError(current) + + def get_channel(self, name): + """ + Returns the Timeseries representing the channel with the given name. + """ + if name in self.__timeseries: + return self.__timeseries[name] + raise ChannelNotFoundError(name) + + def set_channel_name(self, current, new): + """ + Changes the current name of a channel to the new name. + """ + if current in self.__timeseries: + self.__timeseries[new] = self.__timeseries[current] + del self.__timeseries[current] + else: + raise ChannelNotFoundError(current) + + def _get_single_channel(self) -> tuple[str | BodyLocation, Timeseries]: + """ + Returns the single channel of the Biosignal. + :return: channel_name, channel + """ + if not self.has_single_channel: + raise AttributeError(f"This Biosignal does not have a single channel. 
It has multiple channels.") + return tuple(self.__timeseries.items())[0] + + # =================================== + # EVENTS + def __associate_one_event(self, event: Event): + n_channels_associated = 0 + for _, channel in self: + try: + channel.associate(event) + n_channels_associated += 1 + except ValueError: + pass + if n_channels_associated > 0: # If at least one association was possible + self.__associated_events[event.name] = event + else: + warning(f"Event '{event.name}' was not associated, because it is outside of every channel's domain.") + + @multimethod + def associate(self, *events: Event): + """ + Associates an Event to all Timeseries. + Events names will serve as keys. + @param events: One or multiple Event objects. + """ + for event in events: + self.__associate_one_event(event) + + @multimethod + def associate(self, **events: Event): + """ + Associates an Event to all Timeseries. + The new keys given will serve as keys for the Events, and their names will be overwritten with these keys. + @param events: One or multiple Event objects. + """ + for event_key in events: + event = events[event_key] + self.__associate_one_event(Event(event_key, event._Event__onset, event._Event__offset)) + + @multimethod + def disassociate(self, *events: Event): + pass + + @multimethod + def disassociate(self, *events: str): + """ + Disassociates an Event from all Timeseries. + @param event_name: The name of the Event to be removed. + @rtype: None + """ + for event_name in events: + if event_name in self.__associated_events: + for _, channel in self: + try: + channel.disassociate(event_name) + except NameError: + pass + del self.__associated_events[event_name] + else: + raise warning(f"There's no Event '{event_name}' associated to this Biosignal.") + + def disassociate_all_events(self): + for _, channel in self: + channel.delete_events() + self.__associated_events = {} + + def __get_events_from_medical_conditions(self): + res = {} + for condition in self.patient_conditions: + res.update(condition._get_events()) + return res + + # =================================== + # BUILT-INS (Basic) + def __len__(self): + """ + Returns the number of samples of every channel. + If equal, returns one number. + If different, returns a dictionary with the number of samples of each channel, indexed by the channel name. 
+ """ + return self.__get_property_based_on_channels(Timeseries.__len__) + + def __copy__(self): + return type(self)({ts: self.__timeseries[ts].__copy__() for ts in self.__timeseries}, self.__source, + self.__patient, self.__acquisition_location, self.__name.__copy__()) + + def __repr__(self): + """Returns a textual description of the Biosignal.""" + res = "Name: {}\nType: {}\nLocation: {}\nNumber of Channels: {}\nChannels: {}\nUseful Duration: {}\nSource: {}\n".format( + self.name, + self.type.__name__, + self.acquisition_location, + self.n_channels, + ''.join([(x + ', ') for x in self.channel_names]), + self.duration, + self.source.__str__(None) if isinstance(self.source, ABCMeta) else str(self.source)) + + if len(self.__associated_events) != 0: + res += "Events:\n" + for event in sorted(self.__associated_events.values()): + res += '- ' + str(event) + '\n' + events_from_medical_conditions = dict( + sorted(self.__get_events_from_medical_conditions().items(), key=lambda item: item[1])) + if len(events_from_medical_conditions) != 0: + res += "Events associated to Medical Conditions:\n" + for key, event in events_from_medical_conditions.items(): + res += f"- {key}:\n{event}\n" + return res + + def __str__(self): + """Represents the Biosignal as a short string with the name, modality and number of channels.""" + return f"{type(self) if self.name is None else self.name + ' (' + type(self).__name__ + ')'} ({self.n_channels} channels)" + + def __iter__(self): + """Iterates over the channels. Each yield is a tuple (channel name, Timeseries).""" + return self.__timeseries.items().__iter__() + + @multimethod + def __contains__(self, item: str): + """ + Returns True if the Biosignal has a channel or an event with the given name. + The event can be associated directly to the Biosignal or to the patient. + """ + return item in self.__timeseries or item in self.__events + + @multimethod + def __contains__(self, item: datetime | DateTimeRange): + """Returns True if any channel defines this point or interval in time.""" + return any([item in channel for _, channel in self]) + + # =================================== + # BUILT-INS (Arithmetic Operations) + @multimethod + def __add__(self, value: float): + """Adds a constant to every channel. Translation of the signal.""" + return self._apply_operation_and_new(lambda x: x + value, name=self.name + f' (shifted up by) {str(value)}') + + @multimethod + def __sub__(self, value: float): + return self + (value.__neg__()) + + @multimethod + def __mul__(self, value: float): + suffix = f' (dilated up by {str(value)})' if value > 1 else f' (compressed up by {str(value)})' + return self._apply_operation_and_new(lambda x: x * value, name=self.name + suffix) + + @multimethod + def __truediv__(self, value: float): + return self * (value.__invert__()) + + def __invert__(self): + self._apply_operation_and_new(lambda x: 1 / x) + + def __neg__(self): + return self * -1 + + # =================================== + # BUILT-INS (Joining Biosignals) + + @multimethod + def __add__(self, other: 'Biosignal'): + """ + Adds both Biosignals sample-by-sample, if they have the same domain. + Notes: + - If the two Biosignals have two distinct acquisition locations, they will be lost. + - If the two Biosignals have two distinct sources, they will be lost. + - If the two Biosignals have the distinct patients, they will be lost. + Raises: + - TypeError if Biosignals are not of the same type. + - ArithmeticError if Biosignals do not have the same domain or non-matching names. 
+ """ + + # Check errors + if self.type != other.type: + while True: + answer = input( + f"Trying to add an {self.type.__name__} with an {other.type.__name__}. Do you mean to add templeates of the second as noise? (y/n)") + if answer.lower() in ('y', 'n'): + if answer.lower() == 'y': + return Biosignal.with_additive_noise(self, other) + else: + raise TypeError("Cannot add a {0} to a {1} if not as noise.".format(other.type.__name__, + self.type.__name__)) + + if ( + not self.has_single_channel or not other.has_single_channel) and self.channel_names != other.channel_names: + raise ArithmeticError( + "Biosignals to add must have the same number of channels and the same channel names.") # unless each only has one channel + if self.domain != other.domain: + raise ArithmeticError("Biosignals to add must have the same domains.") + + # Prepare common metadata + name = f"{self.name} + {other.name}" + acquisition_location = self.acquisition_location if self.acquisition_location == other.acquisition_location else None + patient = self.__patient if self.patient_code == other.patient_code else None + if isclass(self.source) and isclass(other.source): # Un-instatiated sources + if self.source == other.source: + source = self.__source + else: + source = None + else: + if type(self.source) == type(other.source) and self.source == other.source: + source = self.__source + else: + source = None + + # Perform addition + res_timeseries = {} + if self.has_single_channel and other.has_single_channel: + ch_name1, ch1 = self._get_single_channel() + ch_name2, ch2 = self._get_single_channel() + res_timeseries[f'{ch_name1}+{ch_name2}'] = ch1 + ch2 + else: + for channel_name in self.channel_names: + res_timeseries[channel_name] = self._to_dict()[channel_name] + other._to_dict()[channel_name] + + # Union of Events + events = set(self.__associated_events.values()).union(set(other._Biosignal__associated_events.values())) + + return self._new(timeseries=res_timeseries, source=source, patient=patient, + acquisition_location=acquisition_location, + name=name, events=events) + + @multimethod + def __mul__(self, other: 'Biosignal'): + pass + + def __and__(self, other: 'Biosignal'): + """ + Joins the channels of two Biosignals of the same type, if they do not have the same set of channel names. + Notes: + - If the two Biosignals have two distinct acquisition locations, they will be lost. + - If the two Biosignals have two distinct sources, they will be lost. + - If the two Biosignals have the distict patients, they will be lost. + Raises: + - TypeError if Biosignals are not of the same type. + - ArithmeticError if both Biosignals have any channel name in common. 
+ """ + + # Check errors + if not isinstance(other, Biosignal): + raise TypeError(f"Operation join channels is not valid with object of type {type(other)}.") + if self.type != other.type: + raise TypeError("Cannot join a {0} to a {1}".format(other.type.__name__, self.type.__name__)) + if len(self.channel_names.intersection(other.channel_names)) != 0: + raise ArithmeticError("Channels to join cannot have the same names.") + + # Prepare common metadata + name = f"{self.name} and {other.name}" + acquisition_location = self.acquisition_location if self.acquisition_location == other.acquisition_location else None + patient = self.__patient if self.patient_code == other.patient_code else None + if isclass(self.source) and isclass(other.source): # Un-instatiated sources + if self.source == other.source: + source = self.__source + else: + source = None + else: + if type(self.source) == type(other.source) and self.source == other.source: + source = self.__source + else: + source = None + + # Join channels + res_timeseries = {} + res_timeseries.update(self._to_dict()) + res_timeseries.update(other._to_dict()) + + # Union of Events + events = set(self.__associated_events.values()).union(set(other._Biosignal__associated_events.values())) + + return self._new(timeseries=res_timeseries, source=source, patient=patient, + acquisition_location=acquisition_location, name=name, + events=events) + + def concat(self, other: 'Biosignal'): + """ + Temporally concatenates two Biosignal, if they have the same set of channel names. + Notes: + - If the two Biosignals have two distinct acquisition locations, they will be lost. + - If the two Biosignals have two distinct sources, they will be lost. + - If the two Biosignals have the distict patients, they will be lost. + Raises: + - TypeError if Biosignals are not of the same type. + - ArithmeticError if both Biosignals do not have the same channel names. + - ArithmeticError if the second comes before the first. 
+        """
+
+        # Check errors
+        if not isinstance(other, Biosignal):
+            raise TypeError(f"Operation join channels is not valid with object of type {type(other)}.")
+        if self.type != other.type:
+            raise TypeError("Cannot join a {0} to a {1}".format(other.type.__name__, self.type.__name__))
+        if self.channel_names != other.channel_names:
+            raise ArithmeticError("Biosignals to concatenate must have the same channel names.")
+        if other.start < self.end:
+            raise ArithmeticError("The second Biosignal comes before (in time) the first Biosignal.")
+
+        # Prepare common metadata
+        name = f"{self.name} >> {other.name}"
+        acquisition_location = self.acquisition_location if self.acquisition_location == other.acquisition_location else None
+        patient = self.__patient if self.patient_code == other.patient_code else None
+        if isclass(self.source) and isclass(other.source):  # Un-instantiated sources
+            if self.source == other.source:
+                source = self.__source
+            else:
+                source = None
+        else:
+            if type(self.source) == type(other.source) and self.source == other.source:
+                source = self.__source
+            else:
+                source = None
+
+        # Perform concatenation
+        res_timeseries = {}
+        for channel_name in self.channel_names:
+            res_timeseries[channel_name] = self._get_channel(channel_name) >> other._get_channel(channel_name)
+
+        # Union of Events
+        events = set(self.__associated_events.values()).union(set(other._Biosignal__associated_events.values()))
+
+        return self._new(timeseries=res_timeseries, source=source, patient=patient,
+                         acquisition_location=acquisition_location, name=name,
+                         events=events)
+
+    # ===================================
+    # BUILT-INS (Logic using Time and Amplitude values)
+    @multimethod
+    def __lt__(self, other: Type['Biosignal'] | datetime | DateTimeRange | Timeline | Event):
+        return self.end < other.start
+
+    @multimethod
+    def __lt__(self, value: float | int):
+        res = self.when(lambda x: x < value)
+        res.name = self.name + ' < ' + str(value)
+        return res
+
+    @multimethod
+    def __le__(self, other: Type['Biosignal'] | datetime | DateTimeRange | Timeline | Event) -> bool:
+        return self.end <= other.start
+
+    @multimethod
+    def __le__(self, value: float | int) -> Timeline:
+        res = self.when(lambda x: x <= value)
+        res.name = self.name + ' <= ' + str(value)
+        return res
+
+    @multimethod
+    def __gt__(self, other: Type['Biosignal'] | datetime | DateTimeRange | Timeline | Event):
+        return self.start > other.end
+
+    @multimethod
+    def __gt__(self, value: float | int):
+        res = self.when(lambda x: x > value)
+        res.name = self.name + ' > ' + str(value)
+        return res
+
+    @multimethod
+    def __ge__(self, other: Type['Biosignal'] | datetime | DateTimeRange | Timeline | Event):
+        return self.start >= other.end
+
+    @multimethod
+    def __ge__(self, value: float | int):
+        res = self.when(lambda x: x >= value)
+        res.name = self.name + ' >= ' + str(value)
+        return res
+
+    @multimethod
+    def __eq__(self, other: Type['Biosignal'] | datetime | DateTimeRange | Timeline | Event) -> bool:
+        return self.start == other.start and self.end == other.end
+
+    @multimethod
+    def __eq__(self, value: float | int) -> Timeline:
+        res = self.when(lambda x: x == value)
+        res.name = self.name + ' == ' + str(value)
+        return res
+
+    @multimethod
+    def __ne__(self, other: Type['Biosignal'] | datetime | DateTimeRange | Timeline | Event) -> bool:
+        return not self.__eq__(other)
+
+    @multimethod
+    def __ne__(self, value: float | int) -> Timeline:
+        res = self.when(lambda x: x != value)
+        res.name = self.name + ' != ' + str(value)
+        return res
+
+    # ===================================
+    # INDEXATION
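+    # (Illustrative overview of the forms accepted below, assuming 'x' is a Biosignal:
+    #  x['V5'] or x['seizure'] -> channel or event window; x[some_datetime] -> one sample;
+    #  x[some_datetimerange] or x[some_timeline] -> sub-Biosignal; x[10:'seizure':20] -> padded event.
+    #  The channel and event names here are hypothetical.)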
+    @multimethod  # A. Index by channel or Event
+    def __getitem__(self, index: str | BodyLocation) -> 'Biosignal':
+        if index in self.channel_names:
+            if self.has_single_channel:
+                raise IndexError("This Biosignal only has 1 channel. Index only the datetimes.")
+            ts = {index: self.__timeseries[index].__copy__(), }
+            return self._new(timeseries=ts)
+
+        elif index in self.__associated_events or index in self.__get_events_from_medical_conditions():
+            if index in self.__associated_events:  # Internal own Events
+                event = self.__associated_events[index]
+            else:  # Events associated to MedicalConditions
+                event = self.__get_events_from_medical_conditions()[index]
+
+            if event.has_onset and event.has_offset:
+                return self[DateTimeRange(event.onset, event.offset)]
+            elif event.has_onset:
+                return self[event.onset]
+            elif event.has_offset:
+                return self[event.offset]
+
+        else:
+            try:
+                self.__timeseries[to_datetime(index)]
+            except:
+                raise IndexError(
+                    "Datetime in incorrect format or '{}' is not a channel nor an event of this Biosignal.".format(
+                        index))
+
+    @multimethod  # B. Index by datetime
+    def __getitem__(self, index: datetime) -> 'Biosignal':
+        if not self.has_single_channel:
+            raise IndexError("This Biosignal has multiple channels. Index the channel before indexing the datetime.")
+        return tuple(self.__timeseries.values())[0][index]
+
+    @multimethod  # C. Index by DateTimeRange  # Pass item directly to each channel
+    def __getitem__(self, index: DateTimeRange) -> 'Biosignal':
+        ts = {}
+        events = set()
+        for k in self.channel_names:
+            res = self.__timeseries[k][index]
+            if res is not None:
+                ts[k] = res
+                # Events outside the new domain get discarded, hence collecting the ones that remained
+                events.update(set(self.__timeseries[k].events))
+
+        if len(ts) == 0:
+            raise IndexError("Index is outside every channel's domain.")
+
+        new = self._new(timeseries=ts, events=events)
+        return new
+
+    @multimethod  # D. Index by Timeline
+    def __getitem__(self, index: Timeline) -> 'Biosignal':
+        if index.is_index:
+            res = self[index._as_index()]
+            res.name += f" indexed by '{index.name}'"
+            return res
+        else:
+            raise IndexError(
+                "This Timeline cannot serve as index, because it contains multiple groups of intervals or points.")
+
+    @multimethod  # E. Index with a condition
+    def __getitem__(self, index: callable) -> 'Biosignal':
+        pass
+
+    @multimethod  # F. Index with slice of datetimes or padded events
+    def __getitem__(self, index: slice) -> 'Biosignal':
+        def __get_events_with_padding(event_name, padding_before=timedelta(seconds=0),
+                                      padding_after=timedelta(seconds=0),
+                                      exclude_event=False):
+            # Get Event object
+            if event_name in self.__associated_events:
+                event = self.__associated_events[event_name]
+            elif event_name in self.__get_events_from_medical_conditions():
+                event = self.__get_events_from_medical_conditions()[event_name]
+            else:
+                raise IndexError(f"No Event named '{event_name}' associated to this Biosignal.")
+
+            if isinstance(padding_before, datetime) and isinstance(padding_after, datetime) and exclude_event:
+                if event.has_onset and event.has_offset:
+                    return self[DateTimeRange(padding_before, event.onset)] >> self[
+                        DateTimeRange(event.offset + timedelta(seconds=1 / self.sampling_frequency),
+                                      padding_after)]  # FIXME: Sampling frequency might not be the same for all channels!
+                else:
+                    raise IndexError(f"Event {event_name} is a point in time, not an event with a duration.")
+
+            # Convert specific datetimes to timedeltas; is this inefficient?
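+            # (Illustrative note: if the event onset is at 10:00:05 and 'padding_before' is the
+            # datetime 10:00:00, the subtraction below turns it into 5 seconds of padding.)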
+ if isinstance(padding_before, datetime): + if event.has_onset: + padding_before = event.onset - padding_before + elif event.has_offset: + padding_before = event.offset - padding_before + if exclude_event: + padding_after = - event.duration + if isinstance(padding_after, datetime): + if event.has_offset: + padding_after = padding_after - event.offset + elif event.has_onset: + padding_after = padding_after - event.onset + if exclude_event: + padding_before = - event.duration + + # Index + if event.has_onset and event.has_offset: + return self[DateTimeRange(event.onset - padding_before, event.offset + padding_after)] + elif event.has_onset: + return self[DateTimeRange(event.onset - padding_before, event.onset + padding_after)] + elif event.has_offset: + return self[DateTimeRange(event.offset - padding_before, event.offset + padding_after)] + + # Everything but event + if isinstance(index.stop, str) and index.start is None and index.step is None: + if not index.stop.startswith('-'): + raise ValueError( + "Indexing a Biosignal like x[:'event':] is equivalent to having its entire domain. Did you mean x[:'-event':]?") + return __get_events_with_padding(index.stop[1:], padding_before=self.start, + padding_after=self.end, + exclude_event=True) + + # Everything before event + if isinstance(index.stop, str) and index.start is None: + event_name, exclude_event = index.stop, False + if event_name.startswith('-'): + event_name, exclude_event = event_name[1:], True + return __get_events_with_padding(event_name, padding_before=self.start, + exclude_event=exclude_event) + + # Everything after event + if isinstance(index.start, str) and index.stop is None: + event_name, exclude_event = index.start, False + if event_name.startswith('-'): + event_name, exclude_event = event_name[1:], True + return __get_events_with_padding(event_name, padding_after=self.end, exclude_event=exclude_event) + + # Event with padding + if isinstance(index.start, (timedelta, int)) and isinstance(index.step, (timedelta, int)) and isinstance( + index.stop, str): + start = timedelta(seconds=index.start) if isinstance(index.start, + int) else index.start # shortcut for seconds + step = timedelta(seconds=index.step) if isinstance(index.step, int) else index.step # shortcut for seconds + return __get_events_with_padding(index.stop, padding_before=start, padding_after=step) + elif isinstance(index.start, (timedelta, int)) and isinstance(index.stop, str): + start = timedelta(seconds=index.start) if isinstance(index.start, + int) else index.start # shortcut for seconds + return __get_events_with_padding(index.stop, padding_before=start) + elif isinstance(index.start, str) and isinstance(index.stop, (timedelta, int)): + stop = timedelta(seconds=index.stop) if isinstance(index.stop, int) else index.stop # shortcut for seconds + return __get_events_with_padding(index.start, padding_after=stop) + + # Index by datetime + if isinstance(index.start, datetime) and isinstance(index.stop, datetime) and index.stop < index.start: + raise IndexError("Given final datetime comes before the given initial datetime.") + + if self.has_single_channel: # one channel + channel_name = tuple(self.__timeseries.keys())[0] + channel = self.__timeseries[channel_name] + return self._new(timeseries={ + channel_name: channel[index]}) # FIXME: Why aren't events being updated here? 
(See below)
+
+        else:  # multiple channels
+            ts = {}
+            events = set()
+            for k in self.channel_names:
+                ts[k] = self.__timeseries[k][index]
+                # Events outside the new domain get discarded, hence collecting the ones that remained
+                events.update(set(self.__timeseries[k].events))  # FIXME: (See Above) Like in here!
+            new = self._new(timeseries=ts, events=events)
+            return new
+
+    @multimethod  # Z. Multiple of the above indices
+    def __getitem__(self, index: tuple) -> 'Biosignal':
+        # Structure-related: Channels
+        if all(isinstance(k, (str, BodyLocation)) and k in self.channel_names for k in index):
+            ts = {}
+            events = set()
+            for k in index:
+                ts[k] = self.__timeseries[k]
+                events.update(set(self.__timeseries[k].events))
+            new = self._new(timeseries=ts, events=events)
+            return new
+
+        # Time-related: Slices, Datetimes, Events, ...
+        else:
+            if isinstance(index[0], DateTimeRange):
+                index = sorted(index, key=lambda x: x.start_datetime)
+            else:
+                index = sorted(index)
+
+            return self._new({channel_name: channel[tuple(index)] for channel_name, channel in self})
+
+    # ===================================
+    # USEFUL TOOLS
+    @property
+    def preview(self):
+        """Returns 5 seconds of the middle of the signal."""
+        domain = self.domain
+        middle_of_domain: DateTimeRange = domain[len(domain) // 2]
+        middle = middle_of_domain.start_datetime + (middle_of_domain.timedelta / 2)
+        try:
+            return self[middle - timedelta(seconds=2): middle + timedelta(seconds=3)]
+        except IndexError:
+            raise AssertionError(
+                f"The middle segment of {self.name} from {self.patient_code} does not have at least 5 seconds to return a preview.")
+
+    def when(self, condition: Callable, window: timedelta = None):
+        if len(signature(condition).parameters) > 1:
+            assert set(signature(condition).parameters)
+            sf = self.sampling_frequency  # assuming all channels have the same sampling frequency
+            window = 1 if window is None else int(window.total_seconds() * sf)
+            intervals = []
+            for i in range(self._n_segments):  # gives an error if not all channels have the same domain
+                x = self._vblock(i)
+                evaluated = []
+                for j in range(0, len(x[0]), window):
+                    y = x[:, j: j + window]
+                    evaluated += [y] * window
+                intervals.append(Timeseries._Timeseries__Segment._Segment__when(evaluated))
+            intervals = tuple(self.__timeseries.values())[0]._indices_to_timepoints(intervals)
+            return Timeline(
+                *[Timeline.Group(channel._when(condition, window), name=channel_name) for channel_name, channel in
+                  self],
+                name=self.name + " when '" + condition.__name__ + "' is True" + (f" (in windows of {window})" if window else ""))
+
+        else:
+            return Timeline(
+                *[Timeline.Group(channel._when(condition, window), name=channel_name) for channel_name, channel in
+                  self],
+                name=self.name + " when '" + condition.__name__ + "' is True" + (f" (in windows of {window})" if window else ""))
+
+    def restructure_domain(self, time_intervals: tuple[DateTimeRange]):
+        domain = self.domain
+        if len(domain) >= len(time_intervals):
+            for _, channel in self:
+                # 1. Concatenate segments
+                channel.contiguous()
+                # 2. Partition according to new domain
+                channel.reshape(time_intervals)
+        else:
+            raise NotImplementedError("Not yet implemented.")
+
+    def acquisition_scores(self):
+        print(f"Acquisition scores for '{self.name}'")
+        completeness_score = self.completeness_score()
+        print("Completeness Score = " + ("%.2f" % (completeness_score * 100) + "%" if completeness_score else "n.d."))
+        onbody_score = self.onbody_score()
+        print("On-body Score = " + ("%.2f" % (onbody_score * 100) + "%" if onbody_score else "n.d."))
+        quality_score = self.quality_score(
+            _onbody_duration=onbody_score * self.duration if onbody_score else self.duration)
+        print("Quality Score = " + ("%.2f" % (quality_score * 100) + "%" if quality_score else "n.d."))
+
+    def completeness_score(self):
+        recorded_duration = self.duration
+        expected_duration = self.end - self.start
+        return recorded_duration / expected_duration
+
+    def onbody_score(self):
+        if hasattr(self.source, 'onbody'):  # if the BS defines an 'onbody' method, then this score exists, it's computed and returned
+            x = self.source.onbody(self)
+            if x:
+                return self.source.onbody(self).duration / self.duration
+
+    def quality_score(self, _onbody_duration=None):
+        if _onbody_duration:
+            if hasattr(self, 'acceptable_quality'):  # if the Biosignal modality defines an 'acceptable_quality' method, then this score exists, it's computed and returned
+                return self.acceptable_quality().duration / _onbody_duration
+        else:
+            if hasattr(self, 'acceptable_quality') and hasattr(self.source, 'onbody'):
+                return self.acceptable_quality().duration / self.source.onbody(self).duration
+
+    # ===================================
+    # PROCESSING
+    def apply(self, operation, **kwargs):
+        """
+        Applies the given operation in-place to every channel.
+        """
+        for channel in self.__timeseries.values():
+            channel._apply_operation(operation, **kwargs)
+
+    @multimethod
+    def undo(self, operation: Callable):
+        pass
+
+    @multimethod
+    def undo(self, operation: int):
+        pass
+
+    # Processing Shortcuts
+    def resample(self, frequency: float):
+        """
+        Resamples every channel to the new sampling frequency given, using the Fourier method.
+        @param frequency: New sampling frequency (in Hertz).
+        """
+        for channel in self.__timeseries.values():
+            channel._resample(frequency)
+
+    def invert(self, channel_label: str = None):
+        inversion = lambda x: -1 * x
+        if channel_label is None:  # apply to all channels
+            self.apply(inversion)
+        else:  # apply only to one channel
+            self.__timeseries[channel_label]._apply_operation(inversion)
+
+    def undo_segmentation(self, time_intervals: tuple[DateTimeRange]):
+        for _, channel in self:
+            channel._merge(time_intervals)
+
+    # ===================================
+    # PLOTS
+    def __draw_plot(self, timeseries_plotting_method, title, xlabel, ylabel, grid_on: bool, show: bool = True,
+                    save_to: str = None):
+        '''
+        Draws a base plot to display every channel in a subplot. It is independent of the content that is plotted.
+
+        @param timeseries_plotting_method: The method to be called in Timeseries, that defines what content to plot.
+        @param title: What the content is about. The Biosignal's name and patient code will be added.
+        @param xlabel: Label for the horizontal axis.
+        @param ylabel: Label for the vertical axis.
+        @param grid_on: True if a grid is to be drawn; False otherwise.
+        @param show: True if the plot is to be immediately displayed; False otherwise.
+        @param save_to: A path to save the plot as an image file; if none is provided, it is not saved.
+        @return:
+        '''
+        fig = plt.figure(figsize=(13, 2.5 * self.n_channels))
+
+        all_events = self.events
+        all_onsets = [e.onset for e in all_events if e.has_onset]
+        all_offsets = [e.offset for e in all_events if e.has_offset]
+        all_vlines = all_onsets + all_offsets
+
+        for i, channel_name in zip(range(self.n_channels), self.channel_names):
+            channel = self.__timeseries[channel_name]
+            ax = plt.subplot(self.n_channels, 1, i + 1, title=channel_name)
+            ax.title.set_size(10)
+            ax.margins(x=0)
+            ax.set_xlabel(xlabel, fontsize=8, rotation=0, loc="right")
+            ax.set_ylabel(ylabel, fontsize=8, rotation=90, loc="top")
+            plt.xticks(fontsize=9)
+            plt.yticks(fontsize=9)
+            if grid_on:
+                ax.grid()
+            timeseries_plotting_method(self=channel)
+
+            _vlines = [int((t - channel.start).total_seconds() * channel.sampling_frequency) for t in all_vlines if
+                       t in channel]
+            plt.vlines(_vlines, ymin=channel.min(), ymax=channel.max(), colors='red')
+
+        fig.suptitle((title + ' ' if title is not None else '') + self.name + ' from patient ' + str(self.patient_code),
+                     fontsize=11)
+        fig.tight_layout()
+        if save_to is not None:
+            fig.savefig(save_to)
+        plt.show() if show else plt.close()
+
+        # return fig
+
+    def plot_spectrum(self, show: bool = True, save_to: str = None):
+        '''
+        Plots the power spectrum of every channel.
+        @param show: True if the plot is to be immediately displayed; False otherwise.
+        @param save_to: A path to save the plot as an image file; if none is provided, it is not saved.
+        '''
+        self.__draw_plot(Timeseries._plot_spectrum, 'Power Spectrum of', 'Frequency (Hz)', 'Power (dB)', True, show,
+                         save_to)
+
+    def plot(self, show: bool = True, save_to: str = None):
+        '''
+        Plots the amplitude in time of every channel.
+        @param show: True if the plot is to be immediately displayed; False otherwise.
+        @param save_to: A path to save the plot as an image file; if none is provided, it is not saved.
+        '''
+        return self.__draw_plot(Timeseries._plot, None, 'Time', 'Amplitude (n.d.)', False, show, save_to)
+
+    def plot_summary(self, show: bool = True, save_to: str = None):
+        '''
+        Plots a summary of relevant aspects of common analysis of the Biosignal.
+        '''
+        pass  # Implemented in each type
+
+    # ===================================
+    # CONVERT TO OTHER DATA STRUCTURES
+    def to_dict(self) -> Dict[str | BodyLocation, Timeseries]:
+        return deepcopy(self.__timeseries)
+
+    def to_array(self) -> ndarray:
+        """
+        Converts the Biosignal to a numpy array.
+        The initial datetime is that of the earliest channel. The final datetime is that of the latest channel.
+        Where a channel is not defined, the value is NaN (e.g. interruptions, beginnings, ends).
+        If the channels are not sampled at the same frequency, the highest sampling frequency is used, and the channels
+        with a lower sampling frequency are resampled.
+        :return: A 2D numpy array with one channel per row.
+        :rtype: numpy.ndarray
+
+        Example:
+            Given a Biosignal with 3 channels sampled at 1.1 Hz:
+            Channel 1: 0, 1, 2, 3, 4 (starts at 10:00:02.500)
+            Channel 2: 0, 1, 2, 3, 4, 5, 6, 7, 8, 9 (starts at 10:00:04.200)
+            Channel 3: 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19 (starts at 10:00:00.000)
+            Result: [[np.nan, np.nan, 0, 1, 2, 3, 4, np.nan, np.nan, np.nan, np.nan, np.nan, np.nan, np.nan, np.nan, np.nan, np.nan, np.nan, np.nan, np.nan],
+                     [np.nan, np.nan, np.nan, np.nan, 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, np.nan, np.nan, np.nan, np.nan, np.nan, np.nan],
+                     [0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19]]
+        """
+
+        # Get the maximum sampling frequency of the Biosignal
+        max_sf = max(channel.sampling_frequency for _, channel in self)
+
+        # Get the arrays of all channels
+        channels_as_arrays = []
+        for i, (_, channel) in enumerate(self):
+            if channel.sampling_frequency != max_sf:  # Resample the channel, if necessary
+                channel._resample(max_sf)
+            # Convert channel to array
+            channels_as_arrays.append(channel.to_array())
+
+        # Get the length of the samples axis
+        n_samples = ceil((self.final_datetime - self.initial_datetime).total_seconds() * max_sf)
+
+        # Create the array full of NaNs, one row per channel
+        res = np.full((self.n_channels, n_samples), np.nan)
+
+        # Fill the array
+        for i, ((_, channel), channel_as_array) in enumerate(zip(self, channels_as_arrays)):
+            # Get the index of the first position of this channel in the array
+            initial_ix = round((channel.initial_datetime - self.initial_datetime).total_seconds() * max_sf)
+            # Broadcast samples into the array
+            res[i, initial_ix: initial_ix + len(channel_as_array)] = channel_as_array
+
+        return res
+
+    def to_dataframe(self) -> DataFrame:
+        pass
+
+    # ===================================
+    # SERIALIZATION
+
+    def __getstate__(self):
+        """
+        1: __name (str)
+        2: __source (BS subclass (instantiated or not))
+        3: __patient (Patient)
+        4: __acquisition_location (BodyLocation)
+        5: __associated_events (tuple)
+        6: __timeseries (dict)
+        """
+        return (self.__SERIALVERSION, self.__name, self.__source, self.__patient, self.__acquisition_location,
+                tuple(self.__associated_events.values()), self.__timeseries)
+
+    def __setstate__(self, state):
+        if state[0] in (1, 2):
+            self.__name, self.__source, self.__patient, self.__acquisition_location = state[1:5]
+            self.__timeseries = state[6]
+            self.__associated_events = {}
+            self.annotate(state[5])
+        else:
+            raise IOError(
+                f'Version of {self.__class__.__name__} object not supported. Serialized version: {state[0]};'
+                f'Supported versions: 1 and 2.')
+
+    EXTENSION = '.biosignal'
+
+    def save(self, save_to: str):
+        # Check extension
+        if not save_to.endswith(Biosignal.EXTENSION):
+            save_to += Biosignal.EXTENSION
+
+        # Make memory maps
+        temp_dir = mkdtemp(prefix='ltbio.')
+        for _, channel in self:
+            channel._memory_map(temp_dir)
+
+        # Write
+        from _pickle import dump
+        with open(save_to, 'wb') as f:
+            dump(self, f)
+
+        # Clean up memory maps
+        rmtree(temp_dir)
+
+    @classmethod
+    def load(cls, filepath: str):
+        # Check extension
+        if not filepath.endswith(Biosignal.EXTENSION):
+            raise IOError("Only .biosignal files are allowed.")
+
+        from _pickle import load
+        from _pickle import UnpicklingError
+
+        # Read
+        try:  # Versions >= 2023.0:
+            f = open(filepath, 'rb')
+            biosignal = load(f)
+        except UnpicklingError as e:  # Versions 2022.0, 2022.1 and 2022.2:
+            from bz2 import BZ2File
+            print("Loading...\nNote: Loading a version older than 2023.0 takes significantly more time. It is suggested you save this Biosignal again, so you can have it in the newest, fastest format.")
+            f = BZ2File(filepath, 'rb')
+            biosignal = load(f)
+        f.close()
+        return biosignal
+
+    # ==============================
+    # ML PACKAGE
+    def _vblock(self, i: int):
+        """
+        Returns a block of temporally aligned segments, vertically stacked for all channels.
+        Note: This assumes all channels are segmented in the same way, i.e., have exactly the same set of subdomains.
+        :param i: The block index
+        :return: ndarray of vertically stacked segments
+        """
+        N = self._n_segments
+        if isinstance(N, int):
+            if i < N:
+                return np.vstack([channel[i] for channel in self.__timeseries.values()])
+            else:
+                raise IndexError(f"This Biosignal only has {N} blocks.")
+        else:
+            raise AssertionError("Not all channels are segmented in the same way, hence blocks cannot be created.")
+
+    def _block_subdomain(self, i: int) -> DateTimeRange:
+        if self.n_channels == 1:
+            return tuple(self.__timeseries.values())[0]._block_subdomain(i)
+        else:
+            raise NotImplementedError()
+
+
+class DerivedBiosignal(Biosignal):
+    """
+    A DerivedBiosignal is a set of Timeseries of some feature extracted from an original Biosignal.
+    Such a feature is useful to manipulate as any other Biosignal.
+    """
+
+    def __init__(self, timeseries, source=None, patient=None, acquisition_location=None, name=None,
+                 original: Biosignal = None):
+        if original is not None:
+            super().__init__(timeseries, original.source, original._Biosignal__patient, original.acquisition_location,
+                             original.name)
+        else:
+            super().__init__(timeseries, source, patient, acquisition_location, name)
+
+        self.original = original  # Save reference
+
+
+class MultimodalBiosignal(Biosignal):
+
+    def __init__(self, **biosignals):
+
+        timeseries = {}
+        # sources = {}
+        patient = None
+        # locations = {}
+        name = "Union of"
+        events = {}
+
+        for label, biosignal in biosignals.items():
+            if patient is None:
+                patient = biosignal._Biosignal__patient
+            elif patient != biosignal._Biosignal__patient:
+                raise ValueError("When joining Biosignals, they all must be from the same Patient.")
+
+            for channel_label, ts in biosignal._to_dict().items():
+                timeseries[label + ':' + channel_label] = ts  # Join Timeseries in a single dictionary
+
+            # sources[label] = biosignal.source  # Join sources
+
+            # if biosignal.acquisition_location is not None:
+            #     locations[label] = biosignal.acquisition_location
+
+            name += f" '{biosignal.name}'," if biosignal.name != "No Name" else f" '{label}',"
+
+            for event in biosignal.events:
+                if event.name in events and events[event.name] != event:
+                    raise ValueError(
+                        "There are two event names associated to different onsets/offsets in this set of Biosignals.")
+                else:
+                    events[event.name] = event
+
+        super(MultimodalBiosignal, self).__init__(timeseries, None, patient, None, name[:-1])
+        self.annotate(events)
+        self.__biosignals = biosignals
+
+        if len(self.type) == 1:
+            raise TypeError("Cannot create a Multimodal Biosignal of just 1 modality.")
+
+    @property
+    def type(self):
+        return {biosignal.type for biosignal in self.__biosignals.values()}
+
+    @property
+    def source(self) -> Set[BS]:
+        return {biosignal.source for biosignal in self.__biosignals.values()}
+
+    @property
+    def acquisition_location(self) -> Set[BodyLocation]:
+        return {biosignal.acquisition_location for biosignal in self.__biosignals.values()}
+
+    def __getitem__(self, item):
+        if isinstance(item, tuple):
+            if len(item) == 2:
+                biosignal = self.__biosignals[item[0]]
+                return biosignal[item[1]]
+
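+        # A single string label (e.g. the hypothetical 'ecg') returns the whole inner Biosignal with that label.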
+        elif isinstance(item, str) and item in self.__biosignals.keys():
+            return self.__biosignals[item]
+
+        raise IndexError("Indexing a Multimodal Biosignal should have two arguments, like multisignal['ecg']['V5'], "
+                         "where 'ecg' is the Biosignal to address and 'V5' is the channel to get.")
+
+    def __contains__(self, item):
+        if isinstance(item, str) and item in self.__biosignals.keys():
+            return True
+        if isinstance(item, Biosignal) and item in self.__biosignals.values():
+            return True
+
+        return super(MultimodalBiosignal, self).__contains__(item)
+
+    def __str__(self):
+        '''Returns a textual description of the MultimodalBiosignal.'''
+        res = f"MultimodalBiosignal containing {len(self.__biosignals)} Biosignals:\n"
+        for i, biosignal in enumerate(self.__biosignals.values()):
+            res += "({})\n{}".format(i, str(biosignal))
+        return res
+
+    def plot_summary(self, show: bool = True, save_to: str = None):
+        raise TypeError("Functionality not available for Multimodal Biosignals.")
diff --git a/src/ltbio/biosignals/_Biosignal.pyi b/src/ltbio/biosignals/_Biosignal.pyi
new file mode 100644
index 00000000..8956d6f9
--- /dev/null
+++ b/src/ltbio/biosignals/_Biosignal.pyi
@@ -0,0 +1,248 @@
+from abc import ABC
+from dataclasses import dataclass
+from datetime import datetime
+from datetime import timedelta
+from typing import Callable
+
+from datetimerange import DateTimeRange
+from ltbio.processing.noises.Noise import Noise
+from multimethod import multimethod
+from numpy import ndarray
+from pandas import DataFrame
+
+from ltbio.clinical import BodyLocation, Patient
+from units import Unit
+from . import BiosignalSource as BS
+from . import Timeline, Timeseries, Event
+
+
+@dataclass
+class Biosignal(ABC):
+
+    DEFAULT_UNIT: Unit
+
+    # INITIALIZERS
+    # A. Ad-hoc
+    @multimethod
+    def __init__(self,
+                 timeseries: dict[str | BodyLocation, Timeseries], source: BS = None, patient: Patient = None,
+                 acquisition_location: BodyLocation = None, name: str = None): ...
+    # B. From files
+    @multimethod
+    def __init__(self, path: str, source: BS = None, patient: Patient = None, acquisition_location: BodyLocation = None,
+                 name: str = None): ...
+
+    # SPECIAL INITIALIZERS
+    @classmethod
+    def from_template(cls) -> Biosignal: ...
+    @classmethod
+    def with_additive_noise(cls, original: Biosignal, noise: Noise, name: str = None) -> Biosignal: ...
+    @classmethod
+    def from_noise(cls, noises: Noise | dict[str | BodyLocation, Noise], time_intervals: DateTimeRange | tuple[DateTimeRange], name: str = None) -> Biosignal: ...
+
+    # PROPERTIES (Booleans)
+    @property
+    def has_single_channel(self) -> bool: ...
+
+    # PROPERTIES (Getters)
+    @property
+    def name(self) -> str: ...
+    @property
+    def n_channels(self) -> int: ...
+    @property
+    def channels(self) -> set[Timeseries]: ...
+    @property
+    def channel_names(self) -> set[str | BodyLocation]: ...
+    @property
+    def patient(self) -> Patient: ...
+    @property
+    def acquisition_location(self) -> BodyLocation: ...
+    @property
+    def source(self) -> BS: ...
+    @property
+    def start(self) -> datetime: ...
+    @property
+    def end(self) -> datetime: ...
+    @property
+    def domain(self) -> Timeline: ...
+    @property
+    def duration(self) -> timedelta: ...
+    @property
+    def events(self) -> tuple[Event]: ...
+    @property
+    def sampling_frequency(self) -> float | dict[str | BodyLocation, float]: ...
+
+    # PROPERTIES (Setters)
+    @name.setter
+    def name(self, name: str) -> None: ...
+    @patient.setter
+    def patient(self, patient: Patient) -> None: ...
+    @acquisition_location.setter
+    def acquisition_location(self, acquisition_location: BodyLocation) -> None: ...
+
+    # GETTERS AND SETTERS FOR MEMBERS AND ASSOCIATIONS
+    def __getattr__(self, name: str) -> Timeseries | Event: ...
+    def get_event(self, name: str) -> Event: ...
+    def set_event_name(self, current: str, new: str) -> None: ...
+    def get_channel(self, name: str | BodyLocation) -> Timeseries: ...
+    def set_channel_name(self, current: str | BodyLocation, new: str | BodyLocation) -> None: ...
+
+    # EVENTS
+    @multimethod
+    def associate(self, *events: Event) -> None: ...
+    @multimethod
+    def associate(self, **events: Event) -> None: ...
+    @multimethod
+    def disassociate(self, *events: Event) -> None: ...
+    @multimethod
+    def disassociate(self, *events: str) -> None: ...
+    def disassociate_all_events(self) -> None: ...
+
+    # BUILT-INS (Basic)
+    def __len__(self) -> int | dict[str | BodyLocation, int]: ...
+    def __copy__(self) -> Biosignal: ...
+    def __repr__(self) -> str: ...
+    def __str__(self) -> str: ...
+    def __iter__(self) -> iter: ...
+    @multimethod
+    def __contains__(self, item: str) -> bool: ...
+    @multimethod
+    def __contains__(self, item: datetime | DateTimeRange) -> bool: ...
+
+    # BUILT-INS (Arithmetic Operations)
+    @multimethod
+    def __add__(self, value: float) -> Biosignal: ...
+    @multimethod
+    def __sub__(self, value: float) -> Biosignal: ...
+    @multimethod
+    def __mul__(self, value: float) -> Biosignal: ...
+    @multimethod
+    def __truediv__(self, value: float) -> Biosignal: ...
+    def __neg__(self) -> Biosignal: ...
+    def __invert__(self) -> Biosignal: ...
+
+    # BUILT-INS (Joining Biosignals)
+    @multimethod
+    def __add__(self, other: Biosignal) -> Biosignal: ...
+    @multimethod
+    def __sub__(self, other: Biosignal) -> Biosignal: ...
+    @multimethod
+    def __mul__(self, other: Biosignal) -> Biosignal: ...
+    @multimethod
+    def __truediv__(self, other: Biosignal) -> Biosignal: ...
+    def __and__(self, other: Biosignal) -> Biosignal: ...
+    def __rshift__(self, other: Biosignal) -> Biosignal: ...
+
+    # BUILT-INS (Logic using Time and Amplitude values)
+    @multimethod
+    def __lt__(self, other: Biosignal | datetime | DateTimeRange | Timeline | Event) -> bool: ...
+    @multimethod
+    def __lt__(self, value: float | int) -> Timeline: ...
+    @multimethod
+    def __le__(self, other: Biosignal | datetime | DateTimeRange | Timeline | Event) -> bool: ...
+    @multimethod
+    def __le__(self, value: float | int) -> Timeline: ...
+    @multimethod
+    def __gt__(self, other: Biosignal | datetime | DateTimeRange | Timeline | Event) -> bool: ...
+    @multimethod
+    def __gt__(self, value: float | int) -> Timeline: ...
+    @multimethod
+    def __ge__(self, other: Biosignal | datetime | DateTimeRange | Timeline | Event) -> bool: ...
+    @multimethod
+    def __ge__(self, value: float | int) -> Timeline: ...
+    @multimethod
+    def __eq__(self, other: Biosignal | datetime | DateTimeRange | Timeline | Event) -> bool: ...
+    @multimethod
+    def __eq__(self, value: float | int) -> Timeline: ...
+    @multimethod
+    def __ne__(self, other: Biosignal | datetime | DateTimeRange | Timeline | Event) -> bool: ...
+    @multimethod
+    def __ne__(self, value: float | int) -> Timeline: ...
+
+    # INDEXATION
+    @multimethod  # A. Index by channel or Event
+    def __getitem__(self, index: str | BodyLocation) -> Biosignal: ...
+    @multimethod  # B. Index by datetime
+    def __getitem__(self, index: datetime) -> Biosignal: ...
+    @multimethod  # C. Index by DateTimeRange
+    def __getitem__(self, index: DateTimeRange) -> Biosignal: ...
+    @multimethod  # D. Index by Timeline
+    def __getitem__(self, index: Timeline) -> Biosignal: ...
+    @multimethod  # E. Index with a condition
+    def __getitem__(self, index: callable) -> Biosignal: ...
+    @multimethod  # F. Index with slice of datetimes or padded events
+    def __getitem__(self, index: slice) -> Biosignal: ...
+    @multimethod  # Z. Multiple of the above indices
+    def __getitem__(self, index: tuple) -> Biosignal: ...
+
+    # USEFUL TOOLS
+    @property
+    def preview(self) -> Biosignal: ...
+    def when(self, condition: callable, window: timedelta = None) -> Timeline: ...
+    def restructure_domain(self, time_intervals: tuple[DateTimeRange]) -> None: ...
+    def acquisition_scores(self) -> tuple[float, float, float]: ...
+    def completeness_score(self) -> float: ...
+    def onbody_score(self) -> float: ...
+    def quality_score(self, _onbody_duration=None) -> float: ...
+
+    # PROCESSING
+    def apply(self, operation: Callable, inplace: bool = True, **kwargs) -> Biosignal | None: ...
+    @multimethod
+    def undo(self, operation: Callable) -> None: ...
+    @multimethod
+    def undo(self, operation: int) -> None: ...
+    # Processing Shortcuts
+    def resample(self, frequency: float) -> None: ...
+    def invert(self, channel_label: str = None) -> None: ...
+    def undo_segmentation(self, time_intervals: tuple[DateTimeRange]) -> None: ...
+
+    # PLOTS
+    def plot_spectrum(self, show: bool = True, save_to: str = None) -> None: ...
+    def plot(self, show: bool = True, save_to: str = None) -> None: ...
+    def plot_summary(self, show: bool = True, save_to: str = None) -> None: ...
+
+    # CONVERT TO OTHER DATA STRUCTURES
+    def to_dict(self) -> dict[str | BodyLocation, Timeseries]: ...
+    def to_array(self) -> ndarray: ...
+    def to_dataframe(self) -> DataFrame: ...
+
+    # SERIALIZATION
+    EXTENSION = '.biosignal'
+    __SERIALVERSION: int = 2
+    def __getstate__(self) -> tuple: ...
+    def __setstate__(self, state: tuple) -> None: ...
+    def save(self, filepath: str) -> None: ...
+    @classmethod
+    def load(cls, filepath: str) -> Biosignal: ...
+
+
+class DerivedBiosignal(Biosignal):
+    @classmethod
+    def derived_from(cls, biosignal: Biosignal) -> DerivedBiosignal: ...
+
+
+class MultimodalBiosignal(Biosignal):
+
+    @multimethod
+    def __init__(self, **biosignals: Biosignal) -> MultimodalBiosignal: ...
+
+    # PROPERTIES
+    @property
+    def modalities(self) -> set[type[Biosignal]]: ...
+    @property
+    def source(self) -> set[BS]: ...
+    @property
+    def acquisition_location(self) -> set[BodyLocation]: ...
+
+    # BUILT-INS (Basic)
+    def __repr__(self) -> str: ...
+    def __str__(self) -> str: ...
+
+    # INDEXATION
+    @multimethod
+    def __getitem__(self, item: type[Biosignal]) -> Biosignal: ...
+
+    # PLOTS
+    def plot_summary(self, show: bool = True, save_to: str = None):
+        raise TypeError("Functionality not available for Multimodal Biosignals.")
+
diff --git a/src/ltbio/biosignals/_BiosignalSource.py b/src/ltbio/biosignals/_BiosignalSource.py
new file mode 100644
index 00000000..cc6d6571
--- /dev/null
+++ b/src/ltbio/biosignals/_BiosignalSource.py
@@ -0,0 +1,54 @@
+# -*- encoding: utf-8 -*-
+
+# ===================================
+
+# IT - LongTermBiosignals
+
+# Package: biosignals
+# Module: BiosignalSource
+# Description: Abstract class BiosignalSource, with static procedures to ease the reading and writing of files from any
+# source (hospitals, devices, datasets ...).
+ +# Contributors: João Saraiva, Mariana Abreu +# Created: 25/04/2022 +# Last Updated: 29/06/2022 + +# =================================== + +from abc import ABC + +class BiosignalSource(ABC): + + # =================================== + # BUILT-INS + def __eq__(self, other): + return type(self) == type(other) + + # =================================== + # READ FROM FILES + @classmethod + def _read(cls, path, type, **options): + return { + 'timeseries': cls._timeseries(path, type, **options), + 'patient': cls._patient(path, **options), + 'acquisition_location': cls._acquisition_location(path, type, **options), + 'events': cls._events(path, **options), + 'name': cls._name(path, type, **options) + } + + # =================================== + # SERIALIZATION + def __getstate__(self): + """ + 1: other... (dict) + """ + other_attributes = self.__dict__.copy() + return (self.__SERIALVERSION, ) if len(other_attributes) == 0 else (self.__SERIALVERSION, other_attributes) + + def __setstate__(self, state): + if state[0] == 1: + if len(state) == 2: + self.__dict__.update(state[1]) + else: + raise IOError(f'Version of {self.__class__.__name__} object not supported. Serialized version: {state[0]};' + f'Supported versions: 1.') diff --git a/src/ltbio/biosignals/_BiosignalSource.pyi b/src/ltbio/biosignals/_BiosignalSource.pyi new file mode 100644 index 00000000..c751f3ae --- /dev/null +++ b/src/ltbio/biosignals/_BiosignalSource.pyi @@ -0,0 +1,68 @@ +# -- encoding: utf-8 -- +# =================================== +# ScientISST LTBio | Long-Term Biosignals +# Package: biosignals +# Class: BiosignalSource +# =================================== + +from abc import ABC, abstractmethod + +from numpy import ndarray + +from ltbio.biosignals import Event +from ltbio.biosignals._Biosignal import Biosignal +from ltbio.biosignals._Timeseries import Timeseries +from ltbio.clinical import BodyLocation, Patient + + +class BiosignalSource(ABC): + + NAME_MAX_LENGTH: int = 100 + + # INITIALIZER + def __init__(self) -> BiosignalSource: ... + + # BUILT-INS + @abstractmethod + def __repr__(self) -> str: ... + def __eq__(self, other) -> bool: ... + + # READ FROM FILES + @staticmethod + @abstractmethod + def _timeseries(path: str, type, **options) -> dict[str | BodyLocation, Timeseries]: ... + + @staticmethod + def _events(path: str, **options) -> tuple[Event]: ... + + @staticmethod + def _patient(path: str, **options) -> Patient: + ... + + @staticmethod + def _acquisition_location(path, type, **options) -> BodyLocation: + ... + + @staticmethod + def _name(path, type, **options) -> str: + ... + + @classmethod + def _read(cls, path: str, type: Biosignal, **options) -> dict[str, object]: ... + + # WRITE TO FILES + @staticmethod + @abstractmethod + def _write(path: str, timeseries: dict) -> None: + ... + + # TRANSFER FUNCTIONS + @staticmethod + @abstractmethod + def _transfer(samples: ndarray, type) -> ndarray: + ... + + # SERIALIZATION + __SERIALVERSION: int = 1 + def __getstate__(self) -> tuple: ... + def __setstate__(self, state: tuple) -> None: ... 
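+
+
+# A minimal sketch of how a concrete source is expected to specialize this interface
+# ('HospitalX' and its behaviour are illustrative assumptions, not part of the package):
+#
+#     class HospitalX(BiosignalSource):
+#         def __repr__(self) -> str:
+#             return "Hospital X"
+#         @staticmethod
+#         def _timeseries(path, type, **options):
+#             ...  # parse the files at 'path' into a {channel_name: Timeseries} dict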
diff --git a/src/ltbio/biosignals/_Event.py b/src/ltbio/biosignals/_Event.py
new file mode 100644
index 00000000..a9e9b00f
--- /dev/null
+++ b/src/ltbio/biosignals/_Event.py
@@ -0,0 +1,139 @@
+from datetime import datetime, timedelta
+
+from datetimerange import DateTimeRange
+from pandas import to_datetime
+
+
+class Event():
+    __SERIALVERSION: int = 1
+
+    def __init__(self, name: str, onset: datetime | str = None, offset: datetime | str = None):
+        if onset is None and offset is None:  # at least one
+            raise AssertionError("At least an onset or an offset must be given to create an Event.")
+        self.__onset = to_datetime(onset) if isinstance(onset, str) else onset
+        self.__offset = to_datetime(offset) if isinstance(offset, str) else offset
+        if onset is not None and offset is not None and offset < onset:
+            raise AssertionError(f"In Event '{name}', the offset cannot come before the onset.")
+        self.__name = name
+
+    @property
+    def has_onset(self) -> bool:
+        return self.__onset is not None
+
+    @property
+    def has_offset(self) -> bool:
+        return self.__offset is not None
+
+    @property
+    def onset(self) -> datetime:
+        if self.has_onset:
+            return self.__onset
+        else:
+            raise AttributeError(f"Event {self.name} has no onset.")
+
+    @onset.setter
+    def onset(self, datetime: datetime):
+        self.__onset = datetime
+
+    @property
+    def offset(self) -> datetime:
+        if self.has_offset:
+            return self.__offset
+        else:
+            raise AttributeError(f"Event {self.name} has no offset.")
+
+    @offset.setter
+    def offset(self, datetime: datetime):
+        self.__offset = datetime
+
+    @property
+    def duration(self) -> timedelta:
+        if self.__onset is None:
+            raise AttributeError(f"Event '{self.name}' has no duration; it only has an offset.")
+        if self.__offset is None:
+            raise AttributeError(f"Event '{self.name}' has no duration; it only has an onset.")
+        return self.__offset - self.__onset
+
+    @property
+    def domain(self) -> DateTimeRange:
+        if self.__onset is None:
+            raise AttributeError(f"Event '{self.name}' has no domain; it only has an offset.")
+        if self.__offset is None:
+            raise AttributeError(f"Event '{self.name}' has no domain; it only has an onset.")
+        return DateTimeRange(self.__onset, self.__offset)
+
+    @property
+    def name(self) -> str:
+        return self.__name
+
+    def domain_with_padding(self, before: timedelta = timedelta(seconds=0), after: timedelta = timedelta(seconds=0)):
+        """
+        The Event domain with before, after, or both paddings. Negative paddings go back in time; positive paddings go forward in time.
+        :param before: Padding before the onset if defined, or the offset otherwise.
+        :param after: Padding after the offset if defined, or the onset otherwise.
+        :return: DateTimeRange of the padded domain.
+        """
+
+        if not isinstance(before, timedelta) or not isinstance(after, timedelta):
+            raise TypeError('Both paddings, before and after, must be timedelta objects.')
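+
+        # (Illustrative note: for an event spanning [10:00:00, 10:00:10[ with
+        #  before=-timedelta(seconds=5) and after=timedelta(seconds=5), the
+        #  additions below yield the padded domain [09:59:55, 10:00:15[.)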
+
+        # return: event [start, end[
+        start = self.__onset if self.__onset is not None else self.__offset
+        end = self.__offset if self.__offset is not None else self.__onset
+
+        # return: event [start + before, end + after[
+        start, end = start + before, end + after
+
+        return DateTimeRange(start, end)
+
+    def __repr__(self):
+        if self.__offset is None:
+            return self.__name + ': Starts at ' + self.__onset.strftime("%d %b, %H:%M:%S")
+        elif self.__onset is None:
+            return self.__name + ': Ends at ' + self.__offset.strftime("%d %b, %H:%M:%S")
+        else:
+            return self.__name + ': [' + self.__onset.strftime("%d %b, %H:%M:%S") + '; ' + self.__offset.strftime("%d %b, %H:%M:%S") + ']'
+
+    def __hash__(self):
+        return hash((self.__name, self.__onset, self.__offset))
+
+    def __eq__(self, other):
+        return self.__name == other.name and self.__onset == other._Event__onset and self.__offset == other._Event__offset
+
+    def __ne__(self, other):
+        return not self == other
+
+    def __lt__(self, other):  # An Event comes before another Event if its end comes before the other's start.
+        after = other._Event__onset if other._Event__onset is not None else other._Event__offset
+        before = self.__offset if self.__offset is not None else self.__onset
+        return before < after
+
+    def __le__(self, other):
+        return self < other or self == other
+
+    def __gt__(self, other):
+        return other < self
+
+    def __ge__(self, other):
+        return self > other or self == other
+
+    def __getstate__(self):
+        """
+        1: name (str)
+        2: onset (datetime)
+        3: offset (datetime)
+        4: other... (dict)
+        """
+        other_attributes = self.__dict__.copy()
+        del other_attributes['_Event__name'], other_attributes['_Event__onset'], other_attributes['_Event__offset']
+        return (self.__SERIALVERSION, self.__name, self.__onset, self.__offset) if len(other_attributes) == 0 \
+            else (self.__SERIALVERSION, self.__name, self.__onset, self.__offset, other_attributes)
+
+    def __setstate__(self, state):
+        if state[0] == 1:
+            self.__name, self.__onset, self.__offset = state[1], state[2], state[3]
+            if len(state) == 5:
+                self.__dict__.update(state[4])
+        else:
+            raise IOError(f'Version of {self.__class__.__name__} object not supported. Serialized version: {state[0]};'
+                          f'Supported versions: 1.')
\ No newline at end of file
diff --git a/src/ltbio/biosignals/_Event.pyi b/src/ltbio/biosignals/_Event.pyi
new file mode 100644
index 00000000..412850c7
--- /dev/null
+++ b/src/ltbio/biosignals/_Event.pyi
@@ -0,0 +1,55 @@
+from datetime import datetime, timedelta
+
+from datetimerange import DateTimeRange
+from multipledispatch import dispatch
+
+
+class Event():
+    @dispatch(str, datetime)
+    def __init__(self, name: str, onset: datetime | str) -> Event: ...
+    @dispatch(str, datetime)
+    def __init__(self, name: str, offset: datetime | str) -> Event: ...
+    @dispatch(str, datetime, datetime)
+    def __init__(self, name: str, onset: datetime | str, offset: datetime | str) -> Event: ...
+
+    # BOOLEAN CHECKERS
+    @property
+    def has_onset(self) -> bool: ...
+    @property
+    def has_offset(self) -> bool: ...
+
+    # GETTERS
+    @property
+    def duration(self) -> timedelta: ...
+    @property
+    def domain(self) -> DateTimeRange: ...
+    @property
+    def name(self) -> str: ...
+    @property
+    def onset(self) -> datetime: ...
+    @property
+    def offset(self) -> datetime: ...
+
+    # SETTERS
+    @onset.setter
+    def onset(self, datetime: datetime): ...
+    @offset.setter
+    def offset(self, datetime: datetime): ...
+
+    # BUILT-INS (Basic)
+    def __repr__(self) -> str: ...
+    def __str__(self) -> str: ...
+
+    # BUILT-INS (Logic with Time)
+    def __eq__(self, other: Event) -> bool: ...
+    def __ne__(self, other: Event) -> bool: ...
+    def __lt__(self, other: Event) -> bool: ...
+    def __le__(self, other: Event) -> bool: ...
+    def __gt__(self, other: Event) -> bool: ...
+    def __ge__(self, other: Event) -> bool: ...
+
+    # SERIALIZATION
+    __SERIALVERSION: int = 1
+    def __hash__(self) -> int: ...
+    def __getstate__(self) -> tuple: ...
+    def __setstate__(self, state: tuple) -> None: ...
diff --git a/src/ltbio/biosignals/_Segment.py b/src/ltbio/biosignals/_Segment.py
new file mode 100644
index 00000000..936c73b6
--- /dev/null
+++ b/src/ltbio/biosignals/_Segment.py
@@ -0,0 +1,255 @@
+# -*- encoding: utf-8 -*-
+
+# ===================================
+
+# IT - LongTermBiosignals
+
+# Package: biosignals
+# Module: Segment
+# Description: Class Segment, which represents an uninterrupted sequence of samples of a timeseries.
+
+# Contributors: João Saraiva, Mariana Abreu
+# Created: 20/04/2022
+# Last Updated: 22/07/2022
+
+# ===================================
+
+from datetime import datetime, timedelta
+from os.path import join
+from tempfile import mkstemp
+from typing import Callable, Sequence, Any
+
+import numpy as np
+from multimethod import multimethod
+from numpy import ndarray, memmap
+
+from ltbio._core.exceptions import DifferentLengthsError
+
+
+class Segment():
+    """
+    A Segment is an uninterrupted sequence of samples, i.e. a 1-dimensional array of real values.
+    """
+
+    # ===================================
+    # Initializers
+
+    def __init__(self, samples: ndarray | Sequence[float]):
+        """
+        A Segment is an uninterrupted sequence of samples.
+
+        Parameters
+        ------------
+        samples: ndarray
+            The samples to store.
+        """
+
+        # Save samples
+        self.__samples = np.array(samples, dtype=float)
+
+    # ===================================
+    # Properties (Getters)
+
+    @property
+    def samples(self) -> ndarray:
+        return self.__samples.view()
+
+    # ===================================
+    # Built-ins (Basics)
+
+    def __len__(self):
+        return len(self.__samples)
+
+    def __copy__(self):
+        return Segment(self.__samples.copy())
+
+    # ===================================
+    # Built-ins (Joining Segments)
+
+    def append(self, samples: ndarray | Sequence[float]):
+        """
+        Appends more samples to the Segment.
+
+        Parameters
+        ------------
+        samples: ndarray
+            The samples to append.
+        """
+        self.__samples = np.append(self.__samples, samples)
+
+    @classmethod
+    def concatenate(cls, *other: 'Segment') -> 'Segment':
+        """
+        Concatenates the given Segments in the given order.
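+
+        Example (an illustrative sketch):
+            >>> Segment.concatenate(Segment([1., 2.]), Segment([3., 4.])).samples
+            array([1., 2., 3., 4.])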
+        """
+        # Get the samples
+        all_samples = np.concatenate([segment.samples for segment in other])
+        return Segment(all_samples)
+
+    # ===================================
+    # Built-ins (Arithmetic)
+
+    @classmethod
+    def _check_length_compatibility(cls, first: 'Segment', second: 'Segment'):
+        if len(first) != len(second):
+            raise DifferentLengthsError(len(first), len(second))
+
+    @classmethod
+    def _binary_operation(cls, operation: Callable, first: 'Segment', second: 'Segment') -> 'Segment':
+        Segment._check_length_compatibility(first, second)
+        return Segment(operation(first.samples, second.samples))  # operate on the samples, not the Segments, to avoid recursion
+
+    @classmethod
+    def _unary_operation(cls, segment: 'Segment', operation: Callable) -> 'Segment':
+        return Segment(operation(segment.samples))
+
+    @multimethod
+    def __add__(self, other: 'Segment'):
+        """Adds two Segments, sample by sample."""
+        return self._binary_operation((lambda x, y: x + y), self, other)
+
+    @multimethod
+    def __add__(self, other: float):
+        """Translates the Segment by a constant."""
+        return self._unary_operation(self, (lambda x: x + other))
+
+    @multimethod
+    def __sub__(self, other: 'Segment'):
+        """Subtracts two Segments, sample by sample."""
+        return self._binary_operation((lambda x, y: x - y), self, other)
+
+    @multimethod
+    def __sub__(self, other: float):
+        """Translates the Segment by a constant."""
+        return self._unary_operation(self, (lambda x: x - other))
+
+    @multimethod
+    def __mul__(self, other: 'Segment'):
+        """Multiplies two Segments, sample by sample."""
+        return self._binary_operation((lambda x, y: x * y), self, other)
+
+    @multimethod
+    def __mul__(self, other: float):
+        """Multiplies the Segment by a constant (contraction)."""
+        return self._unary_operation(self, (lambda x: x * other))
+
+    @multimethod
+    def __truediv__(self, other: 'Segment'):
+        """Divides two Segments, sample by sample."""
+        return self._binary_operation((lambda x, y: x / y), self, other)
+
+    @multimethod
+    def __truediv__(self, other: float):
+        """Divides the Segment by a constant (expansion)."""
+        return self._unary_operation(self, (lambda x: x / other))
+
+    @multimethod
+    def __floordiv__(self, other: 'Segment'):
+        """Floor-divides two Segments, sample by sample."""
+        return self._binary_operation((lambda x, y: x // y), self, other)
+
+    @multimethod
+    def __floordiv__(self, other: float):
+        """Floor-divides the Segment by a constant (expansion)."""
+        return self._unary_operation(self, (lambda x: x // other))
+
+    # ===================================
+    # Built-ins (Indexing)
+    def __getitem__(self, index: int | slice | tuple):
+        """
+        The built-in slicing and indexing (segment[x:y]) operations.
+        """
+        return self.__samples[index]
+
+    def __iter__(self) -> iter:
+        return iter(self.__samples)
+
+    # ===================================
+    # Amplitude methods
+
+    def max(self):
+        return np.max(self.__samples)
+
+    def min(self):
+        return np.min(self.__samples)
+
+    # ===================================
+    # Binary Logic
+
+    def __eq__(self, other):
+        return np.array_equal(self.__samples, other.samples)
+
+    def __ne__(self, other):
+        return not self == other
+
+    # ===================================
+    # PROCESSING
+
+    def apply(self, operation: Callable, inplace: bool = True, **kwargs):
+        """
+        Applies a procedure to its samples.
+        """
+        processed_samples = operation(self.samples, **kwargs)
+        if inplace:
+            self.__samples = processed_samples
+            return
+        else:
+            return Segment(processed_samples)
+
+    def apply_and_return(self, operation: Callable, **kwargs) -> Any:
+        """
+        Applies a procedure to its samples and returns the output.
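+
+        Example (an illustrative sketch; the exact printed repr may vary with the numpy version):
+            >>> Segment([1., 2., 3.]).apply_and_return(np.mean)
+            2.0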
+        """
+        return operation(self.samples, **kwargs)
+
+    # ===================================
+    # SERIALIZATION
+
+    def _memory_map(self, path):
+        if not isinstance(self.__samples, memmap):  # Create a memory map for the array
+            _, file_name = mkstemp(dir=path, suffix='.segment')
+            filepath = join(path, file_name)
+            self.__memory_map = memmap(filepath, dtype='float32', mode='r+', shape=self.__samples.shape)
+            self.__memory_map[:] = self.__samples[:]
+            self.__memory_map.flush()  # release memory in RAM; don't know if this is actually helping
+
+    def __hash__(self):
+        return hash(self.__samples.tobytes())  # hash the raw bytes of the samples, since ndarrays are not hashable
+
+    __SERIALVERSION: int = 2
+
+    def __getstate__(self):
+        """
+        1: __initial_datetime (datetime)
+        2: __samples (ndarray)
+        """
+        if isinstance(self.__samples, memmap):  # Case: has been saved as .biosignal before
+            return (Segment._Segment__SERIALVERSION, self.__initial_datetime, self.__samples)
+        elif hasattr(self, '_Segment__memory_map'):  # Case: being saved as .biosignal for the first time
+            return (Segment._Segment__SERIALVERSION, self.__initial_datetime, self.__memory_map)
+        else:  # Case: being called by deepcopy
+            return (Segment._Segment__SERIALVERSION, self.__initial_datetime, self.__samples)
+
+    def __setstate__(self, state):
+        """
+        Version 1 and 2:
+        1: __initial_datetime (datetime)
+        2: __samples (ndarray)
+        3: __sampling_frequency (Frequency)
+        """
+        if state[0] == 1 or state[0] == 2:
+            self.__initial_datetime, self.__samples, self.__sampling_frequency = state[1], state[2], state[3]
+            self.__final_datetime = self.initial_datetime + timedelta(seconds=len(self.__samples) / self.__sampling_frequency)
+            self.__is_filtered = False
+            self.__raw_samples = self.__samples
+        else:
+            raise IOError(
+                f'Version of Segment object not supported. Serialized version: {state[0]}; '
+                f'Supported versions: 1, 2.')
+
+
diff --git a/src/ltbio/biosignals/_Segment.pyi b/src/ltbio/biosignals/_Segment.pyi
new file mode 100644
index 00000000..7a5dcac5
--- /dev/null
+++ b/src/ltbio/biosignals/_Segment.pyi
@@ -0,0 +1,104 @@
+# -*- encoding: utf-8 -*-
+
+# ===================================
+
+# IT - LongTermBiosignals
+
+# Package: biosignals
+# Module: Segment
+# Description: Class Segment, which represents an uninterrupted sequence of samples of a timeseries.
+
+# Contributors: João Saraiva, Mariana Abreu
+# Created: 20/04/2022
+# Last Updated: 22/07/2022
+
+# ===================================
+
+from datetime import datetime
+from typing import Sequence
+
+from multimethod import multimethod
+from numpy import ndarray
+
+
+class Segment():
+    # INITIALIZERS
+    def __init__(self, samples: ndarray | Sequence[float]): ...
+
+    # GETTERS
+    @property
+    def samples(self) -> ndarray: ...
+
+    # BUILT-INS (Basics)
+    def __copy__(self) -> Segment: ...
+    def __hash__(self) -> int: ...
+    def __str__(self) -> str: ...
+    def __repr__(self) -> str: ...
+    def __len__(self) -> int: ...
+
+    # BUILT-INS (Joining Segments)
+    def append(self, samples: ndarray | Sequence[float]) -> None: ...
+    @classmethod
+    def concatenate(cls, *other: 'Segment') -> 'Segment': ...
+
+    # BUILT-INS (Arithmetic)
+    @multimethod
+    def __add__(self, other: 'Segment') -> 'Segment': ...
+
+    @multimethod
+    def __add__(self, other: float) -> 'Segment': ...
+
+    @multimethod
+    def __sub__(self, other: 'Segment') -> 'Segment': ...
+
+    @multimethod
+    def __sub__(self, other: float) -> 'Segment': ...
+
+    @multimethod
+    def __mul__(self, other: 'Segment') -> 'Segment': ...
+
+    @multimethod
+    def __mul__(self, other: float) -> 'Segment': ...
+
+    @multimethod
+    def __truediv__(self, other: 'Segment') -> 'Segment': ...
+
+    @multimethod
+    def __truediv__(self, other: float) -> 'Segment': ...
+
+    @multimethod
+    def __floordiv__(self, other: 'Segment') -> 'Segment': ...
+
+    @multimethod
+    def __floordiv__(self, other: float) -> 'Segment': ...
+
+
+    # BUILT-INS (Indexing)
+    def __getitem__(self, index: int | slice | tuple) -> float | Segment: ...
+    def __iter__(self) -> iter: ...
+
+    # BUILT-INS (Binary Logic)
+    def __eq__(self, other: Segment) -> bool: ...
+    def __ne__(self, other: Segment) -> bool: ...
+
+    # OTHER LOGIC WITH TIME
+    def overlaps(self, other) -> bool: ...
+    def adjacent(self, other) -> bool: ...
+
+    # SHORTCUT STATISTICS
+    def max(self) -> float: ...
+    def argmax(self) -> tuple[datetime]: ...
+    def min(self) -> float: ...
+    def argmin(self) -> tuple[datetime]: ...
+    def mean(self) -> float: ...
+    def median(self) -> float: ...
+    def std(self) -> float: ...
+    def var(self) -> float: ...
+    def abs(self) -> Segment: ...
+    def diff(self) -> Segment: ...
+
+    # SERIALIZATION
+    __SERIALVERSION: int = 2
+    def __getstate__(self) -> tuple: ...
+    def __setstate__(self, state: tuple) -> None: ...
diff --git a/src/ltbio/biosignals/_Timeline.py b/src/ltbio/biosignals/_Timeline.py
new file mode 100644
index 00000000..24e75206
--- /dev/null
+++ b/src/ltbio/biosignals/_Timeline.py
@@ -0,0 +1,459 @@
+# -- encoding: utf-8 --
+from copy import deepcopy
+# ===================================
+
+# IT - LongTermBiosignals
+
+# Package: src/ltbio/biosignals/timeseries
+# Module: Timeline
+# Description:
+
+# Contributors: João Saraiva
+# Created: 08/02/2023
+
+# ===================================
+from datetime import datetime, timedelta
+from functools import reduce
+from typing import Sequence, List
+
+import matplotlib.pyplot as plt
+from datetimerange import DateTimeRange
+from matplotlib import cm
+from matplotlib.dates import date2num
+from matplotlib.lines import Line2D
+from matplotlib.patches import Rectangle
+
+
+class Timeline():
+
+    class Group():
+
+        def __init__(self, intervals: Sequence[DateTimeRange] = [], points: Sequence[datetime] = [], name: str = None, color_hex: str = None):
+            self.intervals = list(intervals)
+            self.points = list(points)
+            self.name = name
+            self.color_hex = color_hex
+
+        def __repr__(self):
+            res = ''
+            if 0 < len(self.intervals):
+                if len(self.intervals) < 10:
+                    res += ' U '.join(['[' + str(interval) + '[' for interval in self.intervals])
+                else:
+                    res += f'{len(self.intervals)} intervals with {self.duration} of total duration'
+            if 0 < len(self.points):
+                if len(self.points) < 10:
+                    res += '\nand the following timepoints:\n'
+                    res += ', '.join([str(point) for point in self.points])
+                else:
+                    res += f'\nand {len(self.points)} timepoints.\n'
+            return res
+
+        @property
+        def initial_datetime(self) -> datetime | None:
+            all_datetimes = [interval.start_datetime for interval in self.intervals] + self.points
+            if len(all_datetimes) > 0:
+                return min(all_datetimes)
+            else:
+                return None
+
+        @property
+        def final_datetime(self) -> datetime | None:
+            all_datetimes = [interval.end_datetime for interval in self.intervals] + self.points
+            if len(all_datetimes) > 0:
+                return max(all_datetimes)
+            else:
+                return None
+
+        @property
+        def duration(self) -> timedelta:
return sum([interval.timedelta for interval in self.intervals], timedelta())
+
+        @property
+        def has_only_intervals(self) -> bool:
+            return len(self.intervals) > 0 and len(self.points) == 0
+
+        @property
+        def has_intervals(self) -> bool:
+            return len(self.intervals) > 0
+
+        @property
+        def has_only_points(self) -> bool:
+            return len(self.intervals) == 0 and len(self.points) > 0
+
+        @property
+        def has_points(self) -> bool:
+            return len(self.points) > 0
+
+        @property
+        def is_empty(self) -> bool:
+            return len(self.intervals) == 0 and len(self.points) == 0
+
+        def _as_index(self) -> tuple | None:
+            if self.has_only_intervals:
+                return tuple(self.intervals)
+            if self.has_only_points:
+                return tuple(self.points)
+            return None
+
+    def __init__(self, *groups: Group, name: str = None):
+        self.groups = list(groups)
+        self.__name = name
+
+    @property
+    def name(self):
+        return self.__name if self.__name else "No Name"
+
+    @name.setter
+    def name(self, name: str):
+        self.__name = name
+
+    def __repr__(self):
+        if len(self.groups) == 1:
+            return repr(self.groups[0])
+        else:
+            res = ''
+            for g in self.groups:
+                res += f'\nGroup {g.name}\n'
+                res += repr(g)
+            return res
+
+    def __and__(self, other):
+        if isinstance(other, Timeline):
+            groups = []
+            groups += self.groups
+            groups += other.groups
+            group_names = [g.name for g in groups]
+            if len(set(group_names)) != len(group_names):
+                raise NameError('Cannot join Timelines with groups with the same names.')
+            return Timeline(*groups, name=self.name + " and " + other.name)
+        else:
+            raise TypeError("Both operands of '&' must be Timelines.")
+
+    def __getitem__(self, key):
+        if isinstance(key, str):
+            for g in self.groups:
+                if g.name == key:
+                    return g
+            raise KeyError(f"There is no group named '{key}'.")
+        else:
+            raise TypeError('Invalid argument type.')
+
+    @property
+    def group_names(self) -> set[str]:
+        return set(g.name for g in self.groups)
+
+    @property
+    def initial_datetime(self) -> datetime:
+        """
+        Finds the minimum initial datetime of all groups.
+        Careful: Empty groups are skipped, since their initial datetime is None.
+        """
+        return min([g.initial_datetime for g in self.groups if g.initial_datetime is not None])
+
+    @property
+    def final_datetime(self) -> datetime:
+        """
+        Finds the maximum final datetime of all groups.
+        Careful: Empty groups are skipped, since their final datetime is None.
+        """
+        return max([g.final_datetime for g in self.groups if g.final_datetime is not None])
+
+    @property
+    def has_single_group(self) -> bool:
+        return len(self.groups) == 1
+
+    @property
+    def single_group(self) -> Group:
+        return self.groups[0] if self.has_single_group else None
+
+    @property
+    def duration(self) -> timedelta:
+        if len(self.groups) == 1:
+            return self.groups[0].duration
+        else:
+            raise NotImplementedError()
+
+    @property
+    def is_empty(self) -> bool:
+        return all([g.is_empty for g in self.groups])
+
+    @property
+    def is_index(self) -> bool:
+        """
+        Returns whether or not this can serve as an index to a Biosignal.
+        A Timeline can be an index when:
+        - It only contains one interval or a union of intervals (serves as a subdomain)
+        - It only contains one point or a set of points (serves as a set of objects)
+        """
+        return len(self.groups) == 1 and (self.groups[0].has_only_intervals ^ self.groups[0].has_only_points)
+
+    def _as_index(self) -> tuple | None:
+        if self.is_index:
+            return self.groups[0]._as_index()
+
+    def plot(self, show: bool = True, save_to: str = None):
+        fig = plt.figure(figsize=(len(self.groups) * 10, len(self.groups) * 2))
+        ax = plt.gca()
+        legend_elements = []
+
+        cmap = cm.get_cmap('tab20b')
+
+        if not self.is_empty:
+            for y, g in enumerate(self.groups):
+                color = g.color_hex
+                if color is None:
+                    color = cmap(y / len(self.groups))
+
+                for interval in g.intervals:
+                    start = date2num(interval.start_datetime)
+                    end = date2num(interval.end_datetime)
+                    rect = Rectangle((start, y + 0.4), end - start, 0.4, facecolor=color, alpha=0.5)
+                    ax.add_patch(rect)
+
+                for point in g.points:
+                    # scatter sizes are passed via 's'; 'markersize' is a Line2D property, not a scatter one
+                    ax.scatter(date2num(point), y + 0.95, color=color, alpha=0.5, marker='o', s=100)
+
+                if len(self.groups) > 1:
+                    legend_elements.append(Line2D([0], [0], marker='o', color=color, label=g.name, markerfacecolor='g', markersize=10))
+
+        ax.set_xlim(date2num(self.initial_datetime), date2num(self.final_datetime))
+        ax.set_ylim(0, len(self.groups))
+        ax.get_yaxis().set_visible(False)
+        for pos in ['right', 'top', 'left']:
+            plt.gca().spines[pos].set_visible(False)
+        ax.xaxis_date()
+        fig.autofmt_xdate()
+
+        if len(self.groups) > 1:
+            ax.legend(handles=legend_elements, loc='center')
+
+        if self.name:
+            fig.suptitle(self.name, fontsize=11)
+        fig.tight_layout()
+        if save_to is not None:
+            fig.savefig(save_to)
+        plt.show() if show else plt.close()
+
+    def _repr_png_(self):
+        self.plot()
+
+    @classmethod
+    def union(cls, *timelines):
+        # Check input
+        if not all(isinstance(tl, Timeline) for tl in timelines):
+            raise TypeError("Give Timeline objects to Timeline.union.")
+        if len(timelines) < 2:
+            raise ValueError("Give at least 2 Timelines to compute their union.")
+
+        # Get the set of intervals of each Timeline
+        tl_intervals = []
+        for i, tl in enumerate(timelines):
+            if tl.has_single_group and tl.single_group.has_only_intervals:
+                tl_intervals.append(tl.single_group.intervals)
+            else:
+                raise AssertionError(f"The {i+1}th Timeline does not have a single group with only intervals.")
+
+        # Binary function
+        def union_of_two_timelines(intervals1: List[DateTimeRange], intervals2: List[DateTimeRange]):
+            intervals = sorted(intervals1 + intervals2, key=lambda x: x.start_datetime)
+            union = [intervals[0]]
+            for i in range(1, len(intervals)):
+                if union[-1].end_datetime >= intervals[i].start_datetime:
+                    # Replace the merged interval instead of mutating it, so the input Timelines stay untouched
+                    union[-1] = DateTimeRange(union[-1].start_datetime, max(union[-1].end_datetime, intervals[i].end_datetime))
+                else:
+                    union.append(intervals[i])
+            return union
+
+        res_intervals = reduce(union_of_two_timelines, tl_intervals)
+        return Timeline(Timeline.Group(res_intervals), name="Union of " + ', '.join(tl.name for tl in timelines))
+
+    @classmethod
+    def intersection(cls, *timelines):
+        # Check input
+        if not all(isinstance(tl, Timeline) for tl in timelines):
+            raise TypeError("Give Timeline objects to Timeline.intersection.")
+        if len(timelines) < 2:
+            raise ValueError("Give at least 2 Timelines to compute their intersection.")
+
+        # Binary function
+        def intersection_of_two_timelines(intervals1: List[DateTimeRange], intervals2: List[DateTimeRange]):
+            # Sort copies, to avoid mutating the callers' interval lists
+            intervals1 = sorted(intervals1, key=lambda x: x.start_datetime)
+            intervals2 = sorted(intervals2, key=lambda x: x.start_datetime)
+
+            intersection = []
+            i, j = 0, 0
+            while i < len(intervals1) and j < len(intervals2):
+                if intervals1[i].end_datetime <= intervals2[j].start_datetime:
+                    i += 1
+                elif intervals2[j].end_datetime <= intervals1[i].start_datetime:
+                    j += 1
+                else:
+                    start = max(intervals1[i].start_datetime, intervals2[j].start_datetime)
+                    end = min(intervals1[i].end_datetime, intervals2[j].end_datetime)
+                    intersection.append(DateTimeRange(start, end))
+                    if intervals1[i].end_datetime <= intervals2[j].end_datetime:
+                        i += 1
+                    else:
+                        j += 1
+
+            return intersection
+
+        # Get the set of intervals of each Timeline
+        # Case A: all Timelines have a single group with only intervals
+        if all(tl.has_single_group for tl in timelines):
+            if any(tl.single_group.has_points for tl in timelines):
+                raise NotImplementedError("Give Timelines with only intervals.")
+            elif any(tl.single_group.has_intervals for tl in timelines):
+                tl_intervals = [tl.single_group.intervals for tl in timelines]
+
+                res_intervals = reduce(intersection_of_two_timelines, tl_intervals)
+                return Timeline(Timeline.Group(res_intervals, name=timelines[0].single_group.name),
+                                name="Intersection of " + ', '.join(tl.name for tl in timelines))
+            else:
+                return Timeline(Timeline.Group(name=timelines[0].single_group.name),
+                                name="Intersection of " + ', '.join(tl.name for tl in timelines))  # empty Timeline
+
+        # Case B: all Timelines have the same number of groups with matching names, and each group has only intervals:
+        else:
+            group_names = timelines[0].group_names
+            if all(tl.group_names == group_names for tl in timelines):
+                # all(...) must nest: a bare list of booleans is always truthy
+                if all(all(g.has_only_intervals for g in tl.groups) for tl in timelines):
+                    intervals_by_group = {name: [] for name in group_names}
+                    for tl in timelines:
+                        for g in tl.groups:
+                            intervals_by_group[g.name].append(g.intervals)
+
+                    for name in group_names:
+                        intervals_by_group[name] = reduce(intersection_of_two_timelines, intervals_by_group[name])
+
+                    return Timeline(*[Timeline.Group(intervals_by_group[name], name=name) for name in group_names],
+                                    name="Intersection of " + ', '.join(tl.name for tl in timelines))
+                else:
+                    raise NotImplementedError("Give Timelines with only intervals.")
+            else:
+                raise NotImplementedError("Give Timelines with matching group names.")
+
+    def __sub__(self, other):
+        """
+        Returns A\B (A except B), where A and B are Timelines.
+        """
+
+        if not isinstance(other, Timeline):
+            raise TypeError("The second operand should be a Timeline.")
+
+        def _binary_except(i1: DateTimeRange, i2: DateTimeRange) -> List[DateTimeRange]:
+            return i1.subtract(i2)  # Returns a list of 0, 1 or 2 intervals
+
+        def _set_except_set(intervals1: List[DateTimeRange], intervals2: List[DateTimeRange]):
+            # Subtract every interval of intervals2 from whatever currently remains of intervals1.
+            # The result list is rebuilt at each step, because a subtraction can split one interval
+            # in two; this also avoids mutating a list while iterating over it.
+            result = deepcopy(intervals1)
+            for i2 in intervals2:
+                updated = []
+                for i1 in result:
+                    if i1.is_intersection(i2):
+                        updated.extend(_binary_except(i1, i2))  # 0, 1 or 2 intervals, kept in order
+                    else:
+                        updated.append(i1)
+                result = updated
+            return result
+
+        # Case A: both Timelines have a single group with only intervals
+        if self.has_single_group and other.has_single_group:
+            if self.single_group.has_points or other.single_group.has_points:
+                raise NotImplementedError("Give Timelines with only intervals.")
+            elif not self.single_group.has_intervals:
+                return self  # A\B = A, when A is empty
+            else:
+                if not other.single_group.has_intervals:
+                    return self  # A\B = A, when B is empty
+                else:
+                    intervals1 = self.single_group.intervals
+                    intervals2 = other.single_group.intervals
+                    res = _set_except_set(intervals1, intervals2)
+                    return Timeline(Timeline.Group(res, name=self.single_group.name),
+                                    name=f"{self.name} except {other.name}")
+
+        # Case B: both Timelines have the same number of groups with matching names, and each group has only intervals:
+        else:
+            if self.group_names == other.group_names:
+                # single_group is None for multi-group Timelines, so every group must be checked
+                if any(g.has_points for g in self.groups) or any(g.has_points for g in other.groups):
+                    raise NotImplementedError("Give Timelines with only intervals.")
+
+                intervals_by_group = {name: [] for name in self.group_names}
+                for g in self.groups:
+                    intervals1 = g.intervals
+                    intervals2 = other[g.name].intervals
+                    res = _set_except_set(intervals1, intervals2)
+                    intervals_by_group[g.name] = res
+
+                return Timeline(*[Timeline.Group(intervals_by_group[g.name], name=g.name) for g in self.groups],
+                                name=f"{self.name} except {other.name}")
+            else:
+                raise NotImplementedError("Give Timelines with matching group names.")
+
+    def agglomerate(self, min_interval: timedelta, max_delta: timedelta):
+        """
+        Agglomerates the intervals of a Timeline, based on a minimum interval and a maximum delta.
+        It can be thought of as a filter or smoother of time intervals.
+        It is useful when the Timeline contains many small intervals separated by, e.g., mere milliseconds,
+        which become more relevant for analysis once agglomerated.
+ :param min_interval: minimum interval duration every interval should have, inclusively + :param max_delta: maximum duration between two consecutive intervals to be agglomerated, inclusively + :return: a new Timeline with agglomerated intervals + """ + + def _agglomerate(intervals: List[DateTimeRange], min_interval: timedelta, max_delta: timedelta): + if len(intervals) == 0: + return intervals + + # Agglomerate every consecutive pair of intervals + new_intervals = [intervals[0], ] + for i in intervals[1:]: + if i.start_datetime - new_intervals[-1].end_datetime <= max_delta: + new_intervals[-1] = DateTimeRange(new_intervals[-1].start_datetime, i.end_datetime) # substitute + else: + new_intervals.append(i) # add + + # Remove any interval is smaller than min_interval + new_intervals = [i for i in new_intervals if i.timedelta >= min_interval] + return new_intervals + + # Repeat the agglomeration for every group + new_groups = [] + for g in self.groups: + new_groups.append(Timeline.Group(_agglomerate(g.intervals, min_interval, max_delta), name=g.name)) + + # Return a new Timeline with the agglomerated groups + return Timeline(*new_groups, name=f"{self.name} (agglomerated)") + + + EXTENSION = '.timeline' + + def save(self, save_to: str): + # Check extension + if not save_to.endswith(Timeline.EXTENSION): + save_to += Timeline.EXTENSION + # Write + from _pickle import dump + with open(save_to, 'wb') as f: + dump(self, f) + + @classmethod + def load(cls, filepath: str): + # Check extension + if not filepath.endswith(Timeline.EXTENSION): + raise IOError("Only .timeline files are allowed.") + + # Read + from _pickle import load + with open(filepath, 'rb') as f: + timeline = load(f) + return timeline diff --git a/src/ltbio/biosignals/_Timeline.pyi b/src/ltbio/biosignals/_Timeline.pyi new file mode 100644 index 00000000..945edf46 --- /dev/null +++ b/src/ltbio/biosignals/_Timeline.pyi @@ -0,0 +1,120 @@ +# -- encoding: utf-8 -- +# =================================== + +# IT - LongTermBiosignals + +# Package: src/ltbio/biosignals/timeseries +# Module: Timeline +# Description: + +# Contributors: João Saraiva +# Created: 08/02/2023 + + +# =================================== + +from datetime import datetime, timedelta +from typing import Iterable + +from datetimerange import DateTimeRange +from multipledispatch import dispatch + + +class Timeline(): + class Group(): + + # INITIALIZERS + @dispatch(DateTimeRange, str, str) # Set of Intervals in time + def __init__(self, *intervals: DateTimeRange, name: str = None) -> Timeline.Group: ... + @dispatch(datetime, str, str) # Set of Points in time + def __init__(self, *points: datetime, name: str = None) -> Timeline.Group:... + + # BUILT-INS (Basics) + def __repr__(self) -> str: ... + def __str__(self) -> str: ... + + # GETTERS + @property + def name(self) -> str: ... + @property + def start(self) -> datetime | None: ... + @property + def end(self) -> datetime | None: ... + @property + def duration(self) -> timedelta: ... + + # SETTERS + @name.setter + def name(self, name: str) -> None: ... + + # BOOLEAN CHECKERS + @property + def has_only_intervals(self) -> bool: ... + @property + def has_intervals(self) -> bool: ... + @property + def has_only_points(self) -> bool: ... + @property + def has_points(self) -> bool: ... + @property + def is_empty(self) -> bool: ... + + + # INITIALIZERS + def __init__(self, *groups: Group, name: str = None) -> Timeline: ... + + # GETTERS + @property + def name(self) -> str: ... + @property + def group_names(self) -> set[str]: ... 
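+    # Usage sketch of this class's interval set algebra (see the joining methods below;
+    # hypothetical datetimes; `union`, `intersection`, `-` and `agglomerate` all expect
+    # Timelines whose groups contain only intervals):
+    #
+    #     from datetime import datetime, timedelta
+    #     from datetimerange import DateTimeRange
+    #
+    #     a = Timeline(Timeline.Group(intervals=[DateTimeRange(datetime(2023, 1, 1, 0), datetime(2023, 1, 1, 2))]), name='A')
+    #     b = Timeline(Timeline.Group(intervals=[DateTimeRange(datetime(2023, 1, 1, 1), datetime(2023, 1, 1, 3))]), name='B')
+    #
+    #     u = Timeline.union(a, b)         # one interval: [00:00, 03:00]
+    #     i = Timeline.intersection(a, b)  # one interval: [01:00, 02:00]
+    #     d = a - b                        # one interval: [00:00, 01:00]
+    #     smooth = u.agglomerate(min_interval=timedelta(minutes=1), max_delta=timedelta(seconds=30))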
+ @property + def start(self) -> datetime | None: ... + @property + def end(self) -> datetime | None: ... + @property + def duration(self) -> timedelta: ... + @property + def single_group(self) -> Group: ... + + # SETTERS + @name.setter + def name(self, name: str) -> None: ... + + # BOOLEAN CHECKERS + @property + def has_single_group(self) -> bool: ... + @property + def is_empty(self) -> bool: ... + @property + def is_index(self) -> bool: ... + + # BUILT-INS (Basics) + def __repr__(self) -> str: ... + def __str__(self) -> str: ... + + # BUILT-INS (Joining Timelines) + def __and__(self, other: Timeline) -> Timeline: ... + def __rshift__(self, other: Timeline) -> Timeline: ... + def __sub__(self, other) -> Timeline: ... + @classmethod + def union(cls, *timelines) -> Timeline: ... + @classmethod + def intersection(cls, *timelines) -> Timeline: ... + + # BUILT-INS (Indexing) + def __getitem__(self, *group: str) -> Timeline: ... + def __iter__(self) -> Iterable[Timeline.Group]: ... + + # USEFUL TOOLS + def agglomerate(self, min_interval: timedelta, max_delta: timedelta) -> Timeline: ... + + # PLOTTING + def plot(self, show: bool = True, save_to: str = None): ... + def _repr_png_(self): ... + + # SERIALIZATION + EXTENSION = '.timeline' + def save(self, filepath: str) -> None: ... + @classmethod + def load(cls, filepath: str) -> Timeline: ... diff --git a/src/ltbio/biosignals/_Timeseries.py b/src/ltbio/biosignals/_Timeseries.py new file mode 100644 index 00000000..852c2fd4 --- /dev/null +++ b/src/ltbio/biosignals/_Timeseries.py @@ -0,0 +1,810 @@ +# -*- encoding: utf-8 -*- + +# =================================== + +# IT - LongTermBiosignals + +# Package: biosignals +# Module: Timeseries +# Description: Class Timeseries, which mathematically conceptualizes timeseries and their behaviour. +# Class OverlappingTimeseries, a special kind of Timeseries for signal processing purposes. + +# Contributors: João Saraiva, Mariana Abreu +# Created: 20/04/2022 +# Last Updated: 22/07/2022 + +# =================================== + +from datetime import datetime, timedelta +from math import ceil +from os.path import join +from tempfile import mkstemp +from typing import List, Iterable, Collection, Dict, Tuple, Callable, Sequence + +import matplotlib.pyplot as plt +import numpy as np +from biosppy.signals.tools import power_spectrum +from datetimerange import DateTimeRange +from dateutil.parser import parse as to_datetime +from multimethod import multimethod +from numpy import array, append, ndarray, divide, concatenate, tile, memmap +from scipy.signal import resample + +from ._Event import Event +from ._Segment import Segment +from ._Timeline import Timeline +from .units import Unit, Frequency +from .._core.exceptions import DifferentSamplingFrequenciesError, DifferentUnitsError, TimeseriesOverlappingError, \ + DifferentDomainsError +from .._core.operations import Operator, Operation + + +#from ltbio.processing.filters.Filter import Filter + +class Timeseries(): + """ + A Timeseries is a sequence of data points that occur in successive order over some period of time. + In a Biosignal, one Timeseries' data points are the measurement of a biological variable, in some unit, taken from a + sensor or channel. This data points are often called samples, and are acquired at fixed sampling frequency. + + To each time point of a Timeseries' domain corresponds one and only one sample. 
However, a Timeseries might be
+    contiguous if a sample was acquired at every sampling time point, or discontiguous if there were interruptions. Each
+    interval/sequence of contiguous samples is called a Segment, but those are managed internally.
+
+    Constructors / Initializers
+    ______________
+
+    Timeseries: default
+    Instantiates a Timeseries with a contiguous sequence of samples.
+
+    Timeseries.withDiscontiguousSegments
+    Instantiates a Timeseries with discontiguous sequences of samples.
+
+
+    Properties:
+    ______________
+
+    name: str
+    The name of the Timeseries, if any.
+
+    samples: array  # FIXME
+    Contiguous or discontiguous sequence of samples.
+
+    sampling_frequency: float
+    The frequency at which the samples were acquired, in Hz.
+
+    unit: Unit
+    The physical unit at which the samples should be interpreted.
+
+    events: tuple[Event]
+    The Events associated in time with the Timeseries.
+
+    start: datetime
+    The date and time of the first sample.
+
+    end: datetime
+    The date and time of the last sample.
+
+    duration: timedelta
+    The total time of acquired samples, excluding interruptions.
+
+    domain: Timeline
+    The intervals of date and time in which the Timeseries is defined, i.e., in which samples were acquired.
+
+    is_equally_segmented: bool
+    The logic value stating if each interval in the domain has the same duration.
+
+    segment_duration: timedelta:
+    Duration of all segments, if is_equally_segmented is True.
+
+
+    Built-ins:
+    ______________
+
+    len: Returns the total number of samples.
+
+    copy: Copies all Timeseries' content.
+
+    iter: Returns an iterator over the samples of all Timeseries' Segments.
+
+    in: Returns True if a date, time or event is contained in the Timeseries.
+
+    [] : Indexes by date, time or events.
+
+    + : Adds Timeseries.
+
+    += : Appends more samples to the last Timeseries' Segment.
+
+    Methods:
+    ______________
+
+    append(datetime, array):
+    Appends a new sequence of samples in a separate Segment.
+
+    associate(Event):
+    Associates a given Event in time with the Timeseries.
+
+    dissociate(str):
+    Removes any association the Timeseries has with an Event with the given name.
+
+    filter(Filter):
+    Filters the Timeseries with the given design.
+
+    undo_filters():
+    Reverts the effect of all filters.
+
+    plot():
+    Plots the Timeseries amplitude over time, with all its interruptions, if any.
+
+    plot_spectrum():
+    Plots the Timeseries frequency spectrum.
+
+    ______________
+
+    Full documentation in:
+    https://github.com/jomy-kk/IT-LongTermBiosignals/wiki/%5BClass%5D-Timeseries
+    """
+
+    # INITIALIZERS
+    @multimethod
+    def __init__(self, segments_by_time: dict[datetime, ndarray | Sequence[float] | Segment], sampling_frequency: float,
+                 units: Unit = None, name: str = None):
+        """
+        Receives a dictionary mapping the start datetime of each contiguous run of samples to those samples,
+        together with the sampling frequency of the samples.
+        Additionally, it can receive the samples' units and a name, if needed.
+
+        Parameters
+        ------------
+        segments_by_time: dict[datetime, ndarray | Sequence[float] | Segment]
+        The samples to store, keyed by the date and time of their first sample; one entry per contiguous run.
+
+        sampling_frequency: float | Frequency
+        The frequency at which the samples were sampled.
+
+        units: Unit
+        The physical units of the variable measured.
+
+        name: str
+        A symbolic name for the Timeseries. It is mentioned in plots, reports, error messages, etc.
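+
+        Example
+        ------------
+        A sketch of the expected call (hypothetical values; two contiguous runs one minute apart):
+
+            >>> from datetime import datetime
+            >>> ts = Timeseries({datetime(2023, 1, 1, 0, 0, 0): [0.1, 0.2, 0.3],
+            ...                  datetime(2023, 1, 1, 0, 1, 0): [0.4, 0.5]},
+            ...                 sampling_frequency=1.0, name='Toy signal')
+            >>> ts.n_segments
+            2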
+        """
+        # Metadata
+        self.__sampling_frequency = Frequency(sampling_frequency)
+        self.__units = units
+        self.__name = name
+
+        # Segments
+        self.__segments = [Segment(samples, start) for start, samples in segments_by_time.items()]
+        self.__segments = sorted(self.__segments, key=lambda segment: segment.start)  # Sort by start datetime
+
+    # ===================================
+    # Properties (Getters)
+    @property
+    def segments(self) -> tuple[Segment]:
+        return tuple(self.__segments)
+
+    @property
+    def __samples(self) -> ndarray:
+        return np.concatenate([seg.samples for seg in self.__segments])  # np.concatenate requires a sequence, not a generator
+
+    @property
+    def n_segments(self) -> int:
+        return len(self.__segments)
+
+    @property
+    def sampling_frequency(self) -> float:
+        """The frequency at which the samples were acquired, in Hz."""
+        return float(self.__sampling_frequency)
+
+    @property
+    def start(self) -> datetime:
+        """The date and time of the first sample."""
+        return self.__segments[0].start  # Is the initial datetime of the first Segment
+
+    @property
+    def end(self) -> datetime:
+        """The date and time of the last sample."""
+        return self.__segments[-1].end  # Is the final datetime of the last Segment
+
+    def __segment_duration(self, segment: Segment) -> timedelta:
+        return timedelta(seconds=len(segment) / self.sampling_frequency)
+
+    def __segment_end(self, segment: Segment) -> datetime:
+        return segment.start + self.__segment_duration(segment)
+
+    @property
+    def duration(self) -> timedelta:
+        """The actual recorded time without interruptions."""
+        # sum() needs a timedelta start value; the default 0 cannot be added to a timedelta
+        return sum((self.__segment_duration(segment) for segment in self.__segments), timedelta())
+
+    @property
+    def domain(self) -> Timeline:
+        intervals = [DateTimeRange(segment.start, segment.end) for segment in self.__segments]
+        # 'name' must be passed by keyword; a second positional argument would be taken as another Group
+        return Timeline(Timeline.Group(intervals=intervals), name=f"{self.name} Domain")
+
+    @property
+    def unit(self) -> Unit:
+        """The physical unit at which the samples should be interpreted."""
+        return self.__units
+
+    @property
+    def name(self) -> str:
+        """The name of the Timeseries, if any."""
+        return self.__name
+
+    # ===================================
+    # SETTERS
+    @name.setter
+    def name(self, name: str) -> None:
+        """Set or reset a name for the Timeseries."""
+        self.__name = name
+
+    # ===================================
+    # BOOLEAN CHECKERS
+    @property
+    def is_contiguous(self) -> bool:
+        """States if there are no interruptions in time."""
+        return len(self.__segments) == 1
+
+    # ===================================
+    # BUILT-INS (Basics)
+    def __copy__(self) -> 'Timeseries':
+        # Rebuild the segments_by_time dictionary the initializer expects; the sampling frequency
+        # and name are immutable (float, str), so they need no explicit copies.
+        return Timeseries({seg.start: seg.__copy__() for seg in self.__segments}, self.sampling_frequency,
+                          self.__units, self.__name)
+
+    def __len__(self) -> int:
+        return sum([len(seg) for seg in self.__segments])
+
+    def __iter__(self) -> iter:
+        for segment in self.__segments:
+            yield from segment
+
+    @multimethod
+    def __contains__(self, item: datetime | DateTimeRange) -> bool:
+        return any([item in segment for segment in self.__segments])
+
+    @multimethod
+    def __contains__(self, item: str) -> bool:
+        ...
+
+    # BUILT-INS (Indexing)
+    @multimethod
+    def __getitem__(self, item: int) -> Segment:
+        ...
+
+    @multimethod
+    def __getitem__(self, item: datetime) -> float:
+        return self.__get_sample(item)  # __get_samples expects two datetimes; __get_sample fetches a single value
+
+    @multimethod
+    def __getitem__(self, item: str):
+        return self[to_datetime(item)]
+
+    @multimethod
+    def __getitem__(self, item: slice):
+        # Discard step
+        if item.step is not None:
+            raise IndexError("Indexing with step is not allowed for Timeseries. Try downsampling it first.")
+        # Get start and end
+        start = item.start if item.start is not None else self.start
+        end = item.stop if item.stop is not None else self.end
+        # Convert to datetime, if needed
+        start = to_datetime(start) if isinstance(start, str) else start
+        end = to_datetime(end) if isinstance(end, str) else end
+        # Get the samples
+        return Timeseries(segments=self.__get_samples(start, end), sampling_frequency=self.sampling_frequency,
+                          units=self.unit, name=self.name)
+
+    @multimethod
+    def __getitem__(self, item: DateTimeRange):
+        return self[item.start_datetime:item.end_datetime]
+
+    @multimethod
+    def __getitem__(self, item: tuple):
+        # Get each result individually
+        sub_timeseries = [self[ix] for ix in item]
+        return Timeseries.concatenate(sub_timeseries)
+
+    @multimethod
+    def __getitem__(self, item: Timeline):
+        if not item.is_index:
+            raise IndexError("Indexing with a non-index Timeline is not allowed for Timeseries.")
+        # An index Timeline is guaranteed to have exactly one Group, so it can be converted directly
+        return self[item._as_index()]
+
+    # BUILT-INS (Joining Timeseries)
+    @classmethod
+    def _check_meta_compatibility(cls, *timeseries: 'Timeseries', raise_errors: bool = True) -> bool:
+        reference = timeseries[0]  # use the first Timeseries as the comparison reference
+
+        # Find which Timeseries do not have the same sampling frequency
+        incompatible = [ts for ts in timeseries if ts.sampling_frequency != reference.sampling_frequency]
+        if len(incompatible) != 0:
+            incompatible = [reference, ] + incompatible
+            if raise_errors:
+                raise DifferentSamplingFrequenciesError(*incompatible)
+            else:
+                return False
+        # Find which Timeseries do not have the same units
+        incompatible = [ts for ts in timeseries if ts.unit != reference.unit]
+        if len(incompatible) != 0:
+            incompatible = [reference, ] + incompatible
+            if raise_errors:
+                raise DifferentUnitsError(*incompatible)
+            else:
+                return False
+
+        return True  # If no issue was found, then the Timeseries are compatible
+
+    @multimethod
+    def concatenate(self, other: 'Timeseries') -> 'Timeseries':
+        # Check compatibility
+        Timeseries._check_meta_compatibility(self, other)
+
+        # Check overlap
+        overlap = Timeseries.overlap(self, other)
+        if not overlap.is_empty:  # 'overlap' is a Timeline, which defines no __len__
+            raise TimeseriesOverlappingError(self, other, *overlap)
+
+        # Concatenate
+        all_segments = self.__segments + list(other.segments)
+        all_segments = sorted(all_segments, key=lambda seg: seg.start)
+        name = self.name + " concatenated with " + other.name
+        return Timeseries(all_segments, self.__sampling_frequency, self.__units, name)
+
+    def __add_segments(self, *segments: Segment):
+        # Check if self.__segments exists
+        if not hasattr(self, "_Timeseries__segments"):
+            self.__segments = []
+
+        # Check if self.__segments is empty => Yes: sort the given segments and assign them
+        if len(self.__segments) == 0:
+            self.__segments = sorted(segments, key=lambda seg: seg.start)
+            return  # otherwise the loop below would insert every segment a second time
+
+        # => No: Do an Insertion Sort into self.__segments
+        for segment in segments:
+            # Find the position to insert the segment
+            for ix, self_segment in enumerate(self.__segments):
+                if segment.start < self_segment.start:
+                    self.__segments.insert(ix, segment)
+                    break
+            else:
+                self.__segments.append(segment)  # If no position was found, append at the end
+
+    @multimethod
+    def append(self, other: Segment):
+        self.__add_segments(other)
+
+    @property
+    def __sampling_period(self) -> timedelta:
+        return timedelta(seconds=1 / self.sampling_frequency)
+
+    @multimethod
+    def append(self, other: ndarray | Sequence[float | int]):
+        if not self.is_contiguous:
+            raise ValueError("Cannot append samples directly to a Timeseries with interruptions.")
+        self.__segments[0].append(other)
+
+    @classmethod
+    def _check_domain_compatibility(cls, *timeseries: 'Timeseries', raise_errors: bool = True) -> bool:
+        reference = timeseries[0].domain  # use the first Timeseries as the comparison reference
+
+        # Find which Timeseries do not have the same domain
+        incompatible = []
+        for ts in timeseries:
+            domain = ts.domain
+            # Timeline defines no __eq__, so compare the interval lists of the single 'Domain' groups
+            if domain.single_group.intervals != reference.single_group.intervals:
+                incompatible.append(domain)
+
+        if len(incompatible) != 0:  # If there are incompatible domains
+            incompatible = [reference, ] + incompatible
+            if raise_errors:
+                raise DifferentDomainsError(*incompatible)
+            else:
+                return False
+        else:
+            return True  # If no incompatibilities
+
+    @classmethod
+    def overlap(cls, first: 'Timeseries', second: 'Timeseries') -> Timeline:
+        return Timeline.intersection(first.domain, second.domain)
+
+    # BUILT-INS (Arithmetic)
+    @classmethod
+    def _binary_operation(cls, operation: Callable, operator_string: str,
+                          first: 'Timeseries', second: 'Timeseries') -> 'Timeseries':
+        # Check compatibility
+        Timeseries._check_meta_compatibility(first, second)
+        Timeseries._check_domain_compatibility(first, second)
+        # Apply operation
+        new_segments = [operation(x, y) for x, y in zip(first.segments, second.segments)]
+        return Timeseries(segments=new_segments, sampling_frequency=first.sampling_frequency, units=first.unit,
+                          name=first.name + ' ' + operator_string + ' ' + second.name)
+
+    @classmethod
+    def _unary_operation(cls, timeseries: 'Timeseries', operation: Callable, operator_string: str) -> 'Timeseries':
+        # Apply operation
+        new_segments = [operation(x) for x in timeseries.segments]
+        return Timeseries(segments=new_segments, sampling_frequency=timeseries.sampling_frequency, units=timeseries.unit,
+                          name=timeseries.name + ' ' + operator_string)
+
+    @multimethod
+    def __add__(self, other: 'Timeseries') -> 'Timeseries':
+        return Timeseries._binary_operation(lambda x, y: x + y, '+', self, other)
+
+    @multimethod
+    def __add__(self, other: float) -> 'Timeseries':
+        return Timeseries._unary_operation(self, lambda x: x + other, f'+ {other}')
+
+    @multimethod
+    def __sub__(self, other: 'Timeseries') -> 'Timeseries':
+        return Timeseries._binary_operation(lambda x, y: x - y, '-', self, other)
+
+    @multimethod
+    def __sub__(self, other: float) -> 'Timeseries':
+        return Timeseries._unary_operation(self, lambda x: x - other, f'- {other}')
+
+    @multimethod
+    def __mul__(self, other: 'Timeseries') -> 'Timeseries':
+        return Timeseries._binary_operation(lambda x, y: x * y, '*', self, other)
+
+    @multimethod
+    def __mul__(self, other: float) -> 'Timeseries':
+        return Timeseries._unary_operation(self, lambda x: x * other, f'* {other}')
+
+    @multimethod
+    def __truediv__(self, other: 'Timeseries') -> 'Timeseries':
+        return Timeseries._binary_operation(lambda x, y: x / y, '/', self, other)
+
+    @multimethod
+    def __truediv__(self, other: float) -> 'Timeseries':
+        return Timeseries._unary_operation(self, lambda x: x / other, f'/ {other}')
+
+    @multimethod
+    def __floordiv__(self, other: 'Timeseries') -> 'Timeseries':
+        return Timeseries._binary_operation(lambda x, y: x // y, '//', self, other)
+
+    @multimethod
+    def __floordiv__(self, other: float) -> 'Timeseries':
+        return Timeseries._unary_operation(self, lambda x: x // other, f'// {other}')
+
+    # SHORTCUT STATISTICS
+    def max(self) -> float:
+        """Returns the maximum amplitude value of the Timeseries."""
Timeseries.""" + return max([seg.max() for seg in self.__segments]) + + def argmax(self) -> tuple[datetime]: + """ + Returns the datetime(s) where the maximum amplitude value of is verified. + If the max value verifies in multiple timepoints, even in different segments, all of them are returned. + """ + max_value = self.max() + return tuple([seg.argmax() for seg in self.__segments if seg.max() == max_value]) + + def min(self) -> float: + """Returns the minimum amplitude value of the Timeseries.""" + return max([seg.max() for seg in self.__segments]) + + def argmin(self) -> datetime: + """ + Returns the datetime(s) where the minimum amplitude value of is verified. + If the min value verifies in multiple timepoints, even in different segments, all of them are returned. + """ + max_value = self.max() + return tuple([seg.argmax() for seg in self.__segments if seg.max() == max_value]) + + def mean(self) -> float: + """ + Returns the mean amplitude value of the Timeseries. + """ + return float(np.mean(self.__samples)) + + def median(self) -> float: + """ + Returns the median amplitude value of the Timeseries. + """ + return float(np.median(self.__samples)) + + def std(self) -> float: + """ + Returns the standard deviation of the amplitude values of the Timeseries. + """ + return float(np.std(self.__samples)) + + def var(self) -> float: + """ + Returns the variance of the amplitude values of the Timeseries. + """ + return float(np.var(self.__samples)) + + def abs(self) -> 'Timeseries': + """ + Returns a new Timeseries with the absolute value of all samples. + """ + return Timeseries(segments=[seg.abs() for seg in self.__segments], sampling_frequency=self.__sampling_frequency, + units=self.__units, name=f'Absolute of {self.__name})') + + def diff(self) -> 'Timeseries': + """ + Returns a new Timeseries with the difference between consecutive samples, i.e. the discrete derivative. 
+        """
+        return Timeseries(segments=[seg.diff() for seg in self.__segments], sampling_frequency=self.__sampling_frequency,
+                          units=self.__units, name=f'Derivative of {self.__name}')
+
+    # ===================================
+    # INTERNAL USAGE - Convert indexes <-> timepoints && Get Samples
+
+    def __get_sample(self, datetime: datetime) -> float:
+        self.__check_boundaries(datetime)
+        for segment in self.__segments:  # finding the first Segment
+            if datetime in segment:
+                return segment[int((datetime - segment.start).total_seconds() * self.sampling_frequency)]
+        raise IndexError("Datetime given is not defined in this Timeseries.")
+
+    def __get_samples(self, initial_datetime: datetime, final_datetime: datetime) -> List[Segment]:
+        """Returns the samples between the given initial and final datetimes."""
+        self.__check_boundaries(initial_datetime)
+        self.__check_boundaries(final_datetime)
+        res_segments = []
+        for i in range(len(self.__segments)):  # finding the first Segment
+            segment = self.__segments[i]
+            if segment.start <= initial_datetime <= segment.end:
+                if final_datetime <= segment.end:
+                    trimmed_segment = segment[int((initial_datetime - segment.start).total_seconds() * self.sampling_frequency):
+                                              int((final_datetime - segment.start).total_seconds() * self.sampling_frequency)]
+                    res_segments.append(trimmed_segment)
+                    return res_segments
+                else:
+                    if not initial_datetime == segment.end:  # skip what would be an empty set
+                        trimmed_segment = segment[int((initial_datetime - segment.start).total_seconds() * self.sampling_frequency):]
+                        res_segments.append(trimmed_segment)
+                    for j in range(i + 1, len(self.__segments)):  # adding the remaining samples, until the last Segment is found
+                        segment = self.__segments[j]
+                        if final_datetime <= segment.end:
+                            trimmed_segment = segment[:int((final_datetime - segment.start).total_seconds() * self.sampling_frequency)]
+                            res_segments.append(trimmed_segment)
+                            return res_segments
+                        else:
+                            trimmed_segment = segment[:]
+                            res_segments.append(trimmed_segment)
+        return res_segments  # defensive; the boundary checks above should guarantee an earlier return
+
+    def __check_boundaries(self, datetime_or_range: datetime | DateTimeRange) -> None:
+        # 'domain' is a Timeline with a single Group of intervals, so iterate that Group's intervals
+        subdomains = self.domain.single_group.intervals
+        union = ' U '.join([f'[{sd.start_datetime}, {sd.end_datetime}[' for sd in subdomains])
+        if isinstance(datetime_or_range, datetime):
+            if not any(datetime_or_range in sd for sd in subdomains):
+                raise IndexError(f"Datetime given is outside of Timeseries domain, {union}.")
+
+        elif isinstance(datetime_or_range, DateTimeRange):
+            if not any(sd.is_intersection(datetime_or_range) and datetime_or_range.start_datetime != sd.end_datetime
+                       for sd in subdomains):
+                raise IndexError(f"Interval given is outside of Timeseries domain, {union}.")
+
+    def _indices_to_timepoints(self, indices: list[list[int]], by_segment=False) -> tuple[datetime] | tuple[list[datetime]]:
+        all_timepoints = []
+        for index, segment in zip(indices, self.__segments):
+            timepoints = divide(index, self.__sampling_frequency)  # Transform to timepoints
+            x = [segment.start + timedelta(seconds=tp) for tp in timepoints]
+            if by_segment:
+                all_timepoints.append(x)  # Append as list
+            else:
+                all_timepoints += x  # Join them all
+        return tuple(all_timepoints)
+
+    def _to_array(self) -> ndarray:
+        """
+        Converts Timeseries to NumPy ndarray, if it is equally segmented.
+        :return: MxN array, where M is the number of segments and N is their length.
+        :rtype: numpy.ndarray
+        """
+        if len(set(len(segment) for segment in self.__segments)) > 1:  # all Segments must have the same length
+            raise AssertionError("Timeseries needs to be equally segmented to produce a matricial NumPy ndarray.")
+        return np.vstack([segment.samples for segment in self.__segments])
+
+    # ===================================
+    # PLOTS
+
+    def plot_spectrum(self, show: bool = True, save_to: str = None) -> None:
+        ...
+
+    def plot(self, show: bool = True, save_to: str = None) -> None:
+        ...
+
+    def _plot_spectrum(self):
+        colors = ('blue', 'green', 'red')
+        n_columns = len(self.__segments)
+        for i in range(n_columns):
+            segment = self.__segments[i]
+            x, y = power_spectrum(signal=segment.samples)
+            plt.plot(x, y, alpha=0.6, linewidth=0.5,
+                     label='From {0} to {1}'.format(segment.start, segment.end))
+
+    def _plot(self, label: str = None):
+        xticks, xticks_labels = [], []  # to store the initial and final ticks of each Segment
+        SPACE = int(self.__sampling_frequency) * 2  # the empty space between each Segment
+
+        for i in range(len(self.__segments)):
+            segment = self.__segments[i]
+            x, y = range(len(segment)), segment.samples
+            if i > 0:  # except for the first Segment
+                x = array(x) + (xticks[-1] + SPACE)  # shift right in time
+                plt.gca().axvspan(x[0] - SPACE, x[0], alpha=0.05, color='black')  # add empty space in between Segments
+            plt.gca().plot(x, y, linewidth=0.5, alpha=0.7, label=label)
+
+            xticks += [x[0], x[-1]]  # add positions of the first and last samples of this Segment
+
+            # add datetimes of the first and last samples of this Segment
+            if self.__segment_duration(segment) > timedelta(days=1):  # if greater than a day, include dates
+                time_format = "%d-%m-%Y %H:%M:%S"
+            else:  # otherwise, just the time
+                time_format = "%H:%M:%S"
+            xticks_labels += [segment.start.strftime(time_format),
+                              segment.end.strftime(time_format)]
+
+        plt.gca().set_xticks(xticks, xticks_labels)
+        plt.tick_params(axis='x', direction='in')
+
+        if self.unit is not None:  # override ylabel
+            plt.gca().set_ylabel("Amplitude ({})".format(str(self.unit)))
+
+    # ===================================
+    # PROCESSING
+
+    def apply(self, operator: Operator, inplace: bool = True, **kwargs):
+        ...
+
+    @multimethod
+    def undo(self, operation: Operation) -> None:
+        ...
+
+    @multimethod
+    def undo(self, operation: int) -> None:
+        ...
+
+    def _apply_operation(self, operation, **kwargs):
+        """
+        Applies operation in-place to every Segment's samples.
+        """
+        for segment in self.__segments:
+            segment._apply_operation(operation, **kwargs)
+
+    def _apply_operation_and_return(self, operation, iterate_along_segments_key: str | list[str] = None, **kwargs) -> list:
+        """
+        Applies operation out-of-place to every Segment's samples and returns the ordered outputs in a list.
+
+        The 'operation' callable must receive an ndarray of samples as its first argument.
+        It can receive other arguments, which should be passed in '**kwargs'.
+        Whatever the operation returns for each Segment is collected into the returned list.
+        """
+        res = []
+
+        if isinstance(iterate_along_segments_key, str):
+            items = kwargs[iterate_along_segments_key]
+            for segment, item in zip(self.__segments, items):  # iterating self directly would yield samples, not Segments
+                kwargs[iterate_along_segments_key] = item
+                res.append(segment._apply_operation_and_return(operation, **kwargs))
+        elif isinstance(iterate_along_segments_key, list) and all(isinstance(x, str) for x in iterate_along_segments_key):
+            items = [kwargs[it] for it in iterate_along_segments_key]
+            for segment, *segment_items in zip(self.__segments, *items):
+                for it, item in zip(iterate_along_segments_key, segment_items):
+                    kwargs[it] = item  # each keyword gets this Segment's own item
+                res.append(segment._apply_operation_and_return(operation, **kwargs))
+        else:
+            for segment in self.__segments:
+                res.append(segment._apply_operation_and_return(operation, **kwargs))
+        return res
+
+    # Processing Shortcuts
+    def resample(self, frequency: float) -> None:
+        frequency = frequency if isinstance(frequency, Frequency) else Frequency(frequency)
+        for segment in self.__segments:
+            segment.resample(frequency)
+        self.__sampling_frequency = frequency  # The sf of all Segments points to this property in Timeseries. So, this is only changed here.
+
+    def undo_segmentation(self, time_intervals: tuple[DateTimeRange]) -> None:
+        ...
+
+    def contiguous(self):
+        """
+        Returns a contiguous Timeseries, by dropping all interruptions, i.e., concatenating all Segments into one, if any.
+        """
+        if len(self.__segments) > 1:
+            single_segment = Segment.concatenate(*self.__segments)  # concatenate takes Segments as varargs
+            return Timeseries(single_segment, self.__sampling_frequency, self.unit, "Contiguous " + self.name)
+        return self  # already contiguous
+
+    def reshape(self, time_intervals: tuple[DateTimeRange]):
+        assert len(self.__segments) == 1
+        samples = self.__segments[0].samples
+        partitions = []
+        i = 0
+        for x in time_intervals:
+            n_samples_required = ceil(x.timedelta.total_seconds() * self.__sampling_frequency)
+            if n_samples_required > len(samples):
+                samples = tile(samples, ceil(n_samples_required / len(samples)))  # repeat
+                samples = samples[:n_samples_required]  # cut where it is enough
+                partitions.append(Segment(samples, x.start_datetime))
+                i = 0
+            else:
+                f = i + n_samples_required
+                partitions.append(Segment(samples[i:f], x.start_datetime))
+                i = f  # continue from where this partition ended
+
+        self.__segments = partitions
+
+    # ===================================
+    # SERIALIZATION
+
+    __SERIALVERSION: int = 2
+
+    def _memory_map(self, path):
+        # Create a memory map for each Segment's array
+        for seg in self.__segments:
+            seg._memory_map(path)
+
+    def __getstate__(self):
+        """
+        Version 1:
+        1: __name (str)
+        2: __sampling_frequency (Frequency)
+        3: _Units (Unit)
+        4: __is_equally_segmented (bool)
+        5: segments_state (list)
+
+        Version 2:
+        1: __name (str)
+        2: __sampling_frequency (Frequency)
+        3: _Units (Unit)
+        4: __is_equally_segmented (bool)
+        5: __tags (set)
+        6: segments_state (list)
+        """
+        segments_state = [segment.__getstate__() for segment in self.__segments]
+        return (self.__SERIALVERSION, self.__name, self.__sampling_frequency, self._Units, self.__is_equally_segmented, self.__tags,
+                segments_state)
+
+    def __setstate__(self, state):
+        if state[0] == 1:
+            self.__name, self.__sampling_frequency, self._Units = state[1], state[2], state[3]
+            self.__is_equally_segmented = state[4]
+            self.__segments = []
+            for segment_state in state[5]:
+                segment_state = list(segment_state)
+                segment_state.append(self.__sampling_frequency)
+                segment = object.__new__(Segment)  # Segment is now a top-level class, no longer nested in Timeseries
+                segment.__setstate__(segment_state)
+                self.__segments.append(segment)
+            self.__associated_events = {}  # empty; to be populated by Biosignal
+            self.__tags = set()  # In version 1, tags were not a possibility, so none existed.
+        elif state[0] == 2:
+            self.__name, self.__sampling_frequency, self._Units = state[1], state[2], state[3]
+            self.__is_equally_segmented = state[4]
+            self.__segments = []
+            for segment_state in state[6]:
+                segment_state = list(segment_state)
+                segment_state.append(self.__sampling_frequency)
+                segment = object.__new__(Segment)
+                segment.__setstate__(segment_state)
+                self.__segments.append(segment)
+            self.__associated_events = {}  # empty; to be populated by Biosignal
+            self.__tags = state[5]
+        else:
+            raise IOError(f'Version of {self.__class__.__name__} object not supported. Serialized version: {state[0]}; '
+                          f'Supported versions: 1 and 2.')
diff --git a/src/ltbio/biosignals/_Timeseries.pyi b/src/ltbio/biosignals/_Timeseries.pyi
new file mode 100644
index 00000000..983c9dcf
--- /dev/null
+++ b/src/ltbio/biosignals/_Timeseries.pyi
@@ -0,0 +1,169 @@
+# -*- encoding: utf-8 -*-
+
+# ===================================
+
+# IT - LongTermBiosignals
+
+# Package: biosignals
+# Module: Timeseries
+# Description: Class Timeseries, which mathematically conceptualizes timeseries and their behaviour.
+# Class OverlappingTimeseries, a special kind of Timeseries for signal processing purposes.
+
+# Contributors: João Saraiva, Mariana Abreu
+# Created: 20/04/2022
+# Last Updated: 22/07/2022
+
+# ===================================
+
+from datetime import datetime, timedelta
+from typing import Tuple, Sequence
+
+from datetimerange import DateTimeRange
+from multimethod import multimethod
+from multipledispatch import dispatch
+from numpy import ndarray
+from pandas import DataFrame
+
+from ltbio._core.operations import Operation, Operator
+from ltbio.biosignals import Timeline
+from ltbio.biosignals._Segment import Segment
+from ltbio.biosignals.units import Unit
+
+
+class Timeseries():
+
+    # INITIALIZERS
+    @multimethod
+    def __init__(self, segments_by_time: dict[datetime, ndarray | Sequence[float] | Segment], sampling_frequency: float,
+                 units: Unit = None, name: str = None): ...
+
+    # GETTERS
+    @property
+    def segments(self) -> tuple[Segment]: ...
+
+    @property
+    def n_segments(self) -> int: ...
+    @property
+    def sampling_frequency(self) -> float: ...
+    @property
+    def start(self) -> datetime: ...
+    @property
+    def end(self) -> datetime: ...
+    @property
+    def duration(self) -> timedelta: ...
+    @property
+    def domain(self) -> Timeline: ...
+    @property
+    def unit(self) -> Unit: ...
+    @property
+    def name(self) -> str: ...
+
+    # SETTERS
+    @name.setter
+    def name(self, name: str) -> None: ...
+
+    # BOOLEAN CHECKERS
+    @property
+    def is_contiguous(self) -> bool: ...
+
+    # BUILT-INS (Basics)
+    def __copy__(self) -> Timeseries: ...
+    def __len__(self) -> int: ...
+    def __iter__(self) -> iter: ...
+    @multimethod
+    def __contains__(self, item: datetime | DateTimeRange) -> bool: ...
+    @multimethod
+    def __contains__(self, item: str) -> bool: ...
+
+    # BUILT-INS (Indexing)
+    @multimethod
+    def __getitem__(self, item: int) -> Segment: ...
+    @multimethod
+    def __getitem__(self, item: datetime) -> float: ...
+    @multimethod
+    def __getitem__(self, item: str) -> float | Timeseries: ...
+    @multimethod
+    def __getitem__(self, item: DateTimeRange) -> float | Timeseries: ...
+    @multimethod
+    def __getitem__(self, item: Timeline) -> float | Timeseries: ...
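+    # Indexing sketch for the overloads above (hypothetical 1 Hz Timeseries; slice endpoints
+    # may be datetimes or parseable strings, and an index Timeline selects its own intervals):
+    #
+    #     from datetime import datetime
+    #
+    #     ts = Timeseries({datetime(2023, 1, 1): [0.0] * 60}, sampling_frequency=1.0)
+    #     ts[datetime(2023, 1, 1, 0, 0, 30)]               # one sample -> float
+    #     ts['2023-01-01 00:00:10':'2023-01-01 00:00:20']  # 10 s sub-Timeseries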
+ @multimethod + def __getitem__(self, item: slice) -> float | Timeseries: ... + @multimethod + def __getitem__(self, item: tuple) -> float | Timeseries: ... + + # BUILT-INS (Joining Timeseries) + @multimethod + def concatenate(self, other: Timeseries) -> Timeseries: ... + @multimethod + def append(self, other: Segment) -> Timeseries: ... + @multimethod + def append(self, other: ndarray | Sequence[float | int]) -> Timeseries: ... + @classmethod + def overlap(cls, first: 'Timeseries', second: 'Timeseries') -> Tuple[DateTimeRange]: ... + + # BUILT-INS (Arithmetic) + @multimethod + def __add__(self, other: 'Timeseries') -> 'Timeseries': ... + + @multimethod + def __add__(self, other: float) -> 'Timeseries': ... + + @multimethod + def __sub__(self, other: 'Timeseries') -> 'Timeseries': ... + + @multimethod + def __sub__(self, other: float) -> 'Timeseries': ... + + @multimethod + def __mul__(self, other: 'Timeseries') -> 'Timeseries': ... + + @multimethod + def __mul__(self, other: float) -> 'Timeseries': ... + + @multimethod + def __truediv__(self, other: 'Timeseries') -> 'Timeseries': ... + + @multimethod + def __truediv__(self, other: float) -> 'Timeseries': ... + + @multimethod + def __floordiv__(self, other: 'Timeseries') -> 'Timeseries': ... + + @multimethod + def __floordiv__(self, other: float) -> 'Timeseries': ... + + # SHORTCUT STATISTICS + def max(self) -> float: ... + def argmax(self) -> datetime: ... + def min(self) -> float: ... + def argmin(self) -> datetime: ... + def mean(self) -> float: ... + def median(self) -> float: ... + def std(self) -> float: ... + def var(self) -> float: ... + def abs(self) -> 'Timeseries': ... + def diff(self) -> 'Timeseries': ... + + # PLOTS + def plot_spectrum(self, show: bool = True, save_to: str = None) -> None: ... + def plot(self, show: bool = True, save_to: str = None) -> None: ... + + # PROCESSING + def apply(self, operator: Operator, inplace: bool = True, **kwargs) -> Timeseries | None: ... + @multimethod(Operation) + def undo(self, operation) -> None: ... + @multimethod(int) + def undo(self, operation) -> None: ... + + # Processing Shortcuts + def resample(self, frequency: float) -> None: ... + def undo_segmentation(self, time_intervals: tuple[DateTimeRange]) -> None: ... + + # CONVERT TO OTHER DATA STRUCTURES + def to_array(self) -> ndarray: ... + def to_dataframe(self) -> DataFrame: ... + + # SERIALIZATION + __SERIALVERSION: int = 2 + def __getstate__(self) -> tuple: ... + def __setstate__(self, state: tuple) -> None: ... 
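+    # The (version, *fields) tuples produced by __getstate__ let __setstate__ branch per
+    # serialized version, so old .biosignal files keep loading after a class gains new fields.
+    # A minimal self-contained sketch of the pattern (hypothetical class, not this package's API):
+    #
+    #     class Thing:
+    #         _VERSION = 2
+    #
+    #         def __init__(self, name, tags=None):
+    #             self.name, self.tags = name, set(tags or ())
+    #
+    #         def __getstate__(self):
+    #             return (Thing._VERSION, self.name, self.tags)
+    #
+    #         def __setstate__(self, state):
+    #             if state[0] == 1:    # version 1 predates tags
+    #                 self.name, self.tags = state[1], set()
+    #             elif state[0] == 2:
+    #                 self.name, self.tags = state[1], state[2]
+    #             else:
+    #                 raise IOError(f'Unsupported serialized version: {state[0]}.')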
diff --git a/src/ltbio/biosignals/__init__.py b/src/ltbio/biosignals/__init__.py index 1c46359c..565804ae 100644 --- a/src/ltbio/biosignals/__init__.py +++ b/src/ltbio/biosignals/__init__.py @@ -1,1608 +1,14 @@ -# -- encoding: utf-8 -- +from matplotlib import pyplot as plt +from numpy import correlate -# =================================== +from ._Biosignal import Biosignal +from ._BiosignalSource import BiosignalSource +from ._Event import Event +from ._Timeline import Timeline +from ._Timeseries import Timeseries -# IT - LongTermBiosignals -# Package: biosignals -# Module: init -# Description: Essential classes for .biosignals package: Biosignal, MultimodalBiosignal and Event - -# Contributors: João Saraiva -# Created: 07/03/2023 - -# =================================== - -import logging -from abc import ABC, abstractmethod, ABCMeta -from copy import deepcopy -from datetime import datetime, timedelta -from inspect import isclass, signature -from logging import warning -from math import ceil -from shutil import rmtree -from tempfile import mkdtemp -from typing import Dict, Tuple, Collection, Set, ClassVar, Callable - -import matplotlib.pyplot as plt -import numpy as np -from datetimerange import DateTimeRange -from dateutil.parser import parse as to_datetime -from numpy import ndarray -from pandas import DataFrame -from scipy.signal import correlate - -from .sources import __BiosignalSource as BS -from .timeseries import Timeseries, Timeline -from .units import Unitless -# from ...processing.filters.Filter import Filter -from ..clinical.BodyLocation import BodyLocation -from ..clinical.Patient import Patient -from ..clinical.conditions.MedicalCondition import MedicalCondition -from ..processing.noises.Noise import Noise - - -class Event(): - __SERIALVERSION: int = 1 - - def __init__(self, name: str, onset: datetime | str = None, offset: datetime | str = None): - if onset is None and offset is None: # at least one - raise AssertionError("At least an onset or an offset must be given to create an Event.") - self.__onset = to_datetime(onset) if isinstance(onset, str) else onset - self.__offset = to_datetime(offset) if isinstance(offset, str) else offset - if onset is not None and offset is not None and offset < onset: - raise AssertionError(f"In Event '{name}', the offset cannot come before the onset.") - self.__name = name - - @property - def has_onset(self) -> bool: - return self.__onset != None - - @property - def has_offset(self) -> bool: - return self.__offset != None - - @property - def onset(self) -> datetime: - if self.has_onset: - return self.__onset - else: - raise AttributeError(f"Event {self.name} has no onset.") - - @onset.setter - def onset(self, datetime: datetime): - self.__onset = datetime - - @property - def offset(self) -> datetime: - if self.has_offset: - return self.__offset - else: - raise AttributeError(f"Event {self.name} has no offset.") - - @offset.setter - def offset(self, datetime: datetime): - self.__offset = datetime - - @property - def duration(self) -> timedelta: - if self.__onset is None: - raise AttributeError(f"Event has no duration, only an {self.name} has no offset.") - if self.__offset is None: - raise AttributeError(f"Event has no duration, only an {self.name} has no onset.") - return self.__offset - self.__onset - - @property - def domain(self) -> DateTimeRange: - if self.__onset is None: - raise AttributeError(f"Event has no duration, only an {self.name} has no offset.") - if self.__offset is None: - raise AttributeError(f"Event has no duration, only 
an {self.name} has no onset.") - return DateTimeRange(self.__onset, self.__offset) - - @property - def name(self) -> str: - return self.__name - - def domain_with_padding(self, before: timedelta = timedelta(seconds=0), after: timedelta = timedelta(seconds=0)): - """ - The Event domain with before, after, or both paddings. Negative paddings go back in time; positive paddings go forward in time. - :param before: Padding before onset if defined, or offset otherwised. - :param after: Padding after offset if defined, or onset otherwised. - :return: DateTimeRange of the padded domain. - """ - - if not isinstance(before, timedelta) or not isinstance(after, timedelta): - raise TypeError('At least one padding (before or after) is necessary. Also, they should be timedelta objects.') - - # return: event [start, end[ - start = self.__onset if self.__onset is not None else self.__offset - end = self.__offset if self.__offset is not None else self.__onset - - # return: event [start + before, end + after[ - start, end = start + before, end + after - - return DateTimeRange(start, end) - - def __repr__(self): - if self.__offset is None: - return self.__name + ': Starts at ' + self.__onset.strftime("%d %b, %H:%M:%S") - elif self.__onset is None: - return self.__name + ': Ends at ' + self.__offset.strftime("%d %b, %H:%M:%S") - else: - return self.__name + ': [' + self.__onset.strftime("%d %b, %H:%M:%S") + '; ' + self.__offset.strftime("%d %b, %H:%M:%S") + ']' - - def __hash__(self): - return hash((self.__name, self.__onset, self.__offset)) - - def __eq__(self, other): - return self.__name == other.name and self.__onset == other._Event__onset and self.__offset == other._Event__offset - - def __ne__(self, other): - return not self == other - - def __lt__(self, other): # A Segment comes before other Segment if its end is less than the other's start. - after = other._Event__onset if other._Event__onset is not None else other._Event__offset - before = self.__offset if self.__offset is not None else self.__onset - return before < after - - def __le__(self, other): - return self < other or self == other - - def __gt__(self, other): - return not self < other - - def __ge__(self, other): - return self > other or self == other - - def __getstate__(self): - """ - 1: name (str) - 2: onset (datetime) - 3: offset (datetime) - 4: other... (dict) - """ - other_attributes = self.__dict__.copy() - del other_attributes['_Event__name'], other_attributes['_Event__onset'], other_attributes['_Event__offset'] - return (self.__SERIALVERSION, self.__name, self.__onset, self.__offset) if len(other_attributes) == 0 \ - else (self.__SERIALVERSION, self.__name, self.__onset, self.__offset, other_attributes) - - def __setstate__(self, state): - if state[0] == 1: - self.__name, self.__onset, self.__offset = state[1], state[2], state[3] - if len(state) == 5: - self.__dict__.update(state[4]) - else: - raise IOError(f'Version of {self.__class__.__name__} object not supported. Serialized version: {state[0]};' - f'Supported versions: 1.') - - -# =================================== -# Base Class 'Biosignal' and 'MultimodalBiosignal' -# =================================== - -class Biosignal(ABC): - """ - A Biosignal is a set of channels (Timeseries), each of which with samples measuring a biological variable. - It may be associated with a source, a patient, and a body location. It can also have a name. - It has an initial and final datetime. Its length is its number of channels. 
- It can be resampled, filtered, and concatenated to other Biosignals. - Amplitude and spectrum plots can be displayed and saved. - """ - - __SERIALVERSION: int = 2 - - def __init__(self, - timeseries: Dict[str | BodyLocation, timeseries.Timeseries] | str | Tuple[datetime], - source: BS.__subclasses__() = None, - patient: Patient = None, - acquisition_location: BodyLocation = None, - name: str = None, - **options): - - # Save BS, if given - if source and not isinstance(source, tuple(BS.__subclasses__())): - raise TypeError("Source must be a BS.") - self.__source = source - - # Create some empty properites - self.__patient = None - self.__acquisition_location = None - self.__name = None - self.__associated_events = {} - self.__added_noise = None - - # Option 1: 'timeseries' is a string path -> Read samples from files - if isinstance(timeseries, str): - filepath = timeseries - if source is None: - raise ValueError("To read a Biosignal from files, specify a BS in 'source'.") - else: - # BS can give the samples (required) and many other optional metadata. - # It's the BS that decides what it gives, depending on what it can get. - - # Get all data that the source can read: - data = self.__source._get(filepath, type(self), **options) - - # Unwrap data: - # 'timeseries': dictionary of Timeseries (required) - # 'patient': Patient - # 'acquisition_location': BodyLocation - # 'events': tuple of Events - # 'name': string - self.__timeseries = data['timeseries'] - if data['patient'] is not None: - self.__patient = data['patient'] - if data['acquisition_location'] is not None: - self.__acquisition_location = data['acquisition_location'] - if data['events'] is not None: - self.associate(data['events']) - if data['name'] is not None: - self.__name = data['name'] - - # Option 2: 'timeseries' is a dictionary {chanel name: Timeseries} -> Save directly - if isinstance(timeseries, dict): - # Check if all keys are strings or BodyLocation - for key in timeseries.keys(): - if not isinstance(key, str) and not isinstance(key, BodyLocation): - raise TypeError("All keys in 'timeseries' must be strings or BodyLocation.") - # Check if all values are Timeseries - for ts in timeseries.values(): - if not isinstance(ts, Timeseries): - raise TypeError("All values in 'timeseries' must be Timeseries.") - - # Save dictionary of Timeseries - self.__timeseries = timeseries - - # Check if Timeseries come with Events associated - for ts in timeseries.values(): - for event in ts.events: - if event.name in self.__associated_events and self.__associated_events[event.name] != event: - raise AssertionError("There are different Events with the same name among the Timeseries given.") - else: - self.__associated_events[event.name] = event - - # If user gives metadata, override what was given by the source: - if patient is not None: - self.__patient = patient - if acquisition_location is not None: - self.__acquisition_location = acquisition_location - if name is not None: - self.__name = name - - def __copy__(self): - return type(self)({ts: self.__timeseries[ts].__copy__() for ts in self.__timeseries}, self.__source, self.__patient, - self.__acquisition_location, str(self.__name)) - - def _new(self, timeseries: Dict[str | BodyLocation, timeseries.Timeseries] | str | Tuple[datetime] = None, - source: BS.__subclasses__() = None, patient: Patient = None, acquisition_location: BodyLocation = None, name: str = None, - events: Collection[Event] = None, added_noise=None): - timeseries = {ts: self.__timeseries[ts] for ts in self.__timeseries} if 
timeseries is None else timeseries # copy - source = self.__source if source is None else source # no copy - patient = self.__patient if patient is None else patient # no copy - acquisition_location = self.__acquisition_location if acquisition_location is None else acquisition_location # no copy - name = str(self.__name) if name is None else name # copy - - new = type(self)(timeseries, source, patient, acquisition_location, name) - - # Associate events; no need to copy - events = self.__associated_events if events is None else events - events = events.values() if isinstance(events, dict) else events - # Check if some event can be associated - logging.disable(logging.WARNING) # if outside the domain of every channel -> no problem; the Event will not be associated - new.associate(events) - logging.disable(logging.NOTSET) # undo supress warnings - - # Associate added noise reference: - if added_noise is not None: - new._Biosignal__added_noise = added_noise - - return new - - def _apply_operation_and_new(self, operation, - source: BS.__subclasses__() = None, patient: Patient = None, - acquisition_location: BodyLocation = None, name: str = None, events: Collection[Event] = None, - **kwargs): - new_channels = {} - for channel_name in self.channel_names: - new_channels[channel_name] = self.__timeseries[channel_name]._apply_operation_and_new(operation, **kwargs) - return self._new(new_channels, source=source, patient=patient, acquisition_location=acquisition_location, - name=name, events=events) - - def _apply_operation_and_return(self, operation, **kwargs): - pass # TODO - - @property - def has_single_channel(self) -> bool: - return len(self) == 1 - - def _get_channel(self, channel_name: str | BodyLocation) -> timeseries.Timeseries: - if channel_name in self.channel_names: - return self.__timeseries[channel_name] - else: - raise AttributeError(f"No channel named '{channel_name}'.") - - def _get_single_channel(self) -> tuple[str | BodyLocation, timeseries.Timeseries]: - """ - :return: channel_name, channel - """ - if not self.has_single_channel: - raise AttributeError(f"This Biosignal does not have a single channel. 
It has multiple channels.") - return tuple(self.__timeseries.items())[0] - - def get_event(self, name: str) -> Event: - if name in self.__associated_events: - return self.__associated_events[name] - from_conditions = self.__get_events_from_medical_conditions() - if name in from_conditions: - return from_conditions[name] - else: - raise NameError(f"No Event named '{name}' associated to the Biosignal or its paitent's conditions.") - - @property - def preview(self): - """Returns 5 seconds of the middle of the signal.""" - domain = self.domain - middle_of_domain: DateTimeRange = domain[len(domain) // 2] - middle = middle_of_domain.start_datetime + (middle_of_domain.timedelta / 2) - try: - return self[middle - timedelta(seconds=2): middle + timedelta(seconds=3)] - except IndexError: - raise AssertionError( - f"The middle segment of {self.name} from {self.patient_code} does not have at least 5 seconds to return a preview.") - - def when(self, condition: Callable, window: timedelta = None): - if len(signature(condition).parameters) > 1: - assert set(signature(condition).parameters) - sf = self.sampling_frequency # that all channels have the same sampling frequnecy - window = 1 if window is None else int(window * sf) - intervals = [] - for i in range(len(self._n_segments)): # gives error if not all channles have the same domain - x = self._vblock(i) - evaluated = [] - for i in range(0, len(x[0]), window): - y = x[:, i: i + window] - evaluated += [y] * window - intervals.append(timeseries.Timeseries._Timeseries__Segment._Segment__when(evaluated)) - intervals = self.__timeseries[0]._indices_to_timepoints(intervals) - return Timeline(*[Timeline.Group(channel._when(condition, window), name=channel_name) for channel_name, channel in self], - name=self.name + " when '" + condition.__name__ + "' is True" + f" (in windows of {window})" if window else "") - - else: - return Timeline(*[Timeline.Group(channel._when(condition, window), name=channel_name) for channel_name, channel in self], - name=self.name + " when '" + condition.__name__ + "' is True" + f" (in windows of {window})" if window else "") - - def __getitem__(self, item): - '''The built-in slicing and indexing operations.''' - - if isinstance(item, datetime): - if len(self) != 1: - raise IndexError("This Biosignal has multiple channels. Index the channel before indexing the datetime.") - return tuple(self.__timeseries.values())[0][item] - - if isinstance(item, (str, BodyLocation)): - if item in self.channel_names: - if len(self) == 1: - raise IndexError("This Biosignal only has 1 channel. 
Index only the datetimes.") - ts = {item: self.__timeseries[item].__copy__(), } - return self._new(timeseries=ts) - - elif item in self.__associated_events or item in self.__get_events_from_medical_conditions(): - if item in self.__associated_events: # Internal own Events - event = self.__associated_events[item] - else: # Events associated to MedicalConditions - event = self.__get_events_from_medical_conditions()[item] - - if event.has_onset and event.has_offset: - return self[DateTimeRange(event.onset, event.offset)] - elif event.has_onset: - return self[event.onset] - elif event.has_offset: - return self[event.offset] - - else: - try: - self.__timeseries[to_datetime(item)] - except: - raise IndexError("Datetime in incorrect format or '{}' is not a channel nor an event of this Biosignal.".format(item)) - - def __get_events_with_padding(event_name, padding_before=timedelta(seconds=0), padding_after=timedelta(seconds=0), - exclude_event=False): - # Get Event object - if event_name in self.__associated_events: - event = self.__associated_events[event_name] - elif event_name in self.__get_events_from_medical_conditions(): - event = self.__get_events_from_medical_conditions()[event_name] - else: - raise IndexError(f"No Event named '{event_name}' associated to this Biosignal.") - - if isinstance(padding_before, datetime) and isinstance(padding_after, datetime) and exclude_event: - if event.has_onset and event.has_offset: - return self[DateTimeRange(padding_before, event.onset)] >> self[ - DateTimeRange(event.offset + timedelta(seconds=1 / self.sampling_frequency), - padding_after)] # FIXME: Sampling frequency might not be the same for all channels! - else: - raise IndexError(f"Event {event_name} is a point in time, not an event with a duration.") - - # Convert specific datetimes to timedeltas; is this inneficient? - if isinstance(padding_before, datetime): - if event.has_onset: - padding_before = event.onset - padding_before - elif event.has_offset: - padding_before = event.offset - padding_before - if exclude_event: - padding_after = - event.duration - if isinstance(padding_after, datetime): - if event.has_offset: - padding_after = padding_after - event.offset - elif event.has_onset: - padding_after = padding_after - event.onset - if exclude_event: - padding_before = - event.duration - - # Index - if event.has_onset and event.has_offset: - return self[DateTimeRange(event.onset - padding_before, event.offset + padding_after)] - elif event.has_onset: - return self[DateTimeRange(event.onset - padding_before, event.onset + padding_after)] - elif event.has_offset: - return self[DateTimeRange(event.offset - padding_before, event.offset + padding_after)] - - if isinstance(item, slice): - - # Everything but event - if isinstance(item.stop, str) and item.start is None and item.step is None: - if not item.stop.startswith('-'): - raise ValueError( - "Indexing a Biosignal like x[:'event':] is equivalent to having its entire domain. 
Did you mean x[:'-event':]?") - return __get_events_with_padding(item.stop[1:], padding_before=self.initial_datetime, padding_after=self.final_datetime, - exclude_event=True) - - # Everything before event - if isinstance(item.stop, str) and item.start is None: - event_name, exclude_event = item.stop, False - if event_name.startswith('-'): - event_name, exclude_event = event_name[1:], True - return __get_events_with_padding(event_name, padding_before=self.initial_datetime, exclude_event=exclude_event) - - # Everything after event - if isinstance(item.start, str) and item.stop is None: - event_name, exclude_event = item.start, False - if event_name.startswith('-'): - event_name, exclude_event = event_name[1:], True - return __get_events_with_padding(event_name, padding_after=self.final_datetime, exclude_event=exclude_event) - - # Event with padding - if isinstance(item.start, (timedelta, int)) and isinstance(item.step, (timedelta, int)) and isinstance(item.stop, str): - start = timedelta(seconds=item.start) if isinstance(item.start, int) else item.start # shortcut for seconds - step = timedelta(seconds=item.step) if isinstance(item.step, int) else item.step # shortcut for seconds - return __get_events_with_padding(item.stop, padding_before=start, padding_after=step) - elif isinstance(item.start, (timedelta, int)) and isinstance(item.stop, str): - start = timedelta(seconds=item.start) if isinstance(item.start, int) else item.start # shortcut for seconds - return __get_events_with_padding(item.stop, padding_before=start) - elif isinstance(item.start, str) and isinstance(item.stop, (timedelta, int)): - stop = timedelta(seconds=item.stop) if isinstance(item.stop, int) else item.stop # shortcut for seconds - return __get_events_with_padding(item.start, padding_after=stop) - - # Index by datetime - if isinstance(item.start, datetime) and isinstance(item.stop, datetime) and item.stop < item.start: - raise IndexError("Given final datetime comes before the given initial datetime.") - - if self.has_single_channel: # one channel - channel_name = tuple(self.__timeseries.keys())[0] - channel = self.__timeseries[channel_name] - return self._new(timeseries={channel_name: channel[item]}) # FIXME: Why aren't events being updated here? (See below) - - else: # multiple channels - ts = {} - events = set() - for k in self.channel_names: - ts[k] = self.__timeseries[k][item] - # Events outside the new domain get discarded, hence collecting the ones that remained - events.update(set(self.__timeseries[k].events)) # FIXME: (See Above) Like in here! - new = self._new(timeseries=ts, events=events) - return new - - if isinstance(item, DateTimeRange): # Pass item directly to each channel - ts = {} - events = set() - for k in self.channel_names: - res = self.__timeseries[k][item] - if res is not None: - ts[k] = res - # Events outside the new domain get discarded, hence collecting the ones that remained - events.update(set(self.__timeseries[k].events)) - - if len(ts) == 0: - raise IndexError(f"Event is outside every channel's domain.") - - new = self._new(timeseries=ts, events=events) - return new - - if isinstance(item, tuple): - - # Structure-related: Channels - if all(isinstance(k, (str, BodyLocation)) and k in self.channel_names for k in item): - ts = {} - events = set() - for k in item: - ts[k] = self.__timeseries[k] - events.update(set(self.__timeseries[k].events)) - new = self._new(timeseries=ts, events=events) - return new - - # Time-related: Slices, Datetimes, Events, ... 
- else: - if isinstance(item[0], DateTimeRange): - item = sorted(item, key=lambda x: x.start_datetime) - else: - item = sorted(item) - - return self._new({channel_name: channel[tuple(item)] for channel_name, channel in self}) - - if isinstance(item, Timeline): - if item.is_index: - res = self[item._as_index()] - res.name += f" indexed by '{item.name}'" - return res - else: - return IndexError("This Timeline cannot serve as index, because it contains multiple groups of intervals or points.") - - raise IndexError("Index types not supported. Give a datetime (can be in string format), a slice or a tuple of those.") - - @property - def channel_names(self) -> set[str | BodyLocation]: - '''Returns a tuple with the labels associated to every channel.''' - return set(self.__timeseries.keys()) - - @property - def name(self): - '''Returns the associated name, or 'No Name' if none was provided.''' - return self.__name if self.__name != None else "No Name" - - @name.setter - def name(self, name: str): - self.__name = name - - @property - def patient(self) -> Patient: - """Returns the associated patient, if any.""" - return self.__patient - - @property - def patient_code(self): # TODO: Delete method - """deprecated: use .patient.code instead.""" - return self.__patient.code if self.__patient != None else 'n.d.' - - @property - def patient_conditions(self) -> Set[MedicalCondition]: # TODO: Delete method - """deprecated: use .patient.conditions instead.""" - return self.__patient.conditions if self.__patient != None else set() - - @property - def acquisition_location(self): - '''Returns the associated acquisition location, or None if none was provided.''' - return self.__acquisition_location - - @property - def source(self) -> BS: - '''Returns the BS from where the data was read, or None if was not specified.''' - return self.__source - - @property - def type(self) -> ClassVar: - '''Returns the biosignal modality class. E.g.: ECG, EMG, EDA, ...''' - return type(self) - - @property - def initial_datetime(self) -> datetime: - '''Returns the initial datetime of the channel that starts the earliest.''' - return min([ts.initial_datetime for ts in self.__timeseries.values()]) - - @property - def final_datetime(self) -> datetime: - '''Returns the final datetime of the channel that ends the latest.''' - return max([ts.final_datetime for ts in self.__timeseries.values()]) - - @property - def domain(self) -> Tuple[DateTimeRange]: - if len(self) == 1: - return tuple(self.__timeseries.values())[0].domain - else: - channels = tuple(self.__timeseries.values()) - cumulative_intersection: Tuple[DateTimeRange] - for k in range(1, len(self)): - if k == 1: - cumulative_intersection = channels[k].overlap(channels[k - 1]) - else: - cumulative_intersection = channels[k].overlap(cumulative_intersection) - return cumulative_intersection - - @property - def domain_timeline(self) -> Timeline: # TODO: merge with domain - return Timeline(Timeline.Group(self.domain), name=self.name + ' Domain') - - @property - def subdomains(self) -> Tuple[DateTimeRange]: - if len(self) == 1: - return tuple(self.__timeseries.values())[0].subdomains - else: - raise NotImplementedError() - - def _vblock(self, i: int): - """ - Returns a block of timelly allined segments, vertially alligned for all channels. - Note: This assumes all channels are segmented in the same way, i.e., have exactly the same set of subdomains. 
- :param i: The block index - :return: ndarray of vertical stacked segmetns - """ - N = self._n_segments - if isinstance(N, int): - if i < N: - return np.vstack([channel[i] for channel in self.__timeseries.values()]) - else: - IndexError(f"This Biosignal as only {N} blocks.") - else: - raise AssertionError("Not all channels are segmented in the same way, hence blocks cannot be created.") - - def _block_subdomain(self, i: int) -> DateTimeRange: - if len(self) == 1: - return tuple(self.__timeseries.values())[0]._block_subdomain(i) - else: - raise NotImplementedError() - - @property - def _n_segments(self) -> int | dict: - """ - Returns the number of segments of each Timeseries. - :rtype: dict, with the number of segments labelled by channel name; or int if they are all the same - """ - n_segments = {} - last_n = None - all_equal = True - for channel_name, channel in self.__timeseries.items(): - x = channel.n_segments - last_n = x - n_segments[channel_name] = x - if last_n is not None and last_n != x: - all_equal = False - if all_equal: - return last_n - else: - return n_segments - - @property - def duration(self): - common_duration = tuple(self.__timeseries.values())[0].duration - for _, channel in self: - if channel.duration != common_duration: - raise AssertionError("Not all channels have the same duration.") - return common_duration - - def __get_events_from_medical_conditions(self): - res = {} - for condition in self.patient_conditions: - res.update(condition._get_events()) - return res - - @property - def events(self): - '''Tuple of associated Events, ordered by datetime.''' - return tuple(sorted(list(self.__associated_events.values()) + list(self.__get_events_from_medical_conditions().values()))) - - @property - def sampling_frequency(self) -> float: - '''Returns the sampling frequency of every channel (if equal), or raises an error if they are not equal.''' - if len(self) == 1: - return tuple(self.__timeseries.values())[0].sampling_frequency - else: - common_sf = None - for _, channel in self: - if common_sf is None: - common_sf = channel.sampling_frequency - elif channel.sampling_frequency != common_sf: - raise AttributeError("Biosignal contains 2+ channels, all not necessarly with the same sampling frequency.") - return common_sf - - @property - def added_noise(self): - '''Returns a reference to the noisy component, if the Biosignal was created with added noise; else the property does not exist.''' - if self.__added_noise is not None: - return self.__added_noise - else: - raise AttributeError("No noise was added to this Biosignal.") - - def __len__(self): - '''Returns the number of channels.''' - return len(self.__timeseries) - - def __repr__(self): - '''Returns a textual description of the Biosignal.''' - res = "Name: {}\nType: {}\nLocation: {}\nNumber of Channels: {}\nChannels: {}\nUseful Duration: {}\nSource: {}\n".format( - self.name, - self.type.__name__, - self.acquisition_location, - len(self), - ''.join([(x + ', ') for x in self.channel_names]), - self.duration, - self.source.__str__(None) if isinstance(self.source, ABCMeta) else str(self.source)) - - if len(self.__associated_events) != 0: - res += "Events:\n" - for event in sorted(self.__associated_events.values()): - res += '- ' + str(event) + '\n' - events_from_medical_conditions = dict(sorted(self.__get_events_from_medical_conditions().items(), key=lambda item: item[1])) - if len(events_from_medical_conditions) != 0: - res += "Events associated to Medical Conditions:\n" - for key, event in 
events_from_medical_conditions.items(): - res += f"- {key}:\n{event}\n" - return res - - def _to_dict(self) -> Dict[str | BodyLocation, timeseries.Timeseries]: # TODO: Delete method - return deepcopy(self.__timeseries) - - def _to_array(self) -> ndarray: - """ - Converts Biosignal to a NumPy ndarray. - :return: C x M x N array, where C is the number of channels, M the number of segments of each, and N their length. - :rtype: list[numpy.ndarray] - """ - x = [channel._to_array() for channel in self.__timeseries.values()] - return np.stack(x) - - def to_dataframe(self) -> DataFrame: - pass - - def __iter__(self): - return self.__timeseries.items().__iter__() - - def __contains__(self, item): - if isinstance(item, str): - if item in self.__timeseries.keys(): # if channel exists - return True - if item in self.__associated_events: # if Event occurs - return True - events_from_consitions = self.__get_events_from_medical_conditions() - for label, event in events_from_consitions: - if item == label and event.domain in self: - return True - return False - elif isinstance(item, (datetime, DateTimeRange)): - for _, channel in self: - if item in channel: # if at least one channel defines this point in time - return True - return False - else: - raise TypeError(f'Cannot apply this operation with {type(item)}.') - - def __mul__(self, other): - if isinstance(other, (float, int)): - suffix = f' (dilated up by {str(other)})' if other > 1 else f' (compressed up by {str(other)})' - return self._apply_operation_and_new(lambda x: x * other, name=self.name + suffix) - - def __sub__(self, other): - return self + (other * -1) - - def __neg__(self): - return self * -1 - - def __add__(self, other): - """ - If a float or int: - Add constant to every channel. Translation of the signal. - If Biosignal: - Adds both sample-by-sample, if they have the same domain. - Notes: - - If the two Biosignals have two distinct acquisition locations, they will be lost. - - If the two Biosignals have two distinct sources, they will be lost. - - If the two Biosignals have the distict patients, they will be lost. - Raises: - - TypeError if Biosignals are not of the same type. - - ArithmeticError if Biosignals do not have the same domain. - """ - - if isinstance(other, (float, int)): - return self._apply_operation_and_new(lambda x: x + other, name=self.name + f' (shifted up by) {str(other)}') - - if isinstance(other, Biosignal): - # Check errors - if self.type != other.type: - while True: - answer = input( - f"Trying to add an {self.type.__name__} with an {other.type.__name__}. Do you mean to add templeates of the second as noise? 
(y/n)") - if answer.lower() in ('y', 'n'): - if answer.lower() == 'y': - return Biosignal.withAdditiveNoise(self, other) - else: - raise TypeError("Cannot add a {0} to a {1} if not as noise.".format(other.type.__name__, self.type.__name__)) - - if (not self.has_single_channel or not other.has_single_channel) and self.channel_names != other.channel_names: - raise ArithmeticError( - "Biosignals to add must have the same number of channels and the same channel names.") # unless each only has one channel - if self.domain != other.domain: - raise ArithmeticError("Biosignals to add must have the same domains.") - - # Prepare common metadata - name = f"{self.name} + {other.name}" - acquisition_location = self.acquisition_location if self.acquisition_location == other.acquisition_location else None - patient = self.__patient if self.patient_code == other.patient_code else None - if isclass(self.source) and isclass(other.source): # Un-instatiated sources - if self.source == other.source: - source = self.__source - else: - source = None - else: - if type(self.source) == type(other.source) and self.source == other.source: - source = self.__source - else: - source = None - - # Perform addition - res_timeseries = {} - if self.has_single_channel and other.has_single_channel: - ch_name1, ch1 = self._get_single_channel() - ch_name2, ch2 = self._get_single_channel() - res_timeseries[f'{ch_name1}+{ch_name2}'] = ch1 + ch2 - else: - for channel_name in self.channel_names: - res_timeseries[channel_name] = self._to_dict()[channel_name] + other._to_dict()[channel_name] - - # Union of Events - events = set(self.__associated_events.values()).union(set(other._Biosignal__associated_events.values())) - - return self._new(timeseries=res_timeseries, source=source, patient=patient, acquisition_location=acquisition_location, - name=name, events=events) - - raise TypeError(f"Addition operation not valid with Biosignal and object of type {type(other)}.") - - def __and__(self, other): - """ - Joins the channels of two Biosignals of the same type, if they do not have the same set of channel names. - Notes: - - If the two Biosignals have two distinct acquisition locations, they will be lost. - - If the two Biosignals have two distinct sources, they will be lost. - - If the two Biosignals have the distict patients, they will be lost. - Raises: - - TypeError if Biosignals are not of the same type. - - ArithmeticError if both Biosignals have any channel name in common. 
- """ - - # Check errors - if not isinstance(other, Biosignal): - raise TypeError(f"Operation join channels is not valid with object of type {type(other)}.") - if self.type != other.type: - raise TypeError("Cannot join a {0} to a {1}".format(other.type.__name__, self.type.__name__)) - if len(self.channel_names.intersection(other.channel_names)) != 0: - raise ArithmeticError("Channels to join cannot have the same names.") - - # Prepare common metadata - name = f"{self.name} and {other.name}" - acquisition_location = self.acquisition_location if self.acquisition_location == other.acquisition_location else None - patient = self.__patient if self.patient_code == other.patient_code else None - if isclass(self.source) and isclass(other.source): # Un-instatiated sources - if self.source == other.source: - source = self.__source - else: - source = None - else: - if type(self.source) == type(other.source) and self.source == other.source: - source = self.__source - else: - source = None - - # Join channels - res_timeseries = {} - res_timeseries.update(self._to_dict()) - res_timeseries.update(other._to_dict()) - - # Union of Events - events = set(self.__associated_events.values()).union(set(other._Biosignal__associated_events.values())) - - return self._new(timeseries=res_timeseries, source=source, patient=patient, acquisition_location=acquisition_location, name=name, - events=events) - - def __rshift__(self, other): - """ - Temporally concatenates two Biosignal, if they have the same set of channel names. - Notes: - - If the two Biosignals have two distinct acquisition locations, they will be lost. - - If the two Biosignals have two distinct sources, they will be lost. - - If the two Biosignals have the distict patients, they will be lost. - Raises: - - TypeError if Biosignals are not of the same type. - - ArithmeticError if both Biosignals do not have the same channel names. - - ArithmeticError if the second comes before the first. 
- """ - - # Check errors - if not isinstance(other, Biosignal): - raise TypeError(f"Operation join channels is not valid with object of type {type(other)}.") - if self.type != other.type: - raise TypeError("Cannot join a {0} to a {1}".format(other.type.__name__, self.type.__name__)) - if self.channel_names != other.channel_names: - raise ArithmeticError("Biosignals to concatenate must have the same channel names.") - if other.initial_datetime < self.final_datetime: - raise ArithmeticError("The second Biosignal comes before (in time) the first Biosignal.") - - # Prepare common metadata - name = f"{self.name} >> {other.name}" - acquisition_location = self.acquisition_location if self.acquisition_location == other.acquisition_location else None - patient = self.__patient if self.patient_code == other.patient_code else None - if isclass(self.source) and isclass(other.source): # Un-instatiated sources - if self.source == other.source: - source = self.__source - else: - source = None - else: - if type(self.source) == type(other.source) and self.source == other.source: - source = self.__source - else: - source = None - - # Perform concatenation - res_timeseries = {} - for channel_name in self.channel_names: - res_timeseries[channel_name] = self._get_channel(channel_name) >> other._get_channel(channel_name) - - # Union of Events - events = set(self.__associated_events.values()).union(set(other._Biosignal__associated_events.values())) - - return self._new(timeseries=res_timeseries, source=source, patient=patient, acquisition_location=acquisition_location, name=name, - events=events) - - # =================================== - # Binary Logic using Time and Conditions - - def __lt__(self, other): - if isinstance(other, Biosignal): - return self.final_datetime < other.initial_datetime - else: - res = self.when(lambda x: x < other) - res.name(self.name + ' < ' + str(other)) - return res - - def __le__(self, other): - if isinstance(other, Biosignal): - return self.final_datetime <= other.initial_datetime - else: - res = self.when(lambda x: x <= other) - res.name(self.name + ' >= ' + str(other)) - return res - - def __gt__(self, other): - if isinstance(other, Biosignal): - return self.initial_datetime > other.final_datetime - else: - res = self.when(lambda x: x > other) - res.name(self.name + ' >= ' + str(other)) - return res - - def __ge__(self, other): - if isinstance(other, Biosignal): - return self.initial_datetime >= other.final_datetime - else: - res = self.when(lambda x: x >= other) - res.name(self.name + ' >= ' + str(other)) - return res - - def __eq__(self, other): - if isinstance(other, Biosignal): - return self.initial_datetime == other.initial_datetime and self.final_datetime == other.final_datetime - else: - res = self.when(lambda x: x == other) - res.name(self.name + ' >= ' + str(other)) - return res - - def __ne__(self, other): - if isinstance(other, Biosignal): - return not self.__eq__(other) - else: - res = self.when(lambda x: x != other) - res.name(self.name + ' >= ' + str(other)) - return res - - ######## Events - - def set_channel_name(self, current: str | BodyLocation, new: str | BodyLocation): - if current in self.__timeseries.keys(): - self.__timeseries[new] = self.__timeseries[current] - del self.__timeseries[current] - else: - raise AttributeError(f"Channel named '{current}' does not exist.") - - def set_event_name(self, current: str, new: str): - if current in self.__associated_events.keys(): - event = self.__associated_events[current] - self.__associated_events[new] = 
Event(new, event._Event__onset, event._Event__offset) - del self.__associated_events[current] - else: - raise AttributeError(f"Event named '{current}' is not associated.") - - def delete_events(self): - for _, channel in self: - channel.delete_events() - self.__associated_events = {} - - def filter(self, filter_design) -> int: - ''' - Filters every channel with to the given filter_design. - - @param filter_design: A Filter object specifying the designed filter to be applied. - @return: 0 if the filtering is applied successfully. - @rtype: int - ''' - for channel in self.__timeseries.values(): - channel._accept_filtering(filter_design) - return 0 - - def undo_filters(self): - ''' - Restores the raw samples of every channel, eliminating the action of any applied filter. - ''' - for channel in self.__timeseries.values(): - channel._undo_filters() - - def resample(self, frequency: float): - ''' - Resamples every channel to the new sampling frequency given, using Fourier method. - @param frequency: New sampling frequency (in Hertz). - ''' - for channel in self.__timeseries.values(): - channel._resample(frequency) - - def __draw_plot(self, timeseries_plotting_method, title, xlabel, ylabel, grid_on: bool, show: bool = True, save_to: str = None): - ''' - Draws a base plot to display every channel in a subplot. It is independent of the content that is plotted. - - @param timeseries_plotting_method: The method to be called in Timeseries, that defines what content to plot. - @param title: What the content is about. The Biosignal's name and patient code will be added. - @param xlabel: Label for the horizontal axis. - @param ylabel: Label for the vertical axis. - @param grid_on: True if grid in to be drawn or not; False otherwise. - @param show: True if plot is to be immediately displayed; False otherwise. - @param save_to: A path to save the plot as an image file; If none is provided, it is not saved. - @return: - ''' - fig = plt.figure(figsize=(13, 2.5 * len(self))) - - all_events = self.events - all_onsets = [e.onset for e in all_events if e.has_onset] - all_offsets = [e.offset for e in all_events if e.has_offset] - all_vlines = all_onsets + all_offsets - - for i, channel_name in zip(range(len(self)), self.channel_names): - channel = self.__timeseries[channel_name] - ax = plt.subplot(len(self), 1, i + 1, title=channel_name) - ax.title.set_size(10) - ax.margins(x=0) - ax.set_xlabel(xlabel, fontsize=8, rotation=0, loc="right") - ax.set_ylabel(ylabel, fontsize=8, rotation=90, loc="top") - plt.xticks(fontsize=9) - plt.yticks(fontsize=9) - if grid_on: - ax.grid() - timeseries_plotting_method(self=channel) - - _vlines = [int((t - channel.initial_datetime).total_seconds() * channel.sampling_frequency) for t in all_vlines if t in channel] - plt.vlines(_vlines, ymin=channel.min(), ymax=channel.max(), colors='red') - - fig.suptitle((title + ' ' if title is not None else '') + self.name + ' from patient ' + str(self.patient_code), fontsize=11) - fig.tight_layout() - if save_to is not None: - fig.savefig(save_to) - plt.show() if show else plt.close() - - # return fig - - def plot_spectrum(self, show: bool = True, save_to: str = None): - ''' - Plots the Bode plot of every channel. - @param show: True if plot is to be immediately displayed; False otherwise. - @param save_to: A path to save the plot as an image file; If none is provided, it is not saved. 
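A usage sketch, with ecg as any hypothetical Biosignal instance:

    ecg.plot_spectrum(show=False, save_to='ecg_spectrum.png')   # save the spectrum plot without displaying it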
- ''' - self.__draw_plot(timeseries.Timeseries._plot_spectrum, 'Power Spectrum of', 'Frequency (Hz)', 'Power (dB)', True, show, save_to) - - def plot(self, show: bool = True, save_to: str = None): - ''' - Plots the amplitude in time of every channel. - @param show: True if plot is to be immediately displayed; False otherwise. - @param save_to: A path to save the plot as an image file; If none is provided, it is not saved. - ''' - return self.__draw_plot(timeseries.Timeseries._plot, None, 'Time', 'Amplitude (n.d.)', False, show, save_to) - - @abstractmethod - def plot_summary(self, show: bool = True, save_to: str = None): - ''' - Plots a summary of relevant aspects of common analysis of the Biosignal. - ''' - pass # Implemented in each type - - def apply_operation(self, operation, **kwargs): - for channel in self.__timeseries.values(): - channel._apply_operation(operation, **kwargs) - - def apply_and_new(self, operation, **kwargs): - for channel in self.__timeseries.values(): - channel._apply_operation_and_new(operation, **kwargs) - - def invert(self, channel_label: str = None): - inversion = lambda x: -1 * x - if channel_label is None: # apply to all channels - self.apply_operation(inversion) - else: # apply only to one channel - self.__timeseries[channel_label]._apply_operation(inversion) - - def associate(self, events: Event | Collection[Event] | Dict[str, Event]): - ''' - Associates an Event to all Timeseries. - Events have names that serve as keys. If keys are given, - i.e. if 'events' is a dict, then the Event names are overridden. - @param events: One or multiple Event objects. - @rtype: None - ''' - - def __add_event(event: Event): - n_channels_associated = 0 - for _, channel in self: - try: - channel.associate(event) - n_channels_associated += 1 - except ValueError: - pass - if n_channels_associated > 0: # If at least one association was possible - self.__associated_events[event.name] = event - else: - warning(f"Event '{event.name}' was not associated, because it is outside of every channel's domain.") - - if isinstance(events, Event): - __add_event(events) - elif isinstance(events, dict): - for event_key in events: - event = events[event_key] - __add_event(Event(event_key, event._Event__onset, event._Event__offset)) # rename with given key - else: - for event in events: - __add_event(event) - - def disassociate(self, event_name: str): - ''' - Disassociates an Event from all Timeseries. - @param event_name: The name of the Event to be removed. - @rtype: None - ''' - if event_name in self.__associated_events: - for _, channel in self: - try: - channel.disassociate(event_name) - except NameError: - pass - del self.__associated_events[event_name] - else: - raise NameError(f"There's no Event '{event_name}' associated to this Biosignal.") - - @classmethod - def withAdditiveNoise(cls, original, noise, name: str = None): - """ - Creates a new Biosignal from 'original' with added 'noise'. - - :param original: (Biosignal) The original Biosignal to be contaminated with noise. - :param noise: (Noise | Timeseries | Biosignal) The noise to add to the original Biosignal. - :param name: (str) The name to associate to the resulting Biosignal. - - When 'noise' is a Noise: - - A trench of noise, with the duration of the channel, will be generated to be added to each channel. - - 'noise' should be configured with the same sampling frequency has the channels. 
- - When 'noise' is a Biosignal: - When it has the same set of channels as 'original', sampled at the same frequency: - - Each noisy channel will be added to the corresponding channel of 'original', in a template-wise manner. - When it has a unique channel: - - That noisy channel will be added to every channel of 'original', in a template-wise manner. - - That noisy channel should have the same sampling frequency has every channel of 'original'. - - If 'noise' has multiple segments, they are concatenated to make a hyper-template. - - Exception: in the case where both Timeseries having the same domain, the noisy samples will be added in a - segment-wise manner. - - When 'noise' is a Timeseries sampled at the same frequency of 'original': - - Its samples will be added to every channel of 'original', in a template-wise manner. - - If 'noise' has multiple segments, they are concatenated to make a hyper-template. - - Exception: in the case where both Timeseries having the same domain, the noisy samples will be added in a - segment-wise manner. - - 'noise' should have been sampled at the same frequency as 'original'. - - What is "template-wise manner"? - - If the template segment is longer than any original segment, the template segment will be trimmed accordingly. - - If the template segment is shorter than any original segment, the template will repeated in time. - - If the two segments are of equal length, they are added as they are. - - :return: A Biosignal with the same properties as the 'original', but with noise added to the samples of every channel. - :rtype: Biosignal subclass - """ - - if not isinstance(original, Biosignal): - raise TypeError(f"Parameter 'original' must be of type Biosignal; but {type(original)} was given.") - - if not isinstance(noise, (Noise, timeseries.Timeseries, Biosignal)): - raise TypeError(f"Parameter 'noise' must be of types Noise, Timeseries or Biosignal; but {type(noise)} was given.") - - if name is not None and not isinstance(name, str): - raise TypeError( - f"Parameter 'name' must be of type str; but {type(name)} was given.") - - def __add_template_noise(samples: ndarray, template: ndarray): - # Case A - if len(samples) < len(template): - _template = template[:len(samples)] # cut where it is enough - return samples + _template # add values - # Case B - elif len(samples) > len(template): - _template = np.tile(template, ceil(len(samples) / len(template))) # repeat full-pattern - _template = _template[:len(samples)] # cut where it is enough - return samples + _template # add values - # Case C - else: # equal lengths - return samples + template # add values - - def __noisy_timeseries(original: timeseries.Timeseries, noise: timeseries.Timeseries) -> timeseries.Timeseries: - # Case 1: Segment-wise - if original.domain == noise.domain: - template = [noise.samples, ] if noise.is_contiguous else noise.samples - return original._apply_operation_and_new(__add_template_noise, template=template, - iterate_over_each_segment_key='template') - # Case 2: Template-wise - elif noise.is_contiguous: - template = noise.samples - return original._apply_operation_and_new(__add_template_noise, template=template) - # Case 3: Template-wise, with hyper-template - else: - template = np.concatenate(noise.samples) # concatenate as a hyper-template - return original._apply_operation_and_new(__add_template_noise, template=template) - - noisy_channels = {} - - # Case Noise - if isinstance(noise, Noise): - for channel_name in original.channel_names: - channel = 
original._get_channel(channel_name) - if channel.sampling_frequency == noise.sampling_frequency: - template = noise[channel.duration] - noisy_channels[channel_name] = channel._apply_operation_and_new(__add_template_noise, template=template) - else: - raise AssertionError( - f"Noise does not have the same sampling frequency as channel '{channel_name}' of 'original'." - f"Suggestion: Resample one of them first.") - - # Case Timeseries - elif isinstance(noise, timeseries.Timeseries): - for channel_name in original.channel_names: - channel = original._get_channel(channel_name) - if channel.units != noise.units and channel.units != None and channel.units != Unitless and noise.units != None and noise.units != Unitless: - raise AssertionError( - f"Noise does not have the same units as channel '{channel_name}' of 'original'." - f"Suggestion: If possible, convert one of them first or drop units.") - if channel.sampling_frequency == noise.sampling_frequency: - noisy_channel = __noisy_timeseries(channel, noise) - noisy_channels[channel_name] = noisy_channel - else: - raise AssertionError( - f"Noise does not have the same sampling frequency as channel '{channel_name}' of 'original'." - f"Suggestion: Resample one of them first.") - - - elif isinstance(noise, Biosignal): - # Case Biosignal channel-wise - if original.channel_names == noise.channel_names: - for channel_name in original.channel_names: - original_channel = original._get_channel(channel_name) - noise_channel = noise._get_channel(channel_name) - if original_channel.units != noise_channel.units and original_channel.units != None and original_channel.units != Unitless and noise_channel.units != None and noise_channel.units != Unitless: - raise AssertionError( - f"Noise does not have the same units as channel '{channel_name}' of 'original'." - f"Suggestion: If possible, convert one of them first or drop units.") - if original_channel.sampling_frequency == noise_channel.sampling_frequency: - noisy_channel = __noisy_timeseries(original_channel, noise_channel) - noisy_channels[channel_name] = noisy_channel - else: - raise AssertionError(f"Channels '{channel_name}' do not have the same sampling frequency in 'original' and 'noise'." - f"Suggestion: Resample one of them first.") - - # Case Biosignal unique channel - elif len(noise) == 1: - _, x = tuple(iter(noise))[0] - for channel_name in original.channel_names: - channel = original._get_channel(channel_name) - if channel.units != x.units and channel.units != None and channel.units != Unitless and x.units != None and x.units != Unitless: - raise AssertionError( - f"Noise does not have the same units as channel '{channel_name}' of 'original'." - f"Suggestion: If possible, convert one of them first or drop units.") - if channel.sampling_frequency == x.sampling_frequency: - noisy_channel = __noisy_timeseries(channel, x) - noisy_channels[channel_name] = noisy_channel - else: - raise AssertionError(f"Noise does not have the same sampling frequency as channel '{channel_name}' of 'original'." 
- f"Suggestion: Resample one of them first.") - - else: - raise ArithmeticError("Noise should have 1 channel only (to be added to every channel of 'original') " - "or the same channels as 'original' (for each to be added to the corresponding channel of 'original'.") - - events = events = set(original.__associated_events.values()).union(set(noise._Biosignal__associated_events.values())) if isinstance( - noise, (Biosignal, timeseries.Timeseries)) else None - - return original._new(timeseries=noisy_channels, name=name if name is not None else 'Noisy ' + original.name, - events=events, added_noise=noise) - - def restructure_domain(self, time_intervals: tuple[DateTimeRange]): - domain = self.domain - if len(domain) >= len(time_intervals): - for _, channel in self: - # 1. Concatenate segments - channel._concatenate_segments() - # 2. Partition according to new domain - channel._partition(time_intervals) - else: - NotImplementedError("Not yet implemented.") - - def undo_segmentation(self, time_intervals: tuple[DateTimeRange]): - for _, channel in self: - channel._merge(time_intervals) - - def tag(self, tags: str | tuple[str]): - """ - Mark all channels with a tag. Useful to mark machine learning targets. - :param tags: The label or labels to tag the channels. - :return: None - """ - if isinstance(tags, str): - for _, channel in self: - channel.tag(tags) - elif isinstance(tags, tuple) and all(isinstance(x, str) for x in tags): - for x in tags: - for _, channel in self: - channel.tag(x) - else: - raise TypeError("Give one or multiple string labels to tag the channels.") - - @classmethod - def fromNoise(cls, - noises: Noise | Dict[str | BodyLocation, Noise], - time_intervals: DateTimeRange | tuple[DateTimeRange], - name: str = None): - """ - Creates a type of Biosignal from a noise source. - - :param noises: - - If a Noise object is given, the Biosignal will have 1 channel for the specified time interval. - - If a dictionary of Noise objects is given, the Biosignal will have multiple channels, with different - generated samples, for the specified time interval, named after the dictionary keys. - - :param time_interval: Interval [x, y[ where x will be the initial date and time of every channel, and y will be - the final date and time of every channel; on a union of intervals, in case a tuple is given. - - :param name: The name to be associated to the Biosignal. Optional. 
- - :return: Biosignal subclass - """ - - if not isinstance(time_intervals, DateTimeRange) and isinstance(time_intervals, tuple) and \ - not all([isinstance(x, DateTimeRange) for x in time_intervals]): - raise TypeError(f"Parameter 'time_interval' should be of type DateTimeRange or a tuple of them.") - - if isinstance(time_intervals, tuple) and len(time_intervals) == 1: - time_intervals = time_intervals[0] - - channels = {} - - if isinstance(noises, Noise): - if isinstance(time_intervals, DateTimeRange): - samples = noises[time_intervals.timedelta] - channels[noises.name] = timeseries.Timeseries(samples, time_intervals.start_datetime, noises.sampling_frequency, - units=Unitless(), name=noises.name) - else: - segments = {x.start_datetime: noises[x.timedelta] for x in time_intervals} - channels[noises.name] = timeseries.Timeseries.withDiscontiguousSegments(segments, noises.sampling_frequency, - units=Unitless(), name=noises.name) - - elif isinstance(noises, dict): - if isinstance(time_intervals, DateTimeRange): - for channel_name, noise in noises.items(): - samples = noise[time_intervals.timedelta] - channels[channel_name] = timeseries.Timeseries(samples, time_intervals.start_datetime, noise.sampling_frequency, - units=Unitless(), name=noise.name + f" : {channel_name}") - else: - for channel_name, noise in noises.items(): - segments = {x.start_datetime: noise[x.timedelta] for x in time_intervals} - channels[channel_name] = timeseries.Timeseries.withDiscontiguousSegments(segments, noise.sampling_frequency, - units=Unitless(), - name=noise.name + f" : {channel_name}") - - return cls(channels, name=name) - - def acquisition_scores(self): - print(f"Acquisition scores for '{self.name}'") - completness_score = self.completeness_score() - print("Completness Score = " + ("%.2f" % (completness_score * 100) + "%" if completness_score else "n.d.")) - onbody_score = self.onbody_score() - print("On-body Score = " + ("%.2f" % (onbody_score * 100) + "%" if onbody_score else "n.d.")) - quality_score = self.quality_score(_onbody_duration=onbody_score * self.duration if onbody_score else self.duration) - print("Quality Score = " + ("%.2f" % (quality_score * 100) + "%" if quality_score else "n.d.")) - - def completeness_score(self): - recorded_duration = self.duration - expected_duration = self.final_datetime - self.initial_datetime - return recorded_duration / expected_duration - - def onbody_score(self): - if hasattr(self.source, 'onbody'): # if the BS defines an 'onbody' method, then this score exists, it's computed and returned - x = self.source.onbody(self) - if x: - return self.source.onbody(self).duration / self.duration - - def quality_score(self, _onbody_duration=None): - if _onbody_duration: - if hasattr(self, - 'acceptable_quality'): # if the Biosignal modality defines an 'acceptable_quality' method, then this score exists, it's computed and returned - return self.acceptable_quality().duration / _onbody_duration - else: - if hasattr(self, 'acceptable_quality') and hasattr(self.source, 'onbody'): - return self.acceptable_quality().duration / self.source.onbody(self).duration - - # =================================== - # SERIALIZATION - - def __getstate__(self): - """ - 1: __name (str) - 2: __source (BS subclass (instantiated or not)) - 3: __patient (Patient) - 4: __acquisition_location (BodyLocation) - 5: __associated_events (tuple) - 6: __timeseries (dict) - """ - return (self.__SERIALVERSION, self.__name, self.__source, self.__patient, self.__acquisition_location, - 
tuple(self.__associated_events.values()), self.__timeseries) - - def __setstate__(self, state): - if state[0] in (1, 2): - self.__name, self.__source, self.__patient, self.__acquisition_location = state[1:5] - self.__timeseries = state[6] - self.__associated_events = {} - self.associate(state[5]) - else: - raise IOError( - f'Version of {self.__class__.__name__} object not supported. Serialized version: {state[0]};' - f'Supported versions: 1 and 2.') - - EXTENSION = '.biosignal' - - def save(self, save_to: str): - # Check extension - if not save_to.endswith(Biosignal.EXTENSION): - save_to += Biosignal.EXTENSION - - # Make memory maps - temp_dir = mkdtemp(prefix='ltbio.') - for _, channel in self: - channel._memory_map(temp_dir) - - # Write - from _pickle import dump - with open(save_to, 'wb') as f: - dump(self, f) - - # Clean up memory maps - rmtree(temp_dir) - - @classmethod - def load(cls, filepath: str): - # Check extension - if not filepath.endswith(Biosignal.EXTENSION): - raise IOError("Only .biosignal files are allowed.") - - from _pickle import load - from _pickle import UnpicklingError - - # Read - try: # Versions >= 2023.0: - f = open(filepath, 'rb') - biosignal = load(f) - except UnpicklingError as e: # Versions 2022.0, 2022.1 and 2022.2: - from bz2 import BZ2File - print( - "Loading...\nNote: Loading a version older than 2023.0 takes significantly more time. It is suggested you save this Biosignal again, so you can have it in the newest fastest format.") - f = BZ2File(filepath, 'rb') - biosignal = load(f) - f.close() - return biosignal - - -class MultimodalBiosignal(Biosignal): - - def __init__(self, **biosignals): - - timeseries = {} - # sources = {} - patient = None - # locations = {} - name = "Union of" - events = {} - - for label, biosignal in biosignals.items(): - if patient is None: - patient = biosignal._Biosignal__patient - elif patient != biosignal._Biosignal__patient: - raise ValueError("When joining Biosignals, they all must be from the same Patient.") - - for channel_label, ts in biosignal._to_dict().items(): - timeseries[label + ':' + channel_label] = ts # Join Timeseries in a single dictionary - - # sources[label] = biosignal.source # Join sources - - # if biosignal.acquisition_location is not None: - # locations[label] = biosignal.acquisition_location - - name += f" '{biosignal.name}'," if biosignal.name != "No Name" else f" '{label}'," - - for event in biosignal.events: - if event.name in events and events[event.name] != event: - raise ValueError("There are two event names associated to different onsets/offsets in this set of Biosignals.") - else: - events[event.name] = event - - super(MultimodalBiosignal, self).__init__(timeseries, None, patient, None, name[:-1]) - self.associate(events) - self.__biosignals = biosignals - - if (len(self.type)) == 1: - raise TypeError("Cannot create Multimodal Biosignal of just 1 modality.") - - @property - def type(self): - return {biosignal.type for biosignal in self.__biosignals.values()} - - @property - def source(self) -> Set[BS]: - return {biosignal.source for biosignal in self.__biosignals.values()} - - @property - def acquisition_location(self) -> Set[BodyLocation]: - return {biosignal.acquisition_location for biosignal in self.__biosignals.values()} - - def __getitem__(self, item): - if isinstance(item, tuple): - if len(item) == 2: - biosignal = self.__biosignals[item[0]] - return biosignal[item[1]] - - elif isinstance(item, str) and item in self.__biosignals.keys(): - return self.__biosignals[item] - - raise 
IndexError("Indexing a Multimodal Biosignal should have two arguments, like 'multisignal['ecg'][V5]," - "where 'ecg' is the Biosignal to address and 'V5' is the channel to get.") - - def __contains__(self, item): - if isinstance(item, str) and item in self.__biosignals.keys(): - return True - if isinstance(item, Biosignal) and item in self.__biosignals.values(): - return True - - super(MultimodalBiosignal, self).__contains__(item) - - def __str__(self): - '''Returns a textual description of the MultimodalBiosignal.''' - res = f"MultimodalBiosignal containing {len(self.__biosignals)}:\n" - for i, biosignal in enumerate(self.__biosignals): - res += "({})\n{}".format(i, str(biosignal)) - return res - - def plot_summary(self, show: bool = True, save_to: str = None): - raise TypeError("Functionality not available for Multimodal Biosignals.") - - -def plot_comparison(biosignals: Collection[Biosignal], show: bool = True, save_to: str = None): - # Check parameters - if not isinstance(biosignals, Collection): - raise TypeError("Parameter 'biosignals' should be a collection of Biosignal objects.") +def plot_comparison(*biosignals: Biosignal, show: bool = True, save_to: str = None): channel_names = None for item in biosignals: @@ -1648,7 +54,6 @@ def plot_comparison(biosignals: Collection[Biosignal], show: bool = True, save_t fig.savefig(save_to) plt.show() if show else plt.close() - def cross_correlation(biosignal1: Biosignal, biosignal2: Biosignal): # Check parameters if not isinstance(biosignal1, Biosignal) or len(biosignal1) != 1: diff --git a/src/ltbio/biosignals/__init__.pyi b/src/ltbio/biosignals/__init__.pyi new file mode 100644 index 00000000..52c43c55 --- /dev/null +++ b/src/ltbio/biosignals/__init__.pyi @@ -0,0 +1,37 @@ +# -- encoding: utf-8 -- +# =================================== +# ScientISST LTBio | Long-Term Biosignals +# Package: biosignals.modalities +# =================================== + +from multipledispatch import dispatch + +from ._Biosignal import Biosignal, DerivedBiosignal, MultimodalBiosignal +from ._BiosignalSource import BiosignalSource +from ._Timeseries import Timeseries +from ._Segment import Segment +from ._Timeline import Timeline +from _Event import Event + +__all__ = [ + "Biosignal", + "DerivedBiosignal", + "MultimodalBiosignal", + "BiosignalSource", + "Timeseries", + "Segment", + "Timeline", + "Event", + "plot", +] + +__all__ += ["modalities", "derived_modalities", "sources"] + +# PLOTTING +@dispatch(Biosignal, bool, str) +def plot(*biosignals: Biosignal, show: bool = True, save_to: str = None) -> None: ... +@dispatch(Timeseries, bool, str) +def plot(*timereries: Timeseries, show: bool = True, save_to: str = None) -> None: ... +@dispatch(Timeline, bool, str) +def plot(timeline: Timeline, show: bool = True, save_to: str = None) -> None: ... + diff --git a/src/ltbio/biosignals/derived.py b/src/ltbio/biosignals/derived.py deleted file mode 100644 index 0addcfbf..00000000 --- a/src/ltbio/biosignals/derived.py +++ /dev/null @@ -1,96 +0,0 @@ -# -- encoding: utf-8 -- - -# =================================== - -# IT - LongTermBiosignals - -# Package: src/ltbio/biosignals -# Module: pseudo -# Description: - -# Contributors: João Saraiva -# Created: 07/03/2023 - -# =================================== - -class DerivedBiosignal(Biosignal): - """ - A DerivedBiosignal is a set of Timeseries of some extracted feature from an original Biosignal. - It is such a feature that it is useful to manipulate it as any other Biosignal. 
- """ - - def __init__(self, timeseries, source = None, patient = None, acquisition_location = None, name = None, original: Biosignal = None): - if original is not None: - super().__init__(timeseries, original.source, original._Biosignal__patient, original.acquisition_location, original.name) - else: - super().__init__(timeseries, source, patient, acquisition_location, name) - - self.original = original # Save reference - - -class ACCMAG(DerivedBiosignal): - - DEFAULT_UNIT = G(Multiplier._) - - def __init__(self, timeseries, source=None, patient=None, acquisition_location=None, name=None, original: ACC | None = None): - super().__init__(timeseries, source, patient, acquisition_location, name, original) - - @classmethod - def fromACC(cls): - pass - - def plot_summary(self, show: bool = True, save_to: str = None): - pass - -class RRI(DerivedBiosignal): - - def __init__(self, timeseries, source=None, patient=None, acquisition_location=None, name=None, original: ECG | None = None): - super().__init__(timeseries, source, patient, acquisition_location, name, original) - - @classmethod - def fromECG(cls): - pass - - def plot_summary(self, show: bool = True, save_to: str = None): - pass - - -class HR(DerivedBiosignal): - - DEFAULT_UNIT = BeatsPerMinute() - - def __init__(self, timeseries, source=None, patient=None, acquisition_location=None, name=None, original: RRI | IBI | ECG | PPG | None = None): - super(HR, self).__init__(timeseries, source, patient, acquisition_location, name, original) - - @classmethod - def fromRRI(cls): - pass - - @classmethod - def fromIBI(cls): - pass - - def plot_summary(self, show: bool = True, save_to: str = None): - pass - - def acceptable_quality(self): # -> Timeline - """ - Acceptable physiological values - """ - return self.when(lambda x: 40 <= x <= 200) # between 40-200 bpm - - -class IBI(DerivedBiosignal): - - DEFAULT_UNIT = Second() - - def __init__(self, timeseries, source=None, patient=None, acquisition_location=None, name=None, original: PPG | None = None): - super().__init__(timeseries, source, patient, acquisition_location, name, original) - - @classmethod - def fromPPG(cls): - pass - - def plot_summary(self, show: bool = True, save_to: str = None): - pass - diff --git a/src/ltbio/biosignals/derived_modalities/__init__.py b/src/ltbio/biosignals/derived_modalities/__init__.py new file mode 100644 index 00000000..c95ccf9e --- /dev/null +++ b/src/ltbio/biosignals/derived_modalities/__init__.py @@ -0,0 +1,11 @@ +# -- encoding: utf-8 -- +# +# =================================== +# ScientISST LTBio | Long-Term Biosignals +# +# Package: biosignals.derived_modalities +# +# Contributors: João Saraiva +# Created: 09/06/2023 +# Last Updated: 09/06/2023 +# =================================== diff --git a/src/ltbio/biosignals/derived_modalities/__init__.pyi b/src/ltbio/biosignals/derived_modalities/__init__.pyi new file mode 100644 index 00000000..97140531 --- /dev/null +++ b/src/ltbio/biosignals/derived_modalities/__init__.pyi @@ -0,0 +1,9 @@ +# -- encoding: utf-8 -- +# +# =================================== +# ScientISST LTBio | Long-Term Biosignals +# Package: biosignals.derived_modalities +# +# Description: Derived modalities that are usually processed like any other biosignal, but they are not directly +# acquired from the sensor. Instead, they are computed (e.g. a feature) from one or more biosignals. 
+# =================================== diff --git a/src/ltbio/biosignals/derived_modalities/motion.py b/src/ltbio/biosignals/derived_modalities/motion.py new file mode 100644 index 00000000..07065ab9 --- /dev/null +++ b/src/ltbio/biosignals/derived_modalities/motion.py @@ -0,0 +1,28 @@ +# -- encoding: utf-8 -- +# +# =================================== +# ScientISST LTBio | Long-Term Biosignals +# +# Package: biosignals.derived_modalities +# Module: motion +# +# Contributors: João Saraiva +# Created: 07/03/2023 +# Last Updated: 12/06/2023 +# =================================== + +from multipledispatch import dispatch + +from .._Biosignal import DerivedBiosignal +from ..modalities import ACC + + +class ACCMAG(DerivedBiosignal): + """ + Magnitude from a 3-axial acceleration biosignal. + """ + + @classmethod + @dispatch(ACC) + def derived_from(cls, biosignal: ACC): + pass diff --git a/src/ltbio/biosignals/derived_modalities/motion.pyi b/src/ltbio/biosignals/derived_modalities/motion.pyi new file mode 100644 index 00000000..bcb1a1ff --- /dev/null +++ b/src/ltbio/biosignals/derived_modalities/motion.pyi @@ -0,0 +1,23 @@ +# -- encoding: utf-8 -- +# +# =================================== +# ScientISST LTBio | Long-Term Biosignals +# +# Package: biosignals.derived_modalities +# Module: motion +# +# Description: Motion-related derived biosignal modalities +# =================================== + +from multipledispatch import dispatch + +from .._Biosignal import DerivedBiosignal +from ..modalities import ACC +from ..units import G + +class ACCMAG(DerivedBiosignal): + DEFAULT_UNIT = G() + + @classmethod + @dispatch(ACC) + def derived_from(cls, biosignal: ACC): ... diff --git a/src/ltbio/biosignals/derived_modalities/pulse.py b/src/ltbio/biosignals/derived_modalities/pulse.py new file mode 100644 index 00000000..437efd91 --- /dev/null +++ b/src/ltbio/biosignals/derived_modalities/pulse.py @@ -0,0 +1,49 @@ +# -- encoding: utf-8 -- +# +# =================================== +# ScientISST LTBio | Long-Term Biosignals +# +# Package: biosignals.derived_modalities +# Module: pulse +# +# Contributors: João Saraiva +# Created: 02/06/2022 +# Last Updated: 12/06/2023 +# =================================== + +from multipledispatch import dispatch + +from .._Biosignal import DerivedBiosignal +from ..modalities import ECG +from ..modalities import PPG + + +class RRI(DerivedBiosignal): + + @classmethod + @dispatch(ECG) + def derived_from(cls, biosignal: ECG): ... + + +class IBI(DerivedBiosignal): + + @classmethod + @dispatch(PPG) + def derived_from(cls, biosignal: PPG): ... + + +class HR(DerivedBiosignal): + + @classmethod + @dispatch(RRI) + def derived_from(cls, biosignal: RRI): ... + + @classmethod + @dispatch(IBI) + def derived_from(cls, biosignal: IBI): ... 
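[Editorial sketch, not part of the patch] The derived_from stubs above rely on multipledispatch to pick an overload from the runtime type of the argument, which is how HR can be derived from either an RRI or an IBI without isinstance chains. A minimal, self-contained illustration of that mechanism, using stand-in classes rather than the real ltbio types:

    from multipledispatch import dispatch

    class StubRRI: ...   # stand-in for ltbio's RRI
    class StubIBI: ...   # stand-in for ltbio's IBI

    @dispatch(StubRRI)
    def derived_from(biosignal):
        # overload chosen when the argument is a StubRRI (HR = 60 / RR interval in s)
        return 'HR from RR intervals'

    @dispatch(StubIBI)
    def derived_from(biosignal):
        # overload chosen when the argument is a StubIBI (HR = 60 / inter-beat interval in s)
        return 'HR from inter-beat intervals'

    print(derived_from(StubRRI()))   # -> HR from RR intervals
    print(derived_from(StubIBI()))   # -> HR from inter-beat intervals
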
+
+    def acceptable_quality(self):  # -> Timeline
+        """
+        Heart rate within the acceptable physiological range: 40-200 bpm.
+        """
+        return self.when(lambda x: 40 <= x <= 200)  # between 40-200 bpm
diff --git a/src/ltbio/biosignals/derived_modalities/pulse.pyi b/src/ltbio/biosignals/derived_modalities/pulse.pyi
new file mode 100644
index 00000000..03789c74
--- /dev/null
+++ b/src/ltbio/biosignals/derived_modalities/pulse.pyi
@@ -0,0 +1,46 @@
+# -- encoding: utf-8 --
+#
+# ===================================
+# ScientISST LTBio | Long-Term Biosignals
+#
+# Package: biosignals.derived_modalities
+# Module: pulse
+#
+# Description: Pulse-related derived biosignal modalities
+# ===================================
+
+
+from multipledispatch import dispatch
+
+from .._Biosignal import DerivedBiosignal
+from ..modalities import ECG
+from ..modalities import PPG
+from ..units import Second, BeatsPerMinute, Multiplier
+
+
+class RRI(DerivedBiosignal):
+    DEFAULT_UNIT = Second(Multiplier.m)
+
+    @classmethod
+    @dispatch(ECG)
+    def derived_from(cls, biosignal: ECG): ...
+
+
+class IBI(DerivedBiosignal):
+    DEFAULT_UNIT = Second(Multiplier.m)
+
+    @classmethod
+    @dispatch(PPG)
+    def derived_from(cls, biosignal: PPG): ...
+
+
+class HR(DerivedBiosignal):
+    DEFAULT_UNIT = BeatsPerMinute()
+
+    @classmethod
+    @dispatch(RRI)
+    def derived_from(cls, biosignal: RRI): ...
+
+    @classmethod
+    @dispatch(IBI)
+    def derived_from(cls, biosignal: IBI): ...
diff --git a/src/ltbio/biosignals/modalities/_ACC.py b/src/ltbio/biosignals/modalities/_ACC.py
new file mode 100644
index 00000000..253c65b7
--- /dev/null
+++ b/src/ltbio/biosignals/modalities/_ACC.py
@@ -0,0 +1,18 @@
+# -- encoding: utf-8 --
+#
+# ===================================
+# ScientISST LTBio | Long-Term Biosignals
+#
+# Package: biosignals.modalities
+# Class: ACC
+#
+# Contributors: João Saraiva
+# Created: 12/05/2022
+# Last Updated: 07/07/2022
+# ===================================
+
+from ltbio.biosignals._Biosignal import Biosignal
+
+
+class ACC(Biosignal):
+    ...
diff --git a/src/ltbio/biosignals/modalities/_ACC.pyi b/src/ltbio/biosignals/modalities/_ACC.pyi
new file mode 100644
index 00000000..b8da92ca
--- /dev/null
+++ b/src/ltbio/biosignals/modalities/_ACC.pyi
@@ -0,0 +1,17 @@
+# -- encoding: utf-8 --
+#
+# ===================================
+# ScientISST LTBio | Long-Term Biosignals
+#
+# Package: biosignals.modalities
+# Class: ACC
+#
+# Description: Accelerometry (also known as accelerometer, actigraphy or ACC) biosignal.
+# =================================== + +from .._Biosignal import Biosignal +from ..units import G + + +class ACC(Biosignal): + DEFAULT_UNIT = G() diff --git a/src/ltbio/biosignals/modalities.py b/src/ltbio/biosignals/modalities/_ECG.py similarity index 74% rename from src/ltbio/biosignals/modalities.py rename to src/ltbio/biosignals/modalities/_ECG.py index 7179b02d..7125c076 100644 --- a/src/ltbio/biosignals/modalities.py +++ b/src/ltbio/biosignals/modalities/_ECG.py @@ -1,22 +1,16 @@ # -- encoding: utf-8 -- - +# # =================================== - -# IT - LongTermBiosignals - -# Package: src/ltbio/biosignals -# Module: modalities -# Description: - -# Contributors: João Saraiva, Mariana Abreu +# ScientISST LTBio | Long-Term Biosignals +# +# Package: biosignals.modalities +# Class: ECG +# +# Contributors: João Saraiva, Mariana Abreu, Rafael Silva # Created: 12/05/2022 -# Last Updated: 07/03/2023 - +# Last Updated: 10/08/2022 # =================================== -from ltbio.biosignals.modalities.Biosignal import Biosignal, DerivedBiosignal -from ltbio.biosignals.timeseries.Unit import * - from datetime import timedelta from statistics import mean from typing import Callable @@ -24,61 +18,27 @@ import numpy as np import traces from biosppy.plotting import plot_ecg -from biosppy.signals.ecg import hamilton_segmenter, correct_rpeaks, extract_heartbeats, ecg as biosppyECG, christov_segmenter, \ +from biosppy.signals.ecg import hamilton_segmenter, correct_rpeaks, extract_heartbeats, ecg as biosppyECG, \ + christov_segmenter, \ engzee_segmenter -from biosppy.signals.tools import get_heart_rate, _filter_signal from biosppy.signals.ecg import sSQI, kSQI, pSQI, fSQI, bSQI, ZZ2018 +from biosppy.signals.tools import get_heart_rate, _filter_signal from numpy import linspace, ndarray, average, array -from ltbio.biosignals.modalities.Biosignal import Biosignal, DerivedBiosignal -from .. import timeseries as _timeseries -from ltbio.biosignals.timeseries.Unit import Volt, Multiplier, BeatsPerMinute, Second - - -# =================================== -# Mechanical Modalities -# =================================== - -class ACC(Biosignal): - - DEFAULT_UNIT = G(Multiplier._) - - def __init__(self, timeseries, source=None, patient=None, acquisition_location=None, name=None): - super(ACC, self).__init__(timeseries, source, patient, acquisition_location, name) - - def plot_summary(self, show: bool = True, save_to: str = None): - pass - - -class RESP(Biosignal): - - DEFAULT_UNIT = Volt(Multiplier.m) +from ltbio.biosignals._Biosignal import Biosignal +from ltbio.biosignals.units import Multiplier, BeatsPerMinute, Second +from .. 
import Timeseries - def __init__(self, timeseries, source=None, patient=None, acquisition_location=None, name=None): - super(RESP, self).__init__(timeseries, source, patient, acquisition_location, name) - - def plot_summary(self, show:bool=True, save_to:str=None): - pass - - -# =================================== -# Electrical Modalities -# =================================== class ECG(Biosignal): - DEFAULT_UNIT = Volt(Multiplier.m) - - def __init__(self, timeseries, source=None, patient=None, acquisition_location=None, name=None): - super(ECG, self).__init__(timeseries, source, patient, acquisition_location, name) - def plot_summary(self, show: bool = True, save_to: str = None): for channel_name in self.channel_names: channel = self._Biosignal__timeseries[channel_name] for segment in channel.segments: if save_to is not None: - save_to += '_{0}_from_{1}_to_{2}'.format(channel_name, str(segment.initial_datetime), str(segment.final_datetime)) + save_to += '_{0}_from_{1}_to_{2}'.format(channel_name, str(segment.start), str(segment.end)) if not segment.is_filtered: # compute info with biosppy default filtering print("Using biosppy filtered version to show a summary") @@ -119,7 +79,7 @@ def __biosppy_r_indices(signal, sampling_rate, algorithm_method, **kwargs) -> nd corrected_indices = correct_rpeaks(signal, indices, sampling_rate)['rpeaks'] # Correct indices return corrected_indices - def __r_indices(self, channel: _timeseries.Timeseries, segmenter: Callable = hamilton_segmenter): + def __r_indices(self, channel: Timeseries, segmenter: Callable = hamilton_segmenter): r_indices = channel._apply_operation_and_return(self.__biosppy_r_indices, sampling_rate=channel.sampling_frequency, @@ -260,7 +220,7 @@ def hr(self, smooth_length: float = None): all_hr_channels[channel_name] = hr_channel - from ltbio.biosignals.modalities.HR import HR + from ltbio.biosignals.derived_modalities.pulse import HR return HR(all_hr_channels, self.source, self._Biosignal__patient, self.acquisition_location, 'Heart Rate of ' + self.name, original_signal=self) @@ -529,153 +489,11 @@ def zhaoSQI(self, by_segment: bool = False): peaks1 = self.__r_indices(channel, hamilton_segmenter) peaks2 = self.__r_indices(channel, christov_segmenter) - def aux(signal, p1, p2, **kwargs): - return ZZ2018(signal, p1, p2, **kwargs) - - res[channel_name] = [channel._apply_operation_and_return(aux, fs=channel.sampling_frequency, search_window=100, nseg=1024, mode='fuzzy') + res[channel_name] = [channel._apply_operation_and_return(ZZ2018, p1, p2, fs=channel.sampling_frequency, search_window=100, nseg=1024, mode='fuzzy') for p1, p2 in zip(peaks1, peaks2)] if not by_segment: res[channel_name] = average(array(res[channel_name]), weights=list(map(lambda subdomain: subdomain.timedelta.total_seconds(), channel.domain))) -class EDA(Biosignal): - - DEFAULT_UNIT = Volt(Multiplier.m) - - def __init__(self, timeseries, source=None, patient=None, acquisition_location=None, name=None): - super(EDA, self).__init__(timeseries, source, patient, acquisition_location, name) - - def plot_summary(self, show: bool = True, save_to: str = None): - pass - - @property - def preview(self): - """Returns 2 minutes of the middle of the signal.""" - domain = self.domain - middle_of_domain: DateTimeRange = domain[len(domain) // 2] - middle = middle_of_domain.start_datetime + (middle_of_domain.timedelta / 2) - try: - return self[middle - timedelta(seconds=2): middle + timedelta(minutes=2)] - except IndexError: - raise AssertionError( - f"The middle segment of {self.name} from 
{self.patient_code} does not have at least 5 seconds to return a preview.") - - @staticmethod - def racSQI(samples): - """ - Rate of Amplitude change (RAC) - It is recomended to be analysed in windows of 2 seconds. - """ - max_, min_ = max(samples), min(samples) - amplitude = max_ - min_ - return abs(amplitude / max_) - - def acceptable_quality(self): # -> Timeline - """ - Suggested by Böttcher et al. Scientific Reports, 2022, for wearable wrist EDA. - """ - return self.when(lambda x: mean(x) > 0.05 and EDA.racSQI(x) < 0.2, window=timedelta(seconds=2)) - -class EEG(Biosignal): - - DEFAULT_UNIT = Volt(Multiplier.m) - - def __init__(self, timeseries, source=None, patient=None, acquisition_location=None, name=None): - super(EEG, self).__init__(timeseries, source, patient, acquisition_location, name) - - def plot_summary(self, show: bool = True, save_to: str = None): - pass - - -class EMG(Biosignal): - - DEFAULT_UNIT = Volt(Multiplier.m) - - def __init__(self, timeseries, source=None, patient=None, acquisition_location=None, name=None): - super(EMG, self).__init__(timeseries, source, patient, acquisition_location, name) - - def plot_summary(self, show: bool = True, save_to: str = None): - pass - -# =================================== -# Optical modalities -# =================================== - -class PPG(Biosignal): - - DEFAULT_UNIT = None - - def __init__(self, timeseries, source=None, patient=None, acquisition_location=None, name=None, **options): - super(PPG, self).__init__(timeseries, source, patient, acquisition_location, name, **options) - - def plot_summary(self, show: bool = True, save_to: str = None): - pass - - def acceptable_quality(self): # -> Timeline - """ - Suggested for wearable wrist PPG by: - - Glasstetter et al. MDPI Sensors, 21, 2021 - - Böttcher et al. 
Scientific Reports, 2022 - """ - - sfreq = self.sampling_frequency - nperseg = int(4 * self.sampling_frequency) # 4 s window - fmin = 0.1 # Hz - fmax = 5 # Hz - - def spectral_entropy(x, sfreq, nperseg, fmin, fmax): - if len(x) < nperseg: # if segment smaller than 4s - nperseg = len(x) - noverlap = int(0.9375 * nperseg) # if nperseg = 4s, then 3.75 s of overlap - f, psd = welch(x, sfreq, nperseg=nperseg, noverlap=noverlap) - idx_min = np.argmin(np.abs(f - fmin)) - idx_max = np.argmin(np.abs(f - fmax)) - psd = psd[idx_min:idx_max] - psd /= np.sum(psd) # normalize the PSD - entropy = -np.sum(psd * np.log2(psd)) - N = idx_max - idx_min - entropy_norm = entropy / np.log2(N) - return entropy_norm - - return self.when(lambda x: spectral_entropy(x, sfreq, nperseg, fmin, fmax) < 0.8, window=timedelta(seconds=4)) - - -class TEMP(Biosignal): - - DEFAULT_UNIT = DegreeCelsius(Multiplier._) - - def __init__(self, timeseries, source=None, patient=None, acquisition_location=None, name=None): - super(TEMP, self).__init__(timeseries, source, patient, acquisition_location, name) - - def plot_summary(self, show: bool = True, save_to: str = None): - pass - - @property - def preview(self): - """Returns 2 minutes of the middle of the signal.""" - domain = self.domain - middle_of_domain: DateTimeRange = domain[len(domain) // 2] - middle = middle_of_domain.start_datetime + (middle_of_domain.timedelta / 2) - try: - return self[middle - timedelta(seconds=2): middle + timedelta(minutes=2)] - except IndexError: - raise AssertionError( - f"The middle segment of {self.name} from {self.patient_code} does not have at least 5 seconds to return a preview.") - - @staticmethod - def racSQI(samples): - """ - Rate of Amplitude change (RAC) - It is recomended to be analysed in windows of 2 seconds. - """ - max_, min_ = max(samples), min(samples) - amplitude = max_ - min_ - return abs(amplitude / max_) - - def acceptable_quality(self): # -> Timeline - """ - Suggested by Böttcher et al. Scientific Reports, 2022, for wearable wrist TEMP. - """ - return self.when(lambda x: 25 < mean(x) < 40 and TEMP.racSQI(x) < 0.2, window=timedelta(seconds=2)) diff --git a/src/ltbio/biosignals/modalities/_ECG.pyi b/src/ltbio/biosignals/modalities/_ECG.pyi new file mode 100644 index 00000000..7e89c8ba --- /dev/null +++ b/src/ltbio/biosignals/modalities/_ECG.pyi @@ -0,0 +1,51 @@ +# -- encoding: utf-8 -- +# +# =================================== +# ScientISST LTBio | Long-Term Biosignals +# +# Package: biosignals.modalities +# Class: ECG +# +# Description: Electrocardiogram (also known as electrocardiography, ECG or EKG) biosignal. +# =================================== + +from datetime import datetime + +from ltbio.biosignals._Biosignal import Biosignal +from ltbio.biosignals.units import Volt, Multiplier +from ..derived_modalities.pulse import HR, RRI + + +class ECG(Biosignal): + + DEFAULT_UNIT = Volt(Multiplier.m) + + def plot_summary(self, show: bool = True, save_to: str = None) -> None: ... + + def r_timepoints(self, algorithm = 'hamilton', _by_segment = False) -> tuple[datetime]: ... + + def heartbeats(self, before=0.2, after=0.4) -> ECG: ... + + def hr(self, smooth_length: float = None) -> HR: ... + + def nni(self) -> RRI: ... + + def invert_if_necessary(self) -> None: ... + + # Quality Metrics + + def skewness(self, by_segment: bool = False) -> dict[str: float | list[float]]: ... + + def kurtosis(self, by_segment: bool = False) -> dict[str: float | list[float]]: ... 
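[Editorial illustration, not part of the patch] Each quality-metric stub above (the list continues below) returns one value per channel, or a list of per-segment values when by_segment is True. As a rough standalone picture of what the skewness- and kurtosis-based indices measure, using plain numpy/scipy rather than the biosppy/ltbio call signatures:

    import numpy as np
    from scipy.stats import skew, kurtosis

    rng = np.random.default_rng(0)
    segment = rng.normal(size=1000)                  # stand-in for one ECG segment

    s_sqi = float(skew(segment))                     # near 0 for symmetric noise
    k_sqi = float(kurtosis(segment, fisher=False))   # near 3 for Gaussian noise

    # shaped like the annotated return value: one entry per channel
    print({'V5': s_sqi}, {'V5': k_sqi})

Clean ECG is strongly non-Gaussian, so heuristics in this family typically expect a markedly higher kurtosis on good segments than on noise.
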
+ + def flatline_percentage(self, by_segment: bool = False) -> dict[str: float | list[float]]: ... + + def basSQI(self, by_segment: bool = False) -> dict[str: float | list[float]]: ... + + def bsSQI(self, by_segment: bool = False) -> dict[str: float | list[float]]: ... + + def pSQI(self, by_segment: bool = False) -> dict[str: float | list[float]]: ... + + def qSQI(self, by_segment: bool = False) -> dict[str: float | list[float]]: ... + + def zhaoSQI(self, by_segment: bool = False) -> dict[str: float | list[float]]: ... diff --git a/src/ltbio/biosignals/modalities/_EDA.py b/src/ltbio/biosignals/modalities/_EDA.py new file mode 100644 index 00000000..67557e8e --- /dev/null +++ b/src/ltbio/biosignals/modalities/_EDA.py @@ -0,0 +1,25 @@ +# -- encoding: utf-8 -- +# +# =================================== +# ScientISST LTBio | Long-Term Biosignals +# +# Package: biosignals.modalities +# Class: EDA +# +# Contributors: João Saraiva, Mariana Abreu +# Created: 12/05/2022 +# Last Updated: 07/07/2022 +# =================================== +from multimethod import multimethod + +from ltbio.biosignals._Biosignal import Biosignal +from ltbio.biosignals._BiosignalSource import BiosignalSource +from ltbio.biosignals._Timeseries import Timeseries +from ltbio.clinical import BodyLocation, Patient + + +class EDA(Biosignal): + @multimethod + def __init__(self, timeseries: dict[str | BodyLocation, Timeseries], source: BiosignalSource = None, + patient: Patient = None, acquisition_location: BodyLocation = None, name: str = None): + super().__init__(timeseries, source, patient, acquisition_location, name) diff --git a/src/ltbio/biosignals/modalities/_EDA.pyi b/src/ltbio/biosignals/modalities/_EDA.pyi new file mode 100644 index 00000000..2f2dfea6 --- /dev/null +++ b/src/ltbio/biosignals/modalities/_EDA.pyi @@ -0,0 +1,17 @@ +# -- encoding: utf-8 -- +# +# =================================== +# ScientISST LTBio | Long-Term Biosignals +# +# Package: biosignals.modalities +# Class: EDA +# +# Description: Electrodermal Activity (also known as EDA, galvanic skin response or GSR) biosignal. +# =================================== + +from ltbio.biosignals._Biosignal import Biosignal +from ltbio.biosignals.units import Volt, Multiplier + + +class EDA(Biosignal): + DEFAULT_UNIT = Volt(Multiplier.m) diff --git a/src/ltbio/biosignals/modalities/_EEG.py b/src/ltbio/biosignals/modalities/_EEG.py new file mode 100644 index 00000000..eb829f74 --- /dev/null +++ b/src/ltbio/biosignals/modalities/_EEG.py @@ -0,0 +1,18 @@ +# -- encoding: utf-8 -- +# +# =================================== +# ScientISST LTBio | Long-Term Biosignals +# +# Package: biosignals.modalities +# Class: EEG +# +# Contributors: João Saraiva +# Created: 12/05/2022 +# Last Updated: 07/07/2022 +# =================================== + +from ltbio.biosignals._Biosignal import Biosignal + + +class EEG(Biosignal): + ... diff --git a/src/ltbio/biosignals/modalities/_EEG.pyi b/src/ltbio/biosignals/modalities/_EEG.pyi new file mode 100644 index 00000000..afdb262e --- /dev/null +++ b/src/ltbio/biosignals/modalities/_EEG.pyi @@ -0,0 +1,17 @@ +# -- encoding: utf-8 -- +# +# =================================== +# ScientISST LTBio | Long-Term Biosignals +# +# Package: biosignals.modalities +# Class: EEG +# +# Description: Electroencephalogram (also known as electroencephalography or EEG) biosignal. 
+# =================================== + +from ltbio.biosignals._Biosignal import Biosignal +from ltbio.biosignals.units import Volt, Multiplier + + +class EEG(Biosignal): + DEFAULT_UNIT = Volt(Multiplier.m) diff --git a/src/ltbio/biosignals/modalities/_EMG.py b/src/ltbio/biosignals/modalities/_EMG.py new file mode 100644 index 00000000..cf83e5f2 --- /dev/null +++ b/src/ltbio/biosignals/modalities/_EMG.py @@ -0,0 +1,18 @@ +# -- encoding: utf-8 -- +# +# =================================== +# ScientISST LTBio | Long-Term Biosignals +# +# Package: biosignals.modalities +# Class: EMG +# +# Contributors: João Saraiva +# Created: 12/05/2022 +# Last Updated: 07/07/2022 +# =================================== + +from ltbio.biosignals._Biosignal import Biosignal + + +class EMG(Biosignal): + ... diff --git a/src/ltbio/biosignals/modalities/_EMG.pyi b/src/ltbio/biosignals/modalities/_EMG.pyi new file mode 100644 index 00000000..c0f29e32 --- /dev/null +++ b/src/ltbio/biosignals/modalities/_EMG.pyi @@ -0,0 +1,17 @@ +# -- encoding: utf-8 -- +# +# =================================== +# ScientISST LTBio | Long-Term Biosignals +# +# Package: biosignals.modalities +# Class: EMG +# +# Description: Electromyogram (also known as electromyography or EMG) biosignal. +# =================================== + +from ltbio.biosignals._Biosignal import Biosignal +from ltbio.biosignals.units import Volt, Multiplier + + +class EMG(Biosignal): + DEFAULT_UNIT = Volt(Multiplier.m) diff --git a/src/ltbio/biosignals/modalities/_PPG.py b/src/ltbio/biosignals/modalities/_PPG.py new file mode 100644 index 00000000..2adbeeb8 --- /dev/null +++ b/src/ltbio/biosignals/modalities/_PPG.py @@ -0,0 +1,18 @@ +# -- encoding: utf-8 -- +# +# =================================== +# ScientISST LTBio | Long-Term Biosignals +# +# Package: biosignals.modalities +# Class: PPG +# +# Contributors: João Saraiva +# Created: 12/05/2022 +# Last Updated: 09/07/2022 +# =================================== + +from ltbio.biosignals._Biosignal import Biosignal + + +class PPG(Biosignal): + ... diff --git a/src/ltbio/biosignals/modalities/_PPG.pyi b/src/ltbio/biosignals/modalities/_PPG.pyi new file mode 100644 index 00000000..0fe5092a --- /dev/null +++ b/src/ltbio/biosignals/modalities/_PPG.pyi @@ -0,0 +1,17 @@ +# -- encoding: utf-8 -- +# +# =================================== +# ScientISST LTBio | Long-Term Biosignals +# +# Package: biosignals.modalities +# Class: PPG +# +# Description: Photoplethysmogram (also known as photoplethysmography or PPG) biosignal. +# =================================== + +from ltbio.biosignals._Biosignal import Biosignal +from ltbio.biosignals.units import Unitless + + +class PPG(Biosignal): + DEFAULT_UNIT = Unitless() diff --git a/src/ltbio/biosignals/modalities/_RESP.py b/src/ltbio/biosignals/modalities/_RESP.py new file mode 100644 index 00000000..d5ee8fb5 --- /dev/null +++ b/src/ltbio/biosignals/modalities/_RESP.py @@ -0,0 +1,18 @@ +# -- encoding: utf-8 -- +# +# =================================== +# ScientISST LTBio | Long-Term Biosignals +# +# Package: biosignals.modalities +# Class: RESP +# +# Contributors: João Saraiva, Mariana Abreu +# Created: 12/05/2022 +# Last Updated: 29/06/2022 +# =================================== + +from ltbio.biosignals._Biosignal import Biosignal + + +class RESP(Biosignal): + ... 
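[Editorial note, not part of the patch] One caveat worth flagging on the new .py/.pyi split: the runtime modality bodies are empty ('...') while DEFAULT_UNIT is assigned only in the stubs, and type stubs are never executed, so a value that lives only in a .pyi does not exist at import time. A minimal demonstration with stand-in classes:

    class Biosignal:
        DEFAULT_UNIT = None        # runtime default; stubs only declare types

    class RESP(Biosignal):         # behaviour-free subclass, as in _RESP.py
        ...

    print(RESP.DEFAULT_UNIT)       # -> None, not Meter(Multiplier.c)

If the per-modality defaults are meant to be readable at runtime, they belong in the .py files as well.
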
diff --git a/src/ltbio/biosignals/modalities/_RESP.pyi b/src/ltbio/biosignals/modalities/_RESP.pyi new file mode 100644 index 00000000..49fa22a6 --- /dev/null +++ b/src/ltbio/biosignals/modalities/_RESP.pyi @@ -0,0 +1,17 @@ +# -- encoding: utf-8 -- +# +# =================================== +# ScientISST LTBio | Long-Term Biosignals +# +# Package: biosignals.modalities +# Class: RESP +# +# Description: Respirogram (also known as respiration or RESP) biosignal. +# =================================== + +from ltbio.biosignals._Biosignal import Biosignal +from ltbio.biosignals.units import Meter, Multiplier + + +class RESP(Biosignal): + DEFAULT_UNIT = Meter(Multiplier.c) # cm diff --git a/src/ltbio/biosignals/modalities/_TEMP.py b/src/ltbio/biosignals/modalities/_TEMP.py new file mode 100644 index 00000000..030eba86 --- /dev/null +++ b/src/ltbio/biosignals/modalities/_TEMP.py @@ -0,0 +1,18 @@ +# -- encoding: utf-8 -- +# +# =================================== +# ScientISST LTBio | Long-Term Biosignals +# +# Package: biosignals.modalities +# Class: TEMP +# +# Contributors: João Saraiva +# Created: 15/06/2022 +# Last Updated: 09/07/2022 +# =================================== + +from ltbio.biosignals._Biosignal import Biosignal + + +class TEMP(Biosignal): + ... diff --git a/src/ltbio/biosignals/modalities/_TEMP.pyi b/src/ltbio/biosignals/modalities/_TEMP.pyi new file mode 100644 index 00000000..8cfa8864 --- /dev/null +++ b/src/ltbio/biosignals/modalities/_TEMP.pyi @@ -0,0 +1,17 @@ +# -- encoding: utf-8 -- +# +# =================================== +# ScientISST LTBio | Long-Term Biosignals +# +# Package: biosignals.modalities +# Class: TEMP +# +# Description: Temperature (also known as TEMP) biosignal. +# =================================== + +from ltbio.biosignals._Biosignal import Biosignal +from ltbio.biosignals.units import DegreeCelsius + + +class TEMP(Biosignal): + DEFAULT_UNIT = DegreeCelsius() diff --git a/src/ltbio/biosignals/modalities/__init__.py b/src/ltbio/biosignals/modalities/__init__.py new file mode 100644 index 00000000..5061922e --- /dev/null +++ b/src/ltbio/biosignals/modalities/__init__.py @@ -0,0 +1,20 @@ +# -- encoding: utf-8 -- +# +# =================================== +# ScientISST LTBio | Long-Term Biosignals +# +# Package: biosignals.modalities +# +# Contributors: João Saraiva +# Created: 12/05/2022 +# Last Updated: 09/06/2023 +# =================================== + +from ._ACC import ACC +from ._ECG import ECG +from ._EDA import EDA +from ._EEG import EEG +from ._EMG import EMG +from ._PPG import PPG +from ._RESP import RESP +from ._TEMP import TEMP diff --git a/src/ltbio/biosignals/modalities/__init__.pyi b/src/ltbio/biosignals/modalities/__init__.pyi new file mode 100644 index 00000000..1b5be7f5 --- /dev/null +++ b/src/ltbio/biosignals/modalities/__init__.pyi @@ -0,0 +1,22 @@ +# -- encoding: utf-8 -- +# +# =================================== +# ScientISST LTBio | Long-Term Biosignals +# Package: biosignals.modalities +# +# Description: All commonly used biosignal modalities as classes. Each class offers a set of methods that are specific +# to that modality and that help to process the data in the specific context of that modality. 
+# =================================== + +# Available Modalities +# (Each is implemented in its own file for legibility) +from ._ACC import ACC +from ._ECG import ECG +from ._EDA import EDA +from ._EEG import EEG +from ._EMG import EMG +from ._PPG import PPG +from ._RESP import RESP +from ._TEMP import TEMP + +__all__ = ["ACC", "ECG", "EDA", "EEG", "EMG", "PPG", "RESP", "TEMP"] diff --git a/src/ltbio/biosignals/sources.py b/src/ltbio/biosignals/sources.py deleted file mode 100644 index 2a3502ef..00000000 --- a/src/ltbio/biosignals/sources.py +++ /dev/null @@ -1,1311 +0,0 @@ -# -- encoding: utf-8 -- - -# =================================== - -# IT - LongTermBiosignals - -# Package: src/ltbio/biosignals -# Module: sources -# Description: - -# Contributors: João Saraiva, Mariana Abreu -# Created: 25/04/2022 -# Last Updated: 29/06/2022 - -# =================================== - -from abc import ABC, abstractmethod - -from . import Event -from numpy import array - - -class __BiosignalSource(ABC): - - __SERIALVERSION: int = 1 - - def __init__(self): - pass - - @abstractmethod - def __repr__(self): - pass - - def __eq__(self, other): - return type(self) == type(other) - - @staticmethod - @abstractmethod - def _timeseries(path:str, type, **options): - pass - - @staticmethod - def _events(path:str, **options) -> tuple[Event] | None: - return None # Override implementation is optional - - @staticmethod - @abstractmethod - def _write(path:str, timeseries:dict): - pass - - @staticmethod - @abstractmethod - def _transfer(samples:array, type) -> array: - pass - - @classmethod - def _get(cls, path:str, type, **options): - return { - 'timeseries': cls._timeseries(path, type, **options), - 'patient': cls._patient(path, **options), - 'acquisition_location': cls._acquisition_location(path, type, **options), - 'events': cls._events(path, **options), - 'name': cls._name(path, type, **options) - } - - @staticmethod - def _patient(path, **options): - return None # Override implementation is optional - - @staticmethod - def _acquisition_location(path, type, **options): - return None # Override implementation is optional - - @staticmethod - def _name(path, type, **options): - return None # Override implementation is optional - - def __getstate__(self): - """ - 1: other... (dict) - """ - other_attributes = self.__dict__.copy() - return (self.__SERIALVERSION, ) if len(other_attributes) == 0 else (self.__SERIALVERSION, other_attributes) - - def __setstate__(self, state): - if state[0] == 1: - if len(state) == 2: - self.__dict__.update(state[1]) - else: - raise IOError(f'Version of {self.__class__.__name__} object not supported. Serialized version: {state[0]};' - f'Supported versions: 1.') - - -# =================================== -# Hospitals and Clinics -# =================================== - - -from neo import MicromedIO -from numpy import array - -from ..sources.BiosignalSource import BiosignalSource - - -class HEM(BiosignalSource): - '''This class represents the source of Hospital de Santa Maria (Lisboa, PT) and includes methods to read and write - biosignal files provided by them. 
Usually they are in the European EDF/EDF+ format.''' - - def __init__(self): - super().__init__() - - def __repr__(self): - return "Hospital Egas Moniz" - - @staticmethod - def __read_trc(list, metadata=False): - """ - Return trc file information, whether it is the values or the metadata, according to boolean metadata - :param list - :param metadata - - """ - dirfile = list[0] - sensor = list[1] - # get edf data - seg_micromed = MicromedIO(dirfile) - hem_data = seg_micromed.read_segment() - hem_sig = hem_data.analogsignals[0] - ch_list = seg_micromed.header['signal_channels']['name'] - # get channels that correspond to type (POL Ecg = type ecg) - find_idx = [hch for hch in range(len(ch_list)) if sensor.lower() in ch_list[hch].lower()] - # returns ch_list of interest, sampling frequency, initial datetime - if metadata: - return ch_list[find_idx], float(hem_sig.sampling_rate), hem_data.rec_datetime, hem_sig.units - # returns initial date and samples - print(ch_list[find_idx]) - return array(hem_sig[:, find_idx].T), hem_data.rec_datetime, ch_list[find_idx] - - @staticmethod - def _timeseries(dir, type, **options): - '''Reads multiple EDF/EDF+ files on the directory 'path' and returns a Biosignal associated with a Patient.''' - # first a list is created with all the filenames that end in .edf and are inside the chosen dir - # this is a list of lists where the second column is the type of channel to extract - if type is modalities.ECG: - label = 'ecg' - all_files = sorted([[path.join(dir, file), label] for file in listdir(dir) if file.lower().endswith('.trc')]) - # run the edf read function for all files in list all_files - channels, sfreq, start_datetime, units = HEM.__read_trc(all_files[0], metadata=True) - all_trc = list(map(HEM.__read_trc, all_files)) - # run the trc read function for all files in list all_files - new_dict, first_time = {}, all_trc[0][1] - # TODO ADD UNITS TO TIMESERIES - for channel in channels: - last_start = all_trc[0][1] - segments = {last_start: all_trc[0][0][list(all_trc[0][2]).index(channel)]} - for at, trc_data in enumerate(all_trc[1:]): - if channel not in trc_data[2]: - continue - ch = list(trc_data[2]).index(channel) - final_time = all_trc[at][1] + timedelta(seconds=len(all_trc[at][0][ch])/sfreq) - if trc_data[1] <= final_time: - if (final_time - trc_data[1]) < timedelta(seconds=1): - segments[last_start] = np.append(segments[last_start], trc_data[0][ch]) - else: - continue - print('here') - else: - segments[trc_data[1]] = trc_data[0][ch] - last_start = trc_data[1] - - if len(segments) > 1: - new_timeseries = timeseries.Timeseries.withDiscontiguousSegments(segments, sampling_frequency=sfreq, name=channels[ch]) - else: - new_timeseries = timeseries.Timeseries(tuple(segments.values())[0], tuple(segments.keys())[0], sfreq, name=channels[ch]) - new_dict[channels[ch]] = new_timeseries - - return new_dict - - @staticmethod - def _write(path: str, timeseries: dict): - pass - - @staticmethod - def _transfer(samples, to_unit): - pass - - from ..sources.BiosignalSource import BiosignalSource - - class HSM(BiosignalSource): - '''This class represents the source of Hospital de Santa Maria (Lisboa, PT) and includes methods to read and write - biosignal files provided by them. 
Usually they are in the European EDF/EDF+ format.''' - - def __init__(self): - super().__init__() - - def __repr__(self): - return "Hospital de Santa Maria" - - @staticmethod - def __read_edf(list, metadata=False): - - """ - Reads one edf file - If metadata is True - returns list of channels and sampling frequency and initial datetime - Else return arrays one for each channel - """ - dirfile = list[0] - sensor = list[1] - # get edf data - hsm_data = read_raw_edf(dirfile) - # get channels that correspond to type (POL Ecg = type ecg) - channel_list = [hch for hch in hsm_data.ch_names if sensor.lower() in hch.lower()] - # initial datetime - if metadata: - return channel_list, hsm_data.info['sfreq'] - # structure of hsm_sig is two arrays, the 1st has one array for each channel and the 2nd is an int-time array - hsm_sig = hsm_data[channel_list] - - return hsm_sig[0], hsm_data.info['meas_date'].replace(tzinfo=None) - - @staticmethod - def _timeseries(dir, type, **options): - '''Reads multiple EDF/EDF+ files on the directory 'path' and returns a Biosignal associated with a Patient.''' - if type is modalities.ECG: - label = 'ecg' - if type is modalities.EMG: - label = 'emg' - if type is modalities.EEG: - label = 'eeg' - # first a list is created with all the filenames that end in .edf and are inside the chosen dir - # this is a list of lists where the second column is the type of channel to extract - all_files = sorted([[path.join(dir, file), label] for file in listdir(dir) if file.endswith('.edf')]) - # run the edf read function for all files in list all_files - channels, sfreq = HSM.__read_edf(all_files[0], metadata=True) - all_edf = list(map(HSM.__read_edf, all_files)) - new_dict = {} - for ch in range(len(channels)): - segments = {edf_data[1]: edf_data[0][ch] for edf_data in all_edf} - if len(segments) > 1: - new_timeseries = timeseries.Timeseries.withDiscontiguousSegments(segments, sampling_frequency=sfreq, name=channels[ch]) - else: - new_timeseries = timeseries.Timeseries(tuple(segments.values())[0], tuple(segments.keys())[0], sfreq, name=channels[ch]) - new_dict[channels[ch]] = new_timeseries - return new_dict - - @staticmethod - def _write(path: str, timeseries: dict): - pass - - @staticmethod - def _transfer(samples, to_unit): - pass - - -# =================================== -# General-purpose Devices -# =================================== - - -import configparser -from ast import literal_eval -from datetime import timedelta -from json import load -from os import listdir, path, access, R_OK -from os.path import getsize -from warnings import warn - -import numpy as np -from dateutil.parser import parse as to_datetime - -from .. import timeseries -from .. 
import modalities -from ..sources.BiosignalSource import BiosignalSource -from ltbio.clinical.BodyLocation import BodyLocation - - -class Sense(BiosignalSource): - - # Sense Defaults files use these keys: - MODALITIES = 'modalities' - CHANNEL_LABELS = 'labels' - BODY_LOCATION = 'location' - - # Sense csv data files use these keys: - KEY_CH_LABELS_IN_HEADER = 'Channel Labels' - KEY_HZ_IN_HEADER = 'Sampling rate (Hz)' - KEY_TIME_IN_HEADER = 'ISO 8601' - ANALOGUE_LABELS_FORMAT = 'AI{0}_raw' - - # These are needed to map channels to biosignal modalities - DEFAULTS_PATH: str - DEVICE_ID: str - - # Flag to deal with badly-formatted CSV files - BAD_FORMAT = False - - def __init__(self, device_id:str, defaults_path:str=None): - super().__init__() - self.__device_id = device_id - Sense.DEVICE_ID = device_id - if defaults_path is not None: - Sense.DEFAULTS_PATH = defaults_path - else: - if not path.exists('resources/config.ini'): - raise FileNotFoundError('No config.ini was found.') - try: - config = configparser.ConfigParser() - config.read('resources/config.ini') - Sense.DEFAULTS_PATH = config['DEFAULT']['Sense'] - print(f"Getting default mapping from {Sense.DEFAULTS_PATH}") - except IndexError: - raise KeyError("No defaults file indicated 'Sense' devices in config.ini.") - self.__defaults_path = defaults_path - - Sense.BAD_FORMAT = False - - def __repr__(self): - return "ScientISST Sense" - - - @staticmethod - def __aux_date(header): - """ Get starting time from header. """ - return to_datetime(header[Sense.KEY_TIME_IN_HEADER], ignoretz=True) - - @staticmethod - def __check_empty(len_, type=''): - """ Confirm if the length is acceptable and return the desired output. """ - if type == 'file_size': - if len_ <= 50: - return True - else: - if len_ < 1: - return True - return False - - @staticmethod - def __get_mapping(biosignal_type, channel_labels, modalities_available): - """ - Given a header, find all indexes that correspond to biosignal modality of interest. - It REQUIRES a default mapping to be specified in a JSON file, otherwise a mapping will be requested on the stdin and saved for future use. - - @param header: A list of strings corresponding to column names. - @param biosignal_type: Biosignal subclass indicating which modality is of interest. - @param defaults_path: The path to the JSON file containing the mapping in the correct syntax. - - @rtype: tuple - @return: A tuple with: - a) A dictionary with the indexes corresponding to the biosignal modality of interest mapped to a channel label. Optionally, it can have a key Sense.BODY_LOCATION mapped to some body location. - E.g.: {1: 'Label of channel 1', 3: 'Label of channel 3'} - b) A body location (in str) or None - """ - - mapping = {} - - if biosignal_type.__name__ in str(modalities_available): - for index in modalities_available[biosignal_type.__name__]: - # Map each analogue channel of interest to a label - mapping[index] = channel_labels[str(index)] - else: - raise IOError(f"There are no analogue channels associated with {biosignal_type.__name__}") - - return mapping - - @staticmethod - def __get_defaults(): - """ - Gets the default mapping of channels for a device. - - @return: A tuple with - a) modalities: A dictionary mapping biosignal modalities to column indexes; - b) channel_labels: A dictionary mapping each column index to a meaningful channel label; - c) body_location: A string associated with a body location. 
- @rtype: tuple of size 3 - """ - - if not hasattr(Sense, 'DEVICE_ID'): - raise IOError("Unlike other BiosignalSource(s), Sense needs to be instantiated and a 'device_id' must be provided on instantiation.") - - # Check if file exists and it is readable - if path.isfile(Sense.DEFAULTS_PATH) and access(Sense.DEFAULTS_PATH, R_OK): - - # OPTION A: Use the mapping in the json file - with open(Sense.DEFAULTS_PATH, 'r') as json_file: - json_string = load(json_file) - - # Get mapping of modalities - if Sense.MODALITIES in json_string[Sense.DEVICE_ID]: - modalities = json_string[Sense.DEVICE_ID][Sense.MODALITIES] - else: - raise IOError(f"Key {Sense.MODALITIES} is mandatory for each device default mapping.") - - # Get mapping of channel labels, if any - if Sense.CHANNEL_LABELS in json_string[Sense.DEVICE_ID]: - channel_labels = json_string[Sense.DEVICE_ID][Sense.CHANNEL_LABELS] - else: - channel_labels = None - - # Get body location, if any - if Sense.BODY_LOCATION in json_string[Sense.DEVICE_ID]: - body_location = json_string[Sense.DEVICE_ID][Sense.BODY_LOCATION] - if body_location.startswith('BodyLocation.'): - body_location:BodyLocation = eval(body_location) - else: - body_location = None - - return modalities, channel_labels, body_location - - # File does not exist; creates one - else: - print("Either Sense defaults file is missing or it is not readable. Creating new defaults...") - # OPTION B: Ask and save a new mapping - json_string = {} - json_string[Sense.DEVICE_ID] = {} # Create a new object for a new device mapping - # B1. Input modalities - # B2. Input Channel labels - # B3. Input Body Location - # TODO: Use stdin to ask for default, save it, and return it - - @staticmethod - def __get_header(file_path): - """ - Auxiliary procedures to find the header (1st line) and column names (2nd line) of the file in the given path. - @param file_path: The path of the file to look for a header. - @return: A tuple with: - a) header: A dictionary with the header metadata. - b) column_names: A list of the column names. - @raise: - IOError: If the given file path does not exist. - """ - with open(file_path) as fh: - header = next(fh)[1:] # Read first line - header = literal_eval(header) # Get a dictionary of the header metadata - column_names = next(fh)[1:] # Read second line - column_names = column_names.split() # Get a list of the column names - return header, column_names - - @staticmethod - def __get_samples(file_path): - """ - Auxiliary procedures to find the samples (> 3rd line) of the file in the given path. - @param file_path: The path of the file to look for a header. - @return: A np.array of the data. - @raise: - IOError: If the given file path does not exist. - """ - with open(file_path) as fh: - # Dismiss header (it is in the first line) - header = next(fh)[1:] - next(fh) - # Get the remaining data, i.e., the samples - data = [line.strip().split() for line in fh] - try: - return np.array(data, float) - except ValueError: # In July 2022, it could occur that SENSE files could present Bad Format. - Sense.BAD_FORMAT = True - all_segments = [] - start_indices = [0, ] - # In that case, we need to separate each valid segment of samples. - correct_length = len(data[0]) # FIXME: Assuming first line is syntax-valid. Poor verification, though. - for i in range(len(data)): - if len(data[i]) != correct_length: # Bad syntax found - warn(f"File '{file_path}' has bad syntax on line {i}. 
This portion was dismissed.") - # Trim the end of data - for j in range(i-1, 0, -1): - if data[j][0] == '15': # Look for NSeq == 15 - all_segments.append(np.array(data[start_indices[-1]:j + 1], float)) # append "old" segment - break - # Trim the beginning of new segment - for j in range(i+1, len(data), 1): - if data[j][0] == '0': # Look for NSeq == 0 - start_indices.append(j) - break - - all_segments.append(np.array(data[start_indices[-1]:], float)) # append last "new" segment - return all_segments, start_indices - - - @staticmethod - def __read_file(file_path, type, channel_labels, modalities_available): - """ - Reads one csv file - Args: - list_ (list): contains the file path - metadata (bool): defines whether only metadata or actual timeseries values should be returned - sensor_idx (list): list of indexes that correspond to the columns of sensor to extract - sensor_names (list): list of names that correspond to the sensor label - ex: sensor='ECG', sensor_names=['ECG_chest'] - ex: sensor='ACC', options['location']='wrist', sensor_names=['ACCX_wrist','ACCY_wrist','ACCZ_wrist'] - device (str): device MacAddress, this is used to get the specific header, specially when using 2 devices - **options (dict): equal to _read arg - - @return: A tuple with: - a) sensor_data (np.array): 2-dimensional array of time over sensors columns. - b) date (datetime): initial datetime of samples. - d) sampling_frequency (float): The sampling frequency, in Hertz, of the read samples. - - @raise: - IOError: if sensor_names is empty, meaning no channels could be retrieved for chosen sensor - """ - - # STEP 1 - # Get header - header, column_names = Sense.__get_header(file_path) - - # STEP 2 - # Get all samples - all_samples = Sense.__get_samples(file_path) - - # STEP 3 - # Raise Error if file is empty - if not Sense.BAD_FORMAT and Sense.__check_empty(len(all_samples)): - raise IOError(f'Empty file: {file_path}.') - - # STEP 4 - # Get analogue channels of interest, mapped to labels, and a body location (if any associated) - mapping = Sense.__get_mapping(type, channel_labels, modalities_available) - - # STEP 5 - # Get initial date and sampling frequency - date = Sense.__aux_date(header) - sf = header[Sense.KEY_HZ_IN_HEADER] - - # STEP 6 - # Filtering only the samples of the channels of interest - if not Sense.BAD_FORMAT: - samples_of_interest = {} - for ix in mapping: - label = mapping[ix] - samples_of_interest[label] = all_samples[:, column_names.index(Sense.ANALOGUE_LABELS_FORMAT.format(str(ix)))] - # return dict, start date, sampling frequency - return samples_of_interest, date, sf - else: - samples_of_interest_by_segment, start_dates = [], [] - all_segments, start_indices = all_samples - for segment, start_index in zip(all_segments, start_indices): - start_dates.append(date + timedelta(seconds=start_index/sf)) - samples_of_interest = {} - for ix in mapping: - label = mapping[ix] - samples_of_interest[label] = segment[:, column_names.index(Sense.ANALOGUE_LABELS_FORMAT.format(str(ix)))] - samples_of_interest_by_segment.append(samples_of_interest) - # return segments, start dates, sampling frequency - return samples_of_interest_by_segment, start_dates, sf - - - @staticmethod - def _timeseries(dir, type, **options): - """Reads multiple csv files on the directory 'path' and returns a Biosignal associated with a Patient. 
- @param dir (str): directory that contains Sense files in csv format - @param type (subclass of Biosignal): type of biosignal to extract can be one of ECG, EDA, PPG, RESP, ACC and EMG - @param **options (dict): - defaults_path (str): if the user wants to use a json to save and load bitalino configurations - device_id (str): directory to json file. If not defined, a default will be set automatically - - @return: A typical dictionary like {str: Timeseries}. - - @raise: - IOError: If there are no Sense files in the given directory. - IOError: If Sense files have no header. - """ - - # STEP 0 - Get defaults - modalities_available, channel_labels, _ = Sense.__get_defaults() - - # STEP 1 - Get files - # A list is created with all the filenames that end with '.csv' inside the given directory. - # E.g. [ file1.csv, file.2.csv, ... ] - all_files = [path.join(dir, file) for file in listdir(dir) if file.endswith('.csv')] - if not all_files: - raise IOError(f"No files in {dir}.") - - # STEP 2 - Convert channel labels to BodyLocations, if any - for position, label in channel_labels.items(): - if label.startswith('BodyLocation.'): - channel_labels[position]:BodyLocation = eval(label) - - # STEP 3 - Read files - # Get samples of analogue channels of interest from each file - data = [] - for file in all_files: - if getsize(file) == 0: - warn(f"File '{file}' has 0 bytes. Its reading was dismissed.") - continue - what_is_read = Sense.__read_file(file, type, channel_labels, modalities_available) - if not Sense.BAD_FORMAT: - data.append(what_is_read) - else: - samples_of_interest_by_segment, start_dates, sf = what_is_read - for segment, start_date in zip(samples_of_interest_by_segment, start_dates): - data.append((segment, start_date, sf)) - Sense.BAD_FORMAT = False # done dealing with a bad format - - # E.g.: data[k] = samples_of_interest, start_date, sampling_frequency - - # STEP 4 - Restructuring - # Listing all Segments of the same channel together, labelled to the same channel label. 
- res = {} - segments = {} - for samples, date, sf in data: - for channel in samples: - # instantiating or appending - if channel not in res: - segments[channel] = {date: samples[channel]} - else: - segments[channel][date] = samples[channel] - res[channel] = sf # save sampling frequency here to be used on the next loop - - # Encapsulating the list of Segments of the same channel in a Timeseries - for channel in segments: - if len(segments[channel]) > 1: - res[channel] = timeseries.Timeseries.withDiscontiguousSegments(segments[channel], sampling_frequency=res[channel]) - else: - res[channel] = timeseries.Timeseries(tuple(segments[channel].values())[0], tuple(segments[channel].keys())[0], sampling_frequency=res[channel]) - - return res - - @staticmethod - def _acquisition_location(path, type, **options): - _, _, bl = Sense.__get_defaults() - return bl - - @staticmethod - def _write(dir, timeseries): - pass # TODO - - @staticmethod - def _transfer(samples, to_unit): - pass - - - -class Bitalino(BiosignalSource): - def __init__(self): - super().__init__() - - def __repr__(self): - return "Bitalino" - - def __aux_date(header): - """ - Get starting time from header - """ - time_key = [key for key in header.keys() if 'time' in key][0] - try: - return to_datetime(header['date'].strip('\"') + ' ' + header[time_key].strip('\"')) - except Exception as e: - print(e) - - def __check_empty(len_, type=''): - """ - Confirm if the length is acceptable and return the desired output - """ - if type == 'file_size': - if len_ <= 50: - return True - else: - if len_ < 1: - return True - return False - - def __change_sens_list(sens, device, channels): - """ - Confirm if the list of sensors has only RAW as labels, and ask the user for new labels in that case. - """ - if list(set(sens)) == ['RAW']: - print(f'Please update sens according to the sensors used:') - analogs = channels[-len(sens):] - for se in range(len(sens)): - new_se = str(input(f'{device} -- {sens[se]} -- {analogs[se]}')).upper() - sens[se] = new_se - return sens - - def __analog_idx(header, sensor, **options): - """ - From a header choose analog sensor key idx that correspond to a specific sensor. - This also runs read json to save configurations to facilitate implementation - This function leads with several devices and it returns a list that may contain one or several integers - """ - sensor_idx, sensor_names, json_bool, chosen_device = [], [], False, '' - # if options and json key, get json to calculate - if options: - if 'json' in options.keys(): - json_bool = options['json'] - json_dir = options['json_dir'] if 'json_dir' in options.keys() \ - else path.join(getcwd(), 'bitalino.json') - len_ch = 0 - for device in header.keys(): - chosen_device = device - sens_id = '' - # iterate over each device - if json_bool: - sens, ch, location = Bitalino.__read_json(json_dir, header[device]) - else: - sens = header[device][str(input(f'What is the header key of sensor names? {header}\n ')).strip().lower()] - ch = header[device][str(input(f'What is the header key for analog channels? {header}\n ')).strip().lower()] - location = str(input(f'What is the body location of this device {device}? 
\n')) - sens = Bitalino.__change_sens_list(sens, device, ch) - analogs = ch[-len(sens):] - - if sensor in str(sens): - # add other column devices as offset to the column to retrieve - location_bool = True - if 'location' in options.keys(): - if location.lower() not in options['location'].lower(): - location_bool = False - sens_id = [lab + '_' + location for lab in sens if sensor in lab.upper() and location_bool] - sensor_idx += [len_ch + ch.index(analogs[sens.index(sid.split('_')[0])]) for sid in sens_id] - if sens_id != '': - chosen_device = device - len_ch = len(ch) - sensor_names += sens_id - - return sensor_idx, sensor_names, chosen_device - - def __read_json(dir_, header): - # check if bitalino json exists and returns the channels and labels and location - if path.isfile(dir_) and access(dir_, - R_OK): - # checks if file exists - with open(dir_, 'r') as json_file: - json_string = load(json_file) - else: - print("Either file is missing or is not readable, creating file...") - json_string = {} - if 'device connection' in header.keys(): - device = header['device connection'] - else: - device = input('Enter device id (string): ') - if device not in json_string.keys(): - json_string[device] = {} - - for key in ['column', 'label', 'firmware version', 'device', 'resolution', 'channels', 'sensor', 'location']: - if key not in json_string[device].keys(): - if key in header.keys(): - json_string[device][key] = header[key] - else: - print(header['device connection'], header['label']) - new_info = str(input(f'{key}: ')).lower() - json_string[device][key] = new_info - if key == 'label': - sens = Bitalino.__change_sens_list(json_string[device]['label'], device, header['column']) - json_string[device][key] = sens - with open(dir_, 'w') as db_file: - dump(json_string, db_file, indent=2) - return json_string[device]['label'], json_string[device]['column'], json_string[device]['location'] - - @staticmethod - def __read_metadata(dirfile, sensor, **options): - """ - Read metadata of a single file - Args: - dirfile (str): contains the file path - sensor (str): contains the sensor label to look for - Returns: - sensor_idx (list), sensor_names (list), device (str), header (dict) - **options (dict): equal to _read arg - """ - # size of bitalino file - file_size = path.getsize(dirfile) - if file_size <= 50: - return {} - - with open(dirfile) as fh: - next(fh) - header = next(fh)[2:] - next(fh) - - header = ast.literal_eval(header) - sensor_idx, sensor_names, device = Bitalino.__analog_idx(header, sensor, **options) - return sensor_idx, sensor_names, device, header[device] - - # @staticmethod - def __read_bit(dirfile, sensor, sensor_idx=[], sensor_names=[], device='', **options): - """ - Reads one edf file - Args: - dirfile (str): contains the file path - sensor (str): contains the sensor label to look for - sensor_idx (list): list of indexes that correspond to the columns of sensor to extract - sensor_names (list): list of names that correspond to the sensor label - ex: sensor='ECG', sensor_names=['ECG_chest'] - ex: sensor='ACC', options['location']='wrist', sensor_names=['ACCX_wrist','ACCY_wrist','ACCZ_wrist'] - device (str): device MacAddress, this is used to get the specific header, specially when using 2 devices - **options (dict): equal to _read arg - - Returns: - sensor_data (array): 2-dimensional array of time over sensors columns - date (datetime): initial datetime of array - - Raises: - IOError: if sensor_names is empty, meaning no channels could be retrieved for chosen sensor - """ - # size of 
bitalino file - file_size = path.getsize(dirfile) - if file_size <= 50: - return '', [] - with open(dirfile) as fh: - next(fh) - header = next(fh)[2:] - next(fh) - # signal - data = np.array([line.strip().split() for line in fh], float) - # if file is empty, return - if Bitalino.__check_empty(len(data)): - return None - - header = ast.literal_eval(header) - if len(sensor_names) > 0: - sensor_data = data[:, sensor_idx] - date = Bitalino.__aux_date(header[device]) - print(date) - return sensor_data, date - else: - raise IOError(f"Sensor {sensor} was not found in this acquisition, please insert another") - - @staticmethod - def _timeseries(dir, type, startkey='A20', **options): - """Reads multiple EDF/EDF+ files on the directory 'path' and returns a Biosignal associated with a Patient. - Args: - dir (str): directory that contains bitalino files in txt format - type (Biosignal): type of biosignal to extract can be one of ECG, EDA, PPG, RESP, ACC and EMG - startkey (str): default is A20. the key that appears in all bitalino file names to extract from directory - **options (dict): only the keys json, json_dir and location are being evaluated. - options[json] (bool): if the user wants to use a json to save and load bitalino configurations - options[json_dir] (str): directory to json file. If not defined, a default will be set automatically - options[location] (str): if given, only the devices with that body location will be retrieved - - Returns: - dict: A dictionary where keys are the sensors associated to the Biosignal with a Timeseries to each key - - Raises: - IOError: if the Biosignal is not one of the ones mentioned - IOError: if the list of bitalino files from dir returns empty - IOError: if header is still empty after going through all Bitalino files - """ - options = {'json_bool': True, 'json_dir': 'bitalino.json'} - sensor = 'ECG' if type is modalities.ECG else 'EDA' if type is modalities.EDA else 'PPG' if type is modalities.PPG else 'ACC' if type is modalities.ACC else 'PZT' if type is modalities.RESP else 'EMG' if type is modalities.EMG else '' - if sensor == '': - raise IOError(f'Type {type} does not have label associated, please insert one') - # first a list is created with all the filenames that end in .edf and are inside the chosen dir - # this is a list of lists where the second column is the type of channel to extract - all_files = sorted([path.join(dir, file) for file in listdir(dir) if startkey in file]) - # get header and sensor positions by running the bitalino files until a header is found - if not all_files: - raise IOError(f'No files in dir="{dir}" that start with {startkey}') - header, h = {}, 0 - while len(header) < 1: - ch_idx, channels, device, header = Bitalino.__read_metadata(all_files[h], sensor, **options) - h += 1 - if header == {}: - raise IOError(f'The files in {dir} did not contain a bitalino type {header}') - new_dict = {} - segments = [Bitalino.__read_bit(file, sensor=sensor, sensor_idx=ch_idx, sensor_names=channels, - device=device, **options) for file in all_files[h - 1:]] - for ch, channel in enumerate(channels): - - samples = {segment[1]: segment[0][:, ch] for segment in segments if segment} - if len(samples) > 1: - new_timeseries = timeseries.Timeseries.withDiscontiguousSegments(samples, sampling_frequency=header['sampling rate'], - name=channels[ch]) - else: - new_timeseries = timeseries.Timeseries(tuple(samples.values())[0], tuple(samples.keys())[0], header['sampling rate'], - name=channels[ch]) - new_dict[channel] = new_timeseries - return new_dict - 
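The BITalino, Sense and E4 readers all end with the same restructuring step: the samples read from each file are grouped per channel and keyed by their start datetime, and a channel observed at more than one start time is wrapped as a discontiguous Timeseries, otherwise as a contiguous one. A minimal, self-contained sketch of that grouping step (the names here are illustrative, not LTBio API):

from datetime import datetime
import numpy as np

def group_segments(reads):
    """reads: iterable of (samples_2d, start_datetime, channel_names) tuples,
    one per file, as produced by the per-file readers above."""
    per_channel = {}
    for samples, start, channels in reads:
        for i, ch in enumerate(channels):
            # one entry per start datetime; column i holds this channel's samples
            per_channel.setdefault(ch, {})[start] = samples[:, i]
    return per_channel

reads = [
    (np.zeros((100, 2)), datetime(2022, 4, 25, 10, 0), ('ECG_chest', 'EDA_hand')),
    (np.ones((50, 2)), datetime(2022, 4, 25, 11, 0), ('ECG_chest', 'EDA_hand')),
]
grouped = group_segments(reads)
assert len(grouped['ECG_chest']) == 2  # two start times -> discontiguous Timeseries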
- @staticmethod - def _write(dir, timeseries): - '''Writes multiple TXT files on the directory 'path' so they can be opened in Opensignals.''' - # TODO - - @staticmethod - def _transfer(samples, to_unit): - pass - - # -*- encoding: utf-8 -*- - - # =================================== - - # IT - LongTermBiosignals - - # Package: biosignals - # Module: E4 - # Description: Class E4, a type of BiosignalSource, with static procedures to read and write datafiles from - # an Empatica E4 wristband. - - # Contributors: João Saraiva, Mariana Abreu - # Created: 15/06/2022 - # Last Updated: 22/07/2022 - - # =================================== - - from ..sources.BiosignalSource import BiosignalSource - - class E4(BiosignalSource): - '''This class represents the source of Seer Epilepsy Database and includes methods to read and write - biosignal files provided by them. Usually they are in .edf format.''' - - def __init__(self): - super().__init__() - - def __repr__(self): - return "Empatica E4 - Epilepsy Wristband" - - @staticmethod - def _aux_date(date): - """ Receives a string that contains a unix timestamp in UTC - Returns a datetime after convertion - """ - - ts = float(date) - return datetime.utcfromtimestamp(ts) - - @staticmethod - def __get_header(file_path): - """ - Auxiliary procedures to find the initial datetimes (1st line) and sampling frequencies (2nd line) of the file in the given path. - @param file_path: The path of the file to look for a header. - @return: A tuple with: - a) channel_labels: A dictionary with the header metadata. - b) column_names: A list of the column names. - @raise: - IOError: If the given file path does not exist. - """ - with open(file_path) as fh: - header = next(fh)[1:] # Read first line - header = literal_eval(header) # Get a dictionary of the header metadata - column_names = next(fh)[1:] # Read second line - column_names = column_names.split() # Get a list of the column names - return header, column_names - - @staticmethod - def __read_file(file_path): - """ - Reads one csv file. - @param: file_path (str) path to one csv file - @return: A tuple with: - a) A dictionary with arrays of samples associated with channel labels (like {'label': [...], }) - b) The initial datetime (in datetime) - c) The sampling frequency (in float) - - """ - with open(file_path, 'r') as f: - reader = csv.reader(f, dialect=csv.excel_tab, delimiter=',') - a = list(reader) - - # Channel label comes from the file name, or (x, y, z) in case of ACC - channel_labels = file_path.split(sep)[-1].split('.csv')[0].lower() - channel_labels = (channel_labels,) if len(a[0]) == 1 else ('x', 'y', 'z') - - # First row is the initial datetime - datetime = E4._aux_date(a[0][0]) - - # Second row is sampling frequency - sampling_frequency = float(a[1][0]) - - # Form third row and on are the sample values - samples = vstack(a[2:]).astype('float32').T - - return {label: samples[i] for i, label in enumerate(channel_labels)}, datetime, sampling_frequency - - @staticmethod - def _timeseries(dir, type, **options): - ''' - Reads multiple CSV files on multiple subdirectories of 'path' and returns a Biosignal associated with a Patient. 
- Args: - dir (str): directory that contains subdirectories of E4 files in csv format - type (Biosignal): type of biosignal to extract can be one of HR, EDA, PPG and ACC - ''' - sensor = 'EDA' if type is modalities.EDA else 'BVP' if type is modalities.PPG else 'ACC' if type is modalities.ACC else 'HR' if type is modalities.HR else 'TEMP' \ - if type is modalities.TEMP else '' - if sensor == '': - raise IOError(f'Type {type} does not have label associated, please insert one') - - # STEP 1 - # Get list of subdirectories - all_subdirectories = list([path.join(dir, d) for d in listdir(dir)]) - - res = {} - segments = {} - # STEP 2 - # Get list of files of interest, i.e., the ones corresponding to the modality of interest - for subdir in all_subdirectories: - if isdir(subdir): - file = list([path.join(subdir, file) for file in listdir(subdir) if sensor in file])[0] - if not file: - raise IOError(f'Files were not found in path {subdir} for {sensor=} ') - - # STEP 3 - # Read each file - samples, datetime, sf = E4.__read_file(file) - - # STEP 4 - Restructuring - # Listing all Segments of the same channel together, labelled to the same channel label. - for channel_label in samples: - # instantiating or appending - if channel_label not in res: - segments[channel_label] = {datetime: samples[channel_label]} - else: - segments[channel_label][datetime] = samples[channel_label] - res[channel_label] = sf # save sampling frequency here to be used on the next loop - - # Encapsulating the list of Segments of the same channel in a Timeseries - for channel in segments: - if len(segments[channel]) > 1: - res[channel] = timeseries.Timeseries.withDiscontiguousSegments(segments[channel], sampling_frequency=res[channel]) - else: - res[channel] = timeseries.Timeseries(tuple(segments[channel].values())[0], tuple(segments[channel].keys())[0], - sampling_frequency=res[channel]) - - return res - - @staticmethod - def _events(dir: str, file_key='tag'): - """ Extracts onsets from tags file - First we check if a tags file exists in directory. Then it will be opened and passed as a list "a". - Each date in a will be transformed from unix timestamp str to datetime using aux_date function. - Returns: A List of Event objects. 
- """ - - # STEP 1 - # Get list of subdirectories - all_subdirectories = list([path.join(dir, d) for d in listdir(dir)]) - - # STEP 2 - # Get tag file - res = [] - n_events = 0 # counter of events - for subdir in all_subdirectories: - if isdir(subdir): - onsets_file = [path.join(subdir, file) for file in listdir(subdir) if file_key in file] - if not onsets_file: - raise IOError(f"No tag file was found in path '{subdir}'.") - if len(onsets_file) > 1: - raise IOError(f'{len(onsets_file)} tag files were found, rather than just 1.') - else: - # STEP 3 - # Get onsets - with open(onsets_file[0], 'r') as f: - reader = csv.reader(f, dialect=csv.excel_tab) - a = list(reader) - # Events are named numerically - for i in range(len(a)): - n_events += 1 - res.append(timeseries.Event('event' + str(n_events), E4._aux_date(a[i][0]))) - return res - - @staticmethod - def _fetch(source_dir='', type=None, patient_code=None): - pass - - @staticmethod - def _write(path: str, timeseries: dict): - pass - - @staticmethod - def _transfer(samples, to_unit): - pass - - @staticmethod - def onbody(biosignal): - - window = timedelta(minutes=1) - - def condition_is_met_1_percent(x, condition): - count = np.count_nonzero(condition) - return count / len(x) >= 0.01 - - if type(biosignal) is modalities.ACC: - biosignal = biosignal['x'] + biosignal['y'] + biosignal['z'] # sum sample-by-sample the 3 axes - window_size = int(10 * biosignal.sampling_frequency) # 10 s moving standard deviation - - def moving_std(x): - cumsum = np.cumsum(x, dtype=float) - cumsum[window_size:] = cumsum[window_size:] - cumsum[:-window_size] - moving_averages = cumsum[window_size - 1:] / window_size - moving_sq_averages = np.cumsum(x ** 2, dtype=float) - moving_sq_averages[window_size:] = moving_sq_averages[window_size:] - moving_sq_averages[:-window_size] - moving_sq_averages = moving_sq_averages[window_size - 1:] / window_size - return np.sqrt(moving_sq_averages - moving_averages ** 2) - - x = biosignal.when(lambda x: condition_is_met_1_percent(x, moving_std(x) > 0.2), window=window) - x.name = biosignal.name + " Onbody Domain" - return x - - if type(biosignal) is modalities.EDA: - x = biosignal.when(lambda x: condition_is_met_1_percent(x, x > 0.05), window=window) - x.name = biosignal.name + " Onbody Domain" - return x - - if type(biosignal) is modalities.TEMP: - x = biosignal.when(lambda x: condition_is_met_1_percent(x, (x > 25) & (x < 40)), window=window) - x.name = biosignal.name + " Onbody Domain" - return x - - return None - -# =================================== -# Public Databases -# =================================== - - class MITDB(BiosignalSource): - '''This class represents the source of MIT-BIH Arrhythmia Database and includes methods to read and write - biosignal files provided by them. 
Usually they are in .dat format.''' - - def __init__(self): - super().__init__() - - def __repr__(self): - return "MIT-BIH Arrhythmia Database" - - def __aux_date(header): - """ - Get starting time from header - """ - time_key = [key for key in header.keys() if 'time' in key][0] - time_date = [key for key in header.keys() if 'date' in key][0] - try: - return to_datetime(header[time_date].strip('\"') + ' ' + header[time_key].strip('\"')) - except Exception as e: - print(f'Date is {header[time_date]} and Time is {header[time_key]} so the default will be used') - print('Default start date: 2000-1-1 00:00:00') - return datetime(2000, 1, 1, 00, 00, 00) - - @staticmethod - def __read_dat(dirfile, metadata=False): - - """ - Reads one dat file - param: dirfile (str) path to one file that ends in dat - param: sensor (str) name of the channel to extract (ex: ECG) - If metadata is True - returns list of channels and sampling frequency and initial datetime - Else return arrays one for each channel - """ - - # get edf data - signal, fields = wfdb.rdsamp(dirfile) - # get channels - channel_list = fields['sig_name'] - if metadata: - return channel_list, fields['fs'], fields['units'] - # structure of signal is two arrays, one array for each channel - return signal, MITDB.__aux_date(fields) - - @staticmethod - def _timeseries(dir, type, **options): - '''Reads multiple EDF/EDF+ files on the directory 'path' and returns a Biosignal associated with a Patient. - Args: - dir (str): directory that contains bitalino files in txt format - type (Biosignal): type of biosignal to extract can be one of ECG, EDA, PPG, RESP, ACC and EMG - ''' - if type != modalities.ECG: - raise IOError(f'Type {type} must be ECG') - # first a list is created with all the filenames that end in .dat and are inside the chosen dir - all_files = sorted(list(set([path.join(dir, di.split('.')[0]) for di in sorted(listdir(dir)) if di.endswith('dat')]))) - - # run the dat read function for all files in list all_files - channels, sfreq, units = MITDB.__read_dat(all_files[0], metadata=True) - - all_edf = list(map(MITDB.__read_dat, all_files)) - new_dict = {} - for ch in range(len(channels)): - segments = {edf_data[1]: edf_data[0][:, ch] for edf_data in all_edf} - unit = Volt(Multiplier.m) if 'mV' in units[ch] else None - name = BodyLocation.MLII if channels[ch].strip() == 'MLII' else BodyLocation.V5 if channels[ch].strip() == 'V5' else \ - channels[ch] - if len(segments) > 1: - new_timeseries = timeseries.Timeseries.withDiscontiguousSegments(segments, sampling_frequency=sfreq, name=channels[ch], - units=unit) - else: - new_timeseries = timeseries.Timeseries(tuple(segments.values())[0], tuple(segments.keys())[0], sfreq, name=channels[ch], - units=unit) - new_dict[channels[ch]] = new_timeseries - - return new_dict - - @staticmethod - def _fetch(type=None, patient_code=None): - """ Fetch one patient from the database - Args: - patient_code (int): number of patient to select - """ - # Transform patient code to the patient folder name - if not patient_code: - raise IOError('Please give a patient code (int)') - - temp_dir = '.cache' - if not path.isdir(temp_dir): - makedirs(temp_dir) - temp_dir = wget.download('https://physionet.org/content/mitdb/1.0.0/' + str(patient_code) + '.dat', out=temp_dir) - if temp_dir != '': - print(f'{temp_dir=}') - files = MITDB._timeseries(temp_dir, type) - return files - elif len(temp_dir) == '': - raise IOError(f'No patient was found {patient_code=}') - - @staticmethod - def _write(path: str, timeseries: dict): - pass - - 
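Stripped of its error handling, MITDB._fetch is a cache-then-read pattern: make sure a local cache directory exists, download the record file from PhysioNet into it, and hand the result to _timeseries. A dependency-free sketch of that pattern (urllib stands in for the `wget` package used above; the function name is illustrative):

from os import makedirs, path
from urllib.request import urlretrieve

def fetch_mitdb_record(patient_code: int, cache_dir: str = '.cache') -> str:
    """Download one MIT-BIH record file into a local cache and return its path."""
    if not patient_code:
        raise IOError('Please give a patient code (int)')
    if not path.isdir(cache_dir):
        makedirs(cache_dir)
    url = f'https://physionet.org/content/mitdb/1.0.0/{patient_code}.dat'
    target = path.join(cache_dir, f'{patient_code}.dat')
    local_path, _ = urlretrieve(url, target)
    return local_path

Note that wfdb.rdsamp resolves a record by its base name and also reads the matching .hea header, so a complete fetch would download that file as well.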
@staticmethod - def _transfer(samples, to_unit): - pass - - def _write(path: str, timeseries: dict): - pass - - from ..sources.BiosignalSource import BiosignalSource - - class Seer(BiosignalSource): - '''This class represents the source of Seer Epilepsy Database and includes methods to read and write - biosignal files provided by them. Usually they are in .edf format.''' - - def __init__(self): - super().__init__() - - def __repr__(self): - return "Seer Epilepsy Database" - - @staticmethod - def __read_file(dirfile, metadata=False): - """ - Reads one dat file - param: dirfile (str) path to one file that ends in dat - param: sensor (str) name of the channel to extract (ex: ECG) - If metadata is True - returns list of channels and sampling frequency and initial datetime - Else return arrays one for each channel - """ - # get edf data - edf = read_raw_edf(dirfile) - # get channels that correspond to type (HR = type HR) - channel_list = edf.ch_names - # initial datetime - if metadata: - return channel_list, edf.info['sfreq'], None - # structure of signal is two arrays, one array for each channel - signal = edf.get_data() - date = edf.info['meas_date'].replace(tzinfo=None) - edf.close() - return signal, date - - @staticmethod - def _timeseries(dir, type, **options): - '''Reads multiple EDF/EDF+ files on the directory 'path' and returns a Biosignal associated with a Patient. - Args: - dir (str): directory that contains bitalino files in txt format - type (Biosignal): type of biosignal to extract can be one of ECG, EDA, PPG, RESP, ACC and EMG - ''' - sensor = 'ECG' if type is modalities.ECG else 'EDA' if type is modalities.EDA else 'PPG' if type is modalities.PPG else 'ACC' if type is modalities.ACC \ - else 'PZT' if type is modalities.RESP else 'EMG' if type is modalities.EMG else 'HR' if modalities.HR else '' - if sensor == '': - raise IOError(f'Type {type} does not have label associated, please insert one') - # first a list is created with all the filenames that end in .dat and are inside the chosen dir - all_files = sorted(list(set([path.join(dir, di) for di in sorted(listdir(dir)) if sensor in di.upper()]))) - # devices example "Byteflies, Empatica" - devices = set([file.split(' - ')[-1] for file in all_files]) - # run the dat read function for all files in list all_files - new_dict = {} - for device in devices: - # select only device files - device_files = [file for file in all_files if device in file] - channels, sfreq, units = Seer.__read_file(device_files[0], metadata=True) - all_edf = list(map(Seer.__read_file, device_files)) - for ch in range(len(channels)): - segments = {edf_data[1]: edf_data[0][ch] for edf_data in all_edf} - unit = units - name = f'{channels[ch]} from {device.split("-")[0]}' - dict_key = f'{device.split("-")[0]}-{channels[ch].upper()}' if len(devices) > 1 else channels[ch].upper() - if len(segments) > 1: - new_timeseries = timeseries.Timeseries.withDiscontiguousSegments(segments, sampling_frequency=sfreq, name=name, - units=unit) - else: - new_timeseries = timeseries.Timeseries(tuple(segments.values())[0], tuple(segments.keys())[0], sfreq, name=name, - units=unit) - new_dict[dict_key] = new_timeseries - - return new_dict - - @staticmethod - def _fetch(source_dir='', type=None, patient_code=None): - """ Fetch one patient from the database - Args: - patient_code (int): number of patient to select - """ - # Transform patient code to the patient folder name - if not patient_code: - raise IOError('Please give a patient code (int)') - if source_dir == '': - raise 
IOError('Please give patients location') - list_patients = listdir(source_dir) - selected_patient = [pat for pat in list_patients if str(patient_code) in pat] - if len(selected_patient) == 1: - print(f'{selected_patient=}') - path_ = path.join(source_dir, selected_patient[0]) - files = Seer._timeseries(path_, type) - return files - elif len(selected_patient) > 1: - raise IOError(f'More than one patient found {selected_patient=}') - else: - raise IOError(f'No patient was found {selected_patient=}') - - @staticmethod - def _write(path: str, timeseries: dict): - pass - - @staticmethod - def _transfer(samples, to_unit): - pass - diff --git a/src/ltbio/biosignals/sources/_BITalino.py b/src/ltbio/biosignals/sources/_BITalino.py new file mode 100644 index 00000000..45a96dda --- /dev/null +++ b/src/ltbio/biosignals/sources/_BITalino.py @@ -0,0 +1,272 @@ +# -*- encoding: utf-8 -*- +# +# =================================== +# +# IT - LongTermBiosignals +# +# Package: biosignals +# Module: Bitalino +# Description: Class Bitalino, a type of BiosignalSource, with static procedures to read and write datafiles from +# any Bitalino device. +# +# Contributors: João Saraiva, Mariana Abreu +# Created: 25/04/2022 +# Last Updated: 22/07/2022 + +# =================================== + +import ast +from json import load, dump +from os import listdir, path, getcwd, access, R_OK + +import numpy as np +from dateutil.parser import parse as to_datetime + +from .. import modalities +from .._BiosignalSource import BiosignalSource +from .._Timeseries import Timeseries + + +class BITalino(BiosignalSource): + def __init__(self): + super().__init__() + + def __repr__(self): + return "Bitalino" + + def __aux_date(header): + """ + Get starting time from header + """ + time_key = [key for key in header.keys() if 'time' in key][0] + try: + return to_datetime(header['date'].strip('\"') + ' ' + header[time_key].strip('\"')) + except Exception as e: + print(e) + + def __check_empty(len_, type=''): + """ + Confirm if the length is acceptable and return the desired output + """ + if type == 'file_size': + if len_ <= 50: + return True + else: + if len_ < 1: + return True + return False + + def __change_sens_list(sens, device, channels): + """ + Confirm if the list of sensors has only RAW as labels, and ask the user for new labels in that case. + """ + if list(set(sens)) == ['RAW']: + print(f'Please update sens according to the sensors used:') + analogs = channels[-len(sens):] + for se in range(len(sens)): + new_se = str(input(f'{device} -- {sens[se]} -- {analogs[se]}')).upper() + sens[se] = new_se + return sens + + def __analog_idx(header, sensor, **options): + """ + From a header choose analog sensor key idx that correspond to a specific sensor. + This also runs read json to save configurations to facilitate implementation + This function leads with several devices and it returns a list that may contain one or several integers + """ + sensor_idx, sensor_names, json_bool, chosen_device = [], [], False, '' + # if options and json key, get json to calculate + if options: + if 'json' in options.keys(): + json_bool = options['json'] + json_dir = options['json_dir'] if 'json_dir' in options.keys() \ + else path.join(getcwd(), 'bitalino.json') + len_ch = 0 + for device in header.keys(): + chosen_device = device + sens_id = '' + # iterate over each device + if json_bool: + sens, ch, location = BITalino.__read_json(json_dir, header[device]) + else: + sens = header[device][str(input(f'What is the header key of sensor names? 
{header}\n ')).strip().lower()]
+                ch = header[device][str(input(f'What is the header key for analog channels? {header}\n ')).strip().lower()]
+                location = str(input(f'What is the body location of this device {device}? \n'))
+            sens = BITalino.__change_sens_list(sens, device, ch)
+            analogs = ch[-len(sens):]
+
+            if sensor in str(sens):
+                # add other column devices as offset to the column to retrieve
+                location_bool = True
+                if 'location' in options.keys():
+                    if location.lower() not in options['location'].lower():
+                        location_bool = False
+                sens_id = [lab + '_' + location for lab in sens if sensor in lab.upper() and location_bool]
+                sensor_idx += [len_ch + ch.index(analogs[sens.index(sid.split('_')[0])]) for sid in sens_id]
+            if sens_id != '':
+                chosen_device = device
+            len_ch = len(ch)
+            sensor_names += sens_id
+
+        return sensor_idx, sensor_names, chosen_device
+
+    def __read_json(dir_, header):
+        # check if the bitalino json exists; if so, return its channels, labels and location
+        if path.isfile(dir_) and access(dir_, R_OK):
+            # checks if file exists
+            with open(dir_, 'r') as json_file:
+                json_string = load(json_file)
+        else:
+            print("Either file is missing or is not readable, creating file...")
+            json_string = {}
+        if 'device connection' in header.keys():
+            device = header['device connection']
+        else:
+            device = input('Enter device id (string): ')
+        if device not in json_string.keys():
+            json_string[device] = {}
+
+        for key in ['column', 'label', 'firmware version', 'device', 'resolution', 'channels', 'sensor', 'location']:
+            if key not in json_string[device].keys():
+                if key in header.keys():
+                    json_string[device][key] = header[key]
+                else:
+                    print(header['device connection'], header['label'])
+                    new_info = str(input(f'{key}: ')).lower()
+                    json_string[device][key] = new_info
+            if key == 'label':
+                sens = BITalino.__change_sens_list(json_string[device]['label'], device, header['column'])
+                json_string[device][key] = sens
+        with open(dir_, 'w') as db_file:
+            dump(json_string, db_file, indent=2)
+        return json_string[device]['label'], json_string[device]['column'], json_string[device]['location']
+
+    @staticmethod
+    def __read_metadata(dirfile, sensor, **options):
+        """
+        Read metadata of a single file
+        Args:
+            dirfile (str): contains the file path
+            sensor (str): contains the sensor label to look for
+        Returns:
+            sensor_idx (list), sensor_names (list), device (str), header (dict)
+        **options (dict): equal to _read arg
+        """
+        # size of bitalino file
+        file_size = path.getsize(dirfile)
+        if file_size <= 50:
+            return [], [], '', {}
+
+        with open(dirfile) as fh:
+            next(fh)
+            header = next(fh)[2:]
+            next(fh)
+
+        header = ast.literal_eval(header)
+        sensor_idx, sensor_names, device = BITalino.__analog_idx(header, sensor, **options)
+        return sensor_idx, sensor_names, device, header[device]
+
+    @staticmethod
+    def __read_bit(dirfile, sensor, sensor_idx=[], sensor_names=[], device='', **options):
+        """
+        Reads one BITalino TXT file
+        Args:
+            dirfile (str): contains the file path
+            sensor (str): contains the sensor label to look for
+            sensor_idx (list): list of indexes that correspond to the columns of sensor to extract
+            sensor_names (list): list of names that correspond to the sensor label
+                ex: sensor='ECG', sensor_names=['ECG_chest']
+                ex: sensor='ACC', options['location']='wrist', sensor_names=['ACCX_wrist','ACCY_wrist','ACCZ_wrist']
+            device (str): device MacAddress, this is used to get the specific header, especially when using 2 devices
+            **options (dict): equal to _read arg
+
+        Returns:
+            sensor_data (array): 
2-dimensional array of time over sensors columns
+            date (datetime): initial datetime of array
+
+        Raises:
+            IOError: if sensor_names is empty, meaning no channels could be retrieved for chosen sensor
+        """
+        # size of bitalino file
+        file_size = path.getsize(dirfile)
+        if file_size <= 50:
+            return '', []
+        with open(dirfile) as fh:
+            next(fh)
+            header = next(fh)[2:]
+            next(fh)
+            # signal
+            data = np.array([line.strip().split() for line in fh], float)
+        # if file is empty, return
+        if BITalino.__check_empty(len(data)):
+            return None
+
+        header = ast.literal_eval(header)
+        if len(sensor_names) > 0:
+            sensor_data = data[:, sensor_idx]
+            date = BITalino.__aux_date(header[device])
+            return sensor_data, date
+        else:
+            raise IOError(f"Sensor {sensor} was not found in this acquisition, please insert another")
+
+    @staticmethod
+    def _timeseries(dir, type, startkey='A20', **options):
+        """Reads multiple BITalino TXT files on the directory 'dir' and returns a Biosignal associated with a Patient.
+        Args:
+            dir (str): directory that contains bitalino files in txt format
+            type (Biosignal): type of biosignal to extract can be one of ECG, EDA, PPG, RESP, ACC and EMG
+            startkey (str): default is A20. the key that appears in all bitalino file names to extract from directory
+            **options (dict): only the keys json, json_dir and location are being evaluated.
+                options[json] (bool): if the user wants to use a json to save and load bitalino configurations
+                options[json_dir] (str): directory to json file. If not defined, a default will be set automatically
+                options[location] (str): if given, only the devices with that body location will be retrieved
+
+        Returns:
+            dict: A dictionary where keys are the sensors associated to the Biosignal with a Timeseries to each key
+
+        Raises:
+            IOError: if the Biosignal is not one of the ones mentioned
+            IOError: if the list of bitalino files from dir returns empty
+            IOError: if header is still empty after going through all Bitalino files
+        """
+        options.setdefault('json', True)
+        options.setdefault('json_dir', 'bitalino.json')
+        sensor = 'ECG' if type is modalities.ECG else 'EDA' if type is modalities.EDA else 'PPG' if type is modalities.PPG else 'ACC' if type is modalities.ACC else 'PZT' if type is modalities.RESP else 'EMG' if type is modalities.EMG else ''
+        if sensor == '':
+            raise IOError(f'Type {type} does not have label associated, please insert one')
+        # first a list is created with all the filenames in the chosen dir that contain startkey
+        all_files = sorted([path.join(dir, file) for file in listdir(dir) if startkey in file])
+        # get header and sensor positions by running the bitalino files until a header is found
+        if not all_files:
+            raise IOError(f'No files in dir="{dir}" that start with {startkey}')
+        header, h = {}, 0
+        while len(header) < 1 and h < len(all_files):
+            ch_idx, channels, device, header = BITalino.__read_metadata(all_files[h], sensor, **options)
+            h += 1
+        if header == {}:
+            raise IOError(f'The files in {dir} did not contain any {sensor} channels')
+        new_dict = {}
+        segments = [BITalino.__read_bit(file, sensor=sensor, sensor_idx=ch_idx, sensor_names=channels,
+                                        device=device, **options) for file in all_files[h-1:]]
+        for ch, channel in enumerate(channels):
+            samples = {segment[1]: segment[0][:, ch] for segment in segments if segment}
+            if len(samples) > 1:
+                new_timeseries = Timeseries.withDiscontiguousSegments(samples, sampling_frequency=header['sampling rate'],
+                                                                      name=channels[ch])
+            else:
+                new_timeseries = Timeseries(tuple(samples.values())[0], tuple(samples.keys())[0], header['sampling rate'],
+                                            name=channels[ch])
+            new_dict[channel] = new_timeseries
+        return new_dict
+
+    @staticmethod
+    def _write(dir, timeseries):
+        '''Writes multiple TXT files on the directory 'dir' so they can be opened in Opensignals.'''
+        # TODO
+
+    @staticmethod
+    def _transfer(samples, to_unit):
+        pass
diff --git a/src/ltbio/biosignals/sources/_BITalino.pyi b/src/ltbio/biosignals/sources/_BITalino.pyi
new file mode 100644
index 00000000..7f5952ee
--- /dev/null
+++ b/src/ltbio/biosignals/sources/_BITalino.pyi
@@ -0,0 +1,12 @@
+# -*- encoding: utf-8 -*-
+# ===================================
+# ScientISST LTBio | Long-Term Biosignals
+# Package: biosignals.sources
+# Class: Bitalino
+# ===================================
+
+from ltbio.biosignals import BiosignalSource
+
+
+class BITalino(BiosignalSource):
+    ...
diff --git a/src/ltbio/biosignals/sources/_E4.py b/src/ltbio/biosignals/sources/_E4.py
new file mode 100644
index 00000000..f25d658b
--- /dev/null
+++ b/src/ltbio/biosignals/sources/_E4.py
@@ -0,0 +1,193 @@
+# -*- encoding: utf-8 -*-
+
+# ===================================
+
+# IT - LongTermBiosignals
+
+# Package: biosignals
+# Module: E4
+# Description: Class E4, a type of BiosignalSource, with static procedures to read and write datafiles from
+# an Empatica E4 wristband.
+
+# Contributors: João Saraiva, Mariana Abreu
+# Created: 15/06/2022
+# Last Updated: 22/07/2022
+
+# ===================================
+
+import csv
+from ast import literal_eval
+from datetime import datetime
+from os import listdir, path, sep
+from os.path import isdir
+
+from numpy import vstack
+
+from .. import modalities
+from .._BiosignalSource import BiosignalSource
+from .._Timeseries import Timeseries
+
+
+class E4(BiosignalSource):
+    '''This class represents the source of an Empatica E4 wristband and includes methods to read and write
+    biosignal files provided by it. Usually they are in .csv format.'''
+
+    def __init__(self):
+        super().__init__()
+
+    def __repr__(self):
+        return "Empatica E4 - Epilepsy Wristband"
+
+    @staticmethod
+    def __aux_date(date):
+        """ Receives a string that contains a unix timestamp in UTC
+        Returns a datetime after conversion
+        """
+
+        ts = float(date)
+        return datetime.utcfromtimestamp(ts)
+
+    @staticmethod
+    def __get_header(file_path):
+        """
+        Auxiliary procedures to find the initial datetimes (1st line) and sampling frequencies (2nd line) of the file in the given path.
+        @param file_path: The path of the file to look for a header.
+        @return: A tuple with:
+            a) header: A dictionary with the header metadata.
+            b) column_names: A list of the column names.
+        @raise:
+            IOError: If the given file path does not exist.
+        """
+        with open(file_path) as fh:
+            header = next(fh)[1:]  # Read first line
+            header = literal_eval(header)  # Get a dictionary of the header metadata
+            column_names = next(fh)[1:]  # Read second line
+            column_names = column_names.split()  # Get a list of the column names
+        return header, column_names
+
+    @staticmethod
+    def __read_file(file_path):
+        """
+        Reads one csv file. 
+        @param: file_path (str) path to one csv file
+        @return: A tuple with:
+            a) A dictionary with arrays of samples associated with channel labels (like {'label': [...], })
+            b) The initial datetime (in datetime)
+            c) The sampling frequency (in float)
+
+        """
+        with open(file_path, 'r') as f:
+            reader = csv.reader(f, dialect=csv.excel_tab, delimiter=',')
+            a = list(reader)
+
+        # Channel label comes from the file name, or (x, y, z) in case of ACC
+        channel_labels = file_path.split(sep)[-1].split('.csv')[0].lower()
+        channel_labels = (channel_labels, ) if len(a[0]) == 1 else ('x', 'y', 'z')
+
+        # First row is the initial datetime
+        datetime = E4.__aux_date(a[0][0])
+
+        # Second row is the sampling frequency
+        sampling_frequency = float(a[1][0])
+
+        # From the third row on are the sample values
+        samples = vstack(a[2:]).astype('float32').T
+
+        return {label: samples[i] for i, label in enumerate(channel_labels)}, datetime, sampling_frequency
+
+    @staticmethod
+    def _timeseries(dir, type, **options):
+        '''
+        Reads multiple CSV files on multiple subdirectories of 'dir' and returns a Biosignal associated with a Patient.
+        Args:
+            dir (str): directory that contains subdirectories of E4 files in csv format
+            type (Biosignal): type of biosignal to extract can be one of HR, EDA, PPG and ACC
+        '''
+        sensor = 'EDA' if type is modalities.EDA else 'BVP' if type is modalities.PPG else 'ACC' if type is modalities.ACC else 'HR' if type is modalities.HR else 'TEMP' \
+            if type is modalities.TEMP else ''
+        if sensor == '':
+            raise IOError(f'Type {type} does not have label associated, please insert one')
+
+        # STEP 1
+        # Get list of subdirectories
+        all_subdirectories = list([path.join(dir, d) for d in listdir(dir)])
+
+        res = {}
+        segments = {}
+        # STEP 2
+        # Get list of files of interest, i.e., the ones corresponding to the modality of interest
+        for subdir in all_subdirectories:
+            if isdir(subdir):
+                files_found = [path.join(subdir, file) for file in listdir(subdir) if sensor in file]
+                if not files_found:
+                    raise IOError(f'Files were not found in path {subdir} for {sensor=} ')
+                file = files_found[0]
+
+                # STEP 3
+                # Read each file
+                samples, datetime, sf = E4.__read_file(file)
+
+                # STEP 4 - Restructuring
+                # Listing all Segments of the same channel together, labelled to the same channel label.
+                for channel_label in samples:
+                    # instantiating or appending
+                    if channel_label not in res:
+                        segments[channel_label] = {datetime: samples[channel_label]}
+                    else:
+                        segments[channel_label][datetime] = samples[channel_label]
+                    res[channel_label] = sf  # save sampling frequency here to be used on the next loop
+
+        # Encapsulating the list of Segments of the same channel in a Timeseries
+        for channel in segments:
+            if len(segments[channel]) > 1:
+                res[channel] = Timeseries.withDiscontiguousSegments(segments[channel], sampling_frequency=res[channel])
+            else:
+                res[channel] = Timeseries(tuple(segments[channel].values())[0], tuple(segments[channel].keys())[0], sampling_frequency=res[channel])
+
+        return res
+
+    @staticmethod
+    def _events(dir:str, file_key='tag'):
+        """ Extracts onsets from tags file
+        First we check if a tags file exists in directory. Then it will be opened and passed as a list "a".
+        Each date in a will be transformed from unix timestamp str to datetime using aux_date function.
+        Returns: A List of Event objects. 
+        """
+
+        # STEP 1
+        # Get list of subdirectories
+        all_subdirectories = list([path.join(dir, d) for d in listdir(dir)])
+
+        # STEP 2
+        # Get tag file
+        res = []
+        n_events = 0  # counter of events
+        for subdir in all_subdirectories:
+            if isdir(subdir):
+                onsets_file = [path.join(subdir, file) for file in listdir(subdir) if file_key in file]
+                if not onsets_file:
+                    raise IOError(f"No tag file was found in path '{subdir}'.")
+                if len(onsets_file) > 1:
+                    raise IOError(f'{len(onsets_file)} tag files were found, rather than just 1.')
+                else:
+                    # STEP 3
+                    # Get onsets
+                    with open(onsets_file[0], 'r') as f:
+                        reader = csv.reader(f, dialect=csv.excel_tab)
+                        a = list(reader)
+                        # Events are named numerically
+                        for i in range(len(a)):
+                            n_events += 1
+                            res.append(timeseries.Event('event' + str(n_events), E4.__aux_date(a[i][0])))
+        return res
+
+    @staticmethod
+    def _fetch(source_dir='', type=None, patient_code=None):
+        pass
+
+    @staticmethod
+    def _write(path:str, timeseries: dict):
+        pass
+
+    @staticmethod
+    def _transfer(samples, to_unit):
+        pass
diff --git a/src/ltbio/biosignals/sources/_E4.pyi b/src/ltbio/biosignals/sources/_E4.pyi
new file mode 100644
index 00000000..de2ac6b9
--- /dev/null
+++ b/src/ltbio/biosignals/sources/_E4.pyi
@@ -0,0 +1,4 @@
+from ltbio.biosignals.sources._BiosignalSource import BiosignalSource
+
+class E4(BiosignalSource):
+    def __init__(self, device_id: str, defaults_path: str = None) -> E4: ...
diff --git a/src/ltbio/biosignals/sources/_HEM.py b/src/ltbio/biosignals/sources/_HEM.py
new file mode 100644
index 00000000..76277386
--- /dev/null
+++ b/src/ltbio/biosignals/sources/_HEM.py
@@ -0,0 +1,109 @@
+# -*- encoding: utf-8 -*-
+
+# ===================================
+
+# IT - LongTermBiosignals
+
+# Package: biosignals
+# Module: HEM
+# Description: Class HEM, a type of BiosignalSource, with static procedures to read and write datafiles from
+# Hospital Egas Moniz, Portugal.
+
+# Contributors: João Saraiva, Mariana Abreu
+# Created: 25/04/2022
+# Last Updated: 22/07/2022
+
+# ===================================
+from datetime import timedelta
+from os import listdir, path
+
+import numpy as np
+from neo import MicromedIO
+from numpy import array
+
+from .. import modalities
+from .._BiosignalSource import BiosignalSource
+from .._Timeseries import Timeseries
+
+
+class HEM(BiosignalSource):
+    '''This class represents the source of Hospital Egas Moniz (Lisboa, PT) and includes methods to read and write
+    biosignal files provided by them. 
Usually they are in the Micromed TRC format.'''
+
+    def __init__(self):
+        super().__init__()
+
+    def __repr__(self):
+        return "Hospital Egas Moniz"
+
+    @staticmethod
+    def __read_trc(list, metadata=False):
+        """
+        Return trc file information, whether it is the values or the metadata, according to boolean metadata
+        :param list
+        :param metadata
+
+        """
+        dirfile = list[0]
+        sensor = list[1]
+        # get trc data
+        seg_micromed = MicromedIO(dirfile)
+        hem_data = seg_micromed.read_segment()
+        hem_sig = hem_data.analogsignals[0]
+        ch_list = seg_micromed.header['signal_channels']['name']
+        # get channels that correspond to type (POL Ecg = type ecg)
+        find_idx = [hch for hch in range(len(ch_list)) if sensor.lower() in ch_list[hch].lower()]
+        # returns ch_list of interest, sampling frequency, initial datetime
+        if metadata:
+            return ch_list[find_idx], float(hem_sig.sampling_rate), hem_data.rec_datetime, hem_sig.unit
+        # returns initial date and samples
+        return array(hem_sig[:, find_idx].T), hem_data.rec_datetime, ch_list[find_idx]
+
+    @staticmethod
+    def _timeseries(dir, type, **options):
+        '''Reads multiple TRC files on the directory 'dir' and returns a Biosignal associated with a Patient.'''
+        # first a list is created with all the filenames that end in .trc and are inside the chosen dir
+        # this is a list of lists where the second column is the type of channel to extract
+        if type is modalities.ECG:
+            label = 'ecg'
+        all_files = sorted([[path.join(dir, file), label] for file in listdir(dir) if file.lower().endswith('.trc')])
+        # read the metadata of the first file
+        channels, sfreq, start_datetime, units = HEM.__read_trc(all_files[0], metadata=True)
+        # run the trc read function for all files in list all_files
+        all_trc = list(map(HEM.__read_trc, all_files))
+        new_dict, first_time = {}, all_trc[0][1]
+        # TODO ADD UNITS TO TIMESERIES
+        for channel in channels:
+            last_start = all_trc[0][1]
+            segments = {last_start: all_trc[0][0][list(all_trc[0][2]).index(channel)]}
+            for at, trc_data in enumerate(all_trc[1:]):
+                if channel not in trc_data[2]:
+                    continue
+                ch = list(trc_data[2]).index(channel)
+                final_time = all_trc[at][1] + timedelta(seconds=len(all_trc[at][0][ch])/sfreq)
+                if trc_data[1] <= final_time:
+                    if (final_time - trc_data[1]) < timedelta(seconds=1):
+                        segments[last_start] = np.append(segments[last_start], trc_data[0][ch])
+                    else:
+                        continue
+                else:
+                    segments[trc_data[1]] = trc_data[0][ch]
+                    last_start = trc_data[1]
+
+            if len(segments) > 1:
+                new_timeseries = Timeseries.withDiscontiguousSegments(segments, sampling_frequency=sfreq, name=channel)
+            else:
+                new_timeseries = Timeseries(tuple(segments.values())[0], tuple(segments.keys())[0], sfreq, name=channel)
+            new_dict[channel] = new_timeseries
+
+        return new_dict
+
+    @staticmethod
+    def _write(path: str, timeseries: dict):
+        pass
+
+    @staticmethod
+    def _transfer(samples, to_unit):
+        pass
diff --git a/src/ltbio/biosignals/sources/_HEM.pyi b/src/ltbio/biosignals/sources/_HEM.pyi
new file mode 100644
index 00000000..18bd96ac
--- /dev/null
+++ b/src/ltbio/biosignals/sources/_HEM.pyi
@@ -0,0 +1,4 @@
+from ltbio.biosignals.sources._BiosignalSource import BiosignalSource
+
+class HEM(BiosignalSource):
+    def __init__(self, device_id: str, defaults_path: str = None) -> HEM: ...
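The stitching rule inside HEM._timeseries is easier to check in isolation: a TRC part is appended to the previous segment when it starts no later than that segment's computed end and within one second of it; parts overlapping the previous segment by a second or more are dismissed; anything starting later opens a new segment. A self-contained sketch of that rule (illustrative names, not LTBio API):

from datetime import datetime, timedelta
import numpy as np

def stitch(parts, sfreq):
    """parts: time-ordered list of (start_datetime, 1-D samples) tuples."""
    segments = {}
    last_start = None
    for start, samples in parts:
        if last_start is not None:
            end = last_start + timedelta(seconds=len(segments[last_start]) / sfreq)
            if start <= end:
                if (end - start) < timedelta(seconds=1):
                    # near-contiguous: extend the open segment
                    segments[last_start] = np.append(segments[last_start], samples)
                # else: the part overlaps by >= 1 s and is dismissed
                continue
        segments[start] = np.asarray(samples, dtype=float)
        last_start = start
    return segments

t0 = datetime(2022, 4, 25, 10, 0, 0)
parts = [(t0, np.zeros(256)), (t0 + timedelta(seconds=1), np.ones(256))]
assert len(stitch(parts, sfreq=256.0)) == 1  # back-to-back parts are stitched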
diff --git a/src/ltbio/biosignals/sources/_HSM.py b/src/ltbio/biosignals/sources/_HSM.py
new file mode 100644
index 00000000..995977ea
--- /dev/null
+++ b/src/ltbio/biosignals/sources/_HSM.py
@@ -0,0 +1,90 @@
+# -*- encoding: utf-8 -*-
+
+# ===================================
+
+# IT - LongTermBiosignals
+
+# Package: biosignals
+# Module: HSM
+# Description: Class HSM, a type of BiosignalSource, with static procedures to read and write datafiles from
+# Hospital de Santa Maria, Portugal.
+
+# Contributors: João Saraiva, Mariana Abreu
+# Created: 25/04/2022
+# Last Updated: 22/07/2022
+
+# ===================================
+
+from os import listdir, path
+
+from mne.io import read_raw_edf
+
+from .. import modalities
+from .._BiosignalSource import BiosignalSource
+from .._Timeseries import Timeseries
+
+
+class HSM(BiosignalSource):
+    '''This class represents the source of Hospital de Santa Maria (Lisboa, PT) and includes methods to read and write
+    biosignal files provided by them. Usually they are in the European EDF/EDF+ format.'''
+
+    def __init__(self):
+        super().__init__()
+
+    def __repr__(self):
+        return "Hospital de Santa Maria"
+
+    @staticmethod
+    def __read_edf(list, metadata=False):
+        """
+        Reads one edf file
+        If metadata is True - returns list of channels and sampling frequency and initial datetime
+        Else return arrays one for each channel
+        """
+        dirfile = list[0]
+        sensor = list[1]
+        # get edf data
+        hsm_data = read_raw_edf(dirfile)
+        # get channels that correspond to type (POL Ecg = type ecg)
+        channel_list = [hch for hch in hsm_data.ch_names if sensor.lower() in hch.lower()]
+        # initial datetime
+        if metadata:
+            return channel_list, hsm_data.info['sfreq']
+        # structure of hsm_sig is two arrays, the 1st has one array for each channel and the 2nd is an int-time array
+        hsm_sig = hsm_data[channel_list]
+
+        return hsm_sig[0], hsm_data.info['meas_date'].replace(tzinfo=None)
+
+    @staticmethod
+    def _timeseries(dir, type, **options):
+        '''Reads multiple EDF/EDF+ files on the directory 'dir' and returns a Biosignal associated with a Patient.'''
+        if type is modalities.ECG:
+            label = 'ecg'
+        elif type is modalities.EMG:
+            label = 'emg'
+        elif type is modalities.EEG:
+            label = 'eeg'
+        else:
+            raise IOError(f'Type {type} does not have label associated, please insert one')
+        # first a list is created with all the filenames that end in .edf and are inside the chosen dir
+        # this is a list of lists where the second column is the type of channel to extract
+        all_files = sorted([[path.join(dir, file), label] for file in listdir(dir) if file.endswith('.edf')])
+        # run the edf read function for all files in list all_files
+        channels, sfreq = HSM.__read_edf(all_files[0], metadata=True)
+        all_edf = list(map(HSM.__read_edf, all_files))
+        new_dict = {}
+        for ch in range(len(channels)):
+            segments = {edf_data[1]: edf_data[0][ch] for edf_data in all_edf}
+            if len(segments) > 1:
+                new_timeseries = Timeseries.withDiscontiguousSegments(segments, sampling_frequency=sfreq, name=channels[ch])
+            else:
+                new_timeseries = Timeseries(tuple(segments.values())[0], tuple(segments.keys())[0], sfreq, name=channels[ch])
+            new_dict[channels[ch]] = new_timeseries
+        return new_dict
+
+    @staticmethod
+    def _write(path:str, timeseries: dict):
+        pass
+
+    @staticmethod
+    def _transfer(samples, to_unit):
+        pass
diff --git a/src/ltbio/biosignals/sources/_HSM.pyi b/src/ltbio/biosignals/sources/_HSM.pyi
new file mode 100644
index 00000000..68916fe7
--- /dev/null
+++ b/src/ltbio/biosignals/sources/_HSM.pyi
@@ -0,0 +1,5 @@
+from ltbio.biosignals.sources._BiosignalSource import 
BiosignalSource + +class HSM(BiosignalSource): + def __init__(self, device_id: str, defaults_path: str = None) -> HSM: ... + diff --git a/src/ltbio/biosignals/sources/_MITDB.py b/src/ltbio/biosignals/sources/_MITDB.py new file mode 100644 index 00000000..15694fc5 --- /dev/null +++ b/src/ltbio/biosignals/sources/_MITDB.py @@ -0,0 +1,134 @@ +# -*- encoding: utf-8 -*- + +# =================================== + +# IT - LongTermBiosignals + +# Package: biosignals +# Module: MITDB +# Description: Class MITDB, a type of BiosignalSource, with static procedures to read and write datafiles from the +# MIT-BIH Arrhythmia dataset at https://physionet.org/content/mitdb/1.0.0/. + +# Contributors: João Saraiva, Mariana Abreu +# Created: 31/05/2022 +# Last Updated: 22/07/2022 + +# =================================== + +from datetime import datetime +from os import listdir, path, makedirs + +import wfdb +import wget +from dateutil.parser import parse as to_datetime + +from .. import modalities +from .._BiosignalSource import BiosignalSource +from .._Timeseries import Timeseries +from ltbio.clinical.BodyLocation import BodyLocation +from ltbio.biosignals.units import * + + +class MITDB(BiosignalSource): + '''This class represents the source of MIT-BIH Arrhythmia Database and includes methods to read and write + biosignal files provided by them. Usually they are in .dat format.''' + + def __init__(self): + super().__init__() + + def __repr__(self): + return "MIT-BIH Arrhythmia Database" + + def __aux_date(header): + """ + Get starting time from header + """ + time_key = [key for key in header.keys() if 'time' in key][0] + time_date = [key for key in header.keys() if 'date' in key][0] + try: + return to_datetime(header[time_date].strip('\"') + ' ' + header[time_key].strip('\"')) + except Exception as e: + print(f'Date is {header[time_date]} and Time is {header[time_key]} so the default will be used') + print('Default start date: 2000-1-1 00:00:00') + return datetime(2000, 1, 1, 00, 00, 00) + + @staticmethod + def __read_dat(dirfile, metadata=False): + + """ + Reads one dat file + param: dirfile (str) path to one file that ends in dat + param: sensor (str) name of the channel to extract (ex: ECG) + If metadata is True - returns list of channels and sampling frequency and initial datetime + Else return arrays one for each channel + """ + + # get edf data + signal, fields = wfdb.rdsamp(dirfile) + # get channels + channel_list = fields['sig_name'] + if metadata: + return channel_list, fields['fs'], fields['units'] + # structure of signal is two arrays, one array for each channel + return signal, MITDB.__aux_date(fields) + + @staticmethod + def _timeseries(dir, type, **options): + '''Reads multiple EDF/EDF+ files on the directory 'path' and returns a Biosignal associated with a Patient. 
+        Args:
+            dir (str): directory that contains MIT-BIH record files in .dat format
+            type (Biosignal): type of biosignal to extract; only ECG is supported
+        '''
+        if type != modalities.ECG:
+            raise IOError(f'Type {type} must be ECG')
+        # first a list is created with all the filenames that end in .dat and are inside the chosen dir
+        all_files = sorted(list(set([path.join(dir, di.split('.')[0]) for di in sorted(listdir(dir)) if di.endswith('dat')])))
+
+        # run the dat read function for all files in list all_files
+        channels, sfreq, units = MITDB.__read_dat(all_files[0], metadata=True)
+
+        all_edf = list(map(MITDB.__read_dat, all_files))
+        new_dict = {}
+        for ch in range(len(channels)):
+            segments = {edf_data[1]: edf_data[0][:, ch] for edf_data in all_edf}
+            unit = Volt(Multiplier.m) if 'mV' in units[ch] else None
+            name = BodyLocation.MLII if channels[ch].strip() == 'MLII' else BodyLocation.V5 if channels[ch].strip() == 'V5' else channels[ch]
+            if len(segments) > 1:
+                new_timeseries = Timeseries.withDiscontiguousSegments(segments, sampling_frequency=sfreq, name=name, units=unit)
+            else:
+                new_timeseries = Timeseries(tuple(segments.values())[0], tuple(segments.keys())[0], sfreq, name=name, units=unit)
+            new_dict[channels[ch]] = new_timeseries
+
+        return new_dict
+
+    @staticmethod
+    def _fetch(type=None, patient_code=None):
+        """ Fetch one patient from the database
+        Args:
+            patient_code (int): number of patient to select
+        """
+        # Transform patient code to the patient folder name
+        if not patient_code:
+            raise IOError('Please give a patient code (int)')
+
+        temp_dir = '.cache'
+        if not path.isdir(temp_dir):
+            makedirs(temp_dir)
+        temp_dir = wget.download('https://physionet.org/content/mitdb/1.0.0/'+str(patient_code)+'.dat', out=temp_dir)
+        if temp_dir != '':
+            print(f'{temp_dir=}')
+            files = MITDB._timeseries(temp_dir, type)
+            return files
+        else:
+            raise IOError(f'No patient was found {patient_code=}')
+
+    @staticmethod
+    def _write(path:str, timeseries: dict):
+        pass
+
+    @staticmethod
+    def _transfer(samples, to_unit):
+        pass
diff --git a/src/ltbio/biosignals/sources/_MITDB.pyi b/src/ltbio/biosignals/sources/_MITDB.pyi
new file mode 100644
index 00000000..d0963b15
--- /dev/null
+++ b/src/ltbio/biosignals/sources/_MITDB.pyi
@@ -0,0 +1,4 @@
+from ltbio.biosignals.sources._BiosignalSource import BiosignalSource
+
+class MITDB(BiosignalSource):
+    def __init__(self, device_id: str, defaults_path: str = None) -> MITDB: ...
diff --git a/src/ltbio/biosignals/sources/_Seer.py b/src/ltbio/biosignals/sources/_Seer.py
new file mode 100644
index 00000000..0d5d84bb
--- /dev/null
+++ b/src/ltbio/biosignals/sources/_Seer.py
@@ -0,0 +1,127 @@
+# -*- encoding: utf-8 -*-
+
+# ===================================
+
+# IT - LongTermBiosignals
+
+# Package: biosignals
+# Module: Seer
+# Description: Class Seer, a type of BiosignalSource, with static procedures to read and write datafiles from the
+# Seer dataset at https://seermedical.com.
+
+# Contributors: Mariana Abreu
+# Created: 02/06/2022
+# Last Updated: 22/07/2022
+
+# ===================================
+
+from os import listdir, path
+
+from mne.io import read_raw_edf
+
+from .. 
import modalities
+from .._BiosignalSource import BiosignalSource
+from .._Timeseries import Timeseries
+
+
+class Seer(BiosignalSource):
+    '''This class represents the source of Seer Epilepsy Database and includes methods to read and write
+    biosignal files provided by them. Usually they are in .edf format.'''
+
+    def __init__(self):
+        super().__init__()
+
+    def __repr__(self):
+        return "Seer Epilepsy Database"
+
+    @staticmethod
+    def __read_file(dirfile, metadata=False):
+        """
+        Reads one edf file
+        param: dirfile (str) path to one file that ends in .edf
+        param: sensor (str) name of the channel to extract (ex: ECG)
+        If metadata is True - returns list of channels and sampling frequency and initial datetime
+        Else return arrays one for each channel
+        """
+        # get edf data
+        edf = read_raw_edf(dirfile)
+        # get channels that correspond to type (HR = type HR)
+        channel_list = edf.ch_names
+        # initial datetime
+        if metadata:
+            return channel_list, edf.info['sfreq'], None
+        # structure of signal is two arrays, one array for each channel
+        signal = edf.get_data()
+        date = edf.info['meas_date'].replace(tzinfo=None)
+        edf.close()
+        return signal, date
+
+    @staticmethod
+    def _timeseries(dir, type, **options):
+        '''Reads multiple EDF/EDF+ files on the directory 'dir' and returns a Biosignal associated with a Patient.
+        Args:
+            dir (str): directory that contains Seer files in EDF format
+            type (Biosignal): type of biosignal to extract can be one of ECG, EDA, PPG, RESP, ACC, EMG and HR
+        '''
+        sensor = 'ECG' if type is modalities.ECG else 'EDA' if type is modalities.EDA else 'PPG' if type is modalities.PPG else 'ACC' if type is modalities.ACC \
+            else 'PZT' if type is modalities.RESP else 'EMG' if type is modalities.EMG else 'HR' if type is modalities.HR else ''
+        if sensor == '':
+            raise IOError(f'Type {type} does not have label associated, please insert one')
+        # first a list is created with all the filenames that contain the sensor label and are inside the chosen dir
+        all_files = sorted(list(set([path.join(dir, di) for di in sorted(listdir(dir)) if sensor in di.upper()])))
+        # devices example "Byteflies, Empatica"
+        devices = set([file.split(' - ')[-1] for file in all_files])
+        # run the edf read function for all files in list all_files
+        new_dict = {}
+        for device in devices:
+            # select only device files
+            device_files = [file for file in all_files if device in file]
+            channels, sfreq, units = Seer.__read_file(device_files[0], metadata=True)
+            all_edf = list(map(Seer.__read_file, device_files))
+            for ch in range(len(channels)):
+                segments = {edf_data[1]: edf_data[0][ch] for edf_data in all_edf}
+                unit = units
+                name = f'{channels[ch]} from {device.split("-")[0]}'
+                dict_key = f'{device.split("-")[0]}-{channels[ch].upper()}' if len(devices) > 1 else channels[ch].upper()
+                if len(segments) > 1:
+                    new_timeseries = Timeseries.withDiscontiguousSegments(segments, sampling_frequency=sfreq, name=name, units=unit)
+                else:
+                    new_timeseries = Timeseries(tuple(segments.values())[0], tuple(segments.keys())[0], sfreq, name=name, units=unit)
+                new_dict[dict_key] = new_timeseries
+
+        return new_dict
+
+    @staticmethod
+    def _fetch(source_dir='', type=None, patient_code=None):
+        """ Fetch one patient from the database
+        Args:
+            patient_code (int): number of patient to select
+        """
+        # Transform patient code to the patient folder name
+        if not patient_code:
+            raise IOError('Please give a patient code (int)')
+        if source_dir == '':
+            raise IOError('Please give patients location')
+        list_patients = 
listdir(source_dir)
+        selected_patient = [pat for pat in list_patients if str(patient_code) in pat]
+        if len(selected_patient) == 1:
+            print(f'{selected_patient=}')
+            path_ = path.join(source_dir, selected_patient[0])
+            files = Seer._timeseries(path_, type)
+            return files
+        elif len(selected_patient) > 1:
+            raise IOError(f'More than one patient found {selected_patient=}')
+        else:
+            raise IOError(f'No patient was found {selected_patient=}')
+
+    @staticmethod
+    def _write(path:str, timeseries: dict):
+        pass
+
+    @staticmethod
+    def _transfer(samples, to_unit):
+        pass
+
+
+# path_ = 'C:\\Users\\Mariana\\OneDrive - Universidade de Lisboa\\PreEpiseizures\\BD-SEER'
+# files = Seer._fetch(path_, type=EMG, patient_code="172")
diff --git a/src/ltbio/biosignals/sources/_Seer.pyi b/src/ltbio/biosignals/sources/_Seer.pyi
new file mode 100644
index 00000000..db981b6a
--- /dev/null
+++ b/src/ltbio/biosignals/sources/_Seer.pyi
@@ -0,0 +1,4 @@
+from ltbio.biosignals.sources._BiosignalSource import BiosignalSource
+
+class Seer(BiosignalSource):
+    def __init__(self, device_id: str, defaults_path: str = None) -> Seer: ...
diff --git a/src/ltbio/biosignals/sources/_Sense.py b/src/ltbio/biosignals/sources/_Sense.py
new file mode 100644
index 00000000..e0501ae2
--- /dev/null
+++ b/src/ltbio/biosignals/sources/_Sense.py
@@ -0,0 +1,373 @@
+# -*- encoding: utf-8 -*-
+
+# ===================================
+
+# IT - LongTermBiosignals
+
+# Package: biosignals
+# Module: Sense
+# Description: Class Sense, a type of BiosignalSource, with static procedures to read and write datafiles from
+# any ScientISST Sense device.
+
+# Contributors: Mariana Abreu, João Saraiva
+# Created: 20/06/2022
+# Last Updated: 22/07/2022
+
+# ===================================
+
+import configparser
+from ast import literal_eval
+from datetime import timedelta
+from json import load
+from os import listdir, path, access, R_OK
+from os.path import getsize
+from warnings import warn
+
+import numpy as np
+from dateutil.parser import parse as to_datetime
+
+from .. import modalities
+from .._BiosignalSource import BiosignalSource
+from .._Timeseries import Timeseries
+from ltbio.clinical.BodyLocation import BodyLocation
+
+
+class Sense(BiosignalSource):
+
+    # Flag to deal with badly-formatted CSV files
+    BAD_FORMAT = False
+
+    def __init__(self, device_id:str, defaults_path:str=None):
+        super().__init__()
+        self.__device_id = device_id
+        Sense.DEVICE_ID = device_id
+        if defaults_path is not None:
+            Sense.DEFAULTS_PATH = defaults_path
+        else:
+            if not path.exists('resources/config.ini'):
+                raise FileNotFoundError('No config.ini was found.')
+            try:
+                config = configparser.ConfigParser()
+                config.read('resources/config.ini')
+                Sense.DEFAULTS_PATH = config['DEFAULT']['Sense']
+                print(f"Getting default mapping from {Sense.DEFAULTS_PATH}")
+            except KeyError:
+                raise KeyError("No defaults file indicated for 'Sense' devices in config.ini.")
+        self.__defaults_path = defaults_path
+
+        Sense.BAD_FORMAT = False
+
+    def __repr__(self):
+        return "ScientISST Sense"
+
+
+    @staticmethod
+    def __aux_date(header):
+        """ Get starting time from header. """
+        return to_datetime(header[Sense.__KEY_TIME_IN_HEADER], ignoretz=True)
+
+    @staticmethod
+    def __check_empty(len_, type=''):
+        """ Confirm if the length is acceptable and return the desired output. 
""" + if type == 'file_size': + if len_ <= 50: + return True + else: + if len_ < 1: + return True + return False + + @staticmethod + def __get_mapping(biosignal_type, channel_labels, modalities_available): + """ + Given a header, find all indexes that correspond to biosignal modality of interest. + It REQUIRES a default mapping to be specified in a JSON file, otherwise a mapping will be requested on the stdin and saved for future use. + + @param header: A list of strings corresponding to column names. + @param biosignal_type: Biosignal subclass indicating which modality is of interest. + @param defaults_path: The path to the JSON file containing the mapping in the correct syntax. + + @rtype: tuple + @return: A tuple with: + a) A dictionary with the indexes corresponding to the biosignal modality of interest mapped to a channel label. Optionally, it can have a key Sense.BODY_LOCATION mapped to some body location. + E.g.: {1: 'Label of channel 1', 3: 'Label of channel 3'} + b) A body location (in str) or None + """ + + mapping = {} + + if biosignal_type.__name__ in str(modalities_available): + for index in modalities_available[biosignal_type.__name__]: + # Map each analogue channel of interest to a label + mapping[index] = channel_labels[str(index)] + else: + raise IOError(f"There are no analogue channels associated with {biosignal_type.__name__}") + + return mapping + + @staticmethod + def __get_defaults(): + """ + Gets the default mapping of channels for a device. + + @return: A tuple with + a) modalities: A dictionary mapping biosignal modalities to column indexes; + b) channel_labels: A dictionary mapping each column index to a meaningful channel label; + c) body_location: A string associated with a body location. + @rtype: tuple of size 3 + """ + + if not hasattr(Sense, 'DEVICE_ID'): + raise IOError("Unlike other BiosignalSource(s), Sense needs to be instantiated and a 'device_id' must be provided on instantiation.") + + # Check if file exists and it is readable + if path.isfile(Sense.DEFAULTS_PATH) and access(Sense.DEFAULTS_PATH, R_OK): + + # OPTION A: Use the mapping in the json file + with open(Sense.DEFAULTS_PATH, 'r') as json_file: + json_string = load(json_file) + + # Get mapping of modalities + if Sense.__MODALITIES in json_string[Sense.DEVICE_ID]: + modalities = json_string[Sense.DEVICE_ID][Sense.__MODALITIES] + else: + raise IOError(f"Key {Sense.__MODALITIES} is mandatory for each device default mapping.") + + # Get mapping of channel labels, if any + if Sense.__CHANNEL_LABELS in json_string[Sense.DEVICE_ID]: + channel_labels = json_string[Sense.DEVICE_ID][Sense.__CHANNEL_LABELS] + else: + channel_labels = None + + # Get body location, if any + if Sense.__BODY_LOCATION in json_string[Sense.DEVICE_ID]: + body_location = json_string[Sense.DEVICE_ID][Sense.__BODY_LOCATION] + if body_location.startswith('BodyLocation.'): + body_location:BodyLocation = eval(body_location) + else: + body_location = None + + return modalities, channel_labels, body_location + + # File does not exist; creates one + else: + print("Either Sense defaults file is missing or it is not readable. Creating new defaults...") + # OPTION B: Ask and save a new mapping + json_string = {} + json_string[Sense.DEVICE_ID] = {} # Create a new object for a new device mapping + # B1. Input modalities + # B2. Input Channel labels + # B3. 
+    @staticmethod
+    def __get_header(file_path):
+        """
+        Auxiliary procedure to find the header (1st line) and column names (2nd line) of the file in the given path.
+        @param file_path: The path of the file to look for a header.
+        @return: A tuple with:
+            a) header: A dictionary with the header metadata.
+            b) column_names: A list of the column names.
+        @raise:
+            IOError: If the given file path does not exist.
+        """
+        with open(file_path) as fh:
+            header = next(fh)[1:]  # Read first line
+            header = literal_eval(header)  # Get a dictionary of the header metadata
+            column_names = next(fh)[1:]  # Read second line
+            column_names = column_names.split()  # Get a list of the column names
+            return header, column_names
+
+    @staticmethod
+    def __get_samples(file_path):
+        """
+        Auxiliary procedure to find the samples (from the 3rd line on) of the file in the given path.
+        @param file_path: The path of the file to look for samples.
+        @return: A np.array of the data or, for badly-formatted files, a tuple with the valid segments and their start indices.
+        @raise:
+            IOError: If the given file path does not exist.
+        """
+        with open(file_path) as fh:
+            # Dismiss the header (first line) and the column names (second line)
+            next(fh)
+            next(fh)
+            # Get the remaining data, i.e., the samples
+            data = [line.strip().split() for line in fh]
+            try:
+                return np.array(data, float)
+            except ValueError:  # In July 2022, it could occur that SENSE files could present Bad Format.
+                Sense.BAD_FORMAT = True
+                all_segments = []
+                start_indices = [0, ]
+                # In that case, we need to separate each valid segment of samples.
+                correct_length = len(data[0])  # FIXME: Assuming first line is syntax-valid. Poor verification, though.
+                for i in range(len(data)):
+                    if len(data[i]) != correct_length:  # Bad syntax found
+                        warn(f"File '{file_path}' has bad syntax on line {i}. This portion was dismissed.")
+                        # Trim the end of data
+                        for j in range(i-1, 0, -1):
+                            if data[j][0] == '15':  # Look for NSeq == 15
+                                all_segments.append(np.array(data[start_indices[-1]:j + 1], float))  # append "old" segment
+                                break
+                        # Trim the beginning of the new segment
+                        for j in range(i+1, len(data), 1):
+                            if data[j][0] == '0':  # Look for NSeq == 0
+                                start_indices.append(j)
+                                break
+
+                all_segments.append(np.array(data[start_indices[-1]:], float))  # append last "new" segment
+                return all_segments, start_indices
+
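+    # For reference, a well-formatted Sense CSV file is assumed to look like this
+    # (header values are illustrative; the first data column is NSeq, cycling 0..15):
+    #
+    #   # {'ISO 8601': '2022-06-20T16:00:00', 'Sampling rate (Hz)': 500, 'Channel Labels': ...}
+    #   # NSeq I1 I2 AI1_raw AI2_raw
+    #   0 0 0 512 498
+    #   1 0 0 513 499
+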
+    @staticmethod
+    def __read_file(file_path, type, channel_labels, modalities_available):
+        """
+        Reads one csv file.
+
+        @param file_path: The path of the file to read.
+        @param type: Biosignal subclass indicating which modality is of interest.
+        @param channel_labels: A dictionary mapping column indexes to meaningful channel labels.
+        @param modalities_available: A dictionary mapping biosignal modalities to column indexes.
+
+        @return: A tuple with:
+            a) samples_of_interest (dict): channel label -> array of samples.
+            b) date (datetime): initial datetime of the samples.
+            c) sampling_frequency (float): The sampling frequency, in Hertz, of the read samples.
+        For badly-formatted files, a) and b) are lists with one element per valid segment.
+
+        @raise:
+            IOError: If the file is empty.
+        """
+
+        # STEP 1
+        # Get header
+        header, column_names = Sense.__get_header(file_path)
+
+        # STEP 2
+        # Get all samples
+        all_samples = Sense.__get_samples(file_path)
+
+        # STEP 3
+        # Raise Error if file is empty
+        if not Sense.BAD_FORMAT and Sense.__check_empty(len(all_samples)):
+            raise IOError(f'Empty file: {file_path}.')
+
+        # STEP 4
+        # Get analogue channels of interest, mapped to labels, and a body location (if any associated)
+        mapping = Sense.__get_mapping(type, channel_labels, modalities_available)
+
+        # STEP 5
+        # Get initial date and sampling frequency
+        date = Sense.__aux_date(header)
+        sf = header[Sense.__KEY_HZ_IN_HEADER]
+
+        # STEP 6
+        # Filter only the samples of the channels of interest
+        if not Sense.BAD_FORMAT:
+            samples_of_interest = {}
+            for ix in mapping:
+                label = mapping[ix]
+                samples_of_interest[label] = all_samples[:, column_names.index(Sense.__ANALOGUE_LABELS_FORMAT.format(str(ix)))]
+            # return dict, start date, sampling frequency
+            return samples_of_interest, date, sf
+        else:
+            samples_of_interest_by_segment, start_dates = [], []
+            all_segments, start_indices = all_samples
+            for segment, start_index in zip(all_segments, start_indices):
+                start_dates.append(date + timedelta(seconds=start_index/sf))
+                samples_of_interest = {}
+                for ix in mapping:
+                    label = mapping[ix]
+                    samples_of_interest[label] = segment[:, column_names.index(Sense.__ANALOGUE_LABELS_FORMAT.format(str(ix)))]
+                samples_of_interest_by_segment.append(samples_of_interest)
+            # return segments, start dates, sampling frequency
+            return samples_of_interest_by_segment, start_dates, sf
+
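+    # Illustrative usage (device id and paths are hypothetical):
+    #   sense = Sense(device_id='my_device', defaults_path='resources/sense_defaults.json')
+    #   channels = Sense._timeseries('data/acquisitions/', modalities.ECG)
+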
+    @staticmethod
+    def _timeseries(dir, type, **options):
+        """Reads multiple csv files on the directory 'path' and returns a Biosignal associated with a Patient.
+        @param dir (str): directory that contains Sense files in csv format
+        @param type (subclass of Biosignal): type of biosignal to extract; can be one of ECG, EDA, PPG, RESP, ACC and EMG
+        @param **options (dict):
+            defaults_path (str): path to a JSON file with default channel mappings
+            device_id (str): identifier of the device that acquired the files. If not defined, the values given on instantiation are used.
+
+        @return: A typical dictionary like {str: Timeseries}.
+
+        @raise:
+            IOError: If there are no Sense files in the given directory.
+            IOError: If Sense files have no header.
+        """
+
+        # STEP 0 - Get defaults
+        modalities_available, channel_labels, _ = Sense.__get_defaults()
+
+        # STEP 1 - Get files
+        # A list is created with all the filenames that end with '.csv' inside the given directory.
+        # E.g. [ file1.csv, file.2.csv, ... ]
+        all_files = [path.join(dir, file) for file in listdir(dir) if file.endswith('.csv')]
+        if not all_files:
+            raise IOError(f"No files in {dir}.")
+
+        # STEP 2 - Convert channel labels to BodyLocations, if any
+        for position, label in channel_labels.items():
+            if label.startswith('BodyLocation.'):
+                channel_labels[position]:BodyLocation = eval(label)
+
+        # STEP 3 - Read files
+        # Get samples of analogue channels of interest from each file
+        data = []
+        for file in all_files:
+            if getsize(file) == 0:
+                warn(f"File '{file}' has 0 bytes. Its reading was dismissed.")
+                continue
+            what_is_read = Sense.__read_file(file, type, channel_labels, modalities_available)
+            if not Sense.BAD_FORMAT:
+                data.append(what_is_read)
+            else:
+                samples_of_interest_by_segment, start_dates, sf = what_is_read
+                for segment, start_date in zip(samples_of_interest_by_segment, start_dates):
+                    data.append((segment, start_date, sf))
+                Sense.BAD_FORMAT = False  # done dealing with a bad format
+
+        # E.g.: data[k] = samples_of_interest, start_date, sampling_frequency
+
+        # STEP 4 - Restructuring
+        # Listing all Segments of the same channel together, labelled with the same channel label.
+        res = {}
+        segments = {}
+        for samples, date, sf in data:
+            for channel in samples:
+                # instantiating or appending
+                if channel not in segments:
+                    segments[channel] = {date: samples[channel]}
+                else:
+                    segments[channel][date] = samples[channel]
+                res[channel] = sf  # save sampling frequency here to be used on the next loop
+
+        # Encapsulating the list of Segments of the same channel in a Timeseries
+        for channel in segments:
+            if len(segments[channel]) > 1:
+                res[channel] = Timeseries.withDiscontiguousSegments(segments[channel], sampling_frequency=res[channel])
+            else:
+                res[channel] = Timeseries(tuple(segments[channel].values())[0], tuple(segments[channel].keys())[0], sampling_frequency=res[channel])
+
+        return res
+
+    @staticmethod
+    def _acquisition_location(path, type, **options):
+        _, _, bl = Sense.__get_defaults()
+        return bl
+
+    @staticmethod
+    def _write(dir, timeseries):
+        pass  # TODO
+
+    @staticmethod
+    def _transfer(samples, to_unit):
+        pass
+
diff --git a/src/ltbio/biosignals/sources/_Sense.pyi b/src/ltbio/biosignals/sources/_Sense.pyi
new file mode 100644
index 00000000..ad40b9ef
--- /dev/null
+++ b/src/ltbio/biosignals/sources/_Sense.pyi
@@ -0,0 +1,20 @@
+from ltbio.biosignals.sources._BiosignalSource import BiosignalSource
+
+class Sense(BiosignalSource):
+
+    # Sense Defaults files use these keys:
+    __MODALITIES = 'modalities'
+    __CHANNEL_LABELS = 'labels'
+    __BODY_LOCATION = 'location'
+
+    # Sense csv data files use these keys:
+    __KEY_CH_LABELS_IN_HEADER = 'Channel Labels'
+    __KEY_HZ_IN_HEADER = 'Sampling rate (Hz)'
+    __KEY_TIME_IN_HEADER = 'ISO 8601'
+    __ANALOGUE_LABELS_FORMAT = 'AI{0}_raw'
+
+    # These are needed to map channels to biosignal modalities
+    DEFAULTS_PATH: str
+    DEVICE_ID: str
+
+    def __init__(self, device_id: str, defaults_path: str = None) -> None: ...
diff --git a/src/ltbio/biosignals/sources/__init__.py b/src/ltbio/biosignals/sources/__init__.py
new file mode 100644
index 00000000..6deb2352
--- /dev/null
+++ b/src/ltbio/biosignals/sources/__init__.py
@@ -0,0 +1,22 @@
+# -- encoding: utf-8 --
+# ===================================
+# ScientISST LTBio | Long-Term Biosignals
+
+# Package: biosignals.sources
+# Description: Classes representing where from and how the biosignals were acquired. Sources can be specific sensors,
+# devices, hospitals, databases, etc. and they can be composed. Each class contains a set of methods that know how to
+# read and write data from that source, to extract metadata from that source, and some might also include methods to
+# process the data in the specific context of that source.
+ +# Contributors: João Saraiva +# Created: 12/05/2022 +# Last Updated: 09/06/2023 +# =================================== + +from ._BITalino import BITalino +from ._E4 import E4 +from ._HEM import HEM +from ._HSM import HSM +from ._MITDB import MITDB +from ._Seer import Seer +from ._Sense import Sense diff --git a/src/ltbio/biosignals/sources/__init__.pyi b/src/ltbio/biosignals/sources/__init__.pyi new file mode 100644 index 00000000..f63117e2 --- /dev/null +++ b/src/ltbio/biosignals/sources/__init__.pyi @@ -0,0 +1,17 @@ +# -- encoding: utf-8 -- +# =================================== +# ScientISST LTBio | Long-Term Biosignals +# Package: biosignals.sources +# =================================== + +# Available Sources +# (Each is implemented in its own file for legibility) +from ._BITalino import BITalino +from ._E4 import E4 +from ._HEM import HEM +from ._HSM import HSM +from ._MITDB import MITDB +from ._Seer import Seer +from ._Sense import Sense + +__all__ = ['BITalino', 'E4', 'HEM', 'HSM', 'MITDB', 'Seer', 'Sense'] diff --git a/src/ltbio/biosignals/statistics.py b/src/ltbio/biosignals/statistics.py deleted file mode 100644 index 33ce5b9d..00000000 --- a/src/ltbio/biosignals/statistics.py +++ /dev/null @@ -1,112 +0,0 @@ -# -- encoding: utf-8 -- - -# =================================== - -# IT - LongTermBiosignals - -# Package: src/ltbio/biosignals -# Module: statistics -# Description: - -# Contributors: João Saraiva -# Created: 30/08/2022 - -# =================================== -from typing import Callable - -import numpy as np -from numpy import ndarray - -from ltbio.biosignals import Timeseries -from ltbio.biosignals.modalities.Biosignal import Biosignal - - -def _check_biosignals(biosignal_object: Biosignal, name: str): - if not isinstance(biosignal_object, Biosignal): - raise TypeError(f"Parameter '{name}' should be a type of Biosignal.") - - -def _bi_timeseries_statistic(ts1: Timeseries, ts2: Timeseries, statistic: Callable[[ndarray, ndarray], float], by_segment: bool): - if ts1.domain != ts2.domain: - raise ArithmeticError("Different domains.") - if ts1.sampling_frequency != ts2.sampling_frequency: - raise ArithmeticError("Different sampling frequencies.") - - if ts1.is_contiguous: - stat_value = statistic(ts1.samples, ts2.samples) - else: - stat_value = [statistic(seg1, seg2) for seg1, seg2 in zip(ts1.samples, ts2.samples)] - - if by_segment: - return stat_value - else: - return np.mean(stat_value) - -def _bi_biosignal_statistic(biosignal1: Biosignal, biosignal2: Biosignal, statistic: Callable[[ndarray, ndarray], float], by_segment: bool): - # Check types - _check_biosignals(biosignal1, 'biosignal1') - _check_biosignals(biosignal2, 'biosignal2') - - # One channel - if len(biosignal1) == 1 and len(biosignal2) == 1: - ts1: Timeseries = biosignal1._get_channel(biosignal1.channel_names.pop()) - ts2: Timeseries = biosignal2._get_channel(biosignal2.channel_names.pop()) - try: - return _bi_timeseries_statistic(ts1, ts2, statistic, by_segment) - except ArithmeticError: - raise ArithmeticError('The domain and sampling frequency of both Biosignals must be the same.') - - # Multiple channels - else: - if biosignal1.channel_names != biosignal2.channel_names: - raise ArithmeticError("The channel names of both Biosignals must be the same.") - - res = {} - for channel_name, channel1 in biosignal1: - channel2 = biosignal2._get_channel(channel_name) - try: - res[channel_name] = _bi_timeseries_statistic(channel1, channel2, statistic, by_segment) - except ArithmeticError: - raise ArithmeticError( - 
f"The domain and sampling frequency of channels '{channel_name}' of both Biosignals must be the same.") - return res - - -def mse(biosignal1: Biosignal, biosignal2: Biosignal, by_segment: bool = False): - # Stat function - def _mse(x: ndarray, y: ndarray) -> float: - return np.square(np.subtract(x, y)).mean() - - return _bi_biosignal_statistic(biosignal1, biosignal2, _mse, by_segment) - - -def nmse(biosignal1: Biosignal, biosignal2: Biosignal, by_segment: bool = False, decibel: bool = False): - # Stat function - def _nmse(x: ndarray, y: ndarray) -> float: - a = np.sum(np.square(np.subtract(x, y))) - b = np.sum(np.square(np.subtract(x, np.mean(x)))) - return a/b - - stat = _bi_biosignal_statistic(biosignal1, biosignal2, _nmse, by_segment) - return 10 * np.log10(stat) if decibel else stat - - -def mean(biosignal: Biosignal, by_segment: bool = False): - if not isinstance(biosignal, Biosignal): - raise TypeError("Parameter 'biosignal' should be a type of Biosignal.") - - res = {} - for channel_name, channel in biosignal: - if channel.is_contiguous: - mean = np.mean(channel.samples) - elif not by_segment: - mean = np.mean(np.array(channel.samples)) - else: - mean = np.mean(np.array(channel.samples), axis=1) - - res[channel_name] = mean - - if len(biosignal) == 1: - return tuple(res.values())[0] - else: - return res diff --git a/src/ltbio/biosignals/timeseries.py b/src/ltbio/biosignals/timeseries.py deleted file mode 100644 index ba3a66f1..00000000 --- a/src/ltbio/biosignals/timeseries.py +++ /dev/null @@ -1,2080 +0,0 @@ -# -- encoding: utf-8 -- - -# =================================== - -# IT - LongTermBiosignals - -# Package: src/ltbio/biosignals -# Module: timeseries -# Description: - -# Contributors: João Saraiva -# Created: 20/04/2022 -# Last Updated: 07/03/2023 - -# =================================== - - -from math import ceil -from os.path import join -from tempfile import mkstemp -from typing import Iterable, Collection, Dict, Tuple, Callable - -import numpy as np -from biosppy.signals.tools import power_spectrum -from dateutil.parser import parse as to_datetime -from ..biosignals import Event -from units import Unit -from numpy import array, append, ndarray, divide, concatenate, tile, memmap -from scipy.signal import resample - - -class Frequency(float): - - def __init__(self, value:float): - self.value = float(value) - - def __str__(self): - return str(self.value) + ' Hz' - - def __repr__(self): - return self.__str__() - - def __eq__(self, other): - if isinstance(other, float): - return other == self.value - elif isinstance(other, Frequency): - return other.value == self.value - - def __float__(self): - return self.value - - def __copy__(self): - return Frequency(self.value) - - -from datetime import datetime, timedelta -from functools import reduce -from typing import Sequence, List - -import matplotlib.pyplot as plt -from datetimerange import DateTimeRange -from matplotlib import cm -from matplotlib.dates import date2num -from matplotlib.lines import Line2D -from matplotlib.patches import Rectangle - - -class Timeline(): - - class Group(): - - def __init__(self, intervals: Sequence[DateTimeRange] = [], points: Sequence[datetime] = [], name: str = None, color_hex: str = None): - self.intervals = list(intervals) - self.points = list(points) - self.name = name - self.color_hex = color_hex - - def __repr__(self): - res = '' - if 0 < len(self.intervals): - if len(self.intervals) < 10: - res += ' U '.join(['[' + str(interval) + '[' for interval in self.intervals]) - else: - res += 
f'{len(self.intervals)} intervals with {self.duration} of total duration' - if 0 < len(self.points): - if len(self.points) < 10: - res += '\nand the following timepoints:\n' - res += ', '.join(['[' + str(point) + '[' for point in self.points]) - else: - res += f'\nand {len(self.points)} timepoints.\n' - return res - - @property - def initial_datetime(self) -> datetime: - return min([interval.start_datetime for interval in self.intervals] + self.points) - - @property - def final_datetime(self) -> datetime: - return max([interval.end_datetime for interval in self.intervals] + self.points) - - @property - def duration(self) -> timedelta: - return sum([interval.timedelta for interval in self.intervals], timedelta()) - - @property - def has_only_intervals(self) -> bool: - return len(self.intervals) > 0 and len(self.points) == 0 - - @property - def has_only_points(self) -> bool: - return len(self.intervals) == 0 and len(self.points) > 0 - - def _as_index(self) -> tuple: - if self.has_only_intervals: - return tuple(self.intervals) - if self.has_only_points: - return tuple(self.points) - return None - - def __init__(self, *groups: Group, name: str = None): - self.groups = list(groups) - self.__name = name - - @property - def name(self): - return self.__name if self.__name else "No Name" - - @name.setter - def name(self, name: str): - self.__name = name - - def __repr__(self): - if len(self.groups) == 1: - return repr(self.groups[0]) - else: - res = '' - for g in self.groups: - res += f'\nGroup {g}\n' - res += repr(g) - return res - - def __and__(self, other): - if isinstance(other, Timeline): - groups = [] - groups += self.groups - groups += other.groups - group_names = [g.name for g in groups] - if len(set(group_names)) != len(group_names): - raise NameError('Cannot join Timelines with groups with the same names.') - return Timeline(*groups, name = self.name + " and " + other.name) - - @property - def initial_datetime(self) -> datetime: - return min([g.initial_datetime for g in self.groups]) - - @property - def final_datetime(self) -> datetime: - return max([g.final_datetime for g in self.groups]) - - @property - def has_single_group(self) -> bool: - return len(self.groups) == 1 - - @property - def single_group(self) -> Group: - return self.groups[0] if self.has_single_group else None - - @property - def duration(self) -> timedelta: - if len(self.groups) == 1: - return self.groups[0].duration - else: - return NotImplementedError() - - @property - def is_index(self) -> bool: - """ - Returns whether or not this can serve as an index to a Biosignal. 
- A Timeline can be an index when: - - It only contains one interval or a union of intervals (serves as a subdomain) - - It only contains one point or a set of points (serves as set of objects) - """ - return len(self.groups) == 1 and (self.groups[0].has_only_intervals ^ self.groups[0].has_only_points) - - def _as_index(self) -> tuple | None: - if self.is_index: - return self.groups[0]._as_index() - - def plot(self, show:bool=True, save_to:str=None): - fig = plt.figure(figsize=(len(self.groups)*10, len(self.groups)*2)) - ax = plt.gca() - legend_elements = [] - - cmap = cm.get_cmap('tab20b') - for y, g in enumerate(self.groups): - color = g.color_hex - if color is None: - color = cmap(y/len(self.groups)) - - for interval in g.intervals: - start = date2num(interval.start_datetime) - end = date2num(interval.end_datetime) - rect = Rectangle((start, y + 0.4), end - start, 0.4, facecolor=color, alpha=0.5) - ax.add_patch(rect) - - for point in g.points: - ax.scatter(date2num(point), y + 0.95, color=color, alpha=0.5, marker = 'o', markersize=10) - - if len(self.groups) > 1: - legend_elements.append(Line2D([0], [0], marker='o', color=color, label=g.name, markerfacecolor='g', markersize=10)) - - ax.set_xlim(date2num(self.initial_datetime), date2num(self.final_datetime)) - ax.set_ylim(0, len(self.groups)) - ax.get_yaxis().set_visible(False) - for pos in ['right', 'top', 'left']: - plt.gca().spines[pos].set_visible(False) - ax.xaxis_date() - fig.autofmt_xdate() - - if len(self.groups) > 1: - ax.legend(handles=legend_elements, loc='center') - - if self.name: - fig.suptitle(self.name, fontsize=11) - fig.tight_layout() - if save_to is not None: - fig.savefig(save_to) - plt.show() if show else plt.close() - - def _repr_png_(self): - self.plot() - - @classmethod - def union(cls, *timelines): - # Check input - if not all(isinstance(tl, Timeline) for tl in timelines): - raise TypeError("Give objects Timeline to Timeline.union.") - if len(timelines) < 2: - raise ValueError("Give at least 2 Timelines to compute their union.") - - # Get sets of intervals of each Timeline - tl_intervals = [] - for i, tl in enumerate(timelines): - if tl.has_single_group and tl.single_group.has_only_intervals: - tl_intervals.append(tl.single_group.intervals) - else: - raise AssertionError(f"The {i+1}th Timeline does not have a single group with only intervals.") - - # Binary function - def union_of_two_timelines(intervals1: List[DateTimeRange], intervals2: List[DateTimeRange]): - intervals = intervals1 + intervals2 - intervals.sort(key=lambda x: x.start_datetime) - union = [intervals[0]] - for i in range(1, len(intervals)): - if union[-1].end_datetime >= intervals[i].start_datetime: - union[-1].set_end_datetime(max(union[-1].end_datetime, intervals[i].end_datetime)) - else: - union.append(intervals[i]) - return union - - res_intervals = reduce(union_of_two_timelines, tl_intervals) - return Timeline(Timeline.Group(res_intervals), name=f"Union of " + ', '.join(tl.name for tl in timelines)) - - @classmethod - def intersection(cls, *timelines): - # Check input - if not all(isinstance(tl, Timeline) for tl in timelines): - raise TypeError("Give objects Timeline to Timeline.union.") - if len(timelines) < 2: - raise ValueError("Give at least 2 Timelines to compute their union.") - - # Get sets of intervals of each Timeline - tl_intervals = [] - for i, tl in enumerate(timelines): - if tl.has_single_group and tl.single_group.has_only_intervals: - tl_intervals.append(tl.single_group.intervals) - else: - raise AssertionError(f"The {i + 1}th 
Timeline does not have a single group with only intervals.") - - # Binary function - def intersection_of_two_timelines(intervals1: List[DateTimeRange], intervals2: List[DateTimeRange]): - intervals1.sort(key=lambda x: x.start) - intervals2.sort(key=lambda x: x.start) - - intersection = [] - i, j = 0, 0 - while i < len(intervals1) and j < len(intervals2): - if intervals1[i].end_datetime <= intervals2[j].start_datetime: - i += 1 - elif intervals2[j].end_datetime <= intervals1[i].start_datetime: - j += 1 - else: - start = max(intervals1[i].start_datetime, intervals2[j].start_datetime) - end = min(intervals1[i].end_datetime, intervals2[j].end_datetime) - intersection.append(DateTimeRange(start, end)) - if intervals1[i].end_datetime <= intervals2[j].end_datetime: - i += 1 - else: - j += 1 - - return intersection - - res_intervals = reduce(intersection_of_two_timelines, tl_intervals) - return Timeline(Timeline.Group(res_intervals), name=f"Intersection of " + ', '.join(tl.name for tl in timelines)) - - EXTENSION = '.timeline' - - def save(self, save_to: str): - # Check extension - if not save_to.endswith(Timeline.EXTENSION): - save_to += Biosignal.EXTENSION - # Write - from _pickle import dump - with open(save_to, 'wb') as f: - dump(self, f) - - @classmethod - def load(cls, filepath: str): - # Check extension - if not filepath.endswith(Timeline.EXTENSION): - raise IOError("Only .timeline files are allowed.") - - # Read - from _pickle import load - with open(filepath, 'rb') as f: - timeline = load(f) - return timeline - - - -class Timeseries(): - """ - A Timeseries is a sequence of data points that occur in successive order over some period of time. - In a Biosignal, one Timeseries' data points are the measurement of a biological variable, in some unit, taken from a - sensor or channel. This data points are often called samples, and are acquired at fixed sampling frequency. - - To each time point of a Timeseries' domain corresponds one and only one sample. However, a Timeseries might be - contiguous if a sample was acquired at every sampling time point, or discontiguous if there were interruptions. Each - interval/sequence of contiguous samples is called a Segment, but those are managed internally. - - Constructors / Initializers - ______________ - - Timeseries: default - Instantiates a Timeseries with a contiguous sequence of samples. - - Timeseries.withDiscontiguousSegments - Instantiates a Timeseries with discontiguous sequences of samples. - - - Properties: - ______________ - - name: str - The name of the Timeseries, if any. - - samples: array # FIXME - Contiguous or discontiguous sequence of samples. - - sampling_frequency: float - The frequency at which the samples were acquired, in Hz. - - units: Unit - The physical unit at which the samples should be interpreted. - - events: tuple[Event] - The events timely associated to the Timeseries. - - initial_datetime: datetime - The date and time of the first sample. - - final_datetime: datetime - The date and time of the last sample. - - duration: timedelta - The total time of acquired samples, excluding interruptions. - - domain: tuple[DateTimeRange] - The intervals of date and time in which the Timeseries is defined, i.e., samples were acquired. - - is_equally_segmented: bool - The logic value stating if each interval in the domain has the same duration. - - segment_duration: timedelta: - Duration of all segments, if is_equally_segmented is True. - - - Built-ins: - ______________ - - len: Returns the total number of samples. 
- - copy: Copies all Timeseries' content. - - iter: Returns an iterator over the samples of all Timeseries' Segments. - - in: Returns True if a date, time or event is contained in the Timeseries. - - [] : Indexes by date, time or events. - - + : Adds Timeseries. - - += : Appends more samples to the last Timeseries' Segment. - - Methods: - ______________ - - append(datetime, array): - Appends a new sequence of samples in a separate Segment. - - associate(Event): - Timely associates a given Event to the Timeseries. - - dissociate(str): - Removes any association the Timeseries has with an Event with the given name. - - filter(Filter): - Filters the Timeseries with the given design. - - undo_filters(): - Reverts the effect of all filters. - - plot(): - Plots the Timeseries amplitude over time, with all its interruptions, if any. - - plot(): - Plots the Timeseries frequency spectrum. - - ______________ - - Full documentation in: - https://github.com/jomy-kk/IT-LongTermBiosignals/wiki/%5BClass%5D-Timeseries - """ - - __SERIALVERSION: int = 2 - - # =================================== - # Class: Segment - - class __Segment(): - """ - A Segment is an interrupted sequence of samples. - This is an internal class of Timeseries, for its internal management, and should not be used outside Timeseries. - - Properties: - ______________ - - samples: array - Sequence of samples, acquired at a fixed sampling rate. - - raw_samples: array - Original samples, without application of filters or other operations in-place. - - initial_datetime: datetime - The date and time of the first sample. - - final_datetime: datetime - The date and time of the last sample. - - duration: timedeltao - The total time of acquired samples, i.e., final_datetime - initial_datetime. - - is_filtered: bool - The logic value stating if the samples have been filtered. - - - Built-ins: - ______________ - - len: Returns the total number of samples. - - copy: Copies all Timeseries' content. - - in: Returns True if a date, time or event is contained in the Timeseries. - - [] : Indexes by sample index. - - += : Appends more samples. - - <, >, <=, >= : State if onw Timeseries come before another. - - - Methods: - ______________ - - adjacent(Segment): - Returns whether two Segments are adjacent in time. - - overlap(Segment): - Returns whether two Segments overlap in time. - - """ - - __SERIALVERSION: int = 2 - - def __init__(self, samples: ndarray, initial_datetime: datetime, sampling_frequency: Frequency, - is_filtered: bool = False): - """ - A Segment is an uninterrupted sequence of samples. - - Parameters - ------------ - samples: ndarray - The samples to store. - - initial_datetime: datetime - The date and time of the first sample. - - sampling_frequency: Frequency - Reference to the sampling frequency object of the respective Timeseries. - - is_filtered: bool - If samples have been filtered. 
- """ - - # Save metadata - self.__initial_datetime = initial_datetime - self.__final_datetime = self.initial_datetime + timedelta(seconds=len(samples) / sampling_frequency) - self.__raw_samples = samples # if some filter is applied to a Timeseries, the raw version of each Segment should be saved here - self.__is_filtered = is_filtered - self.__sampling_frequency = sampling_frequency - - # Save samples - self.__samples = samples - """ - if not isinstance(samples, memmap): - # Create a memory map for the array - file_name = str(hash(self.__initial_datetime) * hash(self.__final_datetime) * hash(len(samples))) - self.__filepath = join(__temp__.name, file_name) - self.__samples = memmap(self.__filepath, dtype='float32', mode='w+', shape=samples.shape) - self.__samples[:] = samples[:] - self.__samples.flush() # release memory in RAM; don't know if this is actually helping - del samples # delete np.array - else: - self.__samples = samples - """ - - # =================================== - # Properties - - @property - def samples(self) -> array: - return self.__samples - - @property - def raw_samples(self) -> array: - return self.__raw_samples - - @property - def initial_datetime(self) -> datetime: - return self.__initial_datetime - - @property - def final_datetime(self) -> datetime: - return self.__final_datetime - - @property - def domain(self) -> DateTimeRange: - return DateTimeRange(self.__initial_datetime, self.__final_datetime) - - @property - def duration(self) -> timedelta: - return self.__final_datetime - self.__initial_datetime - - @property - def is_filtered(self) -> bool: - return self.__is_filtered - - # =================================== - # Built-ins - - def __len__(self): - return len(self.__samples) - - def __rshift__(self, other: ndarray | list): - if isinstance(other, type(self)): # if it's Segment - if other.final_datetime == self.initial_datetime and other._Segment__sampling_frequency == self.__sampling_frequency: - self.__samples = append(self.__samples, other.samples) - if self.is_filtered and other.is_filtered: - self.__raw_samples = append(self.__samples, other.raw_samples) - self.__final_datetime = other.final_datetime - else: - raise AssertionError("Segments cannot be concatenated.") - else: - self.__samples = append(self.__samples, other) - self.__final_datetime += timedelta(seconds= len(other)/self.__sampling_frequency) - - def __add__(self, other): - if isinstance(other, type(self)): # if it is a Segment - return self._new(samples = self.samples + other.samples, is_filtered=False) # raw is lost - - def __contains__(self, item): # Operand 'in' === belongs to - if isinstance(item, datetime): - return self.initial_datetime <= item < self.final_datetime - if isinstance(item, DateTimeRange): - return item in self.domain - if isinstance(item, type(self)): # item is a Segment - # A Segment contains other Segment if its start is less than the other's and its end is greater than the other's. 
- return self.initial_datetime < item.initial_datetime and self.final_datetime > item.final_datetime - - def __getitem__(self, item): - '''The built-in slicing and indexing (segment[x:y]) operations.''' - if isinstance(item, tuple): - return [self[k] for k in item] - if isinstance(item, int): - return self.__samples[item] - elif isinstance(item, slice): - if item.start is None: - new_initial_datetime = self.__initial_datetime - else: - new_initial_datetime = self.__initial_datetime + timedelta( - seconds=item.start / self.__sampling_frequency.value) - return self._new(samples=self.__samples[item], initial_datetime=new_initial_datetime, - raw_samples=self.__raw_samples[item]) - - def sliding_window(self, window_length: int): - assert window_length > 0 - for i in range(0, len(self.__samples), window_length): - yield self.__samples[i: i + window_length] - - # =================================== - # Amplitude methods - - def max(self): - return np.max(self.__samples) - - def min(self): - return np.min(self.__samples) - - # =================================== - # Binary Logic using Time and Conditions - - def __lt__(self, other): - """A Segment comes before other Segment if its end is less than the other's start.""" - if isinstance(other, Timeseries._Timeseries__Segment): - return self.final_datetime < other.initial_datetime - else: - return tuple(self.__when(self.__samples < other)) - - def __le__(self, other): - if isinstance(other, Timeseries._Timeseries__Segment): - return self.final_datetime <= other.initial_datetime - else: - return tuple(self.__when(self.__samples <= other)) - - def __gt__(self, other): - """A Segment comes after other Segment if its start is greater than the other's end.""" - if isinstance(other, Timeseries._Timeseries__Segment): - return self.initial_datetime > other.final_datetime - else: - return tuple(self.__when(self.__samples > other)) - - def __ge__(self, other): - if isinstance(other, Timeseries._Timeseries__Segment): - return self.initial_datetime >= other.final_datetime - else: - return tuple(self.__when(self.__samples >= other)) - - def __eq__(self, other): - """A Segment corresponds to the same time period than other Segment if their start and end are equal.""" - if isinstance(other, Timeseries._Timeseries__Segment): - return self.initial_datetime == other.initial_datetime and self.final_datetime == other.final_datetime - else: - return tuple(self.__when(self.__samples == other)) - - def __ne__(self, other): - if isinstance(other, Timeseries._Timeseries__Segment): - return not self.__eq__(other) - else: - return tuple(self.__when(self.__samples != other)) - - def overlaps(self, other): - """A Segment overlaps other Segment if its end comes after the other's start, or its start comes before the others' end, or vice versa.""" - if self <= other: - return self.final_datetime > other.initial_datetime - else: - return self.initial_datetime < other.final_datetime - - def adjacent(self, other): - """Returns True if the Segments' start or end touch.""" - return self.final_datetime == other.initial_datetime or self.initial_datetime == other.final_datetime - - @staticmethod - def __when(condition): - intervals = [] - true_interval = False - start, end = None, None - - for i, x in enumerate(condition): - if x: - if not true_interval: # not open - true_interval = True # then open - start = i - else: - if true_interval: # is open - true_interval = False - end = i - intervals.append((start, end)) # close interval - - if true_interval: # is open - intervals.append((start, 
i+1)) # then close - - return intervals - - def _when(self, condition, window_length: int = 1): - assert window_length > 0 - if window_length == 1: - evaluated = [condition(x) for x in self.__samples] - else: - evaluated = [] - for i in range(0, len(self.__samples), window_length): - x = self.__samples[i: i+window_length] - evaluated += [condition(x), ] * len(x) - return self.__when(evaluated) - - # =================================== - # INTERNAL USAGE - Accept Methods - - # General-purpose - - def _apply_operation(self, operation, **kwargs): - """ - Protected Access: For use of this module. - Applies operation in-place to its samples. - """ - self.__samples = operation(self.__samples, **kwargs) - - def _apply_operation_and_return(self, operation, **kwargs): - """ - Protected Access: For use of this module. - Applies operation to a copy of its samples and returns the output. - """ - return operation(self.__samples.copy(), **kwargs) - - # Purpose-specific - - def _accept_filtering(self, filter_design): - """ - Protected Access: For use of this module. - Applies a filter to its samples, given a design. - """ - res = filter_design._visit(self.__samples) # replace with filtered samples - self.__samples = res - self.__is_filtered = True - - def _restore_raw(self): - """ - Protected Access: For use of this module. - Restores the raw samples. - """ - if self.is_filtered: - self.__samples = self.__raw_samples - self.__is_filtered = False - - def _resample(self, new_frequency: Frequency): - """ - Protected Access: For use of this module. - Resamples the samples to a new sampling frequency. - """ - n_samples = int(new_frequency * len(self) / self.__sampling_frequency) - self.__samples = resample(self.__samples, num=n_samples) - self.__raw_samples = resample(self.__raw_samples, num=n_samples) - self.__sampling_frequency = new_frequency - self.__final_datetime = self.initial_datetime + timedelta(seconds=len(self) / new_frequency.value) - - # =================================== - # INTERNAL USAGE - Make similar copies or itself - - def __copy__(self): - """ Creates an exact copy of the Segment contents and returns the new object. """ - new = type(self)(self.samples.copy(), self.initial_datetime, self.__sampling_frequency.__copy__(), - self.is_filtered) - new._Segment__raw_samples = self.__raw_samples - return new - - def _new(self, samples: array = None, initial_datetime: datetime = None, sampling_frequency: Frequency = None, - is_filtered: bool = False, raw_samples: array = None): - """ - Protected Access: For use of this module. - - Creates a similar copy of the Segment's contents and returns the new object. - The value of any field can be changed, when explicitly given a new value for it. All others will be copied. - - :param samples: Different samples. Optional. - :param initial_datetime: A different date and time of the first sample. Optional. - :param sampling_frequency: A different sampling frequency of the samples. Optional. - :param is_filtered: Alter the filtered state. Optional. - :param raw_samples: Different raw samples. Optional. - - Note: If none of these parameters is given, this method is equivalent to '__copy__'. - - :return: A new Segment with the given fields changed. All other contents shall remain the same. 
- :rtype: Segment - """ - samples = self.__samples if samples is None else samples - initial_datetime = self.__initial_datetime if initial_datetime is None else initial_datetime - sampling_frequency = self.__sampling_frequency if sampling_frequency is None else sampling_frequency - is_filtered = self.__is_filtered if is_filtered is None else is_filtered - raw_samples = self.__raw_samples if raw_samples is None else raw_samples - - new = type(self)(samples, initial_datetime, sampling_frequency, is_filtered) - new._Segment__raw_samples = raw_samples - return new - - def _apply_operation_and_new(self, operation: Callable, initial_datetime: datetime = None, - sampling_frequency: Frequency = None, **kwargs): - """ - Protected Access: For use of this module. - - Similarly to '_apply_operation', it applies 'operation' but saves the resulting samples in a new Segment, - which is returned. - - :param operation: A procedures to be executed over the samples. Its First argument must expect a ndarray. - :param initial_datetime: A different date and time the first sample might have after the operation. - :param sampling_frequency: A different sampling frequency the samples might have after the operation. - :param kwargs: Additional arguments to pass when calling 'operation'. - - :return: A new Segment with the samples outputted by the operation. All other contents shall remain the same, - except for initial_datetime and sampling_frequency if new values were given. - :rtype: Segment - """ - samples = operation(self.__samples.copy(), **kwargs) - return self._new(samples, initial_datetime=initial_datetime, sampling_frequency=sampling_frequency) - - def _partition(self, individual_length: int, overlap_length: int = 0): - """ - Protected Access: For use of this module. - - Splits one Segment into many Segments of equal length. Overlaps can be enforced. - - :param individual_length: Length of each resulting Segment. - :param overlap_length: Overlap length between the resulting Segments. - - :return: A list of new Segments with the original samples distributed. All other properties shall remain the same, - except for initial_datetime which changes for each new Segment. - :rtype: list - """ - - res = [] - - step = individual_length - overlap_length - for i in range(0, len(self), step): - trimmed_samples = self.__samples[i: i + individual_length] - trimmed_raw_samples = self.__raw_samples[i: i + individual_length] - res.append(self._new(samples=trimmed_samples, raw_samples=trimmed_raw_samples, - initial_datetime=self.__initial_datetime + timedelta(seconds=i/self.__sampling_frequency))) - - return res - - @classmethod - def _merge(cls, *segments): - """ - It's assummed `segments` is timely ordered and are all of the same sampling frequency. 
- """ - if len(segments) == 1: - return segments[0] - else: - try: - samples = concatenate([seg.samples for seg in segments]) - except Exception as e: - pass - initial_datetime = segments[0].initial_datetime - sampling_frequency = segments[0]._Segment__sampling_frequency - return Timeseries._Timeseries__Segment(samples, initial_datetime, sampling_frequency) - - # =================================== - # SERIALIZATION - - def _memory_map(self, path): - if not isinstance(self.__samples, memmap): # Create a memory map for the array - _, file_name = mkstemp(dir=path, suffix='.segment') - filepath = join(path, file_name) - self.__memory_map = memmap(filepath, dtype='float32', mode='r+', shape=self.__samples.shape) - self.__memory_map[:] = self.__samples[:] - self.__memory_map.flush() # release memory in RAM; don't know if this is actually helping - - def __hash__(self): - return hash(self.__initial_datetime) * hash(self.__final_datetime) * hash(self.__samples) - - def __getstate__(self): - """ - 1: __initial_datetime (datetime) - 2: __samples (ndarray) - """ - if isinstance(self.__samples, memmap): # Case: has been saved as .biosignal before - return (Timeseries._Timeseries__Segment._Segment__SERIALVERSION, self.__initial_datetime, self.__samples) - elif hasattr(self, '_Segment__memory_map'): # Case: being saved as .biosignal for the first time - return (Timeseries._Timeseries__Segment._Segment__SERIALVERSION, self.__initial_datetime, self.__memory_map) - else: # Case: being called by deepcopy - return (Timeseries._Timeseries__Segment._Segment__SERIALVERSION, self.__initial_datetime, self.__samples) - - def __setstate__(self, state): - """ - Version 1 and 2: - 1: __initial_datetime (datetime) - 2: __samples (ndarray) - 3: __sampling_frequency (Frequency) - """ - if state[0] == 1 or state[0] == 2: - self.__initial_datetime, self.__samples, self.__sampling_frequency = state[1], state[2], state[3] - self.__final_datetime = self.initial_datetime + timedelta(seconds=len(self.__samples) / self.__sampling_frequency) - self.__is_filtered = False - self.__raw_samples = self.__samples - else: - raise IOError( - f'Version of Segment object not supported. Serialized version: {state[0]}; ' - f'Supported versions: 1, 2.') - - - # =================================== - # Class: Timeseries - - def __init__(self, samples: ndarray | list | tuple, initial_datetime: datetime, sampling_frequency: float, - units: Unit = None, name: str = None): - """ - Give a sequence of contiguous samples, i.e. without interruptions, and the datetime of the first sample. - If there are interruptions, append the remaining segments using the 'append' method. - It also receives the sampling frequency of the samples. - Additionally, it can receive the samples' units and a name, if needed. - - Parameters - ------------ - samples: ndarray | list | tuple - The samples to store, without interruptions. - - initial_datetime: datetime - The date and time of the first sample. - - sampling_frequency: float | Frequency - The frequency at which the samples where sampled. - - units: Unit - The physical units of the variable measured. - - name: str - A symbolic name for the Timeseries. It is mentioned in plots, reports, error messages, etc. 
- """ - _sampling_frequency = sampling_frequency if isinstance(sampling_frequency, Frequency) else Frequency( - sampling_frequency) - - # Shortcut: Check if being copied - if isinstance(samples, list) and isinstance(samples[0], Timeseries.__Segment): - self.__segments = samples - - else: - # Creat first Segment - samples = array(samples) if not isinstance(samples, ndarray) else samples - segment = Timeseries.__Segment(samples, initial_datetime, _sampling_frequency) - self.__segments = [segment, ] - - # Metadata - self.__sampling_frequency = _sampling_frequency - self.__units = units - self.__name = name - self.__associated_events = {} - self.__tags:set[str] = set() - - - # Control Flags - self.__is_equally_segmented = True # Because there's only 1 Segment - - @classmethod - def withDiscontiguousSegments(cls, segments_by_time: Dict[datetime, ndarray | list | tuple], - sampling_frequency: float, units: Unit = None, name: str = None): - """ - Give a dictionary of discontiguous sequences of samples, keyed by their initial date and time. - It also receives the sampling frequency of the samples. - Additionally, it can receive the samples' units and a name, if needed. - - Parameters - ------------ - samples: dict [datetime, ndarray | list | tuple] - The sequence of samples to store as separate Segments, in the format { datetime: [, ... ], ... }. - - initial_datetime: datetime - The date and time of the first sample. - - sampling_frequency: float | Frequency - The frequency at which the samples where sampled. - - units: Unit - The physical units of the variable measured. - - name: str - A symbolic name for the Timeseries. It is mentioned in plots, reports, error messages, etc. - """ - - if len(segments_by_time) < 2: - raise TypeError("Use the regular initializer to instantiate a Timeseries with 1 contiguous segment.") - - # Sort the segments - ordered_arrays = sorted(segments_by_time.items()) # E.g. [ (datetime, array), (.., ..), .. ] - - # Create Timeseries with the first Segment - initial_datetime, first_array = ordered_arrays[0] - new = cls(first_array, initial_datetime, sampling_frequency, units, name) - - # Append the remaining Segments - for datetime, array in ordered_arrays[1:]: - new.append(datetime, array) - - return new - - # =================================== - # Properties - - @property - def segments(self) -> list: # FIXME: deprecate this - return self.__segments - - @property - def samples(self) -> list | ndarray: - if len(self.__segments) == 1: - return self.__segments[0].samples#.copy() - else: - #return [segment.samples.copy() for segment in self.__segments] - return [segment.samples for segment in self.__segments] - - @property - def initial_datetime(self) -> datetime: - """The date and time of the first sample.""" - return self.__segments[0].initial_datetime # Is the initial datetime of the first Segment. - - @property - def final_datetime(self) -> datetime: - """The date and time of the last sample.""" - return self.__segments[-1].final_datetime # Is the final datetime of the last Segment. 
- - @property - def domain(self) -> Tuple[DateTimeRange]: - """The intervals of date and time in which the Timeseries is defined, i.e., samples were acquired.""" - return tuple([DateTimeRange(segment.initial_datetime, segment.final_datetime) for segment in self]) - - @property - def subdomains(self) -> Tuple[DateTimeRange]: - return self.domain - - @property - def duration(self) -> timedelta: - """ returns actual recorded time without interruptions - """ - total_time = timedelta(seconds=0) - for segment in self: - total_time += segment.duration - return total_time - - @property - def sampling_frequency(self) -> float: - """The frequency at which the samples were acquired, in Hz.""" - return float(self.__sampling_frequency) - - @property - def units(self): - """The physical unit at which the samples should be interpreted.""" - return self.__units - - @property - def name(self): - """The name of the Timeseries, if any.""" - return self.__name if self.__name != None else "No Name" - - @name.setter - def name(self, name: str): - """Set or reset a name for the Timeseries.""" - self.__name = name - - @property - def is_contiguous(self) -> bool: - """The logic value stating if there are no interruptions in time.""" - return len(self.__segments) == 1 - - @property - def is_equally_segmented(self) -> bool: - """The logic value stating if each interval in the domain has the same duration.""" - return self.__is_equally_segmented - - @property - def segment_duration(self) -> timedelta: - """Duration of segments, if equally segmented.""" - if not self.is_equally_segmented: - raise AttributeError("There is no segment duration because this Timeseries is not equally segmented.") - else: - return self.__segments[0].duration - - @property - def segment_length(self) -> int: - """Number of samples of segments, if equally segmented.""" - if not self.is_equally_segmented: - raise AttributeError("There is no segment length because this Timeseries is not equally segmented.") - else: - return len(self.__segments[0]) - - @property - def n_segments(self) -> int: - """The number of uninterrupted segments.""" - return len(self.__segments) - - @property - def events(self) -> Tuple[Event]: - """The events timely associated to the Timeseries, timely ordered.""" - return tuple(sorted(self.__associated_events.values())) - - @property - def tags(self) -> tuple[str]: - return tuple(self.__tags) - - # =================================== - # Built-ins - - def __len__(self): - return sum([len(seg) for seg in self.__segments]) - - def __iter__(self) -> Iterable: - return self.__segments.__iter__() - - def __contains__(self, item): - '''Checks if event occurs in Timeseries.''' - if isinstance(item, str): - return item in self.__associated_events - elif isinstance(item, (datetime, DateTimeRange)): - return any([item in segment for segment in self.__segments]) - - def __getitem__(self, item): - '''The built-in slicing and indexing ([x:y]) operations.''' - - if isinstance(item, tuple): - if isinstance(item[0], (datetime, str)): - res = list() - for timepoint in item: - if isinstance(timepoint, datetime): - res.append(self.__get_sample(timepoint)) - elif isinstance(timepoint, str): - res.append(self.__get_sample(to_datetime(timepoint))) - else: - raise IndexError("Index types not supported. Give a tuple of datetimes (can be in string format).") - return tuple(res) - - if isinstance(item[0], DateTimeRange): # This is not publicly documented. Only Biosignal sends a tuple of DateTimeRanges, when it is dealing with Timelines. 
- segments = [] - for i, interval in enumerate(item): - if i == 193: - pass - try: - x = self.__get_samples(interval.start_datetime, interval.end_datetime) - if x is None: - raise AssertionError(f"x is None for interval {interval}") - segments += x - print(f"Indexed interval {i}") - except IndexError: # one interval was outside of boundaries - pass # there's no problem - if len(segments) == 0: - raise IndexError("All intervals given are outside of the Timeseries domain.") - return self.__new(segments) - - if isinstance(item, int): - return self.__segments[item].samples - - if isinstance(item, datetime): - return self.__get_sample(item) - - if isinstance(item, str): - return self.__get_sample(to_datetime(item)) - - if isinstance(item, slice): - if item.step is not None: - raise IndexError("Indexing with step is not allowed for Timeseries. Try resampling it first.") - initial = to_datetime(item.start) if isinstance(item.start, str) else self.initial_datetime if item.start is None else item.start - final = to_datetime(item.stop) if isinstance(item.stop, str) else self.final_datetime if item.stop is None else item.stop - if isinstance(initial, datetime) and isinstance(final, datetime): - return self.__new(segments=self.__get_samples(initial, final)) - else: - raise IndexError("Index types not supported. Give a slice of datetimes (can be in string format).") - - if isinstance(item, DateTimeRange): # This is not publicly documented. Only Biosignal sends DateTimeRanges, when it is dealing with Events. - # First, trim the start and end limits of the interval. - start, end = None, None - for subdomain in self.domain: # ordered subdomains - if subdomain.is_intersection(item): - intersection = subdomain.intersection(item) - if start is None: - start = intersection.start_datetime - end = intersection.end_datetime - elif start is not None: # if there's no intersection with further subdomains and start was already found... - break # ... then, the end was already reached - if start is None and end is None: - return None - else: - return self[start:end] - - raise IndexError( - "Index types not supported. Give a datetime (can be in string format), a slice or a tuple of those.") - - def __add__(self, other): - """The built-in + operation that adds sample-by-sample two Timeseries.""" - # Check errors - if not isinstance(other, Timeseries): - raise TypeError("Trying to add an object of type {}. 
-
-    def __add__(self, other):
-        """The built-in + operation that adds sample-by-sample two Timeseries."""
-        # Check errors
-        if not isinstance(other, Timeseries):
-            raise TypeError("Trying to add an object of type {}. Expected type: Timeseries.".format(type(other)))
-        if other.sampling_frequency != self.__sampling_frequency:
-            raise ArithmeticError("Both Timeseries must have the same sampling frequency ({} and {}).".format(
-                self.__sampling_frequency, other.sampling_frequency))
-        if other.units is not None and self.__units is not None and other.units != self.__units:
-            raise ArithmeticError(
-                "Both Timeseries must have the same units ({} and {}).".format(self.__units, other.units))
-        if self.domain != other.domain:
-            raise ArithmeticError("Timeseries to add must have the same domain.")
-
-        # Perform addition
-        new_segments = []
-        for x, y in zip(self.__segments, other.segments):
-            new_segments.append(x + y)
-
-        return self.__new(segments=new_segments, units=self.units if self.__units is not None else other.units,
-                          name=self.name + ' + ' + other.name if self.name != other.name else self.name)
-
-    def __rshift__(self, other):
-        """The built-in >> operation that concatenates two Timeseries."""
-        if isinstance(other, Timeseries):
-            if other.initial_datetime < self.final_datetime:
-                raise ArithmeticError(
-                    "The second Timeseries must start after the first one ends ({} and {}).".format(
-                        self.final_datetime, other.initial_datetime))
-            if other.sampling_frequency != self.__sampling_frequency:
-                raise ArithmeticError("Both Timeseries must have the same sampling frequency ({} and {}).".format(
-                    self.__sampling_frequency, other.sampling_frequency))
-            if other.units is not None and self.__units is not None and other.units != self.__units:
-                raise ArithmeticError(
-                    "Both Timeseries must have the same units ({} and {}).".format(self.__units, other.units))
-            new_segments = self.__segments + other.segments  # concatenate lists
-            return self.__new(segments=new_segments, units=self.units if self.__units is not None else other.units,
-                              name=self.name + ' >> ' + other.name if self.name != other.name else self.name)
-
-        raise TypeError("Trying to concatenate an object of type {}. Expected type: Timeseries.".format(type(other)))
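
As a sketch of the two arithmetic behaviours just defined (assuming 'ts1' and 'ts2' are compatible Timeseries; the names are illustrative):

    # '+' requires equal domains, sampling frequencies and units:
    summed = ts1 + ts2          # sample-by-sample addition
    # '>>' additionally requires ts2 to start after ts1 ends:
    concatenated = ts1 >> ts2   # the Segments of ts2 are appended after those of ts1
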
-
-    # ===================================
-    # Binary Logic using Time and Conditions
-
-    # Against another Timeseries, these compare positions in time; against a value,
-    # they return the timepoints at which the condition holds, joined across Segments.
-
-    def __lt__(self, other):
-        if isinstance(other, Timeseries):
-            return self.final_datetime < other.initial_datetime
-        else:
-            return self._indices_to_timepoints([seg < other for seg in self.__segments], by_segment=False)
-
-    def __le__(self, other):
-        if isinstance(other, Timeseries):
-            return self.final_datetime <= other.initial_datetime
-        else:
-            return self._indices_to_timepoints([seg <= other for seg in self.__segments], by_segment=False)
-
-    def __gt__(self, other):
-        if isinstance(other, Timeseries):
-            return self.initial_datetime > other.final_datetime
-        else:
-            return self._indices_to_timepoints([seg > other for seg in self.__segments], by_segment=False)
-
-    def __ge__(self, other):
-        if isinstance(other, Timeseries):
-            return self.initial_datetime >= other.final_datetime
-        else:
-            return self._indices_to_timepoints([seg >= other for seg in self.__segments], by_segment=False)
-
-    def __eq__(self, other):
-        if isinstance(other, Timeseries):
-            return self.initial_datetime == other.initial_datetime and self.final_datetime == other.final_datetime
-        else:
-            return self._indices_to_timepoints([seg == other for seg in self.__segments], by_segment=False)
-
-    def __ne__(self, other):
-        if isinstance(other, Timeseries):
-            return not self.__eq__(other)
-        else:
-            return self._indices_to_timepoints([seg != other for seg in self.__segments], by_segment=False)
-
-    def _when(self, condition, window: timedelta):
-        if window is not None:
-            window_length = int(window.total_seconds() * self.__sampling_frequency)
-            x = [seg._when(condition, window_length) for seg in self.__segments]
-        else:
-            x = [seg._when(condition) for seg in self.__segments]
-        return self._indices_to_timepoints(x, by_segment=False)
-
-    # ===================================
-    # Methods
-
-    def max(self):
-        """Returns the maximum amplitude value of the Timeseries."""
-        return max([seg.max() for seg in self.__segments])
-
-    def min(self):
-        """Returns the minimum amplitude value of the Timeseries."""
-        return min([seg.min() for seg in self.__segments])
-
-    def overlap(self, other) -> Tuple[DateTimeRange]:
-        if isinstance(other, Timeseries):
-            domain1: Tuple[DateTimeRange] = self.domain
-            domain2: Tuple[DateTimeRange] = other.domain
-
-        elif isinstance(other, tuple) and all(isinstance(x, DateTimeRange) for x in other):
-            domain1: Tuple[DateTimeRange] = self.domain
-            domain2: Tuple[DateTimeRange] = other
-
-        else:
-            raise TypeError("Overlap method must be used with another Timeseries or a union of intervals (Tuple[DateTimeRange]).")
-
-        intersections = []  # Union of the intervals from both Timeseries that intersect
-        for interval1 in domain1:
-            for interval2 in domain2:
-                if interval1.is_intersection(interval2):
-                    intersections.append(interval1.intersection(interval2))
-
-        return tuple(intersections)
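
The pairwise-intersection strategy of 'overlap' can be reproduced on plain intervals with the same datetimerange primitives used above; a self-contained sketch with made-up timestamps:

    from datetimerange import DateTimeRange

    domain1 = (DateTimeRange("2022-01-01 00:00:00", "2022-01-01 01:00:00"),)
    domain2 = (DateTimeRange("2022-01-01 00:30:00", "2022-01-01 02:00:00"),)
    intersections = tuple(i1.intersection(i2)
                          for i1 in domain1 for i2 in domain2
                          if i1.is_intersection(i2))
    # intersections -> (the interval from 00:30:00 to 01:00:00,)
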
-
-    def append(self, initial_datetime: datetime, samples: ndarray | list | tuple):
-        """
-        Appends a new sequence of samples in a separate Segment.
-        :param initial_datetime: The date and time of the first sample in 'samples'.
-        :param samples: The sequence of samples to add as a separate Segment.
-        :return: None
-        """
-        assert len(self.__segments) > 0
-        if self.__segments[-1].final_datetime > initial_datetime:  # Check for order and overlaps
-            raise AssertionError("Cannot append more samples starting before the ones already existing.")
-        segment = Timeseries.__Segment(array(samples) if not isinstance(samples, ndarray) else samples,
-                                       initial_datetime, self.__sampling_frequency)
-        self.__segments.append(segment)
-
-        # Check if equally segmented
-        if self.__is_equally_segmented and len(samples) != len(self.__segments[0]):
-            self.__is_equally_segmented = False
-
-    def associate(self, events: Event | Collection[Event] | Dict[str, Event]):
-        """
-        Associates an Event with the Timeseries. Events have names that serve as keys. If keys are given,
-        i.e. if 'events' is a dict, then the Event names are overridden.
-        :param events: One or multiple Event objects.
-        :return: None
-        """
-
-        def __add_event(event: Event):
-            try:
-                if event.has_onset and not event.has_offset:
-                    self.__check_boundaries(event.onset)  # raises IndexError
-                if event.has_offset and not event.has_onset:
-                    self.__check_boundaries(event.offset)  # raises IndexError
-                if event.has_onset and event.has_offset:
-                    self.__check_boundaries(event.domain)
-            except IndexError:
-                raise ValueError(
-                    f"Event '{event.name}' is outside of Timeseries domain, {' U '.join([f'[{subdomain.start_datetime}, {subdomain.end_datetime}[' for subdomain in self.domain])}.")
-            if event.name in self.__associated_events and event != self.__associated_events[event.name]:
-                raise NameError(
-                    f"There is already another Event named '{event.name}'. Cannot have two Events with the same name.")
-            else:
-                self.__associated_events[event.name] = event
-
-        if isinstance(events, Event):
-            __add_event(events)
-        elif isinstance(events, dict):
-            for event_key in events:
-                event = events[event_key]
-                __add_event(Event(event_key, event._Event__onset, event._Event__offset))  # rename with given key
-        else:
-            for event in events:
-                __add_event(event)
-
-    def disassociate(self, event_name: str):
-        """
-        Disassociates the Event with the given name.
-        :param event_name: The name of the Event to disassociate.
-        :return: None
-        :raise NameError: If there is no associated Event with the given name.
-        """
-        if event_name in self.__associated_events:
-            del self.__associated_events[event_name]
-        else:
-            raise NameError(f"There's no Event '{event_name}' associated to this Timeseries.")
-
-    def delete_events(self):
-        self.__associated_events = {}
-
-    def tag(self, tags: str | tuple[str]):
-        """
-        Mark the Timeseries with a tag. Useful to mark machine learning targets.
-        :param tags: The label or labels to tag the Timeseries.
- :return: None - """ - if isinstance(tags, str): - self.__tags.add(tags) - elif isinstance(tags, tuple) and all(isinstance(x, str) for x in tags): - for x in tags: - self.__tags.add(x) - else: - raise TypeError("Give one or multiple string labels to tag the Timeseries.") - - # =================================== - # INTERNAL USAGE - Convert indexes <-> timepoints && Get Samples - - def __get_sample(self, datetime: datetime) -> float: - self.__check_boundaries(datetime) - for segment in self.__segments: # finding the first Segment - if datetime in segment: - return segment[int((datetime - segment.initial_datetime).total_seconds() * self.sampling_frequency)] - raise IndexError("Datetime given is in not defined in this Timeseries.") - - def __get_samples(self, initial_datetime: datetime, final_datetime: datetime) -> List[__Segment]: - '''Returns the samples between the given initial and end datetimes.''' - self.__check_boundaries(DateTimeRange(initial_datetime, final_datetime)) - res_segments = [] - for i in range(len(self.__segments)): # finding the first Segment - segment = self.__segments[i] - if segment.initial_datetime <= initial_datetime <= segment.final_datetime or segment.initial_datetime <= final_datetime <= segment.final_datetime: - if final_datetime <= segment.final_datetime: - trimmed_segment = segment[int(( - initial_datetime - segment.initial_datetime).total_seconds() * self.sampling_frequency):int( - (final_datetime - segment.initial_datetime).total_seconds() * self.sampling_frequency)] - res_segments.append(trimmed_segment) - return res_segments - else: - if not initial_datetime == segment.final_datetime: # skip what would be an empty set - trimmed_segment = segment[int((initial_datetime - segment.initial_datetime).total_seconds() * self.sampling_frequency):] - res_segments.append(trimmed_segment) - for j in range(i + 1, - len(self.__segments)): # adding the remaining samples, until the last Segment is found - segment = self.__segments[j] - if final_datetime <= segment.final_datetime: - trimmed_segment = segment[:int( - (final_datetime - segment.initial_datetime).total_seconds() * self.sampling_frequency)] - res_segments.append(trimmed_segment) - return res_segments - else: - trimmed_segment = segment[:] - res_segments.append(trimmed_segment) - - return res_segments - - def __check_boundaries(self, datetime_or_range: datetime | DateTimeRange) -> None: - intersects = False - if isinstance(datetime_or_range, datetime): - if datetime_or_range < self.initial_datetime: - raise IndexError(f"Datetime given, {datetime_or_range}, is outside of Timeseries domain, which starts at {self.initial_datetime}.") - if datetime_or_range > self.final_datetime: - raise IndexError(f"Datetime given, {datetime_or_range}, is outside of Timeseries domain, which precisely ends at {self.final_datetime}.") - - domain = self.domain - for i, subdomain in enumerate(domain): - if datetime_or_range in subdomain: # success case - intersects = True - break - if datetime_or_range < subdomain.start_datetime: # already passed - raise IndexError("Datetime given is outside of Timeseries domain. 
" - f"Timeseries is defined in [{domain[i-1].start_datetime}, {domain[i-1].end_datetime}[ " - f"and in [{subdomain.start_datetime}, {subdomain.end_datetime}[, " - f"but not at {datetime_or_range}.") - - elif isinstance(datetime_or_range, DateTimeRange): - if datetime_or_range.end_datetime < self.initial_datetime: - raise IndexError(f"Interval given, {datetime_or_range}, is outside of Timeseries domain, which starts at {self.initial_datetime}.") - if datetime_or_range.start_datetime >= self.final_datetime: - raise IndexError(f"Interval given, {datetime_or_range}, is outside of Timeseries domain, which precisely ends at {self.final_datetime}.") - - domain = self.domain - for i, subdomain in enumerate(domain): - if subdomain.is_intersection(datetime_or_range) and datetime_or_range.start_datetime != subdomain.end_datetime: # success case - intersects = True - break - if datetime_or_range.end_datetime < subdomain.start_datetime: # already passed - raise IndexError("Interval given is outside of Timeseries domain. " - f"Timeseries is defined in [{domain[i-1].start_datetime}, {domain[i-1].end_datetime}[ " - f"and in [{subdomain.start_datetime}, {subdomain.end_datetime}[, " - f"but not in {datetime_or_range}.") - - def _indices_to_timepoints(self, indices: Sequence[Sequence[int]] | Sequence[Sequence[Sequence[int]]], by_segment=False) -> Sequence[datetime] | Sequence[Sequence[datetime]] | Sequence[DateTimeRange] | Sequence[Sequence[DateTimeRange]]: - all_timepoints = [] - for index, segment in zip(indices, self.__segments): - timepoints = divide(index, self.__sampling_frequency) # Transform to seconds - if isinstance(timepoints, ndarray) and len(timepoints.shape) == 2 and timepoints.shape[1] == 2: # Intervals - x = [DateTimeRange(segment.initial_datetime + timedelta(seconds=tp[0]), segment.initial_datetime + timedelta(seconds=tp[1])) for tp in timepoints] - else: # Timepoints - x = [segment.initial_datetime + timedelta(seconds=tp) for tp in timepoints] - if by_segment: - all_timepoints.append(x) # Append as list - else: - all_timepoints += x # Join them all - return tuple(all_timepoints) - - def _to_array(self) -> ndarray: - """ - Converts Timeseries to NumPy ndarray, if it is equally segmented. - :return: MxN array, where M is the number of segments and N is their length. 
- :rtype: numpy.ndarray - """ - if not self.__is_equally_segmented: - raise AssertionError("Timeseries needs to be equally segmented to produce a matricial NumPy ndarray.") - return np.vstack([segment.samples for segment in self.__segments]) - - # =================================== - # INTERNAL USAGE - Plots - - def _plot_spectrum(self): - colors = ('blue', 'green', 'red') - n_columns = len(self.__segments) - for i in range(n_columns): - segment = self.__segments[i] - x, y = power_spectrum(signal=segment.samples) - plt.plot(x, y, alpha=0.6, linewidth=0.5, - label='From {0} to {1}'.format(segment.initial_datetime, segment.final_datetime)) - - def _plot(self, label:str = None): - xticks, xticks_labels = [], [] # to store the initial and final ticks of each Segment - SPACE = int(self.__sampling_frequency) * 2 # the empty space between each Segment - - for i in range(len(self.__segments)): - segment = self.__segments[i] - x, y = range(len(segment)), segment.samples - if i > 0: # except for the first Segment - x = array(x) + (xticks[-1] + SPACE) # shift right in time - plt.gca().axvspan(x[0] - SPACE, x[0], alpha=0.05, color='black') # add empty space in between Segments - plt.gca().plot(x, y, linewidth=0.5, alpha=0.7, label=label) - - xticks += [x[0], x[-1]] # add positions of the first and last samples of this Segment - - # add datetimes of the first and last samples of this Segment - if segment.duration > timedelta(days=1): # if greater that a day, include dates - time_format = "%d-%m-%Y %H:%M:%S" - else: # otherwise, just the time - time_format = "%H:%M:%S" - xticks_labels += [segment.initial_datetime.strftime(time_format), - segment.final_datetime.strftime(time_format)] - - plt.gca().set_xticks(xticks, xticks_labels) - plt.tick_params(axis='x', direction='in') - - if self.units is not None: # override ylabel - plt.gca().set_ylabel("Amplitude ({})".format(str(self.units))) - - # =================================== - # INTERNAL USAGE - Accept methods - - # General-purpose - - def _apply_operation(self, operation, **kwargs): - """ - Applies operation in-place to every Segment's samples. - """ - for segment in self.__segments: - segment._apply_operation(operation, **kwargs) - - def _apply_operation_and_return(self, operation, iterate_along_segments_key: [str] = None, **kwargs) -> list: - """ - Applies operation out-of-place to every Segment's samples and returns the ordered output of each in a list. - - Procedure 'operation' must receive a ndarray of samples as first argument. - It can receive other arguments, which should be passed in '**kwargs'. - Procedure output can return whatever, which shall be returned. 
- """ - res = [] - - if isinstance(iterate_along_segments_key, str): - items = kwargs[iterate_along_segments_key] - for segment, item in zip(self, items): - kwargs[iterate_along_segments_key] = item - new_segment = segment._apply_operation_and_return(operation, **kwargs) - res.append(new_segment) - elif isinstance(iterate_along_segments_key, list) and all(isinstance(x, str) for x in iterate_along_segments_key): - items = [kwargs[it] for it in iterate_along_segments_key] - for segment, item in zip(self, *items): - for it in iterate_along_segments_key: - kwargs[it] = item - new_segment = segment._apply_operation_and_return(operation, *items, **kwargs) - res.append(new_segment) - - else: - for segment in self.__segments: - res.append(segment._apply_operation_and_return(operation, **kwargs)) - return res - - # Purpose-specific - - def _accept_filtering(self, filter_design): - filter_design._setup(self.__sampling_frequency) - for segment in self.__segments: - segment._accept_filtering(filter_design) - - def _undo_filters(self): - for segment in self.__segments: - segment._restore_raw() - - def _resample(self, frequency: float): - frequency = frequency if isinstance(frequency, Frequency) else Frequency(frequency) - for segment in self.__segments: - segment._resample(frequency) - self.__sampling_frequency = frequency # The sf of all Segments points to this property in Timeseries. So, this is only changed here. - - # =================================== - # INTERNAL USAGE - Make similar copies or itself - - def __copy__(self): - """ Creates an exact copy of the Timeseries' contents and returns the new object. """ - new = type(self)([seg.__copy__() for seg in self.__segments], self.initial_datetime, - self.__sampling_frequency.__copy__(), self.__units, - str(self.name)) # Uses shortcut in __init__ - new._Timeseries__is_equally_segmented = self.__is_equally_segmented - new.associate(self.events) - new.tag(self.tags) - return new - - def _new_samples(self, samples_by_segment: List[ndarray] = None): - """ - Protected Access: For use of this module, since who uses is not aware of Segment. - - Creates a similar copy of the Timeseries' contents and returns the new object, with the samples of each segment changed. - - :return: A new Timeseries with the samples changed. All other fields shall remain the same. - :rtype: Timeseries | OverlappingTimeseries - """ - - assert len(samples_by_segment) == len(self.__segments) - - segments = [] - for segment, samples in zip(self, samples_by_segment): - segments.append(segment._new(samples=samples, raw_samples=samples, is_filtered=False)) - - return self.__new(segments=segments) - - def __new(self, segments: List[__Segment] = None, sampling_frequency: float = None, units: Unit = None, - name: str = None, equally_segmented: bool = None, overlapping_segments: bool = None, - events: Collection[Event] = None): - """ - Private Access: For in-class usage, since who uses is aware of Segment. - - Creates a similar copy of the Timeseries' contents and returns the new object. - The value of any field can be changed, when explicitly given a new value for it. All others will be copied. - - :param segments: A list of new Segments to substitute. Optional. - :param sampling_frequency: A different sampling frequency. Optional. - :param units: Different units. Optional. - :param name: A different name. Optional. - :param equally_segmented: Alter the is_equally_segmented state. Optional. - :param overlapping_segments: Opt to instantiate a Timeseries or an OverlappingTimeseries. 
Optional. - :param events: A collections of different Events. Optional. - - :return: A new Timeseries with the given fields changed. All other contents shall remain the same. - :rtype: Timeseries | biosignals.timeseries.OverlappingTimeseries.OverlappingTimeseries - """ - - initial_datetime = self.initial_datetime if segments is None else segments[0].initial_datetime - segments = self.__segments if segments is None else segments # Uses shortcut in __init__ - sampling_frequency = self.__sampling_frequency if sampling_frequency is None else sampling_frequency if isinstance( - sampling_frequency, - Frequency) else Frequency(sampling_frequency) - units = self.__units if units is None else units - name = str(self.__name) if name is None else name - equally_segmented = self.__is_equally_segmented if equally_segmented is None else equally_segmented - events = self.__associated_events if events is None else events - - if overlapping_segments is None: - new = type(self)(segments, initial_datetime, sampling_frequency, units, name) - elif overlapping_segments is True: - new = OverlappingTimeseries(segments, initial_datetime, sampling_frequency, units, name) - else: - new = Timeseries(segments, initial_datetime, sampling_frequency, units, name) - - new._Timeseries__is_equally_segmented = equally_segmented - - events = events.values() if isinstance(events, dict) else events - for event in events: - try: - new.associate(event) - except ValueError: - pass # it's outside the new boundaries - - new.tag(self.tags) - - return new - - def _new(self, segments_by_time: Dict[datetime, ndarray | list | tuple] = None, - sampling_frequency: float = None, - units: Unit = None, name: str = None, equally_segmented: bool = None, - overlapping_segments: bool = None, - events: Collection[Event] = None, rawsegments_by_time: Dict[datetime, ndarray | list | tuple] = None): - """ - Protected Access: For use of this module, since who uses is not aware of Segment. - - Creates a similar copy of the Timeseries' contents and returns the new object. - The value of any field can be changed, when explicitly given a new value for it. All others will be copied. - - :param segments_by_time: The sequence of samples to store as separate Segments, keyed by their initial date and time. Optional. - :param sampling_frequency: A different sampling frequency. Optional. - :param units: Different units. Optional. - :param name: A different name. Optional. - :param equally_segmented: Alter the is_equally_segmented state. Optional. - :param overlapping_segments: Opt to instantiate a Timeseries or an OverlappingTimeseries. Optional. - :param events: A collections of different Events. Optional. - :param rawsegments_by_time: The sequence of raw samples to associate to the Segments, keyed by their initial date and time. Optional. - - Note: If both 'segments_by_time' and 'rawsegments_by_time' are given, their key sets must be identical. - - :return: A new Timeseries with the given fields changed. All other contents shall remain the same. 
- :rtype: Timeseries | OverlappingTimeseries - """ - - # Sampling frequency - sampling_frequency = self.__sampling_frequency if sampling_frequency is None else sampling_frequency - - if segments_by_time is not None: - # Transform dict into Segments - segments = [] - for initial_datetime, samples in segments_by_time.items(): - seg = Timeseries.__Segment(samples, initial_datetime, sampling_frequency, - is_filtered=rawsegments_by_time is not None) - if rawsegments_by_time is not None: - seg._Segment__raw_samples = rawsegments_by_time[initial_datetime] - segments.append(seg) - else: - # Send nothing - segments = None - - return self.__new(segments=segments, sampling_frequency=sampling_frequency, units=units, name=name, - equally_segmented=equally_segmented, overlapping_segments=overlapping_segments, - events=events) - - def _apply_operation_and_new(self, operation, sampling_frequency: float = None, units: Unit = None, - name: str = None, equally_segmented: bool = None, - overlapping_segments: bool = None, - events: Collection[Event] = None, - iterate_over_each_segment_key: str = None, **kwargs): - """ - For outside usage. Who uses is not aware of Segment. - Creates new Segments from the existing ones, using Segment._new(). - - If there is one item in '**kwargs' that has input to be iteratively passed to 'method', - indicate its key in 'iterate_over_each_segment_key'. - """ - - # Sampling frequency - sampling_frequency = self.__sampling_frequency if sampling_frequency is None else sampling_frequency - - # Apply operation - all_new_segments = [] - if iterate_over_each_segment_key is not None: - items = kwargs[iterate_over_each_segment_key] - for segment, item in zip(self, items): - kwargs[iterate_over_each_segment_key] = item - new_segment = segment._apply_operation_and_new(operation, sampling_frequency=sampling_frequency, **kwargs) - all_new_segments.append(new_segment) - else: - for segment in self: - new_segment = segment._apply_operation_and_new(operation, sampling_frequency=sampling_frequency, **kwargs) - all_new_segments.append(new_segment) - - # Get new Timeseries - return self.__new(all_new_segments, sampling_frequency=sampling_frequency, units=units, name=name, - equally_segmented=equally_segmented, overlapping_segments=overlapping_segments, - events=events) - - def _equally_segment_and_new(self, window_length: timedelta, overlap_length: timedelta = timedelta(seconds=0)): - """ - For internal usage. - - Segments the Timseries in equal junks. If the Timeseries is already segmented, if concatenates all before re-segmenting. - - :param window_length: - :param overlap_length: - :return: - """ - - n_window_length = int(window_length.total_seconds() * self.sampling_frequency) - n_overlap_length = int(overlap_length.total_seconds() * self.sampling_frequency) - - res_trimmed_segments = [] - for segment in self.__segments: - res_trimmed_segments += segment._partition(n_window_length, n_overlap_length) - - return self.__new(segments=res_trimmed_segments, equally_segmented=True, overlapping_segments=n_overlap_length != 0) - - def _segment_and_new(self, method: Callable, - samples_rkey: str, indexes_rkey: str, - iterate_over_each_segment_key: str = None, - initial_datetimes_shift: timedelta = None, - equally_segmented: bool = True, - overlapping_segments: bool = False, - **kwargs): - """ - For internal usage. - - Segments the Timeseries into smaller portions, using any 'method' that follows the following signature. 
-
-        Procedure 'method' should receive as first argument the array of samples to partition. It can receive other
-        arguments after that, which should be passed in '**kwargs'.
-        If there is one item in '**kwargs' that has input to be iteratively passed to 'method',
-        indicate its key in 'iterate_over_each_segment_key'.
-
-        Procedure 'method' should return a dictionary of objects, and at least two of them must be:
-        - The arrays of samples destined to be the smaller Segments. Indicate their key in the dict using 'samples_rkey'.
-        - The start indexes of each corresponding smaller Segment. Indicate their key in the dict using 'indexes_rkey'.
-
-        If what 'indexes_rkey' contains are shifted initial indexes, indicate that offset in 'initial_datetimes_shift'.
-
-        If 'method' returns equally segmented partitions, pass equally_segmented=True.
-        If 'method' returns overlapping partitions, pass overlapping_segments=True.
-        """
-
-        def __partition(segment: Timeseries.__Segment, indices=None) -> list:
-            """
-            'indices' should be an array of timepoints where to cut, if 'method' does not find them.
-            """
-            load = kwargs
-            if indices is not None:
-                load[iterate_over_each_segment_key] = indices
-            res = method(segment.samples, **load)
-            raw_res = method(segment.raw_samples, **load)
-            raw_values = raw_res[samples_rkey]  # the raw counterpart of the partitioned samples
-            assert len(res) >= 2
-            assert samples_rkey in res.keys()
-            assert indexes_rkey in res.keys()
-            indexes, values = res[indexes_rkey], res[samples_rkey]
-            assert len(indexes) == len(values)
-            initial_datetimes = [timedelta(seconds=index / self.__sampling_frequency) + segment.initial_datetime for
-                                 index in indexes]
-
-            if initial_datetimes_shift is not None:
-                initial_datetimes = [idt + initial_datetimes_shift for idt in initial_datetimes]
-
-            trimmed_segments = [segment._new(samples=values[i], initial_datetime=initial_datetimes[i],
-                                             raw_samples=raw_values[i]) for i in range(len(values))]
-
-            return trimmed_segments
-
-        res_trimmed_segments = []
-        if iterate_over_each_segment_key is not None:
-            for segment, indices in zip(self.__segments, kwargs[iterate_over_each_segment_key]):
-                res_trimmed_segments += __partition(segment, indices)
-        else:
-            for segment in self.__segments:
-                res_trimmed_segments += __partition(segment)
-
-        return self.__new(segments=res_trimmed_segments, equally_segmented=equally_segmented,
-                          overlapping_segments=overlapping_segments)
-
-    # ===================================
-    # INTERNAL USAGE - Reshape
-
-    def _concatenate_segments(self):
-        if len(self.__segments) > 1:
-            self.__segments = [Timeseries.__Segment(concatenate(self.samples), self.initial_datetime, self.__sampling_frequency, False), ]
-            self.__is_equally_segmented = True
-        else:
-            pass  # no need
-
-    def _partition(self, time_intervals: tuple[DateTimeRange]):
-        assert len(self.__segments) == 1
-        samples = self.__segments[0]
-        partitions = []
-        i = 0
-        for x in time_intervals:
-            n_samples_required = ceil(x.timedelta.total_seconds() * self.__sampling_frequency)
-            if n_samples_required > len(samples):
-                samples = tile(samples, ceil(n_samples_required / len(samples)))  # repeat
-                samples = samples[:n_samples_required]  # cut where it is enough
-                partitions.append(Timeseries.__Segment(samples, x.start_datetime, self.__sampling_frequency))
-                i = 0
-            else:
-                f = i + n_samples_required
-                partitions.append(Timeseries.__Segment(samples[i: f], x.start_datetime, self.__sampling_frequency))
-                i = f  # continue right after the end of this partition
-
-        self.__segments = partitions
-
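
The index arithmetic of '_partition' reduces to consuming ceil(duration x fs) samples per interval from a contiguous array; a standalone sketch with made-up numbers:

    from math import ceil
    import numpy as np

    fs = 4.0                   # sampling frequency, in Hz
    samples = np.arange(20)    # a contiguous run of 20 samples
    durations_s = (2.0, 3.0)   # stand-ins for DateTimeRange.timedelta.total_seconds()

    i, parts = 0, []
    for duration_s in durations_s:
        n = ceil(duration_s * fs)      # samples required by this interval
        parts.append(samples[i:i + n])
        i += n                         # continue right after the previous cut
    # parts -> [samples 0..7, samples 8..19]
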
-    def _delete_segments(self, selection_function: Callable[[ndarray], bool]):
-        self.__segments = list(filter(lambda seg: selection_function(seg.samples), self.__segments))
-
-    def _merge(self, time_intervals: tuple[DateTimeRange]):
-        res_segments = []
-        begin_search = 0
-        for t in time_intervals:
-            start, end = None, None
-            for i in range(begin_search, len(self.__segments)):
-                seg = self.__segments[i]
-                if seg.initial_datetime >= t.start_datetime:
-                    if start is None:  # only the first matching Segment defines 'start' (it may be index 0)
-                        start = i
-                    else:
-                        pass
-                if seg.final_datetime > t.end_datetime:
-                    if start is not None:
-                        end = i
-                        to_merge = self.__segments[start: end + 1]
-                        if len(to_merge) > 0:
-                            res_segments.append(Timeseries.__Segment._merge(*to_merge))
-                            begin_search = end + 1
-                        break
-                    else:
-                        pass
-
-        self.__segments = res_segments  # keep only the merged Segments
-
-    # ===================================
-    # SERIALIZATION
-
-    def _memory_map(self, path):
-        # Create a memory map for the array
-        for seg in self:
-            seg._memory_map(path)
-
-    def __getstate__(self):
-        """
-        Version 1:
-        1: __name (str)
-        2: __sampling_frequency (Frequency)
-        3: __units (Unit)
-        4: __is_equally_segmented (bool)
-        5: segments_state (list)
-
-        Version 2:
-        1: __name (str)
-        2: __sampling_frequency (Frequency)
-        3: __units (Unit)
-        4: __is_equally_segmented (bool)
-        5: __tags (set)
-        6: segments_state (list)
-        """
-        segments_state = [segment.__getstate__() for segment in self.__segments]
-        return (self.__SERIALVERSION, self.__name, self.__sampling_frequency, self.__units, self.__is_equally_segmented, self.__tags,
-                segments_state)
-
-    def __setstate__(self, state):
-        if state[0] == 1:
-            self.__name, self.__sampling_frequency, self.__units = state[1], state[2], state[3]
-            self.__is_equally_segmented = state[4]
-            self.__segments = []
-            for segment_state in state[5]:
-                segment_state = list(segment_state)
-                segment_state.append(self.__sampling_frequency)
-                segment = object.__new__(Timeseries.__Segment)
-                segment.__setstate__(segment_state)
-                self.__segments.append(segment)
-            self.__associated_events = {}  # empty; to be populated by Biosignal
-            self.__tags = set()  # In version 1, tags were not a possibility, so none existed.
-        elif state[0] == 2:
-            self.__name, self.__sampling_frequency, self.__units = state[1], state[2], state[3]
-            self.__is_equally_segmented = state[4]
-            self.__segments = []
-            for segment_state in state[6]:
-                segment_state = list(segment_state)
-                segment_state.append(self.__sampling_frequency)
-                segment = object.__new__(Timeseries.__Segment)
-                segment.__setstate__(segment_state)
-                self.__segments.append(segment)
-            self.__associated_events = {}  # empty; to be populated by Biosignal
-            self.__tags = state[5]
-        else:
-            raise IOError(f'Version of {self.__class__.__name__} object not supported. Serialized version: {state[0]}; '
-                          f'Supported versions: 1 and 2.')
-
-
-class OverlappingTimeseries(Timeseries):
-    """
-    An OverlappingTimeseries is a Timeseries that violates the rule that each time point in its domain corresponds to
-    one and only one sample. This special kind of Timeseries allows overlapping Segments, although it loses
-    all its interpretational meaning in the context of being successive data points in time. This kind is useful to
-    extract features from modalities or to train machine learning models.
-
-    It inherits all properties of Timeseries and most of its behaviour.
-    In order to have overlapping Segments, indexing an exact timepoint is no longer possible; although it is legal to
-    index slices. 
# FIXME - """ - - def __init__(self, samples: ndarray | list | tuple, initial_datetime: datetime, sampling_frequency: float, - units: Unit = None, name: str = None): - super().__init__(samples, initial_datetime, sampling_frequency, units, name) - - def append(self, initial_datetime: datetime, samples: ndarray | list | tuple): - assert len(self.__segments) > 0 - segment = Timeseries._Timeseries__Segment(array(samples) if not isinstance(samples, ndarray) else samples, - initial_datetime, self._Timeseries__sampling_frequency) - self._Timeseries__segments.append(segment) - - def _concatenate_segments(self): - raise NotImplementedError("") - - def __getitem__(self, item): - """ - The built-in slicing ([x:y]) operation. - A segment is assumed to belong to the interval [x,y] if and only if its final datetime belongs to the interval. - """ - - if isinstance(item, int): - return self._Timeseries__segments[item].samples - - if isinstance(item, datetime): - raise IndexError('OverlappingTimeseries cannot return a unique value correpondent of a datetime.') - - if isinstance(item, tuple) and all(isinstance(dt, datetime) for dt in item): - raise IndexError('OverlappingTimeseries cannot return a unique value correpondent of datetimes.') - - if isinstance(item, slice): - if item.step is not None: - raise IndexError("Indexing with step is not allowed for OverlappingTimeseries. Try resampling it first.") - initial = to_datetime(item.start) if isinstance(item.start, str) else self.initial_datetime if item.start is None else item.start - final = to_datetime(item.stop) if isinstance(item.stop, str) else self.final_datetime if item.stop is None else item.stop - #self.__check_boundaries(initial) - #self.__check_boundaries(final) - if isinstance(initial, datetime) and isinstance(final, datetime): - return self._Timeseries__new(segments=self.__get_samples(initial, final)) - else: - raise IndexError("Index types not supported. Give a slice of datetimes (can be in string format).") - - if isinstance(item, DateTimeRange): # Not publicly documented. Only Biosignal sends DateTimeRanges, when it is dealing with Events. - #self.__check_boundaries(item) - return self._Timeseries__new(segments=self.__get_samples(item.start_datetime, item.end_datetime)) - - raise IndexError( - "Index types not supported. 
Give a datetime (can be in string format), a slice of those.") - - def __get_samples(self, initial_datetime: datetime, final_datetime: datetime): - '''Returns the segemnts between the given initial and end datetimes, acording to the final datetime of each.''' - res_segments = [] - for i in range(len(self._Timeseries__segments)): - segment = self._Timeseries__segments[i] - if initial_datetime <= segment.final_datetime < final_datetime: # if the last timepoint of the Segment is inside the interval - res_segments.append(segment) # keep the whole Segment - elif initial_datetime <= segment.final_datetime == final_datetime == self.final_datetime: # if the last timepoint of the Segment is the end of the Timeseries - res_segments.append(segment) # keep that Segment as well - - return res_segments - - def __check_boundaries(self, datetime_or_range: datetime | DateTimeRange) -> None: - intersects = False - if isinstance(datetime_or_range, datetime): - for subdomain in self.domain: - if datetime_or_range in subdomain: - intersects = True - break - if not intersects: - raise IndexError( - f"Datetime given is outside of Timeseries domain, {' U '.join([f'[{subdomain.start_datetime}, {subdomain.end_datetime}[' for subdomain in self.domain])}.") - - elif isinstance(datetime_or_range, DateTimeRange): - for subdomain in self.domain: - if subdomain.is_intersection(datetime_or_range) and datetime_or_range.start_datetime != subdomain.end_datetime: - intersects = True - break - if not intersects: - raise IndexError( - f"Interval given is outside of Timeseries domain, {' U '.join([f'[{subdomain.start_datetime}, {subdomain.end_datetime}[' for subdomain in self.domain])}.") - - @property - def domain(self) -> Tuple[DateTimeRange]: - """The intervals of date and time in which the Timeseries is defined, i.e., samples were acquired.""" - domain = [DateTimeRange(self._Timeseries__segments[0].initial_datetime, self._Timeseries__segments[0].final_datetime)] - for i in range(1, len(self._Timeseries__segments)): - if self._Timeseries__segments[i].overlaps(self._Timeseries__segments[i-1]): - domain[-1].set_end_datetime(self._Timeseries__segments[i].final_datetime) - else: - domain.append(DateTimeRange(self._Timeseries__segments[i].initial_datetime, self._Timeseries__segments[i].final_datetime)) - - return tuple(domain) - - @property - def subdomains(self) -> Tuple[DateTimeRange]: - return tuple([DateTimeRange(segment.initial_datetime, segment.final_datetime) for segment in self]) - - def _block_subdomain(self, i) -> DateTimeRange: - return DateTimeRange(self._Timeseries__segments[i].initial_datetime, self._Timeseries__segments[i].final_datetime) - - @property - def duration(self) -> timedelta: - """Returns real time passed from start to end, without overlaps.""" - domain = self.domain - res = domain[0].timedelta - for i in range(1, len(domain)): - res += domain[i].timedelta - return res diff --git a/src/ltbio/biosignals/units.py b/src/ltbio/biosignals/units.py index d0a340e7..4d5eb097 100644 --- a/src/ltbio/biosignals/units.py +++ b/src/ltbio/biosignals/units.py @@ -171,3 +171,25 @@ def __init__(self, multiplier=Multiplier._): def convert_to(self, unit): pass +class Frequency(float): + + def __init__(self, value:float): + self.value = float(value) + + def __str__(self): + return str(self.value) + ' Hz' + + def __repr__(self): + return self.__str__() + + def __eq__(self, other): + if isinstance(other, float): + return other == self.value + elif isinstance(other, Frequency): + return other.value == self.value + + def 
__float__(self):
+        return self.value
+
+    def __copy__(self):
+        return Frequency(self.value)
diff --git a/src/ltbio/biosignals/units.pyi b/src/ltbio/biosignals/units.pyi
new file mode 100644
index 00000000..aa502a1f
--- /dev/null
+++ b/src/ltbio/biosignals/units.pyi
@@ -0,0 +1,106 @@
+# -*- encoding: utf-8 -*-
+
+# ===================================
+
+# IT - LongTermBiosignals
+
+# Package: biosignals
+# Module: Unit
+# Description: Defines relevant units for electrical and mechanical measures, and possible associated multipliers.
+
+# Contributors: João Saraiva
+# Created: 22/04/2022
+# Last Updated: 22/07/2022
+
+# ===================================
+
+from abc import ABC, abstractmethod
+from enum import unique, Enum
+from typing import Callable
+
+from numpy import array
+
+
+@unique
+class Multiplier(Enum):
+    """ Common multipliers used when describing orders of magnitude."""
+    m: float
+    u: float
+    n: float
+    k: float
+    M: float
+    G: float
+    _: float
+
+
+class Unit(ABC):
+
+    SHORT: str
+    # Subclasses should override this with the conventional short form of the unit, usually one to three letters.
+
+    # INITIALIZER
+    def __init__(self, multiplier: Multiplier = Multiplier._) -> None: ...
+
+    # BUILT-INS
+    def __str__(self) -> str: ...
+    def __repr__(self) -> str: ...
+    def __eq__(self, other: Unit) -> bool: ...
+
+    # GETTERS
+    @property
+    def multiplier(self) -> Multiplier: ...
+    @property
+    def prefix(self) -> str: ...
+
+    # TRANSFER FUNCTION TO OTHER UNITS
+    @abstractmethod
+    def convert_to(self, unit: type) -> Callable[[array], array]: ...
+
+    # SERIALIZATION
+    __SERIALVERSION: int = 1
+    def __getstate__(self) -> tuple: ...
+    def __setstate__(self, state: tuple) -> None: ...
+
+
+class Unitless(Unit):
+    SHORT = 'n.d.'
+
+class G(Unit):
+    SHORT = "G"
+
+
+class Volt(Unit):
+    SHORT = "V"
+
+
+class Siemens(Unit):
+    SHORT = "S"
+
+
+class DegreeCelsius(Unit):
+    SHORT = "ºC"
+
+
+class BeatsPerMinute(Unit):
+    SHORT = "bpm"
+
+
+class Decibels(Unit):
+    SHORT = "dB"
+
+
+class Grams(Unit):
+    SHORT = "g"
+
+
+class Second(Unit):
+    SHORT = "s"
+
+
+class Frequency(float):
+    def __init__(self, value: float) -> None: ...
+    def __str__(self) -> str: ...
+    def __repr__(self) -> str: ...
+    def __eq__(self, other) -> bool: ...
+    def __float__(self) -> float: ...
+    def __copy__(self) -> Frequency: ...
diff --git a/src/ltbio/clinical/Patient.py b/src/ltbio/clinical/Patient.py
index c4a6ac76..843814db 100644
--- a/src/ltbio/clinical/Patient.py
+++ b/src/ltbio/clinical/Patient.py
@@ -83,6 +83,23 @@ def get_protected_info(self):
         return
     """
 
+    @staticmethod
+    def generate_random_code(length: int = 4, letters: bool = False, numbers: bool = False) -> str | int:
+        """
+        Generates a random code with the given length (4 figures, by default).
+        """
+        import random
+        import string
+        if letters and numbers:
+            return ''.join(random.choices(string.ascii_uppercase + string.digits, k=length))
+        elif letters:
+            return ''.join(random.choices(string.ascii_uppercase, k=length))
+        elif numbers:
+            return int(''.join(random.choices(string.digits, k=length)))
+        else:
+            raise ValueError("At least one of 'letters' or 'numbers' must be True.")
+
+
     def __getstate__(self):
         """
         1: code
diff --git a/src/ltbio/clinical/conditions/Epilepsy.py b/src/ltbio/clinical/conditions/Epilepsy.py
index ad19d517..697e3cd0 100644
--- a/src/ltbio/clinical/conditions/Epilepsy.py
+++ b/src/ltbio/clinical/conditions/Epilepsy.py
@@ -20,7 +20,7 @@
 from .. 
import BodyLocation, Semiology from .MedicalCondition import MedicalCondition -from ...biosignals.timeseries.Event import Event +from ...biosignals._Event import Event @unique class SeizureOnset(Enum): diff --git a/src/ltbio/clinical/medications/Medication.py b/src/ltbio/clinical/medications/Medication.py index 7a0d536f..e67a7ec9 100644 --- a/src/ltbio/clinical/medications/Medication.py +++ b/src/ltbio/clinical/medications/Medication.py @@ -16,7 +16,7 @@ from abc import ABC, abstractmethod -from ltbio.biosignals.timeseries.Unit import Unit +from ltbio.biosignals.units import Unit class Medication(ABC): diff --git a/tests/biosignals/modalities/test_Biosignal.py b/tests/biosignals/Biosignal/test_Biosignal.py similarity index 96% rename from tests/biosignals/modalities/test_Biosignal.py rename to tests/biosignals/Biosignal/test_Biosignal.py index af29cfde..aafb3797 100644 --- a/tests/biosignals/modalities/test_Biosignal.py +++ b/tests/biosignals/Biosignal/test_Biosignal.py @@ -1,14 +1,13 @@ import unittest -from datetime import datetime, timedelta from os import remove -from ltbio.biosignals.timeseries.Unit import * -from ltbio.biosignals.modalities.Biosignal import * -from ltbio.biosignals.modalities.ECG import ECG -from ltbio.biosignals.modalities.EDA import EDA -from ltbio.biosignals.sources.HSM import HSM +from ltbio.biosignals.units import * +from ltbio.biosignals._Biosignal import * +from ltbio.biosignals.modalities._ECG import ECG +from ltbio.biosignals.modalities._EDA import EDA +from ltbio.biosignals.sources._HSM import HSM from ltbio.biosignals.timeseries.Frequency import Frequency -from ltbio.biosignals.timeseries.Timeseries import Timeseries +from ltbio.biosignals._Timeseries import Timeseries from ltbio.clinical.conditions.Epilepsy import Epilepsy from ltbio.clinical.BodyLocation import BodyLocation from ltbio.clinical.Patient import Patient, Sex @@ -229,8 +228,8 @@ def test_concatenate_channels_of_two_biosignals(cls): cls.assertEqual(ecg3["c"][initial2], cls.samples3[0]) cls.assertEqual(ecg3["b"][cls.initial1], cls.samples2[0]) cls.assertEqual(ecg3["d"][initial2], cls.samples1[0]) - cls.assertEqual(ecg3.initial_datetime, cls.initial1) - cls.assertEqual(ecg3.final_datetime, cls.ts3.final_datetime+timedelta(days=1)) + cls.assertEqual(ecg3.start, cls.initial1) + cls.assertEqual(ecg3.end, cls.ts3.end + timedelta(days=1)) # This should not work with cls.assertRaises(TypeError): # different types; e.g. 
ecg + eda
diff --git a/tests/biosignals/Biosignal/test_builtins.py b/tests/biosignals/Biosignal/test_builtins.py
new file mode 100644
index 00000000..70d8646d
--- /dev/null
+++ b/tests/biosignals/Biosignal/test_builtins.py
@@ -0,0 +1,65 @@
+import unittest
+
+from ltbio.biosignals._Timeline import Timeline
+from ...resources.biosignals import *
+
+
+class BiosignalBuiltinsTestCase(unittest.TestCase):
+
+    @classmethod
+    def setUpClass(cls):
+        # Single-channel
+        cls.alpha = get_biosignal_alpha()  # contiguous
+        cls.beta = get_biosignal_beta()  # discontiguous
+        # Multi-channel
+        cls.gamma = get_biosignal_gamma()  # contiguous
+        cls.delta = get_biosignal_delta()  # discontiguous
+
+
+    # BUILT-INS (Basic)
+    def test_len_on_single_channel(self):
+        # Contiguous
+        self.assertEqual(len(self.alpha), get_segment_length('small'))
+        # Discontiguous
+        self.assertEqual(len(self.beta), get_segment_length('medium'))
+
+    def test_len_on_multi_channel(self):
+        # With different lengths (returns dict):
+        # Contiguous
+        res = {channel_name_a: get_segment_length('small'),
+               channel_name_b: get_segment_length('small') + get_segment_length('medium'),
+               channel_name_c: get_segment_length('small') + get_segment_length('medium') + get_segment_length('large')}
+        self.assertEqual(len(self.gamma), res)
+        # Discontiguous
+        res = {channel_name_a: get_segment_length('small') + get_segment_length('medium'),
+               channel_name_b: get_segment_length('small') + get_segment_length('medium') + get_segment_length('large')}
+        self.assertEqual(len(self.delta), res)
+
+        # With same length (returns int):
+        x = NoModalityBiosignal({channel_name_a: get_timeseries('medium', 2, False, sf_low, units_volt),
+                                 channel_name_b: get_timeseries('medium', 3, False, sf_low, units_volt)})
+        res = get_segment_length('medium')
+        self.assertEqual(len(x), res)
+
+    def test_str(self):
+        res = str(self.gamma)
+        self.assertIsInstance(res, str)  # a string that
+        self.assertIn(get_biosignal_name(2), res)  # contains the name
+        self.assertIn(NoModalityBiosignal.__name__, res)  # the modality
+        self.assertIn(str(3), res)  # and the number of channels
+
+    def test_repr(self):
+        res = repr(self.gamma)
+        self.assertIsInstance(res, str)  # a string that
+        self.assertIn(get_biosignal_name(2), res)  # contains the name
+        self.assertIn(NoModalityBiosignal.__name__, res)  # the modality
+        self.assertIn(str(3), res)  # the number of channels
+        self.assertIn(str(source), res)  # the source
+        self.assertIn(str(sf_high), res)  # and the sampling frequency
+
+    def test_iter(self):
+        pass
+
+
+if __name__ == '__main__':
+    unittest.main()
diff --git a/tests/biosignals/Biosignal/test_get_properties.py b/tests/biosignals/Biosignal/test_get_properties.py
new file mode 100644
index 00000000..ad9f1ca6
--- /dev/null
+++ b/tests/biosignals/Biosignal/test_get_properties.py
@@ -0,0 +1,90 @@
+import unittest
+
+from ltbio.biosignals._Timeline import Timeline
+from ...resources.biosignals import *
+
+
+class GetBiosignalPropertiesTestCase(unittest.TestCase):
+
+    @classmethod
+    def setUpClass(cls):
+        cls.alpha = get_biosignal_alpha()
+        cls.gamma = get_biosignal_gamma()
+
+    def test_get_channel_names(self):
+        self.assertEqual(self.alpha.channel_names, (channel_name_a, ))
+        self.assertEqual(self.gamma.channel_names, (channel_name_a, channel_name_b, channel_name_c,))
+
+    def test_get_channels(self):
+        alpha_channels = self.alpha.get_channels()
+        beta_channels = self.gamma.get_channels()
+        for x in (alpha_channels, beta_channels):
+            self.assertIsInstance(x, tuple)
+            for ch in x:
+                self.assertIsInstance(ch, Timeseries)
+        
self.assertEqual(len(alpha_channels), 1) + self.assertEqual(len(beta_channels), 3) + + def test_has_single_channel(self): + self.assertTrue(self.alpha.has_single_channel) + self.assertFalse(self.gamma.has_single_channel) + + def test_get_n_channels(self): + self.assertEqual(self.alpha.n_channels, 1) + self.assertEqual(self.gamma.n_channels, 3) + + def test_get_patient(self): + self.assertEqual(self.alpha.patient, patient_M) + self.assertEqual(self.gamma.patient, patient_F) + + def test_get_acquisition_location(self): + self.assertEqual(self.alpha.acquisition_location, location_C) + self.assertEqual(self.gamma.acquisition_location, location_W) + + def test_get_source(self): + self.assertEqual(self.alpha.source, source) + self.assertEqual(self.gamma.source, source) + + def test_get_name(self): + self.assertEqual(self.alpha.name, get_biosignal_name(1)) + self.assertEqual(self.gamma.name, get_biosignal_name(2)) + + def test_get_sampling_frequency(self): + self.assertEqual(self.alpha.sampling_frequency, sf_low) + self.assertEqual(self.gamma.sampling_frequency, sf_high) + + def test_get_sampling_frequency_when_different(self): + pass + + def test_get_units(self): + self.assertEqual(self.alpha.units, units_volt) + self.assertEqual(self.gamma.units, units_siemens) + + def test_get_units_when_different(self): + pass + + def test_get_start(self): + self.assertEqual(self.alpha.start, start_a) + self.assertEqual(self.gamma.start, start_a) + + def test_get_end(self): + self.assertEqual(self.alpha.end, get_timeseries_end('small', False, sf_low)) + self.assertEqual(self.gamma.end, get_timeseries_end('large', False, sf_high)) + + def test_get_duration(self): + self.assertEqual(self.alpha.duration, get_timeseries_duration('small', False, sf_low)) + self.assertEqual(self.gamma.duration, get_timeseries_duration('small', False, sf_low)) + + def test_get_domain(self): + alpha_domain = self.alpha.domain + beta_domain = self.gamma.domain + for x in (alpha_domain, beta_domain): + self.assertIsInstance(x, Timeline) + self.assertEqual(alpha_domain.group_names, self.alpha.channel_names) + self.assertEqual(beta_domain.group_names, self.gamma.channel_names) + self.assertEqual(alpha_domain.duration, self.alpha.duration) + self.assertEqual(beta_domain.duration, self.gamma.duration) + + +if __name__ == '__main__': + unittest.main() diff --git a/tests/biosignals/Biosignal/test_set_properties.py b/tests/biosignals/Biosignal/test_set_properties.py new file mode 100644 index 00000000..9fcc6e80 --- /dev/null +++ b/tests/biosignals/Biosignal/test_set_properties.py @@ -0,0 +1,71 @@ +import unittest + +from ltbio._core.exceptions import ChannelNotFoundError +from ...resources.biosignals import * + + +class SetBiosignalPropertiesTestCase(unittest.TestCase): + + def setUp(self): + self.alpha = get_biosignal_alpha() + + def test_set_name(self): + old_value = self.alpha.name + new_value = "New Name" + self.assertEqual(self.alpha.name, old_value) + self.alpha.name = new_value + self.assertEqual(self.alpha.name, new_value) + + def test_set_name_with_non_string_raises_error(self): + with self.assertRaises(ValueError): + self.alpha.name = 1 + + def test_set_patient(self): + old_value = self.alpha.patient + new_value = patient_F + self.assertEqual(self.alpha.patient, old_value) + self.alpha.patient = new_value + self.assertEqual(self.alpha.patient, new_value) + + def test_set_patient_with_non_Patient_raises_error(self): + with self.assertRaises(ValueError): + self.alpha.patient = "KSJ4" + + def test_set_acquisition_location(self): + 
old_value = self.alpha.acquisition_location + new_value = BodyLocation.FRONTAL_R + self.assertEqual(self.alpha.acquisition_location, old_value) + self.alpha.acquisition_location = new_value + self.assertEqual(self.alpha.acquisition_location, new_value) + + def test_set_acquisition_location_with_non_BodyLocation_raises_error(self): + with self.assertRaises(ValueError): + self.alpha.acquisition_location = "FRONTAL_R" + + def test_set_channel_name_with_string(self): + old_value = channel_name_a + new_value = channel_name_c + self.assertEqual(self.alpha.channel_names.pop(), old_value) + self.alpha.set_channel_name(old_value, new_value) + self.assertEqual(self.alpha.channel_names.pop(), new_value) + + def test_set_channel_name_with_BodyLocation(self): + old_value = channel_name_a + new_value = channel_name_b + self.assertEqual(self.alpha.channel_names.pop(), old_value) + self.alpha.set_channel_name(old_value, new_value) + self.assertEqual(self.alpha.channel_names.pop(), new_value) + + def test_set_channel_name_with_other_raises_error(self): + with self.assertRaises(ValueError): + self.alpha.set_channel_name(channel_name_a, 4) + + def test_set_unknown_channel_name_raises_error(self): + with self.assertRaises(ChannelNotFoundError): + self.alpha.set_channel_name(channel_name_d, channel_name_b) + + + + +if __name__ == '__main__': + unittest.main() diff --git a/tests/dependencies/__init__.py b/tests/dependencies/__init__.py new file mode 100644 index 00000000..e69de29b diff --git a/tests/dependencies/_import_from_source.py b/tests/dependencies/_import_from_source.py new file mode 100644 index 00000000..96f681de --- /dev/null +++ b/tests/dependencies/_import_from_source.py @@ -0,0 +1,156 @@ +import sys +from ast import parse, NodeVisitor, ImportFrom +from importlib import util as import_util, import_module +from importlib.machinery import ModuleSpec +from os import path +from pkgutil import iter_modules +from typing import Any, List, Iterator + +import pytest + + +def _is_test_module(module_name: str) -> bool: + components = module_name.split(".") + + return len(components) >= 2 and components[1] == "tests" + + +def _is_package(module_spec: ModuleSpec) -> bool: + return module_spec.origin is not None and module_spec.origin.endswith("__init__.py") + + +def _recurse_modules(module_name: str, ignore_tests: bool, packages_only: bool) -> Iterator[str]: + if ignore_tests and _is_test_module(module_name): + return + + module_spec = import_util.find_spec(module_name) + + if module_spec is not None and module_spec.origin is not None: + if not (packages_only and not _is_package(module_spec)): + yield module_name + + for child in iter_modules([path.dirname(module_spec.origin)]): + if child.ispkg: + yield from _recurse_modules( + f"{module_name}.{child.name}", + ignore_tests=ignore_tests, + packages_only=packages_only, + ) + elif not packages_only: + yield f"{module_name}.{child.name}" + + +class _ImportFromSourceChecker(NodeVisitor): + def __init__(self, module: str): + module_spec = import_util.find_spec(module) + is_pkg = ( + module_spec is not None and module_spec.origin is not None and module_spec.origin.endswith("__init__.py") + ) + + self._module = module if is_pkg else ".".join(module.split(".")[:-1]) + self._top_level_module = self._module.split(".")[0] + + def visit_ImportFrom(self, node: ImportFrom) -> Any: + # Check that there are no relative imports that attempt to read from a parent module. We've found that there + # generally is no good reason to have such imports. 
+ if node.level >= 2: + raise ValueError( + f"Import in {self._module} attempts to import from parent module using relative import. Please " + f"switch to absolute import instead." + ) + + # Figure out which module to import in the case where this is a... + if node.level == 0: + # (1) absolute import where a submodule is specified + assert node.module is not None + module_to_import: str = node.module + elif node.module is None: + # (2) relative import where no module is specified (ie: "from . import foo") + module_to_import = self._module + else: + # (3) relative import where a submodule is specified (ie: "from .bar import foo") + module_to_import = f"{self._module}.{node.module}" + + # We're only looking at imports of objects defined inside this top-level package + if not module_to_import.startswith(self._top_level_module): + return + + # Actually import the module and iterate through all the objects potentially exported by it. + module = import_module(module_to_import) + for alias in node.names: + assert hasattr(module, alias.name) + attr = getattr(module, alias.name) + + # For some objects (pretty much everything except for classes and functions), we are not able to figure + # out which module they were defined in... in that case there's not much we can do here, since we cannot + # easily figure out where we *should* be importing this from in the first place. + if isinstance(attr, type) or callable(attr): + attribute_module = attr.__module__ + else: + continue + + # Figure out where we should be importing this class from, and assert that the *actual* import we found + # matches the place we *should* import from. + should_import_from = self._get_module_should_import(module_to_import=attribute_module) + assert module_to_import == should_import_from, ( + f"Imported {alias.name} from {module_to_import}, which is not the public module where this object " + f"is defined. Please import from {should_import_from} instead." + ) + + def _get_module_should_import(self, module_to_import: str) -> str: + """ + This function figures out the correct import path for "module_to_import" from the "self._module" module in + this instance. The trivial solution here would be to always just return "module_to_import", but we want + to actually take into account the fact that some submodules can be "private" (ie: start with an "_"), in + which case we should only import from them if self._module is internal to that private module. + """ + module_components = module_to_import.split(".") + result: List[str] = [] + + for component in module_components: + if component.startswith("_") and not self._module.startswith(".".join(result)): + break + result.append(component) + + return ".".join(result) + + +def _apply_visitor(module: str, visitor: NodeVisitor) -> None: + module_spec = import_util.find_spec(module) + assert module_spec is not None + assert module_spec.origin is not None + + with open(module_spec.origin, "r") as source_file: + ast = parse(source=source_file.read(), filename=module_spec.origin) + + visitor.visit(ast) + + +def _test_imports_from_source(module: str) -> None: + _apply_visitor(module=module, visitor=_ImportFromSourceChecker(module)) + + +def add_module_organization_tests(module_name: str) -> None: + """ + This function dynamically generates a set of python tests which can be used to ensure that all modules follow + the convention "classes and functions should always be imported from the module they are defined in (or the + closest public module to that)". 
+ + Let's say that you have a package "foo", and want to use this function. In that case, go into your test module + (probably "foo.tests") and create a test file that imports and calls `add_module_organization_tests(__name__)`. + Once this is defined, you can use pytest to run your tests, and note that a unique test has been generated for + each file in your project. The tests will scan each file and look for cases of imports that do not follow the + convention above. If the test finds any violations, it will error out with a message similar to: + + AssertionError: Imported Child from objects, which is not the public module where this object is defined. Please + import from objects.child instead. + """ + module_root = module_name.split(".")[0] + setattr( + sys.modules[module_name], + "test_imports_from_source", + pytest.mark.parametrize( + "module", + list(_recurse_modules(module_root, ignore_tests=True, packages_only=False)), + )(_test_imports_from_source), + ) diff --git a/tests/dependencies/test_import_from_source.py b/tests/dependencies/test_import_from_source.py new file mode 100644 index 00000000..152dcd7f --- /dev/null +++ b/tests/dependencies/test_import_from_source.py @@ -0,0 +1,3 @@ +from ._import_from_source import add_module_organization_tests + +add_module_organization_tests(__name__) diff --git a/tests/features/test_FeatureSelector.py b/tests/features/test_FeatureSelector.py index f5b200b6..82f4cc26 100644 --- a/tests/features/test_FeatureSelector.py +++ b/tests/features/test_FeatureSelector.py @@ -5,7 +5,7 @@ from numpy import ndarray -from ltbio.biosignals.timeseries.Timeseries import Timeseries +from ltbio.biosignals._Timeseries import Timeseries from ltbio.features.FeatureSelector import FeatureSelector diff --git a/tests/pipeline/test_Packet.py b/tests/pipeline/test_Packet.py index b4ba5262..2457d85f 100644 --- a/tests/pipeline/test_Packet.py +++ b/tests/pipeline/test_Packet.py @@ -1,7 +1,7 @@ import unittest from datetime import datetime -from ltbio.biosignals.timeseries.Timeseries import Timeseries +from ltbio.biosignals._Timeseries import Timeseries from ltbio.pipeline.Packet import Packet diff --git a/tests/pipeline/test_Pipeline.py b/tests/pipeline/test_Pipeline.py index 8a0ccb9d..ecc2d8be 100644 --- a/tests/pipeline/test_Pipeline.py +++ b/tests/pipeline/test_Pipeline.py @@ -1,8 +1,8 @@ import unittest from datetime import timedelta, datetime -from ltbio.biosignals.modalities.ECG import ECG -from ltbio.biosignals.timeseries.Timeseries import Timeseries +from ltbio.biosignals.modalities._ECG import ECG +from ltbio.biosignals._Timeseries import Timeseries from ltbio.decision.DecisionMaker import DecisionMaker from ltbio.decision.NAryDecision import NAryDecision from ltbio.features.FeatureExtractor import FeatureExtractor diff --git a/tests/pipeline/test_PipelineUnitsUnion.py b/tests/pipeline/test_PipelineUnitsUnion.py index 85f67533..5622969b 100644 --- a/tests/pipeline/test_PipelineUnitsUnion.py +++ b/tests/pipeline/test_PipelineUnitsUnion.py @@ -1,7 +1,7 @@ import unittest from datetime import datetime, timedelta -from ltbio.biosignals.timeseries.Timeseries import Timeseries +from ltbio.biosignals._Timeseries import Timeseries from ltbio.features.FeatureExtractor import FeatureExtractor from ltbio.features.FeatureSelector import FeatureSelector from ltbio.features.Features import TimeFeatures diff --git a/tests/pipeline/test_SinglePipelineUnit.py b/tests/pipeline/test_SinglePipelineUnit.py index ab65f413..308859e4 100644 --- 
a/tests/pipeline/test_SinglePipelineUnit.py +++ b/tests/pipeline/test_SinglePipelineUnit.py @@ -1,7 +1,7 @@ import unittest from datetime import datetime, timedelta -from ltbio.biosignals.timeseries.Timeseries import Timeseries +from ltbio.biosignals._Timeseries import Timeseries from ltbio.decision.DecisionMaker import DecisionMaker from ltbio.decision.NAryDecision import NAryDecision from ltbio.features.FeatureExtractor import FeatureExtractor diff --git a/tests/processing/filters/test_FrequencyDomainFilter.py b/tests/processing/filters/test_FrequencyDomainFilter.py index d92b115f..99bf3ce3 100644 --- a/tests/processing/filters/test_FrequencyDomainFilter.py +++ b/tests/processing/filters/test_FrequencyDomainFilter.py @@ -4,8 +4,8 @@ from numpy import array, allclose -from ltbio.biosignals.modalities.ECG import ECG -from ltbio.biosignals.sources.HEM import HEM +from ltbio.biosignals.modalities._ECG import ECG +from ltbio.biosignals.sources._HEM import HEM from ltbio.processing.filters.FrequencyDomainFilter import FrequencyDomainFilter, FrequencyResponse, BandType diff --git a/tests/processing/formatters/test_Normalizer.py b/tests/processing/formatters/test_Normalizer.py index eeb326af..cb5a56ae 100644 --- a/tests/processing/formatters/test_Normalizer.py +++ b/tests/processing/formatters/test_Normalizer.py @@ -1,11 +1,10 @@ import unittest -from datetime import datetime, timedelta +from datetime import datetime from numpy import allclose -from ltbio.biosignals.modalities.ECG import ECG -from ltbio.biosignals.sources.MITDB import MITDB -from ltbio.biosignals.timeseries.Timeseries import OverlappingTimeseries +from ltbio.biosignals.modalities._ECG import ECG +from ltbio.biosignals.sources._MITDB import MITDB from ltbio.processing.formaters.Normalizer import Normalizer diff --git a/tests/processing/test_Segmenter.py b/tests/processing/test_Segmenter.py index e6df42ea..2b65199e 100644 --- a/tests/processing/test_Segmenter.py +++ b/tests/processing/test_Segmenter.py @@ -1,9 +1,9 @@ import unittest from datetime import datetime, timedelta -from ltbio.biosignals.modalities.ECG import ECG -from ltbio.biosignals.sources.MITDB import MITDB -from ltbio.biosignals.timeseries.Timeseries import OverlappingTimeseries +from ltbio.biosignals.modalities._ECG import ECG +from ltbio.biosignals.sources._MITDB import MITDB +from ltbio.biosignals._Timeseries import OverlappingTimeseries from ltbio.processing.formaters.Segmenter import Segmenter diff --git a/tests/resources/biosignals.py b/tests/resources/biosignals.py new file mode 100644 index 00000000..3aebac2a --- /dev/null +++ b/tests/resources/biosignals.py @@ -0,0 +1,127 @@ +# -- encoding: utf-8 -- +# =================================== +# ScientISST LTBio | Long-Term Biosignals +# +# Package: tests.resources +# Module: biosignals +# Description: A set of Biosignals objects for testing purposes +# +# Contributors: João Saraiva +# Created: 17/05/2016 +# Last Updated: 19/05/2016 +# =================================== +from ltbio.biosignals import Biosignal +from ltbio.clinical import Patient, BodyLocation +from ltbio.clinical.Patient import Sex + +from .timeseries import * + + +# METADATA +# You can use these variables to assert the metadata of the Biosignal objects or to create personalized ones. + +# Sources +# There will be no sources, because these Biosignals are created ad-hoc. +# Tests with everything related to sources will be done separately. 
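+# A single module-level placeholder is therefore defined next and passed to every factory below.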
+source = None
+
+# Patients
+patient_M = Patient(101, "João Miguel Areias Saraiva", 23, Sex.M)
+patient_F = Patient("KS7M", "Maria de Lurdes Vale e Sousa", 73, Sex.F)
+
+# Acquisition Locations
+location_C = BodyLocation.CHEST
+location_W = BodyLocation.WRIST_L
+
+# Names
+def get_biosignal_name(group: int) -> str:
+    return f"Test Biosignal of Group {group}"
+
+# Channel names
+channel_name_a = "ch1"
+channel_name_b = BodyLocation.V2
+channel_name_c = "ch3"
+channel_name_d = BodyLocation.V4
+
+
+# Mock-up class representing no modality, because Biosignal is abstract
+class NoModalityBiosignal(Biosignal):
+    ...
+
+
+def get_biosignal(channels: tuple[tuple[str, int, bool, float, str]], patient, location):
+    """
+    Use get_biosignal to get a new Biosignal object populated for testing purposes.
+
+    :param channels: A tuple containing instructions on how to generate each channel.
+    Each value must also be a tuple of (length, group, discontiguous?, sf, units).
+    The channel names are assigned in the following order: 'ch1', V2, 'ch3', V4.
+
+    :param patient: Which test patient to associate: 'M' for male or 'F' for female
+    :param location: Acquisition location: 'chest' or 'wrist'
+    """
+
+    if patient == 'M':
+        patient = patient_M
+    if patient == 'F':
+        patient = patient_F
+
+    if location == 'chest':
+        location = location_C
+    if location == 'wrist':
+        location = location_W
+
+    name = get_biosignal_name(channels[0][1])  # use the group of the first channel
+
+    channel_names = (channel_name_a, channel_name_b, channel_name_c, channel_name_d)
+    timeseries = {}
+
+    for channel_name, instructions in zip(channel_names, channels):
+        length, group, discontiguous, sf, units = instructions
+        timeseries[channel_name] = get_timeseries(length, group, discontiguous, sf, units)
+
+    return NoModalityBiosignal(timeseries, source, patient, location, name)
+
+
+# CLASSIC EXAMPLES
+
+def get_biosignal_alpha():
+    """
+    1 channel with group 1 small contiguous timeseries, 2 Hz, mV, associated with patient_M and location_C
+    """
+    length, group = 'small', 1
+    return NoModalityBiosignal({channel_name_a: get_timeseries(length, group, False, 'low', 'volt')},
+                               source, patient_M, location_C, get_biosignal_name(1))
+
+
+def get_biosignal_beta():
+    """
+    1 channel with group 1 discontiguous medium timeseries, 2 Hz, mV, associated with patient_M and location_C
+    """
+    length, group = 'medium', 1
+    return NoModalityBiosignal({channel_name_a: get_timeseries(length, group, True, 'low', 'volt')},
+                               source, patient_M, location_C, get_biosignal_name(1))
+
+
+def get_biosignal_gamma():
+    """
+    3 channels with group 2 variable length contiguous timeseries, 4 Hz, uS, associated with patient_F and location_W
+    """
+    length, group = None, 2
+    return NoModalityBiosignal({channel_name_a: get_timeseries('small', group, False, 'high', 'siemens'),
+                                channel_name_b: get_timeseries('medium', group, False, 'high', 'siemens'),
+                                channel_name_c: get_timeseries('large', group, False, 'high', 'siemens'),
+                                },
+                               source, patient_F, location_W, get_biosignal_name(group))
+
+
+def get_biosignal_delta():
+    """
+    2 channels with group 2 variable length discontiguous timeseries, 4 Hz, uS, associated with patient_F and location_W
+    """
+    length, group = None, 2
+    return NoModalityBiosignal({channel_name_a: get_timeseries('medium', group, True, 'high', 'siemens'),
+                                channel_name_b: get_timeseries('large', group, True, 'high', 'siemens'),
+                                },
+                               source, patient_F, location_W, get_biosignal_name(group))
+
diff --git 
a/tests/resources/segments.py b/tests/resources/segments.py new file mode 100644 index 00000000..31834c23 --- /dev/null +++ b/tests/resources/segments.py @@ -0,0 +1,75 @@ +# -- encoding: utf-8 -- +# =================================== +# ScientISST LTBio | Long-Term Biosignals +# +# Package: tests.resources +# Module: segments +# Description: Factory of Segment objects for testing purposes +# +# Contributors: João Saraiva +# Created: 17/05/2016 +# Last Updated: 19/05/2016 +# =================================== + +import numpy as np + +from ltbio.biosignals import Segment + + +# ARRAYS +# You can use this to assert the sample values according to length and group +small_samples_1 = np.array([506.0, 501.0, 497.0, 374.5, 383.4, 294.2]) +small_samples_2 = np.array([502.0, 505.0, 505.0, 924.3, 293.4, 383.5]) +small_samples_3 = np.array([527.0, 525.0, 525.0, 849.2, 519.5, 103.4]) +medium_samples_1 = np.array([686.4, 753.6, 845.9, 806.1, 247.7, 107.1, 598.2, 518.8, 502.5, 641.6, 582.9, 139.1]) +medium_samples_2 = np.array([412.3, 702.9, 731.2, 200.6, 517. , 428. , 298.9, 419.4, 289.5, 249.4, 880.1, 382.4]) +medium_samples_3 = np.array([678.9, 707.8, 144.4, 908.3, 723.2, 912.2, 789.5, 428.1, 919.8, 876. , 333.3, 709.1]) +large_samples_1 = np.array([ 49.5, 367. , 503.8, 111.5, 853.3, 503.1, 312. , 167.8, 417. , + 413.7, 449.7, 829.8, 306.6, 169.5, 774.4, 845. , 777.6, 605.6, + 208.9, 364.4, 364.5, 872.8, 704.1, 625.2]) +large_samples_2 = np.array([422.4, 989.6, 381. , 449.3, 231.6, 29.3, 753.9, 88. , 257.1, + 125.4, 666.5, 943.2, 900.4, 755.2, 857.1, 607.8, 97.8, 48. , + 86.2, 582.2, 317.1, 546.2, 97.5, 403.9]) +large_samples_3 = np.array([907.2, 787.4, 391.8, 505.4, 606. , 597.1, 957.9, 713.7, 957.7, + 151. , 725.3, 163.6, 882.9, 933.2, 3.9, 754.4, 892.5, 36.9, + 880.6, 139.6, 305.9, 508. , 618.6, 235.7]) + + +def get_segment_length(length: str) -> int: + if length == 'small': + return 6 + if length == 'medium': + return 12 + if length == 'large': + return 24 + + +def get_segment(length: str, group: int) -> Segment: + """ + Use get_segment to get a new Segment object with samples for testing purposes. + Samples were generated randomly with amplitude values between 0 and 1000. + + :param length: Length of the segment: 'small' = 6, 'medium' = 12, 'large' = 24. + :param group: Group of examples: 1, 2 or 3. 
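+
+    Example (illustrative): get_segment('small', 2) returns a Segment wrapping small_samples_2,
+    i.e. 6 samples.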
+ """ + if length == 'small': + if group == 1: + return Segment(small_samples_1) + if group == 2: + return Segment(small_samples_2) + if group == 3: + return Segment(small_samples_3) + if length == 'medium': + if group == 1: + return Segment(medium_samples_1) + if group == 2: + return Segment(medium_samples_2) + if group == 3: + return Segment(medium_samples_3) + if length == 'large': + if group == 1: + return Segment(large_samples_1) + if group == 2: + return Segment(large_samples_2) + if group == 3: + return Segment(large_samples_3) diff --git a/tests/resources/timeseries.py b/tests/resources/timeseries.py new file mode 100644 index 00000000..241d9f00 --- /dev/null +++ b/tests/resources/timeseries.py @@ -0,0 +1,105 @@ +# -- encoding: utf-8 -- +# =================================== +# ScientISST LTBio | Long-Term Biosignals +# +# Package: tests.resources +# Module: timeseries +# Description: A set of Timeseries objects for testing purposes +# +# Contributors: João Saraiva +# Created: 17/05/2016 +# Last Updated: 19/05/2016 +# =================================== +from datetime import datetime, timedelta + +from ltbio.biosignals.units import * +from ltbio.biosignals import Timeseries +from .segments import * + + +# METADATA +# You can use these variables to assert the metadata of the Timeseries objects. + +# Sampling frequencies +sf_low = 2. +sf_high = 4. + +# Units +units_volt = Volt(Multiplier.m) +units_siemens = Siemens(Multiplier.u) + +# Start timepoints +start_a = datetime(2000, 1, 1, 0, 0, 0) +start_b = datetime(2000, 1, 1, 0, 10, 0) +start_c = datetime(2000, 1, 1, 1, 0, 0) +start_d = datetime(2000, 1, 2, 0, 0, 0) + +# End timepoints +def get_timeseries_end(length: str, discontiguous: bool, sf: str) -> datetime: + if sf == 'low': + sf = sf_low + if sf == 'high': + sf = sf_high + + if not discontiguous: + return start_a + timedelta(seconds=get_segment_length(length)/sf) + else: + if length == 'medium': + return start_b + timedelta(seconds=get_segment_length('medium')/sf) + elif length == 'large': + return start_c + timedelta(seconds=get_segment_length('large') / sf) + +def get_timeseries_duration(length: str, discontiguous: bool, sf: str) -> timedelta: + if not discontiguous: + if sf == 'low': + sf = sf_low + if sf == 'high': + sf = sf_high + return timedelta(seconds=get_segment_length(length)/sf) + else: + if length == 'medium': + return get_timeseries_duration('small', False, sf) + timedelta(seconds=get_segment_length('medium')/sf) + elif length == 'large': + return get_timeseries_duration('medium', True, sf) + timedelta(seconds=get_segment_length('large')/sf) + +# Name +def get_timeseries_name(group: int) -> str: + return f"Test Timeseries of Group {group}" + + +def get_timeseries(length: str, group: int, discontiguous: bool, sf: str, units: str): + """ + Use get_timeseries to get a new Timeseries object populated for testing purposes. + If contiguous, it starts at 'start_a'. + If discontiguous, the first segment starts at 'start_a', the second at 'start_b', and the third at 'start_c'. + + :param length: Length of the segments; and the number of samples, if discontiguous. + :param group: Group of examples: 1, 2 or 3. + :param discontiguous: Whether the Timeseries should be discontiguous. + :param sf: Sampling frequency: 'low' or 'high'. + :param units: 'volt' or 'siemens'. 
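+
+    Example (illustrative): get_timeseries('medium', 1, True, 'low', 'volt') returns a discontiguous
+    Timeseries with a 'small' segment at start_a and a 'medium' segment at start_b, at 2 Hz, in mV.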
+ """ + if sf == 'low': + sf = sf_low + if sf == 'high': + sf = sf_high + + if units == 'volt': + units = units_volt.__class__(units_volt.multiplier) + else: + units = units_siemens.__class__(units_siemens.multiplier) + + name = get_timeseries_name(group) + + if not discontiguous: + return Timeseries({start_a: get_segment(length, group)}, sf, units, name) + else: + if length == 'medium': + return Timeseries({start_a: get_segment('small', group), + start_b: get_segment('medium', group),}, sf, units, name) + elif length == 'large': + return Timeseries({start_a: get_segment('small', group), + start_b: get_segment('medium', group), + start_c: get_segment('large', group)}, sf, units, name) + + From be1e15db442f94deea1a801141ee50e3d43860f7 Mon Sep 17 00:00:00 2001 From: saraiva Date: Wed, 30 Aug 2023 10:38:46 +0200 Subject: [PATCH 26/47] More tests --- src/ltbio/biosignals/_Segment.py | 99 +++++++++++----- src/ltbio/biosignals/_Segment.pyi | 20 ++-- src/ltbio/biosignals/_Timeseries.py | 8 +- src/ltbio/biosignals/__init__.py | 2 +- .../Biosignal/test_get_properties.py | 80 +++++++++---- tests/biosignals/Segment/test_arithmetics.py | 102 ++++++++++++++++ tests/biosignals/Segment/test_builtins.py | 59 ++++++++++ .../biosignals/Segment/test_get_properties.py | 22 ++++ tests/biosignals/Segment/test_indexing.py | 63 ++++++++++ tests/biosignals/Segment/test_initializers.py | 24 ++++ tests/biosignals/Segment/test_joining.py | 37 ++++++ tests/biosignals/Segment/test_logics.py | 71 ++++++++++++ tests/biosignals/Segment/test_processing.py | 36 ++++++ .../biosignals/Segment/test_serialization.py | 52 +++++++++ .../biosignals/Segment/test_set_properties.py | 21 ++++ .../timeseries/test_TimeseriesSegment.py | 109 ------------------ tests/resources/biosignals.py | 86 +++++++------- tests/resources/timeseries.py | 8 +- 18 files changed, 672 insertions(+), 227 deletions(-) create mode 100644 tests/biosignals/Segment/test_arithmetics.py create mode 100644 tests/biosignals/Segment/test_builtins.py create mode 100644 tests/biosignals/Segment/test_get_properties.py create mode 100644 tests/biosignals/Segment/test_indexing.py create mode 100644 tests/biosignals/Segment/test_initializers.py create mode 100644 tests/biosignals/Segment/test_joining.py create mode 100644 tests/biosignals/Segment/test_logics.py create mode 100644 tests/biosignals/Segment/test_processing.py create mode 100644 tests/biosignals/Segment/test_serialization.py create mode 100644 tests/biosignals/Segment/test_set_properties.py delete mode 100644 tests/biosignals/timeseries/test_TimeseriesSegment.py diff --git a/src/ltbio/biosignals/_Segment.py b/src/ltbio/biosignals/_Segment.py index 936c73b6..7eb4fd17 100644 --- a/src/ltbio/biosignals/_Segment.py +++ b/src/ltbio/biosignals/_Segment.py @@ -16,9 +16,10 @@ # =================================== from datetime import datetime, timedelta +from os import remove from os.path import join from tempfile import mkstemp -from typing import Callable, Sequence, Any +from typing import Callable, Sequence, Any, Union import numpy as np from multimethod import multimethod @@ -53,20 +54,25 @@ def __init__(self, samples: ndarray | Sequence[float]): self.__samples = np.array(samples, dtype=float) # =================================== - # Properties (Getters) + # BUILT-INS (Basics) + def __copy__(self): + return Segment(self.__samples) - @property - def samples(self) -> ndarray: - return self.__samples.view() + def __str__(self) -> str: + return f"Segment ({len(self)})" - # =================================== - # 
Built-ins (Basics) + def __repr__(self) -> str: + return str(self) - def __len__(self): + def __len__(self) -> int: return len(self.__samples) - def __copy__(self): - return Segment(self.__samples.copy()) + # =================================== + # Properties (Getters) + + @property + def samples(self) -> ndarray: + return self.__samples.view() # =================================== # Built-ins (Joining Segments) @@ -129,14 +135,14 @@ def __sub__(self, other: float): return self._unary_operation(self, (lambda x: x - other)) @multimethod - def __mul__(self, other: 'Segment'): + def __mul__(self: 'Segment', other: 'Segment'): """Multiplies two Segments, sample by sample.""" return self._binary_operation((lambda x, y: x * y), self, other) @multimethod - def __mul__(self, other: float): + def __mul__(self: 'Segment', other: Union[int, float]): """Multiplies the Segment by a constant (contraction).""" - return self._unary_operation(self, (lambda x: x * other)) + return Segment(self.samples * other) @multimethod def __truediv__(self, other: 'Segment'): @@ -164,7 +170,16 @@ def __getitem__(self, index: int | slice | tuple): """ The built-in slicing and indexing (segment[x:y]) operations. """ - return self.__samples[index] + try: + if isinstance(index, tuple): + return tuple([self[ix] for ix in index]) + elif isinstance(index, slice | int): + res = self.__samples[index] + return Segment(res) if isinstance(res, np.ndarray) else res + else: + raise TypeError(f"Indexing type {type(index)} not supported.") + except IndexError as e: + raise IndexError(f"Index {index} out of bounds for Segment of length {len(self)}") def __iter__(self) -> iter: return iter(self.__samples) @@ -181,11 +196,21 @@ def min(self): # =================================== # Binary Logic - def __eq__(self, other): - return self.__samples == other.samples + @multimethod + def __eq__(self, other: 'Segment') -> bool: + return all(self.__samples == other.samples) + + @multimethod + def __eq__(self, other: Union[int, float]) -> bool: + return all(self.__samples == other) - def __ne__(self, other): - return self.__samples != other.samples + @multimethod + def __ne__(self, other: 'Segment') -> bool: + return all(self.__samples != other.samples) + + @multimethod + def __ne__(self, other: Union[int, float]) -> bool: + return all(self.__samples != other) # =================================== # PROCESSING @@ -210,30 +235,40 @@ def apply_and_return(self, operation: Callable, **kwargs) -> Any: # =================================== # SERIALIZATION + @property + def __is_memory_mapped(self): + return isinstance(self.__samples, memmap) + + @property + def __is_temp_memory_mapped(self): + return hasattr(self, '_Segment__memory_map') + def _memory_map(self, path): - if not isinstance(self.__samples, memmap): # Create a memory map for the array + if not self.__is_memory_mapped and not self.__is_temp_memory_mapped: # Create a memory map for the array _, file_name = mkstemp(dir=path, suffix='.segment') filepath = join(path, file_name) self.__memory_map = memmap(filepath, dtype='float32', mode='r+', shape=self.__samples.shape) self.__memory_map[:] = self.__samples[:] self.__memory_map.flush() # release memory in RAM; don't know if this is actually helping - def __hash__(self): - return hash(self.__initial_datetime) * hash(self.__final_datetime) * hash(self.__samples) + def __del__(self): + if self.__is_temp_memory_mapped: + self.__memory_map._mmap.close() + remove(self.__memory_map.filename) - __SERIALVERSION: int = 2 + __SERIALVERSION: int = 3 def 
__getstate__(self):
        """
        Version 3:
        1: __samples (ndarray)
        """
        if self.__is_memory_mapped:  # Case: has been saved as .biosignal before
            return (Segment._Segment__SERIALVERSION, self.__samples)
        elif self.__is_temp_memory_mapped:  # Case: being saved as .biosignal for the first time
            return (Segment._Segment__SERIALVERSION, self.__memory_map)
        else:  # Case: being called by deepcopy
            return (Segment._Segment__SERIALVERSION, self.__samples)

    def __setstate__(self, state):
        """
@@ -241,12 +276,16 @@ def __setstate__(self, state):
        1: __initial_datetime (datetime)
        2: __samples (ndarray)
        3: __sampling_frequency (Frequency)
+        Version 3:
+        1: __samples (ndarray)
        """
        if state[0] == 1 or state[0] == 2:
-            self.__initial_datetime, self.__samples, self.__sampling_frequency = state[1], state[2], state[3]
-            self.__final_datetime = self.initial_datetime + timedelta(seconds=len(self.__samples) / self.__sampling_frequency)
-            self.__is_filtered = False
-            self.__raw_samples = self.__samples
+            # Older states were (version, initial_datetime, samples, sampling_frequency);
+            # only the samples are kept by the new Segment, hence state[2].
+            self.__samples = state[2]
+        elif state[0] == 3:
+            self.__samples = state[1]
        else:
            raise IOError(
                f'Version of Segment object not supported. Serialized version: {state[0]}; '
diff --git a/src/ltbio/biosignals/_Segment.pyi b/src/ltbio/biosignals/_Segment.pyi
index 7a5dcac5..c9560f18 100644
--- a/src/ltbio/biosignals/_Segment.pyi
+++ b/src/ltbio/biosignals/_Segment.pyi
@@ -16,7 +16,7 @@
 # ===================================
 
 from datetime import datetime
-from typing import Sequence
+from typing import Sequence, Union
 
 from multimethod import multimethod
 from numpy import ndarray
@@ -56,10 +56,10 @@ class Segment():
     def __sub__(self, other: float) -> 'Segment': ...
 
     @multimethod
-    def __mul__(self, other: 'Segment') -> 'Segment': ...
+    def __mul__(self: 'Segment', other: 'Segment') -> 'Segment': ...
 
     @multimethod
-    def __mul__(self, other: float) -> 'Segment': ...
+    def __mul__(self: 'Segment', other: Union[int, float]) -> 'Segment': ...
 
     @multimethod
     def __truediv__(self, other: 'Segment') -> 'Segment': ...
@@ -75,16 +75,21 @@ class Segment():
 
     # BUILT-INS (Indexing)
-    def __getitem__(self, index: int) -> float | Segment: ...
+    def __getitem__(self, index: int | slice | tuple) -> float | Segment: ...
     def __iter__(self) -> iter: ...
 
     # BUILT-INS (Binary Logic)
+    @multimethod
     def __eq__(self, other: Segment) -> bool: ...
+
+    @multimethod
+    def __eq__(self, other: Union[int, float]) -> bool: ...
+
+    @multimethod
     def __ne__(self, other: Segment) -> bool: ...
 
-    # OTHER LOGIC WITH TIME
-    def overlaps(self, other) -> bool: ...
-    def adjacent(self, other) -> bool: ...
+    @multimethod
+    def __ne__(self, other: Union[int, float]) -> bool: ...
 
     # SHORTCUT STATISTICS
     def max(self) -> float: ...
@@ -99,6 +104,5 @@ class Segment():
     def diff(self) -> Segment: ...
 
     # SERIALIZATION
-    __SERIALVERSION: int = 2
     def __getstate__(self) -> tuple: ...
     def __setstate__(self, state: tuple) -> None: ...
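
A minimal sanity check for the samples-only state above (illustrative, not part of the patch;
it relies only on the __getstate__/__setstate__ and multimethod __eq__ shown in this diff):

    import pickle
    from ltbio.biosignals import Segment

    seg = Segment([1.0, 2.0, 3.0])
    restored = pickle.loads(pickle.dumps(seg))  # round-trips via serial version 3
    assert restored == seg                      # __eq__ compares samples element-wise
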
diff --git a/src/ltbio/biosignals/_Timeseries.py b/src/ltbio/biosignals/_Timeseries.py index 852c2fd4..308d05cd 100644 --- a/src/ltbio/biosignals/_Timeseries.py +++ b/src/ltbio/biosignals/_Timeseries.py @@ -668,12 +668,12 @@ def _plot(self, label:str = None): def apply(self, operator: Operator, inplace: bool = True, **kwargs): ... - @multimethod(Operation) - def undo(self, operation) -> None: + @multimethod + def undo(self, operation: Operation) -> None: ... - @multimethod(int) - def undo(self, operation) -> None: + @multimethod + def undo(self, operation: int) -> None: ... def _apply_operation(self, operation, **kwargs): diff --git a/src/ltbio/biosignals/__init__.py b/src/ltbio/biosignals/__init__.py index 565804ae..4c3cc5ea 100644 --- a/src/ltbio/biosignals/__init__.py +++ b/src/ltbio/biosignals/__init__.py @@ -6,7 +6,7 @@ from ._Event import Event from ._Timeline import Timeline from ._Timeseries import Timeseries - +from ._Segment import Segment def plot_comparison(*biosignals: Biosignal, show: bool = True, save_to: str = None): diff --git a/tests/biosignals/Biosignal/test_get_properties.py b/tests/biosignals/Biosignal/test_get_properties.py index ad9f1ca6..b024022c 100644 --- a/tests/biosignals/Biosignal/test_get_properties.py +++ b/tests/biosignals/Biosignal/test_get_properties.py @@ -8,8 +8,15 @@ class GetBiosignalPropertiesTestCase(unittest.TestCase): @classmethod def setUpClass(cls): - cls.alpha = get_biosignal_alpha() - cls.gamma = get_biosignal_gamma() + # Contiguous + cls.alpha = get_biosignal_alpha() # single channel + cls.gamma = get_biosignal_gamma() # multi channel + # Discontiguous + cls.beta = get_biosignal_beta() # single channel + cls.delta = get_biosignal_delta() # multi channel + + ############################ + # Channels and Channel Names def test_get_channel_names(self): self.assertEqual(self.alpha.channel_names, (channel_name_a, )) @@ -33,6 +40,13 @@ def test_get_n_channels(self): self.assertEqual(self.alpha.n_channels, 1) self.assertEqual(self.gamma.n_channels, 3) + ############################ + # Associated metadata + + def test_get_name(self): + self.assertEqual(self.alpha.name, get_biosignal_name(1)) + self.assertEqual(self.gamma.name, get_biosignal_name(2)) + def test_get_patient(self): self.assertEqual(self.alpha.patient, patient_M) self.assertEqual(self.gamma.patient, patient_F) @@ -45,45 +59,61 @@ def test_get_source(self): self.assertEqual(self.alpha.source, source) self.assertEqual(self.gamma.source, source) - def test_get_name(self): - self.assertEqual(self.alpha.name, get_biosignal_name(1)) - self.assertEqual(self.gamma.name, get_biosignal_name(2)) - def test_get_sampling_frequency(self): - self.assertEqual(self.alpha.sampling_frequency, sf_low) - self.assertEqual(self.gamma.sampling_frequency, sf_high) + self.assertEqual(self.alpha.sampling_frequency, sf_low) # single channel + self.assertEqual(self.gamma.sampling_frequency, sf_high) # multi channel def test_get_sampling_frequency_when_different(self): - pass + # one low, one high + x = get_biosignal(('small', 1, False, 'low', 'volt'), ('small', 1, False, 'high', 'volt'), + patient=patient_M, location=location_C) + self.assertEqual(x.sampling_frequency, {channel_name_a: sf_low, channel_name_b: sf_high}) def test_get_units(self): - self.assertEqual(self.alpha.units, units_volt) - self.assertEqual(self.gamma.units, units_siemens) + self.assertEqual(self.alpha.units, units_volt) # single channel + self.assertEqual(self.gamma.units, units_siemens) # multi channel def test_get_units_when_different(self): 
-        pass
+        # one channel in mV, another in uS
+        x = get_biosignal(('small', 1, False, 'low', 'volt'), ('small', 1, False, 'low', 'siemens'),
+                          patient=patient_M, location=location_C)
+        self.assertEqual(x.units, {channel_name_a: units_volt, channel_name_b: units_siemens})
+
+    ############################
+    # Time-related properties
 
     def test_get_start(self):
         self.assertEqual(self.alpha.start, start_a)
         self.assertEqual(self.gamma.start, start_a)
 
-    def test_get_end(self):
-        self.assertEqual(self.alpha.end, get_timeseries_end('small', False, sf_low))
-        self.assertEqual(self.gamma.end, get_timeseries_end('large', False, sf_high))
+    def test_get_end_contiguous_biosignals(self):
+        self.assertEqual(self.alpha.end, get_timeseries_end('small', False, 'low'))
+        self.assertEqual(self.gamma.end, get_timeseries_end('large', False, 'high'))
 
-    def test_get_duration(self):
-        self.assertEqual(self.alpha.duration, get_timeseries_duration('small', False, sf_low))
-        self.assertEqual(self.gamma.duration, get_timeseries_duration('small', False, sf_low))
+    def test_get_end_discontiguous_biosignals(self):
+        self.assertEqual(self.beta.end, get_timeseries_end('medium', True, 'low'))
+        self.assertEqual(self.delta.end, get_timeseries_end('large', True, 'high'))
+
+    def test_get_duration_contiguous_biosignals(self):
+        self.assertEqual(self.alpha.duration, get_timeseries_duration('small', False, 'low'))
+        self.assertEqual(self.gamma.duration, get_timeseries_duration('large', False, 'high'))
+
+    def test_get_duration_discontiguous_biosignals(self):
+        self.assertEqual(self.beta.duration, get_timeseries_duration('medium', True, 'low'))
+        self.assertEqual(self.delta.duration, get_timeseries_duration('large', True, 'high'))
 
-    def test_get_domain(self):
-        alpha_domain = self.alpha.domain
-        beta_domain = self.gamma.domain
-        for x in (alpha_domain, beta_domain):
+    def test_get_domain_contiguous_biosignals(self):
+        alpha_domain = self.alpha.domain  # single channel
+        gamma_domain = self.gamma.domain  # multi channel
+        for x in (alpha_domain, gamma_domain):
             self.assertIsInstance(x, Timeline)
+        # Group Names == Channel Names
         self.assertEqual(alpha_domain.group_names, self.alpha.channel_names)
-        self.assertEqual(beta_domain.group_names, self.gamma.channel_names)
-        self.assertEqual(alpha_domain.duration, self.alpha.duration)
-        self.assertEqual(beta_domain.duration, self.gamma.duration)
+        self.assertEqual(gamma_domain.group_names, self.gamma.channel_names)
+        # Durations are preserved
+        self.assertEqual(alpha_domain.duration, self.alpha.duration)
+        self.assertEqual(gamma_domain.duration, self.gamma.duration)
 
 
 if __name__ == '__main__':
diff --git a/tests/biosignals/Segment/test_arithmetics.py b/tests/biosignals/Segment/test_arithmetics.py
new file mode 100644
index 00000000..d9cb37c4
--- /dev/null
+++ b/tests/biosignals/Segment/test_arithmetics.py
@@ -0,0 +1,102 @@
+import unittest
+
+from numpy import allclose
+
+from ltbio._core.exceptions import DifferentLengthsError
+from ltbio.biosignals import Segment
+from resources.segments import get_segment, get_segment_length, small_samples_1, small_samples_2
+
+
+class SegmentArithmeticsTestCase(unittest.TestCase):
+
+    LENGTH = 'small'
+
+    def _assert_arithmetic_operation(self, operation, a, b, a_content, b_content):
+        """Applies `operation` (the out-of-place or the in-place dunder, as passed by the callers)
+        to (a, b) and checks the result against the same operation on the raw contents."""
+        expected = operation(a_content, b_content)
+        result = operation(a, b)
+        self.assertIsInstance(result, Segment)
+        self.assertEqual(len(result), get_segment_length(self.LENGTH))
+        self.assertTrue(allclose(result.samples, expected))
+
+    def test_add_two_segments(self):
+        for operation in (Segment.__add__, Segment.__iadd__):
+            self._assert_arithmetic_operation(operation,
+                                              get_segment(self.LENGTH, 1), get_segment(self.LENGTH, 2),
+                                              small_samples_1, small_samples_2)
+
+    def test_add_segment_and_number(self):
+        for operation in (Segment.__add__, Segment.__iadd__):
+            self._assert_arithmetic_operation(operation,
+                                              get_segment(self.LENGTH, 1), 30.,
+                                              small_samples_1, 30.)
+
+    def test_sub_two_segments(self):
+        for operation in (Segment.__sub__, Segment.__isub__):
+            self._assert_arithmetic_operation(operation,
+                                              get_segment(self.LENGTH, 1), get_segment(self.LENGTH, 2),
+                                              small_samples_1, small_samples_2)
+
+    def test_sub_segment_and_number(self):
+        for operation in (Segment.__sub__, Segment.__isub__):
+            self._assert_arithmetic_operation(operation,
+                                              get_segment(self.LENGTH, 1), 30.,
+                                              small_samples_1, 30.)
+
+    def test_mul_two_segments(self):
+        for operation in (Segment.__mul__, Segment.__imul__):
+            self._assert_arithmetic_operation(operation,
+                                              get_segment(self.LENGTH, 1), get_segment(self.LENGTH, 2),
+                                              small_samples_1, small_samples_2)
+
+    def test_mul_segment_and_number(self):
+        for operation in (Segment.__mul__, Segment.__imul__):
+            self._assert_arithmetic_operation(operation,
+                                              get_segment(self.LENGTH, 1), 30.,
+                                              small_samples_1, 30.)
+
+    def test_truediv_two_segments(self):
+        for operation in (Segment.__truediv__, Segment.__itruediv__):
+            self._assert_arithmetic_operation(operation,
+                                              get_segment(self.LENGTH, 1), get_segment(self.LENGTH, 2),
+                                              small_samples_1, small_samples_2)
+
+    def test_truediv_segment_and_number(self):
+        for operation in (Segment.__truediv__, Segment.__itruediv__):
+            self._assert_arithmetic_operation(operation,
+                                              get_segment(self.LENGTH, 1), 30.,
+                                              small_samples_1, 30.)
+
+    def test_floordiv_two_segments(self):
+        for operation in (Segment.__floordiv__, Segment.__ifloordiv__):
+            self._assert_arithmetic_operation(operation,
+                                              get_segment(self.LENGTH, 1), get_segment(self.LENGTH, 2),
+                                              small_samples_1, small_samples_2)
+
+    def test_floordiv_segment_and_number(self):
+        for operation in (Segment.__floordiv__, Segment.__ifloordiv__):
+            self._assert_arithmetic_operation(operation,
+                                              get_segment(self.LENGTH, 1), 30.,
+                                              small_samples_1, 30.)
+
+    def test_arithmetics_with_invalid_types(self):
+        # bool is left out on purpose: it is a subclass of int, so it dispatches as a number
+        for operation in (Segment.__add__, Segment.__iadd__, Segment.__mul__, Segment.__imul__,
+                          Segment.__sub__, Segment.__isub__, Segment.__truediv__, Segment.__itruediv__,
+                          Segment.__floordiv__, Segment.__ifloordiv__):
+            for invalid_type in (None, [], {}, (), set(), object()):
+                with self.assertRaises(TypeError):
+                    operation(get_segment(self.LENGTH, 1), invalid_type)
+
+    def test_arithmetics_with_different_length_segments(self):
+        for operation in (Segment.__add__, Segment.__iadd__, Segment.__mul__, Segment.__imul__,
+                          Segment.__sub__, Segment.__isub__, Segment.__truediv__, Segment.__itruediv__,
+                          Segment.__floordiv__, Segment.__ifloordiv__):
+            with self.assertRaises(DifferentLengthsError):
+                operation(get_segment(self.LENGTH, 1), get_segment('medium', 1))
+
+
+if __name__ == '__main__':
+    unittest.main()
diff --git a/tests/biosignals/Segment/test_builtins.py b/tests/biosignals/Segment/test_builtins.py
new file mode 100644
index 00000000..0974fe82
--- /dev/null
+++ b/tests/biosignals/Segment/test_builtins.py
@@ -0,0 +1,59 @@
+import unittest
+from copy import copy
+
+from numpy.testing import assert_array_equal
+
+from tests.resources.segments import get_segment, get_segment_length, small_samples_1 + + +class SegmentBuiltinsTestCase(unittest.TestCase): + + @classmethod + def setUpClass(cls): + cls.group = 1 + cls.small_segment = get_segment('small', cls.group) + cls.medium_segment = get_segment('medium', cls.group) + cls.large_segment = get_segment('large', cls.group) + + def test_len(self): + self.assertEqual(len(self.small_segment), get_segment_length('small')) + self.assertEqual(len(self.medium_segment), get_segment_length('medium')) + self.assertEqual(len(self.large_segment), get_segment_length('large')) + + def test_iter(self): + for a, b in zip(self.small_segment, small_samples_1): + self.assertEqual(a, b) + + def test_str(self): # Assert the length is in the string representation + self.assertIn(str(get_segment_length('small')), str(self.small_segment)) + self.assertIn(str(get_segment_length('medium')), str(self.medium_segment)) + self.assertIn(str(get_segment_length('large')), str(self.large_segment)) + + def test_repr(self): + self.test_str() + + """ + def test_hash(self): + hash_small, hash_medium, hash_large = hash(self.small_segment), hash(self.medium_segment), hash(self.large_segment) + self.assertIsInstance(hash_small, int) + self.assertIsInstance(hash_medium, int) + self.assertIsInstance(hash_large, int) + self.assertNotEqual(hash_small, hash_medium) + self.assertNotEqual(hash_small, hash_large) + self.assertNotEqual(hash_medium, hash_large) + """ + + def test_copy(self): + copied = copy(self.small_segment) + self.assertFalse(self.small_segment is copied) # Assert objects are different + self.assertFalse(self.small_segment._Segment__samples is copied._Segment__samples) # Assert pointers are different + self.assertTrue(all(self.small_segment.samples == copied.samples)) # Assert content is the same + # Assert what happens to the copied does not affect the original + copied_modified = copied * 0 + self.assertTrue(all(self.small_segment.samples != copied_modified.samples)) + self.assertTrue(all(self.small_segment.samples == copied.samples)) + + + +if __name__ == '__main__': + unittest.main() diff --git a/tests/biosignals/Segment/test_get_properties.py b/tests/biosignals/Segment/test_get_properties.py new file mode 100644 index 00000000..b2b4cef9 --- /dev/null +++ b/tests/biosignals/Segment/test_get_properties.py @@ -0,0 +1,22 @@ +import unittest + +from resources.segments import get_segment, small_samples_1 + + +class SegmentGetPropertiesTestCase(unittest.TestCase): + + @classmethod + def setUpClass(cls): + cls.small_segment = get_segment('small', 1) + + def test_get_samples(self): + samples = self.small_segment.samples + # Assert '.samples' is a view of the actual stored array in Segment + self.assertFalse(samples.flags['OWNDATA']) + self.assertTrue(samples.base is self.small_segment._Segment__samples) + # Assert the content is the same + self.assertTrue(all(samples == small_samples_1)) + + +if __name__ == '__main__': + unittest.main() diff --git a/tests/biosignals/Segment/test_indexing.py b/tests/biosignals/Segment/test_indexing.py new file mode 100644 index 00000000..486bf051 --- /dev/null +++ b/tests/biosignals/Segment/test_indexing.py @@ -0,0 +1,63 @@ +import unittest + +from numpy import allclose + +from ltbio.biosignals import Segment +from resources.segments import get_segment, medium_samples_1, get_segment_length + + +class SegmentIndexingTestCase(unittest.TestCase): + + @classmethod + def setUpClass(cls): + cls.group = 1 + cls.medium_segment = get_segment('medium', cls.group) + 
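+    # The helper below mirrors Segment.__getitem__'s contract (see _Segment.py earlier in this
+    # patch): an int index yields a float, while a slice yields a new Segment.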
+ def _check_content_correctness(self, index, original_samples, indexed_content): + """Asserts if original_samples[index] == indexed_content.""" + if isinstance(index, int): + self.assertIsInstance(indexed_content, float) + self.assertTrue(original_samples[index] == indexed_content) + elif isinstance(index, slice): + self.assertIsInstance(indexed_content, Segment) + self.assertTrue(allclose(original_samples[index], indexed_content)) + else: + raise ValueError(f'Invalid index type: {type(index)}') + + def test_indexing_position(self): + for position in (0, -1, 5): + self._check_content_correctness(position, medium_samples_1, self.medium_segment[position]) + + def test_indexing_slice(self): + for slice_ in (slice(0, 5), # from start to 5, [0:5] + slice(None, 5), # from start to 5, [:5] + slice(0, get_segment_length('medium')), # from start to end, [0:12] + slice(None, get_segment_length('medium')), # from start to end, [:12] + slice(None, None), # from start to end, [:] + slice(5, 10), # in the middle, [5:10] + slice(None, -2), # from start to -2, [:-2] = [0:10] + slice(-8, -2), # in the middle, [-8:-2] = [4:10] + ): + self._check_content_correctness(slice_, medium_samples_1, self.medium_segment[slice_]) + + def test_indexing_tuple(self): + index = (8, slice(2, 5), 0, slice(None, -2)) + res = self.medium_segment[index] # self.medium_segment[8, 2:5, 0, :-2] + self.assertIsInstance(res, tuple) + for ix, sub_res in zip(index, res): + self._check_content_correctness(ix, medium_samples_1, sub_res) + + def test_indexing_out_of_range(self): + length = get_segment_length('medium') + for index in (-length-1, length, length+1, 100, -100): + with self.assertRaises(IndexError): + x = self.medium_segment[index] + + def test_indexing_invalid_type(self): + for index in (1.5, 'a', {1, 2, 3}, {1: 2, 3: 4}, None): + with self.assertRaises(TypeError): + x = self.medium_segment[index] + + +if __name__ == '__main__': + unittest.main() diff --git a/tests/biosignals/Segment/test_initializers.py b/tests/biosignals/Segment/test_initializers.py new file mode 100644 index 00000000..ffbfc1fc --- /dev/null +++ b/tests/biosignals/Segment/test_initializers.py @@ -0,0 +1,24 @@ +import unittest + +from numpy import ndarray + +from ltbio.biosignals import Segment +from tests.resources.segments import large_samples_3 + + +class SegmentInitializersTestCase(unittest.TestCase): + + def test_initializer(self): + segment = Segment(large_samples_3) + self.assertIsInstance(segment, Segment) + samples = segment._Segment__samples + self.assertIsInstance(samples, ndarray) + self.assertEqual(samples.dtype, float) + # Assert content + self.assertTrue(all(samples == large_samples_3)) + # but not pointer + self.assertFalse(samples is large_samples_3) + + +if __name__ == '__main__': + unittest.main() diff --git a/tests/biosignals/Segment/test_joining.py b/tests/biosignals/Segment/test_joining.py new file mode 100644 index 00000000..debaab7e --- /dev/null +++ b/tests/biosignals/Segment/test_joining.py @@ -0,0 +1,37 @@ +import unittest + +from numpy import allclose + +from ltbio._core.exceptions import DifferentLengthsError +from ltbio.biosignals import Segment +from resources.segments import get_segment, get_segment_length, small_samples_1, small_samples_2 + + +class SegmentJoiningTestCase(unittest.TestCase): + def _assert_arithmetic_operation(self, operation, a, b, a_content, b_content): + expected_sum = operation(a_content, b_content) + # Out of place + result = operation(a, b) + self.assertIsInstance(result, Segment) + 
self.assertEqual(len(result), len(a_content))
+        self.assertTrue(allclose(result.samples, expected_sum))
+        # The in-place dunders are exercised by passing them as `operation` too, so no
+        # separate `a += b` branch is needed here.
+
+    def test_concatenate_one_segment(self):
+        pass
+
+    def test_concatenate_multiple_segments(self):
+        pass
+
+    def test_append_array(self):
+        pass
+
+    def test_append_sequence(self):
+        pass
+
+
+if __name__ == '__main__':
+    unittest.main()
diff --git a/tests/biosignals/Segment/test_logics.py b/tests/biosignals/Segment/test_logics.py
new file mode 100644
index 00000000..421f2f77
--- /dev/null
+++ b/tests/biosignals/Segment/test_logics.py
@@ -0,0 +1,71 @@
+import unittest
+
+from ltbio.biosignals import Segment
+from resources.segments import get_segment, small_samples_1
+
+
+class SegmentLogicsTestCase(unittest.TestCase):
+
+    PRECISION = 1e-10
+
+    @classmethod
+    def setUpClass(cls) -> None:
+        cls.small_segment_1_a = get_segment('small', 1)
+        cls.small_segment_1_b = get_segment('small', 1)
+        cls.small_segment_2 = get_segment('small', 2)
+
+    def test_segment_equals_segment(self):
+        # Real cases
+        self.assertTrue(self.small_segment_1_a == self.small_segment_1_b)
+        self.assertFalse(self.small_segment_1_a == self.small_segment_2)
+
+        # Edge case 1: one sample is slightly different
+        very_similar_samples = small_samples_1.copy()  # copy, so the shared fixture is not mutated
+        very_similar_samples[1] += self.PRECISION
+        very_similar_seg = Segment(very_similar_samples)
+        self.assertFalse(self.small_segment_1_a == very_similar_seg)
+
+        # Edge case 2: one sample more
+        longer_samples = list(small_samples_1) + [324.2, ]  # ndarray + list would broadcast, not append
+        longer_seg = Segment(longer_samples)
+        self.assertFalse(self.small_segment_1_a == longer_seg)
+
+        # Edge case 3: one sample less
+        shorter_samples = small_samples_1[:-1]
+        shorter_seg = Segment(shorter_samples)
+        self.assertFalse(self.small_segment_1_a == shorter_seg)
+
+    def test_segment_equals_number(self):
+        monotonic_samples = [1, 1, 1, 1, 1, 1, 1]
+        monotonic_seg = Segment(monotonic_samples)
+
+        # Real cases
+        self.assertFalse(self.small_segment_1_a == 1)
+        self.assertTrue(monotonic_seg == 1)
+        self.assertFalse(monotonic_seg == 1 + self.PRECISION)
+        self.assertFalse(monotonic_seg == 1 - self.PRECISION)
+
+        # Edge case 1: one sample is slightly different
+        very_similar_samples = list(monotonic_samples)  # copy, so monotonic_samples stays intact
+        very_similar_samples[1] += self.PRECISION
+        very_similar_seg = Segment(very_similar_samples)
+        self.assertFalse(very_similar_seg == 1)
+
+        # Edge case 2: one sample more (length is irrelevant when comparing against a scalar)
+        longer_samples = monotonic_samples + [1, ]
+        longer_seg = Segment(longer_samples)
+        self.assertTrue(longer_seg == 1)
+
+        # Edge case 3: one sample less (length is irrelevant when comparing against a scalar)
+        shorter_samples = monotonic_samples[:-1]
+        shorter_seg = Segment(shorter_samples)
+        self.assertTrue(shorter_seg == 1)
+
+    def test_equals_with_invalid_type(self):
+        for invalid_type in (None, 'a', list(), tuple(), dict(), set()):
+            with self.assertRaises(TypeError):
+                x = self.small_segment_1_a == invalid_type
+
+
+if __name__ == '__main__':
+    unittest.main()
diff --git a/tests/biosignals/Segment/test_processing.py b/tests/biosignals/Segment/test_processing.py
new file mode 100644
index 00000000..49f04cb5
--- /dev/null
+++ b/tests/biosignals/Segment/test_processing.py
@@ -0,0 +1,36 @@
+import unittest
+
+from ltbio.biosignals import Segment
+from resources.segments import get_segment, small_samples_1
+
+
+class SegmentProcessingTestCase(unittest.TestCase):
+
+    @classmethod
+    def setUpClass(cls) -> None:
+        cls.original_samples = small_samples_1
+
+    def setUp(self) -> None:  # an instance method, so each test gets a fresh Segment
+        self.segment = get_segment('small', 1)
+
+    def test_apply_inplace(self):
+        self.assertTrue(all(self.segment.samples == self.original_samples))
+        self.segment.apply(lambda x: x + 1, inplace=True)
+        self.assertTrue(all(self.segment.samples == self.original_samples + 1))
+
+    def test_apply_out_of_place(self):
+        self.assertTrue(all(self.segment.samples == self.original_samples))
+        result = self.segment.apply(lambda x: x + 1, inplace=False)
+        self.assertTrue(all(result.samples == self.original_samples + 1))
+        self.assertTrue(all(self.segment.samples == self.original_samples))
+
+    def test_apply_parametric_function(self):
+        self.assertTrue(all(self.segment.samples == self.original_samples))
+        result = self.segment.apply(lambda x, a, b: x * b + a, a=3, b=2, inplace=False)
+        self.assertTrue(all(result.samples == self.original_samples * 2 + 3))
+        self.assertTrue(all(self.segment.samples == self.original_samples))
+
+
+if __name__ == '__main__':
+    unittest.main()
diff --git a/tests/biosignals/Segment/test_serialization.py b/tests/biosignals/Segment/test_serialization.py
new file mode 100644
index 00000000..5b891f92
--- /dev/null
+++ b/tests/biosignals/Segment/test_serialization.py
@@ -0,0 +1,52 @@
+import unittest
+from os import remove
+from os.path import isfile
+
+from numpy import allclose, memmap, ndarray
+
+from ltbio.biosignals import Segment
+from resources.segments import get_segment, small_samples_1
+
+
+class SegmentSerializationTestCase(unittest.TestCase):
+
+    @classmethod
+    def setUpClass(cls) -> None:
+        cls.original_samples = small_samples_1
+
+    def setUp(self) -> None:  # an instance method, so each test gets a fresh Segment
+        self.segment = get_segment('small', 1)
+
+    def test_create_memory_map(self):
+        self.assertFalse(hasattr(self.segment, '_Segment__memory_map'))
+        self.segment._memory_map('.')
+        self.assertTrue(hasattr(self.segment, '_Segment__memory_map'))
+        # allclose, because the memory map is stored as float32
+        self.assertTrue(allclose(self.segment._Segment__memory_map, self.original_samples))
+        self.assertTrue(isfile(self.segment._Segment__memory_map.filename))
+        remove(self.segment._Segment__memory_map.filename)
+
+    def test_getstate_without_memory_map(self):
+        state = self.segment.__getstate__()
+        self.assertEqual(state[0], Segment._Segment__SERIALVERSION)
+        self.assertIsInstance(state[1], ndarray)
+
+    def test_getstate_with_memory_map(self):
+        self.segment._memory_map('.')
+        state = self.segment.__getstate__()
+        self.assertEqual(state[0], Segment._Segment__SERIALVERSION)
+        self.assertIsInstance(state[1], memmap)
+        remove(self.segment._Segment__memory_map.filename)
+
+
+if __name__ == '__main__':
+    unittest.main()
diff --git a/tests/biosignals/Segment/test_set_properties.py b/tests/biosignals/Segment/test_set_properties.py
new file mode 100644
index 00000000..95f40300
--- /dev/null
+++ b/tests/biosignals/Segment/test_set_properties.py
@@ -0,0 +1,21 @@
+import unittest
+
+from resources.segments import get_segment, small_samples_1, small_samples_2
+
+
+class SegmentSetPropertiesTestCase(unittest.TestCase):
+
+    @classmethod
+    def setUpClass(cls):
+        cls.small_segment = get_segment('small', 1)
+
+    def test_set_samples(self):
+        samples = self.small_segment.samples
+        # Try to set 
samples + with self.assertRaises(AttributeError): + self.small_segment.samples = small_samples_2 + self.assertEqual(samples, small_samples_1) + + +if __name__ == '__main__': + unittest.main() diff --git a/tests/biosignals/timeseries/test_TimeseriesSegment.py b/tests/biosignals/timeseries/test_TimeseriesSegment.py deleted file mode 100644 index 10e061c1..00000000 --- a/tests/biosignals/timeseries/test_TimeseriesSegment.py +++ /dev/null @@ -1,109 +0,0 @@ -import unittest -from datetime import datetime, timedelta - -from numpy import allclose - -from ltbio.biosignals.timeseries.Frequency import Frequency -from ltbio.biosignals.timeseries.Timeseries import Timeseries - - -class TimeseriesSegmentTestCase(unittest.TestCase): - - @classmethod - def setUpClass(cls): - cls.samples1, cls.samples2 = [0.34, 2.12, 3.75], [1.34, 3.12, 4.75], - cls.initial1, cls.initial2 = datetime(2022, 1, 1, 16, 0), datetime(2022, 1, 3, 9, 0) # 1/1/2022 4PM and 3/1/2022 9AM - cls.final1, cls.final2 = datetime(2022, 1, 1, 16, 0, 3), datetime(2022, 1, 3, 9, 0, 3) # 1/1/2022 4PM and 3/1/2022 9AM - cls.sf = Frequency(1) # 1 Hz - cls.Segment = Timeseries._Timeseries__Segment - - def test_create_segment(cls): - segment = cls.Segment(cls.samples1, cls.initial1, cls.sf) - cls.assertTrue(allclose(cls.samples1, segment.samples)) - cls.assertEqual(cls.initial1, segment.initial_datetime) - cls.assertEqual(cls.final1, segment.final_datetime) - - def test_has_sample_of_a_datetime(cls): # Does datetime x belong to the domain of Segment? - segment = cls.Segment(cls.samples1, cls.initial1, cls.sf) - cls.assertTrue(datetime(2022, 1, 1, 16, 0, 0) in segment) - cls.assertTrue(datetime(2022, 1, 1, 16, 0, 1) in segment) - cls.assertTrue(datetime(2022, 1, 1, 16, 0, 2) in segment) - cls.assertFalse(datetime(2022, 1, 1, 16, 0, 3) in segment) - cls.assertFalse(datetime(2022, 1, 1, 15, 59, 59) in segment) - - def test_indexing(cls): - segment = cls.Segment(cls.samples1, cls.initial1, cls.sf) - cls.assertEqual(cls.samples1[0], segment[0]) - cls.assertEqual(cls.samples1[-1], segment[-1]) - cls.assertTrue(allclose(cls.samples1[:1], segment[:1].samples)) - cls.assertTrue(allclose(cls.samples1[1:], segment[1:].samples)) - - def test_get_duration(cls): # time - segment = cls.Segment(cls.samples1, cls.initial1, cls.sf) - cls.assertEqual(segment.duration, timedelta(seconds=3)) - - def test_get_length(cls): # number of samples - segment = cls.Segment(cls.samples1, cls.initial1, cls.sf) - cls.assertEqual(len(segment), len(cls.samples1)) - - def test_superposition_two_segments(cls): # True when they comprehend exactly the same time interval - segment1 = cls.Segment(cls.samples1, cls.initial1, cls.sf) - segment2 = cls.Segment(cls.samples2, cls.initial1, cls.sf) - cls.assertTrue(segment1 == segment2) - segment3 = cls.Segment(cls.samples2, cls.initial2, cls.sf) - cls.assertFalse(segment2 == segment3) - - def test_not_superposition_two_segments(cls): # True when they do not comprehend exactly the same time interval - segment1 = cls.Segment(cls.samples1, cls.initial1, cls.sf) - segment2 = cls.Segment(cls.samples2, cls.initial2, cls.sf) - cls.assertTrue(segment1 != segment2) - segment3 = cls.Segment(cls.samples1, cls.initial2, cls.sf) - cls.assertFalse(segment2 != segment3) - - def test_segment_comes_before_another(cls): - segment1 = cls.Segment(cls.samples1, cls.initial1, cls.sf) - segment2 = cls.Segment(cls.samples2, cls.initial2, cls.sf) - cls.assertTrue(segment1 < segment2) - cls.assertFalse(segment2 < segment1) - segment3 = cls.Segment(cls.samples1, 
cls.initial1 + timedelta(seconds=3.1), cls.sf) # close, but not adjacent - cls.assertTrue(segment1 < segment3) - cls.assertTrue(segment1 <= segment3) - segment4 = cls.Segment(cls.samples1, cls.initial1 + timedelta(seconds=3), cls.sf) # adjacent - cls.assertFalse(segment1 < segment4) - cls.assertTrue(segment1 <= segment4) - - def test_segment_comes_after_another(cls): - segment1 = cls.Segment(cls.samples1, cls.initial1, cls.sf) - segment2 = cls.Segment(cls.samples2, cls.initial2, cls.sf) - cls.assertTrue(segment2 > segment1) - cls.assertFalse(segment1 > segment2) - segment3 = cls.Segment(cls.samples1, cls.initial1 + timedelta(seconds=3.1), cls.sf) # close, but not adjacent - cls.assertTrue(segment3 > segment1) - cls.assertTrue(segment3 >= segment1) - segment4 = cls.Segment(cls.samples1, cls.initial1 + timedelta(seconds=3), cls.sf) # adjacent - cls.assertFalse(segment4 > segment1) - cls.assertTrue(segment4 >= segment1) - - def test_segment_overlaps_another(cls): - segment1 = cls.Segment(cls.samples1, cls.initial1, cls.sf) - segment2 = cls.Segment(cls.samples1, cls.initial1 + timedelta(seconds=1.5), cls.sf) - cls.assertTrue(segment1.overlaps(segment2)) - cls.assertTrue(segment2.overlaps(segment1)) - segment3 = cls.Segment(cls.samples1, cls.initial2, cls.sf) - cls.assertFalse(segment1.overlaps(segment3)) - cls.assertFalse(segment3.overlaps(segment1)) - segment4 = cls.Segment(cls.samples1, cls.initial1 + timedelta(seconds=3), cls.sf) # adjacent - cls.assertFalse(segment4.overlaps(segment1)) - cls.assertFalse(segment1.overlaps(segment4)) - - def test_segment_is_contained_in_another(cls): - outer_segment = cls.Segment(cls.samples1 + cls.samples2 + cls.samples1, cls.initial1, cls.sf) - inner_segment = cls.Segment(cls.samples1, cls.initial1 + timedelta(seconds=4), cls.sf) - cls.assertTrue(inner_segment in outer_segment) - inner_segment = cls.Segment(cls.samples1, cls.initial2, cls.sf) - cls.assertFalse(inner_segment in outer_segment) - - - -if __name__ == '__main__': - unittest.main() diff --git a/tests/resources/biosignals.py b/tests/resources/biosignals.py index 3aebac2a..5c1df4c3 100644 --- a/tests/resources/biosignals.py +++ b/tests/resources/biosignals.py @@ -49,7 +49,7 @@ class NoModalityBiosignal(Biosignal): ... -def get_biosignal(channels: tuple[tuple[str, int, bool, float, str]], patient, location): +def get_biosignal(*channels_properties: tuple[str, int, bool, str, str], patient, location, source=None, name=None): """ Use get_biosignal to get a new Biosignal object populated for testing purposes. 
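
For context, the reworked factory below can be called with per-channel property tuples, e.g.
(a sketch using values from this test suite, not part of the patch itself):

    x = get_biosignal(('small', 1, False, 'low', 'volt'),
                      ('medium', 1, False, 'low', 'volt'),
                      patient='M', location='chest')  # -> a two-channel NoModalityBiosignal
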
@@ -71,57 +71,51 @@ def get_biosignal(channels: tuple[tuple[str, int, bool, float, str]], patient, l
     if location == 'wrist':
         location = location_W
 
-    name = get_biosignal_name(channels[0][1])  # use the group of the first channel
+    if name is None:
+        name = get_biosignal_name(channels_properties[0][1])  # use the group of the first channel
 
-    channel_names = (channel_name_a, channel_name_b, channel_name_c, channel_name_d)
-    timeseries = {}
+    if source is None:
+        source = globals()['source']  # fall back to the module-level default source
 
-    for channel_name, instructions in zip(channel_names, channels):
-        length, group, discontiguous, sf, units = instructions
-        timeseries[channel_name] = get_timeseries(length, group, discontiguous, sf, units)
-
-    return NoModalityBiosignal(timeseries, source, patient, location, name)
-
-
-# CLASSIC EXAMPLES
-
-def get_biosignal_alpha():
-    """
-    1 channel with group 1 small contiguous timeseries, 2 Hz, mV, associated with patient_M and location_C
-    """
-    length, group = 'small', 1
-    return NoModalityBiosignal({channel_name_a: get_timeseries(length, group, False, sf_low, units_volt)},
-                               source, patient_M, location_C, get_biosignal_name(1))
+    channel_names = (channel_name_a, channel_name_b, channel_name_c, channel_name_d)[:len(channels_properties)]
+    channel_properties_with_names = {channel_name: dict(zip(('length', 'group', 'discontiguous', 'sf', 'units'), properties))
+                                     for channel_name, properties in zip(channel_names, channels_properties)}  # keyword form expected by _get_biosignal
+    return _get_biosignal(channel_properties_with_names, patient=patient, location=location, source=source, name=name)
 
-def get_biosignal_beta():
-    """
-    1 channel with group 1 discontiguous medium timeseries, 2 Hz, mV, associated with patient_M and location_C
-    """
-    length, group = 'medium', 1
-    return NoModalityBiosignal({channel_name_a: get_timeseries(length, group, True, sf_low, units_volt)},
-                               source, patient_M, location_C, get_biosignal_name(1))
 
+def _get_biosignal(channels_properties: dict[str, dict], patient, location, source, name):
+    timeseries = {channel_name: get_timeseries(**instructions) for channel_name, instructions in channels_properties.items()}
+    return NoModalityBiosignal(timeseries, source, patient, location, name)
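+# A sketch of how these helpers are meant to be called (hypothetical usage, not part
+# of the test suite; it assumes get_timeseries accepts the keyword arguments
+# 'length', 'group', 'discontiguous', 'sf' and 'units', which is how the properties
+# dicts below are splatted into it):
+#
+#     # variadic form, one properties tuple per channel:
+#     bs = get_biosignal(('small', 1, False, sf_low, units_volt),
+#                        ('medium', 1, True, sf_low, units_volt),
+#                        patient=patient_M, location=location_C)
+#
+#     # keyword form, as used by the classic examples below:
+#     bs = _get_biosignal({channel_name_a: {'length': 'small', 'group': 1,
+#                                           'discontiguous': False, 'sf': sf_low,
+#                                           'units': units_volt}},
+#                         patient=patient_M, location=location_C,
+#                         source=source, name=get_biosignal_name(1))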
 
-def get_biosignal_gamma():
-    """
-    3 channels with group 2 variable length contiguous timeseries, 4 Hz, uS, associated with patient_F and location_W
-    """
-    length, group = None, 2
-    return NoModalityBiosignal({channel_name_a: get_timeseries('small', group, False, sf_high, units_siemens),
-                               channel_name_b: get_timeseries('medium', group, False, sf_high, units_siemens),
-                               channel_name_c: get_timeseries('large', group, False, sf_high, units_siemens),
-                               },
-                               source, patient_F, location_W, get_biosignal_name(group))
 
+# CLASSIC EXAMPLES
 
-def get_biosignal_delta():
-    """
-    2 channels with group 2 variable length discontiguous timeseries, 4 Hz, uS, associated with patient_F and location_W
-    """
-    length, group = None, 2
-    return NoModalityBiosignal({channel_name_a: get_timeseries('medium', group, True, sf_high, units_siemens),
-                               channel_name_b: get_timeseries('large', group, True, sf_high, units_siemens),
-                               },
-                               source, patient_F, location_W, get_biosignal_name(group))
+# Alpha
+# 1 channel with group 1 small contiguous timeseries, 2 Hz, mV, associated with patient_M and location_C
+biosignal_alpha_timeseries_properties = {channel_name_a: {'length': 'small', 'group': 1, 'discontiguous': False, 'sf': sf_low, 'units': units_volt}}
+biosignal_alpha_properties = {'patient': patient_M, 'location': location_C, 'name': get_biosignal_name(1), 'source': source}
+get_biosignal_alpha = lambda: _get_biosignal(biosignal_alpha_timeseries_properties, **biosignal_alpha_properties)
+
+# Beta
+# 1 channel with group 1 discontiguous medium timeseries, 2 Hz, mV, associated with patient_M and location_C
+biosignal_beta_timeseries_properties = {channel_name_a: {'length': 'medium', 'group': 1, 'discontiguous': True, 'sf': sf_low, 'units': units_volt}}
+biosignal_beta_properties = {'patient': patient_M, 'location': location_C, 'name': get_biosignal_name(1), 'source': source}
+get_biosignal_beta = lambda: _get_biosignal(biosignal_beta_timeseries_properties, **biosignal_beta_properties)
+
+# Gamma
+# 3 channels with group 2 variable length contiguous timeseries, 4 Hz, uS, associated with patient_F and location_W
+biosignal_gamma_timeseries_properties = {channel_name_a: {'length': 'small', 'group': 2, 'discontiguous': False, 'sf': sf_high, 'units': units_siemens},
+                                         channel_name_b: {'length': 'medium', 'group': 2, 'discontiguous': False, 'sf': sf_high, 'units': units_siemens},
+                                         channel_name_c: {'length': 'large', 'group': 2, 'discontiguous': False, 'sf': sf_high, 'units': units_siemens}}
+biosignal_gamma_properties = {'patient': patient_F, 'location': location_W, 'name': get_biosignal_name(2), 'source': source}
+get_biosignal_gamma = lambda: _get_biosignal(biosignal_gamma_timeseries_properties, **biosignal_gamma_properties)
+
+# Delta
+# 2 channels with group 2 variable length discontiguous timeseries, 4 Hz, uS, associated with patient_F and location_W
+biosignal_delta_timeseries_properties = {channel_name_a: {'length': 'medium', 'group': 2, 'discontiguous': True, 'sf': sf_high, 'units': units_siemens},
+                                         channel_name_b: {'length': 'large', 'group': 2, 'discontiguous': True, 'sf': sf_high, 'units': units_siemens}}
+biosignal_delta_properties = {'patient': patient_F, 'location': location_W, 'name': get_biosignal_name(2), 'source': source}
+get_biosignal_delta = lambda: _get_biosignal(biosignal_delta_timeseries_properties, **biosignal_delta_properties)
diff --git a/tests/resources/timeseries.py b/tests/resources/timeseries.py
index 241d9f00..68c08e4a 100644
--- a/tests/resources/timeseries.py
+++ b/tests/resources/timeseries.py
@@ -21,12 +21,12 @@
 # You can use these variables to assert the metadata of the Timeseries objects.
 
 # Sampling frequencies
-sf_low = 2.
-sf_high = 4.
+sf_low: float = 2.
+sf_high: float = 4.
# Units -units_volt = Volt(Multiplier.m) -units_siemens = Siemens(Multiplier.u) +units_volt: Unit = Volt(Multiplier.m) +units_siemens: Unit = Siemens(Multiplier.u) # Start timepoints start_a = datetime(2000, 1, 1, 0, 0, 0) From b6df3cdc7e17c624119f6c60dbbd75dc58b69917 Mon Sep 17 00:00:00 2001 From: saraiva Date: Mon, 23 Oct 2023 11:12:14 +0200 Subject: [PATCH 27/47] Add biosignals and clinical to public interface --- src/ltbio/__init__.py | 4 ---- src/ltbio/__init__.pyi | 2 ++ 2 files changed, 2 insertions(+), 4 deletions(-) create mode 100644 src/ltbio/__init__.pyi diff --git a/src/ltbio/__init__.py b/src/ltbio/__init__.py index 2d744367..e69de29b 100644 --- a/src/ltbio/__init__.py +++ b/src/ltbio/__init__.py @@ -1,4 +0,0 @@ - - -__all__ = ["biosignals", "clinical"] - diff --git a/src/ltbio/__init__.pyi b/src/ltbio/__init__.pyi new file mode 100644 index 00000000..b187aaac --- /dev/null +++ b/src/ltbio/__init__.pyi @@ -0,0 +1,2 @@ + +__all__ = ["biosignals", "clinical"] From 77c40376fc2c2543bb95dc94d36e9877cde91f9e Mon Sep 17 00:00:00 2001 From: saraiva Date: Mon, 23 Oct 2023 11:13:11 +0200 Subject: [PATCH 28/47] Moved old test suits --- tests/biosignals/{timeseries => }/test_Event.py | 0 tests/biosignals/{timeseries => }/test_Timeseries.py | 0 tests/biosignals/{timeseries => }/test_Unit.py | 0 3 files changed, 0 insertions(+), 0 deletions(-) rename tests/biosignals/{timeseries => }/test_Event.py (100%) rename tests/biosignals/{timeseries => }/test_Timeseries.py (100%) rename tests/biosignals/{timeseries => }/test_Unit.py (100%) diff --git a/tests/biosignals/timeseries/test_Event.py b/tests/biosignals/test_Event.py similarity index 100% rename from tests/biosignals/timeseries/test_Event.py rename to tests/biosignals/test_Event.py diff --git a/tests/biosignals/timeseries/test_Timeseries.py b/tests/biosignals/test_Timeseries.py similarity index 100% rename from tests/biosignals/timeseries/test_Timeseries.py rename to tests/biosignals/test_Timeseries.py diff --git a/tests/biosignals/timeseries/test_Unit.py b/tests/biosignals/test_Unit.py similarity index 100% rename from tests/biosignals/timeseries/test_Unit.py rename to tests/biosignals/test_Unit.py From 41164afce4b57c79601f7beb07c47d7af44c40f3 Mon Sep 17 00:00:00 2001 From: saraiva Date: Mon, 23 Oct 2023 11:13:57 +0200 Subject: [PATCH 29/47] Fix Segment test set properties --- tests/biosignals/Segment/test_set_properties.py | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/tests/biosignals/Segment/test_set_properties.py b/tests/biosignals/Segment/test_set_properties.py index 95f40300..c913a2a6 100644 --- a/tests/biosignals/Segment/test_set_properties.py +++ b/tests/biosignals/Segment/test_set_properties.py @@ -1,5 +1,7 @@ import unittest +from numpy import allclose + from resources.segments import get_segment, small_samples_1, small_samples_2 @@ -10,11 +12,10 @@ def setUpClass(cls): cls.small_segment = get_segment('small', 1) def test_set_samples(self): - samples = self.small_segment.samples # Try to set samples with self.assertRaises(AttributeError): self.small_segment.samples = small_samples_2 - self.assertEqual(samples, small_samples_1) + self.assertTrue(allclose(self.small_segment.samples, small_samples_1)) if __name__ == '__main__': From 7112e7161140ec0b6e778afdc4effc9e200d019e Mon Sep 17 00:00:00 2001 From: saraiva Date: Mon, 23 Oct 2023 11:14:13 +0200 Subject: [PATCH 30/47] Fix Segment test serialization --- tests/biosignals/Segment/test_serialization.py | 13 ++----------- 1 file changed, 2 insertions(+), 11 
deletions(-) diff --git a/tests/biosignals/Segment/test_serialization.py b/tests/biosignals/Segment/test_serialization.py index 5b891f92..edfde68f 100644 --- a/tests/biosignals/Segment/test_serialization.py +++ b/tests/biosignals/Segment/test_serialization.py @@ -2,7 +2,7 @@ from os import remove from os.path import isfile -from numpy import ndarray, memmap +from numpy import ndarray, memmap, allclose from ltbio.biosignals import Segment from resources.segments import get_segment, small_samples_1 @@ -22,8 +22,7 @@ def test_create_memory_map(self): self.assertFalse(hasattr(self.segment, '_Segment__memory_map')) self.segment._memory_map('.') self.assertTrue(hasattr(self.segment, '_Segment__memory_map')) - self.assertEqual(self.segment._Segment__memory_map, self.original_samples) - self.assertEqual(self.segment._Segment__memory_map, self.original_samples) + self.assertTrue(allclose(self.segment._Segment__memory_map, self.original_samples)) self.assertTrue(isfile(self.segment._Segment__memory_map.filename)) remove(self.segment._Segment__memory_map.filename) @@ -39,14 +38,6 @@ def test_getstate_with_memory_map(self): self.assertIsInstance(state[1], memmap) remove(self.segment._Segment__memory_map.filename) - def test_apply_parametric_function(self): - self.assertEqual(self.segment.samples, self.original_samples) - result = self.segment.apply(lambda x, a, b: x * b + a, a=3, b=2, inplace=False) - self.assertEqual(result.samples, self.original_samples * 2 + 3) - self.assertEqual(self.segment.samples, self.original_samples) - - - if __name__ == '__main__': unittest.main() From a7d2a7f72b8d30c18ac2a91d8f1503f302125f3b Mon Sep 17 00:00:00 2001 From: saraiva Date: Mon, 23 Oct 2023 11:14:24 +0200 Subject: [PATCH 31/47] Add Segment test statistics --- tests/biosignals/Segment/test_statistics.py | 61 +++++++++++++++++++++ 1 file changed, 61 insertions(+) create mode 100644 tests/biosignals/Segment/test_statistics.py diff --git a/tests/biosignals/Segment/test_statistics.py b/tests/biosignals/Segment/test_statistics.py new file mode 100644 index 00000000..4ade4271 --- /dev/null +++ b/tests/biosignals/Segment/test_statistics.py @@ -0,0 +1,61 @@ +import unittest + +import numpy as np +from numpy import allclose + +from ltbio.biosignals import Segment +from resources.segments import get_segment, small_samples_1 + + +class SegmentShortcutStatisticsTestCase(unittest.TestCase): + + @classmethod + def setUpClass(cls): + cls.segment = get_segment('small', 1) + + def test_max(self): + res = self.segment.max() + self.assertEqual(res, np.max(small_samples_1)) + + def test_min(self): + res = self.segment.min() + self.assertEqual(res, np.min(small_samples_1)) + + def test_argmax(self): + res = self.segment.argmax() + self.assertEqual(res, np.argmax(small_samples_1)) + + def test_argmin(self): + res = self.segment.argmin() + self.assertEqual(res, np.argmin(small_samples_1)) + + def test_mean(self): + res = self.segment.mean() + self.assertEqual(res, np.mean(small_samples_1)) + + def test_median(self): + res = self.segment.median() + self.assertEqual(res, np.median(small_samples_1)) + + def test_std(self): + res = self.segment.std() + self.assertEqual(res, np.std(small_samples_1)) + + def test_var(self): + res = self.segment.var() + self.assertEqual(res, np.var(small_samples_1)) + + def test_diff(self): + res = self.segment.diff() + self.assertTrue(allclose(res, np.diff(small_samples_1))) + + def test_abs(self): + samples = [1, 2, -3, 4, -5, 6, -7] + segment = Segment(samples) + res = segment.abs() + 
self.assertTrue(allclose(res, np.abs(samples))) + + +if __name__ == '__main__': + unittest.main() + From 179c4700b6ab9e3e9c6cbe464994a5bc6daf6530 Mon Sep 17 00:00:00 2001 From: saraiva Date: Mon, 23 Oct 2023 11:14:53 +0200 Subject: [PATCH 32/47] Add 2 Segment processing tests --- tests/biosignals/Segment/test_processing.py | 16 ++++++++++++++-- 1 file changed, 14 insertions(+), 2 deletions(-) diff --git a/tests/biosignals/Segment/test_processing.py b/tests/biosignals/Segment/test_processing.py index 49f04cb5..530f6857 100644 --- a/tests/biosignals/Segment/test_processing.py +++ b/tests/biosignals/Segment/test_processing.py @@ -1,5 +1,7 @@ import unittest +import numpy as np + from ltbio.biosignals import Segment from resources.segments import get_segment, small_samples_1 @@ -14,12 +16,12 @@ def setUpClass(cls) -> None: def setUp(cls) -> None: cls.segment = get_segment('small', 1) - def test_apply_inplace(self): + def test_apply_function_operation_inplace(self): self.assertTrue(all(self.segment.samples == self.original_samples)) self.segment.apply(lambda x: x + 1, inplace=True) self.assertTrue(all(self.segment.samples == self.original_samples + 1)) - def test_apply_out_of_place(self): + def test_apply_function_out_of_place(self): self.assertTrue(all(self.segment.samples == self.original_samples)) result = self.segment.apply(lambda x: x + 1, inplace=False) self.assertTrue(all(result.samples == self.original_samples + 1)) @@ -31,6 +33,16 @@ def test_apply_parametric_function(self): self.assertTrue(all(result.samples == self.original_samples * 2 + 3)) self.assertTrue(all(self.segment.samples == self.original_samples)) + def test_extract_with_function(self): + self.assertTrue(all(self.segment.samples == self.original_samples)) + info = self.segment.extract(lambda x: np.mean(x)) + self.assertTrue(info == np.mean(self.original_samples)) + + def test_extract_with_parametric_function(self): + self.assertTrue(all(self.segment.samples == self.original_samples)) + info = self.segment.extract(lambda x, a: np.mean(x) > a, a=1) + self.assertTrue(info) + if __name__ == '__main__': unittest.main() From 464e653660bcaf1acaf1a0b951aa4f0f8741d624 Mon Sep 17 00:00:00 2001 From: saraiva Date: Mon, 23 Oct 2023 11:15:08 +0200 Subject: [PATCH 33/47] Fix Segment logics tests --- tests/biosignals/Segment/test_logics.py | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/tests/biosignals/Segment/test_logics.py b/tests/biosignals/Segment/test_logics.py index 421f2f77..a01113f4 100644 --- a/tests/biosignals/Segment/test_logics.py +++ b/tests/biosignals/Segment/test_logics.py @@ -20,7 +20,7 @@ def test_segment_equals_segment(self): self.assertFalse(self.small_segment_1_a == self.small_segment_2) # Edge case 1: one sample is slightly different - very_similar_samples = small_samples_1 + very_similar_samples = small_samples_1.copy() very_similar_samples[1] += self.PRECISION very_similar_seg = Segment(very_similar_samples) self.assertFalse(self.small_segment_1_a == very_similar_seg) @@ -46,7 +46,7 @@ def test_segment_equals_number(self): self.assertFalse(monotonic_seg == 1 - self.PRECISION) # Edge case 1: one sample is slightly different - very_similar_samples = monotonic_samples + very_similar_samples = monotonic_samples.copy() very_similar_samples[1] += self.PRECISION very_similar_seg = Segment(very_similar_samples) self.assertFalse(very_similar_seg == 1) @@ -54,12 +54,12 @@ def test_segment_equals_number(self): # Edge case 2: one sample more longer_samples = monotonic_samples + [1, ] longer_seg = 
Segment(longer_samples) - self.assertFalse(longer_seg == 1) + self.assertTrue(longer_seg == 1) # Edge case 3: one sample less shorter_samples = monotonic_samples[:-1] shorter_seg = Segment(shorter_samples) - self.assertFalse(shorter_seg == 1) + self.assertTrue(shorter_seg == 1) def test_equals_with_invalid_type(self): for invalid_type in (None, 'a', list(), tuple(), dict(), set()): From 1a3c8587789875832def76d557347e2a9bc3832e Mon Sep 17 00:00:00 2001 From: saraiva Date: Mon, 23 Oct 2023 11:15:25 +0200 Subject: [PATCH 34/47] Add Segment joining tests --- tests/biosignals/Segment/test_joining.py | 99 +++++++++++++++++------- 1 file changed, 73 insertions(+), 26 deletions(-) diff --git a/tests/biosignals/Segment/test_joining.py b/tests/biosignals/Segment/test_joining.py index debaab7e..d877cd3d 100644 --- a/tests/biosignals/Segment/test_joining.py +++ b/tests/biosignals/Segment/test_joining.py @@ -1,37 +1,84 @@ import unittest +import numpy as np from numpy import allclose from ltbio._core.exceptions import DifferentLengthsError from ltbio.biosignals import Segment -from resources.segments import get_segment, get_segment_length, small_samples_1, small_samples_2 +from resources.segments import get_segment, get_segment_length, small_samples_1, medium_samples_1, large_samples_1 class SegmentJoiningTestCase(unittest.TestCase): - def _assert_arithmetic_operation(self, operation, a, b, a_content, b_content): - expected_sum = operation(a_content, b_content) - # Out of place - result = operation(a, b) - self.assertIsInstance(result, Segment) - self.assertEqual(len(result), get_segment_length(self.LENGTH)) - self.assertTrue(allclose(result.samples, expected_sum)) - # In place - a += b - self.assertEqual(len(a), get_segment_length(self.LENGTH)) - self.assertTrue(allclose(a.samples, expected_sum)) - - def test_concatenate_one_segment(self): - pass - - def test_concatenate_multiple_segments(self): - pass - - def test_append_array(self): - pass - - def test_append_sequence(self): - pass - - + + def setUp(cls): + cls.group = 1 + cls.small_segment = get_segment('small', cls.group) + cls.medium_segment = get_segment('medium', cls.group) + cls.large_segment = get_segment('large', cls.group) + + def test_append_more_samples(self): + # Assert before + self.assertEqual(len(self.large_segment), get_segment_length('large')) # Length + self.assertTrue(allclose(self.large_segment.samples, large_samples_1)) # Content + + # Append medium samples + self.large_segment.append(medium_samples_1) + expected_content = np.concatenate((large_samples_1, medium_samples_1)) + + # Assert after + self.assertEqual(len(self.large_segment), get_segment_length('large') + get_segment_length('medium')) # Length affected + self.assertEqual(len(medium_samples_1), get_segment_length('medium')) # Length not affected + self.assertTrue(allclose(self.large_segment.samples, expected_content)) # Content + + # Append a list + to_append = [1, 2, 3] + self.large_segment.append(to_append) + expected_content = np.concatenate((expected_content, to_append)) + + # Assert after + self.assertEqual(len(self.large_segment), get_segment_length('large') + get_segment_length('medium') + len(to_append)) # Length affected + self.assertEqual(len(to_append), 3) # Length not affected + self.assertEqual(len(medium_samples_1), get_segment_length('medium')) # Length not affected + self.assertTrue(allclose(self.large_segment.samples, expected_content)) # Content + + def test_append_type_error(self): + for invalid_type in (1, 1.0, True, False, None, {1, 2, 3}, {'a': 1, 
'b': 2}, 'string'): + with self.assertRaises(TypeError): + self.small_segment.append(invalid_type) + + def test_concatenate_two_segments(self): + # Assert before + self.assertTrue(allclose(self.large_segment.samples, large_samples_1)) + self.assertTrue(allclose(self.medium_segment.samples, medium_samples_1)) + + res = Segment.concatenate(self.large_segment, self.medium_segment) + expected_content = np.concatenate((large_samples_1, medium_samples_1)) + + # Assert after + self.assertTrue(allclose(self.large_segment.samples, large_samples_1)) + self.assertTrue(allclose(self.medium_segment.samples, medium_samples_1)) + self.assertTrue(allclose(res.samples, expected_content)) + + def test_concatenate_three_segments(self): + # Assert before + self.assertTrue(allclose(self.large_segment.samples, large_samples_1)) + self.assertTrue(allclose(self.medium_segment.samples, medium_samples_1)) + self.assertTrue(allclose(self.small_segment.samples, small_samples_1)) + + res = Segment.concatenate(self.large_segment, self.medium_segment, self.small_segment) + expected_content = np.concatenate((large_samples_1, medium_samples_1, small_samples_1)) + + # Assert after + self.assertTrue(allclose(self.large_segment.samples, large_samples_1)) + self.assertTrue(allclose(self.medium_segment.samples, medium_samples_1)) + self.assertTrue(allclose(self.small_segment.samples, small_samples_1)) + self.assertTrue(allclose(res.samples, expected_content)) + + def test_concatenate_type_error(self): + for invalid_type in (1, 1.0, True, False, None, {1, 2, 3}, {'a': 1, 'b': 2}, 'string'): + with self.assertRaises(TypeError): + Segment.concatenate(self.small_segment, invalid_type) + + if __name__ == '__main__': unittest.main() From 71b423254017782e52ebb487d2bb926eec753ce9 Mon Sep 17 00:00:00 2001 From: saraiva Date: Mon, 23 Oct 2023 11:15:57 +0200 Subject: [PATCH 35/47] Add 1 Segment initializer tests --- tests/biosignals/Segment/test_initializers.py | 5 +++++ 1 file changed, 5 insertions(+) diff --git a/tests/biosignals/Segment/test_initializers.py b/tests/biosignals/Segment/test_initializers.py index ffbfc1fc..77f0e73e 100644 --- a/tests/biosignals/Segment/test_initializers.py +++ b/tests/biosignals/Segment/test_initializers.py @@ -2,6 +2,7 @@ from numpy import ndarray +from ltbio._core.exceptions import EmptySegmentError from ltbio.biosignals import Segment from tests.resources.segments import large_samples_3 @@ -19,6 +20,10 @@ def test_initializer(self): # but not pointer self.assertFalse(samples is large_samples_3) + def test_initialize_with_empty_samples_raises_error(self): + with self.assertRaises(EmptySegmentError): + Segment([]) + if __name__ == '__main__': unittest.main() From fe6ac45fd64644ead86fe3d07a69e8b6ace53489 Mon Sep 17 00:00:00 2001 From: saraiva Date: Mon, 23 Oct 2023 11:16:11 +0200 Subject: [PATCH 36/47] Fix Segment arithmetic tests --- tests/biosignals/Segment/test_arithmetics.py | 94 ++++++++++---------- 1 file changed, 45 insertions(+), 49 deletions(-) diff --git a/tests/biosignals/Segment/test_arithmetics.py b/tests/biosignals/Segment/test_arithmetics.py index d9cb37c4..aa26900f 100644 --- a/tests/biosignals/Segment/test_arithmetics.py +++ b/tests/biosignals/Segment/test_arithmetics.py @@ -1,5 +1,6 @@ import unittest +import numpy as np from numpy import allclose from ltbio._core.exceptions import DifferentLengthsError @@ -11,76 +12,71 @@ class SegmentArithmeticsTestCase(unittest.TestCase): LENGTH = 'small' - def _assert_arithmetic_operation(self, operation, a, b, a_content, b_content): - expected_sum 
= operation(a_content, b_content) + def _assert_arithmetic_operation(self, operation_outplace, operation_inplace, a, b, a_content, b_content, expected_result): # Out of place - result = operation(a, b) - self.assertIsInstance(result, Segment) - self.assertEqual(len(result), get_segment_length(self.LENGTH)) - self.assertTrue(allclose(result.samples, expected_sum)) + result = operation_outplace(a, b) + self.assertIsInstance(result, Segment) # check if a Segment + self.assertEqual(len(result), get_segment_length(self.LENGTH)) # check if correct length + self.assertTrue(allclose(result.samples, expected_result)) # check if correct content + self.assertTrue(allclose(a.samples, a_content)) # check if intact + self.assertTrue(allclose(b.samples if isinstance(b, Segment) else b, b_content)) # check if intact + self.assertTrue(a is not result and b is not result) # check pointers + # In place - a += b - self.assertEqual(len(a), get_segment_length(self.LENGTH)) - self.assertTrue(allclose(a.samples, expected_sum)) + operation_inplace(a, b) + self.assertIsInstance(a, Segment) # check if still a Segment + self.assertEqual(len(a), get_segment_length(self.LENGTH)) # check if correct length + self.assertTrue(allclose(a.samples, expected_result)) # check if correct content + self.assertTrue(allclose(b.samples if isinstance(b, Segment) else b, b_content)) # check if intact def test_add_two_segments(self): - for operation in (Segment.__add__, Segment.__iadd__): - self._assert_arithmetic_operation(operation, - get_segment(self.LENGTH, 1), get_segment(self.LENGTH, 2), - small_samples_1, small_samples_2) + self._assert_arithmetic_operation(Segment.__add__, Segment.__iadd__, + get_segment(self.LENGTH, 1), get_segment(self.LENGTH, 2), + small_samples_1, small_samples_2, np.add(small_samples_1, small_samples_2)) def test_add_segment_and_number(self): - for operation in (Segment.__add__, Segment.__iadd__): - self._assert_arithmetic_operation(operation, - get_segment(self.LENGTH, 1), 30, - small_samples_1, 30) + self._assert_arithmetic_operation(Segment.__add__, Segment.__iadd__, + get_segment(self.LENGTH, 1), 30, + small_samples_1, 30, np.add(small_samples_1, 30)) def test_sub_two_segments(self): - for operation in (Segment.__sub__, Segment.__isub__): - self._assert_arithmetic_operation(operation, - get_segment(self.LENGTH, 1), get_segment(self.LENGTH, 2), - small_samples_1, small_samples_2) + self._assert_arithmetic_operation(Segment.__sub__, Segment.__isub__, + get_segment(self.LENGTH, 1), get_segment(self.LENGTH, 2), + small_samples_1, small_samples_2, np.subtract(small_samples_1, small_samples_2)) def test_sub_segment_and_number(self): - for operation in (Segment.__sub__, Segment.__isub__): - self._assert_arithmetic_operation(operation, - get_segment(self.LENGTH, 1), 30, - small_samples_1, 30) + self._assert_arithmetic_operation(Segment.__sub__, Segment.__isub__, + get_segment(self.LENGTH, 1), 30, + small_samples_1, 30, np.subtract(small_samples_1, 30)) def test_mul_two_segments(self): - for operation in (Segment.__mul__, Segment.__imul__): - self._assert_arithmetic_operation(operation, - get_segment(self.LENGTH, 1), get_segment(self.LENGTH, 2), - small_samples_1, small_samples_2) + self._assert_arithmetic_operation(Segment.__mul__, Segment.__imul__, + get_segment(self.LENGTH, 1), get_segment(self.LENGTH, 2), + small_samples_1, small_samples_2, np.multiply(small_samples_1, small_samples_2)) def test_mul_segment_and_number(self): - for operation in (Segment.__mul__, Segment.__imul__): - 
self._assert_arithmetic_operation(operation, - get_segment(self.LENGTH, 1), 30, - small_samples_1, 30) + self._assert_arithmetic_operation(Segment.__mul__, Segment.__imul__, + get_segment(self.LENGTH, 1), 30, + small_samples_1, 30, np.multiply(small_samples_1, 30)) def test_truediv_two_segments(self): - for operation in (Segment.__truediv__, Segment.__itruediv__): - self._assert_arithmetic_operation(operation, - get_segment(self.LENGTH, 1), get_segment(self.LENGTH, 2), - small_samples_1, small_samples_2) + self._assert_arithmetic_operation(Segment.__truediv__, Segment.__itruediv__, + get_segment(self.LENGTH, 1), get_segment(self.LENGTH, 2), + small_samples_1, small_samples_2, np.true_divide(small_samples_1, small_samples_2)) def test_truediv_segment_and_number(self): - for operation in (Segment.__truediv__, Segment.__itruediv__): - self._assert_arithmetic_operation(operation, - get_segment(self.LENGTH, 1), 30, - small_samples_1, 30) + self._assert_arithmetic_operation(Segment.__truediv__, Segment.__itruediv__, + get_segment(self.LENGTH, 1), 30, + small_samples_1, 30, np.true_divide(small_samples_1, 30)) def test_floordiv_two_segments(self): - for operation in (Segment.__floordiv__, Segment.__ifloordiv__): - self._assert_arithmetic_operation(operation, - get_segment(self.LENGTH, 1), get_segment(self.LENGTH, 2), - small_samples_1, small_samples_2) + self._assert_arithmetic_operation(Segment.__floordiv__, Segment.__ifloordiv__, + get_segment(self.LENGTH, 1), get_segment(self.LENGTH, 2), + small_samples_1, small_samples_2, np.floor_divide(small_samples_1, small_samples_2)) def test_floordiv_segment_and_number(self): - for operation in (Segment.__floordiv__, Segment.__ifloordiv__): - self._assert_arithmetic_operation(operation, - get_segment(self.LENGTH, 1), 30, - small_samples_1, 30) + self._assert_arithmetic_operation(Segment.__floordiv__, Segment.__ifloordiv__, + get_segment(self.LENGTH, 1), 30, + small_samples_1, 30, np.floor_divide(small_samples_1, 30)) def test_arithmetics_with_invalid_types(self): for operation in (Segment.__add__, Segment.__iadd__, Segment.__mul__, Segment.__imul__, From 30362ad0404c457eb958237515d61314dba6fc67 Mon Sep 17 00:00:00 2001 From: saraiva Date: Mon, 23 Oct 2023 11:16:37 +0200 Subject: [PATCH 37/47] Add more custom Exceptions --- src/ltbio/_core/exceptions.py | 36 ++++++++++++++++++++++++++++++++++- 1 file changed, 35 insertions(+), 1 deletion(-) diff --git a/src/ltbio/_core/exceptions.py b/src/ltbio/_core/exceptions.py index b942a279..81d38d5c 100644 --- a/src/ltbio/_core/exceptions.py +++ b/src/ltbio/_core/exceptions.py @@ -1,6 +1,8 @@ # -- encoding: utf-8 -- # =================================== # ScientISST LTBio | Long-Term Biosignals +from datetime import datetime + from datetimerange import DateTimeRange #from ltbio.biosignals._Timeline import Timeline @@ -19,6 +21,22 @@ # =================================== +class TimeseriesError(Exception): + def __init__(self, why: str): + super().__init__(why) + + +class EmptyTimeseriesError(TimeseriesError): + def __init__(self): + super().__init__(f"Trying to create a Timeseries with no samples.") + +class OverlapingSegmentsError(TimeseriesError): + def __init__(self, first_start: datetime, first_end: datetime, second_start: datetime, second_end: datetime): + super().__init__(f"Trying to add two overlapping segments to a Timeseries. " + f"First Segment starts at {first_start} and ends at {first_end}. 
" + f"Second Segment starts at {second_start} and ends at {second_end}.") + + class IncompatibleTimeseriesError(Exception): def __init__(self, why: str): super().__init__(f"These Timeseries are incompatible because {why}") @@ -53,11 +71,27 @@ def __init__(self, first, second): super().__init__(f"at least two different patients were found: {first} and {second}. " f"Try to drop the patients first.") +class SegmentError(Exception): + def __intit__(self, description: str): + super().__init__(description) + +class NotASegmentError(SegmentError): + def __init__(self, segment, intend_use=""): + super().__init__(f"{type(segment)} is not a segment. {intend_use}") + +class SamplesNotValidError(SegmentError): + def __init__(self, samples, why): + super().__init__(f"Samples are not valid, because {why}.") + +class EmptySegmentError(SegmentError): + def __init__(self): + super().__init__(f"Trying to create a Segment with no samples.") + class IncompatibleSegmentsError(Exception): def __init__(self, why: str): super().__init__(f"These Segments are incompatible because {why}") -class DifferentLengthsError(Exception): +class DifferentLengthsError(IncompatibleSegmentsError): def __init__(self, first: int, second: int): super().__init__(f"the first has length {first} and the second has length {second}.") From 75a02e028c97afcf4b7c173828b4633f97074805 Mon Sep 17 00:00:00 2001 From: saraiva Date: Mon, 23 Oct 2023 11:18:47 +0200 Subject: [PATCH 38/47] Fix units -> unit --- src/ltbio/biosignals/_Timeseries.py | 6 +++--- src/ltbio/biosignals/sources/_MITDB.py | 2 +- src/ltbio/biosignals/sources/_Seer.py | 2 +- 3 files changed, 5 insertions(+), 5 deletions(-) diff --git a/src/ltbio/biosignals/_Timeseries.py b/src/ltbio/biosignals/_Timeseries.py index 308d05cd..62b9ca8a 100644 --- a/src/ltbio/biosignals/_Timeseries.py +++ b/src/ltbio/biosignals/_Timeseries.py @@ -148,7 +148,7 @@ class Timeseries(): # INITIALIZERS @multimethod def __init__(self, segments_by_time: dict[datetime, ndarray | Sequence[float] | Segment], sampling_frequency: float, - units: Unit = None, name: str = None): + unit: Unit = None, name: str = None): """ Give one or multiple instantiated Segments. It also receives the sampling frequency of the samples. @@ -165,7 +165,7 @@ def __init__(self, segments_by_time: dict[datetime, ndarray | Sequence[float] | sampling_frequency: float | Frequency The frequency at which the samples where sampled. - units: Unit + unit: Unit The physical units of the variable measured. 
name: str @@ -173,7 +173,7 @@ def __init__(self, segments_by_time: dict[datetime, ndarray | Sequence[float] | """ # Metadata self.__sampling_frequency = Frequency(sampling_frequency) - self.__units = units + self.__unit = unit self.__name = name # Segments diff --git a/src/ltbio/biosignals/sources/_MITDB.py b/src/ltbio/biosignals/sources/_MITDB.py index 15694fc5..98dc249b 100644 --- a/src/ltbio/biosignals/sources/_MITDB.py +++ b/src/ltbio/biosignals/sources/_MITDB.py @@ -96,7 +96,7 @@ def _timeseries(dir, type, **options): if len(segments) > 1: new_timeseries = timeseries.Timeseries.withDiscontiguousSegments(segments, sampling_frequency=sfreq, name=channels[ch], units=unit) else: - new_timeseries = timeseries.Timeseries(tuple(segments.values())[0], tuple(segments.keys())[0], sfreq, name=channels[ch], units=unit) + new_timeseries = timeseries.Timeseries(tuple(segments.values())[0], tuple(segments.keys())[0], sfreq, name=channels[ch], unit=unit) new_dict[channels[ch]] = new_timeseries return new_dict diff --git a/src/ltbio/biosignals/sources/_Seer.py b/src/ltbio/biosignals/sources/_Seer.py index 0d5d84bb..35523fe8 100644 --- a/src/ltbio/biosignals/sources/_Seer.py +++ b/src/ltbio/biosignals/sources/_Seer.py @@ -86,7 +86,7 @@ def _timeseries(dir, type, **options): if len(segments) > 1: new_timeseries = timeseries.Timeseries.withDiscontiguousSegments(segments, sampling_frequency=sfreq, name=name, units=unit) else: - new_timeseries = timeseries.Timeseries(tuple(segments.values())[0], tuple(segments.keys())[0], sfreq, name=name, units=unit) + new_timeseries = timeseries.Timeseries(tuple(segments.values())[0], tuple(segments.keys())[0], sfreq, name=name, unit=unit) new_dict[dict_key] = new_timeseries return new_dict From e5894157d6dbb0c76c2273ce29a68269d2803ca9 Mon Sep 17 00:00:00 2001 From: saraiva Date: Mon, 23 Oct 2023 11:19:15 +0200 Subject: [PATCH 39/47] Fix Segment + add statistics --- src/ltbio/biosignals/_Segment.py | 153 ++++++++++++++++++++---------- src/ltbio/biosignals/_Segment.pyi | 44 ++++----- 2 files changed, 122 insertions(+), 75 deletions(-) diff --git a/src/ltbio/biosignals/_Segment.py b/src/ltbio/biosignals/_Segment.py index 7eb4fd17..b6a0eeef 100644 --- a/src/ltbio/biosignals/_Segment.py +++ b/src/ltbio/biosignals/_Segment.py @@ -25,7 +25,7 @@ from multimethod import multimethod from numpy import ndarray, memmap -from ltbio._core.exceptions import DifferentLengthsError +from ltbio._core.exceptions import DifferentLengthsError, NotASegmentError class Segment(): @@ -74,6 +74,23 @@ def __len__(self) -> int: def samples(self) -> ndarray: return self.__samples.view() + # =================================== + # Properties (Setters) + + @classmethod + def __samples_valid(cls, samples) -> bool: + if isinstance(samples, ndarray): + return samples.dtype in (int, float) + elif isinstance(samples, Sequence): + return all([isinstance(sample, (int, float)) for sample in samples]) + else: + return False + + def __set_samples(self, samples=None): + if not self.__samples_valid(samples): + raise TypeError(f"Trying to set Segment' samples to a {type(samples)}. Must be a sequence of numbers.") + self.__samples = samples + # =================================== # Built-ins (Joining Segments) @@ -86,7 +103,9 @@ def append(self, samples: ndarray | Sequence[float]): samples: ndarray The samples to append. """ - self.__samples = np.append(self.__samples, samples) + if not self.__samples_valid(samples): + raise TypeError(f"Trying to append to Segment a {type(samples)}. 
Must be a sequence of numbers.") + self.__set_samples(np.append(self.__samples, samples)) @classmethod def concatenate(cls, *other: 'Segment') -> 'Segment': @@ -94,6 +113,10 @@ def concatenate(cls, *other: 'Segment') -> 'Segment': Concatenates the Segments in the given order. """ # Get the samples + for o in other: + if not isinstance(o, Segment): + raise TypeError(f"Trying to concatenate a {type(o)}. Must be a Segment.") + all_samples = np.concatenate([segment.samples for segment in other]) return Segment(all_samples) @@ -105,64 +128,66 @@ def _check_length_compatibility(cls, first: 'Segment', second: 'Segment'): if len(first) != len(second): raise DifferentLengthsError(len(first), len(second)) - @classmethod - def _binary_operation(cls, operation: Callable, first: 'Segment', second: 'Segment') -> 'Segment': - Segment._check_length_compatibility(first, second) - return Segment(operation(first, second)) - - @classmethod - def _unary_operation(cls, segment: 'Segment', operation: Callable) -> 'Segment': - return Segment(operation(segment)) + def __binary_arithmetics(self, other, operation: Callable, inplace=False): + if inplace: + if type(other) is Segment: + Segment._check_length_compatibility(self, other) + self.__set_samples(operation(self.samples, other.samples)) + elif type(other) in (float, int): + self.__set_samples(operation(self.samples, other)) + else: + raise TypeError(f"Arithmetic operation between Segment and {type(other)} not allowed. " + f"Second operator should be a number or another Segment.") + return self + else: + if type(other) is Segment: + Segment._check_length_compatibility(self, other) + return Segment(operation(self.samples, other.samples)) + elif type(other) in (float, int): + return Segment(operation(self.samples, other)) + else: + raise TypeError(f"Arithmetic operation between Segment and {type(other)} not allowed. 
" + f"Second operator should be a number or another Segment.") - @multimethod - def __add__(self, other: 'Segment'): - """Adds two Segments, sample by sample.""" - return self._binary_operation((lambda x, y: x + y), self, other) + def __add__(self, other): + """Adds two Segments, sample by sample, or translates the Segment by a constant.""" + return self.__binary_arithmetics(other, np.add, inplace=False) - @multimethod - def __add__(self, other: float): - """Translates the Segment by a constant.""" - return self._unary_operation(self, (lambda x: x + other)) + def __iadd__(self, other): + """Adds two Segments, sample by sample, or translates the Segment by a constant, in-place.""" + return self.__binary_arithmetics(other, np.add, inplace=True) - @multimethod def __sub__(self, other): """Subtracts two Segments, sample by sample.""" - return self._binary_operation((lambda x, y: x - y), self, other) + return self.__binary_arithmetics(other, np.subtract, inplace=False) - @multimethod - def __sub__(self, other: float): + def __isub__(self, other): """Translates the Segment by a constant.""" - return self._unary_operation(self, (lambda x: x - other)) + return self.__binary_arithmetics(other, np.subtract, inplace=True) - @multimethod - def __mul__(self: 'Segment', other: 'Segment'): + def __mul__(self, other): """Multiplies two Segments, sample by sample.""" - return self._binary_operation((lambda x, y: x * y), self, other) + return self.__binary_arithmetics(other, np.multiply, inplace=False) - @multimethod - def __mul__(self: 'Segment', other: Union[int, float]): + def __imul__(self, other): """Multiplies the Segment by a constant (contraction).""" - return Segment(self.samples * other) + return self.__binary_arithmetics(other, np.multiply, inplace=True) - @multimethod - def __truediv__(self, other: 'Segment'): + def __truediv__(self, other): """Divides two Segments, sample by sample.""" - return self._binary_operation((lambda x, y: x / y), self, other) + return self.__binary_arithmetics(other, np.true_divide, inplace=False) - @multimethod - def __truediv__(self, other: float): + def __itruediv__(self, other): """Divides the Segment by a constant (expansion).""" - return self._unary_operation(self, (lambda x: x / other)) + return self.__binary_arithmetics(other, np.true_divide, inplace=True) - @multimethod - def __floordiv__(self, other: 'Segment'): + def __floordiv__(self, other): """Divides two Segments, sample by sample.""" - return self._binary_operation((lambda x, y: x // y), self, other) + return self.__binary_arithmetics(other, np.floor_divide, inplace=False) - @multimethod - def __floordiv__(self, other: float): + def __ifloordiv__(self, other): """Divides the Segment by a constant (expansion).""" - return self._unary_operation(self, (lambda x: x // other)) + return self.__binary_arithmetics(other, np.floor_divide, inplace=True) # =================================== # Built-ins (Indexing) @@ -185,24 +210,50 @@ def __iter__(self) -> iter: return iter(self.__samples) # =================================== - # Amplitude methods + # Shortcut Statistics + + def max(self) -> float: + return self.extract(lambda x: np.max(x)) + + def argmax(self) -> int: + return self.extract(lambda x: np.argmax(x)) + + def min(self) -> float: + return self.extract(lambda x: np.min(x)) + + def argmin(self) -> int: + return self.extract(lambda x: np.argmin(x)) + + def mean(self) -> float: + return self.extract(lambda x: np.mean(x)) + + def median(self) -> float: + return self.extract(lambda x: np.median(x)) + + def 
std(self) -> float:
+        return self.extract(lambda x: np.std(x))
+
+    def var(self) -> float:
+        return self.extract(lambda x: np.var(x))
 
-    def max(self):
-        return np.max(self.__samples)
+    def abs(self) -> 'Segment':
+        return self.apply(lambda x: np.abs(x), inplace=False)  # out-of-place apply returns a new Segment, as annotated
 
-    def min(self):
-        return np.min(self.__samples)
+    def diff(self) -> 'Segment':
+        return self.apply(lambda x: np.diff(x), inplace=False)  # out-of-place apply returns a new Segment, as annotated
 
     # ===================================
     # Binary Logic
 
     @multimethod
     def __eq__(self, other: 'Segment') -> bool:
-        return all(self.__samples == other.samples)
+        if len(self) != len(other):
+            return False
+        return np.equal(self.__samples, other.samples).all()
 
     @multimethod
     def __eq__(self, other: Union[int, float]) -> bool:
-        return all(self.__samples == other)
+        return np.equal(self.__samples, other).all()
 
     @multimethod
     def __ne__(self, other: 'Segment') -> bool:
@@ -221,12 +272,12 @@ def apply(self, operation: Callable, inplace: bool = True, **kwargs):
         """
         processed_samples = operation(self.samples, **kwargs)
         if inplace:
-            self.__samples = processed_samples
-            return
+            self.__set_samples(processed_samples)
+            return self
         else:
             return Segment(processed_samples)
 
-    def apply_and_return(self, operation: Callable, **kwargs) -> Any:
+    def extract(self, operation: Callable, **kwargs) -> Any:
         """
         Applies a procedure to its samples and returns the output.
         """
diff --git a/src/ltbio/biosignals/_Segment.pyi b/src/ltbio/biosignals/_Segment.pyi
index c9560f18..c54bc949 100644
--- a/src/ltbio/biosignals/_Segment.pyi
+++ b/src/ltbio/biosignals/_Segment.pyi
@@ -16,7 +16,7 @@
 # ===================================
 
 from datetime import datetime
-from typing import Sequence, Union
+from typing import Sequence, Union, Callable, Any
 
 from multimethod import multimethod
 from numpy import ndarray
@@ -39,43 +39,34 @@ class Segment():
     # BUILT-INS (Joining Segments)
     def append(self, samples: ndarray | Sequence[float]) -> None: ...
 
+
     @classmethod
     def concatenate(cls, *other: 'Segment') -> 'Segment': ...
 
     # BUILT-INS (Arithmetic)
-    @multimethod
-    def __add__(self, other: 'Segment') -> 'Segment': ...
+    def __add__(self, other: 'Segment' | float | int) -> 'Segment': ...
 
-    @multimethod
-    def __add__(self, other: float) -> 'Segment': ...
+    def __iadd__(self, other: 'Segment' | float | int) -> 'Segment': ...
 
-    @multimethod
-    def __sub__(self, other) -> 'Segment': ...
+    def __sub__(self, other: 'Segment' | float | int) -> 'Segment': ...
 
-    @multimethod
-    def __sub__(self, other: float) -> 'Segment': ...
+    def __isub__(self, other: 'Segment' | float | int) -> 'Segment': ...
 
-    @multimethod
-    def __mul__(self: 'Segment', other: 'Segment') -> 'Segment': ...
+    def __mul__(self, other: 'Segment' | float | int) -> 'Segment': ...
 
-    @multimethod
-    def __mul__(self: 'Segment', other: Union[int, float]) -> 'Segment': ...
+    def __imul__(self, other: 'Segment' | float | int) -> 'Segment': ...
 
-    @multimethod
-    def __truediv__(self, other: 'Segment') -> 'Segment': ...
+    def __truediv__(self, other: 'Segment' | float | int) -> 'Segment': ...
 
-    @multimethod
-    def __truediv__(self, other: float) -> 'Segment': ...
-
-    @multimethod
-    def __floordiv__(self, other: 'Segment') -> 'Segment': ...
+    def __itruediv__(self, other: 'Segment' | float | int) -> 'Segment': ...
 
-    @multimethod
-    def __floordiv__(self, other: float) -> 'Segment': ...
+    def __floordiv__(self, other: 'Segment' | float | int) -> 'Segment': ...
 
+    def __ifloordiv__(self, other: 'Segment' | float | int) -> 'Segment': ...
 
     # BUILT-INS (Indexing)
     def __getitem__(self, index: int | slice | tuple) -> float | Segment: ...
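    # (per the signature above: an int index yields a single sample, while a slice
    #  yields a new Segment over the selected samples)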
+ def __iter__(self) -> iter: ... # BUILT-INS (Binary Logic) @@ -91,11 +82,16 @@ class Segment(): @multimethod def __ne__(self, other: Union[int, float]) -> bool: ... + # PROCESSING + def apply(self, operation: Callable, inplace: bool = True, **kwargs): ... + + def extract(self, operation: Callable, **kwargs) -> Any: ... + # SHORTCUT STATISTICS def max(self) -> float: ... - def argmax(self) -> tuple[datetime]: ... + def argmax(self) -> int: ... def min(self) -> float: ... - def argmin(self) -> tuple[datetime]: ... + def argmin(self) -> int: ... def mean(self) -> float: ... def median(self) -> float: ... def std(self) -> float: ... From 50dc34382c02534f4cfc08fe3a835905a1f600ae Mon Sep 17 00:00:00 2001 From: saraiva Date: Mon, 23 Oct 2023 11:19:41 +0200 Subject: [PATCH 40/47] Fix Timeseries + add default constructor --- src/ltbio/biosignals/_Timeseries.py | 97 ++++++++++++++++++++-------- src/ltbio/biosignals/_Timeseries.pyi | 1 - 2 files changed, 71 insertions(+), 27 deletions(-) diff --git a/src/ltbio/biosignals/_Timeseries.py b/src/ltbio/biosignals/_Timeseries.py index 62b9ca8a..0191cd18 100644 --- a/src/ltbio/biosignals/_Timeseries.py +++ b/src/ltbio/biosignals/_Timeseries.py @@ -1,5 +1,5 @@ # -*- encoding: utf-8 -*- - +from collections import OrderedDict # =================================== # IT - LongTermBiosignals @@ -19,7 +19,7 @@ from math import ceil from os.path import join from tempfile import mkstemp -from typing import List, Iterable, Collection, Dict, Tuple, Callable, Sequence +from typing import List, Iterable, Collection, Dict, Tuple, Callable, Sequence, Union import matplotlib.pyplot as plt import numpy as np @@ -35,11 +35,11 @@ from ._Timeline import Timeline from .units import Unit, Frequency from .._core.exceptions import DifferentSamplingFrequenciesError, DifferentUnitsError, TimeseriesOverlappingError, \ - DifferentDomainsError + DifferentDomainsError, EmptyTimeseriesError from .._core.operations import Operator, Operation -#from ltbio.processing.filters.Filter import Filter +# from ltbio.processing.filters.Filter import Filter class Timeseries(): """ @@ -146,6 +146,36 @@ class Timeseries(): """ # INITIALIZERS + @multimethod + def __init__(self, segments_by_time=None, sampling_frequency=None, unit=None, name=None): + """ + Type-checking and validation of the parameters. 
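+        Raises EmptyTimeseriesError when no segments are given, and ValueError for
+        any other invalid parameter.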
+        """
+        # Segments
+        if segments_by_time is None:
+            raise EmptyTimeseriesError()
+        else:
+            if not isinstance(segments_by_time, dict):
+                raise ValueError(f"Invalid segments: {segments_by_time}")
+            if len(segments_by_time) == 0:
+                raise EmptyTimeseriesError()
+            for start, segment in segments_by_time.items():
+                if not isinstance(start, datetime):
+                    raise ValueError(f"Invalid start datetime: {start}")
+                if not isinstance(segment, Segment):
+                    raise ValueError(f"Invalid Segment: {segment}")
+        # Sampling frequency
+        if sampling_frequency is None:
+            raise ValueError("Sampling frequency is required.")
+        elif not isinstance(sampling_frequency, (float, int)):
+            raise ValueError(f"Invalid sampling frequency: {sampling_frequency}")
+        # Unit
+        if unit is not None and not isinstance(unit, Unit):
+            raise ValueError(f"Invalid unit: {unit}")
+        # Name
+        if name is not None and not isinstance(name, str):
+            raise ValueError(f"Invalid name: {name}")
+
     @multimethod
     def __init__(self, segments_by_time: dict[datetime, ndarray | Sequence[float] | Segment], sampling_frequency: float,
                  unit: Unit = None, name: str = None):
@@ -177,14 +207,22 @@ def __init__(self, segments_by_time: dict[datetime, ndarray | Sequence[float] |
         """
         # Metadata
         self.__sampling_frequency = Frequency(sampling_frequency)
         self.__unit = unit
         self.__name = name
 
         # Segments
-        self.__segments = [Segment(samples, start) for start, samples in segments_by_time.items()]
-        self.__segments = sorted(self.__segments, key = lambda segment: segment.start)  # Sort by start datetime
+        if len(segments_by_time) == 0:
+            raise EmptyTimeseriesError()
+        self.__segments = OrderedDict()
+        for start, segment in segments_by_time.items():
+            if not isinstance(start, datetime): raise TypeError()
+            if not isinstance(segment, Segment):
+                if not isinstance(segment, (ndarray, Sequence)):  # isinstance cannot take the subscripted Sequence[float]
+                    raise TypeError()
+                segment = Segment(segment)
+            self.__segments[start] = segment
 
     # ===================================
     # Properties (Getters)
     @property
     def segments(self) -> tuple[Segment]:
         return self.__segments
 
     @property
     def __samples(self) -> ndarray:
@@ -210,7 +248,7 @@ def end(self) -> datetime:
         return self.__segments[-1].end  # Is the final datetime of the last Segment
 
     def __segment_duration(self, segment: Segment) -> timedelta:
-        return timedelta(seconds = len(segment) / self.sampling_frequency)
+        return timedelta(seconds=len(segment) / self.sampling_frequency)
 
     def __segment_end(self, segment: Segment) -> datetime:
         return segment.start + self.__segment_duration(segment)
@@ -296,7 +334,8 @@ def __getitem__(self, item: slice):
         end = to_datetime(end) if isinstance(end, str) else end
         # Get the samples
         return Timeseries(segments=self.__get_samples(start, end), sampling_frequency=self.sampling_frequency,
-                          units=self.unit, name=self.name)
+                          unit=self.unit, name=self.name)
+
     @multimethod
     def __getitem__(self, item: DateTimeRange):
         return self[item.start_datetime:item.end_datetime]
@@ -381,7 +420,7 @@ def append(self, other: Segment):
 
     @property
     def __sampling_period(self) -> timedelta:
-        return timedelta(seconds = 1 / self.sampling_frequency)
+        return timedelta(seconds=1 / self.sampling_frequency)
 
     @multimethod
     def append(self, other: ndarray | Sequence[float | int]):
@@ -416,20 +455,20 @@ def overlap(cls, first: 'Timeseries', second: 'Timeseries') -> Timeline:
 
     # BUILT-INS (Arithmetic)
     @classmethod
     def _binary_operation(cls, operation: Callable, operator_string: str,
-                          first: 'Timeseries', second: 'Timeseries') -> 'Timeseries':
+                          first: 'Timeseries', second: 'Timeseries') -> 'Timeseries':
        # Check compatibility
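        # (compatibility here means equal sampling frequency and unit, and identical
        #  domains, so that zipping the two segment sequences pairs samples taken at
        #  the same timepoints)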
        Timeseries._check_meta_compatibility(first, second)
        Timeseries._check_domain_compatibility(first, second)
        # Apply operation
        new_segments = [operation(x, y) for x, y in zip(first.segments, second.segments)]
        return Timeseries(segments=new_segments, sampling_frequency=first.sampling_frequency, unit=first.unit,
                          name=first.name + ' ' + operator_string + ' ' + second.name)

    @classmethod
    def _unary_operation(cls, timeseries: 'Timeseries', operation: Callable, operator_string: str) -> 'Timeseries':
        # Apply operation
        new_segments = [operation(x) for x in timeseries.segments]
        return Timeseries(segments=new_segments, sampling_frequency=timeseries.sampling_frequency, unit=timeseries.unit,
                          name=timeseries.name + ' ' + operator_string)

    @multimethod
@@ -526,14 +565,15 @@ def abs(self) -> 'Timeseries':
        Returns a new Timeseries with the absolute value of all samples.
        """
        return Timeseries(segments=[seg.abs() for seg in self.__segments], sampling_frequency=self.__sampling_frequency,
                          unit=self.__unit, name=f'Absolute of {self.__name}')

    def diff(self) -> 'Timeseries':
        """
        Returns a new Timeseries with the difference between consecutive samples, i.e. the discrete derivative.
        """
        return Timeseries(segments=[seg.diff() for seg in self.__segments],
                          sampling_frequency=self.__sampling_frequency,
                          unit=self.__unit, name=f'Derivative of {self.__name}')

    # ===================================
    # INTERNAL USAGE - Convert indexes <-> timepoints && Get Samples
@@ -560,8 +600,9 @@ def __get_samples(self, initial_datetime: datetime, final_datetime: datetime) ->
                res_segments.append(trimmed_segment)
                return res_segments
            else:
                if not initial_datetime == segment.end:  # skip what would be an empty set
                    trimmed_segment = segment[int((initial_datetime - segment.start).total_seconds() * self.sampling_frequency):]
                    res_segments.append(trimmed_segment)
                for j in range(i + 1, len(self.__segments)):
                    # adding the remaining samples, until the last Segment is found
@@ -588,14 +629,16 @@ def __check_boundaries(self, datetime_or_range: datetime | DateTimeRange) -> Non
        elif isinstance(datetime_or_range, DateTimeRange):
            for subdomain in self.domain:
-                if subdomain.is_intersection(datetime_or_range) and datetime_or_range.start_datetime != subdomain.end_datetime:
+                if subdomain.is_intersection(
+                        datetime_or_range) and datetime_or_range.start_datetime != subdomain.end_datetime:
                    intersects = True
                    break
            if not intersects:
                raise IndexError(
                    f"Interval given is outside of Timeseries domain, {' U '.join([f'[{subdomain.start_datetime}, {subdomain.end_datetime}[' for subdomain in self.domain])}.")

-    def _indices_to_timepoints(self, indices: list[list[int]], by_segment=False) -> tuple[datetime] | tuple[list[datetime]]:
+    def _indices_to_timepoints(self, indices: list[list[int]], by_segment=False) -> tuple[datetime] | tuple[
+        list[datetime]]:
        all_timepoints = []
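        # (each list in `indices` holds sample positions within one segment; divide()
        #  turns them into offsets in seconds using the sampling frequency)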
for index, segment in zip(indices, self.__segments): timepoints = divide(index, self.__sampling_frequency) # Transform to timepoints @@ -634,7 +677,7 @@ def _plot_spectrum(self): plt.plot(x, y, alpha=0.6, linewidth=0.5, label='From {0} to {1}'.format(segment.start, segment.end)) - def _plot(self, label:str = None): + def _plot(self, label: str = None): xticks, xticks_labels = [], [] # to store the initial and final ticks of each Segment SPACE = int(self.__sampling_frequency) * 2 # the empty space between each Segment @@ -699,7 +742,8 @@ def _apply_operation_and_return(self, operation, iterate_along_segments_key: [st kwargs[iterate_along_segments_key] = item new_segment = segment._apply_operation_and_return(operation, **kwargs) res.append(new_segment) - elif isinstance(iterate_along_segments_key, list) and all(isinstance(x, str) for x in iterate_along_segments_key): + elif isinstance(iterate_along_segments_key, list) and all( + isinstance(x, str) for x in iterate_along_segments_key): items = [kwargs[it] for it in iterate_along_segments_key] for segment, item in zip(self, *items): for it in iterate_along_segments_key: @@ -730,7 +774,7 @@ def contiguous(self): single_segment = Segment.concatenate(self.__segments) return Timeseries(single_segment, self.__sampling_frequency, self.unit, "Contiguous " + self.name) - def reshape(self, time_intervals:tuple[DateTimeRange]): + def reshape(self, time_intervals: tuple[DateTimeRange]): assert len(self.__segments) == 1 samples = self.__segments[0] partitions = [] @@ -738,7 +782,7 @@ def reshape(self, time_intervals:tuple[DateTimeRange]): for x in time_intervals: n_samples_required = ceil(x.timedelta.total_seconds() * self.__sampling_frequency) if n_samples_required > len(samples): - samples = tile(samples, ceil(n_samples_required/len(samples))) # repeat + samples = tile(samples, ceil(n_samples_required / len(samples))) # repeat samples = samples[:n_samples_required] # cut where it is enough partitions.append(Timeseries.__Segment(samples, x.start_datetime, self.__sampling_frequency)) i = 0 @@ -777,7 +821,8 @@ def __getstate__(self): 6: segments_state (list) """ segments_state = [segment.__getstate__() for segment in self.__segments] - return (self.__SERIALVERSION, self.__name, self.__sampling_frequency, self._Units, self.__is_equally_segmented, self.__tags, + return (self.__SERIALVERSION, self.__name, self.__sampling_frequency, self._Units, self.__is_equally_segmented, + self.__tags, segments_state) def __setstate__(self, state): diff --git a/src/ltbio/biosignals/_Timeseries.pyi b/src/ltbio/biosignals/_Timeseries.pyi index 983c9dcf..75c4db41 100644 --- a/src/ltbio/biosignals/_Timeseries.pyi +++ b/src/ltbio/biosignals/_Timeseries.pyi @@ -20,7 +20,6 @@ from typing import Tuple, Sequence from datetimerange import DateTimeRange from multimethod import multimethod -from multipledispatch import dispatch from numpy import ndarray from pandas import DataFrame From 6cf64a3f5089ec277a6615f55bc980320c331965 Mon Sep 17 00:00:00 2001 From: saraiva Date: Mon, 23 Oct 2023 11:20:01 +0200 Subject: [PATCH 41/47] Add Timeseries initializers tests --- .../Timeseries/test_initializers.py | 146 ++++++++++++++++++ 1 file changed, 146 insertions(+) create mode 100644 tests/biosignals/Timeseries/test_initializers.py diff --git a/tests/biosignals/Timeseries/test_initializers.py b/tests/biosignals/Timeseries/test_initializers.py new file mode 100644 index 00000000..e3840280 --- /dev/null +++ b/tests/biosignals/Timeseries/test_initializers.py @@ -0,0 +1,146 @@ +import unittest +from 
datetime import timedelta + +import numpy as np +from numpy import ndarray + +from ltbio._core.exceptions import EmptyTimeseriesError, OverlapingSegmentsError +from ltbio.biosignals import Timeseries +from resources.segments import get_segment +from resources.timeseries import start_a, start_b, start_c, sf_low, units_volt + + +class TimeseriesInitializersTestCase(unittest.TestCase): + @classmethod + def setUpClass(cls) -> None: + cls.seg1 = get_segment('small', 1) + cls.start1 = start_a + + cls.seg2 = get_segment('medium', 2) + cls.start2 = start_b + + cls.seg3 = get_segment('large', 3) + cls.start3 = start_c + + cls.sf = sf_low + cls.units = units_volt + + def test_initialize_discontiguous_timeseries(self): + timeseries = Timeseries({self.start1: self.seg1, self.start2: self.seg2, self.start3: self.seg3}, self.sf) + + # Assert type + self.assertIsInstance(timeseries, Timeseries) + + # Assert segments + segments = timeseries._Timeseries__segments + for (start, seg), (start_og, seg_og) in zip(segments.items(), ((self.start1, self.seg1), (self.start2, self.seg2), (self.start3, self.seg3))): + self.assertTrue(seg is seg_og) # same pointer + self.assertIsInstance(seg.samples, ndarray) # type + self.assertEqual(seg.samples.dtype, float) # dtype + self.assertTrue(all(seg.samples == seg_og.samples)) # content + self.assertEqual(start, start_og) # start timepoint + + # Assert sampling frequency + self.assertEqual(timeseries.sampling_frequency, self.sf) + + def test_initialize_contiguous_timeseries(self): + timeseries = Timeseries({self.start1: self.seg1}, self.sf) + + # Assert type + self.assertIsInstance(timeseries, Timeseries) + + # Assert segments + segments = timeseries._Timeseries__segments + self.assertEqual(len(segments), 1) + start, seg = list(segments.items())[0] + self.assertTrue(seg is self.seg1) # same pointer + self.assertIsInstance(seg.samples, ndarray) # type + self.assertEqual(seg.samples.dtype, float) # dtype + self.assertTrue(all(seg.samples == self.seg1.samples)) # content + self.assertEqual(start, self.start1) # start timepoint + + # Assert sampling frequency + self.assertEqual(timeseries.sampling_frequency, self.sf) + + def test_initialize_timeseries_with_sequence_samples(self): + for sequence in ([1, 2, 3], np.array([1, 2, 3]), (1, 2, 3)): + timeseries = Timeseries({self.start1: sequence}, self.sf) + + # Assert type + self.assertIsInstance(timeseries, Timeseries) + + # Assert segments + segments = timeseries._Timeseries__segments + self.assertEqual(len(segments), 1) + start, seg = list(segments.items())[0] + self.assertTrue(seg is self.seg1) # same pointer + self.assertIsInstance(seg.samples, ndarray) # type + self.assertEqual(seg.samples.dtype, float) # dtype + self.assertTrue(all(seg.samples == self.seg1.samples)) # content + self.assertEqual(start, self.start1) # start timepoint + + # Assert sampling frequency + self.assertEqual(timeseries.sampling_frequency, self.sf) + + def test_initialize_timeseries_with_units(self): + timeseries = Timeseries({self.start1: self.seg1, self.start2: self.seg2}, self.sf, self.units) + self.assertEqual(timeseries.unit, self.units) + + def test_initialize_timeseries_with_name(self): + timeseries = Timeseries({self.start1: self.seg1, self.start2: self.seg2}, self.sf, name="Test Timeseries") + self.assertEqual(timeseries.name, "Test Timeseries") + + def test_initialize_timeseries_with_no_segments_raises_error(self): + with self.assertRaises(EmptyTimeseriesError): + Timeseries({}, self.sf) + with self.assertRaises(EmptyTimeseriesError): + 
Timeseries({}) + with self.assertRaises(EmptyTimeseriesError): + Timeseries() + + def test_initialize_timeseries_with_no_sequence_samples_raises_error(self): + for seg in (1, 1.0, 1+1j, {}, set(), None): + with self.assertRaises(ValueError): + Timeseries({self.start1: seg}, self.sf) + + def test_initialize_timeseries_with_no_dates_raises_error(self): + for date in ('2023-01-01', 2023, 2023.0, 2023+1j, None): + with self.assertRaises(ValueError): + Timeseries({date: self.seg1}, self.sf) + + def test_initialize_timeseries_with_no_sampling_frequency_raises_error(self): + with self.assertRaises(ValueError): + Timeseries({self.start1: self.seg1}) + + def test_initialize_timeseries_with_not_number_sf_raises_error(self): + for sf in ("", "a", [], (), {}, set(), None): + with self.assertRaises(ValueError): + Timeseries({self.start1: self.seg1}, sf) + + def test_initialize_timeseries_with_not_Unit_unit_raises_error(self): + for unit in (1, 1.0, 1+1j, [], (), {}, set(), "volt"): + with self.assertRaises(ValueError): + Timeseries({self.start1: self.seg1}, self.sf, unit=unit) + + def test_initialize_timeseries_with_not_string_name_raises_error(self): + for name in (1, 1.0, 1+1j, [], (), {}, set()): + with self.assertRaises(ValueError): + Timeseries({self.start1: self.seg1}, self.sf, name=name) + + def test_initialize_timeseries_with_overlapping_segments_raises_error(self): + # Start at the same timepoint + with self.assertRaises(OverlapingSegmentsError): + Timeseries({self.start1: self.seg1, self.start1: self.seg2}, self.sf) + # Second one starts in the middle of the first one + with self.assertRaises(OverlapingSegmentsError): + Timeseries({self.start1: self.seg1, self.start1+timedelta(seconds=1): self.seg2}, self.sf) + # Second one starts exactly at the end of the first one + second_start = self.start1 + timedelta(seconds=len(self.seg1.samples) / self.sf / 2) + Timeseries({self.start1: self.seg1, second_start: self.seg2}, self.sf) # no error here + with self.assertRaises(OverlapingSegmentsError): + second_start -= timedelta(microseconds=1) # one microsecond (10e-6s) before the end of the first segment + Timeseries({self.start1: self.seg1, second_start: self.seg2}, self.sf) + + +if __name__ == '__main__': + unittest.main() From 4b42d12520b10618df87d1e34bcb978086dde35d Mon Sep 17 00:00:00 2001 From: saraiva Date: Sat, 28 Oct 2023 15:51:08 +0200 Subject: [PATCH 42/47] Fix get properties tests for Timeseries --- src/ltbio/_core/exceptions.py | 12 +- src/ltbio/biosignals/_Segment.py | 2 +- src/ltbio/biosignals/_Timeseries.py | 169 ++++++++++++------ src/ltbio/biosignals/_Timeseries.pyi | 3 +- .../Timeseries/test_initializers.py | 52 +++--- tests/resources/timeseries.py | 8 +- 6 files changed, 157 insertions(+), 89 deletions(-) diff --git a/src/ltbio/_core/exceptions.py b/src/ltbio/_core/exceptions.py index 81d38d5c..b3b69355 100644 --- a/src/ltbio/_core/exceptions.py +++ b/src/ltbio/_core/exceptions.py @@ -30,11 +30,7 @@ class EmptyTimeseriesError(TimeseriesError): def __init__(self): super().__init__(f"Trying to create a Timeseries with no samples.") -class OverlapingSegmentsError(TimeseriesError): - def __init__(self, first_start: datetime, first_end: datetime, second_start: datetime, second_end: datetime): - super().__init__(f"Trying to add two overlapping segments to a Timeseries. " - f"First Segment starts at {first_start} and ends at {first_end}. 
" - f"Second Segment starts at {second_start} and ends at {second_end}.") + class IncompatibleTimeseriesError(Exception): @@ -110,6 +106,12 @@ def __init__(self, what: str): super().__init__(f"There is an overlap between {what}") +class OverlappingSegmentsError(TimeseriesError, OverlappingError): + def __init__(self, first_start: datetime, first_end: datetime, second_start: datetime, second_end: datetime): + super().__init__(f"two segments to be added to the Timeseries. " + f"First Segment starts at {first_start} and ends at {first_end}. " + f"Second Segment starts at {second_start} and ends at {second_end}.") + class TimeseriesOverlappingError(OverlappingError): def __init__(self, first, second, *overlap: DateTimeRange): super().__init__(f"Timeseries {first} and Timeseries {second}" + f" on {overlap}." if overlap else ".") diff --git a/src/ltbio/biosignals/_Segment.py b/src/ltbio/biosignals/_Segment.py index b6a0eeef..93a37524 100644 --- a/src/ltbio/biosignals/_Segment.py +++ b/src/ltbio/biosignals/_Segment.py @@ -51,7 +51,7 @@ def __init__(self, samples: ndarray | Sequence[float]): """ # Save samples - self.__samples = np.array(samples, dtype=float) + self.__set_samples(np.array(samples, dtype=float)) # =================================== # BUILT-INS (Basics) diff --git a/src/ltbio/biosignals/_Timeseries.py b/src/ltbio/biosignals/_Timeseries.py index 0191cd18..cd3ce3b6 100644 --- a/src/ltbio/biosignals/_Timeseries.py +++ b/src/ltbio/biosignals/_Timeseries.py @@ -19,7 +19,7 @@ from math import ceil from os.path import join from tempfile import mkstemp -from typing import List, Iterable, Collection, Dict, Tuple, Callable, Sequence, Union +from typing import List, Iterable, Collection, Dict, Tuple, Callable, Sequence, Union, Any import matplotlib.pyplot as plt import numpy as np @@ -35,7 +35,7 @@ from ._Timeline import Timeline from .units import Unit, Frequency from .._core.exceptions import DifferentSamplingFrequenciesError, DifferentUnitsError, TimeseriesOverlappingError, \ - DifferentDomainsError, EmptyTimeseriesError + DifferentDomainsError, EmptyTimeseriesError, OverlappingSegmentsError from .._core.operations import Operator, Operation @@ -145,39 +145,96 @@ class Timeseries(): https://github.com/jomy-kk/IT-LongTermBiosignals/wiki/%5BClass%5D-Timeseries """ - # INITIALIZERS - @multimethod - def __init__(self, segments_by_time=None, sampling_frequency=None, unit=None, name=None): + def __check_valid_segment(self, segment): + if not isinstance(segment, Segment): + raise TypeError(f"{segment} is not a Segment.") + + def __check_valid_datetime(self, x): + if not isinstance(x, datetime): + raise TypeError(f"{x} is not a datetime.") + + def __add_segment(self, start: datetime, segment: Segment): + self.__check_valid_datetime(start) + self.__check_valid_segment(segment) + if not hasattr(self, "_Timeseries__segments"): + self.__segments = OrderedDict() + + # Check overlap + #if Timeline.overlap(self.domain, DateTimeRange(start, segment.end)): # TODO; in the future should be like this + candidate_interval = self.__get_segment_domain(start, segment) + for s, S in self.__segments.items(): + S_interval = self.__get_segment_domain(s, S) + if candidate_interval.is_intersection(S_interval) and candidate_interval.end_datetime != S_interval.start_datetime and candidate_interval.start_datetime != S_interval.end_datetime: + raise OverlappingSegmentsError(candidate_interval.start_datetime, candidate_interval.end_datetime, + S_interval.start_datetime, S_interval.end_datetime) + + # Add segment + 
self.__segments[start] = segment + + def __get_segment_end(self, start: datetime, segment: Segment) -> datetime: + return start + timedelta(seconds=len(segment) / self.sampling_frequency) + + def __get_segment_domain(self, start: datetime, segment: Segment) -> DateTimeRange: + return DateTimeRange(start, self.__get_segment_end(start, segment)) + + def __check_valid_segments(self, segments_by_time): """ - Type-checking and validation of the parameters. + Checks if: + - It's not None + - It's a dict + - It's not empty + - All keys are datetimes + - All values are Segments """ - # Segments if segments_by_time is None: raise EmptyTimeseriesError() - else: - if not isinstance(segments_by_time, dict): - raise ValueError(f"Invalid segments: {segments_by_time}") - if len(segments_by_time) == 0: - raise EmptyTimeseriesError() - for start, segment in segments_by_time.items(): - if not isinstance(start, datetime): - raise ValueError(f"Invalid start datetime: {start}") - if not isinstance(segment, Segment): - raise ValueError(f"Invalid Segment: {segment}") - # Sampling frequency + if not isinstance(segments_by_time, dict): + raise TypeError(f"Invalid segments: {segments_by_time}. Must be a dictionary.") + if len(segments_by_time) == 0: + raise EmptyTimeseriesError() + for start, segment in segments_by_time.items(): + self.__check_valid_datetime(start) + self.__check_valid_segment(segment) + + def __check_valid_sampling_frequency(self, sampling_frequency): if sampling_frequency is None: raise ValueError("Sampling frequency is required.") elif not isinstance(sampling_frequency, (float, int)): - raise ValueError(f"Invalid sampling frequency: {sampling_frequency}") - # Unit + raise TypeError(f"Invalid sampling frequency: {sampling_frequency}") + + def __check_valid_unit(self, unit): if unit is not None and not isinstance(unit, Unit): - raise ValueError(f"Invalid unit: {unit}") - # Name + raise TypeError(f"Invalid unit: {unit}") + + def __check_valid_name(self, name): if name is not None and not isinstance(name, str): - raise ValueError(f"Invalid name: {name}") + raise TypeError(f"Invalid name: {name}") + + def __set_sampling_frequency(self, sampling_frequency: float): + self.__check_valid_sampling_frequency(sampling_frequency) + self.__sampling_frequency = sampling_frequency if isinstance(sampling_frequency, Frequency) else Frequency(sampling_frequency) + + def __set_unit(self, unit: Unit): + self.__check_valid_unit(unit) + self.__unit = unit + + def __set_name(self, name: str): + self.__check_valid_name(name) + self.__name = name + # INITIALIZERS @multimethod - def __init__(self, segments_by_time: dict[datetime, ndarray | Sequence[float] | Segment], sampling_frequency: float, + def __init__(self, segments_by_time=None, sampling_frequency=None, unit=None, name=None): + """ + Type-checking and validation of the parameters, in case multimethod dispatching fails. + """ + self.__check_valid_segments(segments_by_time) + self.__check_valid_sampling_frequency(sampling_frequency) + self.__check_valid_unit(unit) + self.__check_valid_name(name) + + @multimethod + def __init__(self, segments_by_time: dict[datetime, Segment | ndarray | Sequence], sampling_frequency: float, unit: Unit = None, name: str = None): """ Give one or multiple instantiated Segments. @@ -201,28 +258,26 @@ def __init__(self, segments_by_time: dict[datetime, ndarray | Sequence[float] | name: str A symbolic name for the Timeseries. It is mentioned in plots, reports, error messages, etc. 
""" + # Metadata - self.__sampling_frequency = Frequency(sampling_frequency) - self.__unit = unit - self.__name = name + self.__set_sampling_frequency(sampling_frequency) + self.__set_unit(unit) + self.__set_name(name) + + # Sequences of floats -> Convert to Segments (optional) + if all([isinstance(seg, (Sequence, ndarray)) for seg in segments_by_time.values()]): + segments_by_time = {start: Segment(samples=seg) for start, seg in segments_by_time.items()} # Segments - if len(segments_by_time) == 0: - raise EmptyTimeseriesError() - self.__segments = OrderedDict() + self.__check_valid_segments(segments_by_time) for start, segment in segments_by_time.items(): - if not isinstance(start, datetime): raise TypeError() - if not isinstance(segment, Segment): - if not isinstance(segment, (ndarray, Sequence[float])): - raise TypeError() - segment = Segment(segment) - self.__segments[start] = segment + self.__add_segment(start, segment) # =================================== # Properties (Getters) @property def segments(self) -> tuple[Segment]: - return self.__segments + return tuple(self.__segments.values()) @property def __samples(self) -> ndarray: @@ -237,36 +292,40 @@ def sampling_frequency(self) -> float: """The frequency at which the samples were acquired, in Hz.""" return float(self.__sampling_frequency) + def __segment_duration(self, i: int) -> timedelta: + return timedelta(seconds=len(self.segments[i]) / self.sampling_frequency) + + def __segment_start(self, i: int) -> datetime: + return tuple(self.__segments.keys())[i] + + def __segment_end(self, i: int) -> datetime: + start = tuple(self.__segments.keys())[i] + return start + self.__segment_duration(i) + + @property + def duration(self) -> timedelta: + """The actual recorded time without interruptions.""" + return sum((self.__segment_duration(i) for i in range(self.n_segments)), timedelta()) + @property def start(self) -> datetime: """The date and time of the first sample.""" - return self.__segments[0].start # Is the initial datetime of the first Segment + return self.__segment_start(0) # Is the initial datetime of the first Segment @property def end(self) -> datetime: """The date and time of the last sample.""" - return self.__segments[-1].end # Is the final datetime of the last Segment - - def __segment_duration(self, segment: Segment) -> timedelta: - return timedelta(seconds=len(segment) / self.sampling_frequency) - - def __segment_end(self, segment: Segment) -> datetime: - return segment.start + self.__segment_duration(segment) - - @property - def duration(self) -> timedelta: - """The actual recorded time without interruptions.""" - return sum(self.__segment_duration(segment) for segment in self.__segments) + return self.__segment_end(-1) # Is the final datetime of the last Segment @property def domain(self) -> Timeline: - intervals = [DateTimeRange(segment.start, segment.end) for segment in self.__segments] - return Timeline(Timeline.Group(intervals=intervals), f"{self.name} Domain") + intervals = [DateTimeRange(self.__segment_start(i), self.__segment_end(i)) for i in range(self.n_segments)] + return Timeline(Timeline.Group(intervals=intervals), name=f"{self.name} Domain") @property def unit(self) -> Unit: """The physical unit at which the samples should be interpreted.""" - return self.__units + return self.__unit @property def name(self) -> str: @@ -285,7 +344,7 @@ def name(self, name: str) -> None: @property def is_contiguous(self) -> bool: """States if there are no interruptions in time.""" - return len(self.__segments) == 1 + 
return self.n_segments == 1 # =================================== # BUILT-INS (Basics) @@ -294,7 +353,7 @@ def __copy__(self) -> 'Timeseries': self.__units.__copy__(), self.__name.__copy__()) def __len__(self) -> int: - return sum([len(seg) for seg in self.__segments]) + return sum([len(seg) for seg in self.segments]) def __iter__(self) -> iter: for segment in self.__segments: diff --git a/src/ltbio/biosignals/_Timeseries.pyi b/src/ltbio/biosignals/_Timeseries.pyi index 75c4db41..f77e8b0c 100644 --- a/src/ltbio/biosignals/_Timeseries.pyi +++ b/src/ltbio/biosignals/_Timeseries.pyi @@ -33,13 +33,12 @@ class Timeseries(): # INITIALIZERS @multimethod - def __init__(self, segments_by_time: dict[datetime, ndarray[float] | Sequence[float] | Segment], sampling_frequency: float, + def __init__(self, segments_by_time: dict[datetime, Segment | ndarray | Sequence], sampling_frequency: float, units: Unit = None, name: str = None): ... # GETTERS @property def segments(self) -> tuple[Segment]: ... - @property def n_segments(self) -> int: ... @property diff --git a/tests/biosignals/Timeseries/test_initializers.py b/tests/biosignals/Timeseries/test_initializers.py index e3840280..49704c79 100644 --- a/tests/biosignals/Timeseries/test_initializers.py +++ b/tests/biosignals/Timeseries/test_initializers.py @@ -1,12 +1,12 @@ import unittest -from datetime import timedelta +from datetime import timedelta, datetime import numpy as np from numpy import ndarray -from ltbio._core.exceptions import EmptyTimeseriesError, OverlapingSegmentsError +from ltbio._core.exceptions import EmptyTimeseriesError, OverlappingSegmentsError from ltbio.biosignals import Timeseries -from resources.segments import get_segment +from resources.segments import get_segment, get_segment_length from resources.timeseries import start_a, start_b, start_c, sf_low, units_volt @@ -73,10 +73,9 @@ def test_initialize_timeseries_with_sequence_samples(self): segments = timeseries._Timeseries__segments self.assertEqual(len(segments), 1) start, seg = list(segments.items())[0] - self.assertTrue(seg is self.seg1) # same pointer self.assertIsInstance(seg.samples, ndarray) # type self.assertEqual(seg.samples.dtype, float) # dtype - self.assertTrue(all(seg.samples == self.seg1.samples)) # content + self.assertTrue(all(seg.samples == sequence)) # content self.assertEqual(start, self.start1) # start timepoint # Assert sampling frequency @@ -98,14 +97,14 @@ def test_initialize_timeseries_with_no_segments_raises_error(self): with self.assertRaises(EmptyTimeseriesError): Timeseries() - def test_initialize_timeseries_with_no_sequence_samples_raises_error(self): - for seg in (1, 1.0, 1+1j, {}, set(), None): - with self.assertRaises(ValueError): + def test_initialize_timeseries_with_not_sequence_samples_raises_error(self): + for seg in (1, 1.0, 1+1j, {}, set()): + with self.assertRaises(TypeError): Timeseries({self.start1: seg}, self.sf) - def test_initialize_timeseries_with_no_dates_raises_error(self): + def test_initialize_timeseries_with_not_dates_raises_error(self): for date in ('2023-01-01', 2023, 2023.0, 2023+1j, None): - with self.assertRaises(ValueError): + with self.assertRaises(TypeError): Timeseries({date: self.seg1}, self.sf) def test_initialize_timeseries_with_no_sampling_frequency_raises_error(self): @@ -113,33 +112,42 @@ def test_initialize_timeseries_with_no_sampling_frequency_raises_error(self): Timeseries({self.start1: self.seg1}) def test_initialize_timeseries_with_not_number_sf_raises_error(self): - for sf in ("", "a", [], (), {}, set(), None): 
- with self.assertRaises(ValueError): + for sf in ("", "a", [], (), {}, set()): + with self.assertRaises(TypeError): Timeseries({self.start1: self.seg1}, sf) def test_initialize_timeseries_with_not_Unit_unit_raises_error(self): for unit in (1, 1.0, 1+1j, [], (), {}, set(), "volt"): - with self.assertRaises(ValueError): + with self.assertRaises(TypeError): Timeseries({self.start1: self.seg1}, self.sf, unit=unit) def test_initialize_timeseries_with_not_string_name_raises_error(self): for name in (1, 1.0, 1+1j, [], (), {}, set()): - with self.assertRaises(ValueError): + with self.assertRaises(TypeError): Timeseries({self.start1: self.seg1}, self.sf, name=name) def test_initialize_timeseries_with_overlapping_segments_raises_error(self): - # Start at the same timepoint - with self.assertRaises(OverlapingSegmentsError): - Timeseries({self.start1: self.seg1, self.start1: self.seg2}, self.sf) + # Second one ends exactly at the start of the first one + first_start = datetime(2023, 1, 1, 12, 0, 0) # 12:00:00.0 + second_start = first_start - timedelta(seconds=get_segment_length('medium') / self.sf) # 11:59:54.0 + # Seg1: [12:00:00.0, 12:00:03.0[ || Seg2: [11:59:54.0, 12:00:00.0[ + Timeseries({first_start: self.seg1, second_start: self.seg2}, self.sf) # no error here + with self.assertRaises(OverlappingSegmentsError): + second_start += timedelta(microseconds=1) # one microsecond (10e-6s) after the start of the second segment + # Seg1: [12:00:00.0, 12:00:03.0[ || Seg2: [11:59:54.000001, 12:00:00.000001[ + Timeseries({first_start: self.seg1, second_start: self.seg2}, self.sf) + # Second one starts in the middle of the first one - with self.assertRaises(OverlapingSegmentsError): + with self.assertRaises(OverlappingSegmentsError): Timeseries({self.start1: self.seg1, self.start1+timedelta(seconds=1): self.seg2}, self.sf) + # Second one starts exactly at the end of the first one - second_start = self.start1 + timedelta(seconds=len(self.seg1.samples) / self.sf / 2) - Timeseries({self.start1: self.seg1, second_start: self.seg2}, self.sf) # no error here - with self.assertRaises(OverlapingSegmentsError): + first_start = datetime(2023, 1, 1, 12, 0, 0) # 12:00:00.0 + second_start = first_start + timedelta(seconds=get_segment_length('small') / self.sf) # 12:00:03.0 + Timeseries({first_start: self.seg1, second_start: self.seg2}, self.sf) # no error here + with self.assertRaises(OverlappingSegmentsError): second_start -= timedelta(microseconds=1) # one microsecond (10e-6s) before the end of the first segment - Timeseries({self.start1: self.seg1, second_start: self.seg2}, self.sf) + Timeseries({first_start: self.seg1, second_start: self.seg2}, self.sf) if __name__ == '__main__': diff --git a/tests/resources/timeseries.py b/tests/resources/timeseries.py index 68c08e4a..7dd5f6ad 100644 --- a/tests/resources/timeseries.py +++ b/tests/resources/timeseries.py @@ -50,11 +50,11 @@ def get_timeseries_end(length: str, discontiguous: bool, sf: str) -> datetime: return start_c + timedelta(seconds=get_segment_length('large') / sf) def get_timeseries_duration(length: str, discontiguous: bool, sf: str) -> timedelta: + if sf == 'low': + sf = sf_low + if sf == 'high': + sf = sf_high if not discontiguous: - if sf == 'low': - sf = sf_low - if sf == 'high': - sf = sf_high return timedelta(seconds=get_segment_length(length)/sf) else: if length == 'medium': From 96a089eff12d69e86e003be3ff1826185690c03c Mon Sep 17 00:00:00 2001 From: saraiva Date: Sat, 28 Oct 2023 15:51:19 +0200 Subject: [PATCH 43/47] Add get properties tests for 
Timeseries --- .../Timeseries/test_get_properties.py | 155 ++++++++++++++++++ 1 file changed, 155 insertions(+) create mode 100644 tests/biosignals/Timeseries/test_get_properties.py diff --git a/tests/biosignals/Timeseries/test_get_properties.py b/tests/biosignals/Timeseries/test_get_properties.py new file mode 100644 index 00000000..c46be071 --- /dev/null +++ b/tests/biosignals/Timeseries/test_get_properties.py @@ -0,0 +1,155 @@ +import unittest +from datetime import datetime, timedelta + +import numpy as np + +from ltbio.biosignals import Segment, Timeline, Timeseries +from ltbio.biosignals.units import Unit +from resources.timeseries import get_timeseries, get_timeseries_end, get_timeseries_duration, get_timeseries_name, \ + units_volt, units_siemens +from resources.timeseries import start_a, start_b +from resources.timeseries import sf_low, sf_high +from resources.segments import medium_samples_1, get_segment_length # for contiguous Timeseries +from resources.segments import small_samples_2, medium_samples_2 # for discontiguous Timeseries + + +class TimeseriesGetPropertiesTestCase(unittest.TestCase): + + @classmethod + def setUpClass(cls): + cls.contiguous_ts = get_timeseries('medium', 1, discontiguous=False, sf='low', units='volt') + cls.discontiguous_ts = get_timeseries('medium', 2, discontiguous=True, sf='high', units='siemens') + + def test_get_segments(self): + # Contiguous + x = self.contiguous_ts.segments + self.assertTrue(isinstance(x, tuple)) + self.assertTrue(len(x) == 1) + self.assertTrue(isinstance(x[0], Segment)) + self.assertTrue(np.allclose(x[0].samples, medium_samples_1)) + + # Discontiguous + x = self.discontiguous_ts.segments + self.assertTrue(isinstance(x, tuple)) + self.assertTrue(len(x) == 2) + self.assertTrue(isinstance(x[0], Segment)) + self.assertTrue(np.allclose(x[0].samples, small_samples_2)) + self.assertTrue(isinstance(x[1], Segment)) + self.assertTrue(np.allclose(x[1].samples, medium_samples_2)) + + def test_get_n_segments(self): + # Contiguous + x = self.contiguous_ts.n_segments + self.assertTrue(isinstance(x, int)) + self.assertEqual(x, 1) + + # Discontiguous + x = self.discontiguous_ts.n_segments + self.assertTrue(isinstance(x, int)) + self.assertEqual(x, 2) + + def test_get_sampling_frequency(self): + # Contiguous + x = self.contiguous_ts.sampling_frequency + self.assertTrue(isinstance(x, float)) + self.assertEqual(x, sf_low) + + # Discontiguous + x = self.discontiguous_ts.sampling_frequency + self.assertTrue(isinstance(x, float)) + self.assertEqual(x, sf_high) + + def test_get_start(self): + # Contiguous + x = self.contiguous_ts.start + self.assertTrue(isinstance(x, datetime)) + self.assertEqual(x, start_a) + + # Discontiguous + x = self.discontiguous_ts.start + self.assertTrue(isinstance(x, datetime)) + self.assertEqual(x, start_a) + + def test_get_end(self): + # Contiguous + x = self.contiguous_ts.end + self.assertTrue(isinstance(x, datetime)) + self.assertEqual(x, get_timeseries_end('medium', False, 'low')) + + # Discontiguous + x = self.discontiguous_ts.end + self.assertTrue(isinstance(x, datetime)) + self.assertEqual(x, get_timeseries_end('medium', True, 'high')) + + def test_get_duration(self): + # Contiguous + x = self.contiguous_ts.duration + self.assertTrue(isinstance(x, timedelta)) + self.assertEqual(x, get_timeseries_duration('medium', False, 'low')) + + # Discontiguous + x = self.discontiguous_ts.duration + self.assertTrue(isinstance(x, timedelta)) + self.assertEqual(x, get_timeseries_duration('medium', True, 'high')) + + def 
test_get_domain(self): + # Contiguous + x = self.contiguous_ts.domain + self.assertTrue(isinstance(x, Timeline)) + intervals = x.single_group.intervals + self.assertTrue(len(intervals) == 1) + self.assertEqual(intervals[0].start_datetime, start_a) + self.assertEqual(intervals[0].end_datetime, get_timeseries_end('medium', False, 'low')) + + # Discontiguous + x = self.discontiguous_ts.domain + self.assertTrue(isinstance(x, Timeline)) + intervals = x.single_group.intervals + self.assertTrue(len(intervals) == 2) + self.assertEqual(intervals[0].start_datetime, start_a) + self.assertEqual(intervals[0].end_datetime, start_a + timedelta(seconds=get_segment_length('small')/sf_high)) + self.assertEqual(intervals[1].start_datetime, start_b) + self.assertEqual(intervals[1].end_datetime, get_timeseries_end('medium', True, 'high')) + + def test_get_unit(self): + # Contiguous + x = self.contiguous_ts.unit + self.assertTrue(isinstance(x, Unit)) + self.assertEqual(x, units_volt) + + # Discontiguous + x = self.discontiguous_ts.unit + self.assertTrue(isinstance(x, Unit)) + self.assertEqual(x, units_siemens) + + def test_get_unit_when_not_set(self): + ts = Timeseries({start_a: medium_samples_1}, sf_low) + x = ts.unit + self.assertEqual(x, None) + + def test_get_name(self): + # Contiguous + x = self.contiguous_ts.name + self.assertTrue(isinstance(x, str)) + self.assertEqual(x, get_timeseries_name(1)) + + # Discontiguous + x = self.discontiguous_ts.name + self.assertTrue(isinstance(x, str)) + self.assertEqual(x, get_timeseries_name(2)) + + def test_get_name_when_not_set(self): + ts = Timeseries({start_a: medium_samples_1}, sf_low) + x = ts.name + self.assertEqual(x, None) + + def test_is_contiguous(self): + # Contiguous + x = self.contiguous_ts.is_contiguous + self.assertTrue(isinstance(x, bool)) + self.assertEqual(x, True) + + # Discontiguous + x = self.discontiguous_ts.is_contiguous + self.assertTrue(isinstance(x, bool)) + self.assertEqual(x, False) From dda516867fc9ce2b9d672a0ab586bba3991a3b94 Mon Sep 17 00:00:00 2001 From: saraiva Date: Sat, 28 Oct 2023 15:58:14 +0200 Subject: [PATCH 44/47] Add tests for set properties in Timeseries --- .../Timeseries/test_set_properties.py | 60 +++++++++++++++++++ 1 file changed, 60 insertions(+) create mode 100644 tests/biosignals/Timeseries/test_set_properties.py diff --git a/tests/biosignals/Timeseries/test_set_properties.py b/tests/biosignals/Timeseries/test_set_properties.py new file mode 100644 index 00000000..e3ccabbf --- /dev/null +++ b/tests/biosignals/Timeseries/test_set_properties.py @@ -0,0 +1,60 @@ +import unittest + +from numpy import allclose + +from ltbio.biosignals.units import Siemens +from resources.segments import small_samples_1, small_samples_2 +from resources.timeseries import get_timeseries, get_timeseries_name + + +class TimeseriesSegmentSetPropertiesTestCase(unittest.TestCase): + + @classmethod + def setUp(cls): + cls.contiguous_ts = get_timeseries('medium', 1, discontiguous=False, sf='low', units='volt') + cls.discontiguous_ts = get_timeseries('medium', 2, discontiguous=True, sf='high', units='siemens') + + def test_set_name(self): + new_name = 'New Name' + + # Contiguous + self.assertEqual(self.contiguous_ts.name, get_timeseries_name(1)) # Old value + self.contiguous_ts.name = new_name + self.assertEqual(self.contiguous_ts.name, new_name) # New value + + # Discontiguous + self.assertEqual(self.discontiguous_ts.name, get_timeseries_name(2)) # Old value + self.discontiguous_ts.name = new_name + self.assertEqual(self.discontiguous_ts.name, 
new_name) # New value
+
+    def test_set_segments_raises_error(self):
+        with self.assertRaises(AttributeError):
+            self.contiguous_ts.segments = (small_samples_1,)
+
+    def test_set_n_segments_raises_error(self):
+        with self.assertRaises(AttributeError):
+            self.contiguous_ts.n_segments = 2
+
+    def test_set_sampling_frequency_raises_error(self):
+        with self.assertRaises(AttributeError):
+            self.contiguous_ts.sampling_frequency = 1000
+
+    def test_set_start_raises_error(self):
+        with self.assertRaises(AttributeError):
+            self.contiguous_ts.start = '2019-01-01 00:00:00'
+
+    def test_set_units_raises_error(self):
+        with self.assertRaises(AttributeError):
+            self.contiguous_ts.unit = Siemens()
+
+    def test_set_duration_raises_error(self):
+        with self.assertRaises(AttributeError):
+            self.contiguous_ts.duration = 10
+
+    def test_set_end_raises_error(self):
+        with self.assertRaises(AttributeError):
+            self.contiguous_ts.end = '2019-01-01 00:00:00'
+
+
+if __name__ == '__main__':
+    unittest.main()

From 1cfad16d958487c44f2ca97c791d5f9b68ec5628 Mon Sep 17 00:00:00 2001
From: saraiva
Date: Mon, 30 Oct 2023 02:40:28 +0100
Subject: [PATCH 45/47] Fix tests for set properties in Timeseries

---
 src/ltbio/biosignals/_Timeseries.py           | 50 +++++++++++++++----
 src/ltbio/biosignals/_Timeseries.pyi          |  6 +--
 .../Timeseries/test_set_properties.py         |  4 +-
 3 files changed, 41 insertions(+), 19 deletions(-)

diff --git a/src/ltbio/biosignals/_Timeseries.py b/src/ltbio/biosignals/_Timeseries.py
index cd3ce3b6..518e4c5e 100644
--- a/src/ltbio/biosignals/_Timeseries.py
+++ b/src/ltbio/biosignals/_Timeseries.py
@@ -1,4 +1,5 @@
 # -*- encoding: utf-8 -*-
+import operator
 from collections import OrderedDict

 # ===================================
@@ -302,6 +303,10 @@ def __segment_end(self, i: int) -> datetime:
         start = tuple(self.__segments.keys())[i]
         return start + self.__segment_duration(i)

+    def __set_segment(self, i: int, segment: Segment):
+        key = tuple(self.__segments.keys())[i]  # the i-th start datetime; writing an int key would corrupt the OrderedDict
+        self.__segments[key] = segment
+
     @property
     def duration(self) -> timedelta:
         """The actual recorded time without interruptions."""
@@ -349,15 +353,17 @@ def is_contiguous(self) -> bool:
     # ===================================
     # BUILT-INS (Basics)
     def __copy__(self) -> 'Timeseries':
-        return Timeseries([seg.__copy__() for seg in self.__segments], self.sampling_frequency.__copy__(),
-                          self.__units.__copy__(), self.__name.__copy__())
+        return Timeseries({start: seg.__copy__() for start, seg in self.__segments.items()},
+                          self.sampling_frequency, self.__unit, self.__name)
+
+    def __str__(self) -> str:
+        return f"Timeseries ({len(self)})"

     def __len__(self) -> int:
         return sum([len(seg) for seg in self.segments])

     def __iter__(self) -> iter:
-        for segment in self.__segments:
-            yield from segment
+        return iter(self.__segments.items())  # Use default iterator

     @multimethod
     def __contains__(self, item: datetime | DateTimeRange) -> bool:
@@ -530,6 +536,33 @@ def _unary_operation(cls, timeseries: 'Timeseries', operation: Callable, operato
         return Timeseries(segments=new_segments, sampling_frequency=first.sampling_frequency,
                           unit=first.unit, name=timeseries.name + ' ' + operator_string)

+    def __binary_arithmetics(self, other, operation: Callable, inplace=False):
+        if inplace:
+            if type(other) is Timeseries:
+                Timeseries._check_meta_compatibility(self, other)
+                Timeseries._check_domain_compatibility(self, other)
+                for i in range(self.n_segments):
+                    self.__set_segment(i, operation(self.segments[i], other.segments[i]))
+            elif type(other) in (float, int):
+                for i in range(self.n_segments):
+                    self.__set_segment(i, 
operation(self.segments[i], other)) + else: + raise TypeError(f"Arithmetic operation between Timeseries and {type(other)} not allowed. " + f"Second operator should be a number or another Timeseries.") + return self + else: + if type(other) is Timeseries: + Timeseries._check_meta_compatibility(self, other) + Timeseries._check_domain_compatibility(self, other) + return Timeseries({start: operation(self.segments[i], other.segments[i]) for i, start in enumerate(self.__segments.keys())}, + self.sampling_frequency, self.unit, self.name) + elif type(other) in (float, int): + return Timeseries({start: operation(self.segments[i], other) for i, start in enumerate(self.__segments.keys())}, + self.sampling_frequency, self.unit, self.name) + else: + raise TypeError(f"Arithmetic operation between Timeseries and {type(other)} not allowed. " + f"Second operator should be a number or another Timeseries.") + @multimethod def __add__(self, other: 'Timeseries') -> 'Timeseries': return Timeseries._binary_operation(lambda x, y: x + y, '+', self, other) @@ -546,13 +579,8 @@ def __sub__(self, other: 'Timeseries') -> 'Timeseries': def __sub__(self, other: float) -> 'Timeseries': return Timeseries._unary_operation(self, lambda x: x - other, f'- {other}') - @multimethod - def __mul__(self, other: 'Timeseries') -> 'Timeseries': - return Timeseries._binary_operation(lambda x, y: x * y, '*', self, other) - - @multimethod - def __mul__(self, other: float) -> 'Timeseries': - return Timeseries._unary_operation(self, lambda x: x * other, f'* {other}') + def __mul__(self, other) -> 'Timeseries': + return self.__binary_arithmetics(other, operator.mul, inplace=False) @multimethod def __truediv__(self, other: 'Timeseries') -> 'Timeseries': diff --git a/src/ltbio/biosignals/_Timeseries.pyi b/src/ltbio/biosignals/_Timeseries.pyi index f77e8b0c..e7cb6872 100644 --- a/src/ltbio/biosignals/_Timeseries.pyi +++ b/src/ltbio/biosignals/_Timeseries.pyi @@ -112,11 +112,7 @@ class Timeseries(): @multimethod def __sub__(self, other: float) -> 'Timeseries': ... - @multimethod - def __mul__(self, other: 'Timeseries') -> 'Timeseries': ... - - @multimethod - def __mul__(self, other: float) -> 'Timeseries': ... + def __mul__(self, other: 'Timeseries' | int | float) -> 'Timeseries': ... @multimethod def __truediv__(self, other: 'Timeseries') -> 'Timeseries': ... 
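The arithmetic entry points above all funnel into __binary_arithmetics, so the observable behaviour can be summarised with a short sketch. This is illustrative only, not part of the patch; it assumes the resource helpers the test suite imports (get_timeseries with the 'medium'/'low'/'volt' fixtures), and it assumes Segment supports element-wise multiplication by a scalar, which the copy test added later in this series (copied * 0) relies on:

    # Illustrative sketch (assumes the tests' resource helpers): how __mul__ dispatches.
    from resources.timeseries import get_timeseries

    ts = get_timeseries('medium', 1, discontiguous=False, sf='low', units='volt')

    scaled = ts * 2   # number on the right: every Segment is multiplied element-wise
                      # and a new Timeseries with the same metadata is returned
    zeroed = ts * 0   # the exact pattern the later test_copy exercises

    try:
        ts * 'two'    # any operand that is neither a number nor a Timeseries
    except TypeError as error:
        print(error)  # "Arithmetic operation between Timeseries and <class 'str'> not allowed. ..."

A Timeseries on the right takes the same path, after the metadata and domain compatibility checks shown above.
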
diff --git a/tests/biosignals/Timeseries/test_set_properties.py b/tests/biosignals/Timeseries/test_set_properties.py index e3ccabbf..3302b33a 100644 --- a/tests/biosignals/Timeseries/test_set_properties.py +++ b/tests/biosignals/Timeseries/test_set_properties.py @@ -1,9 +1,7 @@ import unittest -from numpy import allclose - from ltbio.biosignals.units import Siemens -from resources.segments import small_samples_1, small_samples_2 +from resources.segments import small_samples_1 from resources.timeseries import get_timeseries, get_timeseries_name From be64716cc31fb7477861e76662754ba13d651ccc Mon Sep 17 00:00:00 2001 From: saraiva Date: Mon, 30 Oct 2023 02:40:42 +0100 Subject: [PATCH 46/47] Add tests for Timeseries built-ins --- tests/biosignals/Timeseries/test_builtins.py | 81 ++++++++++++++++++++ 1 file changed, 81 insertions(+) create mode 100644 tests/biosignals/Timeseries/test_builtins.py diff --git a/tests/biosignals/Timeseries/test_builtins.py b/tests/biosignals/Timeseries/test_builtins.py new file mode 100644 index 00000000..66127dfb --- /dev/null +++ b/tests/biosignals/Timeseries/test_builtins.py @@ -0,0 +1,81 @@ +import unittest +from copy import copy +from datetime import datetime + +import numpy as np + +from ltbio.biosignals import Segment +from resources.timeseries import get_timeseries, start_a, start_b +from tests.resources.segments import get_segment_length, small_samples_2, medium_samples_1, medium_samples_2 + + +class TimeseriesBuiltinsTestCase(unittest.TestCase): + + @classmethod + def setUpClass(cls): + cls.contiguous_ts = get_timeseries('medium', 1, discontiguous=False, sf='low', units='volt') + cls.discontiguous_ts = get_timeseries('medium', 2, discontiguous=True, sf='high', units='siemens') + + def test_len(self): + self.assertEqual(len(self.contiguous_ts), get_segment_length('medium')) + self.assertEqual(len(self.discontiguous_ts), get_segment_length('small') + get_segment_length('medium')) + + def test_iter(self): + # Contiguous + x = iter(self.contiguous_ts) + key, value = next(x) + self.assertTrue(isinstance(key, datetime)) + self.assertEqual(key, start_a) + self.assertTrue(isinstance(value, Segment)) + self.assertTrue(np.allclose(value.samples, medium_samples_1)) + + # Discontiguous + x = iter(self.discontiguous_ts) + key, value = next(x) + self.assertTrue(isinstance(key, datetime)) + self.assertEqual(key, start_a) + self.assertTrue(isinstance(value, Segment)) + self.assertTrue(np.allclose(value.samples, small_samples_2)) + key, value = next(x) + self.assertTrue(isinstance(key, datetime)) + self.assertEqual(key, start_b) + self.assertTrue(isinstance(value, Segment)) + self.assertTrue(np.allclose(value.samples, medium_samples_2)) + + def test_str(self): # Assert the length is in the string representation + self.assertIn(str(get_segment_length('medium')), str(self.contiguous_ts)) + self.assertIn(str(get_segment_length('small')+get_segment_length('medium')), str(self.discontiguous_ts)) + + def test_repr(self): + self.test_str() + + """ + def test_hash(self): + pass + """ + + def test_copy(self): + # Contiguous + copied = copy(self.contiguous_ts) + self.assertFalse(self.contiguous_ts is copied) # Assert objects are different + self.assertFalse(self.contiguous_ts.segments[0].samples is copied.segments[0].samples) # Assert pointers are different + self.assertTrue(np.allclose(self.contiguous_ts.segments[0].samples, copied.segments[0].samples)) # Assert content is the same + # Assert what happens to the copied does not affect the original + copied_modified = copied * 0 + 
self.assertFalse(np.allclose(self.contiguous_ts.segments[0].samples, copied_modified.segments[0].samples))
+        self.assertTrue(np.allclose(self.contiguous_ts.segments[0].samples, copied.segments[0].samples))
+
+        # Discontiguous
+        copied = copy(self.discontiguous_ts)
+        self.assertFalse(self.discontiguous_ts is copied)  # Assert objects are different
+        self.assertFalse(self.discontiguous_ts.segments[0].samples is copied.segments[0].samples)  # Assert pointers are different
+        self.assertTrue(np.allclose(self.discontiguous_ts.segments[0].samples, copied.segments[0].samples))  # Assert content is the same
+        # Assert what happens to the copied does not affect the original
+        copied_modified = copied * 0
+        self.assertFalse(np.allclose(self.discontiguous_ts.segments[0].samples, copied_modified.segments[0].samples))
+        self.assertTrue(np.allclose(self.discontiguous_ts.segments[0].samples, copied.segments[0].samples))
+
+
+if __name__ == '__main__':
+    unittest.main()

From ce2f5739e43399742a0070bdf11d72986b24dba2 Mon Sep 17 00:00:00 2001
From: saraiva
Date: Thu, 30 Nov 2023 11:12:27 +0100
Subject: [PATCH 47/47] Add tests for Timeseries indexing

---
 src/ltbio/biosignals/_Timeseries.py          | 158 +++++++++++++------
 src/ltbio/biosignals/_Timeseries.pyi         |   4 +-
 tests/biosignals/Timeseries/test_indexing.py | 129 +++++++++++++++
 3 files changed, 243 insertions(+), 48 deletions(-)
 create mode 100644 tests/biosignals/Timeseries/test_indexing.py

diff --git a/src/ltbio/biosignals/_Timeseries.py b/src/ltbio/biosignals/_Timeseries.py
index 518e4c5e..22d1b1dc 100644
--- a/src/ltbio/biosignals/_Timeseries.py
+++ b/src/ltbio/biosignals/_Timeseries.py
@@ -30,6 +30,7 @@
 from multimethod import multimethod
 from numpy import array, append, ndarray, divide, concatenate, tile, memmap
 from scipy.signal import resample
+from re import match, search

 from ._Event import Event
 from ._Segment import Segment
@@ -376,40 +377,100 @@
     # BUILT-INS (Indexing)
     @multimethod
-    def __getitem__(self, item: int) -> Segment:
-        ...
+    def __getitem__(self, item: int) -> float:
+        lengths = [len(seg) for seg in self.segments]
+        if item >= 0:
+            for i, length in enumerate(lengths):
+                if item < length:
+                    return self.segments[i][item]
+                else:
+                    item -= length
+            raise IndexError("Index out of range")
+        else:
+            for i, length in enumerate(reversed(lengths)):
+                if -item <= length:
+                    return self.segments[-i - 1][item]
+                else:
+                    item += length
+            raise IndexError("Index out of range")

     @multimethod
     def __getitem__(self, item: datetime) -> float:
-        return self.__get_samples(item).samples[0]
+        return self.__get_sample(item)
+
+    @property
+    def __in_single_day(self) -> bool:
+        """States if the Timeseries is self-contained in the day."""
+        return self.start.date() == self.end.date()

     @multimethod
     def __getitem__(self, item: str):
-        return self[to_datetime(item)]
+        time_pattern = r'\d{2}:\d{2}:\d{2}'  # HH:MM:SS format
+        day_time_pattern = r'\d{4}-\d{2}-\d{2} \d{2}:\d{2}:\d{2}'  # YYYY-MM-DD HH:MM:SS format
+
+        # If it contains a time (searched anywhere in the string, so full timestamps also pass):
+        if search(time_pattern, item):  # Has time?
+            if match(day_time_pattern, item):  # Has date?
+                pass
+            else:
+                if self.__in_single_day:
+                    item = f"{self.start.strftime('%Y-%m-%d')} {item}"
+                else:
+                    raise ValueError(f"Invalid timestamp ({item}) without date. Absence of date is only allowed for "
+                                     f"Timeseries that start and end on the same day. "
+                                     f"Must be in the format YYYY-MM-DD HH:MM:SS or HH:MM:SS.")
+        else:
+            raise ValueError(f"Invalid timestamp ({item}) without time. "
+                             f"Must be in the format YYYY-MM-DD HH:MM:SS or HH:MM:SS.")
+
+        # Convert to datetime
+        try:
+            item = to_datetime(item)
+        except Exception:
+            raise ValueError(f"Invalid timestamp ({item}). Must be in the format YYYY-MM-DD HH:MM:SS or HH:MM:SS.")
+
+        # Call __getitem__(datetime)
+        return self[item]

     @multimethod
     def __getitem__(self, item: slice):
         # Discard step
         if item.step is not None:
             raise IndexError("Indexing with step is not allowed for Timeseries. Try downsample it first.")
+
         # Get start and end
-        start = item.start if item.start is not None else self.start
-        end = item.stop if item.stop is not None else self.end
-        # Convert to datetime, if needed
-        start = to_datetime(start) if isinstance(start, str) else start
-        end = to_datetime(end) if isinstance(end, str) else end
-        # Get the samples
-        return Timeseries(segments=self.__get_samples(start, end), sampling_frequency=self.sampling_frequency,
-                          unit=self.unit, name=self.name)
+        start, end = item.start, item.stop
+
+        # Str timestamps -> datetimes
+        if isinstance(start, str):
+            start = to_datetime(start)
+        if isinstance(end, str):
+            end = to_datetime(end)
+
+        # Defaults depend on whether the slice is positional (int) or temporal (datetime)
+        if isinstance(start, int) or isinstance(end, int):
+            start = 0 if start is None else start
+            end = len(self) if end is None else end
+            if start < 0:  # resolve negative positions
+                start += len(self)
+            if end < 0:
+                end += len(self)
+        else:
+            start = self.start if start is None else start
+            end = self.end if end is None else end
+
+        # Int positions
+        if isinstance(start, int) and isinstance(end, int):
+            segments = self.__get_samples(start, end)
+        # Datetimes (given directly or parsed from string timestamps)
+        elif isinstance(start, datetime) and isinstance(end, datetime):
+            segments = self.__get_samples(start, end)
+        else:
+            raise TypeError(f"Invalid slice: {item}. Must be a pair of integers, datetimes or string timestamps.")
+
+        return Timeseries(segments, self.sampling_frequency, self.unit, self.name)

     @multimethod
     def __getitem__(self, item: DateTimeRange):
         return self[item.start_datetime:item.end_datetime]

+    """
     @multimethod
     def __getitem__(self, item: tuple):
         # Get each result individually
         sub_timeseries = [self[ix] for ix in item]
         return Timeseries.concatenate(sub_timeseries)
+    """

     @multimethod
     def __getitem__(self, item: Timeline):
@@ -666,63 +727,68 @@ def diff(self) -> 'Timeseries':

     # INTERNAL USAGE - Convert indexes <-> timepoints && Get Samples
     def __get_sample(self, datetime: datetime) -> float:
-        self.__check_boundaries(datetime)
-        for segment in self.__segments:  # finding the first Segment
-            if datetime in segment:
-                return segment[int((datetime - segment.start).total_seconds() * self.sampling_frequency)]
-        raise IndexError("Datetime given is in not defined in this Timeseries.")
+        segment_ix = self.__check_boundaries(datetime)
+        segment = self.segments[segment_ix]
+        start_datetime = self.__segment_start(segment_ix)
+        return segment[int((datetime - start_datetime).total_seconds() * self.sampling_frequency)]

-    def __get_samples(self, initial_datetime: datetime, final_datetime: datetime) -> List[Segment]:
-        '''Returns the samples between the given initial and end datetimes.'''
-        self.__check_boundaries(initial_datetime)
-        self.__check_boundaries(final_datetime)
-        res_segments = []
-        for i in range(len(self.__segments)):  # finding the first Segment
-            segment = self.__segments[i]
-            if segment.start <= initial_datetime <= segment.end:
-                if final_datetime <= segment.end:
-                    trimmed_segment = segment[int((initial_datetime - segment.start).total_seconds() * self.sampling_frequency):int(
-                        (final_datetime - segment.start).total_seconds() * self.sampling_frequency)]
-                    res_segments.append(trimmed_segment)
-                    return res_segments
-                else:
-                    if not initial_datetime == segment.end:  # skip what would be an empty set
-                        trimmed_segment = segment[int((initial_datetime - segment.start).total_seconds() * self.sampling_frequency):]
-                        res_segments.append(trimmed_segment)
-                    for j in range(i + 1, len(self.__segments)):  # adding the remaining samples, until the last Segment is found
-                        segment = self.__segments[j]
-                        if final_datetime <= segment.end:
-                            trimmed_segment = segment[:int((final_datetime - segment.start).total_seconds() * self.sampling_frequency)]
-                            res_segments.append(trimmed_segment)
-                            return res_segments
-                        else:
-                            trimmed_segment = segment[:]
-                            res_segments.append(trimmed_segment)
+    @multimethod
+    def __get_samples(self, initial_datetime: datetime, final_datetime: datetime) -> dict[datetime, Segment]:
+        '''Returns the trimmed Segments between the given datetimes, keyed by their (possibly new) start.'''
+        res_segments = {}
+        for i in range(self.n_segments):
+            segment = self.segments[i]
+            start, end = self.__segment_start(i), self.__segment_end(i)
+            lo, hi = max(start, initial_datetime), min(end, final_datetime)
+            if lo < hi:  # this Segment intersects the requested window
+                lo_ix = int((lo - start).total_seconds() * self.sampling_frequency)
+                hi_ix = int((hi - start).total_seconds() * self.sampling_frequency)
+                res_segments[lo] = segment[lo_ix:hi_ix]
+        return res_segments
+
+    @multimethod
+    def __get_samples(self, start: int, end: int) -> dict[datetime, Segment]:
+        res_segments = {}
+        offset = 0  # number of samples in the Segments already visited
+        for i, segment in enumerate(self.segments):
+            lo, hi = max(start - offset, 0), min(end - offset, len(segment))
+            if lo < hi:  # this Segment contributes samples to [start, end[
+                new_start = self.__segment_start(i) + timedelta(seconds=lo / self.sampling_frequency)
+                res_segments[new_start] = segment[lo:hi]
+            offset += len(segment)
+        return res_segments

-    def __check_boundaries(self, datetime_or_range: datetime | DateTimeRange) -> None:
-        intersects = False
-        if isinstance(datetime_or_range, datetime):
-            for subdomain in self.domain:
-                if datetime_or_range in subdomain:
-                    intersects = True
-                    break
-            if not intersects:
-                raise IndexError(
-                    f"Datetime given is outside of Timeseries domain, {' U '.join([f'[{subdomain.start_datetime}, {subdomain.end_datetime}[' for subdomain in self.domain])}.")
-        elif isinstance(datetime_or_range, DateTimeRange):
-            for subdomain in self.domain:
-                if subdomain.is_intersection(datetime_or_range) and datetime_or_range.start_datetime != subdomain.end_datetime:
-                    intersects = True
-                    break
-            if not intersects:
-                raise IndexError(
-                    f"Interval given is outside of Timeseries domain, {' U '.join([f'[{subdomain.start_datetime}, {subdomain.end_datetime}[' for subdomain in self.domain])}.")
+    def __check_boundaries(self, datetime_or_range: datetime | DateTimeRange) -> int:
+        domain = self.domain.single_group.intervals
+
+        if isinstance(datetime_or_range, datetime):
+            for i, interval in enumerate(domain):
+                if datetime_or_range in interval:  # then it is inside this interval
+                    return i  # Return the Segment index
+            raise IndexError(f"Datetime given is outside of Timeseries domain.")
+
+        elif isinstance(datetime_or_range, DateTimeRange):
+            for i, interval in enumerate(domain):
+                if interval.is_intersection(datetime_or_range) and datetime_or_range.start_datetime != interval.end_datetime:
+                    return i  # Return the Segment index
+            raise IndexError(f"Interval given is outside of Timeseries domain.")

     def _indices_to_timepoints(self, indices: list[list[int]], by_segment=False) -> tuple[datetime] | tuple[
         list[datetime]]:
diff --git a/src/ltbio/biosignals/_Timeseries.pyi b/src/ltbio/biosignals/_Timeseries.pyi
index e7cb6872..8e18d83c 100644
--- 
a/src/ltbio/biosignals/_Timeseries.pyi +++ b/src/ltbio/biosignals/_Timeseries.pyi @@ -86,8 +86,8 @@ class Timeseries(): def __getitem__(self, item: Timeline) -> float | Timeseries: ... @multimethod def __getitem__(self, item: slice) -> float | Timeseries: ... - @multimethod - def __getitem__(self, item: tuple) -> float | Timeseries: ... + #@multimethod + #def __getitem__(self, item: tuple) -> float | Timeseries: ... # BUILT-INS (Joining Timeseries) @multimethod diff --git a/tests/biosignals/Timeseries/test_indexing.py b/tests/biosignals/Timeseries/test_indexing.py new file mode 100644 index 00000000..4557304c --- /dev/null +++ b/tests/biosignals/Timeseries/test_indexing.py @@ -0,0 +1,129 @@ +import unittest +from datetime import timedelta + +import numpy as np +from numpy import allclose + +from dateutil.parser import parse as to_datetime +from resources.segments import medium_samples_1, get_segment_length, small_samples_2, medium_samples_2 +from resources.timeseries import get_timeseries, start_a, sf_low, get_timeseries_end, sf_high, start_b + + +class TimeseriesIndexingTestCase(unittest.TestCase): + + @classmethod + def setUpClass(cls): + cls.contiguous_ts = get_timeseries('medium', 1, discontiguous=False, sf='low', units='volt') + cls.discontiguous_ts = get_timeseries('medium', 2, discontiguous=True, sf='high', units='siemens') + + # Test indexing with timestamps + # (timestamp, expected sample) + cls.CONTIGUOUS_START = (start_a, medium_samples_1[0]) + cls.CONTIGUOUS_MIDDLE1 = (start_a + timedelta(seconds=1 / sf_low) * 1, medium_samples_1[1]) + cls.CONTIGUOUS_MIDDLE2 = (start_a + timedelta(seconds=1 / sf_low) * 2, medium_samples_1[2]) + cls.CONTIGUOUS_END = (get_timeseries_end('medium', False, 'low') - timedelta(seconds=1 / sf_low), medium_samples_1[-1]) + cls.DISCONTIGUOUS_SEG1_START = (start_a, small_samples_2[0]) + cls.DISCONTIGUOUS_SEG1_MIDDLE1 = (start_a + timedelta(seconds=1 / sf_high) * 1, small_samples_2[1]) + cls.DISCONTIGUOUS_SEG1_MIDDLE2 = (start_a + timedelta(seconds=1 / sf_high) * 2, small_samples_2[2]) + cls.DISCONTIGUOUS_SEG1_END = (start_a + timedelta(seconds=(get_segment_length('small') - 1) / sf_high), small_samples_2[-1]) + cls.DISCONTIGUOUS_SEG2_START = (start_b, medium_samples_2[0]) + cls.DISCONTIGUOUS_SEG2_MIDDLE1 = (start_b + timedelta(seconds=1 / sf_high) * 1, medium_samples_2[1]) + cls.DISCONTIGUOUS_SEG2_MIDDLE2 = (start_b + timedelta(seconds=1 / sf_high) * 2, medium_samples_2[2]) + cls.DISCONTIGUOUS_SEG2_END = (get_timeseries_end('medium', True, 'high') - timedelta(seconds=1 / sf_high), medium_samples_2[-1]) + + cls.test_timestamps_on_contiguous = (cls.CONTIGUOUS_START, cls.CONTIGUOUS_MIDDLE1, cls.CONTIGUOUS_MIDDLE2, + cls.CONTIGUOUS_END) + cls.test_timestamps_on_discontiguous = (cls.DISCONTIGUOUS_SEG1_START, cls.DISCONTIGUOUS_SEG1_MIDDLE1, + cls.DISCONTIGUOUS_SEG1_MIDDLE2, cls.DISCONTIGUOUS_SEG1_END, + cls.DISCONTIGUOUS_SEG2_START, cls.DISCONTIGUOUS_SEG2_MIDDLE1, + cls.DISCONTIGUOUS_SEG2_MIDDLE2, cls.DISCONTIGUOUS_SEG2_END) + + def test_indexing_int_position(self): + # Contiguous + for position in (0, 3, 5, -1): # 5 and -1 are the same position + self.assertEqual(self.contiguous_ts[position], medium_samples_1[position]) + # Discontiguous + for position in (0, 3, 5): + self.assertEqual(self.discontiguous_ts[position], small_samples_2[position]) + for position in (6, 12, 17, -1): # 17 and -1 are the same position + self.assertEqual(self.discontiguous_ts[position], medium_samples_2[position - (6 if position > 0 else 0)]) + + def 
test_indexing_datetime_position(self):
+        # Contiguous
+        for timestamp, expected_sample in self.test_timestamps_on_contiguous:
+            self.assertEqual(self.contiguous_ts[timestamp], expected_sample)
+
+        # Discontiguous
+        for timestamp, expected_sample in self.test_timestamps_on_discontiguous:
+            self.assertEqual(self.discontiguous_ts[timestamp], expected_sample)
+
+    def test_indexing_str_position(self):
+        FORMAT = '%Y-%m-%d %H:%M:%S.%f'
+
+        # Contiguous
+        for timestamp, expected_sample in self.test_timestamps_on_contiguous:
+            self.assertEqual(self.contiguous_ts[timestamp.strftime(FORMAT)], expected_sample)
+
+        # Discontiguous
+        for timestamp, expected_sample in self.test_timestamps_on_discontiguous:
+            self.assertEqual(self.discontiguous_ts[timestamp.strftime(FORMAT)], expected_sample)
+
+    def test_indexing_str_time_without_date_position(self):
+        """Only allowed with strings"""
+        FORMAT = '%H:%M:%S.%f'
+        # Contiguous
+        for timestamp, expected_sample in self.test_timestamps_on_contiguous:
+            self.assertEqual(self.contiguous_ts[timestamp.strftime(FORMAT)], expected_sample)
+        # Discontiguous
+        for timestamp, expected_sample in self.test_timestamps_on_discontiguous:
+            self.assertEqual(self.discontiguous_ts[timestamp.strftime(FORMAT)], expected_sample)
+
+    @staticmethod
+    def _all_samples(ts):
+        # Helper: flatten the samples of every Segment of a Timeseries into one array
+        return np.concatenate([seg.samples for seg in ts.segments])
+
+    def test_indexing_int_slice_positions(self):
+        # Contiguous
+        for slice_ in (slice(0, 3),  # [0:3[ = [0, 1, 2]  # from the start
+                       slice(3, 6),  # [3:6[ = [3, 4, 5]  # from the middle
+                       slice(None, 4),  # [:4[ = [0, 1, 2, 3]  # from start (implicit)
+                       slice(3, None),  # [3:]  # to end (implicit)
+                       slice(0, get_segment_length('medium')),  # from start to end
+                       slice(None, None),  # [:]  # from start (implicit) to end (implicit)
+                       slice(None, -2),  # [:-2]  # all but last two
+                       slice(-2, None),  # [-2:]  # last two
+                       slice(-4, -2),  # [-4:-2]  # two from the middle
+                       ):
+            self.assertTrue(allclose(self._all_samples(self.contiguous_ts[slice_]), medium_samples_1[slice_]))
+
+        # Discontiguous (6 + 12 = 18 samples; middle at 9)
+        all_samples = np.concatenate((small_samples_2, medium_samples_2))
+        total_length = get_segment_length('small') + get_segment_length('medium')
+        for slice_ in (slice(0, 9),  # [0:9[ = seg1 + first 3 of seg2  # from start to middle
+                       slice(9, 17),  # [9:17[ = 8 samples of seg2  # from middle towards the end
+                       slice(None, 9),  # [:9[  # from start (implicit) to middle
+                       slice(9, None),  # [9:]  # from middle to end (implicit)
+                       slice(0, total_length),  # [0:18[  # from start to end
+                       slice(None, None),  # [:]  # from start (implicit) to end (implicit)
+                       slice(0, 2),  # [0:2[  # first two; without crossing segments
+                       slice(-2, None),  # [-2:]  # last two; without crossing segments
+                       slice(None, -2),  # [:-2]  # all but last two
+                       ):
+            self.assertTrue(allclose(self._all_samples(self.discontiguous_ts[slice_]), all_samples[slice_]))
+
+    """
+    def test_indexing_tuple(self):  # disabled while __getitem__(tuple) is commented out in _Timeseries.py
+        index = (8, slice(2, 5), 0, slice(None, -2))
+        res = self.contiguous_ts[index]  # self.contiguous_ts[8, 2:5, 0, :-2]
+        self.assertIsInstance(res, tuple)
+    """
+
+    def test_indexing_out_of_range(self):
+        length = get_segment_length('medium')
+        for index in (-length-1, length, length+1, 100, -100):
+            with self.assertRaises(IndexError):
+                x = self.contiguous_ts[index]
+
+    def test_indexing_invalid_type(self):
+        for index in (1.5, {1, 2, 3}, {1: 2, 3: 4}, None):
+            with self.assertRaises(TypeError):
+                x = self.contiguous_ts[index]
+        # Strings are dispatched to the timestamp overload, which rejects non-timestamps
+        with self.assertRaises(ValueError):
+            x = self.contiguous_ts['a']
+
+
+if __name__ == '__main__':
+    unittest.main()
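
Taken together, patch 47 gives __getitem__ positional access (int and int slices, counted across Segments) and temporal access (datetime, string timestamps, datetime slices, DateTimeRange, Timeline). A minimal sketch of the intended call patterns, assuming the same resource helpers the tests import; nothing below is part of the patch series itself:

    # Illustrative sketch (assumes the tests' resource helpers): the __getitem__ overloads.
    from resources.timeseries import get_timeseries, start_a

    ts = get_timeseries('medium', 2, discontiguous=True, sf='high', units='siemens')

    sample = ts[0]        # int: the sample at that position, counted across Segments
    same = ts[start_a]    # datetime: the sample at that instant
    by_str = ts[start_a.strftime('%Y-%m-%d %H:%M:%S.%f')]  # str timestamp, parsed then delegated
    head = ts[0:9]        # int slice: a new Timeseries, possibly spanning Segments
    tail = ts[start_a:]   # temporal slice: from a datetime to the end (implicit)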