In [1]:
from pathlib import Path
from typing import List
import pickle
import re

In [2]:
def find_files_with_prefix(directory: str, prefix: str) -> List[Path]:
    """
    Find the regular files directly inside ``directory`` whose names start with ``prefix``.

    The search is not recursive, and sub-directories are skipped even when
    their own names match the prefix.

    Args:
        directory (str): Path to the directory to search.
        prefix (str): The prefix that filenames should start with.

    Returns:
        List[Path]: Path objects for the matching files, sorted by path so the
            result does not depend on the order in which the filesystem lists
            its entries. The sort is lexicographic, not numeric.

    Raises:
        OSError: Propagated from ``Path.iterdir`` when the directory cannot be
            listed (e.g. ``FileNotFoundError`` if it does not exist).
    """
    # Path.iterdir() yields entries in arbitrary order; sort for reproducibility.
    return sorted(
        entry
        for entry in Path(directory).iterdir()
        if entry.is_file() and entry.name.startswith(prefix)
    )


In [None]:
# NOTE: disabled helper - kept commented out and not called in the cells shown here.
# def sort_paths_by_sample_count(paths: List[Path]) -> List[Path]:
#     """
#     Sorts a list of file paths based on the numeric value following 'samples_till:' in the filename.

#     Args:
#         paths (List[Path]): List of Path objects whose names contain 'samples_till:NUMBER_'.

#     Returns:
#         List[Path]: Paths sorted in ascending order by extracted NUMBER.
#     """
#     def extract_number(path: Path) -> int:
#         match = re.search(r'samples_till:(\d+)_', path.name)
#         if not match:
#             raise ValueError(f"Filename does not contain 'samples_till:<number>_' pattern: {path.name}")
#         return int(match.group(1))
    
#     return sorted(paths, key=extract_number)


In [3]:
# Directory that is searched for the pickled result files.
MPs_path = "/home/jhagenbe_sw/ASIM/ecg-seizure-detection/MatrixProfile/MPs"

# Configuration directory; its last path component is the configuration name,
# which is used as the filename prefix when collecting files from MPs_path.
path = "/home/jhagenbe_sw/ASIM/ecg-seizure-detection/MatrixProfile/downsample_freq=32,window_size=120_0,stride=60_0"
config = path.rsplit("/", 1)[-1]

In [4]:
# Collect every file in MPs_path whose name starts with the configuration name.
# (A bare `config` expression used to precede this line; it displayed nothing
# because only the last expression of a cell is echoed.)
paths = find_files_with_prefix(directory=MPs_path, prefix=config)

In [5]:
# Load every matching pickle and flatten the contents into one list.
# NOTE(review): pickle.load can execute arbitrary code while deserialising;
# only point this at files from a trusted source.
array_list = []
for pickle_path in paths:
    # A dedicated loop name keeps the module-level `path` (the configuration
    # directory) from being overwritten; open() accepts Path objects directly.
    with open(pickle_path, "rb") as f:
        # Each pickle is expected to hold an iterable of arrays; extend() flattens it.
        array_list.extend(pickle.load(f))

In [6]:
# Display the loaded arrays.
# NOTE(review): this echoes the whole list; a slice such as array_list[:3]
# would keep the saved output short.
array_list

[mparray([[61.68488129721279, 1418, -1, 1418],
          [61.68404065531033, 1419, -1, 1419],
          [61.683557897664805, 1420, -1, 1420],
          ...,
          [60.183065626820536, 554, 554, -1],
          [60.182424009470466, 555, 555, -1],
          [60.18174509324043, 556, 556, -1]], shape=(1601, 4), dtype=object),
 mparray([[59.0749775444679, 1090, -1, 1090],
          [59.0746496414488, 1091, -1, 1091],
          [59.07173584561214, 1092, -1, 1092],
          ...,
          [60.15471119810845, 508, 508, -1],
          [60.15433387210598, 509, 509, -1],
          [60.15460400621397, 510, 510, -1]], shape=(1601, 4), dtype=object),
 mparray([[60.155108803175004, 1321, -1, 1321],
          [60.15465180139495, 1322, -1, 1322],
          [60.17433916987762, 1323, -1, 1323],
          ...,
          [60.57114927685473, 321, 321, -1],
          [60.57017917350783, 322, 322, -1],
          [60.57245474764717, 323, 323, -1]], shape=(1601, 4), dtype=object),
 mparray([[62.856358166199

In [10]:
# Inspect the first loaded array on its own (the saved output reports
# shape (1601, 4) and dtype object for it).
array_list[0]

mparray([[61.68488129721279, 1418, -1, 1418],
         [61.68404065531033, 1419, -1, 1419],
         [61.683557897664805, 1420, -1, 1420],
         ...,
         [60.183065626820536, 554, 554, -1],
         [60.182424009470466, 555, 555, -1],
         [60.18174509324043, 556, 556, -1]], shape=(1601, 4), dtype=object)

In [11]:
import numpy as np

# Keep only column 0 of every loaded array, reshaped into an (n, 1) column.
# For a single array this is: array_list[0][:, 0].reshape(-1, 1)
reduced_array_list = [
    full_array[:, 0].reshape(-1, 1)
    for full_array in array_list
]
reduced_array_list

[mparray([[61.68488129721279],
          [61.68404065531033],
          [61.683557897664805],
          ...,
          [60.183065626820536],
          [60.182424009470466],
          [60.18174509324043]], shape=(1601, 1), dtype=object),
 mparray([[59.0749775444679],
          [59.0746496414488],
          [59.07173584561214],
          ...,
          [60.15471119810845],
          [60.15433387210598],
          [60.15460400621397]], shape=(1601, 1), dtype=object),
 mparray([[60.155108803175004],
          [60.15465180139495],
          [60.17433916987762],
          ...,
          [60.57114927685473],
          [60.57017917350783],
          [60.57245474764717]], shape=(1601, 1), dtype=object),
 mparray([[62.85635816619981],
          [62.854434245954096],
          [62.85728581365503],
          ...,
          [64.31599651536015],
          [64.31427330568735],
          [64.31466543519377]], shape=(1601, 1), dtype=object),
 mparray([[62.24486590191615],
          [62.242263834638294]

In [12]:
# Number of loaded arrays (built-in len() instead of calling the dunder directly).
len(array_list)

3500

In [13]:
# Number of reduced arrays; the comprehension that builds the list yields one
# entry per element of array_list, so the two counts should match.
len(reduced_array_list)

3500