In [2]:
from IPython.display import clear_output

import os

import numpy as np

from tqdm import tqdm
import random

In [4]:
def extract_features(combined_paths, qtd_segments_by_rec, db_alias):
    for combined_path in combined_paths:
        _, _, db_alias, record_filename = combined_path[0].split("/")
        index, record_id, _, _ = record_filename.split("_")
        result_filename = f"./{db_alias}/{index}_{record_id}_result.npy"

        if os.path.exists(result_filename):
            continue

        rri_segment_path, recording_path = combined_path

        rri_segments = np.load(rri_segment_path, allow_pickle=True)
        recording_segments = np.load(recording_path, allow_pickle=True)

        print(qtd_segments_by_rec, len(rri_segments))

        if len(rri_segments) < qtd_segments_by_rec:
            continue

        index_list = np.random.choice(
            len(rri_segments),
            qtd_segments_by_rec,
            replace=False,
        )

        rri_segments_samples = rri_segments[index_list]

        recording_segments_samples = recording_segments[index_list]

        recording_result = np.empty((0, 57))

        for segment in tqdm(zip(rri_segments_samples, recording_segments_samples)):
            rri_segment, recording = segment

            rri_histogram, _ = np.histogram(a=rri_segment, range=(0, 2500), bins=50)

            if rri_histogram.shape[0] == 0:
                continue

            btd = get_btd(recording)
            clear_output(wait=True)

            combine = np.hstack((rri_histogram, np.array(btd)))

            recording_result = np.vstack((recording_result, combine))

        np.save(
            file=result_filename,
            arr=recording_result,
        )


def get_combined_path(data_path):
    data_filenames = os.listdir(data_path)

    rri_segment_filenames = list(
        filter(lambda item: "rri_segment" in item, data_filenames)
    )
    rri_segment_paths = [f"{data_path}{filename}" for filename in rri_segment_filenames]

    recording_segment_filenames = list(
        filter(lambda item: "recording_segment" in item, data_filenames)
    )
    recording_segment_paths = [
        f"{data_path}{filename}" for filename in recording_segment_filenames
    ]

    combine_paths = [
        combined_paths
        for combined_paths in zip(
            sorted(rri_segment_paths), sorted(recording_segment_paths)
        )
    ]

    return combine_paths


def generate_chunks(combined_paths, alias_db):
    for combined_path in combined_paths:
        _, _, _, record_filename = combined_path[0].split("/")
        _, record_id, _, _ = record_filename.split("_")

        rri_segment_path, recording_path = combined_path

        rri_segments = np.load(rri_segment_path, allow_pickle=True)

        recording_segments = np.load(recording_path, allow_pickle=True)

        indexes = np.arange(len(rri_segments))
        np.random.shuffle(indexes)

        shuffled_rri = rri_segments[indexes]
        shuffled_recordings = recording_segments[indexes]

        chunk_size = 200

        qtd_segments = rri_segments.shape[0]

        for i in range(0, qtd_segments, chunk_size):
            start_index_chunk = i
            end_index_chunk = i + chunk_size

            if end_index_chunk >= qtd_segments:
                end_index_chunk = qtd_segments - 1

            result_filename = f"./{alias_db}/{record_id}_{start_index_chunk}_{end_index_chunk}_result.npy"

            if os.path.exists(result_filename):
                print("exists")
                continue

            rri_segment_chunk = shuffled_rri[start_index_chunk:end_index_chunk]
            recording_chunk = shuffled_recordings[start_index_chunk:end_index_chunk]
            zipped_chunk = list(zip(rri_segment_chunk, recording_chunk))

            np.save(file=result_filename, arr=zipped_chunk)

In [2]:
alias_db = "NSRDB"
nsrdb_data_path = f"../extract_data/{alias_db}/"
nsrdb_combine_paths = get_combined_path(nsrdb_data_path)

nsrdb_feature_path = f"./{alias_db}"
if not os.path.exists(nsrdb_feature_path):
    os.makedirs(nsrdb_feature_path)

nsrdb_combine_paths_shuffled = random.sample(
    nsrdb_combine_paths, len(nsrdb_combine_paths)
)

nsrdb_combine_paths_shuffled_sampled = nsrdb_combine_paths_shuffled

generate_chunks(nsrdb_combine_paths_shuffled_sampled, alias_db)

  arr = np.asanyarray(arr)


In [5]:
alias_db = "AFDB"
afdb_data_path = f"../extract_data/{alias_db}/"
afdb_combine_paths = get_combined_path(afdb_data_path)

afdb_feature_path = f"./{alias_db}"
if not os.path.exists(afdb_feature_path):
    os.makedirs(afdb_feature_path)

afdb_combine_paths_shuffled = random.sample(afdb_combine_paths, len(afdb_combine_paths))

afdb_combine_paths_shuffled_sampled = afdb_combine_paths_shuffled

# generate_chunks(afdb_combine_paths_shuffled_sampled, alias_db)

In [6]:
afdb_combine_paths_shuffled_sampled

[('../extract_data/AFDB/4_04746_rri_segment.npy',
  '../extract_data/AFDB/4_04746_recording_segment.npy'),
 ('../extract_data/AFDB/7_05091_rri_segment.npy',
  '../extract_data/AFDB/7_05091_recording_segment.npy'),
 ('../extract_data/AFDB/18_08219_rri_segment.npy',
  '../extract_data/AFDB/18_08219_recording_segment.npy'),
 ('../extract_data/AFDB/3_04126_rri_segment.npy',
  '../extract_data/AFDB/3_04126_recording_segment.npy'),
 ('../extract_data/AFDB/21_08434_rri_segment.npy',
  '../extract_data/AFDB/21_08434_recording_segment.npy'),
 ('../extract_data/AFDB/17_08215_rri_segment.npy',
  '../extract_data/AFDB/17_08215_recording_segment.npy'),
 ('../extract_data/AFDB/10_06426_rri_segment.npy',
  '../extract_data/AFDB/10_06426_recording_segment.npy'),
 ('../extract_data/AFDB/12_06995_rri_segment.npy',
  '../extract_data/AFDB/12_06995_recording_segment.npy'),
 ('../extract_data/AFDB/20_08405_rri_segment.npy',
  '../extract_data/AFDB/20_08405_recording_segment.npy'),
 ('../extract_data/AFDB/9

In [4]:
alias_db = "LTAFDB"
ltafdb_data_path = f"../extract_data/{alias_db}/"
ltafdb_combine_paths = get_combined_path(ltafdb_data_path)

ltafdb_feature_path = f"./{alias_db}"
if not os.path.exists(ltafdb_feature_path):
    os.makedirs(ltafdb_feature_path)

ltafdb_combine_paths_shuffled = random.sample(
    ltafdb_combine_paths, len(ltafdb_combine_paths)
)

ltafdb_combine_paths_shuffled_sampled = ltafdb_combine_paths_shuffled

generate_chunks(ltafdb_combine_paths_shuffled_sampled, alias_db)

In [2]:
alias_db = "LTAFDB_N"
ltafdb_data_path = f"../extract_data/{alias_db}/"
ltafdb_combine_paths = get_combined_path(ltafdb_data_path)

ltafdb_feature_path = f"./{alias_db}"
if not os.path.exists(ltafdb_feature_path):
    os.makedirs(ltafdb_feature_path)

ltafdb_combine_paths_shuffled = random.sample(
    ltafdb_combine_paths, len(ltafdb_combine_paths)
)

ltafdb_combine_paths_shuffled_sampled = ltafdb_combine_paths_shuffled

generate_chunks(ltafdb_combine_paths_shuffled_sampled, alias_db)

  arr = np.asanyarray(arr)
