## Get maximum length signal in each hour
## Impute zero with rest of the signal
## Stack horizontally (x-axis) (By taking mean of all channels)
## Get fourier and pass them as signals

# Getting maximum length signal in each hour upto 72

In [None]:
import os
import re

def process_file(file_path, encoding='utf-8'):
    with open(file_path, 'r', encoding=encoding, errors='replace') as file:
        content = file.read()
        first_line = file.readline().strip()
        line = (f"First line of {file_path}: {first_line}")
        hour = int(line[75:78])

        start_time_match = re.search(r"#Start time: (\d{2}:\d{2}:\d{2})", content)
        if start_time_match:
            start_time = start_time_match.group(1)
            print(f"Start time: {start_time}")

        # Extract end time using regular expression
        end_time_match = re.search(r"#End time: (\d{2}:\d{2}:\d{2})", content)
        if end_time_match:
            end_time = end_time_match.group(1)
            print(f"End time: {end_time}")        

def iterate_through_directory(directory):
    for item in os.listdir(directory):
        item_path = os.path.join(directory, item)
        if os.path.isfile(item_path):
            if item.endswith('EEG.hea'):  # Adjust the extension as needed
                process_file(item_path)
        elif os.path.isdir(item_path):
            iterate_through_directory(item_path)

# Specify the root directory where your files and subdirectories are located
root_directory = '/Volumes/Bharadwaj/physionet-official/data_nan/0346'

# Start iterating through the root directory
iterate_through_directory(root_directory)


For most of the signals there is complete 1 hour data in the file. So assuming that atleast we will get 1 patient data of 1 hour while checking all patients. Since patients data should not be last. Just imputing with zero where signal is not there.

# Sampling data

Before imputing with zero, we have to sample the signal so that we can know how many components will present in each signal, so we can impute rest with zero.

In [2]:
# Content of the file

file_path = '/Volumes/Bharadwaj/physionet-official/data_nan/0346/0346_023_037_EEG.hea'
encoding='utf-8'

with open(file_path, 'r', encoding=encoding, errors='replace') as file:
        file_content = file.read()

first_line_tokens = file_content.strip().split()
sampling_frequency = int(first_line_tokens[2])
print(f"Sampling frequency: {sampling_frequency}")


Sampling frequency: 256


In [3]:
import numpy as np
from scipy.signal import resample
from scipy.io import loadmat

file = loadmat('/Volumes/Bharadwaj/physionet-official/data_nan/0346/0346_023_037_EEG.mat')
arr = file['val']
print(arr)

# Original signal and its sampling rate
original_signal = arr  # Replace with your signal data
original_sampling_rate = sampling_frequency  # Replace with your original sampling rate (in Hz)

# Target sampling rate
target_sampling_rate = 100  # Resampling to 100 Hz

# Calculate the resampling ratio
resampling_ratio = target_sampling_rate / original_sampling_rate

# Calculate the new length of the resampled signal
new_length = int(len(original_signal) * resampling_ratio)

# Resample the signal
resampled_signal = resample(original_signal, new_length)

# Print the lengths of the original and resampled signals
print(f"Original signal length: {len(original_signal)}")
print(f"Resampled signal length: {len(resampled_signal)}")

[[26894 26859 26865 ... 27401 27429 27451]
 [24642 24606 24601 ... 25474 25492 25505]
 [24731 24716 24646 ... 25491 25483 25453]
 ...
 [26037 26422 26559 ... 26113 26416 26618]
 [25232 26402 25125 ... 25570 26289 25064]
 [24386 24614 24786 ... 24414 24703 24873]]
Original signal length: 21
Resampled signal length: 8


### Don't mean channels, signals get mixed up