In [1]:
from brian2 import Hz, kHz
from brian2hears import Sound, erbspace, Gammatone, Filterbank
from scipy.signal import butter, filtfilt, sosfiltfilt, lfilter
import librosa as lb
import os, sys
sys.path.append(os.getcwd())
import numpy as np
import matplotlib.pyplot as plt
from sklearn.model_selection import train_test_split
from sklearn import linear_model

In [2]:
class EnvelopeFromGammatoneFilterbank(Filterbank):
    """Converts the output of a GammatoneFilterbank to an envelope."""

    def __init__(self, source):
        """Initialize the envelope transformation.

        Parameters
        ----------
        source : Gammatone
            Gammatone filterbank output to convert to envelope
        """
        super().__init__(source)

        self.nchannels = 1

    def buffer_apply(self, input_):
        # 6. take absolute value of the input_
        compressed_subbands = np.abs(input_)**0.6

        combined_envelope = np.sum(compressed_subbands, axis=1)

        return  combined_envelope.reshape(combined_envelope.shape[0], 1)

In [4]:
# # Convert the numpy array to a Sound object
# audio_sound = Sound(audio_signal, samplerate=sr*Hz)

# num_filters = 28
# center_freqs = np.linspace(50*Hz, 5*kHz, num_filters)
# gammatone_filterbank = Gammatone(audio_sound, center_freqs)

# envelope_calcuation = EnvelopeFromGammatoneFilterbank(gammatone_filterbank)
# combined_envelope = envelope_calcuation.process()

# # Plot the combined envelope
# plt.plot(combined_envelope)
# plt.title("Combined Envelope")

# plt.show()

In [5]:
# # Choose the frequency range based on the method
# low_freq = 1
# high_freq = 32

# # Design a Butterworth bandpass filter
# order = 4
# nyquist = 0.5 * sr
# low = low_freq / nyquist
# high = high_freq / nyquist
# sos = butter(order, [low, high], btype='band', output='sos')

# normalized_envelope = combined_envelope / np.max(combined_envelope)
# print(normalized_envelope.shape)
# filtered_envelope = sosfiltfilt(sos, normalized_envelope[:, 0])


# print(filtered_envelope)

# # Choose the downsampling rate based on the method
# downsampling_rate = 64

# resampled_envelope = lb.resample(filtered_envelope, orig_sr=sr, target_sr=downsampling_rate)
# print(resampled_envelope.shape)
# plt.plot(resampled_envelope)
# plt.show()

In [3]:
def bandpass_filter(signal, fs):
    sos = butter(1, [1, 32], fs=fs, btype='band', output='sos')
    filtered_signal = sosfiltfilt(sos, signal, axis=0)
    return filtered_signal

def downsample_eeg(filtered_eeg, fs, target_fs):
    downsampled_eeg = lb.resample(filtered_eeg.T, orig_sr=fs, target_sr=target_fs).T
    return downsampled_eeg

def get_audio_file(filepath):
    audio_signal, sr = lb.load(filepath, sr=44100)
    
    return audio_signal, sr

def audio_data_preprocessing(audio_signal, sr=44100):
    # AUDIO DATA PREPROCESSING
    audio_sound = Sound(audio_signal, samplerate=sr*Hz)

    num_filters = 28
    center_freqs = np.linspace(50*Hz, 5*kHz, num_filters)
    gammatone_filterbank = Gammatone(audio_sound, center_freqs)

    envelope_calcuation = EnvelopeFromGammatoneFilterbank(gammatone_filterbank)
    combined_envelope = envelope_calcuation.process()

    # Design a Butterworth bandpass filter
    # nyquist = 0.5 * sr
    # low = low_freq / nyquist
    # high = high_freq / nyquist
    # sos = butter(4, [low, high], btype='band', output='sos')

    # normalized_envelope = combined_envelope / np.max(combined_envelope)
    # filtered_envelope = sosfiltfilt(sos, normalized_envelope[:, 0])
    print(combined_envelope.shape)
    filtered_envelope = bandpass_filter(combined_envelope.flatten(), sr)

    # Choose the downsampling rate based on the method
    target_sr = 64 # DEEP LEARNING

    resampled_envelope = lb.resample(filtered_envelope, orig_sr=sr, target_sr=target_sr)
#     plt.plot(resampled_envelope)
#     plt.show()
    return resampled_envelope

def eeg_data_preprocessing(eeg):
    # EEG DATA PREPROCESSING
    # Filter the EEG signals using a bandpass filter
    fs = 128
    filtered_eeg = bandpass_filter(eeg, fs)
    # Downsample the EEG signals
    target_sr = 64  # For linear regression
    downsampled_eeg = lb.resample(filtered_eeg, orig_sr=fs, target_sr=target_sr, axis=0)
    return downsampled_eeg

In [4]:
def preprocess(filename, verbose=False):
    filepath = "../data/eeg/" + filename
    eeg_data = np.load(filepath, allow_pickle=True)
    eeg = eeg_data['eeg']
    fs = eeg_data['fs']
    subject = str(eeg_data['subject'])
    stimulus_attended = str(eeg_data['stimulus_attended']).replace(".wav", "")
    stimulus_unattended = str(eeg_data['stimulus_unattended']).replace(".wav", "")
    print("====================== EEG DATA ======================") if verbose else None
    print(f"Attended stimulus: {stimulus_attended}") if verbose else None
    print(f"Unattended stimulus: {stimulus_unattended}") if verbose else None
    print(f"Sampling rate: {fs} Hz") if verbose else None
    print(f"Subject: {subject}") if verbose else None
    print("======================================================\n") if verbose else None

    print("=> PROCESSING EEG DATA") if verbose else None
    eeg_preprocessed = eeg_data_preprocessing(eeg)
    np.save("../data/train_cnn/"+ filename.replace(".npz", "") + ".npy", eeg_preprocessed)
    print(f"=> EEG PREPROCESSED DATA SHAPE: {eeg_preprocessed.shape}") if verbose else None
    print("=> DONE\n") if verbose else None

    print("=> RETRIEVING AUDIO DATA OF ATTENDED STIMULUS") if verbose else None
    attended_audio_signal, sr = get_audio_file("../data/stimuli/" + str(eeg_data["stimulus_attended"]))
    print("================= ATTENDED AUDIO DATA ================") if verbose else None
    print(f"File name: {eeg_data['stimulus_attended']}") if verbose else None
    print(f"Audio signal length: {len(attended_audio_signal)}") if verbose else None
    print(f"Sampling rate: {sr} Hz") if verbose else None
    print("======================================================\n") if verbose else None

    if os.path.isfile("../data/train_cnn/" + stimulus_attended + ".npy"):
        print("ATTENDED AUDIO DATA ALREADY PREPROCESSED") if verbose else None
        attended_preprocessed = np.load("../data/train_cnn/" + stimulus_attended + ".npy")
    else:
        print("=> PROCESSING ATTENDED AUDIO DATA") if verbose else None
        attended_preprocessed = audio_data_preprocessing(attended_audio_signal)
        np.save("../data/train_cnn/" + stimulus_attended + ".npy", attended_preprocessed)
    print("=> DONE\n") if verbose else None

    print("=> RETRIEVING AUDIO DATA OF UNATTENDED STIMULUS") if verbose else None
    unattended_audio_signal, sr = get_audio_file("../data/stimuli/" + str(eeg_data["stimulus_unattended"]))
    
    print("================= UNATTENDED AUDIO DATA ================") if verbose else None
    print(f"File name: {eeg_data['stimulus_unattended']}") if verbose else None
    print(f"Audio signal length: {len(unattended_audio_signal)}") if verbose else None
    print(f"Sampling rate: {sr} Hz") if verbose else None
    print("======================================================\n") if verbose else None

    if os.path.isfile("../data/train_cnn/" + stimulus_unattended + ".npy"):
        print("UNATTENDED AUDIO DATA ALREADY PREPROCESSED") if verbose else None
        unattended_preprocessed = np.load("../data/train_cnn/" + stimulus_unattended + ".npy")
    else:
        print("=> PROCESSING UNATTENDED AUDIO DATA") if verbose else None
        unattended_preprocessed = audio_data_preprocessing(unattended_audio_signal)
        np.save("../data/train_cnn/"+stimulus_unattended+ ".npy", unattended_preprocessed)
    print("=> DONE\n") if verbose else None

In [5]:
for index, filename in enumerate(os.listdir("../data/eeg/")):
    print("PREPROCESSING FILE {filename} ({index}/{total})".format(filename=filename, index=index, total=len(os.listdir("../data/eeg/"))))
    if filename.endswith(".npz"):
        preprocess(filename, verbose=False)
    print(f"PREPROCESSED ALL OF {filename}")
    # except:
    #     print(f"COULD NOT PREPROCESS ALL OF {filename}")
#     print(filename)

PREPROCESSING FILE sub-008_-_audiobook_6_1.npz (0/310)
(36761165, 1)
(42025682, 1)
PREPROCESSED ALL OF sub-008_-_audiobook_6_1.npz
PREPROCESSING FILE sub-004_-_audiobook_3.npz (1/310)
(42444051, 1)
(41314177, 1)
PREPROCESSED ALL OF sub-004_-_audiobook_3.npz
PREPROCESSING FILE sub-013_-_audiobook_6_2.npz (2/310)
(36753542, 1)
PREPROCESSED ALL OF sub-013_-_audiobook_6_2.npz
PREPROCESSING FILE sub-019_-_audiobook_4.npz (3/310)
(35872129, 1)
PREPROCESSED ALL OF sub-019_-_audiobook_4.npz
PREPROCESSING FILE sub-018_-_audiobook_5_3.npz (4/310)
(39959390, 1)
PREPROCESSED ALL OF sub-018_-_audiobook_5_3.npz
PREPROCESSING FILE sub-035_-_podcast_6.npz (5/310)
(37252858, 1)
(43893761, 1)
PREPROCESSED ALL OF sub-035_-_podcast_6.npz
PREPROCESSING FILE sub-035_-_audiobook_1.npz (6/310)
PREPROCESSED ALL OF sub-035_-_audiobook_1.npz
PREPROCESSING FILE sub-002_-_audiobook_5_1.npz (7/310)
(40116309, 1)
PREPROCESSED ALL OF sub-002_-_audiobook_5_1.npz
PREPROCESSING FILE sub-019_-_audiobook_5_2.npz (8/310)
P