In [1]:
from brian2 import Hz, kHz
from brian2hears import Sound, erbspace, Gammatone, Filterbank
from scipy.signal import butter, filtfilt, sosfiltfilt, lfilter
import librosa as lb
import os, sys
sys.path.append(os.getcwd())
import numpy as np
import matplotlib.pyplot as plt
from sklearn.model_selection import train_test_split
from sklearn import linear_model

In [None]:
# ----- Audio data -----

method = "CNN"

# Step 1 : Read audio filename.wav to the variable A.
filepath = "../data/stimuli/audiobook_1.wav"
audio_signal, sr = lb.load(filepath, sr=44100)

print(f"Audio signal length: {len(audio_signal)}")

In [2]:
class EnvelopeFromGammatoneFilterbank(Filterbank):
    """Converts the output of a GammatoneFilterbank to an envelope."""

    def __init__(self, source):
        """Initialize the envelope transformation.

        Parameters
        ----------
        source : Gammatone
            Gammatone filterbank output to convert to envelope
        """
        super().__init__(source)

        self.nchannels = 1

    def buffer_apply(self, input_):
        # 6. take absolute value of the input_
        compressed_subbands = np.abs(input_)**0.6

        combined_envelope = np.sum(compressed_subbands, axis=1)

        return  combined_envelope.reshape(combined_envelope.shape[0], 1)

In [None]:
# Convert the numpy array to a Sound object
audio_sound = Sound(audio_signal, samplerate=sr*Hz)

num_filters = 28
center_freqs = np.linspace(50*Hz, 5*kHz, num_filters)
gammatone_filterbank = Gammatone(audio_sound, center_freqs)

envelope_calcuation = EnvelopeFromGammatoneFilterbank(gammatone_filterbank)
combined_envelope = envelope_calcuation.process()

# Plot the combined envelope
plt.plot(combined_envelope)
plt.title("Combined Envelope")

plt.show()

In [None]:
# Choose the frequency range based on the method
low_freq = 1
high_freq = 9  if method == "LR" else 32

# Design a Butterworth bandpass filter
order = 4
nyquist = 0.5 * sr
low = low_freq / nyquist
high = high_freq / nyquist
sos = butter(order, [low, high], btype='band', output='sos')

normalized_envelope = combined_envelope / np.max(combined_envelope)
print(normalized_envelope.shape)
filtered_envelope = sosfiltfilt(sos, normalized_envelope[:, 0])


print(filtered_envelope)

# Choose the downsampling rate based on the method
downsampling_rate = 20  if method == "LR" else 64

resampled_envelope = lb.resample(filtered_envelope, orig_sr=sr, target_sr=downsampling_rate)
print(resampled_envelope.shape)
plt.plot(resampled_envelope)
plt.show()

In [22]:
def bandpass_filter_eeg(eeg_data, fs, low_freq, high_freq):
    nyquist = 0.5 * fs
    low = low_freq / nyquist
    high = high_freq / nyquist
    b, a = butter(4, [low, high], btype='band')
    filtered_eeg = lfilter(b, a, eeg_data, axis=0)
    return filtered_eeg

def downsample_eeg(filtered_eeg, fs, target_fs):
    downsampled_eeg = lb.resample(filtered_eeg.T, orig_sr=fs, target_sr=target_fs).T
    return downsampled_eeg

def get_audio_file(filepath):
    audio_signal, sr = lb.load(filepath, sr=44100)
    
    return audio_signal, sr

def audio_data_preprocessing(audio_signal, sr=44100):
    # AUDIO DATA PREPROCESSING
    audio_sound = Sound(audio_signal, samplerate=sr*Hz)

    num_filters = 28
    center_freqs = np.linspace(50*Hz, 5*kHz, num_filters)
    gammatone_filterbank = Gammatone(audio_sound, center_freqs)

    envelope_calcuation = EnvelopeFromGammatoneFilterbank(gammatone_filterbank)
    combined_envelope = envelope_calcuation.process()

    # Choose the frequency range based on the method
    low_freq = 1
    high_freq = 32 # FOR DEEP LEARNING

    # Design a Butterworth bandpass filter
    order = 4
    nyquist = 0.5 * sr
    low = low_freq / nyquist
    high = high_freq / nyquist
    sos = butter(order, [low, high], btype='band', output='sos')

    normalized_envelope = combined_envelope / np.max(combined_envelope)
    filtered_envelope = sosfiltfilt(sos, normalized_envelope[:, 0])

    # Choose the downsampling rate based on the method
    downsampling_rate = 64 # DEEP LEARNING

    resampled_envelope = lb.resample(filtered_envelope, orig_sr=sr, target_sr=downsampling_rate)
#     plt.plot(resampled_envelope)
#     plt.show()
    return resampled_envelope

def eeg_data_preprocessing(eeg):
    # EEG DATA PREPROCESSING
    # Filter the EEG signals using a bandpass filter
    low_freq, high_freq = 1, 32  # For deep learning
    fs = 128
    filtered_eeg = bandpass_filter_eeg(eeg, fs, low_freq, high_freq)
    # Downsample the EEG signals
    target_fs = 64  # For linear regression
    print(f"=> DOWNSAMPLING EEG DATA FROM {fs}Hz TO {target_fs}Hz")

    downsampled_eeg = downsample_eeg(filtered_eeg, fs, target_fs)
    return downsampled_eeg

In [29]:
def preprocess(filename):
    filepath = "../data/eeg/" + filename
    eeg_data = np.load(filepath)
    eeg = eeg_data['eeg']
    fs = eeg_data['fs']
    subject = str(eeg_data['subject'])
    stimulus_attended = str(eeg_data['stimulus_attended']).replace(".wav", "")
    stimulus_unattended = str(eeg_data['stimulus_unattended']).replace(".wav", "")
    print("====================== EEG DATA ======================")
    print(f"Attended stimulus: {stimulus_attended}")
    print(f"Unattended stimulus: {stimulus_unattended}")
    print(f"Sampling rate: {fs} Hz")
    print(f"Subject: {subject}")
    print("======================================================\n")

    print("=> PROCESSING EEG DATA")
    eeg_preprocessed = eeg_data_preprocessing(eeg)
    np.save("../data/train_cnn/"+ filename.replace(".npz", "") + ".npy", eeg_preprocessed)
    print(f"=> EEG PREPROCESSED DATA SHAPE: {eeg_preprocessed.shape}")
    print("=> DONE\n")

    print("=> RETRIEVING AUDIO DATA OF ATTENDED STIMULUS")
    attended_audio_signal, sr = get_audio_file("../data/stimuli/" + str(eeg_data["stimulus_attended"]))
    print("================= ATTENDED AUDIO DATA ================")
    print(f"File name: {eeg_data['stimulus_attended']}")
    print(f"Audio signal length: {len(attended_audio_signal)}")
    print(f"Sampling rate: {sr} Hz")
    print("======================================================\n")

    if os.path.isfile("../data/train_cnn/" + stimulus_attended + ".npy"):
        print("ATTENDED AUDIO DATA ALREADY PREPROCESSED")
        attended_preprocessed = np.load("../data/train_cnn/" + stimulus_attended + ".npy")
    else:
        print("=> PROCESSING ATTENDED AUDIO DATA")
        attended_preprocessed = audio_data_preprocessing(attended_audio_signal)
        np.save("../data/train_cnn/" + stimulus_attended + ".npy", attended_preprocessed)
    print("=> DONE\n")

    print("=> RETRIEVING AUDIO DATA OF UNATTENDED STIMULUS")
    unattended_audio_signal, sr = get_audio_file("../data/stimuli/" + str(eeg_data["stimulus_unattended"]))
    
    print("================= UNATTENDED AUDIO DATA ================") if print else None
    print(f"File name: {eeg_data['stimulus_unattended']}") if print else None
    print(f"Audio signal length: {len(unattended_audio_signal)}") if print else None
    print(f"Sampling rate: {sr} Hz") if print else None
    print("======================================================\n") if print else None

    if os.path.isfile("../data/train_cnn/" + stimulus_unattended + ".npy"):
        print("UNATTENDED AUDIO DATA ALREADY PREPROCESSED")
        unattended_preprocessed = np.load("../data/train_cnn/" + stimulus_unattended + ".npy")
    else:
        print("=> PROCESSING UNATTENDED AUDIO DATA")
        unattended_preprocessed = audio_data_preprocessing(unattended_audio_signal)
        np.save("../data/train_cnn/"+stimulus_unattended+ ".npy", unattended_preprocessed)
    print("=> DONE\n")
    # eeg_preprocessed_filename = subject + "ATT=" + stimulus_attended + "UNATT=" + stimulus_unattended
    # print(eeg_preprocessed_filename)
    # stimuli_train, stimuli_test, eeg_train, eeg_test = train_test_split(resampled_envelope[:len(downsampled_eeg)], downsampled_eeg, test_size=0.2, random_state = 10)

#     attended_train, attended_val = np.split(attended_preprocessed, [int(0.8 * len(attended_preprocessed))])
#     unattended_train, unattended_val = np.split(unattended_preprocessed, [int(0.8 * len(unattended_preprocessed))])
#     eeg_train, eeg_val = np.split(eeg_preprocessed, [int(0.8 * len(eeg_preprocessed))])
    
    
#     np.save("../data/validate_cnn/"+stimulus_attended+ ".npy", attended_val)
#     np.save("../data/validate_cnn/"+stimulus_unattended+ ".npy", unattended_val)
#     np.save("../data/validate_cnn/"+ filename + ".npy", eeg_test)

In [None]:
for filename in os.listdir("../data/eeg/"):
#     print(f"PROCESSING FILE {filename}")
    try:
        preprocess(filename)
        print(f"PREPROCESSED ALL OF {filename}")
    except:
        print(f"COULD NOT PREPROCESS ALL OF {filename}")
#     print(filename)

PROCESSING FILE sub-008_-_audiobook_6_1.npz
Attended stimulus: audiobook_6_1
Unattended stimulus: audiobook_5_3
Sampling rate: 128 Hz
Subject: sub-008

=> PROCESSING EEG DATA
=> DOWNSAMPLING EEG DATA FROM 128Hz TO 64Hz
=> EEG PREPROCESSED DATA SHAPE: (53440, 64)
=> DONE

=> RETRIEVING AUDIO DATA OF ATTENDED STIMULUS
File name: audiobook_6_1.wav
Audio signal length: 36761165
Sampling rate: 44100 Hz

ATTENDED AUDIO DATA ALREADY PREPROCESSED
=> DONE

=> RETRIEVING AUDIO DATA OF UNATTENDED STIMULUS
File name: audiobook_5_3.wav
Audio signal length: 42025682
Sampling rate: 44100 Hz

UNATTENDED AUDIO DATA ALREADY PREPROCESSED
=> DONE

PROCESSING FILE sub-004_-_audiobook_3.npz
Attended stimulus: audiobook_3
Unattended stimulus: audiobook_1
Sampling rate: 128 Hz
Subject: sub-004

=> PROCESSING EEG DATA
=> DOWNSAMPLING EEG DATA FROM 128Hz TO 64Hz
=> EEG PREPROCESSED DATA SHAPE: (60096, 64)
=> DONE

=> RETRIEVING AUDIO DATA OF ATTENDED STIMULUS
File name: audiobook_3.wav
Audio signal length: 4244

=> DOWNSAMPLING EEG DATA FROM 128Hz TO 64Hz
=> EEG PREPROCESSED DATA SHAPE: (53440, 64)
=> DONE

=> RETRIEVING AUDIO DATA OF ATTENDED STIMULUS
File name: audiobook_6_1.wav
Audio signal length: 36761165
Sampling rate: 44100 Hz

ATTENDED AUDIO DATA ALREADY PREPROCESSED
=> DONE

=> RETRIEVING AUDIO DATA OF UNATTENDED STIMULUS
File name: audiobook_5_3.wav
Audio signal length: 42025682
Sampling rate: 44100 Hz

UNATTENDED AUDIO DATA ALREADY PREPROCESSED
=> DONE

PROCESSING FILE sub-017_-_audiobook_2_1.npz
Attended stimulus: audiobook_2_1
Unattended stimulus: audiobook_5_1
Sampling rate: 128 Hz
Subject: sub-017

=> PROCESSING EEG DATA
=> DOWNSAMPLING EEG DATA FROM 128Hz TO 64Hz
=> EEG PREPROCESSED DATA SHAPE: (50752, 64)
=> DONE

=> RETRIEVING AUDIO DATA OF ATTENDED STIMULUS
File name: audiobook_2_1.wav
Audio signal length: 34879241
Sampling rate: 44100 Hz

ATTENDED AUDIO DATA ALREADY PREPROCESSED
=> DONE

=> RETRIEVING AUDIO DATA OF UNATTENDED STIMULUS
File name: audiobook_5_1.wav
Audio sign

  audio_signal, sr = lb.load(filepath, sr=44100)


File name: audiobook_6_1.wav
Audio signal length: 36761165
Sampling rate: 44100 Hz

ATTENDED AUDIO DATA ALREADY PREPROCESSED
=> DONE

=> RETRIEVING AUDIO DATA OF UNATTENDED STIMULUS
COULD NOT PREPROCESS ALL OF sub-009_-_audiobook_6_1.npz
PROCESSING FILE sub-018_-_audiobook_5_2.npz
Attended stimulus: audiobook_5_2
Unattended stimulus: audiobook_1
Sampling rate: 128 Hz
Subject: sub-018

=> PROCESSING EEG DATA
=> DOWNSAMPLING EEG DATA FROM 128Hz TO 64Hz
=> EEG PREPROCESSED DATA SHAPE: (58368, 64)
=> DONE

=> RETRIEVING AUDIO DATA OF ATTENDED STIMULUS
File name: audiobook_5_2.wav
Audio signal length: 40116309
Sampling rate: 44100 Hz

ATTENDED AUDIO DATA ALREADY PREPROCESSED
=> DONE

=> RETRIEVING AUDIO DATA OF UNATTENDED STIMULUS
File name: audiobook_1.wav
Audio signal length: 41314177
Sampling rate: 44100 Hz

UNATTENDED AUDIO DATA ALREADY PREPROCESSED
=> DONE

PROCESSING FILE sub-016_-_audiobook_2_1.npz
Attended stimulus: audiobook_2_1
Unattended stimulus: audiobook_5_1
Sampling rate: 128

=> DOWNSAMPLING EEG DATA FROM 128Hz TO 64Hz
=> EEG PREPROCESSED DATA SHAPE: (53440, 64)
=> DONE

=> RETRIEVING AUDIO DATA OF ATTENDED STIMULUS
File name: audiobook_6_1.wav
Audio signal length: 36761165
Sampling rate: 44100 Hz

ATTENDED AUDIO DATA ALREADY PREPROCESSED
=> DONE

=> RETRIEVING AUDIO DATA OF UNATTENDED STIMULUS
File name: audiobook_5_1.wav
Audio signal length: 39959390
Sampling rate: 44100 Hz

UNATTENDED AUDIO DATA ALREADY PREPROCESSED
=> DONE

PROCESSING FILE sub-018_-_audiobook_1.npz
Attended stimulus: audiobook_1
Unattended stimulus: audiobook_5_3
Sampling rate: 128 Hz
Subject: sub-018

=> PROCESSING EEG DATA
=> DOWNSAMPLING EEG DATA FROM 128Hz TO 64Hz
=> EEG PREPROCESSED DATA SHAPE: (60096, 64)
=> DONE

=> RETRIEVING AUDIO DATA OF ATTENDED STIMULUS
File name: audiobook_1.wav
Audio signal length: 41314177
Sampling rate: 44100 Hz

ATTENDED AUDIO DATA ALREADY PREPROCESSED
=> DONE

=> RETRIEVING AUDIO DATA OF UNATTENDED STIMULUS
File name: audiobook_5_3.wav
Audio signal len