### Import libraries

In [36]:
import os
import numpy as np
import random
from pydub import AudioSegment, silence
import pandas as pd
import openpyxl
from pydub.playback import play
import random


### Functions: generate the stimuli and their xlsx metadata files

In [37]:
def create_sequence(stimuli, length):
    """Draw a random sequence of sounds and concatenate them into one clip.

    Args:
        stimuli: Collection of ``(name, audio)`` pairs to draw from.
        length: Number of distinct pairs to draw (sampling is without
            replacement, via ``random.sample``).

    Returns:
        A ``(names, sequence_audio)`` tuple: the list of drawn names in
        playback order, and the drawn clips joined end to end with 100 ms
        of silence appended after each clip.
    """
    # Draw without replacement, then split the pairs into two parallel tuples
    chosen = random.sample(stimuli, length)
    names, audio_segments = zip(*chosen)

    # Start from 0 so the first addition simply yields the first clip,
    # exactly as the built-in sum() would.
    sequence_audio = 0
    for segment in audio_segments:
        # Each clip is followed by a 100 ms silent inter-stimulus interval
        sequence_audio = sequence_audio + (segment + AudioSegment.silent(duration=100))

    return list(names), sequence_audio


def create_present_absent_stimulus(stimuli, length, white_noise, retention_period_length = 2000):
    """Build the probe "present" / "absent" trials for one sequence length.

    For every position ``0 .. length-1`` a fresh random sequence is drawn and
    a "present" trial is built whose probe is the item at that position. A
    second fresh sequence is then drawn for an "absent" trial whose probe is
    a randomly chosen item that is not in that sequence; the absent trial is
    skipped when every item of ``stimuli`` is already in the sequence.

    Each trial's audio is: white noise, the sequence, a silent retention
    period, then the probe sound.

    Args:
        stimuli: Collection of ``(name, audio)`` pairs.
        length: Number of items per sequence.
        white_noise: Audio placed at the start of every trial.
        retention_period_length: Silence (ms) between sequence and probe.

    Returns:
        A list of ``(stimulus, condition, position, sequence_names,
        [probe_name])`` tuples.
    """
    audio_by_name = dict(stimuli)
    # The silent gap is the same for every trial, so build it once
    retention_gap = AudioSegment.silent(duration=retention_period_length)
    trials = []

    for position in range(length):
        # "present" trial: probe is the item at `position` of a new sequence
        memory_names, memory_audio = create_sequence(stimuli, length)
        probe_name = memory_names[position]
        present_audio = white_noise + memory_audio + retention_gap + audio_by_name[probe_name]
        trials.append((present_audio, "present", position, memory_names, [probe_name]))

        # "absent" trial: probe comes from the items left out of a new sequence
        memory_names, memory_audio = create_sequence(stimuli, length)
        left_out = [entry for entry in stimuli if entry[0] not in memory_names]
        if not left_out:
            # Every available item is in the sequence; no absent probe exists
            continue
        foil_name = random.choice(left_out)[0]
        absent_audio = white_noise + memory_audio + retention_gap + audio_by_name[foil_name]
        trials.append((absent_audio, "absent", position, memory_names, [foil_name]))

    return trials


def create_same_different_stimulus(stimuli, length, white_noise, retention_period_length = 2000):
    """Build the "same" / "different" sequence-comparison trials.

    For every adjacent pair of positions ``(i, i + 1)`` with
    ``i`` in ``0 .. length-2``:

    * a "same" trial plays a fresh random sequence twice;
    * a "different" trial plays another fresh random sequence, then the same
      items with positions ``i`` and ``i + 1`` swapped.

    Every trial has the same layout: one white-noise preamble, the first
    sequence, a silent retention period, then the comparison sequence. Both
    conditions use a single preamble so the start of the clip does not
    reveal the condition.

    Args:
        stimuli: Collection of ``(name, audio)`` pairs.
        length: Number of items per sequence.
        white_noise: Audio placed at the start of every trial.
        retention_period_length: Silence (ms) between the two sequences.

    Returns:
        A list of ``(stimulus, condition, i, first_sequence_names,
        probe_names)`` tuples; for "same" trials ``probe_names`` is the
        first sequence's name list itself.
    """
    stimuli_dict = dict(stimuli)
    # Identical for every trial, so build it once outside the loop
    retention_period = AudioSegment.silent(duration=retention_period_length)
    stimuli_cases = []

    for i in range(length - 1):
        # "same" trial: the sequence is repeated unchanged after the gap
        first_sequence_names, first_sequence_audio = create_sequence(stimuli, length)
        stimulus = white_noise + first_sequence_audio + retention_period + first_sequence_audio
        stimuli_cases.append((stimulus, "same", i, first_sequence_names, first_sequence_names))

        # "different" trial: swap the neighbours at positions i and i + 1
        first_sequence_names, first_sequence_audio = create_sequence(stimuli, length)
        probe = first_sequence_names.copy()
        probe[i], probe[i + 1] = probe[i + 1], probe[i]
        # Rebuild the audio from the reordered names, keeping the 100 ms
        # silence after each item
        probe_audio = sum([stimuli_dict[name] + AudioSegment.silent(duration=100) for name in probe])
        # Exactly one white-noise preamble, matching the "same" trials
        stimulus = white_noise + first_sequence_audio + retention_period + probe_audio
        stimuli_cases.append((stimulus, "different", i, first_sequence_names, probe))

    return stimuli_cases


def save_stimulus(stimulus, task, condition, length, swap_position, sequence_names, switch_name, stimuli_type, block_number, stimulus_number, output_dir):
    """Export one stimulus as a WAV file and return its metadata row.

    The file is written to
    ``<output_dir>/<task>/block<block_number>_<stimuli_type>/length<length>/``
    (created if missing). Its name encodes block, abbreviated stimulus type,
    task, condition, length and stimulus number, plus ``_swap<position>``
    when ``swap_position`` is not ``None``.

    Args:
        stimulus: Audio object exposing ``export(path, format=...)``.
        task: Task label used in the path, file name and ``display`` field.
        condition: Trial condition; "same"/"different" fill the
            ``Same_Different`` column, "present"/"absent" fill
            ``Absent_Present``.
        length: Sequence length.
        swap_position: Position index for the trial, or ``None``.
        sequence_names: Names of the sequence items (joined with ", ").
        switch_name: Names of the probe items (joined with ", ").
        stimuli_type: One of "complex_tones", "cat_mew_sounds",
            "dog_bark_sounds", "everyday_sounds".
        block_number: Block index.
        stimulus_number: Stimulus index within the block.
        output_dir: Root directory for generated files.

    Returns:
        ``(metadata, dir_path)``: the metadata dict for this stimulus and the
        directory the WAV file was written to.

    Raises:
        ValueError: If ``stimuli_type`` is not one of the four known types.
    """
    # File-name abbreviation for each supported stimulus category
    type_codes = {
        "complex_tones": "ct",
        "cat_mew_sounds": "cm",
        "dog_bark_sounds": "dbs",
        "everyday_sounds": "ez",
    }
    if stimuli_type not in type_codes:
        raise ValueError("Invalid stimuli_type: " + stimuli_type)
    stimuli_type_abbr = type_codes[stimuli_type]

    # Assemble the file name; the swap suffix is optional
    stem = f"block{block_number}_{stimuli_type_abbr}_{task}_{condition}_length{length}_stim{stimulus_number}"
    swap_suffix = f"_swap{swap_position}" if swap_position is not None else ""
    filename = stem + swap_suffix + ".wav"

    # Make sure the target directory exists, then write the audio
    dir_path = os.path.join(output_dir, task, f"block{block_number}_{stimuli_type}", f"length{length}")
    os.makedirs(dir_path, exist_ok=True)
    stimulus.export(os.path.join(dir_path, filename), format="wav")

    # Which metadata columns apply depends on the condition
    is_same_different = condition in ("same", "different")
    is_present_absent = condition in ("present", "absent")
    has_stim_index = condition in ("different", "present")

    metadata = {
        'randomise_blocks': block_number,
        'randomise_trials': 1,
        'display': f'task-{task.lower()}',
        'Audio_Filename': filename,
        'Same_Different': condition if is_same_different else '',
        'Absent_Present': condition if is_present_absent else '',
        'text': '',
        'embedded': '',
        'Attend_Condition': stimuli_type,
        'Length': length,
        'Pause': 100,
        'Sequence Pause': 2100,
        'Sequence_Names': ', '.join(sequence_names),
        'Switch_Names': ', '.join(switch_name),
        'Stim_Ind': swap_position if has_stim_index else '',
    }

    return metadata, dir_path

In [38]:
def generate_stimuli(sound_dir, stimuli_type, white_noise_file, output_dir, num_blocks, num_stimuli_per_condition, length):
    """Load a folder of sounds and generate every block of stimuli from it.

    Args:
        sound_dir: Directory holding the source ``.wav`` files.
        stimuli_type: Stimulus category label forwarded to
            ``process_stimuli``.
        white_noise_file: Path of the WAV file used as the trial preamble.
        output_dir: Root directory for generated files.
        num_blocks: Number of blocks; blocks are numbered from 1.
        num_stimuli_per_condition: Number of ``process_stimuli`` calls per
            block; stimulus numbers start at 1.
        length: Sequence length forwarded to ``process_stimuli``.

    Returns:
        One list concatenating the metadata rows returned by each
        ``process_stimuli`` call, in block / stimulus-number order.
    """
    white_noise = AudioSegment.from_wav(white_noise_file)

    # Keep only .wav files, ignoring names that start with "._"
    # (presumably macOS sidecar files — confirm). os.listdir returns entries
    # in arbitrary order, so sort them to make the stimulus pool deterministic.
    sound_files = sorted(
        f for f in os.listdir(sound_dir)
        if not f.startswith('._') and f.endswith('.wav')
    )

    # Name each sound by its file name minus the extension. splitext removes
    # only the trailing ".wav", so a name containing ".wav" elsewhere is kept
    # intact.
    sounds = [
        (os.path.splitext(f)[0], AudioSegment.from_file(os.path.join(sound_dir, f), format="wav"))
        for f in sound_files
    ]

    all_metadata = []
    for block in range(1, num_blocks + 1):
        for stim_number in range(1, num_stimuli_per_condition + 1):
            all_metadata.extend(process_stimuli(sounds, stimuli_type, block, stim_number, length, output_dir, white_noise))

    return all_metadata

def _save_task_cases(stimuli_cases, task, stimuli_type, block, stim_number, length, output_dir):
    """Save every case of one task and write that task's metadata workbook."""
    task_metadata = []
    dir_path = None
    for stimulus, condition, index, sequence_names, switch_name in stimuli_cases:
        metadata, dir_path = save_stimulus(stimulus, task, condition, length, index, sequence_names, switch_name, stimuli_type, block, stim_number, output_dir)
        task_metadata.append(metadata)

    # With no cases nothing was saved and there is no target directory, so
    # skip the workbook instead of failing or reusing another task's folder.
    if dir_path is not None:
        df = pd.DataFrame(task_metadata)
        df.to_excel(os.path.join(dir_path, f"metadata_length{length}_{task}.xlsx"), index=False)

    return task_metadata


def process_stimuli(stimuli, stimuli_type, block, stim_number, length, output_dir, white_noise):
    """Generate, save and catalogue the PA and SD stimuli for one block.

    Builds the present/absent ("PA") trials, then the same/different ("SD")
    trials. Each trial is exported through ``save_stimulus``, and one
    ``metadata_length<length>_<task>.xlsx`` workbook per task is written next
    to that task's audio files.

    The workbook name does not include ``stim_number``, so a later call for
    the same block, stimulus type and length rewrites the workbook.

    Args:
        stimuli: Collection of ``(name, audio)`` pairs.
        stimuli_type: Stimulus category label (see ``save_stimulus``).
        block: Block number.
        stim_number: Stimulus number within the block.
        length: Sequence length.
        output_dir: Root directory for generated files.
        white_noise: Audio placed at the start of every trial.

    Returns:
        The metadata rows of all saved trials: PA rows first, then SD rows.
    """
    # Present/absent task
    pa_cases = create_present_absent_stimulus(stimuli, length, white_noise)
    pa_metadata = _save_task_cases(pa_cases, "PA", stimuli_type, block, stim_number, length, output_dir)

    # Same/different task
    sd_cases = create_same_different_stimulus(stimuli, length, white_noise)
    sd_metadata = _save_task_cases(sd_cases, "SD", stimuli_type, block, stim_number, length, output_dir)

    # Return both tasks' rows so callers do not lose the PA metadata
    return pa_metadata + sd_metadata

### Stimuli Generation

In [39]:
# Path configuration for stimulus generation.
# All locations are absolute paths under /Volumes/T7; edit them to match the
# machine the notebook runs on. Commented-out assignments are earlier
# alternatives kept for switching by hand.
# NOTE(review): the paths mix "Perceptual Organization" / "perceptual organization"
# and "Separated stimuli" / "Separated stimulus". The case difference only
# resolves to one folder on a case-insensitive filesystem, and the
# stimuli/stimulus spellings may be different folders — confirm against the disk.

#----INPUT DIR----
# White-noise clip placed at the start of every trial
#white_noise_file = "E:/cmu/lab project/stimuli exploration/replicated stimuli/processed stimuli/white_noise.wav"
white_noise_file = "/Volumes/T7/CMU LiMN Research/Perceptual Organization/Stimuli exploration/Separated stimuli/replicated stimuli-original study (dif category)/processed stimuli/processed_white_noise/white_noise.wav"

# Complex-tone source sounds
#complex_tones_dir = "E:/cmu/lab project/stimuli exploration/replicated stimuli/processed stimuli/processed_complex_note"
complex_tones_dir = "/Volumes/T7/CMU LiMN Research/Perceptual Organization/Stimuli exploration/Separated stimuli/replicated stimuli-original study (dif category)/processed stimuli/processed_complex_note_rename"

# Everyday source sounds
#everyday_sounds_dir = "E:/cmu/lab project/stimuli exploration/replicated stimuli/processed stimuli/processed_everyday_sound"
everyday_sounds_dir = "/Volumes/T7/CMU LiMN Research/Perceptual Organization/Stimuli exploration/Separated stimuli/replicated stimuli-original study (dif category)/processed stimuli/processed_everyday_sound (no dog)"

# Cat source sounds
#cat_sounds_dir = "/Volumes/T7/CMU LiMN Research/Perceptual Organization/Stimuli exploration/Separated stimulus/animal-sounds (same category)/cat_sounds/5. loudness normalized cat sounds"
cat_sounds_dir = "/Volumes/T7/CMU LiMN Research/perceptual organization/Stimuli exploration/Separated stimulus/animal-sounds (same category)/cat_sounds/6. 300ms loudness normalized cat sounds"

# Dog source sounds
#dog_sounds_dir = "/Volumes/T7/CMU LiMN Research/Perceptual Organization/Stimuli exploration/Separated stimulus/animal-sounds (same category)/dog_sounds/5. loudness normalized dog sounds"
dog_sounds_dir = "/Volumes/T7/CMU LiMN Research/perceptual organization/Stimuli exploration/Separated stimulus/animal-sounds (same category)/dog_sounds/6. 300ms loudness normalized dog sounds"

# Vocoded everyday sounds (ED) and vocoded complex tones (CT)
vocoded_ED_dir = "/Volumes/T7/CMU LiMN Research/Perceptual Organization/Stimuli exploration/vocoded stimuli/vocoded-everydaySound"
vocoded_CT_dir = "/Volumes/T7/CMU LiMN Research/Perceptual Organization/Stimuli exploration/vocoded stimuli/vocoded-complexTones"

#----OUTPUT DIR----
# One output root per stimulus pairing; the folder name carries the sequence
# length, so switch the active line when `length` changes.
# len 6
#output_dir_everyday= "/Volumes/T7/CMU LiMN Research/perceptual organization/stimuli generation/len6-complexTone_everydaySound/stimuli"
# len 4 
output_dir_everyday= "/Volumes/T7/CMU LiMN Research/perceptual organization/stimuli generation/len4-complexTone_everydaySound/stimuli"

#len 6
#output_dir_cat = "/Volumes/T7/CMU LiMN Research/perceptual organization/stimuli generation/len6-complexTone_cat/stimuli"
#len 4
output_dir_cat = "/Volumes/T7/CMU LiMN Research/perceptual organization/stimuli generation/len4-complexTone_cat/stimuli"

# NOTE(review): only a len6 folder is configured for the dog pairing — confirm
# before generating length-4 dog stimuli.
output_dir_dog = "/Volumes/T7/CMU LiMN Research/perceptual organization/stimuli generation/len6-complexTone_dog/stimuli"

#len4 vocoded
output_dir_vocoded_CT = "/Volumes/T7/CMU LiMN Research/Perceptual Organization/Stimuli generation/len4-complexTone_vocodedCT/stimuli"

In [40]:
# Setup parameters for this run
num_blocks =2
# Keep this at 1: process_stimuli writes the metadata xlsx itself, under a
# file name that does not include the stimulus number, so with a value above 1
# each later stimulus overwrites the workbook and only one stimulus set ends
# up listed in it.
num_stimuli_per_condition = 1
length = 4

# Each section below generates stimuli for one pairing of sound categories.
# Un-comment the pairing to generate; every call writes WAV and xlsx files
# under the given output directory and returns the metadata rows.

# #---- Complex_tone VS Everyday_sounds ----
# complex_tones_metadata = generate_stimuli(complex_tones_dir, 'complex_tones', white_noise_file, output_dir_everyday, num_blocks, num_stimuli_per_condition, length)
# everyday_sounds_metadata = generate_stimuli(everyday_sounds_dir, 'everyday_sounds', white_noise_file, output_dir_everyday, num_blocks, num_stimuli_per_condition, length)

#---- Complex_tone VS Cat_sounds ----
# complex_tones_metadata = generate_stimuli(complex_tones_dir, 'complex_tones', white_noise_file, output_dir_cat, num_blocks, num_stimuli_per_condition, length)
# cat_mew_sounds_metadata = generate_stimuli(cat_sounds_dir, 'cat_mew_sounds', white_noise_file, output_dir_cat, num_blocks, num_stimuli_per_condition, length)

# #---- Complex_tone VS Dog_sounds ----
# complex_tones_metadata = generate_stimuli(complex_tones_dir, 'complex_tones', white_noise_file, output_dir_dog, num_blocks, num_stimuli_per_condition, length)
# dog_bark_sounds_metadata = generate_stimuli(dog_sounds_dir, 'dog_bark_sounds', white_noise_file, output_dir_dog, num_blocks, num_stimuli_per_condition, length)

# #---- Vocoded stimuli (active): vocoded everyday sounds and vocoded complex tones ----
# NOTE(review): vocoded_meow_dir is not defined in the visible path cell;
# define it before re-enabling the next line.
#vocoded_cat_sounds_metadata = generate_stimuli(vocoded_meow_dir, 'cat_mew_sounds', white_noise_file, output_dir_vocoded_CT, num_blocks, num_stimuli_per_condition, length)
vocoded_everyday_sounds_metadata = generate_stimuli(vocoded_ED_dir, 'everyday_sounds', white_noise_file, output_dir_vocoded_CT, num_blocks, num_stimuli_per_condition, length)
vocoded_complex_tones_metadata = generate_stimuli(vocoded_CT_dir, 'complex_tones', white_noise_file, output_dir_vocoded_CT, num_blocks, num_stimuli_per_condition, length)
