### This code functionalizes the spreadsheet generation for the Gorilla experiment for both the absent/present (A/P) and same/different (S/D) tasks
- Instruction
- Practice 
- Actual trials (block based)

### Import library


In [60]:
import random
import os
from os import listdir
import pandas as pd
from pydub import AudioSegment
import itertools
from pydub.playback import play
import numpy as np

### Functions

In [61]:
def create_pitch_word_matrix(stim_dir, speaker, pMedian, syllables):
    '''
    Load one speaker's stimulus wav files into a square syllable-by-syllable matrix.

    stim_dir:  directory of monotonized word/syllable semitone wav files; it is
               prepended to every filename as-is, so it must end with a path separator
    speaker:   keyword in the filename that differentiates the speaker
    pMedian:   second keyword that must also appear in the filename
               (callers pass the pitch median, e.g. "200")
    syllables: all the syllables/words that exist; only its length is used here

    return:    (matrix, matrix_names) -- two object arrays of shape
               (len(syllables), len(syllables)) holding the loaded AudioSegment
               objects and their filenames, filled row by row from the sorted
               list of matching filenames

    raises:    ValueError if fewer than len(syllables)**2 files match
    '''
    # A file must contain BOTH keywords.  The previous test
    # `speaker and pMedian in f` only checked pMedian, because a non-empty
    # `speaker` string is always truthy.
    # os.listdir returns names in arbitrary order, so the directory is listed
    # once and sorted: audio and names stay aligned and the layout is reproducible.
    speaker_wavs_names = sorted(f for f in listdir(stim_dir)
                                if speaker in f and pMedian in f)

    n_syllables = len(syllables)
    n_needed = n_syllables * n_syllables
    if len(speaker_wavs_names) < n_needed:
        raise ValueError(
            'Expected at least %d files containing %r and %r in %r, found %d'
            % (n_needed, speaker, pMedian, stim_dir, len(speaker_wavs_names)))

    # only the first n_needed files are placed in the matrix, so only those are loaded
    speaker_wavs = [AudioSegment.from_wav(stim_dir + f)
                    for f in speaker_wavs_names[:n_needed]]

    # create word/syllable matrix
    matrix = np.zeros((n_syllables, n_syllables), dtype=object)
    matrix_names = np.zeros((n_syllables, n_syllables), dtype=object)
    for i in range(n_syllables):
        for j in range(n_syllables):
            flat_index = n_syllables * i + j
            matrix[i, j] = speaker_wavs[flat_index]
            matrix_names[i, j] = speaker_wavs_names[flat_index]

    return matrix, matrix_names

In [62]:
def gen_trial_from_filenames(seq1, seq2, iItemi, iSeqi, seq_length):
    """
    Concatenate two sound sequences into a single trial.

    :param seq1:        sound sequence 1 (indexable; its first seq_length items are used)
    :param seq2:        sound sequence 2 (indexable; all len(seq2) items are used)
    :param iItemi:      the pause between every item in a sequence
    :param iSeqi:       the pause between the two sequences
    :param seq_length:  the number of items taken from sequence 1
    :return:            sequence 1, the between-sequence pause, then sequence 2
    """
    item_gap = AudioSegment.silent(duration=iItemi)
    sequence_gap = AudioSegment.silent(duration=iSeqi)

    def _chain(items, count):
        # items[0], then (gap + item) for every further index below count
        chained = items[0]
        for idx in range(1, count):
            chained = chained + item_gap + items[idx]
        return chained

    first_part = _chain(seq1, seq_length)
    second_part = _chain(seq2, len(seq2))

    return first_part + sequence_gap + second_part

In [63]:
# Define the sequence_generation for fixed 2nd dimension
# Change the first sequence to
# The sequence length has to be smaller than 9; otherwise np.setdiff1d returns an empty array
def generate_first_sequence(attend_cond, seq_length, file_matrix, filename_matrix):
    """
    Draw the first sequence of a trial.

    The attended dimension gets seq_length distinct indices out of 0-8; the
    other dimension is held at one random index out of 0-8 for the whole sequence.

    :param attend_cond:     'pitch' or 'word', the dimension that varies
    :param seq_length:      number of items in the sequence
    :param file_matrix:     matrix indexed by (word index, pitch index)
    :param filename_matrix: matrix of names, indexed the same way
    :return:                seq_1, seq_1_names, pitch_list, word_list
    """
    if attend_cond not in ('pitch', 'word'):
        raise Exception('Attend condition inputted is not valid')

    # the varying dimension is drawn first, then the constant one
    varying = np.random.choice(np.arange(9), size=seq_length, replace=False)
    constant = np.full(seq_length, np.random.randint(0, 9))

    if attend_cond == 'pitch':
        pitch_list, word_list = varying, constant
    else:
        word_list, pitch_list = varying, constant

    seq_1 = np.zeros((seq_length,), dtype=object)
    seq_1_names = np.zeros((seq_length,), dtype=object)
    for pos in range(seq_length):
        key = (word_list[pos], pitch_list[pos])
        seq_1[pos] = file_matrix[key]
        seq_1_names[pos] = filename_matrix[key]

    return seq_1, seq_1_names, pitch_list, word_list

def generate_same_trial(seq_1, seq_1_names):
    """
    Build the second sequence of a 'same' trial.

    :param seq_1:       first sequence
    :param seq_1_names: names of the first sequence's items
    :return:            a copy of seq_1, seq_1_names itself (not copied),
                        and None because no position is switched
    """
    return seq_1.copy(), seq_1_names, None

def generate_different_trial(seq_length, attend_cond, pitch_list, word_list, file_matrix, filename_matrix, serial):
    """
    Build the second sequence of a 'different' trial by swapping two adjacent items.

    The entries at positions serial and serial + 1 of the attended dimension
    are exchanged; the other dimension is left untouched.  The input lists are
    not modified (the swap is done on a copy).

    :param seq_length:      number of items in the sequence
    :param attend_cond:     'pitch' or 'word', the dimension whose items are swapped
    :param pitch_list:      pitch indices of the first sequence
    :param word_list:       word indices of the first sequence
    :param file_matrix:     matrix indexed by (word index, pitch index)
    :param filename_matrix: matrix of names, indexed the same way
    :param serial:          position of the first of the two swapped items
    :return:                seq_2, seq_2_names, switch_pos (equal to serial)
    """
    second_seq = np.zeros((seq_length,), dtype=object)
    second_names = np.zeros((seq_length,), dtype=object)

    if attend_cond not in ('pitch', 'word'):
        raise Exception('Attend condition inputted is not valid')

    attending_pitch = attend_cond == 'pitch'

    # swap the adjacent pair on a copy of the attended list
    swapped = (pitch_list if attending_pitch else word_list).copy()
    nxt = serial + 1
    swapped[serial], swapped[nxt] = swapped[nxt], swapped[serial]

    if attending_pitch:
        words, pitches = word_list, swapped
    else:
        words, pitches = swapped, pitch_list

    # look up the audio and the name for every (word, pitch) pair
    for pos in range(seq_length):
        key = (words[pos], pitches[pos])
        second_seq[pos] = file_matrix[key]
        second_names[pos] = filename_matrix[key]

    return second_seq, second_names, serial

def generate_present_trial(seq_1, seq_1_names, attend_cond, serial):
    """
    Build the probe of a 'present' trial: the item of the first sequence at position serial.

    :param seq_1:       first sequence
    :param seq_1_names: names of the first sequence's items
    :param attend_cond: 'pitch' or 'word' (only validated; both are handled alike)
    :param serial:      position of the probe item in the first sequence
    :return:            one-element list with the probe, the probe's name, and serial
    """
    if attend_cond not in ('pitch', 'word'):
        raise Exception("attend condition is inputted incorrectly")

    probe = [seq_1[serial]]
    probe_name = seq_1_names[serial]
    return probe, probe_name, serial

def generate_absent_trial(attend_cond, serial, word_list, pitch_list, file_matrix, filename_matrix):
    """
    Build the probe of an 'absent' trial: an item whose attended value does not
    occur anywhere in the first sequence.

    :param attend_cond:     'pitch' or 'word', the dimension in which the probe is absent
    :param serial:          position in the first sequence that supplies the
                            probe's value on the non-attended dimension
    :param word_list:       word indices of the first sequence
    :param pitch_list:      pitch indices of the first sequence
    :param file_matrix:     matrix indexed by (word index, pitch index)
    :param filename_matrix: matrix of names, indexed the same way
    :return:                one-element list with the probe, the probe's name, and serial
    :raises ValueError:     if the sequence already uses all 9 values of the attended dimension
    :raises Exception:      if attend_cond is neither 'pitch' nor 'word'
    """
    switch_pos = serial
    if attend_cond == 'pitch':  # change the pitch, keep the word the same
        used = pitch_list
    elif attend_cond == 'word':  # change the word, keep the pitch the same
        # The unused values must be taken from word_list.  Excluding pitch_list
        # here only removed one value, so the "absent" word could in fact be
        # one of the words in the sequence.
        used = word_list
    else:
        raise Exception("attend condition is inputted incorrectly")

    candidates = np.setdiff1d(np.arange(9), used)
    if candidates.size == 0:
        raise ValueError("No unused " + attend_cond + " index is left for an absent trial")
    value2use = np.random.choice(candidates)

    if attend_cond == 'pitch':
        key = (word_list[switch_pos], value2use)
    else:
        key = (value2use, pitch_list[switch_pos])

    seq_2 = [file_matrix[key]]
    seq_2_names = filename_matrix[key]

    return seq_2, seq_2_names, switch_pos

def generate_sequence_fixed(dir, file_matrix, seq_length, trial_type, pause_item, pause_seq,
                      attend_cond, filename_matrix, name, serial):
    """
    Generate one trial, write it to dir + name as a wav file and return it.

    :param dir:             directory the wav file is saved in (concatenated with name as-is)
    :param file_matrix:     the matrix with (Row(words) by Column(pitches))
    :param seq_length:      length of the sequence generated
    :param trial_type:      'same', 'different', 'present' or 'absent'
    :param pause_item:      the pause between every item in the sequence
    :param pause_seq:       the pause between the two sequences
    :param attend_cond:     attend to the "pitch" or "word"
    :param filename_matrix: the matrix with names of (Row(words) by Column(pitches))
    :param name:            file name of the exported trial
    :param serial:          target position within the sequence
    :return:                trial, names of sequence 1, name(s) of sequence 2, switch position
    """
    # the first sequence is always drawn before the trial type is inspected
    first_seq, first_names, pitches, words = generate_first_sequence(
        attend_cond, seq_length, file_matrix, filename_matrix)

    if trial_type == 'same':
        second = generate_same_trial(first_seq, first_names)
    elif trial_type == 'different':
        second = generate_different_trial(seq_length, attend_cond, pitches, words,
                                          file_matrix, filename_matrix, serial)
    elif trial_type == 'present':
        second = generate_present_trial(first_seq, first_names, attend_cond, serial)
    elif trial_type == 'absent':
        second = generate_absent_trial(attend_cond, serial, words, pitches,
                                       file_matrix, filename_matrix)
    else:
        raise Exception("Trial type is incorrect")
    second_seq, second_names, target_pos = second

    # join both sequences with pydub and write the result to disk
    trial = gen_trial_from_filenames(first_seq, second_seq, pause_item, pause_seq, seq_length)
    trial.export(dir + name, format="wav")

    return trial, first_names, second_names, target_pos


In [64]:
def initialize_stimuli_data():
    """
    Create the stimuli spreadsheet with its column layout.

    :return: DataFrame with the 16 spreadsheet columns and a single row of None values
    """
    columns = (
        'randomise_blocks',
        'randomise_trials',
        'display',
        'Audio_Filename',
        'Same_Different',
        'Absent_Present',
        'text',
        'embedded',
        'Attend_Condition',
        'Length',
        'Pause',
        'Sequence Pause',
        'Sequence_Names',
        'Switch_Names',
        'Stim_Ind',
        'Non_attend_dim',
    )
    return pd.DataFrame({column: [None] for column in columns})

def generate_same_different_task_data(num, length, pause, pause_seq_len, att_cond, audio, audio_names, save_dir,
                                      name_start, block_num):
    """
    Generate the same/different trials of one block and the spreadsheet rows describing them.

    For each of the num repetitions and each swap position 0 .. length - 2, one
    'same' and one 'different' trial are generated (each from its own randomly
    drawn first sequence) and written to save_dir as wav files.

    :param num:           number of repetitions per swap position
    :param length:        number of items per sequence
    :param pause:         pause between the items of a sequence
    :param pause_seq_len: pause between the two sequences
    :param att_cond:      'pitch' or 'word'
    :param audio:         matrix of audio items, indexed by (word index, pitch index)
    :param audio_names:   matrix of names, indexed the same way
    :param save_dir:      directory the wav files are written to
    :param name_start:    label inserted into every file name
    :param block_num:     block number inserted into every file name
    :return:              DataFrame starting with the all-None row from
                          initialize_stimuli_data, followed by two rows
                          (same, different) per generated pair
    """
    stimuli_data = initialize_stimuli_data()

    def _trial_row(audio_filename, same_different, first_names, second_names, stim_ind):
        # one-row dataframe with the parameters of a single audio file
        return pd.DataFrame({'randomise_blocks': [None],
                             'randomise_trials': 1,
                             'display': 'task-sd',
                             'Audio_Filename': audio_filename,
                             'Same_Different': same_different,
                             'Absent_Present': [None],
                             'text': [None],
                             'embedded': [None],
                             'Attend_Condition': '### ' + att_cond,
                             'Length': length,
                             'Pause': pause,
                             'Sequence Pause': pause_seq_len,
                             'Sequence_Names': str(first_names),
                             'Switch_Names': str(second_names),
                             'Stim_Ind': stim_ind,
                             'Non_attend_dim': 'fixed'
                             }, index=[0])

    for i in range(num):
        # the swap needs positions j and j + 1, hence j stops at length - 2
        for j in range(0, length - 1):
            same_name = f'Block_{block_num}_' + name_start + f'_same_{att_cond}_{length}_serial_{j}_stim' + str(i+1) + 'fixed.wav'
            diff_name = f'Block_{block_num}_' + name_start + f'_diff_{att_cond}_{length}_serial_{j}_stim' + str(i+1) + 'fixed.wav'

            # generate_sequence_fixed is the generator defined in this file; the
            # previous call to `generate_sequence` referred to a name that is
            # not defined here.
            _, first_seq_names, second_seq_names, _ = generate_sequence_fixed(dir=save_dir,
                                                                              file_matrix=audio,
                                                                              seq_length=length,
                                                                              trial_type='same',
                                                                              pause_item=pause,
                                                                              pause_seq=pause_seq_len,
                                                                              attend_cond=att_cond,
                                                                              filename_matrix=audio_names,
                                                                              name=same_name,
                                                                              serial=j)

            _, first_seq_names_diff, second_seq_names_diff, switch_inds_diff = generate_sequence_fixed(
                dir=save_dir,
                file_matrix=audio,
                seq_length=length,
                trial_type='different',
                pause_item=pause,
                pause_seq=pause_seq_len,
                attend_cond=att_cond,
                filename_matrix=audio_names,
                name=diff_name,
                serial=j)

            # a 'same' trial has no switched position, so Stim_Ind stays empty
            temp_df_1 = _trial_row(same_name, 'same', first_seq_names, second_seq_names, [None])
            temp_df_2 = _trial_row(diff_name, 'different', first_seq_names_diff, second_seq_names_diff,
                                   switch_inds_diff)

            # append to participant dataframe
            stimuli_data = pd.concat([stimuli_data, temp_df_1], ignore_index=True)
            stimuli_data = pd.concat([stimuli_data, temp_df_2], ignore_index=True)

    return stimuli_data


def generate_absent_present_task_data(num, length, pause, pause_seq_len, att_cond, audio, audio_names, save_dir,
                                      name_start, block_num):
    """
    Generate the absent/present trials of one block and the spreadsheet rows describing them.

    For each of the num repetitions and each position 0 .. length - 1, one
    'absent' and one 'present' trial are generated (each from its own randomly
    drawn first sequence) and written to save_dir as wav files.

    :param num:           number of repetitions per position
    :param length:        number of items per sequence
    :param pause:         pause between the items of a sequence
    :param pause_seq_len: pause between the sequence and the probe
    :param att_cond:      'pitch' or 'word'
    :param audio:         matrix of audio items, indexed by (word index, pitch index)
    :param audio_names:   matrix of names, indexed the same way
    :param save_dir:      directory the wav files are written to
    :param name_start:    label inserted into every file name
    :param block_num:     block number inserted into every file name
    :return:              DataFrame starting with the all-None row from
                          initialize_stimuli_data, followed by two rows
                          (absent, present) per generated pair
    """
    stimuli_data = initialize_stimuli_data()

    def _trial_row(audio_filename, absent_present, seq_names, probe_name, stim_ind):
        # one-row dataframe with the parameters of a single audio file
        return pd.DataFrame({'randomise_blocks': [None],
                             'randomise_trials': 1,
                             'display': 'task-ap',
                             'Audio_Filename': audio_filename,
                             'Same_Different': [None],
                             'Absent_Present': absent_present,
                             'text': [None],
                             'embedded': [None],
                             'Attend_Condition': '### ' + att_cond,
                             'Length': length,
                             'Pause': pause,
                             'Sequence Pause': pause_seq_len,
                             'Sequence_Names': str(seq_names),
                             'Switch_Names': probe_name,
                             'Stim_Ind': stim_ind,
                             'Non_attend_dim': 'fixed'
                             }, index=[0])

    for i in range(num):
        for j in range(0, length):
            absent_name = f'Block_{block_num}_' + name_start + f'_absent_{att_cond}_{length}_serial_{j}_' + str(i+1) + 'fixed.wav'
            present_name = f'Block_{block_num}_' + name_start + f'_present_{att_cond}_{length}_serial_{j}_' + str(i+1) + 'fixed.wav'

            # generate_sequence_fixed is the generator defined in this file; the
            # previous call to `generate_sequence` referred to a name that is
            # not defined here.
            _, seq_names_absent, absent_stim_name, _ = generate_sequence_fixed(dir=save_dir,
                                                                               file_matrix=audio,
                                                                               seq_length=length,
                                                                               trial_type='absent',
                                                                               pause_item=pause,
                                                                               pause_seq=pause_seq_len,
                                                                               attend_cond=att_cond,
                                                                               filename_matrix=audio_names,
                                                                               name=absent_name,
                                                                               serial=j)

            _, seq_names_present, present_stim_name, _ = generate_sequence_fixed(dir=save_dir,
                                                                                 file_matrix=audio,
                                                                                 seq_length=length,
                                                                                 trial_type='present',
                                                                                 pause_item=pause,
                                                                                 pause_seq=pause_seq_len,
                                                                                 attend_cond=att_cond,
                                                                                 filename_matrix=audio_names,
                                                                                 name=present_name,
                                                                                 serial=j)

            temp_df_1 = _trial_row(absent_name, 'absent', seq_names_absent, absent_stim_name, j)
            temp_df_2 = _trial_row(present_name, 'present', seq_names_present, present_stim_name, j)

            # append to participant dataframe
            stimuli_data = pd.concat([stimuli_data, temp_df_1], ignore_index=True)
            stimuli_data = pd.concat([stimuli_data, temp_df_2], ignore_index=True)

    return stimuli_data


def simplified_spreadsheet_word_pitches(block_num, task_type, num, audio, audio_names, name_start, length, pause,
                                        pause_seq_len, aud_type, save_dir_str):
    """
    Generate the stimuli and the spreadsheet of one block for both attend conditions.

    For 'word' and then 'pitch', the output directory is created if missing,
    the trials are generated into it, and the describing spreadsheet is
    written next to them as an .xlsx file.

    :param block_num:     block number, used in directory and file names
    :param task_type:     'sd' (same/different) or 'ap' (absent/present)
    :param num:           number of repetitions per position
    :param audio:         matrix of audio items
    :param audio_names:   matrix of the corresponding names
    :param name_start:    label inserted into every file name
    :param length:        number of items per sequence
    :param pause:         pause between the items of a sequence
    :param pause_seq_len: pause between the two sequences
    :param aud_type:      not used by this function
    :param save_dir_str:  root output directory (concatenated as-is)
    """
    # pick the generator once; an unknown task type fails before anything is written
    if task_type == 'sd':
        build_task_data = generate_same_different_task_data
    elif task_type == 'ap':
        build_task_data = generate_absent_present_task_data
    else:
        raise Exception("Task type is incorrect")

    block_label = 'Block_' + str(block_num) + '_'
    length_label = str(length)

    for att_cond in ('word', 'pitch'):
        save_dir = (save_dir_str + 'task_type_' + task_type + '/' + block_label + att_cond
                    + '/len' + length_label + '-fixed/')
        if not os.path.isdir(save_dir):
            os.makedirs(save_dir)

        stimuli_data = build_task_data(num, length, pause, pause_seq_len, att_cond, audio,
                                       audio_names, save_dir, name_start, block_num)

        workbook_path = (save_dir + block_label + task_type + '_' + name_start + '_' + att_cond
                         + '_len' + length_label + '.xlsx')
        stimuli_data.to_excel(workbook_path, index=False)

### Import Data \& pitch-word matrix for monotonized 4 semitone shifts



In [65]:
#********* Directory of monotonized word/syllable semitone *****************
# Exactly one stim_dir assignment below is active; the others are kept as
# commented-out alternatives.
# syllable stimuli with a 3 semitone difference
#stim_dir = "E:/cmu/lab project/stimuli_sets_new/Monotonized 3 semitone shifts/"
# syllable stimuli with a 4 semitone difference
stim_dir = "E:/cmu/lab project/syllable stimuli/Monotonized 4 semitone shifts/"
# syllable stimuli with a 5 semitone difference
#stim_dir = "E:/cmu/lab project/stimuli_sets_new/Monotonized 5 semitone shifts/"

# create pitch-word matrix for monotonized 4 semitone shifts
# speaker[k] and pitch_median[k] are the two filename keywords of speaker k
# (index 0 is used for the female matrix, index 1 for the male matrix below)
speaker = ["AI", "AP"]
pitch_median = ["200", "100"]
syllables = ["ba", "bee", "boo", "da", "dee", "doo", "ga", "gee", "goo"]

# one (9 x 9) matrix of audio items and one of filenames per speaker
female1_matrix, female1_matrix_names = create_pitch_word_matrix(stim_dir, speaker[0], pitch_median[0], syllables)
male1_matrix, male1_matrix_names = create_pitch_word_matrix(stim_dir, speaker[1],  pitch_median[1], syllables)

### Sequence Generation 

In [66]:
# number of repetitions per position (passed as `num`)
n_stim_per_cond = 1

# save_dir_v4 = "E:/cmu/lab project/stimuli generation/stimuli_v4/"
# #PLEASE delete the existing directory called save_dir_v2 before running the code
# if not os.path.isdir(save_dir_v4):
#     os.makedirs(save_dir_v4)

# Generate num_block blocks for every sequence length listed in `length`
# and every between-item pause listed in `interval`
sequenceInterval = 2000  # passed as pause_seq_len: the pause between the two sequences
num_block = 5
length = [6]
interval = [10]  # each value is passed as `pause`: the pause between items

for i in interval:
    # one output root per between-item pause
    save_dir_v3 = "E:/cmu/lab project/stimuli generation/V4-syllable-4-semitone-interval-" + str(i)+"/"
    for block in range (1,1+num_block):
        print('block number:', block)
        for l in length:
            print('length:', l)
            # A/P (absent/present) task, male and female
            #simplified_spreadsheet_word_pitches(block_num, task_type, num, audio, audio_names, name_start, length, pause, pause_seq_len, aud_type, save_dir_str)
            simplified_spreadsheet_word_pitches(block,'ap', n_stim_per_cond, male1_matrix, male1_matrix_names, 'male_1speaker', l, i, sequenceInterval, "single", save_dir_v3)
            simplified_spreadsheet_word_pitches(block,'ap', n_stim_per_cond, female1_matrix, female1_matrix_names, 'female_1speaker', l, i, sequenceInterval, "single", save_dir_v3)

            # S/D (same/different) task, male and female
            #simplified_spreadsheet_word_pitches(block_num, task_type, num, audio, audio_names, name_start, length, pause, pause_seq_len, aud_type, save_dir_str)
            simplified_spreadsheet_word_pitches(block,'sd', n_stim_per_cond, male1_matrix, male1_matrix_names, 'male_1speaker', l, i, sequenceInterval, "single", save_dir_v3)
            simplified_spreadsheet_word_pitches(block,'sd', n_stim_per_cond, female1_matrix, female1_matrix_names, 'female_1speaker', l, i, sequenceInterval, "single", save_dir_v3)

block number: 1
length: 6
block number: 2
length: 6
block number: 3
length: 6
block number: 4
length: 6
block number: 5
length: 6
