In [None]:
# loads and parses corrected vad files to determine chunks (ipus) and
# turns and inserts them in the database; also extracts ipu audio from
# full session audio and concatenates ipu audio to turns, replacing
# gaps (from pauses, laughter etc.) by silence
# and computes auxiliary tables "chunk_pairs" and "halfway_points"

In [1]:
import sys
sys.path.append('../python/')
import cfg
import db
import fio

In [2]:
def get_chu_intervals(ses_id, mch_pair):
    """Load the intervals of both sides of a session and merge them.

    Args:
        ses_id: session id; only used to locate the woz message file
            when the second entry of mch_pair has id 0.
        mch_pair: pair of 3-element entries, one per side; the third
            element of each is the wav file name, the first element of
            the second entry is its id (0 selects the woz message file
            instead of a corrected vad textgrid).

    Returns:
        One sorted list holding every interval of both sides, each
        extended by a trailing 'A' (first side) or 'B' (second side).
        Intervals are lists, so sorting orders them by their first
        element (the starting timestamp) first.
    """
    (_, _, wav_a), (id_b, _, wav_b) = mch_pair

    # first side always comes from its corrected vad textgrid
    path_a, fname_a = fio.get_vad_pfn(wav_a, 'corrected/')
    merged = [
        row + ['A'] for row in fio.read_textgrid_file(path_a, fname_a)]

    # second side: woz message file for id 0, vad textgrid otherwise
    if id_b == 0:
        rows_b = fio.read_woz_msg_file(ses_id)
    else:
        path_b, fname_b = fio.get_vad_pfn(wav_b, 'corrected/')
        rows_b = fio.read_textgrid_file(path_b, fname_b)
    merged.extend([row + ['B'] for row in rows_b])

    # merge the intervals by starting timestamps
    merged.sort()
    return merged


def find_interval(intervals, ts):
    """Return the index of the first interval that ends after ``ts``.

    Only the end of each interval is compared with ``ts``; for
    intervals that are sorted and gap-free this is the interval that
    contains the timestamp.

    Args:
        intervals: iterable of (start, end) pairs.
        ts: timestamp to look up.

    Returns:
        Zero-based index of the first pair with ``end > ts``.

    Raises:
        ValueError: if no interval ends after ``ts`` (including the
            case of an empty ``intervals``).
    """
    # stop at the first match instead of collecting all candidates
    for idx, (_, end) in enumerate(intervals):
        if end > ts:
            return idx
    raise ValueError(
        'no interval ends after timestamp {!r}'.format(ts))


# Main pass: for every session of status 3, walk through the merged
# interval list of both speakers, create one chunk per non-silent
# interval, group consecutive chunks of a speaker into turns, insert
# both into the database and build the per-turn wav/txt files.
db.connect(cfg.CORPUS_ID_BMIC)

# global ids for turns and chunks (one tur_id per speaker, see below)
# (these should be set to the highest id currently in the database
#  to continue a previous run; set to 0 if the database is empty)
tur_ids = [10559, 10559]
chu_id = 27286

# iterate over sessions of status 3 (vad corrected)
# (grp_id, ses_type and rnd are not used in the loop body)
for grp_id, ses_id, ses_type, rnd, mch_pair \
in db.find_sessions(3):
    chu_intervals = get_chu_intervals(ses_id, mch_pair)
    tsk_intervals = fio.read_tsk_interval_file(ses_id)
    # lists to track end timestamps and turns/chunks per speaker;
    # lookup of "other speaker" is easier with list than with dict
    # (index 0 is speaker 'A', index 1 is speaker 'B', so the other
    #  speaker is always at 1-idx)
    # ends: end timestamp of each speaker's latest chunk
    ends = [0.0, 0.0]
    # tur_cnts: session-level number of each speaker's current turn
    tur_cnts = [0, 0]
    # chu_cnts: number of chunks so far in each speaker's current turn
    chu_cnts = [0, 0]
    # create a chunk for each non-silent interval and turns as needed
    for start, end, text, a_or_b in chu_intervals:
        # the task is determined by the chunk's start timestamp; the +1
        # turns the list position into the index passed to the database
        # (presumably task indices are 1-based -- confirm in db)
        # NOTE(review): these lookups also run for 'silent' intervals,
        # although their results are only used for non-silent ones
        task_index = find_interval(tsk_intervals, start) + 1
        tsk_id = db.get_tsk_id(ses_id, task_index)
        role = db.get_role(tsk_id, a_or_b)
        idx = 0 if a_or_b == 'A' else 1
        if text != 'silent':
            # new chunk, check whether new turn; a new turn starts if
            # the other speaker's latest chunk ended after this
            # speaker's latest chunk, or the other speaker holds the
            # more recent turn, or this speaker has no turn yet
            if ends[1-idx] > ends[idx] \
            or tur_cnts[1-idx] > tur_cnts[idx] \
            or tur_cnts[idx] == 0:
                # new turn, update index and count; turn numbers and
                # turn ids are shared by both speakers, hence max()+1
                tur_cnts[idx] = max(tur_cnts) + 1
                tur_ids[idx] = max(tur_ids) + 1
                chu_cnts[idx] = 1
                # initialize turn wav/txt file
                # ('sounding' marks an audio chunk, any other text is
                #  treated as a text message, see else branch below)
                wav_or_txt = 'wav' if text == 'sounding' else 'txt'
                fio.init_tur_file(ses_id, tur_cnts[idx], wav_or_txt)
            else:
                # continuation of old turn
                chu_cnts[idx] += 1
                if text == 'sounding':
                    # append silence to existing turn wav file; its
                    # length is the gap between the end of this
                    # speaker's previous chunk and the current start
                    fio.append_silence(
                        *fio.get_tur_pfn(ses_id, tur_cnts[idx]), 
                        start - ends[idx])
                else: # text is a woz message
                    # append newline to existing turn txt file
                    fio.append_newline(ses_id, tur_cnts[idx])
            # remember where this speaker's latest chunk ended (used by
            # the turn check and the silence computation above)
            ends[idx] = end

            if chu_cnts[idx] == 1:
                # first chunk in turn; insert turn first
                db.ins_tur(tur_ids[idx], tsk_id, tur_cnts[idx], role)
            chu_id += 1
            # audio chunks are stored without words, message chunks
            # with their text
            words = None if text == 'sounding' else text
            db.ins_chu(chu_id, tur_ids[idx], chu_cnts[idx], 
                       start, end, words)
            # append audio/text to turn file
            if text == 'sounding':
                # second and third field of this speaker's mch_pair
                # entry are passed on; the third is the wav file name
                # (see get_chu_intervals), the meaning of the second is
                # not visible here -- TODO confirm
                fio.append_chunk_audio(
                    ses_id, tur_cnts[idx], 
                    mch_pair[idx][1], mch_pair[idx][2], start, end)
            else:
                fio.append_line(ses_id, tur_cnts[idx], words)
        # NOTE(review): leftover, task_index_prev is not used anywhere
        # in this cell
        # task_index_prev = task_index
    # once per session, after all its chunks have been processed
    fio.write_tur_list(ses_id)
# once for the whole run, after all sessions have been processed
db.set_turn_indices()
db.commit()
db.close()

In [3]:
# for each turn, annotators are given the option to listen to samples of
# preceding and/or subsequent speech by the same speaker for comparison;
# for the first turn, the subsequent speech sample is 30s long, all 
# other samples are 3s long (excluding pauses between ipus); this code
# produces the wav files of these samples for all turns with a duration
# of at least 1s (only those are annotated)

def get_sample_interval(ses_id, i, delta, tur_cnt, thresh):
    """Find where a same-speaker sample starting next to turn i ends.

    Walks from turn i in steps of delta and adds up the durations of
    the turns that belong to the speaker of turn i, until at least
    thresh has been collected or the turn range 1..tur_cnt is left.

    Args:
        ses_id: session id.
        i: index of the turn the sample is computed for.
        delta: step per iteration (+1 to move forward, -1 backward).
        tur_cnt: number of turns in the session (highest turn index).
        thresh: amount of same-speaker audio to collect.

    Returns:
        Index j one step past the last turn that was looked at, i.e.
        the visited turns lie strictly between i and j.
    """
    speaker = db.get_tur_spk(ses_id, i)[0]
    collected = 0.0
    j = i + delta
    while 0 < j < tur_cnt + 1 and collected < thresh:
        dur, spk_j = db.get_tur_duration(ses_id, j)
        # turns of the other speaker are stepped over without counting
        if speaker == spk_j:
            collected += dur
        j += delta
    return j

# Produce the "_next"/"_prev" comparison samples for every turn that
# gets annotated, in all sessions of status 3.
db.connect(cfg.CORPUS_ID_BMIC)
for _, ses_id, _, _, mch_pair in db.find_sessions(3):
    tur_cnt = db.get_tur_cnt(ses_id)
    # per speaker: has the long introductory forward sample been made?
    intro_done = {'A': False, 'B': False}
    for i in range(1, tur_cnt+1):
        a_or_b, spk_id = db.get_tur_spk(ses_id, i)
        # only turns with a non-zero speaker id and a duration of at
        # least 1s need samples; the duration is only queried when the
        # speaker id is non-zero
        needs_samples = (
            spk_id != 0
            and db.get_tur_duration(ses_id, i)[0] >= 1.0)
        if not needs_samples:
            continue

        if i < tur_cnt:
            # not the last turn, forward sample needed: 30s the first
            # time for this speaker, 3s afterwards
            fwd_thresh = 3.0 if intro_done[a_or_b] else 30.0
            j = get_sample_interval(
                ses_id, i, +1, tur_cnt, fwd_thresh)
            intro_done[a_or_b] = True
            fio.concat_turns(ses_id, i, i, j, '_next')

        if i > 1:
            # not the first turn, backward sample (always 3s) needed
            j = get_sample_interval(
                ses_id, i, -1, tur_cnt, 3.0)
            fio.concat_turns(ses_id, i, j, i, '_prev')
db.close()