In [37]:
input = "/Users/joregan/Playing/hsi_google/"
output = "/tmp/hsi_google"

In [50]:
import json

def convert_google_asr_file_to_textgrid_times(filename, output_words=True):
    segments = []
    words = []
    last_end = 0.0

    with open(filename) as inf:
        data = json.load(inf)
    assert "results" in data, "no 'results' list, this is maybe not from Google ASR"
    for result in data["results"]:
        if len(result["alternatives"]) != 1:
            print("More than one alternative", result["alternatives"])
        item = result["alternatives"][0]
        if not "transcript" in item:
            continue
        text = item["transcript"]
        if not "resultEndTime" in item:
            # print(item, "missing resultEndTime")
            if "words" in item:
                if "endTime" in item["words"][-1]:
                    end_time = item["words"][-1]["endTime"]
        else:
            end_time = item["resultEndTime"]
        if end_time.endswith("s"):
            end_time = end_time[:-1]
        end_time = float(end_time)
        segments.append((last_end, end_time, text))
        last_end = end_time

        if output_words and "words" in item:
            for word in item["words"]:
                start = word["startTime"]
                if start[-1] == "s":
                    start = float(start[:-1])
                start = float(start)
                end = word["endTime"]
                if end[-1] == "s":
                    end = end[:-1]
                end = float(end)
                if start == end:
                    end += 0.01
                word_text = word["word"]
                words.append((start, end, word_text))

    return segments, words

In [47]:
def pad_with_silences(items):
    last = 0.0
    output = []
    for item in items:
        if item[0] > last:
            output.append((last, item[0], ""))
            output.append(item)
            last = item[1]
        elif item[0] == last:
            output.append(item)
            last = item[1]
        elif item[0] < last:
            print(f"Error: {item} starts before previous end time {last}")
            new_end = item[1]
            if new_end <= last:
                new_end += 0.01
            output.append((last, new_end, item[2]))
            last = new_end
    return output

In [45]:
from praatio import textgrid
from praatio.utilities.constants import Interval

def convert_to_textgrid(filename, outfile):
    a, b = convert_google_asr_file_to_textgrid_times(filename)
    a = pad_with_silences(a)
    b = pad_with_silences(b)

    a_start = a[0][0]
    a_end = a[-1][1]
    b_start = b[0][0]
    b_end = b[-1][1]

    a = [Interval(x[0], x[1], x[2]) for x in a]
    b = [Interval(x[0], x[1], x[2]) for x in b]

    tg = textgrid.Textgrid()
    res_tier = textgrid.IntervalTier('google_results', a, a_start, a_end)
    word_tier = textgrid.IntervalTier('google_words', b, b_start, b_end)
    tg.addTier(res_tier)
    tg.addTier(word_tier)
    tg.save(outfile, format="long_textgrid", includeBlankSpaces=False)



In [51]:
from pathlib import Path

inpath = Path(input)
outpath = Path(output)

if not inpath.is_dir():
    convert_to_textgrid(input, output)
else:
    if not outpath.is_dir():
        outpath.mkdir()
    for filename in inpath.glob("*.json"):
        print(filename)
        outfile = outpath / f"{filename.stem}.TextGrid"
        convert_to_textgrid(str(filename), outfile)

/Users/joregan/Playing/hsi_google/hsi_4_0716_210_002_inter.json
/Users/joregan/Playing/hsi_google/hsi_6_0718_209_003_inter.json
Error: (23.2, 23.3, 'out?') starts before previous end time 23.21
Error: (29.9, 30.0, 'can') starts before previous end time 29.91
Error: (31.0, 31.2, "it's") starts before previous end time 31.01
Error: (41.2, 41.6, 'contractor') starts before previous end time 41.21
Error: (42.2, 42.21, 'about.') starts before previous end time 42.21
Error: (42.2, 42.9, "Don't") starts before previous end time 42.22
Error: (48.0, 48.5, 'talk') starts before previous end time 48.01
Error: (53.2, 53.9, 'Uh') starts before previous end time 53.21
Error: (57.4, 57.41, 'go.') starts before previous end time 57.41
Error: (58.6, 58.7, 'go') starts before previous end time 58.61
Error: (61.3, 62.5, 'The') starts before previous end time 61.309999999999995
Error: (71.6, 71.9, 'watch') starts before previous end time 71.61
Error: (74.3, 74.5, 'go.') starts before previous end time 74.

UnboundLocalError: local variable 'end_time' referenced before assignment

In [34]:
pad_with_silences(a)

[(0.0, 13.8, 'Okay. But, um,'),
 (13.8,
  17.8,
  ' Yeah, so this thank you for coming. I have this new apartment but'),
 (17.8, 19.8, ' I think, and'),
 (19.8, 43.6, ' oh,'),
 (43.6, 55.4, " it's a bit cramped know."),
 (55.4, 60.2, ' Oh yeah.'),
 (60.2, 68.3, ' oh,'),
 (68.3, 71.8, ' oh, mhm. And then'),
 (71.8, 77.1, ' Mhm.'),
 (77.1, 82.3, ' No, still looking for a place for this 1 on the table.'),
 (82.3, 85.6, " The statue on the table, like I don't."),
 (85.6, 88.9, ' Mhm.'),
 (88.9, 109.4, ' Which, which 1?'),
 (109.4, 110.8, ' oh,'),
 (110.8, 115.3, ' oh,'),
 (115.3, 130.0, ' mhm.'),
 (130.0,
  137.7,
  ' Mhm. What about which? Which is the red 1 actually, okay. Yeah, I see.'),
 (137.7, 144.6, ' Mhm.'),
 (144.6, 150.7, ' Mhm.'),
 (150.7, 167.1, ' Oh yes.'),
 (167.1,
  181.6,
  " Oh, well, I think it's fine. Like, just move to the bigger objects because I really don't like this table."),
 (181.6, 184.1, ' Here, you see which 1?'),
 (184.1,
  193.7,
  " I think move it a bit. I 