In [21]:
textgrid_dir = "/Users/joregan/Playing/textgrids_shared"
timecode_dir = "/Users/joregan/timecode_cut"
output_dir = "/tmp/textgrid_cut"

In [22]:
from pathlib import Path

textgrid_path = Path(textgrid_dir)
timecode_path = Path(timecode_dir)
output_path = Path(output_dir)

In [23]:
def get_timecode_offsets(filename):
    if type(filename) == Path:
        filename = str(filename)

    with open(filename) as inf:
        lines = [l.strip() for l in inf.readlines()]
        assert lines[0] == ",Frame,Time (Seconds),TimeCode", f"CSV file ({filename}) seems to be incorrect"
        p_start = lines[1].split(",")
        start = float(p_start[2])
        p_end = lines[-1].split(",")
        end = float(p_end[2])
        return start, end

In [40]:
from praatio import textgrid
from praatio.utilities.constants import Interval

def modify_textgrid(filename, start, end, output=None):
    if type(filename) == Path:
        filename = str(filename)
    if not output:
        output = filename
    elif type(output) == Path:
        output = str(output)

    tg = textgrid.openTextgrid(filename, True)
    new_tg = textgrid.Textgrid()
    for tiername in tg.tierNames:
        tier = tg.getTier(tiername)
        new_entries = []
        for entry in tier.entries:
            e_start = entry[0]
            e_end = entry[1]
            e_text = entry[2]

            new_start = e_start - start
            new_end = e_end - start

            if new_end < 0.0:
                continue
            elif e_start >= end and e_end > end:
                continue
            elif new_start < 0.0 and new_end > 0.0:
                if e_text != "":
                    print("Warning: truncating entry", filename, tiername, entry)
                if new_entries == []:
                    new_entries.append(Interval(0.0, new_end, e_text))
                else:
                    print("Shouldn't have existing entries!!", entry)
                    new_entries.append(Interval(0.0, new_end, e_text))
            elif e_start >= start and e_end <= end:
                new_entries.append(Interval(new_start, new_end, e_text))
            elif e_start <= end and e_end > end:
                if e_text != "":
                    print("Warning: truncating entry", filename, tiername, entry)
                new_entries.append(Interval(new_start, new_end, e_text))
            else:
                print("There should be no default case", entry)

        tier_start = new_entries[0][0]
        tier_end = new_entries[-1][1]
        new_tier = textgrid.IntervalTier(tiername, new_entries, tier_start, tier_end)
        new_tg.addTier(new_tier)

    new_tg.save(output, format="long_textgrid", includeBlankSpaces=True)

In [41]:
if not output_path.is_dir():
    output_path.mkdir()

for tgfile in textgrid_path.glob("*.[Tt]ext[Gg]rid"):
    base_stem = tgfile.stem
    stem = base_stem
    if "_inter" in stem:
        stem = stem.replace("_inter", "")
    else:
        stem = stem.replace("_main", "")

    csv_file = timecode_path / f"{stem}.csv"

    if not csv_file.exists():
        print("No timecode for", tgfile)
        continue

    start, end = get_timecode_offsets(csv_file)
    output_file = output_path / f"{base_stem}.TextGrid"
    modify_textgrid(tgfile, start, end, output_file)


Textgrid has a max timestamp of (477.8844) but tier has (477.84439999999995)
Maximum timestamp in Textgrid changed from (762.8836) to (762.9205)
Textgrid has a max timestamp of (762.9205) but tier has (762.8836)
Textgrid has a max timestamp of (762.9205) but tier has (762.8836)
Maximum timestamp in Textgrid changed from (342.37899999999996) to (342.4133)
Textgrid has a max timestamp of (342.4133) but tier has (342.37899999999996)
Textgrid has a max timestamp of (342.4133) but tier has (342.37899999999996)
Textgrid has a max timestamp of (556.9694) but tier has (556.8931)
Textgrid has a max timestamp of (326.3542) but tier has (326.2242)
Maximum timestamp in Textgrid changed from (326.3109) to (326.6203)
Textgrid has a max timestamp of (326.6203) but tier has (326.3109)
Textgrid has a max timestamp of (326.6203) but tier has (326.28090000000003)
Textgrid has a max timestamp of (475.61699999999996) but tier has (475.116)
Textgrid has a max timestamp of (402.2441) but tier has (402.2041)
