# Vosk CLI stderr output to CTM

> "Because it was quicker than looking at the API examples"

- branch: master
- comments: false
- categories: [vosk, ctm, kludge]

In [23]:
# Input directories. Both are absolute, machine-specific paths; edit them
# before running this notebook anywhere else.
# VOSKDIR: scanned further down for "*.vosk" files.
VOSKDIR = "/Users/joregan/hsi-vosk/"
# GOOGDIR: searched further down for a "<stem>.json" file per Vosk file.
GOOGDIR = "/Users/joregan/Playing/hsi_google/"

In [24]:
from pathlib import Path

# Path objects for the two configured input directories, so later cells can
# use glob() and the "/" join operator on them.
vosk_path, goog_path = Path(VOSKDIR), Path(GOOGDIR)

In [5]:
def get_recognition(filename):
    """Collect the recognition results logged in a Vosk CLI stderr dump.

    Only lines that start with ``INFO:root:{'result':`` are considered. The
    ``INFO:root:`` logging prefix is removed and the remainder, a Python
    dict literal, is parsed.

    Args:
        filename: path of the captured log file (anything ``open`` accepts).

    Returns:
        A list with one parsed dict per matching line, in file order.

    Raises:
        ValueError, SyntaxError: if a matching line does not hold a plain
            Python literal.
    """
    # Local import so this cell does not depend on another cell's imports.
    import ast

    prefix = "INFO:root:"
    marker = prefix + "{'result':"
    segments = []
    with open(filename) as inf:
        # Iterate the file lazily rather than loading every line up front.
        for line in inf:
            if line.startswith(marker):
                payload = line.strip()[len(prefix):]
                # literal_eval only accepts literals, so a crafted log line
                # cannot execute code the way eval() would.
                segments.append(ast.literal_eval(payload))
    return segments

In [1]:
def clean_filename(filename):
    """Return the file name of ``filename`` without directory or last suffix.

    Args:
        filename: a ``str`` or ``pathlib.Path``. Any other value is returned
            unchanged.

    Returns:
        ``Path(filename).stem`` for str/Path input, e.g. ``"a.wav"`` for
        ``"/x/a.wav.vosk"``.
    """
    # isinstance, not ``type(...) == Path``: Path() and glob() yield
    # PosixPath/WindowsPath instances, whose exact type is never Path itself,
    # so the old comparison let path objects through untouched.
    if isinstance(filename, (str, Path)):
        return Path(filename).stem
    # NOTE(review): only the final suffix is removed ("a.wav.vosk" ->
    # "a.wav"); confirm that is what callers building "<stem>.json" expect.
    return filename

In [46]:
from dataclasses import dataclass, field

def times_ap_eq(a, b, fudge=0.2):
    """Tell whether two time values are approximately equal.

    Returns True when the absolute difference between ``a`` and ``b`` is
    strictly smaller than ``fudge``.
    """
    return abs(a - b) < fudge

def norm_str_eq(a, b, punct="!.,;:?"):
    """Compare two strings, ignoring case and surrounding punctuation.

    Characters listed in ``punct`` are stripped from both ends of each
    string (not from the middle), then both are lower-cased and compared.
    """
    left, right = (s.strip(punct).lower() for s in (a, b))
    return left == right


@dataclass(frozen=True)
class TimedWord:
    """A word with its start and end time, compared approximately.

    Two instances are equal when their texts match after stripping
    surrounding punctuation and lower-casing, and when both the start and
    the end times pass ``times_ap_eq``.

    The hash is derived from the normalised text only. Every pair that
    compares equal therefore also hashes equal, which dict and set lookups
    rely on. The previously generated hash used the exact field values, so
    an approximately-equal key was never found in a dict.

    Caveat: approximate equality is not transitive, so which of several
    near-identical keys a dict keeps can depend on insertion order.
    """

    # Punctuation ignored at both ends of ``text``; shared by __eq__ and
    # __hash__ so the two cannot drift apart. Unannotated, so the dataclass
    # machinery does not treat it as a field.
    _PUNCT = "!.,;:?"

    text: str = field(compare=True)
    start: float = field(compare=True)
    end: float = field(compare=True)

    def __lt__(self, o):
        """Order by start time."""
        if not isinstance(o, TimedWord):
            return NotImplemented
        return self.start < o.start

    def __gt__(self, o):
        """Order by end time (note: not the mirror image of ``__lt__``)."""
        if not isinstance(o, TimedWord):
            return NotImplemented
        return self.end > o.end

    def __eq__(self, o):
        """Approximate equality on normalised text and both timestamps."""
        if not isinstance(o, TimedWord):
            # Let Python fall back to its default handling instead of
            # failing with AttributeError on objects lacking .text/.start.
            return NotImplemented
        t = norm_str_eq(self.text, o.text, self._PUNCT)
        s = times_ap_eq(self.start, o.start)
        e = times_ap_eq(self.end, o.end)
        return t and s and e

    def __hash__(self):
        """Hash the normalised text, the only exact part of ``__eq__``."""
        # Mirrors the strip-then-lowercase normalisation that norm_str_eq
        # applies; the times are left out because they match only roughly.
        return hash(self.text.strip(self._PUNCT).lower())


In [17]:
def get_arrayrefs_from_vosk(rec):
    """Map each recognised word to the index of the segment containing it.

    Args:
        rec: sequence of dicts, each with a "result" list whose items carry
            "word", "start" and "end" entries.

    Returns:
        A dict from ``TimedWord`` to the position of its segment in ``rec``.
        If two words produce equal keys, the later segment index wins.
    """
    return {
        TimedWord(word["word"], word["start"], word["end"]): seg_idx
        for seg_idx, segment in enumerate(rec)
        for word in segment["result"]
    }

In [37]:
def get_arrayrefs_from_google(data):
    """Map each word in a Google-style result dict to a running result number.

    A result is used only if it has an "alternatives" list of exactly one
    item and that item contains "words". Each word's "startTime" and
    "endTime" strings are converted to floats after dropping one trailing
    "s", if present.

    Args:
        data: dict with a "results" list.

    Returns:
        A dict from ``TimedWord`` to a counter that advances once per
        accepted result.
    """
    def _seconds(stamp):
        # Remove a single trailing "s" before converting to float.
        return float(stamp[:-1] if stamp.endswith("s") else stamp)

    refs = {}
    accepted = 0
    for result in data["results"]:
        usable = (
            "alternatives" in result
            and len(result["alternatives"]) == 1
            and "words" in result["alternatives"][0]
        )
        if not usable:
            # NOTE(review): skipped results do not advance the counter, so
            # the stored number is not the index into data["results"] once
            # anything has been skipped. Confirm this is intended.
            continue
        for w in result["alternatives"][0]["words"]:
            raw_start, raw_end = w["startTime"], w["endTime"]
            begin = _seconds(raw_start)
            finish = _seconds(raw_end)
            refs[TimedWord(w["word"], begin, finish)] = accepted
        accepted += 1
    return refs

In [25]:
import json

# Pair every Vosk log with the Google JSON of the same stem and build the
# word -> segment maps for both. Each iteration overwrites the previous
# one's variables, so only the last matched pair remains afterwards.
for vosk_file in vosk_path.glob("*.vosk"):
    stem = clean_filename(vosk_file)
    goog_file = goog_path / f"{stem}.json"
    if goog_file.exists():
        with open(str(goog_file)) as googf:
            goog_data = json.load(googf)
        # The JSON is fully parsed by now; the rest needs no open handle.
        goog_refs = get_arrayrefs_from_google(goog_data)
        vosk_data = get_recognition(vosk_file)
        vosk_refs = get_arrayrefs_from_vosk(vosk_data)

In [38]:
# Load one hand-picked recording from both recognisers for inspection.
vosk_sample = "/Users/joregan/hsi-vosk/hsi_5_0718_210_002_main.wav.vosk"
goog_sample = "/Users/joregan/Playing/hsi_google/hsi_5_0718_210_002_main.json"

with open(goog_sample) as googf:
    goog_data = json.loads(googf.read())

# Build the word -> segment maps for the Google and the Vosk side.
goog_refs = get_arrayrefs_from_google(goog_data)
vosk_data = get_recognition(vosk_sample)
vosk_refs = get_arrayrefs_from_vosk(vosk_data)


In [45]:
# Sanity check: the values differ by 0.03, below the default fudge of 0.2.
times_ap_eq(4.77, 4.8)

True

In [44]:
def times_ap_eq(a, b, fudge=0.2):
    """Return True when ``a`` and ``b`` differ by strictly less than ``fudge``.

    NOTE(review): this repeats the ``times_ap_eq`` definition in the cell
    that declares ``TimedWord``; one of the two can probably be removed.
    """
    return fudge > abs(a - b)

In [39]:
# Show the word -> result-number map built by get_arrayrefs_from_google.
goog_refs

{TimedWord(text='Strange.', start=4.2, end=4.8): 0,
 TimedWord(text='Oh', start=17.5, end=18.7): 1,
 TimedWord(text='sorry.', start=18.7, end=19.2): 1,
 TimedWord(text='Yeah.', start=22.9, end=23.2): 2,
 TimedWord(text='Yeah', start=23.2, end=23.5): 2,
 TimedWord(text='yeah.', start=23.5, end=23.8): 2,
 TimedWord(text='Yeah.', start=23.8, end=23.9): 2,
 TimedWord(text='Yeah.', start=25.6, end=27.2): 3,
 TimedWord(text='Yeah.', start=27.2, end=27.2): 3,
 TimedWord(text='And', start=49.0, end=50.3): 4,
 TimedWord(text="you're", start=50.3, end=50.8): 4,
 TimedWord(text='going', start=50.8, end=50.8): 4,
 TimedWord(text='to', start=50.8, end=50.9): 4,
 TimedWord(text='rent', start=50.9, end=51.2): 4,
 TimedWord(text='it.', start=51.2, end=51.4): 4,
 TimedWord(text='Oh,', start=51.4, end=52.1): 4,
 TimedWord(text='could', start=52.1, end=52.5): 4,
 TimedWord(text='I', start=52.5, end=52.6): 4,
 TimedWord(text='have', start=52.6, end=52.7): 4,
 TimedWord(text='some', start=52.7, end=53.0): 

In [34]:
# Show the word -> segment-index map built by get_arrayrefs_from_vosk.
vosk_refs

{TimedWord(text='you', start=0.0, end=0.18): 0,
 TimedWord(text='can', start=0.18, end=0.27): 0,
 TimedWord(text='take', start=0.272088, end=0.451059): 0,
 TimedWord(text='it', start=0.451059, end=0.51): 0,
 TimedWord(text='strange', start=4.17, end=4.89): 1,
 TimedWord(text='oh', start=18.03, end=18.244142): 2,
 TimedWord(text='sorry', start=18.36, end=19.14): 2,
 TimedWord(text='yeah', start=23.01, end=23.28): 3,
 TimedWord(text='yeah', start=23.34, end=23.52): 3,
 TimedWord(text='yeah', start=23.52, end=23.91): 3,
 TimedWord(text='maybe', start=26.28, end=26.49): 4,
 TimedWord(text='yeah', start=27.21, end=27.39): 4,
 TimedWord(text='this', start=27.96, end=28.11): 4,
 TimedWord(text='is', start=28.11, end=28.35): 4,
 TimedWord(text='yeah', start=46.56, end=46.83): 5,
 TimedWord(text='and', start=50.13, end=50.31): 6,
 TimedWord(text="you're", start=50.31, end=50.58): 6,
 TimedWord(text='going', start=50.58, end=50.91): 6,
 TimedWord(text='to', start=50.91, end=51.06): 6,
 TimedWord

In [36]:
# The same word as reported by each recogniser. Only the value of the last
# expression in a cell is displayed, so the first line produces no output.
TimedWord(text='strange', start=4.17, end=4.89)
TimedWord(text='Strange.', start=4.2, end=4.8)

TimedWord(text='Strange.', start=4.2, end=4.8)