# Vosk CLI stderr output to CTM

> "Because it was quicker than looking at the API examples"

- branch: master
- comments: false
- categories: [vosk, ctm, kludge]

In [23]:
# Directory that is globbed for "*.vosk" files (captured Vosk CLI stderr logs).
# NOTE(review): machine-specific absolute path — adjust before running elsewhere.
VOSKDIR = "/Users/joregan/hsi-vosk/"
# Directory in which the matching "<stem>.json" Google result files are looked up.
# NOTE(review): machine-specific absolute path — adjust before running elsewhere.
GOOGDIR = "/Users/joregan/Playing/hsi_google/"

In [24]:
from pathlib import Path

# Path objects for the two input directories, so glob() and "/" joins can be used on them.
vosk_path = Path(VOSKDIR)
goog_path = Path(GOOGDIR)

In [5]:
def get_recognition(filename):
    """Collect the recognition results logged in a Vosk CLI stderr capture.

    Only lines that begin with ``INFO:root:{'result':`` are used. The text
    after the ``INFO:root:`` prefix is parsed as a Python literal (a dict
    written with single-quoted keys).

    Args:
        filename: Path (``str`` or path-like) of the captured log file.

    Returns:
        A list with one parsed object per matching line, in file order.

    Raises:
        ValueError or SyntaxError: If a matching line does not hold a plain
            Python literal.
    """
    # Imported locally so the function does not rely on another cell's imports.
    import ast

    log_prefix = "INFO:root:"
    segments = []
    with open(filename) as inf:
        # Iterate the file lazily instead of loading every line up front.
        for line in inf:
            if line.startswith(log_prefix + "{'result':"):
                text = line.strip()[len(log_prefix):]
                # SECURITY: this text comes from a file on disk, so it is parsed
                # with ast.literal_eval (literals only) instead of eval(), which
                # would execute any expression that happens to be in the log.
                segments.append(ast.literal_eval(text))
    return segments

In [1]:
def clean_filename(filename):
    """Return the final path component of ``filename`` without its last suffix.

    Args:
        filename: A ``str`` or ``pathlib.Path``. Any other object is returned
            unchanged.

    Returns:
        The stem as a ``str``. Only the last suffix is removed, so
        ``"dir/x.wav.vosk"`` gives ``"x.wav"``.
    """
    # isinstance() rather than an exact type() comparison: Path() instantiates
    # a platform subclass (PosixPath / WindowsPath), so `type(p) == Path` is
    # false for real path objects and they would be returned unchanged.
    if isinstance(filename, (str, Path)):
        return Path(filename).stem
    return filename

In [46]:
from dataclasses import dataclass, field

def _normalise_word(text, punct="!.,;:?"):
    """Strip ``punct`` characters from both ends of ``text`` and lower-case it."""
    return text.strip(punct).lower()


def times_ap_eq(a, b, fudge=0.2):
    """Return True when ``a`` and ``b`` differ by strictly less than ``fudge``."""
    return abs(a - b) < fudge


def norm_str_eq(a, b, punct="!.,;:?"):
    """Compare two strings, ignoring case and leading/trailing ``punct`` characters."""
    return _normalise_word(a, punct) == _normalise_word(b, punct)


@dataclass(frozen=True)
class TimedWord:
    """A word together with its start and end time.

    Equality is approximate: two words are equal when their texts match after
    normalisation (see ``norm_str_eq``) and both their start and end times are
    within the default ``times_ap_eq`` tolerance of each other.

    The hash is derived from the normalised text only, so any two words that
    compare equal also hash equal, as dict/set lookup requires. Because the
    time comparison is a tolerance it is not transitive; when used as dict
    keys, a word that compares equal to an existing key reuses that key.
    """
    text: str = field(compare=True)
    start: float = field(compare=True)
    end: float = field(compare=True)

    def __lt__(self, o):
        # Ordered by start time only.
        return self.start < o.start

    def __gt__(self, o):
        # NOTE(review): compares end times while __lt__ compares start times,
        # so `a < b` and `b > a` can disagree — confirm this is intended.
        return self.end > o.end

    def __eq__(self, o):
        # Let Python fall back to its default handling for foreign types
        # instead of failing on a missing attribute.
        if not isinstance(o, TimedWord):
            return NotImplemented
        return (
            norm_str_eq(self.text, o.text)
            and times_ap_eq(self.start, o.start)
            and times_ap_eq(self.end, o.end)
        )

    def __hash__(self):
        # Must agree with __eq__: equal words always share a normalised text,
        # whereas their exact start/end values may differ.
        return hash(_normalise_word(self.text))


In [17]:
def get_arrayrefs_from_vosk(rec):
    """Map every word in a list of Vosk results to the index of its result.

    Args:
        rec: Sequence of dicts, each holding a "result" list whose entries
            have "word", "start" and "end" keys.

    Returns:
        A dict of TimedWord -> index into ``rec``. If two words are equal as
        dict keys, the later index replaces the earlier one.
    """
    return {
        TimedWord(entry["word"], entry["start"], entry["end"]): seg_idx
        for seg_idx, segment in enumerate(rec)
        for entry in segment["result"]
    }

In [37]:
def _google_seconds(stamp):
    """Convert a time string such as "1.5s" to a float, dropping one trailing "s"."""
    if stamp.endswith("s"):
        stamp = stamp[:-1]
    return float(stamp)


def get_arrayrefs_from_google(data):
    """Map every word in a Google result dict to a running result counter.

    A result is used only if it has an "alternatives" list of exactly one
    entry and that entry has a "words" list; all other results are skipped.

    Args:
        data: Dict with a "results" list. Each word entry is read through its
            "word", "startTime" and "endTime" keys.

    Returns:
        A dict of TimedWord -> counter value. The counter advances only for
        results that were used, so it is the position among the used results
        and not necessarily the position in ``data["results"]``.
    """
    pointers = {}
    used_results = 0
    for result in data["results"]:
        if "alternatives" not in result:
            continue
        alternatives = result["alternatives"]
        if len(alternatives) != 1:
            continue
        only_alternative = alternatives[0]
        if "words" not in only_alternative:
            continue
        for w in only_alternative["words"]:
            raw_start, raw_end = w["startTime"], w["endTime"]
            start = _google_seconds(raw_start)
            end = _google_seconds(raw_end)
            pointers[TimedWord(w["word"], start, end)] = used_results
        used_results += 1
    return pointers

In [25]:
import json

# Pair every "*.vosk" log with the "<stem>.json" file in the Google directory
# and build both word -> index maps; logs without a counterpart are skipped.
# NOTE(review): every iteration rebinds the same names, so once the loop ends
# only the values from the last matched pair remain.
# NOTE(review): clean_filename() removes at most one suffix, while the sample
# pair used later in this notebook is named "<id>.wav.vosk" / "<id>.json" —
# confirm that "<stem>.json" really matches the Google file names.
for vosk_file in vosk_path.glob("*.vosk"):
    stem = clean_filename(vosk_file)
    goog_file = goog_path / f"{stem}.json"
    if goog_file.exists():
        with open(str(goog_file)) as googf:
            goog_data = json.load(googf)
        goog_refs = get_arrayrefs_from_google(goog_data)
        vosk_data = get_recognition(vosk_file)
        vosk_refs = get_arrayrefs_from_vosk(vosk_data)

In [38]:
# One hand-picked recording, used to try the helpers on a single file pair.
vosk_sample = "/Users/joregan/hsi-vosk/hsi_5_0718_210_002_main.wav.vosk"
goog_sample = "/Users/joregan/Playing/hsi_google/hsi_5_0718_210_002_main.json"

# Vosk side: parse the log, then index its words.
vosk_data = get_recognition(vosk_sample)
vosk_refs = get_arrayrefs_from_vosk(vosk_data)

# Google side: load the JSON, then index its words.
with open(goog_sample) as googf:
    goog_data = json.load(googf)
goog_refs = get_arrayrefs_from_google(goog_data)


In [47]:
# Two sample words that differ in case, in a trailing full stop, and by
# 0.03 (start) and 0.09 (end) in their times — both below the default 0.2
# tolerance of times_ap_eq.
a = TimedWord(text='strange', start=4.17, end=4.89)
b = TimedWord(text='Strange.', start=4.2, end=4.8)

In [49]:
from difflib import SequenceMatcher

# goog_refs and vosk_refs are dicts keyed by word. SequenceMatcher indexes its
# inputs by integer position, which on a dict is a key lookup and raises
# KeyError as soon as a matching method is called. Pass the keys as lists
# instead; dicts keep insertion order, so the words stay in the order in
# which they were first added.
s = SequenceMatcher(None, list(goog_refs), list(vosk_refs))