In [1]:
def approx_match(time_a, time_b, slippage=(0.01 * 6)):
    """Report whether two timestamps lie within ``slippage`` of each other.

    The bound is inclusive: a gap exactly equal to ``slippage`` still counts
    as a match. The default tolerance is ``0.01 * 6``.
    """
    gap = abs(time_a - time_b)
    return gap <= slippage

In [2]:
# Identifier of the recording to process; the file-opening cell below uses it
# to build the "<SAMPLE>_480p.json" file name for both inputs.
SAMPLE = "2442204240010034621"

In [3]:
from pathlib import Path
# Directory the "text" JSON for SAMPLE is read from.
# NOTE(review): absolute, machine-specific path — adjust before running on
# another machine.
TEXT = Path("/Users/joregan/Playing/kbw2v")
# Directory the "phones" JSON for SAMPLE is read from (same caveat as above).
PHONES = Path("/Users/joregan/Playing/rd_phonetic")

In [4]:
# Open both JSON inputs for SAMPLE. The encoding is given explicitly because
# open() otherwise falls back to the platform's locale encoding, while the
# data holds non-ASCII characters (e.g. "Ö", "iː" in the recorded output).
# The handles are consumed by json.load() in the next cell.
text = open(TEXT / f"{SAMPLE}_480p.json", encoding="utf-8")
phones = open(PHONES / f"{SAMPLE}_480p.json", encoding="utf-8")

In [5]:
import json
# Parse both inputs, then close the file handles opened in the previous cell
# so they are not left open for the lifetime of the kernel. The handles are
# closed even if parsing fails.
try:
    text_json = json.load(text)
    phone_json = json.load(phones)
finally:
    text.close()
    phones.close()

In [6]:
class Chunk:
    """A labelled time span built from one entry of a transcript's chunk list.

    The constructor expects a mapping with a ``'text'`` entry and a
    ``'timestamp'`` sequence whose first two items are the start and the end.
    """

    def __init__(self, chunk):
        label = chunk['text']
        span = chunk['timestamp']
        self.text = label
        self.start = span[0]
        self.end = span[1]

    def __repr__(self) -> str:
        return "[{} ({}, {})]".format(self.text, self.start, self.end)

class SimpleMerge(Chunk):
    """One text chunk paired with exactly one phone chunk.

    The text and the time span come from ``left``; ``right`` supplies the
    phone string. ``diff_start`` and ``diff_end`` hold left-minus-right for
    the respective boundary.
    """

    def __init__(self, left: Chunk, right: Chunk):
        self.text, self.phone = left.text, right.text
        self.start, self.end = left.start, left.end
        self.diff_start = self.start - right.start
        self.diff_end = self.end - right.end

    def exact_length(self):
        """Return True only when both boundary offsets are zero."""
        if not (self.diff_start == 0):
            return False
        return self.diff_end == 0

    def __repr__(self) -> str:
        return "[{} :: {} ({}, {})]".format(
            self.text, self.phone, self.start, self.end
        )

class ComplexMerge(Chunk):
    """A merge of any number of text chunks with any number of phone chunks.

    ``left`` (text side) and ``right`` (phone side) may each be a list of
    chunks, a single chunk, or ``None`` for "nothing on this side".

    Attributes set by the constructor:
        left_chunks / right_chunks: the two sides, always as lists.
        start: the smaller of the two sides' first-chunk starts, or None
            when both sides are empty.
        end: the larger of the two sides' last-chunk ends, or None when
            both sides are empty.
        text / phone: the chunk texts of each side joined with spaces.
    """

    def __init__(self, left, right):
        self.left_chunks = self._as_list(left)
        self.right_chunks = self._as_list(right)

        self.start = self.get_start()
        self.end = self.get_end()

        self.text = " ".join(x.text for x in self.left_chunks)
        self.phone = " ".join(x.text for x in self.right_chunks)

    @staticmethod
    def _as_list(chunks):
        """Normalise a constructor argument to a list of chunks."""
        # None must become an empty list; the earlier code compared with
        # `==` here instead of assigning, leaving the attribute unset.
        if chunks is None:
            return []
        if isinstance(chunks, list):
            return chunks
        return [chunks]

    def get_start(self):
        """Return the earliest first-chunk start, or None if both sides are empty."""
        firsts = [
            side[0].start
            for side in (self.left_chunks, self.right_chunks)
            if side
        ]
        return min(firsts) if firsts else None

    def get_end(self):
        """Return the latest last-chunk end, or None if both sides are empty."""
        lasts = [
            side[-1].end
            for side in (self.left_chunks, self.right_chunks)
            if side
        ]
        return max(lasts) if lasts else None

    def __repr__(self) -> str:
        return f"[{self.text} :: {self.phone} ({self.start}, {self.end})]"

        

In [7]:
# Wrap every entry under the 'chunks' key of each parsed JSON document in a
# Chunk, keeping the original order.
text_chunks = [Chunk(entry) for entry in text_json['chunks']]
phone_chunks = [Chunk(entry) for entry in phone_json['chunks']]

In [8]:
class PhoneChunk(Chunk):
    """Copy of an existing chunk, typed as a phone-side chunk.

    The merge loop appends these for phone chunks it emits on their own.
    """

    def __init__(self, chunk):
        self.text, self.start, self.end = chunk.text, chunk.start, chunk.end
class WordChunk(Chunk):
    """Copy of an existing chunk, typed as a text-side (word) chunk.

    The merge loop appends these for text chunks it emits on their own.
    """

    def __init__(self, chunk):
        self.text, self.start, self.end = chunk.text, chunk.start, chunk.end

In [9]:
# Walk the text chunks and the phone chunks in parallel and collect the
# aligned result in `merged`:
#   * start and end both match approximately -> SimpleMerge (one-to-one)
#   * only the start matches                 -> ComplexMerge (one side is
#     extended until the ends line up or it overtakes the other side)
#   * one chunk ends before the other starts -> that chunk is emitted alone
#     as PhoneChunk / WordChunk
#   * anything else                          -> reported via print, skipped
merged = []

start = True  # not read anywhere in this cell

pci = 0  # index of the current phone chunk
tci = 0  # index of the current text chunk

iteration = 0  # number of passes through the loop; only incremented here

while pci < len(phone_chunks) and tci < len(text_chunks):
    iteration += 1

    am_start = approx_match(text_chunks[tci].start, phone_chunks[pci].start)
    am_end = approx_match(text_chunks[tci].end, phone_chunks[pci].end)

    if am_start and am_end:
        merged.append(SimpleMerge(text_chunks[tci], phone_chunks[pci]))

    elif am_start:
        cur_text = [text_chunks[tci]]
        cur_phone = [phone_chunks[pci]]
        if phone_chunks[pci].end < text_chunks[tci].end:
            # The phone chunk is shorter: absorb following phone chunks until
            # the ends line up or the phone side passes the text chunk's end.
            while not approx_match(text_chunks[tci].end, phone_chunks[pci].end) and phone_chunks[pci].end < text_chunks[tci].end:
                pci += 1
                if pci >= len(phone_chunks):
                    break
                cur_phone.append(phone_chunks[pci])
            merged.append(ComplexMerge(cur_text, cur_phone))
        else:
            # The text chunk is shorter: absorb following text chunks instead.
            while not approx_match(text_chunks[tci].end, phone_chunks[pci].end) and text_chunks[tci].end < phone_chunks[pci].end:
                tci += 1
                if tci >= len(text_chunks):
                    break
                cur_text.append(text_chunks[tci])
            merged.append(ComplexMerge(cur_text, cur_phone))
    else:
        if phone_chunks[pci].end < text_chunks[tci].start:
            # The current phone chunk lies wholly before the current text
            # chunk: emit *this* phone chunk on its own and look at the same
            # text chunk again. (Advancing before appending would drop this
            # chunk and emit the next one without ever comparing it.)
            merged.append(PhoneChunk(phone_chunks[pci]))
            pci += 1
            continue
        if text_chunks[tci].end < phone_chunks[pci].start:
            # Mirror case: the current text chunk lies wholly before the
            # current phone chunk.
            merged.append(WordChunk(text_chunks[tci]))
            tci += 1
            continue
        # The chunks overlap but their starts do not match; nothing is added
        # to `merged` for this pair.
        # NOTE(review): both chunks are skipped here, and chunks left over
        # when either list runs out are not emitted — confirm that is intended.
        print("else", text_chunks[tci], phone_chunks[pci])
    tci += 1
    pci += 1


it 1 considering [IX (0.02, 0.34)] [iː (0.02, 0.06)] 0 0
it 1 0 0
it 2 considering [DEBATT (0.66, 1.08)] [debat (0.66, 1.12)] 1 2
it 3 considering [MED (1.18, 1.24)] [meː (1.16, 1.22)] 2 3
it 4 considering [ANLEDNING (1.38, 1.92)] [anleːdnɪŋ (1.36, 1.92)] 3 4
it 5 considering [AV (2.04, 2.18)] [ɑːv (2.04, 2.22)] 4 5
it 6 considering [INTEPELATIONSSVAR (2.38, 3.34)] [ɪntepelaɧuːnsvɑːr (2.38, 3.34)] 5 6
it 7 considering [OCH (3.84, 3.92)] [oː (3.86, 3.88)] 6 7
it 8 considering [KAMMAREN (3.96, 4.32)] [kamarən (3.96, 4.34)] 7 8
it 9 considering [ÖVERGÅR (4.44, 4.78)] [øːvərɡoːr (4.42, 4.8)] 8 9
it 10 considering [NU (4.86, 4.9)] [nʉː (4.86, 4.9)] 9 10
it 11 considering [TILL (4.98, 5.12)] [tɪl (5.04, 5.12)] 10 11
it 12 considering [ATT (5.16, 5.26)] [at (5.14, 5.24)] 11 12
it 13 considering [DEBATTERA (5.32, 5.78)] [debateːra (5.32, 5.8)] 12 13
it 14 considering [STATSRÅDENS (5.88, 6.5)] [stasroːdən (5.86, 6.44)] 13 14
it 15 considering [SVAR (6.56, 6.84)] [svɑːr (6.54, 6.86)] 14 15
it 16

In [11]:
# Peek at the tail of the merge result. The slice stops before index -1, so
# it shows at most nine items and leaves out the final element.
# NOTE(review): use merged[-10:] if the last element should be included.
merged[-10:-1]

[[AV :: ɑːv (1350.32, 1350.38)],
 [SVARET :: svɑːrət (1350.46, 1350.88)],
 [PÅ :: poː (1350.96, 1351.02)],
 [INTERPALATION :: ɪntepelaɧuːn (1351.14, 1351.76)],
 [TVÅHUNDRANIO :: tvoː hɵndra niːʊ (1352.22, 1353.08)],
 [AV :: ɑːv (1353.22, 1353.32)],
 [ROGER :: roːɡər (1353.42, 1353.66)],
 [HADADDLIBERALERNA :: hadad lɪberɑːləɳa (1353.7, 1354.56)],
 [ARBETSFÖRMEDLINGENS :: arbeːsfœ̞meːdlɪŋəns (1354.94, 1355.9)]]