In [28]:
import nemo.collections.asr as nemo_asr
import logging

# Raise the threshold of the 'nemo_logger' logger so that only ERROR and above
# reach the notebook output.
_nemo_logger = logging.getLogger('nemo_logger')
_nemo_logger.setLevel(logging.ERROR)

# Name of the pretrained speaker-verification checkpoint to load.
_PRETRAINED_SPEAKER_MODEL = "nvidia/speakerverification_en_titanet_large"

# Module-level model object; the speaker comparison code in later cells calls
# `speaker_model.verify_speakers(...)` on it.
speaker_model = nemo_asr.models.EncDecSpeakerLabelModel.from_pretrained(
    _PRETRAINED_SPEAKER_MODEL
)

In [29]:
import os
import random

# All paths are resolved relative to the directory the notebook runs in.
ROOT = os.getcwd()

# Reference recording handed to the speaker comparison in later cells.
reference = os.path.join(ROOT, "john.wav")

# NOTE: `dir` shadows the builtin of the same name. The name is kept because
# other cells may already refer to it.
dir = os.path.join(ROOT, "split_wavs")

# One (episode_directory_path, episode_name) pair per episode, walking
# split_wavs/<showname>/<episode> in sorted order.
# Only directories are descended into: a stray file such as .DS_Store sitting
# directly under split_wavs would otherwise make os.listdir() raise
# NotADirectoryError, and a stray file at the episode level could not be
# listed by the labelling loop either.
episodes = [
    (os.path.join(dir, showname, episode), episode)
    for showname in sorted(os.listdir(dir))
    if os.path.isdir(os.path.join(dir, showname))
    for episode in sorted(os.listdir(os.path.join(dir, showname)))
    if ".DS_Store" not in episode
    and os.path.isdir(os.path.join(dir, showname, episode))
]

In [30]:
def reference_likleyhood(reference, samples):
    """Average the speaker-verification results of ``samples`` against ``reference``.

    Every sample is compared with ``reference`` through
    ``speaker_model.verify_speakers`` and the returned values are summed.
    The sum is divided by the number of samples that were actually scored
    rather than by a fixed count, so a speaker with only a few clips (or with
    some unreadable clips) is not scored lower merely for having fewer clips.

    Args:
        reference: Path of the reference recording.
        samples: Iterable of paths of the recordings to compare against it.

    Returns:
        The mean of the per-sample verification results, or ``0.0`` when no
        sample could be scored (empty input or every comparison failed).
    """
    total = 0
    scored = 0
    for sample in samples:
        try:
            result = speaker_model.verify_speakers(reference, sample)
        except Exception as error:
            # Best effort: report the clip that could not be verified and keep
            # going. Catching Exception (not a bare except) lets
            # KeyboardInterrupt still stop a long run.
            print(f"{sample}: {error!r}")
            continue
        total = total + result
        scored += 1

    if scored == 0:
        return 0.0
    return total / scored


def getReferenceLabel(reference_filepath, sample_filepaths):
    """Decide whether "Speaker 0" or "Speaker 1" better matches the reference.

    The sample paths are split by which label text they contain. For each
    label up to ten paths are drawn at random and scored against
    ``reference_filepath`` with ``reference_likleyhood``.

    Args:
        reference_filepath: Path of the reference recording.
        sample_filepaths: Paths of the candidate clips; the label is looked up
            as a substring of each path.

    Returns:
        "Speaker 0" when its score is strictly greater than that of
        "Speaker 1", otherwise "Speaker 1" (ties included).
    """
    selections = []
    for tag in ("Speaker 0", "Speaker 1"):
        tagged = [path for path in sample_filepaths if tag in path]
        # Draw a full random permutation, then keep at most ten clips.
        permuted = random.sample(tagged, len(tagged))
        selections.append(permuted[:10])

    # Score both selections only after both have been drawn.
    score0, score1 = [
        reference_likleyhood(reference_filepath, clips) for clips in selections
    ]
    return "Speaker 0" if score0 > score1 else "Speaker 1"

In [31]:
# For every episode: decide which diarization label matches the reference
# recording, remember it as "<episode>_-_<label>", and delete every file in the
# episode directory whose name does not contain that label.
# WARNING: this cell is destructive -- the removed clips are not recoverable.
labels = []
for episodeDir, episode in episodes:
    # List the directory once; the same listing drives scoring and cleanup.
    entries = os.listdir(episodeDir)
    files = [os.path.join(episodeDir, file) for file in entries if ".DS_Store" not in file]
    speaker = getReferenceLabel(reference, files)
    labels.append(f"{episode}_-_{speaker}")
    for file in entries:
        if speaker not in file:
            filepath = os.path.join(episodeDir, file)
            os.remove(filepath)

# Persist the decisions, one "<episode>_-_<label>" line per episode. The
# context manager closes the file even if the write fails.
with open('omnibus_speakers.txt', 'w') as f:
    f.write("\n".join(labels))

In [42]:
from utils import srt_to_transcript, transcript_to_srt

# Map episode key -> chosen speaker label from the "<episode>_-_<label>" lines.
# The key is the text before the first '_-_' (the same rule used for the
# transcript file names below). The label is always the LAST field, so it is
# taken with rsplit: an episode name that itself contains '_-_' then no longer
# yields a fragment of the name as the label.
labels_dict = {
    line.split('_-_')[0]: line.rsplit('_-_', 1)[1]
    for line in labels
}

transcriptDir = os.path.join(ROOT, 'transcripts', 'omnibus')

# (transcript path, episode key) pairs. .DS_Store is skipped as in the other
# directory listings of this notebook; it has no entry in labels_dict and
# would otherwise abort the loop below.
transcriptFiles = [
    (os.path.join(transcriptDir, file), file.split('_-_')[0])
    for file in os.listdir(transcriptDir)
    if ".DS_Store" not in file
]

outputDir = os.path.join(ROOT, 'john_lines')
os.makedirs(outputDir, exist_ok=True)

for filepath, episode in transcriptFiles:
    transcript = srt_to_transcript(filepath)
    # Raises KeyError when the transcript has no labelled episode.
    john_label = labels_dict[episode]
    # Keep only the entries whose speaker field contains the chosen label.
    john_lines = [
        (idx, start, end, speaker, speech)
        for idx, start, end, speaker, speech in transcript
        if john_label in speaker
    ]
    srt = transcript_to_srt(john_lines)
    outpath = os.path.join(outputDir, f"{episode}.srt")
    # The context manager closes the file even if the write fails.
    with open(outpath, 'w') as f:
        f.write(srt)