In [1]:
from resemblyzer import preprocess_wav, VoiceEncoder
from demo_utils import *
from pathlib import Path
from spectralcluster import SpectralClusterer



In [2]:
wav_fpath = Path("public_comment_2406_480_1.mp3")
wav = preprocess_wav(wav_fpath)




In [3]:
segments = [[0, 5], [39, 44], [83, 88], [237, 242], [413, 420]]
speaker_names = ["A", "B", "C", "E", "F"]
speaker_wavs = [wav[int(s[0] * sampling_rate):int(s[1] * sampling_rate)] for s in segments]

In [4]:
encoder = VoiceEncoder("cpu")
print("Running the continuous embedding on cpu, this might take a while...")
_, cont_embeds, wav_splits = encoder.embed_utterance(wav, return_partials=True, rate=16)

Loaded the voice encoder model on cpu in 0.01 seconds.
Running the continuous embedding on cpu, this might take a while...


In [5]:
speaker_embeds = [encoder.embed_utterance(speaker_wav) for speaker_wav in speaker_wavs]
similarity_dict = {name: cont_embeds @ speaker_embed for name, speaker_embed in 
                   zip(speaker_names, speaker_embeds)}

In [6]:
print(similarity_dict)

{'A': array([0.87807256, 0.88468224, 0.91648847, ..., 0.5376143 , 0.5099734 ,
       0.512244  ], dtype=float32), 'B': array([0.5232038 , 0.54096365, 0.5343531 , ..., 0.5066391 , 0.5036247 ,
       0.5019189 ], dtype=float32), 'C': array([0.63168764, 0.6463007 , 0.644412  , ..., 0.50545037, 0.49182445,
       0.4817821 ], dtype=float32), 'E': array([0.5809439 , 0.5845593 , 0.58860123, ..., 0.45542926, 0.44637   ,
       0.44990394], dtype=float32), 'F': array([0.46378183, 0.4578523 , 0.44072443, ..., 0.9625536 , 0.9822396 ,
       0.99296117], dtype=float32)}


In [7]:
all_speakers = [speaker for speaker in similarity_dict]

In [8]:
def get_time(sec):
    minutes = int(sec/60)
    seconds = int((sec%60)*100)/100
    return f"{minutes}:{seconds}"

In [9]:
for index in range(len(similarity_dict['A'])):
    max_val = max([similarity_dict[speaker][index] for speaker in all_speakers])
    for speaker in all_speakers:
        if max_val == similarity_dict[speaker][index]:
            print(f"{speaker}, {get_time(index*0.0674)}")

A, 0:0.0
A, 0:0.06
A, 0:0.13
A, 0:0.2
A, 0:0.26
A, 0:0.33
A, 0:0.4
A, 0:0.47
A, 0:0.53
A, 0:0.6
A, 0:0.67
A, 0:0.74
A, 0:0.8
A, 0:0.87
A, 0:0.94
A, 0:1.01
A, 0:1.07
A, 0:1.14
A, 0:1.21
A, 0:1.28
A, 0:1.34
A, 0:1.41
A, 0:1.48
A, 0:1.55
A, 0:1.61
A, 0:1.68
A, 0:1.75
A, 0:1.81
A, 0:1.88
A, 0:1.95
A, 0:2.02
A, 0:2.08
A, 0:2.15
A, 0:2.22
A, 0:2.29
A, 0:2.35
A, 0:2.42
A, 0:2.49
A, 0:2.56
A, 0:2.62
A, 0:2.69
A, 0:2.76
A, 0:2.83
A, 0:2.89
A, 0:2.96
A, 0:3.03
A, 0:3.1
A, 0:3.16
A, 0:3.23
A, 0:3.3
A, 0:3.37
A, 0:3.43
A, 0:3.5
A, 0:3.57
A, 0:3.63
A, 0:3.7
A, 0:3.77
A, 0:3.84
A, 0:3.9
A, 0:3.97
A, 0:4.04
A, 0:4.11
A, 0:4.17
A, 0:4.24
A, 0:4.31
A, 0:4.38
A, 0:4.44
A, 0:4.51
A, 0:4.58
A, 0:4.65
A, 0:4.71
A, 0:4.78
A, 0:4.85
A, 0:4.92
A, 0:4.98
A, 0:5.05
A, 0:5.12
A, 0:5.18
A, 0:5.25
A, 0:5.32
A, 0:5.39
A, 0:5.45
A, 0:5.52
A, 0:5.59
A, 0:5.66
A, 0:5.72
A, 0:5.79
A, 0:5.86
A, 0:5.93
A, 0:5.99
A, 0:6.06
A, 0:6.13
A, 0:6.2
A, 0:6.26
A, 0:6.33
A, 0:6.4
A, 0:6.47
A, 0:6.53
A, 0:6.6
A, 0:6.67
A, 0:6.74
A, 

C, 1:8.81
C, 1:8.88
C, 1:8.95
C, 1:9.01
C, 1:9.08
C, 1:9.15
C, 1:9.21
C, 1:9.28
C, 1:9.35
C, 1:9.42
C, 1:9.48
C, 1:9.55
C, 1:9.62
C, 1:9.69
C, 1:9.75
C, 1:9.82
C, 1:9.89
C, 1:9.96
C, 1:10.02
C, 1:10.09
C, 1:10.16
C, 1:10.23
C, 1:10.29
C, 1:10.36
C, 1:10.43
C, 1:10.5
C, 1:10.56
C, 1:10.63
C, 1:10.7
C, 1:10.76
C, 1:10.83
C, 1:10.9
C, 1:10.97
C, 1:11.03
C, 1:11.1
C, 1:11.17
C, 1:11.24
C, 1:11.3
C, 1:11.37
C, 1:11.44
C, 1:11.51
C, 1:11.57
C, 1:11.64
C, 1:11.71
C, 1:11.78
C, 1:11.84
C, 1:11.91
C, 1:11.98
C, 1:12.05
C, 1:12.11
C, 1:12.18
C, 1:12.25
C, 1:12.32
C, 1:12.38
C, 1:12.45
C, 1:12.52
C, 1:12.58
C, 1:12.65
C, 1:12.72
C, 1:12.79
C, 1:12.85
C, 1:12.92
C, 1:12.99
C, 1:13.06
C, 1:13.12
C, 1:13.19
C, 1:13.26
C, 1:13.33
C, 1:13.39
C, 1:13.46
C, 1:13.53
C, 1:13.6
C, 1:13.66
C, 1:13.73
C, 1:13.8
C, 1:13.87
C, 1:13.93
C, 1:14.0
C, 1:14.07
C, 1:14.14
C, 1:14.2
C, 1:14.27
C, 1:14.34
C, 1:14.4
C, 1:14.47
C, 1:14.54
C, 1:14.61
C, 1:14.67
C, 1:14.74
C, 1:14.81
C, 1:14.88
C, 1:14.94
C, 1:15.01
C, 1:

C, 2:47.48
C, 2:47.55
C, 2:47.62
C, 2:47.69
C, 2:47.75
C, 2:47.82
C, 2:47.89
C, 2:47.96
C, 2:48.02
C, 2:48.09
C, 2:48.16
C, 2:48.23
C, 2:48.29
C, 2:48.36
C, 2:48.43
C, 2:48.5
C, 2:48.56
C, 2:48.63
C, 2:48.7
C, 2:48.76
C, 2:48.83
C, 2:48.9
C, 2:48.97
C, 2:49.03
C, 2:49.1
C, 2:49.17
C, 2:49.24
C, 2:49.3
C, 2:49.37
C, 2:49.44
C, 2:49.51
C, 2:49.57
C, 2:49.64
C, 2:49.71
C, 2:49.78
C, 2:49.84
C, 2:49.91
C, 2:49.98
C, 2:50.05
C, 2:50.11
C, 2:50.18
C, 2:50.25
C, 2:50.31
C, 2:50.38
C, 2:50.45
C, 2:50.52
C, 2:50.58
C, 2:50.65
C, 2:50.72
C, 2:50.79
C, 2:50.85
C, 2:50.92
C, 2:50.99
C, 2:51.06
C, 2:51.12
C, 2:51.19
C, 2:51.26
C, 2:51.33
C, 2:51.39
C, 2:51.46
C, 2:51.53
C, 2:51.6
C, 2:51.66
C, 2:51.73
C, 2:51.8
C, 2:51.87
C, 2:51.93
C, 2:52.0
C, 2:52.07
C, 2:52.13
C, 2:52.2
C, 2:52.27
C, 2:52.34
C, 2:52.4
C, 2:52.47
C, 2:52.54
C, 2:52.61
C, 2:52.67
C, 2:52.74
C, 2:52.81
C, 2:52.88
C, 2:52.94
C, 2:53.01
C, 2:53.08
C, 2:53.15
C, 2:53.21
C, 2:53.28
C, 2:53.35
C, 2:53.42
C, 2:53.48
C, 2:53.55
C, 2:53.6

E, 4:29.66
E, 4:29.73
E, 4:29.8
E, 4:29.86
E, 4:29.93
E, 4:30.0
E, 4:30.07
E, 4:30.13
E, 4:30.2
E, 4:30.27
E, 4:30.34
E, 4:30.4
E, 4:30.47
E, 4:30.54
E, 4:30.61
E, 4:30.67
E, 4:30.74
E, 4:30.81
E, 4:30.88
E, 4:30.94
E, 4:31.01
E, 4:31.08
E, 4:31.15
E, 4:31.21
E, 4:31.28
E, 4:31.35
E, 4:31.41
E, 4:31.48
E, 4:31.55
E, 4:31.62
E, 4:31.68
E, 4:31.75
E, 4:31.82
E, 4:31.89
E, 4:31.95
E, 4:32.02
E, 4:32.09
E, 4:32.16
E, 4:32.22
E, 4:32.29
E, 4:32.36
E, 4:32.43
E, 4:32.49
E, 4:32.56
E, 4:32.63
E, 4:32.7
E, 4:32.76
E, 4:32.83
E, 4:32.9
E, 4:32.97
E, 4:33.03
E, 4:33.1
E, 4:33.17
E, 4:33.23
E, 4:33.3
E, 4:33.37
E, 4:33.44
E, 4:33.5
E, 4:33.57
E, 4:33.64
E, 4:33.71
E, 4:33.77
E, 4:33.84
E, 4:33.91
E, 4:33.98
E, 4:34.04
E, 4:34.11
E, 4:34.18
E, 4:34.25
E, 4:34.31
E, 4:34.38
E, 4:34.45
E, 4:34.52
E, 4:34.58
E, 4:34.65
E, 4:34.72
E, 4:34.78
E, 4:34.85
E, 4:34.92
E, 4:34.99
E, 4:35.05
E, 4:35.12
E, 4:35.19
E, 4:35.26
E, 4:35.32
E, 4:35.39
E, 4:35.46
E, 4:35.53
E, 4:35.59
E, 4:35.66
E, 4:35.73
E, 4:35.

E, 6:14.33
E, 6:14.4
E, 6:14.47
E, 6:14.54
E, 6:14.6
E, 6:14.67
E, 6:14.74
E, 6:14.81
E, 6:14.87
E, 6:14.94
E, 6:15.01
E, 6:15.08
E, 6:15.14
E, 6:15.21
E, 6:15.28
E, 6:15.35
E, 6:15.41
E, 6:15.48
E, 6:15.55
E, 6:15.62
E, 6:15.68
E, 6:15.75
E, 6:15.82
E, 6:15.88
E, 6:15.95
E, 6:16.02
E, 6:16.09
E, 6:16.15
E, 6:16.22
E, 6:16.29
E, 6:16.36
E, 6:16.42
E, 6:16.49
E, 6:16.56
E, 6:16.63
E, 6:16.69
E, 6:16.76
E, 6:16.83
E, 6:16.9
E, 6:16.96
E, 6:17.03
E, 6:17.1
E, 6:17.17
E, 6:17.23
E, 6:17.3
E, 6:17.37
E, 6:17.43
E, 6:17.5
E, 6:17.57
E, 6:17.64
E, 6:17.7
E, 6:17.77
E, 6:17.84
E, 6:17.91
E, 6:17.97
E, 6:18.04
E, 6:18.11
E, 6:18.18
E, 6:18.24
E, 6:18.31
E, 6:18.38
E, 6:18.45
E, 6:18.51
E, 6:18.58
E, 6:18.65
E, 6:18.72
E, 6:18.78
E, 6:18.85
E, 6:18.92
E, 6:18.99
E, 6:19.05
E, 6:19.12
E, 6:19.19
E, 6:19.25
E, 6:19.32
E, 6:19.39
E, 6:19.46
E, 6:19.52
E, 6:19.59
E, 6:19.66
E, 6:19.73
E, 6:19.79
E, 6:19.86
E, 6:19.93
E, 6:20.0
E, 6:20.06
E, 6:20.13
E, 6:20.2
E, 6:20.27
E, 6:20.33
E, 6:20.4
E, 6:20.4