In [1]:
%pylab inline

import json
import os
from pyannote.core import Annotation, Segment

Populating the interactive namespace from numpy and matplotlib


In [2]:
from pyannote.metrics.diarization import DiarizationErrorRate

In [3]:
reference_path = '/home/bruno/datasets/mixed-musan/'
hypothesis_path = 'output-files/mixed-musan/'
metrics_path = 'output-files/metrics/'

if not os.path.isdir(hypothesis_path):
    os.makedirs(hypothesis_path)
if not os.path.isdir(metrics_path):
    os.makedirs(metrics_path)

In [4]:
reference_json_files = []
for path, dirs, files in os.walk(reference_path):
    for file in files:
        if file.endswith('.json'):
            reference_json_files.append(os.path.join(path,file))
reference_json_files = sorted(reference_json_files)
reference_json_files[0:5]

['/home/bruno/datasets/mixed-musan/two_speakers_0.json',
 '/home/bruno/datasets/mixed-musan/two_speakers_1.json',
 '/home/bruno/datasets/mixed-musan/two_speakers_10.json',
 '/home/bruno/datasets/mixed-musan/two_speakers_11.json',
 '/home/bruno/datasets/mixed-musan/two_speakers_12.json']

In [5]:
hypothesis_json_files = []
for path, dirs, files in os.walk(hypothesis_path):
    for file in files:
        if file.endswith('.json'):
            hypothesis_json_files.append(os.path.join(path,file))
hypothesis_json_files = sorted(hypothesis_json_files)
hypothesis_json_files[0:5]

['output-files/mixed-musan/two_speakers_0.wav.json',
 'output-files/mixed-musan/two_speakers_1.wav.json',
 'output-files/mixed-musan/two_speakers_10.wav.json',
 'output-files/mixed-musan/two_speakers_11.wav.json',
 'output-files/mixed-musan/two_speakers_12.wav.json']

In [26]:
def get_Annotation(path):
    annotation = Annotation()
    with open(path) as f:
        data = json.load(f)
    for i in range(len(data)):
        t0 = data[i]["start"]
        (h, m, s) = t0.split(':')
        start = float(h) * 3600 + float(m) * 60 + float(s)

        t1 = data[i]["end"]
        (h, m, s) = t1.split(':')
        end = float(h) * 3600 + float(m) * 60 + float(s)

        annotation[Segment(start,end)] = data[i]["speaker"]
    return annotation, end

In [7]:
tst = get_Annotation(hypothesis_json_files[0])

In [27]:
idx = min(len(hypothesis_json_files), len(reference_json_files))
metricsDER = []
print(idx)
#diarizationErrorRate = DiarizationErrorRate()

for i in range(idx):
    diarizationErrorRate = DiarizationErrorRate()

    hp = hypothesis_json_files[i].split('/')[-1].split('.')[0]
    rf = reference_json_files[i].split('/')[-1].split('.')[0]
    if hp == rf:
        print(rf,hp)
        hypothesis, end = get_Annotation(hypothesis_json_files[i])
        reference, _ = get_Annotation(reference_json_files[i])
        
        components = diarizationErrorRate(reference, hypothesis, detailed=True, uem=Segment(0,end))
        components['audio filename'] = rf
        metricsDER.append(components['diarization error rate'])
        
        with open(os.path.join(metrics_path, rf + '.json'), 'w') as f:
            json.dump(components, f, indent=4)

53
two_speakers_0 two_speakers_0
two_speakers_1 two_speakers_1
two_speakers_10 two_speakers_10
two_speakers_11 two_speakers_11
two_speakers_12 two_speakers_12
two_speakers_13 two_speakers_13
two_speakers_14 two_speakers_14
two_speakers_15 two_speakers_15
two_speakers_16 two_speakers_16
two_speakers_17 two_speakers_17
two_speakers_18 two_speakers_18
two_speakers_19 two_speakers_19
two_speakers_2 two_speakers_2
two_speakers_20 two_speakers_20
two_speakers_21 two_speakers_21
two_speakers_22 two_speakers_22
two_speakers_23 two_speakers_23
two_speakers_24 two_speakers_24
two_speakers_25 two_speakers_25
two_speakers_26 two_speakers_26
two_speakers_27 two_speakers_27
two_speakers_28 two_speakers_28
two_speakers_29 two_speakers_29
two_speakers_3 two_speakers_3
two_speakers_30 two_speakers_30
two_speakers_31 two_speakers_31
two_speakers_32 two_speakers_32
two_speakers_33 two_speakers_33
two_speakers_34 two_speakers_34
two_speakers_35 two_speakers_35
two_speakers_36 two_speakers_36
two_speakers_

In [28]:
metricsDER

[0.22073863029251126,
 0.3677145851516809,
 0.2048296909680748,
 0.08303920502739062,
 0.4003170499270657,
 0.25543285117841885,
 0.4511270213604318,
 0.10546047071924927,
 0.3973154959379232,
 0.2599536687005889,
 0.1979677934603089,
 0.40302103176656323,
 0.3189020250217785,
 0.0966148861554887,
 0.31889784654016473,
 0.26787802812315625,
 0.2107138169645327,
 0.492588230807418,
 0.48190447115530155,
 0.6332334868959252,
 0.07963843536524255,
 0.4069370238049981,
 0.07498902744269999,
 0.2946615961446351,
 0.25343636157093136,
 0.14894456583219726,
 0.35472586801855854,
 0.1146393745093693,
 0.18066292114449264,
 0.5559008070030821,
 0.35959208934573555,
 0.2960252476527674,
 0.2727050853087989,
 0.46879905842341785,
 0.22385353193438637,
 0.13078470824949612,
 0.09369238169402617,
 0.15908388706595528,
 0.2749322230828796,
 0.1493864104443278,
 0.10165041560102363,
 0.2228870599842616,
 0.19909946996598418,
 0.39632340271745314,
 0.519854080188286,
 0.44702665038323436,
 0.369682514

In [33]:
meanDER = np.mean(metricsDER)
maxDER = np.max(metricsDER)
minDER = np.min(metricsDER)

print('mean DER:', meanDER, '%')
print('max DER: ', maxDER, '%')
print('min DER: ', minDER, '%')

mean DER: 0.2781283923184297 %
max DER:  0.6332334868959252 %
min DER:  0.07498902744269999 %
