In [7]:
#from inaGVAD.vad_metrics import VadEval
import pandas as pd
import os
from pyannote.metrics.base import BaseMetric
from inaGVAD.gender_metrics import WstpErr

In [85]:
from pyannote.metrics.errors.identification import IdentificationErrorAnalysis

class GenderPredictionError(BaseMetric):
    @classmethod
    def metric_name(cls):
        # Return human-readable name of the metric
        return 'detailled gender prediction error'

    @classmethod
    def metric_components(cls):
        # Return component names from which the metric is computed
        return ['total',
                'correct male',
                'missed detection male',
                'false alarm male',
                'confusion male',
                'correct female',
                'missed detection female',
                'false alarm female',
                'confusion female']

    def __init__(self, collar=0.3, **kwargs):
        super(GenderPredictionError, self).__init__(collar=collar, **kwargs)
        self.iea = IdentificationErrorAnalysis(collar=collar)
    
    def compute_components(self, reference, hypothesis, **kwargs):
        
        # Actually compute the value of each component
        uem = kwargs['uem']
        andiff = self.iea.difference(reference, hypothesis, uem=uem)
        components = dict([(e, 0.) for e in self.metric_components()])
        
        for segment, _, label in andiff.itertracks(yield_label=True):
            print(segment, label)
            status, ref, hyp = label
            if status in ['false alarm', 'confusion']:
                key = status + ' ' + hyp
            else:
                assert status in ['missed detection', 'correct']
                key = status + ' ' + ref
            components[key] += segment.duration
            if status in ['confusion', 'missed detection', 'correct']:
                components['total'] += segment.duration
        return components

    def compute_metric(self, components):
        # Actually compute the metric based on the component values
        err = 0
        for k in components:
            if k == 'total':
                tot = components[k]
            elif 'correct' not in k:
                err += components[k]
        return err / tot
    
    def compute_metrics(self, components)

In [78]:
from pyannote.metrics.identification import IdentificationErrorRate

In [79]:
fref = './annotations/vad/tv-TF1-20220218T133551-60.csv'
fpred = './annotations/baseline_iss_vad/tv-TF1-20220218T133551-60.csv'
dref = './annotations/vad/'
dpred = './annotations/baseline_iss_vad/'

In [80]:
from pyannote.core import Annotation, Timeline, Segment



def df2annot(df, col, rmnan=True, uri=None):
    an = Annotation(uri=uri)
    for start, stop, val in zip(df.start, df.stop, df[col]):
        if rmnan and val != val:
            continue
        seg = Segment(start, stop)
        an[seg] = val
    return an.support()


def init_uem(df):
    uem = Timeline()
    uem.add(Segment(df.start[0], df.stop[len(df) - 1]))
    return uem


def rm_uem(uem, df, col, rmlist):
    for start, stop, val in zip(df.start, df.stop, df[col]):
        if val in rmlist:
            uem = uem.extrude(Segment(start, stop))
    return uem

def keep_uem(uem, df, col, keeplist):
    for start, stop, val in zip(df.start, df.stop, df[col]):
        if val not in keeplist:
            uem = uem.extrude(Segment(start, stop))
    return uem



#def df2uem(df, col2filt, colvals):
#    tl = Timeline()
#    tl.add(Segment(df.start[0], df.stop[len(df) - 1]))
#    for start, stop, val in zip(df.start, df.stop, df[col2filt]):
#        if val not in colvals:
#            tl = tl.extrude(Segment(start, stop))
#    return tl

class GenderEval:
    def __init__(self, collar=.3):
        self.iea = IdentificationErrorAnalysis(collar=collar)
        self.ier = IdentificationErrorRate(collar=collar)
        self.wstp = WstpErr()
        self.iea2 = GenderPredictionError(collar=collar)
        
    def __call__(self, fref, fpred):
        
        uri, _ = os.path.splitext(os.path.basename(fref))
        
        # parse reference
        dfref = pd.read_csv(fref)
        anref = df2annot(dfref, 'speaker_gender', uri = uri)
        uem = init_uem(dfref)
        uem = rm_uem(uem, dfref, 'speaker_gender', ['undefgender'])
        #print('main uem', uem)
        #print(uem)
        
        # parse prediction
        dfpred = pd.read_csv(fpred)
        dfpred = dfpred[dfpred.label.map(lambda x: x in ['male', 'female'])]
        anpred = df2annot(dfpred, 'label', uri = uri)
        
        andiff = self.iea.difference(anref, anpred, uem=uem)
        dret = {}
        for segment, _, label in andiff.itertracks(yield_label=True):
            # label = (status, reference, hypothesis)
            #print(segment)
            #print(label)
            if label not in dret:
                dret[label] = 0
            dret[label] += segment.duration
            
        ier = self.ier(anref, anpred, uem=uem)
        wstp = self.wstp(anref, anpred, uem=None)
        err = self.iea2(anref, anpred, uem=uem)
        return dret, ier, wstp, err



ge = GenderEval(collar=0.3)

ge('./annotations/detailed_csv/tv-TF1-131538.csv', './automatic_baselines/inaspeechsegmenter/tv-TF1-131538.csv')
ge('./annotations/detailed_csv/tv-ART-072411.csv', './automatic_baselines/inaspeechsegmenter/tv-ART-072411.csv')
ge('./annotations/detailed_csv/tv-TF1-131538.csv', './automatic_baselines/liumspkdirization_csv/tv-TF1-131538.csv')
ge('./annotations/detailed_csv/tv-ART-072411.csv', './automatic_baselines/liumspkdirization_csv/tv-ART-072411.csv')


#df2annot(dref, 'speaker_gender').label_support('male')
#df2uem(dref, 'speech_quality', ['onomatopoeia'])

[ 00:00:00.000 -->  00:00:00.445] ('false alarm', None, 'male')
[ 00:00:00.745 -->  00:00:09.186] ('correct', 'male', 'male')
[ 00:00:10.830 -->  00:00:10.893] ('false alarm', None, 'male')
[ 00:00:11.193 -->  00:00:12.116] ('correct', 'male', 'male')
[ 00:00:12.416 -->  00:00:12.500] ('false alarm', None, 'male')
[ 00:00:12.800 -->  00:00:15.198] ('correct', 'male', 'male')
[ 00:00:15.498 -->  00:00:15.589] ('false alarm', None, 'male')
[ 00:00:15.889 -->  00:00:18.245] ('correct', 'male', 'male')
[ 00:00:18.545 -->  00:00:18.671] ('false alarm', None, 'male')
[ 00:00:18.971 -->  00:00:22.273] ('correct', 'male', 'male')
[ 00:00:22.573 -->  00:00:22.679] ('false alarm', None, 'male')
[ 00:00:22.979 -->  00:00:25.914] ('correct', 'male', 'male')
[ 00:00:27.020 -->  00:00:27.025] ('false alarm', None, 'male')
[ 00:00:27.325 -->  00:00:27.650] ('correct', 'male', 'male')
[ 00:00:27.950 -->  00:00:29.678] ('false alarm', None, 'male')
[ 00:00:29.978 -->  00:00:34.146] ('correct', 'male', 

({('false alarm', None, 'male'): 2.735000000000014,
  ('missed detection', 'male', None): 2.673000000000009,
  ('correct', 'male', 'male'): 22.486000000000008},
 0.21495289955880675,
 0.0018574865553354098,
 0.21495289955880675)

In [82]:
ge.iea2.report()

Unnamed: 0_level_0,detailled gender prediction error,total,correct male,correct male,missed detection male,missed detection male,false alarm male,false alarm male,confusion male,confusion male,correct female,correct female,missed detection female,missed detection female,false alarm female,false alarm female,confusion female,confusion female
Unnamed: 0_level_1,%,Unnamed: 2_level_1,Unnamed: 3_level_1,%,Unnamed: 5_level_1,%,Unnamed: 7_level_1,%,Unnamed: 9_level_1,%,Unnamed: 11_level_1,%,Unnamed: 13_level_1,%,Unnamed: 15_level_1,%,Unnamed: 17_level_1,%
item,Unnamed: 1_level_2,Unnamed: 2_level_2,Unnamed: 3_level_2,Unnamed: 4_level_2,Unnamed: 5_level_2,Unnamed: 6_level_2,Unnamed: 7_level_2,Unnamed: 8_level_2,Unnamed: 9_level_2,Unnamed: 10_level_2,Unnamed: 11_level_2,Unnamed: 12_level_2,Unnamed: 13_level_2,Unnamed: 14_level_2,Unnamed: 15_level_2,Unnamed: 16_level_2,Unnamed: 17_level_2,Unnamed: 18_level_2
tv-TF1-131538,9.786282,44.685,44.108,98.708739,0.0,0.0,3.796,8.495021,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.577,1.291261
tv-ART-072411,12.965539,25.159,24.015,95.452919,1.144,4.547081,2.118,8.418459,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
tv-TF1-131538,8.86651,44.685,44.36,99.272687,0.325,0.727313,3.637,8.139197,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
tv-ART-072411,21.49529,25.159,22.486,89.375571,2.673,10.624429,2.735,10.870861,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
TOTAL,12.173558,139.688,134.969,96.621757,4.142,2.96518,12.286,8.795315,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.577,0.413063


In [84]:
ge.ier.report()

Unnamed: 0_level_0,identification error rate,total,correct,correct,false alarm,false alarm,missed detection,missed detection,confusion,confusion
Unnamed: 0_level_1,%,Unnamed: 2_level_1,Unnamed: 3_level_1,%,Unnamed: 5_level_1,%,Unnamed: 7_level_1,%,Unnamed: 9_level_1,%
item,Unnamed: 1_level_2,Unnamed: 2_level_2,Unnamed: 3_level_2,Unnamed: 4_level_2,Unnamed: 5_level_2,Unnamed: 6_level_2,Unnamed: 7_level_2,Unnamed: 8_level_2,Unnamed: 9_level_2,Unnamed: 10_level_2
tv-TF1-131538,9.786282,44.685,44.108,98.708739,3.796,8.495021,0.0,0.0,0.577,1.291261
tv-ART-072411,12.965539,25.159,24.015,95.452919,2.118,8.418459,1.144,4.547081,0.0,0.0
tv-TF1-131538,8.86651,44.685,44.36,99.272687,3.637,8.139197,0.325,0.727313,0.0,0.0
tv-ART-072411,21.49529,25.159,22.486,89.375571,2.735,10.870861,2.673,10.624429,0.0,0.0
TOTAL,12.173558,139.688,134.969,96.621757,12.286,8.795315,4.142,2.96518,0.577,0.413063


In [68]:
df = pd.read_csv('./annotations/detailed_csv/tv-TF1-131538.csv')
df['dur'] = df.stop - df.start
df.dur[df.speaker_gender == 'male'].sum()

49.485

In [5]:
# compare reference and hypothesis directories
details, summary = ve.compare_directories(dref, dpred)

In [6]:
# see result summary
summary

{'accuracy': 0.9316051037305464,
 'precision': 0.9201954612806083,
 'recall': 0.97343292945581,
 'fmeasure': 0.9460658362837207}

In [7]:
# details per file
details

Unnamed: 0,true negative,true positive,false negative,false positive,retrieved,relevant,relevant retrieved,basename,accuracy,precision,recall,fmeasure
0,47.721,5.352,4.350,0.338,5.690,9.702,5.352,radio-FIP-20220423T220055-60,0.918838,0.940598,0.551639,0.695426
1,8.822,39.974,3.536,2.920,42.894,43.510,39.974,tv-FR2-20221206T050843-60,0.883154,0.931925,0.918731,0.925281
2,0.000,57.608,0.000,0.303,57.911,57.608,57.608,tv-BFT-20210409T165932-60,0.994768,0.994768,1.000000,0.997377
3,60.032,0.000,0.000,0.000,0.000,0.000,0.000,radio-FUN-20220823T013906-60,1.000000,1.000000,1.000000,1.000000
4,3.710,38.039,0.890,8.756,46.795,38.929,38.039,tv-TF1-20220709T021706-60,0.812316,0.812886,0.977138,0.887476
...,...,...,...,...,...,...,...,...,...,...,...,...
280,24.529,29.232,0.822,0.580,29.812,30.054,29.232,tv-LCI-20210624T142245-60,0.974584,0.980545,0.972649,0.976581
281,0.000,57.578,0.000,0.354,57.932,57.578,57.578,radio-RMC-20220727T161225-60,0.993889,0.993889,1.000000,0.996935
282,27.014,27.262,3.794,0.867,28.129,31.056,27.262,tv-T5M-20210712T040130-60,0.920916,0.969178,0.877834,0.921247
283,3.560,41.584,9.261,0.868,42.452,50.845,41.584,tv-C+_-20221206T181000-60,0.816746,0.979553,0.817858,0.891433


In [8]:
# evaluate based on AVGBEST3_DIFFICULTY criterion
df, summary = ve.compare_category(dpred, 'channel_category')

In [9]:
# summary per category
summary

Unnamed: 0,category,accuracy,precision,recall,fmeasure
0,generalist_radio,0.951689,0.952204,0.997257,0.97421
1,generalist_tv,0.883284,0.873171,0.943247,0.906857
2,music_radio,0.978965,0.925055,0.95748,0.940988
3,news_tv,0.947949,0.949738,0.992457,0.970627


In [10]:
# details per file
df

Unnamed: 0,true negative,true positive,false negative,false positive,retrieved,relevant,relevant retrieved,basename,accuracy,precision,recall,fmeasure,category
0,12.131,29.893,0.248,7.939,37.832,30.141,29.893,radio-FCR-20210309T031913-60,0.836948,0.790151,0.991772,0.879555,generalist_radio
1,0.000,46.914,0.000,3.818,50.732,46.914,46.914,radio-FCR-20210627T083707-60,0.924742,0.924742,1.000000,0.960900,generalist_radio
2,0.832,51.494,0.000,1.438,52.932,51.494,51.494,radio-FCR-20211004T074934-60,0.973253,0.972833,1.000000,0.986229,generalist_radio
3,6.386,38.849,0.000,2.497,41.346,38.849,38.849,radio-FCR-20211012T032010-60,0.947687,0.939607,1.000000,0.968863,generalist_radio
4,0.000,48.313,0.000,5.398,53.711,48.313,48.313,radio-FCR-20211030T225430-60,0.899499,0.899499,1.000000,0.947091,generalist_radio
...,...,...,...,...,...,...,...,...,...,...,...,...,...
57,1.128,55.713,0.000,0.640,56.353,55.713,55.713,tv-LCI-20220123T050538-60,0.988866,0.988643,1.000000,0.994289,news_tv
58,10.342,33.966,0.057,9.916,43.882,34.023,33.966,tv-LCI-20220214T061740-60,0.816271,0.774030,0.998325,0.871985,news_tv
59,0.000,56.559,0.000,0.618,57.177,56.559,56.559,tv-LCI-20220301T100827-60,0.989191,0.989191,1.000000,0.994566,news_tv
60,0.000,56.299,0.000,0.667,56.966,56.299,56.299,tv-LCI-20220517T213519-60,0.988291,0.988291,1.000000,0.994111,news_tv
