In [1]:
import sys
import os

# Add the directory named by the RENNET_ROOT environment variable to the import
# search path (presumably so the `rennet` imports further down resolve).
sys.path.append(os.environ['RENNET_ROOT'])
# Root directory for the data, read from RENNET_DATA_ROOT; a missing variable
# raises KeyError here.
data_root = os.environ['RENNET_DATA_ROOT']
print(data_root)

/nm-raid/audio/work/abdullah/nm-rennet/rennet-x/data


In [2]:
import numpy as np
import glob
from h5py import File as hFile

In [3]:
import rennet.utils.np_utils as nu
import rennet.datasets.fisher as fe

In [5]:
# Locate the pickles directory whose name contains 'logmel64'.
# glob returns matches in filesystem-dependent order, so the matches are sorted
# to make the choice deterministic, and an empty result raises an explicit
# error instead of a bare IndexError from `[0]`.
_pickles_pattern = os.path.join(data_root, 'working', 'fisher', 'fe_03_p1', 'wav-8k-mono', 'pickles', '*logmel64*')
_pickles_matches = sorted(glob.glob(_pickles_pattern))
if not _pickles_matches:
    raise FileNotFoundError("No path matches {!r}".format(_pickles_pattern))
pickles = _pickles_matches[0]

# Training and validation HDF5 files inside the selected directory.
trn_h5 = os.path.join(pickles, 'trn.h5')
val_h5 = os.path.join(pickles, 'val.h5')

In [6]:
# Both splits are read with exactly the same provider settings; only the
# HDF5 file passed as the first argument differs.
_provider_settings = dict(
    callids='all',
    data_context=10,
    add_channel_at_end=False,
    label_subcontext=0,
    steps_per_chunk=1,
    classkeyfn=np.argmax,  # for categorical labels
    class_subsample_to_ratios=1.,
    shuffle_seed=None,  # never shuffled
    npasses=1,
)

trn_ip = fe.UnnormedFramewiseInputsProvider.for_callids(trn_h5, **_provider_settings)
val_ip = fe.UnnormedFramewiseInputsProvider.for_callids(val_h5, **_provider_settings)

In [21]:
def priors(inputs_provider):
    """Accumulate initial, transition and prior label counts over a provider.

    Iterates ``inputs_provider.flow(indefinitely=False, only_labels=True,
    with_chunking=True)``. For every yielded chunk the label array (second
    element of the yielded pair, cast to ``int``) contributes to:

    * ``init``  -- its first row, but only for the very first chunk and for
      chunks whose ``chunking.labelpath`` differs from the previous chunk's.
    * ``tran``  -- ``nu.confusion_matrix_forcategorical(labels[:-1], labels[1:])``,
      i.e. each row paired with the next row of the *same* chunk. Pairs that
      span two chunks are not counted.
    * ``prior`` -- the sum of the label array over axis 0.

    NOTE(review): presumably the labels are one-hot rows (frames x classes);
    confirm against the provider.

    Returns
    -------
    tuple
        ``(init, tran, prior)``; all three are ``None`` when the flow yields
        nothing.
    """
    last_path = None  # labelpath of the most recently seen chunk
    init = tran = prior = None

    stream = inputs_provider.flow(
        indefinitely=False,
        only_labels=True,
        with_chunking=True,
    )

    for xy, (_, chunking) in stream:
        labels = xy[1].astype(int)
        path = chunking.labelpath

        if last_path is None:
            # Nothing accumulated yet: this chunk seeds all three totals.
            last_path = path
            init = labels[0, ...]
            prior = labels.sum(axis=0)
            tran = nu.confusion_matrix_forcategorical(labels[:-1], labels[1:])
        else:
            if path != last_path:
                # A different label file begins: count its first row as an
                # initial label.
                init += labels[0, ...]
                last_path = path

            prior += labels.sum(axis=0)
            tran += nu.confusion_matrix_forcategorical(labels[:-1], labels[1:])

    return init, tran, prior


In [22]:
# Accumulate the initial / transition / prior counts over the validation provider.
vinit, vtran, vprior = priors(val_ip)

In [23]:
# Validation prior counts, their total, and the counts as fractions of that total.
vprior, vprior.sum(), vprior/vprior.sum()

(array([ 904419, 4234941,  447584]),
 5586944,
 array([ 0.16188081,  0.7580067 ,  0.08011249]))

In [24]:
# Validation transition counts, their grand total, the per-row totals, and
# element [1] of nu.normalize_confusion_matrix's result (its rows sum to 1 in
# the recorded output, so presumably the row-normalised matrix).
vtran, vtran.sum(), vtran.sum(axis=1), nu.normalize_confusion_matrix(vtran)[1]

(array([[ 890116,   14211,      26],
        [  14251, 4212846,    7599],
        [     13,    7599,  439942]]),
 5586603,
 array([ 904353, 4234696,  447554]),
 array([[  9.84257254e-01,   1.57139966e-02,   2.87498355e-05],
        [  3.36529470e-03,   9.94840244e-01,   1.79446175e-03],
        [  2.90467742e-05,   1.69789567e-02,   9.82991996e-01]]))

In [25]:
# Validation initial-label counts, their total, and the counts as fractions of that total.
vinit, vinit.sum(), vinit / vinit.sum()

(array([ 0, 99,  0]), 99, array([ 0.,  1.,  0.]))

In [26]:
# Accumulate the initial / transition / prior counts over the training provider.
tinit, ttran, tprior = priors(trn_ip)

In [27]:
# Training prior counts, their total, and the counts as fractions of that total.
tprior, tprior.sum(), tprior/tprior.sum()

(array([ 8786183, 45852093,  5654844]),
 60293120,
 array([ 0.14572447,  0.76048632,  0.09378921]))

In [28]:
# Training transition counts, their grand total, the per-row totals, and
# element [1] of nu.normalize_confusion_matrix's result (its rows sum to 1 in
# the recorded output, so presumably the row-normalised matrix).
ttran, ttran.sum(), ttran.sum(axis=1), nu.normalize_confusion_matrix(ttran)[1]

(array([[ 8652702,   132681,      235],
        [  133020, 45628715,    87602],
        [     107,    87585,  5566793]]),
 60289440,
 array([ 8785618, 45849337,  5654485]),
 array([[  9.84871184e-01,   1.51020679e-02,   2.67482606e-05],
        [  2.90124152e-03,   9.95188109e-01,   1.91064922e-03],
        [  1.89230319e-05,   1.54894743e-02,   9.84491603e-01]]))

In [29]:
# Training initial-label counts, their total, and the counts as fractions of that total.
tinit, tinit.sum(), tinit / tinit.sum()

(array([   0, 1196,    4]),
 1200,
 array([ 0.        ,  0.99666667,  0.00333333]))

In [30]:
# Pool the validation and training counts by element-wise addition.
init, tran, prior = vinit + tinit, vtran + ttran, vprior + tprior

In [31]:
# Pooled (validation + training) prior counts, their total, and the counts as
# fractions of that total.
prior, prior.sum(), prior/prior.sum()

(array([ 9690602, 50087034,  6102428]),
 65880064,
 array([ 0.14709461,  0.76027604,  0.09262936]))

In [32]:
# Pooled transition counts, their grand total, the per-row totals, and
# element [1] of nu.normalize_confusion_matrix's result (its rows sum to 1 in
# the recorded output, so presumably the row-normalised matrix).
tran, tran.sum(), tran.sum(axis=1), nu.normalize_confusion_matrix(tran)[1]

(array([[ 9542818,   146892,      261],
        [  147271, 49841561,    95201],
        [     120,    95184,  6006735]]),
 65876043,
 array([ 9689971, 50084033,  6102039]),
 array([[  9.84813886e-01,   1.51591785e-02,   2.69350651e-05],
        [  2.94047806e-03,   9.95158697e-01,   1.90082536e-03],
        [  1.96655577e-05,   1.55987204e-02,   9.84381614e-01]]))

In [33]:
# Pooled initial-label counts, their total, and the counts as fractions of that total.
init, init.sum(), init / init.sum()

(array([   0, 1295,    4]),
 1299,
 array([ 0.        ,  0.99692071,  0.00307929]))

In [34]:
# Destination HDF5 file for the pooled counts: outputs/eval/priors.h5 in the
# parent directory of data_root (the ".." component is left unnormalised).
path = os.path.join(data_root, "..", "outputs", "eval", "priors.h5")
path

'/nm-raid/audio/work/abdullah/nm-rennet/rennet-x/data/../outputs/eval/priors.h5'

In [36]:
# Persist the pooled counts, one dataset per array. Mode 'x' creates the file
# and fails if it already exists, so an earlier result is never overwritten.
with hFile(path, 'x') as f:
    for p, d in (('init', init), ('tran', tran), ('priors', prior)):
        f.create_dataset(p, data=d)
        f.flush()

In [37]:
# Re-open the file read-only and print each stored dataset as a NumPy array
# to check what was written.
with hFile(path, 'r') as f:
    for p in ['init', 'tran', 'priors']:
        print(np.array(f[p]))

[   0 1295    4]
[[ 9542818   146892      261]
 [  147271 49841561    95201]
 [     120    95184  6006735]]
[ 9690602 50087034  6102428]
