# Imports

In [1]:
%reload_ext autoreload
%autoreload 2

from src.data.dataset import make_dataset
from src.data.labels import make_labels
from src.data.size_distribution import to_htk
from src.data.labels import read_dmps_maual_labels

from src.models.HTK import hmm

from src import features
from src import models

from src.visualization import visualize as viz

from src.models.HTK import htktools as htkt

In [3]:
# Environment report: print the versions of pandas and its dependencies so the
# run can be reproduced later. Plain-text output (not JSON) is requested explicitly.
import pandas as pd

pd.show_versions(as_json=False)


INSTALLED VERSIONS
------------------
commit           : None
python           : 3.7.4.final.0
python-bits      : 64
OS               : Linux
OS-release       : 4.15.0-129-generic
machine          : x86_64
processor        : x86_64
byteorder        : little
LC_ALL           : None
LANG             : es_AR.UTF-8
LOCALE           : es_AR.UTF-8

pandas           : 1.0.1
numpy            : 1.17.2
pytz             : 2019.3
dateutil         : 2.8.0
pip              : 20.3.3
setuptools       : 49.1.0
Cython           : 0.29.13
pytest           : 5.2.1
hypothesis       : None
sphinx           : 2.2.0
blosc            : None
feather          : None
xlsxwriter       : 1.2.1
lxml.etree       : 4.4.1
html5lib         : 1.0.1
pymysql          : None
psycopg2         : 2.8.4 (dt dec pq3 ext lo64)
jinja2           : 2.10.3
IPython          : 7.8.0
pandas_datareader: None
bs4              : 4.8.1
bottleneck       : 1.2.1
fastparquet      : None
gcsfs            : None
lxml.etree       : 4.4.1
matplot

# Inicializo Verta

In [2]:
from verta import Client

# Address of the Verta tracking server used for this run.
HOST = "http://127.0.0.1:3000/"

# Project / experiment identifiers under which the run is registered.
PROJECT_NAME = "NPF Detector"
EXPERIMENT_NAME = "2_normalize_adapted"
# Adjacent literals are concatenated by Python into one description string.
EXPERIMENT_DESCRIPTION = (
    "Data Version -> 2. "
    "Adapted model to real labels."
    "Notebook -> 2-EventNoEvent_HInit_normalized_adapted.ipynb"
)

# Connect, then select project -> experiment -> run, in that order.
client = Client(HOST)
proj = client.set_project(PROJECT_NAME)
expt = client.set_experiment(EXPERIMENT_NAME)
run = client.set_experiment_run(desc=EXPERIMENT_DESCRIPTION)

connection successfully established
set existing Project: NPF Detector from personal workspace
set existing Experiment: 2_normalize_adapted
created new ExperimentRun: Run 299661608683863639896


# Preparo los datos

In [3]:
# Dataset configuration: which data version to load and whether to normalize it.
data_version = 2
normalize = True

# make_dataset returns the train and test sets, in that order.
train_data, test_data = make_dataset(normalize, data_version)

# Log the size of each split (test first, then train) as Verta observations.
for split_name, split in (('test', test_data), ('train', train_data)):
    run.log_observation(f'number_of_{split_name}_files', split['count'])

# Record the dataset configuration alongside the run.
run.log_hyperparameters(dict(data_version=data_version, normalize=normalize))

INFO:root:Converting raw files to HTK format ...
INFO:root:Data version: 2
INFO:numexpr.utils:NumExpr defaulting to 4 threads.
INFO:root:Generating script (.scp) files...
INFO:root:Adding deltas and acelerations...
INFO:root:Test files:	537
INFO:root:Train files:	4521


# Pongo las etiquetas

In [4]:
# Thresholds handed to make_labels to build the event / no-event labels.
# The unit annotations are kept exactly as the author wrote them.
# NOTE(review): the unit on 'pos_vol_threshold' ("1/m3^3/10min") looks like a
# typo -- confirm the intended unit.
thresholds = {'nuc_threshold': 0.15,        # 1/cm3/10min
              'pos_vol_threshold': 200,     # 1/m3^3/10min
              'neg_vol_threshold': -5000    # 1/cm3/10min
              }

# Reuse the `data_version` selected in the dataset cell instead of repeating the
# literal 2, so labels, data and the logged hyperparameter always refer to the
# same data version.
labels = make_labels(thresholds, how='event-noevent', data_version=data_version)

# Record the thresholds alongside the run.
run.log_hyperparameters(thresholds)

INFO:root:Creating labels ...
INFO:root:Labels created OK!
INFO:root:Generating Master Label File...


In [5]:
# Plot the test set together with the generated labels. The first argument is
# the name given to this batch of plots (presumably an output tag -- confirm in
# src.visualization.visualize).
viz.generate_plots('synth_test_data', test_data, labels)

## Inicializo el modelo de Markov

Edito el archivo hmmdefs y le agrego los modelos creados con HInit

In [6]:
# Variance floor and minimum variance passed to the HMM initialisation.
vf, mv = 0.1, 0.1

# Build the initial model from the training data.
model = hmm.initialize(train_data, variance_floor=vf, minimum_variance=mv)

# Record both variance settings alongside the run.
run.log_hyperparameters(dict(var_floor=vf, var_min=mv))

INFO:root:Initializing model...


Calculating Fixed Variance
  HMM Prototype: ../src/models/HTK/misc/proto
  Segment Label: None
  Num Streams  : 1
  UpdatingMeans: Yes
  Target Direct: ../models/hmm/0
217008 speech frames accumulated
Updating HMM Means and Covariances
Output written to directory ../models/hmm/0
Var floor macros output to file ../models/hmm/0/vFloors



### Estimo los parámetros y evalúo los resultados

In [7]:
# Train the model on the training set, then evaluate it on the test set.
model = hmm.train(model, train_data, labels)
results = hmm.test(model, test_data, labels)

# Record every entry of the test results as a Verta observation.
for metric_name, metric_value in results.items():
    run.log_observation(metric_name, metric_value)

Pruning-Off

Pruning-Off

Pruning-Off

/home/gfogwil/Documentos/Facultad/Tesis/programs/htk/HTKTools/HVite -C ../src/models/HTK/misc/config -H ../models/hmm/3/macros -H ../models/hmm/3/hmmdefs -p 0 -s 5 -A -S ../data/interim/test.scp -i ../data/interim/results.mlf -w ../src/models/HTK/misc/wdnet ../src/models/HTK/misc/dict ../src/models/HTK/misc/monophones 

  Date: Tue Dec 22 21:54:16 2020
  Ref : ../data/interim/labels.mlf
  Rec : ../data/interim/results.mlf
------------------------ Overall Results --------------------------
SENT: %Correct=90.50 [H=162, S=17, N=179]
WORD: %Corr=89.47, Acc=89.47 [H=221, D=24, S=2, I=0, N=247]
------------------------ Confusion Matrix -------------------------
       e   n 
           e  Del [ %c / %e]
   e  45   0    6
  ne   2  176  18 [98.9/0.8]
Ins    0   0



In [8]:
# Plot the test set again, this time passing the test results in addition to
# the reference labels.
viz.generate_plots('labeled_synth_test_data', test_data, labels, results)

# Aumento el número de gaussianas

In [9]:
# Model-edit script: a single 'MU 2' command applied to the mix of states 2-4
# of every model (the tool output reports a mix-up to 2 components per stream).
edit_commands = ['MU 2 {*.state[2-4].mix}']

# Apply the edit, retrain the edited model, and evaluate it on the test set.
model = hmm.edit(model, edit_commands)
model = hmm.train(model, train_data, labels)

results = hmm.test(model, test_data, labels)

# Record every entry of the new test results as a Verta observation.
for metric_name, metric_value in results.items():
    run.log_observation(metric_name, metric_value)

# Record each edit command that produced this model.
for edit_command in edit_commands:
    run.log_observation('model_edit_commands', edit_command)

HHEd
 2/2 Models Loaded [5 states max, 1 mixes max]

MU 2 {}
 Mixup to 2 components per stream
 MU: Number of mixes increased from 6 to 12

Saving new HMM files ...
Edit Complete

Pruning-Off

Pruning-Off

Pruning-Off

/home/gfogwil/Documentos/Facultad/Tesis/programs/htk/HTKTools/HVite -C ../src/models/HTK/misc/config -H ../models/hmm/7/macros -H ../models/hmm/7/hmmdefs -p 0 -s 5 -A -S ../data/interim/test.scp -i ../data/interim/results.mlf -w ../src/models/HTK/misc/wdnet ../src/models/HTK/misc/dict ../src/models/HTK/misc/monophones 

  Date: Tue Dec 22 21:56:14 2020
  Ref : ../data/interim/labels.mlf
  Rec : ../data/interim/results.mlf
------------------------ Overall Results --------------------------
SENT: %Correct=90.50 [H=162, S=17, N=179]
WORD: %Corr=90.69, Acc=89.88 [H=224, D=23, S=0, I=2, N=247]
------------------------ Confusion Matrix -------------------------
       e   n 
           e  Del [ %c / %e]
   e  45   0    6
  ne   0  179  17
Ins    2   0



In [10]:
# Plot the test set with the reference labels and the results obtained after
# increasing the number of mixture components.
viz.generate_plots('labeled_synth_test_data_2_gauss', test_data, labels, results)

# Adapto el modelo a los datos reales

In [11]:
# Load the real measurements; to_htk() returns two datasets, bound here as the
# labelled and the unlabelled set (presumably converted to HTK format -- confirm
# in src.data.size_distribution).
labeled_real_data, unlabeled_real_data = to_htk()
# NOTE(review): this rebinds `labels` (until now the output of make_labels) to
# the manually assigned labels, so re-running earlier cells after this one
# would train/test against the manual labels instead.
labels = read_dmps_maual_labels()

In [12]:
# Adapt the model to the labelled real data, then evaluate the adapted model
# on that same data.
model = hmm.adapt(model, labeled_real_data, labels)
results = hmm.test(model, labeled_real_data, labels)

viz.generate_plots('real_data_for_adaptation', labeled_real_data, labels, results)

# Log the 'WORD_Corr' entry of the test results as the run's headline metric.
word_correct = results['WORD_Corr']
run.log_metric('Correct_labels', word_correct)

# Upload the final model; its location is the string form of `model` appended
# to the models directory.
final_model_path = f'../models/hmm/{model!s}'
run.log_artifact('final_model', final_model_path, overwrite=True)

  Using baseclass macro "global" from file ../models/classes/global
Attached 12 XFormInfo structures
Attached 12 RegAcc structures
Pruning-Off
Generating transform 2 (1)
  Using baseclass macro "global" from file ../models/classes/global
Estimated XForm 1 using 37837.005142 observations

/home/gfogwil/Documentos/Facultad/Tesis/programs/htk/HTKTools/HVite -C ../src/models/HTK/misc/config -H ../models/hmm/7/macros -H ../models/hmm/7/hmmdefs -A -J ../models/classes/ -J ../models/xforms mllr1 -h ../data/interim/2017.train_D_A.real/%* -k -w ../src/models/HTK/misc/wdnet -S ../data/interim/2017.train_D_A.real.scp -i ../data/interim/results.mlf ../src/models/HTK/misc/dict ../src/models/HTK/misc/monophones 

  Date: Tue Dec 22 21:58:04 2020
  Ref : ../data/raw/dmps/manual_labels.mlf
  Rec : ../data/interim/results.mlf
------------------------ Overall Results --------------------------
SENT: %Correct=32.96 [H=89, S=181, N=270]
WORD: %Corr=60.33, Acc=39.31 [H=333, D=206, S=13, I=116, N=552]
-----

In [14]:
# Run the adapted model on the unlabelled real data.
result = hmm.predict(model, unlabeled_real_data)
# Plot the predictions just computed. The original passed `results`, which
# still held the output of hmm.test from the adaptation cell, so the
# predictions were never plotted.
# NOTE(review): the prediction is passed in the third positional slot, which
# other calls use for the reference labels -- confirm this is what
# generate_plots expects for unlabelled data.
viz.generate_plots('labeled_real_data', unlabeled_real_data, result)

/home/gfogwil/Documentos/Facultad/Tesis/programs/htk/HTKTools/HVite -C ../src/models/HTK/misc/config -H ../models/hmm/7/macros -H ../models/hmm/7/hmmdefs -A -J ../models/classes/ -J ../models/xforms mllr1 -h ../data/interim/2015.test_D_A.real/%* -k -w ../src/models/HTK/misc/wdnet -S ../data/interim/2015.test_D_A.real.scp -i ../data/interim/results.mlf ../src/models/HTK/misc/dict ../src/models/HTK/misc/monophones 

