# Imports

In [1]:
%reload_ext autoreload
%autoreload 2

from src.data.dataset import make_dataset
from src.data.labels import make_labels
from src.models.HTK import hmm
from src.data.size_distribution import to_htk
from src.data.labels import read_dmps_maual_labels

from src import features
from src import models

from src.visualization import visualize as viz

from src.models.HTK import htktools as htkt

# Inicializo Verta

In [4]:
from verta import Client

# Address of the tracking server and the names under which this run is filed.
HOST = "http://127.0.0.1:3000/"
PROJECT_NAME = "NPF Detector"
EXPERIMENT_NAME = "2_normalize_real_prediction"
# Adjacent literals are joined by Python at compile time; the resulting text
# is exactly the original single-line description.
EXPERIMENT_DESCRIPTION = (
    "Data Version -> 2. Adapted model to real labels."
    "Notebook -> 2-EventNoEvent_HInit_normalized_adapted.ipynb"
)

# Connect, then set the project, the experiment and the experiment run.
# `run` is the handle every later cell uses for logging.
client = Client(HOST)
proj = client.set_project(PROJECT_NAME)
expt = client.set_experiment(EXPERIMENT_NAME)
run = client.set_experiment_run(desc=EXPERIMENT_DESCRIPTION)



HTTPError: 503 Server Error: connection error: desc = "transport: Error while dialing dial tcp 172.21.0.3:8085: connect: connection refused" for url: http://127.0.0.1:3000/api/v1/modeldb/project/verifyConnection

# Preparo los datos

In [4]:
# Dataset settings; both are passed to make_dataset and logged on the run below.
data_version, normalize = 2, True

train_data, test_data = make_dataset(normalize, data_version)

# Record the 'count' entry of each split as an observation on the run.
run.log_observation('number_of_test_files', test_data['count'])
run.log_observation('number_of_train_files', train_data['count'])

run.log_hyperparameters({'data_version': data_version, 'normalize': normalize})

INFO:root:Converting raw files to HTK format ...
INFO:root:Data version: 2
INFO:numexpr.utils:NumExpr defaulting to 4 threads.
INFO:root:Generating script (.scp) files...
INFO:root:Adding deltas and acelerations...
INFO:root:Test files:	501
INFO:root:Train files:	4557


# Pongo las etiquetas

In [5]:
# Thresholds handed to make_labels and logged as hyperparameters of the run.
# NOTE(review): the unit written next to pos_vol_threshold ("1/m3^3/10min")
# looks like a typo - confirm the intended unit.
thresholds = {'nuc_threshold': 0.15,        # 1/cm3/10min
              'pos_vol_threshold': 200,     # 1/m3^3/10min
              'neg_vol_threshold': -5000    # 1/cm3/10min
              }

# Reuse the data_version chosen in the data-preparation cell rather than a
# second hard-coded 2, so the labels always match the dataset that was built
# and the value logged with log_hyperparameters stays truthful.
labels = make_labels(thresholds, how='event-noevent', data_version=data_version)

run.log_hyperparameters(thresholds)

INFO:root:Creating labels ...
INFO:root:Labels created OK!
INFO:root:Generating Master Label File...


In [6]:
# Generate plots for the test split using the labels returned by make_labels.
# NOTE(review): 'synth_test_data' is presumably the name/prefix under which the
# figures are stored - confirm in viz.generate_plots.
viz.generate_plots('synth_test_data', test_data, labels)

## Inicializo el modelo de Markov

Edito el archivo hmmdefs y le agrego los modelos creados con HInit.

In [7]:
# Variance floor and minimum variance for the initialization; both are 0.1.
vf = mv = 0.1

model = hmm.initialize(
    train_data,
    variance_floor=vf,
    minimum_variance=mv,
)

# Keep the initialization settings together with the run.
run.log_hyperparameters({'var_floor': vf, 'var_min': mv})

INFO:root:Initializing model...


Calculating Fixed Variance
  HMM Prototype: ../src/models/HTK/misc/proto
  Segment Label: None
  Num Streams  : 1
  UpdatingMeans: Yes
  Target Direct: ../models/hmm/0
218736 speech frames accumulated
Updating HMM Means and Covariances
Output written to directory ../models/hmm/0
Var floor macros output to file ../models/hmm/0/vFloors



### Estimo los parámetros y evalúo los resultados

In [8]:
# Train on the training split, then evaluate on the test split.
# `model` is rebound to the value returned by hmm.train, so re-running this
# cell trains again starting from the already-trained model.
model = hmm.train(model, train_data, labels)
results = hmm.test(model, test_data, labels)

# Log every entry of the results mapping as an observation on the run.
for metric_name, metric_value in results.items():
    run.log_observation(metric_name, metric_value)

Pruning-Off

Pruning-Off

Pruning-Off

/home/gfogwil/Documentos/Facultad/Tesis/programs/htk/HTKTools/HVite -C ../src/models/HTK/misc/config -H ../models/hmm/3/macros -H ../models/hmm/3/hmmdefs -p 0 -s 5 -A -S ../data/interim/test.scp -i ../data/interim/results.mlf -w ../src/models/HTK/misc/wdnet ../src/models/HTK/misc/dict ../src/models/HTK/misc/monophones 

  Date: Tue Sep  8 14:38:41 2020
  Ref : ../data/interim/labels.mlf
  Rec : ../data/interim/results.mlf
------------------------ Overall Results --------------------------
SENT: %Correct=93.41 [H=156, S=11, N=167]
WORD: %Corr=91.44, Acc=90.99 [H=203, D=17, S=2, I=1, N=222]
------------------------ Confusion Matrix -------------------------
       e   n 
           e  Del [ %c / %e]
   e  39   0    6
  ne   2  164  11 [98.8/0.9]
Ins    1   0



In [8]:
# Plot the test split again, this time also passing the results returned by
# hmm.test alongside the reference labels.
viz.generate_plots('labeled_synth_test_data', test_data, labels, results)

# Aumento el número de gaussianas

In [9]:
# Edit script passed to hmm.edit; the tool output for this command reports a
# mix-up to 2 components per stream.
edit_commands = ['MU 2 {*.state[2-4].mix}']

# Apply the edit, retrain the edited model and evaluate it on the test split.
model = hmm.edit(model, edit_commands)
model = hmm.train(model, train_data, labels)
results = hmm.test(model, test_data, labels)

# Log the new evaluation under the same observation keys as before.
for metric_name, metric_value in results.items():
    run.log_observation(metric_name, metric_value)

# Record each edit command that produced this model.
for edit_command in edit_commands:
    run.log_observation('model_edit_commands', edit_command)

HHEd
 2/2 Models Loaded [5 states max, 1 mixes max]

MU 2 {}
 Mixup to 2 components per stream
 MU: Number of mixes increased from 6 to 12

Saving new HMM files ...
Edit Complete

Pruning-Off

Pruning-Off

Pruning-Off

/home/gfogwil/Documentos/Facultad/Tesis/programs/htk/HTKTools/HVite -C ../src/models/HTK/misc/config -H ../models/hmm/7/macros -H ../models/hmm/7/hmmdefs -p 0 -s 5 -A -S ../data/interim/test.scp -i ../data/interim/results.mlf -w ../src/models/HTK/misc/wdnet ../src/models/HTK/misc/dict ../src/models/HTK/misc/monophones 

  Date: Tue Sep  8 14:38:45 2020
  Ref : ../data/interim/labels.mlf
  Rec : ../data/interim/results.mlf
------------------------ Overall Results --------------------------
SENT: %Correct=94.01 [H=157, S=10, N=167]
WORD: %Corr=91.89, Acc=91.89 [H=204, D=16, S=2, I=0, N=222]
------------------------ Confusion Matrix -------------------------
       e   n 
           e  Del [ %c / %e]
   e  40   0    5
  ne   2  164  11 [98.8/0.9]
Ins    0   0



In [10]:
# Plot the test split with the reference labels and the results obtained after
# the mixture increase.
viz.generate_plots('labeled_synth_test_data_2_gauss', test_data, labels, results)

# Adapto el modelo a los datos reales

In [11]:
# to_htk() returns two datasets, unpacked here as the labeled and the
# unlabeled real data.
# NOTE(review): presumably this converts the real measurements to HTK format -
# confirm in src.data.size_distribution.
labeled_real_data, unlabeled_real_data = to_htk()
# NOTE(review): this rebinds `labels`, which until now held the value returned
# by make_labels. Re-running an earlier cell after this one will silently use
# the manual labels instead.
labels = read_dmps_maual_labels()

In [12]:
# Run the current model on the unlabeled real data.
result = hmm.predict(model, unlabeled_real_data)

# Plot the prediction that was just computed. The previous version passed
# `results` - the value left over from the earlier hmm.test call on the
# synthetic test split - and never used `result` at all.
# NOTE(review): the prediction is passed in the third position, which the other
# generate_plots calls use for labels - confirm this is the intended slot.
viz.generate_plots('labeled_real_data', unlabeled_real_data, result)

/home/gfogwil/Documentos/Facultad/Tesis/programs/htk/HTKTools/HVite -C ../src/models/HTK/misc/config -H ../models/hmm/7/macros -H ../models/hmm/7/hmmdefs -A -J ../models/classes/ -J ../models/xforms mllr1 -h ../data/interim/2015.test_D_A.real/%* -k -w ../src/models/HTK/misc/wdnet -S ../data/interim/2015.test_D_A.real.scp -i ../data/interim/results.mlf ../src/models/HTK/misc/dict ../src/models/HTK/misc/monophones 

