In [2]:
from hmm.bw import train_multiple_observations
from hmm.hmm import random_left_right_hmm_params
from data.digits import DigitDataset

# Draw initial parameters for a 4-state model over 128 symbols.
hmm_params = random_left_right_hmm_params(n_states=4, n_symbols=128)

# Take 10 observations of category 0 from the 'train' split.
train_data = DigitDataset(dataset='train')
observations = train_data.get_first_n_observations(category=0, n_observations=10)

# Re-estimate the parameters on those observations; `trace` is the second
# value returned by the trainer.
reestimated_params, trace = train_multiple_observations(
    hmm_params,
    observations,
    n_iterations=10,
)

In [4]:
# Display the emission matrix of the parameters returned by
# train_multiple_observations in the training cell above.
reestimated_params.emission_matrix

array([[0.00000000e+000, 0.00000000e+000, 0.00000000e+000,
        0.00000000e+000, 0.00000000e+000, 0.00000000e+000,
        6.30959891e-002, 0.00000000e+000, 0.00000000e+000,
        0.00000000e+000, 0.00000000e+000, 1.96661975e-051,
        0.00000000e+000, 0.00000000e+000, 0.00000000e+000,
        0.00000000e+000, 0.00000000e+000, 0.00000000e+000,
        0.00000000e+000, 0.00000000e+000, 0.00000000e+000,
        0.00000000e+000, 0.00000000e+000, 0.00000000e+000,
        0.00000000e+000, 0.00000000e+000, 0.00000000e+000,
        0.00000000e+000, 0.00000000e+000, 0.00000000e+000,
        0.00000000e+000, 0.00000000e+000, 0.00000000e+000,
        0.00000000e+000, 0.00000000e+000, 0.00000000e+000,
        0.00000000e+000, 0.00000000e+000, 0.00000000e+000,
        0.00000000e+000, 0.00000000e+000, 4.76464937e-001,
        0.00000000e+000, 0.00000000e+000, 9.37593941e-003,
        0.00000000e+000, 0.00000000e+000, 0.00000000e+000,
        0.00000000e+000, 0.00000000e+000, 0.00000000e+00

In [1]:
%load_ext autoreload
%autoreload 2

from hmm.hmm import random_left_right_hmm_params
import numpy as np
import hmm.bw as bw
from data.digits import DigitDataset
import matplotlib.pyplot as plt
import seaborn as sns
from ga.numba_ga import GaHMM

import ga.fitness as fitness
import ga.mutation as mutation
import ga.selection as selection
import ga.crossover as crossover


#### Drawing `n_hmms` random HMMs and then optimizing each of them for `n_iterations` steps

In [2]:
# Experiment configuration shared by the cells below.
n_samples = 10                     # observation sequences requested from the dataset
n_hmms, n_iterations = 50, 2000    # HMMs per experiment / training iterations per HMM
n_states, n_symbols = 4, 128       # model dimensions passed to every HMM constructor

# Observations of category 0 from the 'train' split.
training_data = DigitDataset('train')
observations = training_data.get_first_n_observations(
    category=0,
    n_observations=n_samples,
)

In [4]:
# Short GaHMM run: build a population of n_hmms models, attach the fitness /
# mutation / crossover / parent-selection callables, call
# train_population_with_baum_welch on it, then start() and plot().
# NOTE(review): the saved output of this cell is
#   IndexError: index 21 is out of bounds for axis 1 with size 21
# Which of the calls at the bottom raises it is not visible from this
# notebook — check the full traceback before relying on this cell.
gabw = GaHMM(
    n_symbols=n_symbols,
    n_states=n_states,
    population_size=n_hmms,
    n_generations=10
)
# NOTE(review): the later GaHMM cells use fitness.numba_mean_log_prob_fitness and
# mutation.numba_constant_uniform_mutation2 instead of the two factories below —
# confirm these variants are still compatible with ga.numba_ga.GaHMM.
gabw.fitness_func = fitness.mean_log_prob_fitness(observations)
gabw.mutation_func = mutation.constant_uniform_mutation_factory(mutation_threshold=0.001)
# Alternative mutation operator, kept for reference:
# gabw.mutation_func = mutation.dynamic_uniform_mutation_factory(0.001, beta=1)
gabw.crossover_func = crossover.rank_weighted(crossover.uniform_crossover)
gabw.parent_select_func = selection.rank_selection_factory(gabw.population_size)
# Second positional argument is presumably the iteration count (other cells
# pass it as n_iterations=10) — TODO confirm.
gabw.train_population_with_baum_welch(observations, 10)
gabw.start()
gabw.plot()

IndexError: index 21 is out of bounds for axis 1 with size 21

In [None]:


# Baum-Welch-only runs: one independent training run per randomly drawn HMM.
all_hmm_params = [random_left_right_hmm_params(n_states, n_symbols) for _ in range(n_hmms)]

# Row k receives the per-iteration trace returned for HMM k.
log_probability_of_hmm_after_iteration = np.empty((n_hmms, n_iterations))

for hmm_index, initial_params in enumerate(all_hmm_params):
    # The re-estimated parameters are not used further; only the trace is stored.
    trained_params, log_probability_trace = bw.train_multiple_observations(
        initial_params,
        observations,
        n_iterations,
    )
    log_probability_of_hmm_after_iteration[hmm_index, :] = log_probability_trace

In [None]:
# Iterations passed to train_population_with_baum_welch before start(); the
# same value is subtracted from n_iterations to obtain the generation count.
n_bw_iterations = 10

gabw_after_bw = GaHMM(
    n_states=n_states,
    n_symbols=n_symbols,
    population_size=n_hmms,
    n_generations=n_iterations - n_bw_iterations,
)

# Attach the fitness / mutation / crossover / parent-selection callables.
gabw_after_bw.fitness_func = fitness.numba_mean_log_prob_fitness(observations)
gabw_after_bw.mutation_func = mutation.numba_constant_uniform_mutation2(mutation_threshold=0.001)
gabw_after_bw.crossover_func = crossover.rank_weighted(crossover.uniform_crossover)
gabw_after_bw.parent_select_func = selection.rank_selection_factory(gabw_after_bw.population_size)

# Train the population first, then start the run.
gabw_after_bw.train_population_with_baum_welch(observations, n_iterations=n_bw_iterations)
gabw_after_bw.start()

In [None]:
# GaHMM run with no Baum-Welch call active: both train_population_with_baum_welch
# lines in this cell are commented out, so only start() and plot() execute.
gabw_before_bw = GaHMM(
    n_symbols=n_symbols,
    n_states=n_states,
    population_size=n_hmms,
    n_generations=200
)
gabw_before_bw.fitness_func = fitness.numba_mean_log_prob_fitness(observations)
# NOTE(review): mutation_threshold is 0.75 here but 0.001 in the other GaHMM
# cells — confirm the difference is intentional.
gabw_before_bw.mutation_func = mutation.numba_constant_uniform_mutation2(mutation_threshold=0.75)
gabw_before_bw.crossover_func = crossover.rank_weighted(crossover.uniform_crossover)
gabw_before_bw.parent_select_func = selection.rank_selection_factory(gabw_before_bw.population_size)
# Disabled pre-training step (note it refers to `gabw`, not `gabw_before_bw`):
# gabw.train_population_with_baum_welch(observations, 1)
gabw_before_bw.start()
# Disabled post-run Baum-Welch step:
# gabw_before_bw.train_population_with_baum_welch(observations, n_iterations=10)
gabw_before_bw.plot()

In [None]:
# Display the first element of the `gabw` population.
gabw.population[0]

In [None]:
# After the transpose, rows are iterations and columns are HMMs, so seaborn
# draws one line per HMM against the iteration index. Values are divided by
# n_samples, the number of observations requested from the dataset.
ax = sns.lineplot(data=log_probability_of_hmm_after_iteration.T / n_samples)
# Label the figure so it can be read on its own; the trailing semicolon
# suppresses the repr of the value returned by ax.set().
ax.set(
    title="bw.train_multiple_observations on randomly drawn HMMs",
    xlabel="iteration",
    ylabel="log probability / n_samples",
);

In [None]:
# Call plot() on the `gabw` run again (the cell that builds `gabw` already calls it once).
gabw.plot()

In [None]:

# Call plot() on the `gabw_after_bw` run; its defining cell does not plot.
gabw_after_bw.plot()

In [None]:
# Call plot() on the `gabw_before_bw` run again (its defining cell already calls it once).
gabw_before_bw.plot()