In [None]:
import pygad
import numpy
import random
from pomegranate import DiscreteDistribution, HiddenMarkovModel
from hmm import ChromosomeHMM

In [None]:
def normalize_rows(prob_array):
    """Rescale an array so that its entries sum to 1 along the last axis.

    A 1-D input is treated as a single row; for inputs with more
    dimensions every slice along the last axis is normalised independently.

    Parameters
    ----------
    prob_array : array-like
        Non-negative weights. Plain Python sequences (lists, tuples) are
        accepted as well as numpy arrays.

    Returns
    -------
    numpy.ndarray
        Array of the same shape as the input whose last-axis slices sum to 1.

    Notes
    -----
    A slice whose sum is zero is not guarded against: the division then
    follows numpy semantics and produces NaN/inf for that slice.
    """
    # Coerce first so that lists/tuples work; arrays pass through unchanged.
    weights = numpy.asanyarray(prob_array)
    # keepdims keeps the summed axis so the division broadcasts row-wise.
    row_sums = numpy.sum(weights, axis=-1, keepdims=True)
    return weights / row_sums

In [None]:
def create_discrete_hmm(num_states, alphabet):
    """Build a discrete HMM with randomly drawn, row-normalised parameters.

    Emission, transition and start weights are drawn with
    ``numpy.random.rand`` (in that order) and passed through
    ``normalize_rows`` before the model is assembled.

    Parameters
    ----------
    num_states : int
        Number of hidden states.
    alphabet : sequence
        Symbols the states can emit; its length fixes the number of
        emission probabilities per state.

    Returns
    -------
    The model produced by ``HiddenMarkovModel.from_matrix``.
    """
    num_symbols = len(alphabet)

    # One emission row per state, one column per alphabet symbol.
    emission_matrix = normalize_rows(numpy.random.rand(num_states, num_symbols))
    emission_dists = []
    for emission_row in emission_matrix:
        symbol_probs = dict(zip(alphabet, emission_row))
        emission_dists.append(DiscreteDistribution(symbol_probs))

    # Square state-to-state matrix, then the start distribution.
    transition_matrix = normalize_rows(numpy.random.rand(num_states, num_states))
    start_vector = normalize_rows(numpy.random.rand(num_states))

    return HiddenMarkovModel.from_matrix(transition_matrix, emission_dists, start_vector)

In [None]:
def generate_random_solution_vector(num_states, alphabet):
    """Draw random HMM parameters and pack them into one flat list.

    The layout of the returned list is: ``num_states`` start probabilities,
    then the ``num_states x len(alphabet)`` emission matrix flattened
    row by row, then the ``num_states x num_states`` transition matrix
    flattened row by row. Every row is normalised with ``normalize_rows``.

    Parameters
    ----------
    num_states : int
        Number of hidden states.
    alphabet : sequence
        Emission symbols; only its length is used here.

    Returns
    -------
    list
        Flat list of ``num_states * (1 + len(alphabet) + num_states)`` values.
    """
    symbol_count = len(alphabet)

    starts = normalize_rows(numpy.random.rand(num_states))
    emissions = normalize_rows(numpy.random.rand(num_states, symbol_count))
    transitions = normalize_rows(numpy.random.rand(num_states, num_states))

    # Join the three parameter groups in the order the decoder expects.
    flat = numpy.concatenate((starts, emissions.ravel(), transitions.ravel()))
    return list(flat)

In [None]:
def hmm_from_vector(vector, n_states, alphabet):
    """Decode a flat parameter vector into a HiddenMarkovModel.

    Expected layout of ``vector``:

    * ``vector[:n_states]`` -- start probabilities,
    * the next ``n_states * len(alphabet)`` entries -- emission matrix,
      one row per state,
    * all remaining entries -- transition matrix, which must hold exactly
      ``n_states * n_states`` values.

    Parameters
    ----------
    vector : sequence or numpy.ndarray
        Flat parameter vector supporting slicing.
    n_states : int
        Number of hidden states.
    alphabet : sequence
        Emission symbols, paired positionally with each emission row.

    Returns
    -------
    The model produced by ``HiddenMarkovModel.from_matrix``.

    Raises
    ------
    ValueError
        Raised by ``numpy.reshape`` when a slice does not have the size
        required for its matrix.
    """
    n_symbols = len(alphabet)
    emission_begin = n_states
    emission_end = emission_begin + n_states * n_symbols

    start_probs = vector[:emission_begin]

    emission_matrix = numpy.reshape(vector[emission_begin:emission_end], (n_states, n_symbols))
    emission_dists = [
        DiscreteDistribution({symbol: prob for symbol, prob in zip(alphabet, row)})
        for row in emission_matrix
    ]

    # Everything after the emission block is the transition matrix.
    transition_matrix = numpy.reshape(vector[emission_end:], (n_states, n_states))

    return HiddenMarkovModel.from_matrix(transition_matrix, emission_dists, start_probs)

In [None]:
# Smoke test: build a small random HMM and print its JSON serialisation.
test_hmm = create_discrete_hmm(num_states=4, alphabet=list('abcdef'))
print(test_hmm.to_json())

# representation as dictionary

# mutation 


# representation as matrix dict
# (N = number of states, M = number of alphabet symbols)
# emission_probs => N*M numpy array
# transition_probs => N*N numpy array
# initial_probs => N numpy array

# HMM-matrix representation to HMM


In [None]:
# Range handed to pygad.GA as init_range_low / init_range_high.
init_range_low = 0
init_range_high = 1

# Dimensions of every HMM handled in this notebook.
N_STATES = 6
ALPHABET = list('ABCDEFG')

# Seed the global generators so that a fresh "Restart & Run All" reproduces
# the same reference model and the same GA run from this cell onwards.
SEED = 42
random.seed(SEED)
numpy.random.seed(SEED)

# Randomly initialised reference model; fitness_func samples from it.
parent_hmm = create_discrete_hmm(N_STATES, ALPHABET)

# Genetic Algorithm Steps
def fitness_func(solution, solution_idx):
    """Score a candidate vector by how well its HMM explains reference data.

    The candidate is decoded with ``hmm_from_vector`` and evaluated on
    sequences drawn from the module-level ``parent_hmm``; the fitness is
    the mean log-probability the candidate assigns to those sequences.

    Parameters
    ----------
    solution : sequence or numpy.ndarray
        Flat parameter vector in the layout expected by ``hmm_from_vector``.
    solution_idx : int
        Index of the solution in the population (unused).

    Returns
    -------
    Mean of ``log_probability`` over the sampled sequences.
    """
    # New sequences are sampled on every call, so two evaluations of the
    # same solution can differ.
    # NOTE(review): presumably sample(1, 50) means one sequence of length
    # 50 -- confirm against the pomegranate signature.
    reference_sequences = parent_hmm.sample(1, 50)
    candidate_hmm = hmm_from_vector(solution, N_STATES, ALPHABET)

    log_prob_sum = sum(candidate_hmm.log_probability(seq) for seq in reference_sequences)
    return log_prob_sum / len(reference_sequences)


def crossover_func(parents, offspring_size, ga_instance):
    """Single-point crossover between cyclically consecutive parents.

    Child ``i`` takes the genes before a random cut point from parent
    ``i % n_parents`` and the genes from the cut point onwards from parent
    ``(i + 1) % n_parents``. The cut point is drawn uniformly from
    ``range(offspring_size[1])``, so a cut of 0 copies the second parent.

    Parameters
    ----------
    parents : numpy.ndarray
        2-D array with one selected parent per row; it is not modified.
    offspring_size : tuple
        ``(number_of_children, number_of_genes)``.
    ga_instance : object
        Passed in by the caller; unused.

    Returns
    -------
    numpy.ndarray
        Array built from the list of children, one child per row.
    """
    # NOTE(review): the cut can fall inside a probability row, so a child's
    # rows need not sum to 1 -- confirm whether that is acceptable.
    children = []
    while len(children) != offspring_size[0]:
        child_idx = len(children)
        n_parents = parents.shape[0]
        head_parent = parents[child_idx % n_parents, :]
        tail_parent = parents[(child_idx + 1) % n_parents, :]

        cut = numpy.random.choice(range(offspring_size[1]))

        # concatenate allocates a new row, leaving both parents untouched.
        children.append(numpy.concatenate((head_parent[:cut], tail_parent[cut:])))

    return numpy.array(children)


# Random Child Vector
possible_solution = generate_random_solution_vector(N_STATES, ALPHABET)
print(possible_solution)

# number of solutions to be selected as parents
num_parents_mating = 2

# number of candidate solutions kept in every generation
sol_per_pop = 5

# Initial population (cannot be generated by pygad because the start,
# emission and transition rows of every solution have to be row-stochastic),
# so each individual is built with generate_random_solution_vector instead.
initial_population = numpy.array([
    generate_random_solution_vector(N_STATES, ALPHABET)
    for _ in range(sol_per_pop)
])

# Evaluate fitness of vector
# fitness = fitness_func(possible_solution, 0)
# print(fitness)

# NOTE(review): only the starting population is guaranteed row-stochastic;
# crossover and pygad's mutation may still produce rows that do not sum
# to 1 -- confirm whether solutions should be re-normalised when decoded.
ga_instance = pygad.GA(num_generations=10,
                       sol_per_pop=sol_per_pop,
                       num_parents_mating=num_parents_mating,
                       num_genes=(N_STATES + N_STATES**2 + N_STATES*len(ALPHABET)),
                       # Per the pygad docs, init_range_* has no effect once
                       # an explicit initial population is supplied.
                       initial_population=initial_population,
                       fitness_func=fitness_func,
                       crossover_type=crossover_func,
                       init_range_high=init_range_high,
                       init_range_low=init_range_low)

ga_instance.run()
ga_instance.plot_fitness()