In [4]:
from hmmlearn import hmm
import random
import numpy as np
from sklearn.utils import check_random_state
from itertools import product

### Set up System to be Modelled

In [5]:
states = ["happy","sad","angry"]
n_states = len(states)

In [6]:
observations_dict={0:"rom-coms",
                   1:"facebook",
                   2:"dishes",
                   3:"whiskey",
                   4:"guitar"}
n_features = len(observations_dict)

In [21]:
model = hmm.MultinomialHMM(n_components=n_states,
                           n_iter=100,
                           tol=1,
                           verbose=True,
                           init_params="ste")

### Initialize Model with random _Start Probability_, _Transition Probability_ and _Emission Probability_ 

In [15]:
random_state =check_random_state(1262364)
startprob = random_state.rand(n_states)
transprob = random_state.rand(n_states, n_states)
emissionprob = random_state.rand(n_states, n_features)

In [16]:
def normalize(a, axis=None):
    a_sum = a.sum(axis)
    if axis and a.ndim > 1:
        a_sum[a_sum == 0] = 1
        shape = list(a.shape)
        shape[axis] = 1
        a_sum.shape = shape
    a /= a_sum

In [17]:
normalize(startprob, axis = 0)
normalize(transprob, axis = 1)
normalize(emissionprob, axis = 1)

In [19]:
print("Initial Probability of states:\n", startprob)
print("Initial Transition Probability:\n", transprob)
print("Initial Emission Probability:\n", emissionprob)

Initial Probability of states:
 [ 0.66745146  0.30811764  0.02443091]
Initial Transition Probability:
 [[ 0.37170102  0.21095157  0.41734741]
 [ 0.3429768   0.35836152  0.29866168]
 [ 0.40965589  0.23887871  0.3514654 ]]
Initial Emission Probability:
 [[ 0.1033951   0.40292229  0.32794463  0.12377851  0.04195948]
 [ 0.23339948  0.19291119  0.1682474   0.37836137  0.02708056]
 [ 0.27825058  0.24215855  0.08723548  0.30362295  0.08873244]]


In [20]:
model.startprob_ = startprob
model.transmat_ = transprob
model.emissionprob_ = emissionprob

### Load data & train model

In [22]:
observations = np.loadtxt("training_observations.csv", dtype = int)
observations = np.array([[x] for x in observations])

In [23]:
print(observations[:10], "...")
print("n_observations: ", len(observations))

[[0]
 [4]
 [0]
 [0]
 [0]
 [1]
 [2]
 [0]
 [0]
 [4]] ...
n_observations:  1000000


In [25]:
print(observations[:20].T[0], "...")
human_read = [observations_dict[obs] for obs in observations[:20].T[0]] 
print(human_read, "...")

[0 4 0 0 0 1 2 0 0 4 4 4 0 4 0 1 4 4 4 3] ...
['rom-coms', 'guitar', 'rom-coms', 'rom-coms', 'rom-coms', 'facebook', 'dishes', 'rom-coms', 'rom-coms', 'guitar', 'guitar', 'guitar', 'rom-coms', 'guitar', 'rom-coms', 'facebook', 'guitar', 'guitar', 'guitar', 'whiskey'] ...


In [26]:
model_trained = model.fit(observations)

         1    -1672500.6529             +nan
         2    -1532486.8020     +140013.8509
         3    -1531673.8124        +812.9896
         4    -1530767.9531        +905.8593
         5    -1529708.6971       +1059.2560
         6    -1528433.2914       +1275.4057
         7    -1526885.0550       +1548.2364
         8    -1525034.4099       +1850.6451
         9    -1522912.5344       +2121.8755
        10    -1520640.5752       +2271.9593
        11    -1518417.7706       +2222.8045
        12    -1516448.5736       +1969.1971
        13    -1514853.4941       +1595.0795
        14    -1513637.5943       +1215.8998
        15    -1512728.7141        +908.8802
        16    -1512034.8208        +693.8933
        17    -1511479.3649        +555.4559
        18    -1511010.7180        +468.6469
        19    -1510598.2404        +412.4776
        20    -1510225.2184        +373.0220
        21    -1509882.9805        +342.2379
        22    -1509567.0180        +315.9624
        23

### How have the emission and transition probabilities changed?

In [27]:
emission = model_trained.emissionprob_

In [28]:
print("\t\t".join([" "]+[observations_dict[i] for i in range(0, len(observations_dict))]))
for i in range(len(states)):
    print("\t\t".join(["state_" + str(i)] + [ '{:.2f}'.format(j) for j in emission[i]]))

 		rom-coms		facebook		dishes		whiskey		guitar
state_0		0.16		0.10		0.06		0.06		0.62
state_1		0.40		0.10		0.18		0.22		0.10
state_2		0.08		0.10		0.03		0.22		0.58


In [720]:
transition = model_trained.transmat_

In [721]:
print("\t".join([" "]+["state_" + str(i) for i in range(0, len(states))]))
for i in range(len(states)):
    print("\t".join(["state_" + str(i)] + [ '{:.2f}'.format(j) for j in transition[i]]))

 	state_0	state_1	state_2
state_0	0.77	0.06	0.17
state_1	0.06	0.75	0.19
state_2	0.13	0.10	0.77


### How to the states generated by the model map to our understanding of the emotional states?

In [735]:
state_mapping = {0:"happy", 1:"angry", 2:"sad"}

In [748]:
create_visualizations(model_trained, observations_dict, state_mapping, ".")

In [751]:
from IPython.display import IFrame
IFrame('emissionprobmat.html', width=800, height=350)

In [752]:
from IPython.display import IFrame
IFrame('transmat.html', width=800, height=350)

### What does the robot interpret from observing the human?

In [None]:
observations = np.loadtxt("new_observations.csv", dtype = int)
observations = np.array([[x] for x in observations])
print(observations[:20].T[0], "...")

In [723]:
logprob, robot_hears = model_trained.decode(observations, algorithm="viterbi")
print(robot_hears[:50])

[1 2 0 0 0 0 0 0 0 0 2 2 2 2 0 0 2 2 2 2 2 0 0 2 2 2 2 2 0 0 0 0 0 1 1 1 1
 2 2 2 2 1 1 1 1 1 1 1 1 1]


In [732]:
print("Human does:", ", ".join(map(lambda x: observations_dict[x[0]], observations[:20])),"...\n")
print("Robot thinks:", ", ".join(map(lambda x: state_mapping[x], robot_hears[:20])),"...")

Human does: whiskey, netflix, guitar, facebook, guitar, facebook, guitar, guitar, guitar, guitar, netflix, netflix, netflix, netflix, guitar, guitar, netflix, guitar, netflix, whiskey ...

Robot thinks: angry, sad, happy, happy, happy, happy, happy, happy, happy, happy, sad, sad, sad, sad, happy, happy, sad, sad, sad, sad ...


### How certain is the robot of his interpretation?

In [728]:
robot_thinks = model_trained.predict_proba(observations)

In [734]:
for i in range(10,20):
    print("Human does: {:}".format(observations_dict[observations[i][0]]))
    #print("The value is {:,}".format(x))
    print("Robot thinks: {:}: {:.2f}, {:}: {:.2f}, {:}: {:.2f}\n".format(
                                    state_mapping[0], robot_thinks[i][0],
                                    state_mapping[1], robot_thinks[i][1],
                                    state_mapping[2], robot_thinks[i][2]))
print(".....")

Human does: netflix
Robot thinks: happy: 0.13, angry: 0.01, sad: 0.86

Human does: netflix
Robot thinks: happy: 0.03, angry: 0.00, sad: 0.96

Human does: netflix
Robot thinks: happy: 0.03, angry: 0.00, sad: 0.97

Human does: netflix
Robot thinks: happy: 0.11, angry: 0.01, sad: 0.88

Human does: guitar
Robot thinks: happy: 0.72, angry: 0.02, sad: 0.26

Human does: guitar
Robot thinks: happy: 0.76, angry: 0.02, sad: 0.22

Human does: netflix
Robot thinks: happy: 0.43, angry: 0.01, sad: 0.56

Human does: guitar
Robot thinks: happy: 0.55, angry: 0.02, sad: 0.43

Human does: netflix
Robot thinks: happy: 0.22, angry: 0.03, sad: 0.76

Human does: whiskey
Robot thinks: happy: 0.25, angry: 0.13, sad: 0.62

.....


In [747]:
def _create_transmat_nodes(transmat, states_encoding):
    """
    It creates the nodes for a Sankey diagram using a transition matrix and a dictionary for the
    states names.
    :param transmat: HMM transition matrix
    :param states_dict: Dictionary mapping state label to state name {0:'exploring', ... }.
            keys must be integers ascending from 0
    :return: A list of array with the format: ['origin', 'destination', probability]
    """

    n_states = len(states_encoding)
    return [["From: " + states_encoding[i[0]], "To: " + states_encoding[i[1]], transmat[i[0], i[1]]]
            for i in product(range(n_states), repeat=2)]


def _create_emissionprob_nodes(emissionprob, states_encoding, hmm_dict):
    """
    It creates the nodes for a Sankey diagram using a emission prob matrix and a dictionary for the
    states names.
    :param emissionprob: HMM transition matrix
    :param states_dict: Dictionary with the state's names
    :param hmm_dict: dictionary used for trained model
    :return: A list of array with the format: ['state', 'observation', probability]
    """

    n_states = len(states_encoding)
    return [[states_encoding[i[0]], hmm_dict[i[1]],
             emissionprob[i[0], i[1]]] for i in product(range(n_states), hmm_dict.keys())]


def _create_js(nodes_list, file_path, filename):
    """
    Method to create a html/js file using google charts javascript library for sankey diagrams
    :param nodes_list: A list of arrays with the format: ['state', 'observation', probability]
    :param file_path: local or s3 location to save file
    :param filename: name for file without extension
    """

    header = """<html>
<script type="text/javascript" src="https://www.gstatic.com/charts/loader.js"></script>
       <div id="sankey_basic" style="width: 600px; height: 320px;"></div>
       <script>
       google.charts.load('current', {'packages':['sankey']});
       google.charts.setOnLoadCallback(drawChart);

       function drawChart() {
       var data = new google.visualization.DataTable();
       data.addColumn('string', 'From');
       data.addColumn('string', 'To');
       data.addColumn('number', 'Weight');
       data.addRows("""

    footer = """);

        // Sets chart options.
        var options = {
          width: 600,
        };

        // Instantiates and draws our chart, passing in some options.
        var chart = new google.visualization.Sankey(document.getElementById('sankey_basic'));
        chart.draw(data, options);
      }
      </script>
</html>
    """

    if file_path[-1] != '/':
        file_path += '/'
    filename += '.html'

    data = header+str(nodes_list)+footer
    file = open(file_path+filename, 'w')
    file.write(data)
    file.close()


def create_visualizations(hmm_trained, hmm_dict, states_encoding, file_path):
    """

    :param hmm_trained: trained hmm model
    :param hmm_dict: dictionary used for trained model
    :param states_encoding: Dictionary mapping state name to state label {'exploring':0, ... }.
            As output by model_label_fitting.fit_labels
    :param file_path: string of path to directory where html sankey diagrams to be saved
    :return: none
    """
    transmat = hmm_trained.transmat_
    transmat_nodes = _create_transmat_nodes(transmat, states_encoding)
    _create_js(transmat_nodes, file_path, 'transmat')

    emissionprob = hmm_trained.emissionprob_
    emissionprob_nodes = _create_emissionprob_nodes(emissionprob, states_encoding, hmm_dict)
    _create_js(emissionprob_nodes, file_path,'emissionprobmat')