In [50]:
from hmmlearn import hmm
import random
import numpy as np
from sklearn.utils import check_random_state
from itertools import product

### Set up System to be Modelled

In [51]:
states = ["happy","sad","angry"]
n_states = len(states)

In [52]:
observations_dict={0:"rom-coms",
                   1:"facebook",
                   2:"dishes",
                   3:"whiskey",
                   4:"guitar"}
n_features = len(observations_dict)

In [58]:
model = hmm.MultinomialHMM(n_components=n_states,
                           n_iter=100,
                           tol=1,
                           verbose=True,
                           init_params="ste")

### Initialize Model with random _Start Probability_, _Transition Probability_ and _Emission Probability_ 

In [53]:
random_state =check_random_state(1262364)
startprob = random_state.rand(n_states)
transprob = random_state.rand(n_states, n_states)
emissionprob = random_state.rand(n_states, n_features)

In [54]:
def normalize(a, axis=None):
    a_sum = a.sum(axis)
    if axis and a.ndim > 1:
        a_sum[a_sum == 0] = 1
        shape = list(a.shape)
        shape[axis] = 1
        a_sum.shape = shape
    a /= a_sum

In [55]:
normalize(startprob, axis = 0)
normalize(transprob, axis = 1)
normalize(emissionprob, axis = 1)

In [56]:
print("Initial Probability of states:\n", startprob)
print("Initial Transition Probability:\n", transprob)
print("Initial Emission Probability:\n", emissionprob)

Initial Probability of states:
 [ 0.66745146  0.30811764  0.02443091]
Initial Transition Probability:
 [[ 0.37170102  0.21095157  0.41734741]
 [ 0.3429768   0.35836152  0.29866168]
 [ 0.40965589  0.23887871  0.3514654 ]]
Initial Emission Probability:
 [[ 0.1033951   0.40292229  0.32794463  0.12377851  0.04195948]
 [ 0.23339948  0.19291119  0.1682474   0.37836137  0.02708056]
 [ 0.27825058  0.24215855  0.08723548  0.30362295  0.08873244]]


In [57]:
model.startprob_ = startprob
model.transmat_ = transprob
model.emissionprob_ = emissionprob

### Load data & train model

In [59]:
observations = np.loadtxt("training_observations.csv", dtype = int)
observations = np.array([[x] for x in observations])

In [60]:
print(observations[:10], "...")
print("n_observations: ", len(observations))

[[3]
 [0]
 [0]
 [3]
 [2]
 [0]
 [0]
 [2]
 [4]
 [0]] ...
n_observations:  100000


In [61]:
print(observations[:20].T[0], "...")
human_read = [observations_dict[obs] for obs in observations[:20].T[0]] 
print(human_read, "...")

[3 0 0 3 2 0 0 2 4 0 0 3 0 1 0 1 0 0 4 4] ...
['whiskey', 'rom-coms', 'rom-coms', 'whiskey', 'dishes', 'rom-coms', 'rom-coms', 'dishes', 'guitar', 'rom-coms', 'rom-coms', 'whiskey', 'rom-coms', 'facebook', 'rom-coms', 'facebook', 'rom-coms', 'rom-coms', 'guitar', 'guitar'] ...


In [62]:
model_trained = model.fit(observations)

         1     -164747.2373             +nan
         2     -153037.1482      +11710.0892
         3     -152866.7003        +170.4479
         4     -152699.2429        +167.4574
         5     -152530.3798        +168.8631
         6     -152359.0513        +171.3285
         7     -152187.1565        +171.8948
         8     -152019.0006        +168.1559
         9     -151860.1996        +158.8010
        10     -151716.0657        +144.1339
        11     -151589.9943        +126.0714
        12     -151482.5973        +107.3971
        13     -151391.9678         +90.6294
        14     -151314.7771         +77.1908
        15     -151247.5039         +67.2732
        16     -151187.2571         +60.2468
        17     -151132.0594         +55.1977
        18     -151080.7559         +51.3034
        19     -151032.7687         +47.9872
        20     -150987.8471         +44.9216
        21     -150945.8805         +41.9667
        22     -150906.7819         +39.0986
        23

### How have the emission and transition probabilities changed?

In [63]:
emission = model_trained.emissionprob_

In [64]:
print("\t  ".join(["\t"]+[observations_dict[i] for i in range(0, len(observations_dict))]))
for i in range(len(states)):
    print("\t\t".join(["state_" + str(i)] + [ '{:.2f}'.format(j) for j in emission[i]]))

		  rom-coms	  facebook	  dishes	  whiskey	  guitar
state_0		0.11		0.10		0.04		0.14		0.60
state_1		0.10		0.10		0.30		0.39		0.10
state_2		0.62		0.10		0.09		0.09		0.10


In [65]:
transition = model_trained.transmat_

In [66]:
print("\t".join([" "]+["state_" + str(i) for i in range(0, len(states))]))
for i in range(len(states)):
    print("\t".join(["state_" + str(i)] + [ '{:.2f}'.format(j) for j in transition[i]]))

 	state_0	state_1	state_2
state_0	0.82	0.05	0.13
state_1	0.06	0.78	0.16
state_2	0.11	0.12	0.78


### How to the states generated by the model map to our understanding of the emotional states?

In [35]:
state_mapping = {0:"happy", 1:"angry", 2:"sad"}

In [67]:
create_visualizations(model_trained, observations_dict, state_mapping, ".")

In [68]:
from IPython.display import IFrame
IFrame('emissionprobmat.html', width=800, height=350)

In [40]:
from IPython.display import IFrame
IFrame('transmat.html', width=800, height=350)

### What does the robot interpret from observing the human?

In [69]:
observations = np.loadtxt("new_observations.csv", dtype = int)
observations = np.array([[x] for x in observations])
print(observations[:20].T[0], "...")

[0 4 0 0 0 1 2 0 0 4 4 4 0 4 0 1 4 4 4 3] ...


In [70]:
logprob, robot_hears = model_trained.decode(observations)
print(robot_hears[:20], "...")

[1 1 2 2 2 2 2 2 2 0 0 0 0 0 0 0 0 0 0 0] ...


In [71]:
print("Human does:", ", ".join(map(lambda x: observations_dict[x[0]], observations[10:30])),"...\n")
print("Robot thinks:", ", ".join(map(lambda x: state_mapping[x], robot_hears[10:30])),"...")

Human does: guitar, guitar, rom-coms, guitar, rom-coms, facebook, guitar, guitar, guitar, whiskey, guitar, rom-coms, whiskey, guitar, dishes, rom-coms, rom-coms, rom-coms, rom-coms, facebook ...

Robot thinks: happy, happy, happy, happy, happy, happy, happy, happy, happy, happy, happy, happy, happy, happy, sad, sad, sad, sad, sad, sad ...


### How certain is the robot of his interpretation?

In [72]:
robot_thinks = model_trained.predict_proba(observations)

In [73]:
for i in range(20,30):
    print("Human does: {:}".format(observations_dict[observations[i][0]]))
    #print("The value is {:,}".format(x))
    print("Robot thinks: {:}: {:.2f}, {:}: {:.2f}, {:}: {:.2f}\n".format(
                                    state_mapping[0], robot_thinks[i][0],
                                    state_mapping[1], robot_thinks[i][1],
                                    state_mapping[2], robot_thinks[i][2]))
print(".....")

Human does: guitar
Robot thinks: happy: 0.84, angry: 0.05, sad: 0.10

Human does: rom-coms
Robot thinks: happy: 0.50, angry: 0.08, sad: 0.42

Human does: whiskey
Robot thinks: happy: 0.50, angry: 0.25, sad: 0.25

Human does: guitar
Robot thinks: happy: 0.51, angry: 0.22, sad: 0.27

Human does: dishes
Robot thinks: happy: 0.17, angry: 0.29, sad: 0.54

Human does: rom-coms
Robot thinks: happy: 0.04, angry: 0.05, sad: 0.91

Human does: rom-coms
Robot thinks: happy: 0.01, angry: 0.02, sad: 0.97

Human does: rom-coms
Robot thinks: happy: 0.01, angry: 0.01, sad: 0.98

Human does: rom-coms
Robot thinks: happy: 0.02, angry: 0.02, sad: 0.95

Human does: facebook
Robot thinks: happy: 0.09, angry: 0.11, sad: 0.81

.....


In [37]:
def _create_transmat_nodes(transmat, states_encoding):
    """
    It creates the nodes for a Sankey diagram using a transition matrix and a dictionary for the
    states names.
    :param transmat: HMM transition matrix
    :param states_dict: Dictionary mapping state label to state name {0:'exploring', ... }.
            keys must be integers ascending from 0
    :return: A list of array with the format: ['origin', 'destination', probability]
    """

    n_states = len(states_encoding)
    return [["From: " + states_encoding[i[0]], "To: " + states_encoding[i[1]], transmat[i[0], i[1]]]
            for i in product(range(n_states), repeat=2)]


def _create_emissionprob_nodes(emissionprob, states_encoding, hmm_dict):
    """
    It creates the nodes for a Sankey diagram using a emission prob matrix and a dictionary for the
    states names.
    :param emissionprob: HMM transition matrix
    :param states_dict: Dictionary with the state's names
    :param hmm_dict: dictionary used for trained model
    :return: A list of array with the format: ['state', 'observation', probability]
    """

    n_states = len(states_encoding)
    return [[states_encoding[i[0]], hmm_dict[i[1]],
             emissionprob[i[0], i[1]]] for i in product(range(n_states), hmm_dict.keys())]


def _create_js(nodes_list, file_path, filename):
    """
    Method to create a html/js file using google charts javascript library for sankey diagrams
    :param nodes_list: A list of arrays with the format: ['state', 'observation', probability]
    :param file_path: local or s3 location to save file
    :param filename: name for file without extension
    """

    header = """<html>
<script type="text/javascript" src="https://www.gstatic.com/charts/loader.js"></script>
       <div id="sankey_basic" style="width: 600px; height: 320px;"></div>
       <script>
       google.charts.load('current', {'packages':['sankey']});
       google.charts.setOnLoadCallback(drawChart);

       function drawChart() {
       var data = new google.visualization.DataTable();
       data.addColumn('string', 'From');
       data.addColumn('string', 'To');
       data.addColumn('number', 'Weight');
       data.addRows("""

    footer = """);

        // Sets chart options.
        var options = {
          width: 600,
        };

        // Instantiates and draws our chart, passing in some options.
        var chart = new google.visualization.Sankey(document.getElementById('sankey_basic'));
        chart.draw(data, options);
      }
      </script>
</html>
    """

    if file_path[-1] != '/':
        file_path += '/'
    filename += '.html'

    data = header+str(nodes_list)+footer
    file = open(file_path+filename, 'w')
    file.write(data)
    file.close()


def create_visualizations(hmm_trained, hmm_dict, states_encoding, file_path):
    """

    :param hmm_trained: trained hmm model
    :param hmm_dict: dictionary used for trained model
    :param states_encoding: Dictionary mapping state name to state label {'exploring':0, ... }.
            As output by model_label_fitting.fit_labels
    :param file_path: string of path to directory where html sankey diagrams to be saved
    :return: none
    """
    transmat = hmm_trained.transmat_
    transmat_nodes = _create_transmat_nodes(transmat, states_encoding)
    _create_js(transmat_nodes, file_path, 'transmat')

    emissionprob = hmm_trained.emissionprob_
    emissionprob_nodes = _create_emissionprob_nodes(emissionprob, states_encoding, hmm_dict)
    _create_js(emissionprob_nodes, file_path,'emissionprobmat')