In [1]:
import numpy as np
import pandas as pd
import networkx as nx
import matplotlib.pyplot as plt
%matplotlib inline

In [2]:
convs = {
    0 : "neutral",
    1 : "anger",
    2 : "disgust",
    3 : "fear",
    4 : "happy",
    5 : "sad",
    6 : "surprise",
    -1 : "unknown",
}

INIT = "^"
END = "$"

In [3]:
PATH = "data/sequence_train"
files = os.listdir(PATH)

records = {}
c_records = {}
lm_record = {}


for file in files:
    l_record = {}
    with open(PATH +"/"+ file, "r") as fh:
        last_value = INIT
        counter = 0
        order = 0
        for line in fh:
            if line.strip() == "Neutral,Anger,Disgust,Fear,Happiness,Sadness,Surprise":
                continue
            else:
                value = int(line.strip())

            try:
                assert value in {-1, 0, 1, 2, 3, 4, 5, 6}
                value = convs[value]
            except AssertionError:
                print(file, line, value, sep="<>")

            records.setdefault(last_value, {}).setdefault(value, 0)
            c_records.setdefault((last_value, value), 0)
            l_record.setdefault(value, 0)

            if last_value == value:
                records[last_value][value] *= 0.95
                c_records[(last_value, value)] *= 0.95

            records[last_value][value] += 1
            c_records[(last_value, value)] += 1
            l_record[value] += 1


            last_value = value

        records.setdefault(value, {}).setdefault(END, 0)
        c_records.setdefault((value, END), 0)
        records[value][END] += 1
        c_records[(value, END)] += 1

        for key, v in l_record.items():
            lm_record.setdefault(key, {}).setdefault(value, 0)
            lm_record[key][value] += v



In [4]:
df = pd.DataFrame(records)
print(df)
print()
df = df.reindex(sorted(df.columns), axis=1).sort_index()
print(df)

             ^  neutral  happy  surprise  anger  unknown  disgust   sad  fear
neutral   26.0     20.0  106.0      77.0   32.0     13.0     27.0  14.0  16.0
unknown    7.0     12.0   20.0       9.0    1.0     20.0      1.0   8.0   1.0
surprise   4.0     91.0   69.0      20.0   21.0      7.0      8.0   2.0   8.0
sad        3.0     15.0    3.0       3.0    6.0      5.0      1.0  20.0   9.0
happy     11.0    105.0   20.0      72.0   20.0     19.0      7.0   7.0   7.0
anger      NaN     36.0   15.0      24.0   20.0      4.0      2.0   5.0   5.0
disgust    NaN     16.0   10.0      13.0    3.0      1.0     20.0   3.0   4.0
$          NaN     13.0   18.0       3.0    5.0      9.0      1.0   2.0   NaN
fear       NaN     23.0    7.0       9.0    3.0      1.0      3.0   4.0  20.0

             ^  anger  disgust  fear  happy  neutral   sad  surprise  unknown
$          NaN    5.0      1.0   NaN   18.0     13.0   2.0       3.0      9.0
anger      NaN   20.0      2.0   5.0   15.0     36.0   5.0     

In [5]:
emissions = pd.DataFrame(lm_record)
emissions = emissions.reindex(sorted(emissions.columns), axis=1).sort_index()
emissions = emissions.div(emissions.sum(axis=1), axis=0)
d = pd.Series(name="fear")
emissions = emissions.append(d).fillna(0)
emissions

  d = pd.Series(name="fear")


Unnamed: 0,anger,disgust,fear,happy,neutral,sad,surprise,unknown
anger,0.841279,0.0,0.0,0.0,0.088663,0.067442,0.0,0.002616
disgust,0.0,0.839506,0.0,0.0,0.131687,0.0,0.0,0.028807
happy,0.046473,0.029977,0.048088,0.23112,0.278846,0.255609,0.098162,0.011725
neutral,0.149868,0.11659,0.017261,0.223614,0.330168,0.077727,0.078358,0.006414
sad,0.0,0.0,0.0,0.0,0.011988,0.972886,0.0,0.015126
surprise,0.02433,0.003645,0.001549,0.060324,0.021597,0.002369,0.886094,9.1e-05
unknown,0.108178,0.0,0.000933,0.616069,0.0467,0.06571,0.139885,0.022525
fear,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0


In [6]:
trans = df.drop(INIT, axis=1).drop(END, axis=0)
trans = trans.reindex(sorted(trans.columns), axis=1).sort_index()
trans = trans.div(trans.sum(axis=1), axis=0).fillna(0)

trans

Unnamed: 0,anger,disgust,fear,happy,neutral,sad,surprise,unknown
anger,0.18018,0.018018,0.045045,0.135135,0.324324,0.045045,0.216216,0.036036
disgust,0.042857,0.285714,0.057143,0.142857,0.228571,0.042857,0.185714,0.014286
fear,0.042857,0.042857,0.285714,0.1,0.328571,0.057143,0.128571,0.014286
happy,0.077821,0.027237,0.027237,0.077821,0.40856,0.027237,0.280156,0.07393
neutral,0.104918,0.088525,0.052459,0.347541,0.065574,0.045902,0.252459,0.042623
sad,0.096774,0.016129,0.145161,0.048387,0.241935,0.322581,0.048387,0.080645
surprise,0.09292,0.035398,0.035398,0.30531,0.402655,0.00885,0.088496,0.030973
unknown,0.013889,0.013889,0.013889,0.277778,0.166667,0.111111,0.125,0.277778


In [7]:
red = df.drop(END, axis=0).sort_index()
print(red)
init = red[INIT].fillna(0).tolist()
init = [i / sum(init) for i in init]
init = np.array(init)
init

             ^  anger  disgust  fear  happy  neutral   sad  surprise  unknown
anger      NaN   20.0      2.0   5.0   15.0     36.0   5.0      24.0      4.0
disgust    NaN    3.0     20.0   4.0   10.0     16.0   3.0      13.0      1.0
fear       NaN    3.0      3.0  20.0    7.0     23.0   4.0       9.0      1.0
happy     11.0   20.0      7.0   7.0   20.0    105.0   7.0      72.0     19.0
neutral   26.0   32.0     27.0  16.0  106.0     20.0  14.0      77.0     13.0
sad        3.0    6.0      1.0   9.0    3.0     15.0  20.0       3.0      5.0
surprise   4.0   21.0      8.0   8.0   69.0     91.0   2.0      20.0      7.0
unknown    7.0    1.0      1.0   1.0   20.0     12.0   8.0       9.0     20.0


array([0.        , 0.        , 0.        , 0.21568627, 0.50980392,
       0.05882353, 0.07843137, 0.1372549 ])

In [8]:
import networkx as nx
import matplotlib.pyplot as plt
%matplotlib inline

In [9]:
G = nx.MultiDiGraph()

for n in convs.values():
    G.add_node(n)

In [10]:
print(f'Nodes:\n{G.nodes()}\n')

Nodes:
['neutral', 'anger', 'disgust', 'fear', 'happy', 'sad', 'surprise', 'unknown']



In [11]:
for key, value in c_records.items():
    start, end = key
    G.add_edge(start, end, weight=value)

print(f'Edges:')
print(G.edges(data=True))  

Edges:
[('neutral', 'neutral', {'weight': 19.99999999999995}), ('neutral', 'happy', {'weight': 105}), ('neutral', 'anger', {'weight': 36}), ('neutral', 'disgust', {'weight': 16}), ('neutral', '$', {'weight': 13}), ('neutral', 'unknown', {'weight': 12}), ('neutral', 'surprise', {'weight': 91}), ('neutral', 'fear', {'weight': 23}), ('neutral', 'sad', {'weight': 15}), ('anger', 'anger', {'weight': 19.99999999999995}), ('anger', '$', {'weight': 5}), ('anger', 'happy', {'weight': 20}), ('anger', 'neutral', {'weight': 32}), ('anger', 'disgust', {'weight': 3}), ('anger', 'fear', {'weight': 3}), ('anger', 'surprise', {'weight': 21}), ('anger', 'sad', {'weight': 6}), ('anger', 'unknown', {'weight': 1}), ('disgust', 'disgust', {'weight': 19.99999999999995}), ('disgust', '$', {'weight': 1}), ('disgust', 'fear', {'weight': 3}), ('disgust', 'neutral', {'weight': 27}), ('disgust', 'surprise', {'weight': 8}), ('disgust', 'happy', {'weight': 7}), ('disgust', 'anger', {'weight': 2}), ('disgust', 'unkno

In [36]:
def Viterbi(pi, a, b, obs):
    pi = np.array(pi)
    a = np.array(a)
    b = np.array(b)
    obs = np.array(obs)
    
    nStates = np.shape(b)[0]
    T = np.shape(obs)[0]

    path = np.zeros(T)
    delta = np.zeros((nStates,T))
    phi = np.zeros((nStates,T))

    delta[:,0] = pi * b[:,obs[0]]
    phi[:,0] = 0

    for t in range(1,T):
        for s in range(nStates):
            delta[s,t] = np.max( delta[:,t-1] * a[:,s] ) * b[s,obs[t]]
            phi[s,t] = np.argmax(delta[:,t-1] * a[:,s])

    path[T-1] = np.argmax(delta[:,T-1])
    for t in range(T-2,-1,-1):
        path[t] = phi[int(path[t+1]),t+1]

    return path,delta, phi

In [37]:
obs = np.array([0, 1, 3, 4, 5])

In [38]:
path, delta, phi = Viterbi(init, trans, emissions, obs)
print('\nsingle best state path: \n', path)
print('delta:\n', delta)
print('phi:\n', phi)


single best state path: 
 [3. 1. 6. 3. 4.]
delta:
 [[0.00000000e+00 0.00000000e+00 0.00000000e+00 6.96704201e-07
  4.47402692e-08]
 [0.00000000e+00 7.39131315e-04 0.00000000e+00 3.94204365e-07
  0.00000000e+00]
 [0.00000000e+00 2.63925442e-05 9.76161758e-06 8.34721820e-07
  6.09607595e-08]
 [3.23245727e-02 3.02017287e-04 2.36113982e-05 8.52456406e-06
  5.15632827e-08]
 [0.00000000e+00 0.00000000e+00 0.00000000e+00 4.08205837e-07
  3.38836592e-06]
 [1.43119030e-03 3.20917015e-06 1.91089952e-06 1.61622241e-08
  5.50104867e-10]
 [8.48454159e-03 0.00000000e+00 8.45660756e-05 3.49490961e-07
  1.56929377e-07]
 [0.00000000e+00 0.00000000e+00 0.00000000e+00 0.00000000e+00
  0.00000000e+00]]
phi:
 [[0. 3. 1. 6. 3.]
 [0. 3. 1. 6. 3.]
 [0. 3. 1. 6. 2.]
 [0. 6. 1. 6. 3.]
 [0. 3. 1. 6. 3.]
 [0. 3. 1. 6. 3.]
 [0. 3. 1. 6. 3.]
 [0. 3. 3. 6. 3.]]
