In [1]:
import pickle
import numpy as np
import cvxpy as cp
import matplotlib.pyplot as plt
import copy

In [2]:
np.random.seed(42)

In [3]:
N_players = 5
players = [f'player{i}' for i in range(1, N_players+1)]
n_components = 3 # num of hidden states
n_features = 3 # num of observed states
O_symbols = [0, 1, 2] # under-, avg-, over- performance
H_symbols = [0, 1, 2] # corresponding mental states
T = 100
learning_iterations = 100

In [4]:
class TeamModel:
    def __init__(self, initial_dist, M, N, R, emission_prob):
        self.initial_dist = initial_dist
        
        # Transition matrices
        self.M = M # dictionary. key = player, value = a row-stochastic matrix, observation-to-state transitions
        self.N = N # dictionary. key = player, value = a row-stochastic matrix, state-to-state transitions
        
        # Graph weights - Tie strengh
        self.R = R # row-stochastic matrix
        
        # Emission matrices
        self.emission_prob = emission_prob # for each player, a row-stochastic matrix
        
        # Store latest hidden and observed states
        self.H = None
        self.O = None
    
        self.t = 0
   
    def check_input_parameters(self):
        pass
    
    def next(self):
        if self.t == 0:
            self.H = {p: np.random.choice(H_symbols, p=self.initial_dist) for p in players}
            self.O = {p: np.random.choice(O_symbols, p=self.emission_prob[p][self.H[p]]) for p in players}
            self.t = 1
            
            return self.H, self.O
            
        H_new = {}
        O_new = {}
        for p in players:
            R_ = self.R[p]
            emission_ = self.emission_prob[p]
            
            # produce next hidden state
            dist = []
            for h in H_symbols: # calculate the probability of each next state h
                v = []
                v.append(self.N[p][self.H[p]][h]) # P(H_t^player = h | H__{t-1}^player = H[player])
                for teammate in players:
                    if (p, teammate) in M:
                        v.append(self.M[(p, teammate)][self.O[teammate]][h]) # P(H_t^player = h | O_{t-1}^teammate = O[teammate])
                    else:
                        v.append(self.M[p][self.O[teammate]][h])
                v = np.array(v)
                
                dist.append(np.dot(R_, v))
            
            h_new = np.random.choice(H_symbols, p=dist)
            
            # produce next observation
            o_new = np.random.choice(O_symbols, p=emission_[h_new])
            
            # Add new values to
            H_new[p] = h_new
            O_new[p] = o_new
            
        # Update hidden and observed states
        self.H = H_new
        self.O = O_new
        
        # Update time
        self.t += 1
        
        return H_new, O_new
    
    def get_data(self, T = 1000):
        # T = number of observations (i.e. number of iterations)
        self.restart()
        data = []
        for _ in range(T):
            H, O = self.next()
            data.append((H, O))
        return data
    
    def restart(self):
        self.H = None
        self.O = None
        self.t = 0

In [5]:
# === M ===
avg_transO = np.array([[0.5, 0.3, 0.2],
                       [0.25, 0.5, 0.25],
                       [0.2, 0.3, 0.5]])

star_transO = np.array([[0.7, 0.3, 0],
                        [0.2, 0.6, 0.2],
                        [0, 0.3, 0.7]])
# === N ===
avg_transH = np.array([[0.7, 0.3, 0.0],
                       [0.2, 0.6, 0.2],
                       [0.0, 0.3, 0.7]])

# === emission_prob ===
avg_emission = np.array([[0.6, 0.3, 0.1],
                       [0.2, 0.6, 0.2],
                       [0.1, 0.3, 0.6]])

# ===  R ===
R_singleH = np.array([1] + [0] * len(players))
def R_singleHO(player):
    i = int(player[-1])
    arr = [0] * (len(players)+1)
    arr[0] = 0.7
    arr[i] = 0.3
    return np.array(arr)

def R_singleO(player):
    i = int(player[-1])
    arr = [0] * (len(players)+1)
    arr[i] = 1
    return np.array(arr)

def R_star(player, star):
    if player == star:
        return R_singleH
    arr = [0] * (len(players)+1)
    i = int(star[-1])
    arr[i] = 1
    return np.array(arr)
    
R_uniform = np.array([1/(1 + len(players))] * (len(players) + 1))

In [6]:
def generate_data(model, T, seed=73):
    np.random.seed(seed)
    data = []
    data = model.get_data(T)

    # Store data
    observations = {player: [] for player in players}
    true_hidden = {player: [] for player in players}
    for (h, o) in data:
        for player in players:
            true_hidden[player].append(h[player])
            observations[player].append(o[player])

    for player in players:
        observations[player] = np.array(observations[player])
        true_hidden[player] = np.array(true_hidden[player])
    return observations, true_hidden

In [7]:
M = {player: star_transO for player in players}
N = {player: avg_transH for player in players}
emission_prob = {player: avg_emission for player in players}
R = {player: R_star(player, 'player1') for player in players}
initial_dist = np.array([0, 1, 0])

model = TeamModel(initial_dist, M, N, R, emission_prob)

In [8]:
# Generate data
Os, Hs = generate_data(model, T)

In [11]:
def init_trans():
    trans_mat = np.zeros((n_components, n_components))
    
    trans_mat[0][0] = np.random.uniform(0.5, 1)
    trans_mat[0][1] = 1 - trans_mat[0][0]
    trans_mat[0][2] = 0.0
    
    trans_mat[1][1] = np.random.uniform(0.5, 1)
    trans_mat[1][0] = (1 - trans_mat[1][1]) / 2
    trans_mat[1][2] = (1 - trans_mat[1][1]) / 2
    
    trans_mat[2][2] = np.random.uniform(0.5, 1)
    trans_mat[2][0] = 0.0
    trans_mat[2][1] = 1 - trans_mat[2][2]
    
    return trans_mat

In [12]:
def cond(player, h1, h2, t, M_, N_, R_): # P(H_t^player = h1 | H_{t-1}^player = h2, O_{t-1})    
    # Requires M_, N_, R_, Os
    v = [N_[h2][h1]]
    for teammate in players:
        v.append(M_[Os[teammate][t-1]][h1])
            
    v = np.array(v)
    return np.dot(R_[player], v)  

In [13]:
def learn_R(M, N, samples, O):
    R = {p: cp.Variable(len(players) + 1, nonneg=True) for p in players}

    objective = 0
    for H in samples:
        for p in players:
            for t in range(1, T):
                h_t = H[p][t] # state of player p at time t
                v_t = [N[H[p][t-1]][h_t]]
                for teammate in players:
                    v_t.append(M[O[teammate][t-1]][h_t])
                v_t = np.array(v_t)
                prod = R[p] @ v_t
                objective -= cp.log(prod)


    constraints = [cp.sum(R[p]) == 1 for p in R]
    prob = cp.Problem(cp.Minimize(objective), constraints)
    prob.solve(solver=cp.MOSEK)

    R_optimized = {p: R[p].value for p in players}

    return R_optimized, prob.value

In [14]:
def learn_M_N(R, samples, O):
    M = cp.Variable((n_features, n_components), nonneg=True)
    N = cp.Variable((n_components, n_components), nonneg=True) 

    objective = 0
    for H in samples:
        for p in players:
            for t in range(1, T):
                h_t = H[p][t] # state of player p at time t
                v_t = [N[H[p][t-1], h_t]]
                for teammate in players:
                    v_t.append(M[O[teammate][t-1], h_t])
                v_t = np.array(v_t)
                prod = R[p] @ v_t
                objective -= cp.log(prod)


    M_constraints = [cp.sum(M[i, :]) == 1 for i in range(n_features)]
    N_constraints = [cp.sum(N[i, :]) == 1 for i in range(n_components)]

    constraints = M_constraints + N_constraints
    prob = cp.Problem(cp.Minimize(objective), constraints)
    prob.solve(solver=cp.MOSEK)

    M_optimized = M.value
    N_optimized = N.value

    return M_optimized, N_optimized, prob.value

In [None]:
def learn_E():
    pass

In [15]:
def calculate_forward(M_, N_, E_, R_):
    alpha = {p: [] for p in players}
    alpha_help = {p: [] for p in players}
    
    # Initialize forward parameters
    for p in players:
        alpha_help[p].append(initial_dist)
    for p in players:
        arr = np.array([E_[p][h][Os[p][0]] * alpha_help[p][0][h] for h in H_symbols])
        arr /= np.sum(arr)
        alpha[p].append(arr)

    # Compute forward parameters (bottom-up)
    for p in players:
        for t in range(1, T):
            arr = [sum([cond(p, h, h_, t, M_, N_, R_) * alpha[p][t-1][h_] for h_ in H_symbols]) for h in H_symbols]
            arr = np.array(arr)
            alpha_help[p].append(arr)
            
            arr = [E_[p][h][Os[p][t]] * alpha_help[p][t][h] for h in H_symbols]
            arr = np.array(arr)
            arr /= np.sum(arr)
            alpha[p].append(arr)
            
    return alpha, alpha_help

In [16]:
def E_step(M_, N_, E_, R_, num_of_samples = 10):
    alpha, alpha_help = calculate_forward(M_, N_, E_, R_)
    
    # Sample hidden states from the posterior distribution
    samples = []
    for _ in range(num_of_samples):
        Hs_ = {p: [1] for p in players}

        for p in players:
            for t in range(1, T):
                dist = np.array([alpha[p][t][h] * cond(p, h, Hs_[p][t-1], t, M_, N_, R_) / alpha_help[p][t][h] if alpha_help[p][t][h] != 0 else 0 for h in H_symbols])
                dist /= np.sum(dist)
                Hs_[p].append(np.random.choice(H_symbols, p=dist))
        for p in players:
            Hs_[p] = np.array(Hs_[p])
        samples.append(copy.deepcopy(Hs_))
    
    return samples

def M_step(samples, M, N, R, iterations=1):
#     M_opt = copy.deepcopy(M)
#     N_opt = copy.deepcopy(N)
    M_opt = M
    N_opt = N
    R_opt = copy.deepcopy(R)
    for _ in range(iterations):
        try:
            # Fix M, N and maximize R
            R_opt, val = learn_R(M_opt, N_opt, samples, Os)
            
            # Fix R and maximize M, N
            M_opt, N_opt, val = learn_M_N(R_opt, samples, Os)
            
                
            print(f'\tObj = {val}')            
        except cp.error.SolverError as e:
            print(e)
            break
            
    return M_opt, N_opt, R_opt, val

def EM(params, iterations = 50, reltol = 1e-3):
    # Initialize parameters
    M_, N_, E_, R_ = params['M'], params['N'], params['E'], params['R']
   
    
    val = 1e+100 # best objective value achieved so far
    M_opt = M_
    N_opt = N_
    E_opt = E_
    R_opt = R_
    for i in range(iterations):
        print(f'=== EM iteration {i+1} ===')
        # E-step
        print(f'E-step...')
        samples = E_step(M_, N_, E_, R_)
        
        # M-step
        print(f'M-step...')
        M_, N_, R_, obj_val = M_step(samples, M_, N_, R_)
        
        if (val - obj_val) / val < reltol:
            break
        if obj_val < val:
            # Store current best parameters
            M_opt = M_
            N_opt = N_
            E_opt = E_
            R_opt = R_
            
        val = min(val, obj_val)
        
    return M_opt, N_opt, R_opt, E_opt, val

In [28]:
def EM_helper(param_list):
    val_opt = 1e+100
    for params in param_list:
        M_, N_, R_, E_, val = EM(params)
        
        if val < val_opt:
            val_opt = val
            M_opt = M_
            N_opt = N_
            E_opt = E_
            R_opt = R_
            
    return M_opt, N_opt, E_opt, R_opt, val_opt

In [17]:
np.random.seed(42)
M_ = init_trans()
N_ = init_trans()
E_ = emission_prob
R_ = {p: R_star(p, 'player1') for p in players}
M_, N_, R_, val = EM(params)

=== EM iteration 1 ===
E-step...
M-step...




	Obj = 2138.213252850518
{'player1': array([1., 0., 0., 0., 0., 0.]), 'player2': array([0., 1., 0., 0., 0., 0.]), 'player3': array([0.        , 0.98936907, 0.        , 0.01063093, 0.        ,
       0.        ]), 'player4': array([0.00000000e+00, 9.99999970e-01, 0.00000000e+00, 0.00000000e+00,
       3.07895019e-08, 0.00000000e+00]), 'player5': array([0., 1., 0., 0., 0., 0.])}
=== EM iteration 2 ===
E-step...
M-step...
	Obj = 2132.821242465801
{'player1': array([0.95993052, 0.        , 0.03778003, 0.        , 0.00228945,
       0.        ]), 'player2': array([0., 1., 0., 0., 0., 0.]), 'player3': array([0.        , 0.98675009, 0.        , 0.01324991, 0.        ,
       0.        ]), 'player4': array([0.        , 0.99088377, 0.        , 0.00911623, 0.        ,
       0.        ]), 'player5': array([0.        , 0.99463005, 0.00536995, 0.        , 0.        ,
       0.        ])}
=== EM iteration 3 ===
E-step...
M-step...
	Obj = 2190.3441993623774


In [18]:
R_

{'player1': array([0.87802794, 0.04997556, 0.04983815, 0.01555951, 0.00659883,
        0.        ]),
 'player2': array([0., 1., 0., 0., 0., 0.]),
 'player3': array([0.        , 0.98281519, 0.        , 0.01718481, 0.        ,
        0.        ]),
 'player4': array([0.        , 0.97907743, 0.        , 0.01214694, 0.00877563,
        0.        ]),
 'player5': array([0.        , 0.99345375, 0.00551681, 0.        , 0.00102944,
        0.        ])}

In [19]:
M_

array([[0.76874358, 0.23125643, 0.        ],
       [0.00762495, 0.97618851, 0.01618654],
       [0.        , 0.13819118, 0.86180882]])

In [20]:
N_

array([[6.93501569e-01, 3.06498430e-01, 0.00000000e+00],
       [2.38238715e-01, 5.33317401e-01, 2.28443879e-01],
       [9.71880021e-10, 4.66905482e-01, 5.33094517e-01]])

In [30]:
def likelihood(params): # Calculate log P(O) (given the model parameters)
    M, N, E, R = params['M'], params['N'], params['E'], params['R']

    # Caluclate forward parameters
    alpha, alpha_help = calculate_forward(M, N, E, R)
    obj = 0
    for p in players:
        for t in range(T):
            obj += -np.log(sum([E[p][h][Os[p][t]] * alpha_help[p][t][h] for h in H_symbols])) # Pr(O_t^i | O_{1:t-1})
    return obj

def likelihood_test(params_list):
    # Given a list of model parameters returns the model parameters that achieve the highest likelihood
    val_opt = 1e+100
    for params in params_list:
        val = likelihood(params)
        if val < val_opt:
            val_opt = val
            params_opt = params
    return params_opt

In [24]:
R_ = {p: R_singleO(p) for p in players}
likelihood(M[p], N[p], emission_prob, R_)

570.4552441987914