In [1]:
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt

import ipywidgets as widgets
import matplotlib.animation as animation
import matplotlib.gridspec as gridspec
from pathlib import Path

from IPython.display import display
from HMM_target_detector_20251204_UPDATED_with_target_change import HMM_target_detector

In [2]:
ANGLE_RANGE = 2*np.pi
CODE_COLOR_MAP = {0 : 'yellow',
                  1 : 'blue',
                  2 : 'purple'}

In [3]:
prob_code0_in_states = np.array([1, 0, 1, 1])
prob_code1_in_states = np.array([0, 1, 0, 0])
EMISSION_MATRIX_TARGET1 = np.vstack([prob_code0_in_states, prob_code1_in_states])

prob_code0_in_states = np.array([1, 1, 1, 1])
prob_code1_in_states = np.array([0, 0, 0, 0])
EMISSION_MATRIX_TARGET2 = np.vstack([prob_code0_in_states, prob_code1_in_states])

EMISSION_MATRIX_TARGETS = np.stack([EMISSION_MATRIX_TARGET1, EMISSION_MATRIX_TARGET2])
EMISSION_MATRIX_TARGETS, EMISSION_MATRIX_TARGETS.shape

(array([[[1, 0, 1, 1],
         [0, 1, 0, 0]],
 
        [[1, 1, 1, 1],
         [0, 0, 0, 0]]]),
 (2, 2, 4))

In [4]:
agent = HMM_target_detector(emission_matrix_targets=EMISSION_MATRIX_TARGETS)

In [5]:
initial_angle_slider = widgets.IntSlider(
    value=0,  
    min=0,
    max=360,
    description="Initial Angle",
    style={'description_width': 'initial'},
    continuous_update=True,
    layout=widgets.Layout(width="600px"),
)

interactive_plot = widgets.interactive(
    agent.observe_angle_state_pair, initial_angle=initial_angle_slider)
display(interactive_plot)

interactive(children=(IntSlider(value=0, description='Initial Angle', layout=Layout(width='600px'), max=360, s…

In [78]:
save_folder = Path(f'../test_trials__detection_model_20251205_115300_four_states')
save_folder.mkdir(parents=True, exist_ok=True)

In [43]:
def get_deterministic_target_transition_matrix(forecast_t_candidate_step):
    if forecast_t_candidate_step == 'switch':
        target_transition_matrix = np.array([[0, 1], 
                                             [1, 0]])
    else:
        target_transition_matrix = np.array([[1, 0], 
                                             [0, 1]])
    return target_transition_matrix

In [44]:
def debug_forecasting_each_obs_print_statements(agent):
    forecasted_obs_string = f'O{int(agent.within_trial_params["time_ind"]+2)}={agent.within_trial_arrays['forecasted_obs']}'

    if int(agent.within_trial_params["time_ind"]+1)>1:
        up_to_date_obs_sequence_term_name = f'O1:{int(agent.within_trial_params["time_ind"]+1)}'
    else:
        up_to_date_obs_sequence_term_name = f'O1'
    up_to_date_obs_sequence_string = f'{up_to_date_obs_sequence_term_name}={agent.within_trial_arrays['code_received_t'][:int(agent.within_trial_params["time_ind"]+1)]}'

    if int(agent.within_trial_params["time_ind"]+1)>1:
        angle_steps_string = f'Δθ2:{int(agent.within_trial_params["time_ind"]+1)}'
    else:
        angle_steps_string = f'Δθ2'

    print(f'P({forecasted_obs_string},{up_to_date_obs_sequence_string}|{angle_steps_string},T)={agent.within_trial_arrays["likelihood_obs_from_target_forecast_t_fromtargets"]}')
    print(f'P({forecasted_obs_string}|{up_to_date_obs_sequence_string},{angle_steps_string},T)={agent.within_trial_arrays['likelihood_obs_forecast_t_given_received_obs']}')


In [45]:
def compute_forecasted_entropy_from_forecasted_obs(self, state_transition_matrix):
    forecasted_S = np.zeros(self.num_observations)
    prob_obs_at_forecast_t_given_obs_at_cur_t = np.zeros(self.num_observations)
    posterior_forecast_given_received_obs = np.zeros((self.num_observations, self.num_beliefs))
    for obs in range(self.num_observations):
        self.within_trial_arrays['joint_prob_received_obs_state_forecast_t_fromtargets'][self.within_trial_params["time_ind"]] = (self.within_trial_arrays['joint_prob_received_obs_state_t_from_targets'][self.within_trial_params["time_ind"]] @ state_transition_matrix) * self.emission_matrix_targets[:,obs]
        likelihood_obs_from_target_forecast_t_fromtargets = self.within_trial_arrays['joint_prob_received_obs_state_forecast_t_fromtargets'][self.within_trial_params["time_ind"]].sum(axis=1)
        likelihood_obs_forecast_t_given_received_obs = self.update_likelihood_given_observed_sequence(likelihood_obs_from_target_forecast_t_fromtargets, self.within_trial_arrays['likelihood_received_obs_from_targets_t'][self.within_trial_params["time_ind"]])

        self.within_trial_arrays['forecasted_obs'] = obs
        self.within_trial_arrays['likelihood_obs_forecast_t_given_received_obs'] = likelihood_obs_forecast_t_given_received_obs
        self.debug_forecasting_each_obs_print_statements()
        
        numerator = (self.within_trial_arrays['posterior_t'][self.within_trial_params["time_ind"]] * likelihood_obs_forecast_t_given_received_obs)
        prob_obs_at_forecast_t_given_obs_at_cur_t[obs] = numerator.sum()
        posterior_forecast_given_received_obs[obs] = self.bayes_update(numerator)
        forecasted_S[obs] = self.compute_entropy_from_posterior_across_targets(posterior_forecast_given_received_obs[obs])

    return prob_obs_at_forecast_t_given_obs_at_cur_t, forecasted_S

In [46]:
def compute_forecasted_entropy_from_forecasted_obs(agent, state_transition_matrix, target_transition_matrix):
    updated_posteriors_across_targets_and_obs = np.zeros((agent.num_observations, agent.num_beliefs))
    normalized_updated_posteriors_across_targets_and_obs = np.zeros((agent.num_observations, agent.num_beliefs))
    likelihood_obs_forecast_t_given_received_obs = np.zeros((agent.num_observations, agent.num_beliefs))
    likelihood_obs_from_target_forecast_t_fromtargets = np.zeros((agent.num_observations, agent.num_beliefs))
    prob_obs_at_forecast_t_given_obs_at_cur_t = np.zeros(agent.num_observations)
    k = agent.within_trial_params["time_ind"]

    for target_t_destination in range(agent.num_beliefs):
        subterm1 = agent.within_trial_arrays['joint_prob_received_obs_state_t_from_targets'][k] @ state_transition_matrix
        subterm2 = (agent.emission_matrix_targets[target_t_destination,:] @ subterm1.T)
        likelihood_obs_from_target_forecast_t_fromtargets[:,target_t_destination] = subterm2 @ target_transition_matrix[:,target_t_destination]
        if k > 0:
            print(f'P(O{int(k+2)},O1:{int(k)}=received|T2=t{int(target_t_destination)}, action) = {likelihood_obs_from_target_forecast_t_fromtargets[:,target_t_destination]}')
        else:
            print(f'P(O{int(k+2)},O1=received|T2=t{int(target_t_destination)}, action) = {likelihood_obs_from_target_forecast_t_fromtargets[:,target_t_destination]}')
        term2 = agent.within_trial_arrays['likelihood_received_obs_from_targets_t'][k] @ target_transition_matrix
        likelihood_obs_forecast_t_given_received_obs[:,target_t_destination] = agent.update_likelihood_given_observed_sequence(likelihood_obs_from_target_forecast_t_fromtargets[:,target_t_destination], term2)

        updated_belief_after_action = agent.within_trial_arrays['posterior_t'][k] @ target_transition_matrix[:,target_t_destination]
        print(f'P(T2=t{int(target_t_destination)}|O1=received)={updated_belief_after_action}')
        updated_posteriors_across_targets_and_obs[:,target_t_destination] = updated_belief_after_action * likelihood_obs_forecast_t_given_received_obs[:,target_t_destination]

    prob_obs_at_forecast_t_given_obs_at_cur_t = updated_posteriors_across_targets_and_obs.sum(axis=1)
    for target_t_destination in range(agent.num_beliefs):
        normalized_updated_posterior = updated_posteriors_across_targets_and_obs[:,target_t_destination] / prob_obs_at_forecast_t_given_obs_at_cur_t
        normalized_updated_posteriors_across_targets_and_obs[:,target_t_destination] = normalized_updated_posterior

    forecasted_entropy_across_obs = np.zeros(agent.num_observations)
    for obs in range(agent.num_observations):
        agent.within_trial_arrays['forecasted_obs'] = obs
        agent.within_trial_arrays['likelihood_obs_forecast_t_given_received_obs'] = likelihood_obs_forecast_t_given_received_obs[obs]
        agent.within_trial_arrays["likelihood_obs_from_target_forecast_t_fromtargets"] = likelihood_obs_from_target_forecast_t_fromtargets[obs]
        debug_forecasting_each_obs_print_statements(agent)
        forecasted_entropy_across_obs[obs] = (agent.compute_entropy_from_posterior_across_targets(normalized_updated_posteriors_across_targets_and_obs[obs]))
    return prob_obs_at_forecast_t_given_obs_at_cur_t, forecasted_entropy_across_obs

In [47]:
agent = HMM_target_detector(emission_matrix_targets=EMISSION_MATRIX_TARGETS)
agent.move_and_sample_from_object()

agent.update_posterior_and_compute_current_entropy()

agent.debug_within_trial_print_statements()

# agent.candidate_steps = np.array([-agent.state_step, 0, agent.state_step])
# agent.forecast_and_compute_expected_entropy_from_actions()

Current angle: 0.0
Code received: 0.0
[0.375 0.5  ]
P(O1=[0.],S1|T1)=[0.25 0.   0.25 0.25], P(O1=[0.]|T1)=0.75, P(S1|O1=[0.],T1)=[0.33 0.   0.33 0.33], P(T|O1=[0.])=[0.42857143 0.57142857], H1=0.99
P(O1=[0.],S1|T2)=[0.25 0.25 0.25 0.25], P(O1=[0.]|T2)=1.0, P(S1|O1=[0.],T2)=[0.25 0.25 0.25 0.25], P(T|O1=[0.])=[0.42857143 0.57142857], H1=0.99


In [48]:
agent.within_trial_arrays['steps_taken'] 

array([0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0.,
       0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0.,
       0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0.,
       0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0.,
       0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0.,
       0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0.])

In [49]:
agent.within_trial_arrays['steps_taken'] = []
agent.within_trial_arrays['steps_taken']

[]

In [50]:
agent.within_trial_arrays['posterior_t'][agent.within_trial_params["time_ind"]]

array([0.42857143, 0.57142857])

In [51]:
agent.candidate_steps

array([['-1.5707963267948966', 'switch'],
       ['0', 'switch'],
       ['1.5707963267948966', 'switch'],
       ['-1.5707963267948966', 'noswitch'],
       ['0', 'noswitch'],
       ['1.5707963267948966', 'noswitch']], dtype='<U32')

In [52]:
for i, forecast_t_candidate_step in enumerate(agent.candidate_steps):
    forecast_t_candidate_step = agent.candidate_steps[0]
    forecast_t_candidate_step_states = (forecast_t_candidate_step[0]).astype(float)
    forecast_t_candidate_step_targets = (forecast_t_candidate_step[1]).astype(str)
    state_transition_matrix = (agent.get_deterministic_state_transition_matrix(forecast_t_candidate_step_states))
    target_transition_matrix = (get_deterministic_target_transition_matrix(forecast_t_candidate_step_targets))

    prob_obs_at_forecast_t_given_obs_at_cur_t, forecasted_S = compute_forecasted_entropy_from_forecasted_obs(agent, state_transition_matrix, target_transition_matrix)
    expected_forecast_t_S = (forecasted_S @ prob_obs_at_forecast_t_given_obs_at_cur_t)
    agent.print_debug_forecasted_entropy_statements(prob_obs_at_forecast_t_given_obs_at_cur_t, forecasted_S)
    agent.within_trial_arrays['expected_forecast_t_S_per_step'][agent.within_trial_params["time_ind"], i] = expected_forecast_t_S

P(O2,O1=received|T2=t0, action) = [0.75 0.25]
P(T2=t0|O1=received)=0.5714285714285714
P(O2,O1=received|T2=t1, action) = [0.75 0.  ]
P(T2=t1|O1=received)=0.42857142857142855
P(O2=0,O1=[0.]|Δθ2,T)=[0.75 0.75]
P(O2=0|O1=[0.],Δθ2,T)=[0.75 0.75]
P(O2=1,O1=[0.]|Δθ2,T)=[0.25 0.  ]
P(O2=1|O1=[0.],Δθ2,T)=[0.33333333 0.        ]
S2 = 0.750x0.985 + 0.190x0.000 = 0.7389211020256886
P(O2,O1=received|T2=t0, action) = [0.75 0.25]
P(T2=t0|O1=received)=0.5714285714285714
P(O2,O1=received|T2=t1, action) = [0.75 0.  ]
P(T2=t1|O1=received)=0.42857142857142855
P(O2=0,O1=[0.]|Δθ2,T)=[0.75 0.75]
P(O2=0|O1=[0.],Δθ2,T)=[0.75 0.75]
P(O2=1,O1=[0.]|Δθ2,T)=[0.25 0.  ]
P(O2=1|O1=[0.],Δθ2,T)=[0.33333333 0.        ]
S2 = 0.750x0.985 + 0.190x0.000 = 0.7389211020256886
P(O2,O1=received|T2=t0, action) = [0.75 0.25]
P(T2=t0|O1=received)=0.5714285714285714
P(O2,O1=received|T2=t1, action) = [0.75 0.  ]
P(T2=t1|O1=received)=0.42857142857142855
P(O2=0,O1=[0.]|Δθ2,T)=[0.75 0.75]
P(O2=0|O1=[0.],Δθ2,T)=[0.75 0.75]
P(O2=1,O1=[0.

In [53]:
agent.within_trial_arrays['expected_forecast_t_S_per_step'][agent.within_trial_params["time_ind"]]

array([0.7389211, 0.7389211, 0.7389211, 0.7389211, 0.7389211, 0.7389211])

In [54]:
agent.compute_expected_entropy_change_from_each_action()

S1=0.9852281360342515
S2=[0.7389211 0.7389211 0.7389211 0.7389211 0.7389211 0.7389211]
ΔS1→2=[0.24630703 0.24630703 0.24630703 0.24630703 0.24630703 0.24630703]


In [55]:
def determine_decision_from_forecasted_entropy_change(self):
    unique, counts = np.unique(self.within_trial_arrays['deltaS_t'][self.within_trial_params["time_ind"]], return_counts=True)
    frequent_cond = counts>1
    unique_frequent_delta_S = unique[frequent_cond]
    if (counts[unique==self.within_trial_arrays['deltaS_t'][self.within_trial_params["time_ind"]].max()]==1).all():
        print('Choosing according to max ΔS')
        self.within_trial_arrays['decision_type_t'][self.within_trial_params["time_ind"]] = 'max ΔS'
        step_to_take = self.candidate_steps[self.within_trial_arrays['deltaS_t'][self.within_trial_params["time_ind"]].argmax()]
    else:
        print(f'DETERMINING DECISION {self.within_trial_arrays["posterior_t"][self.within_trial_params["time_ind"]]}: {(1 in self.within_trial_arrays["posterior_t"][self.within_trial_params["time_ind"]])}')
        if (1 not in self.within_trial_arrays['posterior_t'][self.within_trial_params["time_ind"]]):
            print(f'Choosing randomly between {self.candidate_steps[np.where(self.within_trial_arrays["deltaS_t"][self.within_trial_params["time_ind"]]==unique_frequent_delta_S.max())[0]]}')
            self.within_trial_arrays['decision_type_t'][self.within_trial_params["time_ind"]] = 'random'
            step_to_take = self.candidate_steps[np.random.choice(np.where(self.within_trial_arrays['deltaS_t'][self.within_trial_params["time_ind"]]==unique_frequent_delta_S.max())[0])]
        else:
            print('DECISION REACHED')
            self.within_trial_arrays['decision_type_t'][self.within_trial_params["time_ind"]] = 'end'
            step_to_take = 0

    self.within_trial_arrays['steps_taken'] += [step_to_take]

In [56]:
k = agent.within_trial_params["time_ind"]
unique, counts = np.unique(agent.within_trial_arrays['deltaS_t'][k], return_counts=True)
frequent_cond = counts>1
unique_frequent_delta_S = unique[frequent_cond]
if (counts[unique==agent.within_trial_arrays['deltaS_t'][k].max()]==1).all():
    print('Choosing according to max ΔS')
    agent.within_trial_arrays['decision_type_t'][k] = 'max ΔS'
    step_to_take = agent.candidate_steps[agent.within_trial_arrays['deltaS_t'][k].argmax()]
else:
    print(f'DETERMINING DECISION {agent.within_trial_arrays["posterior_t"][k]}: {(1 in agent.within_trial_arrays["posterior_t"][k])}')
    if (1 not in agent.within_trial_arrays['posterior_t'][k]):
        print(f'Choosing randomly between {agent.candidate_steps[np.where(agent.within_trial_arrays["deltaS_t"][agent.within_trial_params["time_ind"]]==unique_frequent_delta_S.max())[0]]}')
        agent.within_trial_arrays['decision_type_t'][k] = 'random'
        step_to_take = agent.candidate_steps[np.random.choice(np.where(agent.within_trial_arrays['deltaS_t'][k]==unique_frequent_delta_S.max())[0])]
    else:
        print('DECISION REACHED')
        agent.within_trial_arrays['decision_type_t'][k] = 'end'
        step_to_take = 0
step_to_take

DETERMINING DECISION [0.42857143 0.57142857]: False
Choosing randomly between [['-1.5707963267948966' 'switch']
 ['0' 'switch']
 ['1.5707963267948966' 'switch']
 ['-1.5707963267948966' 'noswitch']
 ['0' 'noswitch']
 ['1.5707963267948966' 'noswitch']]


array(['-1.5707963267948966', 'noswitch'], dtype='<U32')

In [57]:
agent.within_trial_arrays['steps_taken'] += [step_to_take]
agent.within_trial_arrays['steps_taken']

[array(['-1.5707963267948966', 'noswitch'], dtype='<U32')]

In [58]:
def forecast_and_compute_expected_entropy_from_actions(agent):
    for i, forecast_t_candidate_step in enumerate(agent.candidate_steps):
        forecast_t_candidate_step_states = (forecast_t_candidate_step[0]).astype(float)
        forecast_t_candidate_step_targets = (forecast_t_candidate_step[1]).astype(str)
        state_transition_matrix = (agent.get_deterministic_state_transition_matrix(forecast_t_candidate_step_states))
        target_transition_matrix = (get_deterministic_target_transition_matrix(forecast_t_candidate_step_targets))

        prob_obs_at_forecast_t_given_obs_at_cur_t, forecasted_S = compute_forecasted_entropy_from_forecasted_obs(agent, state_transition_matrix, target_transition_matrix)
        expected_forecast_t_S = (forecasted_S @ prob_obs_at_forecast_t_given_obs_at_cur_t)
        agent.print_debug_forecasted_entropy_statements(prob_obs_at_forecast_t_given_obs_at_cur_t, forecasted_S)
        agent.within_trial_arrays['expected_forecast_t_S_per_step'][agent.within_trial_params["time_ind"], i] = expected_forecast_t_S

In [73]:
def move_and_sample_from_object(agent):
    k = agent.within_trial_params["time_ind"]
    if k<1:
        agent.within_trial_arrays['angles_visited'][k] = agent.within_trial_params["initial_angle"]
        target_transition_matrix = get_deterministic_target_transition_matrix('noswitch')
    else:
        step_to_take = agent.within_trial_arrays['steps_taken'][int(k-1)]
        state_step_to_take = step_to_take[0].astype(float)
        target_step_to_take = step_to_take[1].astype(str)
        agent.within_trial_arrays['angles_visited'][k] = agent.within_trial_arrays['angles_visited'][k-1] + np.rad2deg(state_step_to_take)
        target_transition_matrix = get_deterministic_target_transition_matrix(target_step_to_take)

    angle_visited_radians = np.radians((agent.within_trial_arrays['angles_visited'][k] % 360))
    state_visited = np.floor((angle_visited_radians) / agent.state_step)
    emission_probs = agent.emission_matrix_true[:,:,int(state_visited)] @ target_transition_matrix
    agent.within_trial_arrays['code_received_t'][k] = np.random.choice(np.arange(agent.num_observations), p=emission_probs[0])

    print(f'Current angle: {agent.within_trial_arrays["angles_visited"][k]}')
    print(f'Code received: {agent.within_trial_arrays["code_received_t"][k]}')

In [74]:
target_transition_matrix = get_deterministic_target_transition_matrix('switch')
target_transition_matrix

array([[0, 1],
       [1, 0]])

In [75]:
agent.emission_matrix_targets[:,:,1] @ target_transition_matrix

array([[1, 0],
       [0, 1]])

In [76]:
agent.emission_matrix_targets[0][:,1]

array([0, 1])

In [79]:
agent = HMM_target_detector(emission_matrix_targets=EMISSION_MATRIX_TARGETS)
agent.within_trial_arrays['steps_taken'] = []
agent.emission_matrix_true = agent.emission_matrix_targets
if np.isclose(agent.within_trial_arrays['posterior_t'][agent.within_trial_params["time_ind"]-1], 1, atol=1e-2).any() or agent.within_trial_params['stopping_num']>0:
    agent.within_trial_params['stopping_num']+=1

move_and_sample_from_object(agent)

agent.update_posterior_and_compute_current_entropy()

agent.debug_within_trial_print_statements()

forecast_and_compute_expected_entropy_from_actions(agent)

agent.compute_expected_entropy_change_from_each_action()

determine_decision_from_forecasted_entropy_change(agent)
agent.within_trial_params["time_ind"]+=1

Current angle: 0.0
Code received: 0.0
[0.375 0.5  ]
P(O1=[0.],S1|T1)=[0.25 0.   0.25 0.25], P(O1=[0.]|T1)=0.75, P(S1|O1=[0.],T1)=[0.33 0.   0.33 0.33], P(T|O1=[0.])=[0.42857143 0.57142857], H1=0.99
P(O1=[0.],S1|T2)=[0.25 0.25 0.25 0.25], P(O1=[0.]|T2)=1.0, P(S1|O1=[0.],T2)=[0.25 0.25 0.25 0.25], P(T|O1=[0.])=[0.42857143 0.57142857], H1=0.99
P(O2,O1=received|T2=t0, action) = [0.75 0.25]
P(T2=t0|O1=received)=0.5714285714285714
P(O2,O1=received|T2=t1, action) = [0.75 0.  ]
P(T2=t1|O1=received)=0.42857142857142855
P(O2=0,O1=[0.]|Δθ2,T)=[0.75 0.75]
P(O2=0|O1=[0.],Δθ2,T)=[0.75 0.75]
P(O2=1,O1=[0.]|Δθ2,T)=[0.25 0.  ]
P(O2=1|O1=[0.],Δθ2,T)=[0.33333333 0.        ]
S2 = 0.750x0.985 + 0.190x0.000 = 0.7389211020256886
P(O2,O1=received|T2=t0, action) = [0.75 0.25]
P(T2=t0|O1=received)=0.5714285714285714
P(O2,O1=received|T2=t1, action) = [0.75 0.  ]
P(T2=t1|O1=received)=0.42857142857142855
P(O2=0,O1=[0.]|Δθ2,T)=[0.75 0.75]
P(O2=0|O1=[0.],Δθ2,T)=[0.75 0.75]
P(O2=1,O1=[0.]|Δθ2,T)=[0.25 0.  ]
P(O2=1|O1

  normalized_updated_posterior = updated_posteriors_across_targets_and_obs[:,target_t_destination] / prob_obs_at_forecast_t_given_obs_at_cur_t


In [84]:
def update_posterior_and_compute_current_entropy(agent):
    if agent.within_trial_params["time_ind"]<1:
        agent.within_trial_arrays["likelihood_received_obs_from_targets_t"][agent.within_trial_params["time_ind"]] = agent.prior_state_prob @ agent.emission_matrix_targets[:,int(agent.within_trial_arrays['code_received_t'][agent.within_trial_params["time_ind"]])]
    else:
        step_to_take = agent.within_trial_arrays['steps_taken'][int(agent.within_trial_params["time_ind"]-1)]
        state_step_to_take = step_to_take[0].astype(float)
        target_state_step_to_take = state_step_to_take[1].astype(str)
        state_transition_matrix = agent.get_deterministic_state_transition_matrix(state_step_to_take)
        target_transition_matrix = get_deterministic_target_transition_matrix(target_state_step_to_take)
        prob_of_received_obs_from_states_given_targets = agent.emission_matrix_targets[:,int(agent.within_trial_arrays['code_received_t'][agent.within_trial_params["time_ind"]])]
        joint_prob_of_previous_obs_from_states = agent.within_trial_arrays['joint_prob_received_obs_state_t_from_targets'][agent.within_trial_params["time_ind"]-1]
        agent.within_trial_arrays['likelihood_received_obs_from_targets_t'][agent.within_trial_params["time_ind"]] = (prob_of_received_obs_from_states_given_targets @ (joint_prob_of_previous_obs_from_states @ state_transition_matrix).T) @ target_transition_matrix

    if agent.within_trial_params["time_ind"]<1:
        likelihood_received_obs = agent.within_trial_arrays['likelihood_received_obs_from_targets_t'][agent.within_trial_params["time_ind"]]
        print(agent.prior_target_prob*likelihood_received_obs)
        agent.within_trial_arrays['posterior_t'][agent.within_trial_params["time_ind"]] = agent.bayes_update(agent.prior_target_prob*likelihood_received_obs)
    else:
        likelihood_received_obs_given_observed_sequence = agent.update_likelihood_given_observed_sequence(agent.within_trial_arrays['likelihood_received_obs_from_targets_t'][agent.within_trial_params["time_ind"]], agent.within_trial_arrays['likelihood_received_obs_from_targets_t'][agent.within_trial_params["time_ind"]-1])
        agent.within_trial_arrays['posterior_t'][agent.within_trial_params["time_ind"]] = agent.bayes_update(agent.within_trial_arrays['posterior_t'][agent.within_trial_params["time_ind"]-1]*likelihood_received_obs_given_observed_sequence)

    agent.within_trial_arrays['current_entropyS_t'][agent.within_trial_params["time_ind"]] = agent.compute_entropy_from_posterior_across_targets(agent.within_trial_arrays['posterior_t'][agent.within_trial_params["time_ind"]])


In [None]:
across_trial_params = dict()
across_trial_params['trial_num'] = 0
across_trial_params['max_iter'] = 100
across_trial_params['num_trials'] = 10

across_trial_arrays = dict()
across_trial_arrays['posterior_across_trials'] = np.zeros((across_trial_params['num_trials'], across_trial_params['max_iter']+1, agent.num_beliefs))
across_trial_arrays['expected_forecast_t_across_trials'] = np.zeros((across_trial_params['num_trials'], across_trial_params['max_iter'], agent.candidate_steps.shape[0]))
across_trial_arrays['decision_type_across_trials'] = np.empty((across_trial_params['num_trials'], across_trial_params['max_iter']), dtype=np.dtypes.StringDType())
across_trial_arrays['angles_visited_across_trials'] = np.zeros((across_trial_params['num_trials'], across_trial_params['max_iter']))
across_trial_arrays['steps_taken_across_trials'] = np.zeros((across_trial_params['num_trials'], across_trial_params['max_iter']))
across_trial_arrays['entropy_across_trials'] = np.zeros((across_trial_params['num_trials'], across_trial_params['max_iter']))
across_trial_arrays['time_taken_per_trial'] = np.zeros(across_trial_params['num_trials'])
across_trial_arrays['codes_received_across_trials'] = np.zeros((across_trial_params['num_trials'], across_trial_params['max_iter']))

emission_matrix_true = EMISSION_MATRIX_TARGETS[0]

while across_trial_params['trial_num'] < across_trial_params['num_trials']:

    agent = HMM_target_detector(emission_matrix_targets=EMISSION_MATRIX_TARGETS)
    agent.within_trial_arrays['steps_taken'] = []
    agent.emission_matrix_true = agent.emission_matrix_targets
    while agent.within_trial_params["time_ind"]<agent.within_trial_params['max_iter'] and agent.within_trial_params['stopping_num'] < 2:
        if np.isclose(agent.within_trial_arrays['posterior_t'][agent.within_trial_params["time_ind"]-1], 1, atol=1e-2).any() or agent.within_trial_params['stopping_num']>0:
            agent.within_trial_params['stopping_num']+=1

        move_and_sample_from_object(agent)

        update_posterior_and_compute_current_entropy(agent)

        agent.debug_within_trial_print_statements()

        forecast_and_compute_expected_entropy_from_actions(agent)

        agent.compute_expected_entropy_change_from_each_action()
        
        determine_decision_from_forecasted_entropy_change(agent)
        agent.within_trial_params["time_ind"]+=1
    
    summary_df = agent.get_summary_of_trial()
    summary_df.to_csv(f'{save_folder}/trial_{across_trial_params["trial_num"]}.csv')

    across_trial_arrays['posterior_across_trials'][across_trial_params['trial_num'],:agent.within_trial_params["time_ind"]+1,:] = np.vstack([agent.prior_target_prob, agent.within_trial_arrays['posterior_t'][:agent.within_trial_params["time_ind"]]])
    across_trial_arrays['entropy_across_trials'][across_trial_params['trial_num'], :agent.within_trial_params["time_ind"]] = agent.within_trial_arrays['current_entropyS_t'][:agent.within_trial_params["time_ind"]]
    across_trial_arrays['expected_forecast_t_across_trials'][across_trial_params['trial_num'], :agent.within_trial_params["time_ind"],:] = agent.within_trial_arrays['expected_forecast_t_S_per_step'][:agent.within_trial_params["time_ind"]]
    across_trial_arrays['steps_taken_across_trials'][across_trial_params['trial_num'],          :] = agent.within_trial_arrays['steps_taken']
    across_trial_arrays['angles_visited_across_trials'][across_trial_params['trial_num'],       :] = agent.within_trial_arrays['angles_visited']
    across_trial_arrays['codes_received_across_trials'][across_trial_params['trial_num'],       :] = agent.within_trial_arrays['code_received_t']
    across_trial_arrays['time_taken_per_trial'][across_trial_params['trial_num']] = agent.within_trial_params["time_ind"]
    across_trial_arrays['decision_type_across_trials'][across_trial_params['trial_num']] = agent.within_trial_arrays["decision_type_t"]
    across_trial_params['trial_num']+=1

Current angle: 0.0
Code received: 0.0
[0.375 0.5  ]
P(O1=[0.],S1|T1)=[0.25 0.   0.25 0.25], P(O1=[0.]|T1)=0.75, P(S1|O1=[0.],T1)=[0.33 0.   0.33 0.33], P(T|O1=[0.])=[0.42857143 0.57142857], H1=0.99
P(O1=[0.],S1|T2)=[0.25 0.25 0.25 0.25], P(O1=[0.]|T2)=1.0, P(S1|O1=[0.],T2)=[0.25 0.25 0.25 0.25], P(T|O1=[0.])=[0.42857143 0.57142857], H1=0.99
P(O2,O1=received|T2=t0, action) = [0.75 0.25]
P(T2=t0|O1=received)=0.5714285714285714
P(O2,O1=received|T2=t1, action) = [0.75 0.  ]
P(T2=t1|O1=received)=0.42857142857142855
P(O2=0,O1=[0.]|Δθ2,T)=[0.75 0.75]
P(O2=0|O1=[0.],Δθ2,T)=[0.75 0.75]
P(O2=1,O1=[0.]|Δθ2,T)=[0.25 0.  ]
P(O2=1|O1=[0.],Δθ2,T)=[0.33333333 0.        ]
S2 = 0.750x0.985 + 0.190x0.000 = 0.7389211020256886
P(O2,O1=received|T2=t0, action) = [0.75 0.25]
P(T2=t0|O1=received)=0.5714285714285714
P(O2,O1=received|T2=t1, action) = [0.75 0.  ]
P(T2=t1|O1=received)=0.42857142857142855
P(O2=0,O1=[0.]|Δθ2,T)=[0.75 0.75]
P(O2=0|O1=[0.],Δθ2,T)=[0.75 0.75]
P(O2=1,O1=[0.]|Δθ2,T)=[0.25 0.  ]
P(O2=1|O1

  normalized_updated_posterior = updated_posteriors_across_targets_and_obs[:,target_t_destination] / prob_obs_at_forecast_t_given_obs_at_cur_t


ValueError: The truth value of an array with more than one element is ambiguous. Use a.any() or a.all()

In [None]:
test_df = pd.read_csv(f'{save_folder}/trial_0.csv', index_col=0)
test_df

Unnamed: 0,posterior_t_T1,posterior_t_T2,expected_forecast_t_S_per_step_CW,expected_forecast_t_S_per_step_STAY,expected_forecast_t_S_per_step_CCW,angles_visited,steps_taken,current_entropyS_t,code_received_t,decision_type_t,time_taken
0,0.428571,0.571429,0.787111,0.985228,0.787111,0.0,1.570796,0.985228,0.0,random,4.0
1,1.0,0.0,0.0,0.0,0.0,90.0,0.0,0.0,1.0,end,4.0
2,1.0,0.0,0.0,0.0,0.0,90.0,0.0,0.0,1.0,end,4.0
3,1.0,0.0,0.0,0.0,0.0,90.0,0.0,0.0,1.0,end,4.0


In [None]:
num_trials = len(list(save_folder.glob('*.csv')))
trial_selection_limit = min(10, num_trials)
trial_df_cache = {}

def get_summary_df(trial_idx):
    capped_idx = min(trial_idx, num_trials - 1)
    if capped_idx not in trial_df_cache:
        trial_df_cache[capped_idx] = pd.read_csv(f"{save_folder}/trial_{capped_idx}.csv", index_col=0)
    return trial_df_cache[capped_idx]

def update(time_ind, trial_num):
    summary_df_for_trial = get_summary_df(trial_num)
    fig = plt.figure(figsize=(12, 6))
    gs = gridspec.GridSpec(1, 2, width_ratios=[4, 4], wspace=0.3, figure=fig)
    gs_signals = gridspec.GridSpecFromSubplotSpec(
        2, 1, height_ratios=[2,1], subplot_spec=gs[0, 1], hspace=0.3)

    ax_grid = fig.add_subplot(gs[0, 0])
    ax_diffusion = fig.add_subplot(gs_signals[0])
    ax_entropy = fig.add_subplot(gs_signals[1])

    current_angle = (summary_df_for_trial['angles_visited'][time_ind]%360)
    code_received = summary_df_for_trial['code_received_t'][time_ind]
    initial_radians = np.radians(current_angle)

    setup_details = {'title': 'Target', 'grid_extent': 10, 
                        'inner_radius' : 1, 'outer_radius' : 5,
                        'agent_radians' : initial_radians}
    agent.plot_code_dependent_regions(ax_grid, EMISSION_MATRIX_TARGETS[0], setup_details)
    agent.plot_agent_in_env(ax_grid, setup_details, code_received)

    if time_ind>0:
        prev_angle = (summary_df_for_trial['angles_visited'][time_ind-1]%360)
        initial_radians = np.radians(prev_angle)
        ax_grid.scatter(1.2*setup_details['outer_radius']*np.cos(initial_radians), 1.2*setup_details['outer_radius']*np.sin(initial_radians),
                s=200, marker='.', facecolor='r', edgecolor='k', alpha=0.6, zorder=4)
        
    if time_ind>1:
        prev_angle = (summary_df_for_trial['angles_visited'][time_ind-2]%360)
        initial_radians = np.radians(prev_angle)
        ax_grid.scatter(1.4*setup_details['outer_radius']*np.cos(initial_radians), 1.4*setup_details['outer_radius']*np.sin(initial_radians),
                s=200, marker='.', facecolor='r', edgecolor='k', alpha=0.3, zorder=4)

    for i in range(agent.num_beliefs):
        if i == 0:
            class_name = 'target'
        else:
            class_name = 'non-target'
        ax_diffusion.plot(summary_df_for_trial[f'posterior_t_T{int(i+1)}'][:int(time_ind+1)], marker='.', label=f'Class {class_name}')
    ax_diffusion.axhline(y=1.0, linestyle='dashed', color='k')
    ax_diffusion.set_xlim(0, summary_df_for_trial['time_taken'][0]+1)
    ax_diffusion.set_ylim(0.0, 1.05)
    ax_diffusion.set_xticks(np.arange(summary_df_for_trial['time_taken'][0]+2).astype(int))
    ax_diffusion.set_ylabel('Class probability')
    ax_diffusion.set_xlabel('Time index (k)')

    x_vals = np.arange(max(0, time_ind-2), min(summary_df_for_trial.shape[0], time_ind+3)).astype(int)
    ax_entropy.plot(x_vals, summary_df_for_trial['expected_forecast_t_S_per_step_CW'][max(0, time_ind-2):min(summary_df_for_trial.shape[0], time_ind+3)], marker='.')
    ax_entropy.plot(x_vals, summary_df_for_trial['expected_forecast_t_S_per_step_STAY'][max(0, time_ind-2):min(summary_df_for_trial.shape[0], time_ind+3)], marker='.')
    ax_entropy.plot(x_vals, summary_df_for_trial['expected_forecast_t_S_per_step_CCW'][max(0, time_ind-2):min(summary_df_for_trial.shape[0], time_ind+3)], marker='.')
    ax_entropy.axvline(x=time_ind, linestyle='dashed', color='k', label=f"Decision type:{summary_df_for_trial['decision_type_t'][time_ind]}")
    xlabels = np.arange(time_ind-2, min(summary_df_for_trial.shape[0], time_ind+3)).astype(int)
    ax_entropy.set_xticks(xlabels)
    ax_entropy.set_ylabel('Forecasted entropy')
    ax_entropy.legend(loc='lower left')
    ax_entropy.set_xlabel('Time index (k)')


trial_selector = widgets.ToggleButtons(
    options=[(f'Trial {i}', i) for i in range(trial_selection_limit)],
    description='Trial',
    style={'description_width': 'initial'},
    layout=widgets.Layout(width='400px')
)

time_ind_slider = widgets.IntSlider(
    value=0, min=0, max=max(0, get_summary_df(0).shape[0]-1), step=1,
    description="Time (k)",
    continuous_update=True,
    style={'description_width': 'initial'},
    layout=widgets.Layout(width="400px")
)


def _sync_slider_with_trial():
    df = get_summary_df(trial_selector.value)
    new_max = max(0, df.shape[0]-1)
    time_ind_slider.max = new_max
    if time_ind_slider.value > new_max:
        time_ind_slider.value = new_max


def _on_trial_change(change):
    if change['name'] == 'value':
        _sync_slider_with_trial()


trial_selector.observe(_on_trial_change, names='value')
_sync_slider_with_trial()

controls = widgets.HBox([trial_selector, time_ind_slider])
interactive_plot = widgets.interactive_output(
    update, {'time_ind': time_ind_slider, 'trial_num': trial_selector})
display(widgets.VBox([controls, interactive_plot]))



VBox(children=(HBox(children=(ToggleButtons(description='Trial', layout=Layout(width='400px'), options=(('Tria…