In [None]:
import numpy as np
import pdb

def update_matrix_with_RL_and_weight(matrix, chosen_features, feedback, learning_rate, w=0.5):
    """
    Apply one reinforcement-learning style update to a 2x2 feature-value matrix.

    Column 0 is indexed with the dim1 feature and column 1 with the dim2 feature;
    the row index is the feature index (0 or 1) within that dimension. The chosen
    feature of each dimension is moved towards the reward, the unchosen feature
    towards ``1 - reward``, and each resulting value is then multiplied by the
    dimension weight (``w`` for dim1, ``1 - w`` for dim2).

    The input ``matrix`` is not modified; a new array is returned.

    Parameters:
    - matrix: 2x2 numpy array holding the current values.
    - chosen_features: Dict with keys 'dim1' and 'dim2', each 0 or 1.
    - feedback: Truthy if the choice was rewarded, falsy otherwise.
    - learning_rate: Float step size applied to each prediction error.
    - w: Float weight applied to the dim1 column; ``1 - w`` is applied to dim2.

    Returns:
    - New 2x2 float array with every entry clipped to [0, 1].
    """
    print(f'initial matrix {matrix}')

    # Work on a float copy so the caller's array is left untouched and integer
    # input is not truncated on assignment.
    updated = np.array(matrix, dtype=float)

    # Convert feedback (True/False) to reward signal (1 or 0)
    rew = 1 if feedback else 0

    chosen_rows = (chosen_features['dim1'], chosen_features['dim2'])
    column_weights = (w, 1 - w)

    # NOTE(review): the weight multiplies the whole updated value, not just the
    # increment, so entries shrink on every call even when the prediction error
    # is zero -- confirm this is the intended rule.
    for col, (chosen_row, weight) in enumerate(zip(chosen_rows, column_weights)):
        unchosen_row = 1 - chosen_row  # the other feature of this dimension

        # Chosen feature is pulled towards the reward.
        delta_chosen = rew - updated[chosen_row, col]
        updated[chosen_row, col] = (updated[chosen_row, col] + learning_rate * delta_chosen) * weight

        # Unchosen feature is pulled towards the opposite outcome.
        delta_unchosen = (1 - rew) - updated[unchosen_row, col]
        updated[unchosen_row, col] = (updated[unchosen_row, col] + learning_rate * delta_unchosen) * weight

    # Ensure values remain within bounds (0 to 1)
    return np.clip(updated, 0, 1)

def simulate_trials_with_fixed_correct_feature(num_trials=10, learning_rate=1, w=0.5):
    """
    Run a sequence of random-choice trials against one fixed rewarded feature.

    A rewarded dimension ('dim1' or 'dim2') and a rewarded feature index (0 or 1)
    are drawn once. On every trial a feature is drawn at random for each
    dimension, feedback is positive when the choice in the rewarded dimension
    matches the rewarded index, and the matrix is passed through
    update_matrix_with_RL_and_weight. Progress is printed after each trial.

    Parameters:
    - num_trials: How many trials to run.
    - learning_rate: Step size forwarded to the update rule.
    - w: Dimension weight forwarded to the update rule.

    Returns:
    - The matrix as it stands after the last trial.
    """
    # Every cell starts at 0.25
    values = np.array([[0.25, 0.25], [0.25, 0.25]])

    # Draw the rewarded dimension first, then the rewarded index inside it
    correct_feature = np.random.choice(['dim1', 'dim2'])
    correct_dim = np.random.choice([0, 1])
    print(f"Correct Feature: {correct_feature}, Index: {correct_dim}")

    for trial_number in range(1, num_trials + 1):
        # Random choice for dim1, then for dim2
        pick_dim1 = np.random.choice([0, 1])
        pick_dim2 = np.random.choice([0, 1])
        picks = {'dim1': pick_dim1, 'dim2': pick_dim2}

        # Positive feedback only when the rewarded dimension holds the rewarded index
        feedback = (picks[correct_feature] == correct_dim)

        values = update_matrix_with_RL_and_weight(values, picks, feedback, learning_rate, w)

        outcome = 'Correct' if feedback else 'Incorrect'
        print(f"Trial {trial_number} | Chosen Features: {picks} | Feedback: {outcome}")
        print(f"Updated Matrix after Trial {trial_number}:\n{values}\n")

    return values

# Run 10 trials in which the same single feature stays correct throughout
simulate_trials_with_fixed_correct_feature(num_trials=10, learning_rate=1, w=0.5)

In [3]:
import numpy as np

def bayesian_update_with_change_points(matrix, chosen_features, feedback, run_length, hazard_rate, learning_rate, w=0.5):
    """
    Update the probability matrix from one trial, with a random change-point reset.

    The chosen feature of each dimension is moved by a weighted prediction error
    (``w`` for dim1 in column 0, ``1 - w`` for dim2 in column 1). Afterwards a
    change-point is drawn at random with probability ``hazard_rate``, independent
    of the feedback: when it fires, the matrix is reset to uniform and the
    run-length to 0; otherwise the run-length is incremented. The result is
    normalised so that its entries sum to 1.

    The input ``matrix`` is not modified; a new array is returned.

    Parameters:
    - matrix: 2x2 numpy array representing the current probability matrix.
    - chosen_features: Dict with keys 'dim1' and 'dim2', each 0 or 1.
    - feedback: Truthy if the chosen combination was correct, falsy otherwise.
    - run_length: Current run-length (trials since the last change-point).
    - hazard_rate: Probability of a change-point occurring on this call.
    - learning_rate: Float step size applied to each prediction error.
    - w: Float controlling how the update is split between the two dimensions.

    Returns:
    - Tuple of (updated probability matrix, updated run-length).
    """
    chosen_dim1 = chosen_features['dim1']
    chosen_dim2 = chosen_features['dim2']

    # Convert feedback (True/False) to reward signal (1 or 0)
    r = 1 if feedback else 0

    # Float copy: keeps the caller's array intact and lets integer input be
    # normalised without a casting error.
    updated = np.array(matrix, dtype=float)

    # Prediction errors for the chosen feature of each dimension
    PE_dim1 = r - updated[chosen_dim1, 0]
    PE_dim2 = r - updated[chosen_dim2, 1]
    updated[chosen_dim1, 0] += learning_rate * PE_dim1 * w
    updated[chosen_dim2, 1] += learning_rate * PE_dim2 * (1 - w)

    # Check if a change-point occurred based on hazard rate
    if np.random.rand() < hazard_rate:
        # Change-point: reset the run-length and reinitialise to uniform
        run_length = 0
        updated = np.array([[0.5, 0.5], [0.5, 0.5]])
    else:
        run_length += 1  # No change-point, increment run-length

    # Normalise so the entries sum to 1; an all-zero matrix would otherwise
    # turn into NaN, so fall back to a uniform distribution in that case.
    total = updated.sum()
    if total == 0:
        updated[:] = 1.0 / updated.size
    else:
        updated /= total

    return updated, run_length

def simulate_trials_with_change_points(num_trials=10, hazard_rate=0.1, learning_rate=1, w=0.5):
    """
    Run a sequence of random-choice trials through the hazard-rate update rule.

    A rewarded dimension and feature index are drawn once. Each trial draws a
    random feature per dimension, derives feedback from the rewarded dimension,
    and hands the matrix and run-length to bayesian_update_with_change_points.
    Progress is printed after each trial.

    Parameters:
    - num_trials: How many trials to run.
    - hazard_rate: Per-trial change-point probability forwarded to the update.
    - learning_rate: Step size forwarded to the update.
    - w: Dimension weight forwarded to the update.

    Returns:
    - The probability matrix as it stands after the last trial.
    """
    beliefs = np.array([[0.25, 0.25], [0.25, 0.25]])

    # Trials elapsed since the most recent change-point
    run_length = 0

    # Draw the rewarded dimension first, then the rewarded index inside it
    correct_feature = np.random.choice(['dim1', 'dim2'])
    correct_dim = np.random.choice([0, 1])
    print(f"Correct Feature: {correct_feature}, Index: {correct_dim}")

    for trial_number in range(1, num_trials + 1):
        # Random choice for dim1, then for dim2
        pick_dim1 = np.random.choice([0, 1])
        pick_dim2 = np.random.choice([0, 1])
        picks = {'dim1': pick_dim1, 'dim2': pick_dim2}
        feedback = (picks[correct_feature] == correct_dim)

        beliefs, run_length = bayesian_update_with_change_points(
            beliefs, picks, feedback, run_length, hazard_rate, learning_rate, w
        )

        outcome = 'Correct' if feedback else 'Incorrect'
        print(f"Trial {trial_number} | Chosen Features: {picks} | Feedback: {outcome}")
        print(f"Updated Matrix after Trial {trial_number}:\n{beliefs}\n")

    return beliefs

# Run 10 trials; each trial has a 10% chance (hazard rate 0.1) of a change-point reset
simulate_trials_with_change_points(num_trials=10, hazard_rate=0.1, learning_rate=1, w=0.5)


Correct Feature: dim2, Index: 0
Trial 1 | Chosen Features: {'dim1': 1, 'dim2': 1} | Feedback: Incorrect
Updated Matrix after Trial 1:
[[0.33333333 0.33333333]
 [0.16666667 0.16666667]]

Trial 2 | Chosen Features: {'dim1': 0, 'dim2': 1} | Feedback: Incorrect
Updated Matrix after Trial 2:
[[0.22222222 0.44444444]
 [0.22222222 0.11111111]]

Trial 3 | Chosen Features: {'dim1': 1, 'dim2': 1} | Feedback: Incorrect
Updated Matrix after Trial 3:
[[0.26666667 0.53333333]
 [0.13333333 0.06666667]]

Trial 4 | Chosen Features: {'dim1': 1, 'dim2': 1} | Feedback: Incorrect
Updated Matrix after Trial 4:
[[0.2962963  0.59259259]
 [0.07407407 0.03703704]]

Trial 5 | Chosen Features: {'dim1': 1, 'dim2': 0} | Feedback: Correct
Updated Matrix after Trial 5:
[[0.17777778 0.47777778]
 [0.32222222 0.02222222]]

Trial 6 | Chosen Features: {'dim1': 1, 'dim2': 1} | Feedback: Incorrect
Updated Matrix after Trial 6:
[[0.2147651  0.57718121]
 [0.19463087 0.01342282]]

Trial 7 | Chosen Features: {'dim1': 0, 'dim2':

array([[0.13333333, 0.53333333],
       [0.26666667, 0.06666667]])

In [None]:
import numpy as np

def bayesian_update_with_pe_change_point(matrix, chosen_features, feedback, run_length, learning_rate, pe_threshold=0.8, w=0.5):
    """
    Update the probability matrix from one trial, resetting on a large prediction error.

    The chosen feature of each dimension is moved by a weighted prediction error
    (``w`` for dim1 in column 0, ``1 - w`` for dim2 in column 1). If the absolute
    prediction error of either dimension, measured before the update, exceeds
    ``pe_threshold``, a change-point is declared: the matrix is reset to uniform,
    the run-length to 0, and a message is printed. Otherwise the run-length is
    incremented. The result is normalised so that its entries sum to 1.

    The input ``matrix`` is not modified; a new array is returned.

    Parameters:
    - matrix: 2x2 numpy array representing the current probability matrix.
    - chosen_features: Dict with keys 'dim1' and 'dim2', each 0 or 1.
    - feedback: Truthy if the chosen combination was correct, falsy otherwise.
    - run_length: Current run-length (trials since the last change-point).
    - learning_rate: Float step size applied to each prediction error.
    - pe_threshold: Absolute prediction error above which a change-point fires.
    - w: Float controlling how the update is split between the two dimensions.

    Returns:
    - Tuple of (updated probability matrix, updated run-length).
    """
    chosen_dim1 = chosen_features['dim1']
    chosen_dim2 = chosen_features['dim2']

    # Convert feedback (True/False) to reward signal (1 or 0)
    r = 1 if feedback else 0

    # Float copy: keeps the caller's array intact and lets integer input be
    # normalised without a casting error.
    updated = np.array(matrix, dtype=float)

    # Prediction errors for the chosen features, taken before any update
    PE_dim1 = r - updated[chosen_dim1, 0]
    PE_dim2 = r - updated[chosen_dim2, 1]

    # Move the chosen features by their weighted prediction errors
    updated[chosen_dim1, 0] += learning_rate * PE_dim1 * w
    updated[chosen_dim2, 1] += learning_rate * PE_dim2 * (1 - w)

    # A prediction error beyond the threshold is treated as a change-point
    if abs(PE_dim1) > pe_threshold or abs(PE_dim2) > pe_threshold:
        run_length = 0
        updated = np.array([[0.5, 0.5], [0.5, 0.5]])  # Reset to uniform
        print("Change-point detected! Resetting probabilities and run-length.")
    else:
        run_length += 1  # No change-point, increment run-length

    # Normalise so the entries sum to 1; an all-zero matrix would otherwise
    # turn into NaN, so fall back to a uniform distribution in that case.
    total = updated.sum()
    if total == 0:
        updated[:] = 1.0 / updated.size
    else:
        updated /= total

    return updated, run_length

def simulate_trials_with_pe_based_change_point(num_trials=10, learning_rate=1, pe_threshold=0.8, w=0.5):
    """
    Run a sequence of random-choice trials through the PE-threshold update rule.

    A rewarded dimension and feature index are drawn once. Each trial draws a
    random feature per dimension, derives feedback from the rewarded dimension,
    and hands the matrix and run-length to bayesian_update_with_pe_change_point.
    Progress is printed after each trial.

    Parameters:
    - num_trials: How many trials to run.
    - learning_rate: Step size forwarded to the update.
    - pe_threshold: Prediction-error threshold forwarded to the update.
    - w: Dimension weight forwarded to the update.

    Returns:
    - The probability matrix as it stands after the last trial.
    """
    beliefs = np.array([[0.25, 0.25], [0.25, 0.25]])

    # Trials elapsed since the most recent change-point
    run_length = 0

    # Draw the rewarded dimension first, then the rewarded index inside it
    correct_feature = np.random.choice(['dim1', 'dim2'])
    correct_dim = np.random.choice([0, 1])
    print(f"Correct Feature: {correct_feature}, Index: {correct_dim}")

    for trial_number in range(1, num_trials + 1):
        # Random choice for dim1, then for dim2
        pick_dim1 = np.random.choice([0, 1])
        pick_dim2 = np.random.choice([0, 1])
        picks = {'dim1': pick_dim1, 'dim2': pick_dim2}
        feedback = (picks[correct_feature] == correct_dim)

        beliefs, run_length = bayesian_update_with_pe_change_point(
            beliefs, picks, feedback, run_length, learning_rate, pe_threshold, w
        )

        outcome = 'Correct' if feedback else 'Incorrect'
        print(f"Trial {trial_number} | Chosen Features: {picks} | Feedback: {outcome}")
        print(f"Updated Matrix after Trial {trial_number}:\n{beliefs}\n")

    return beliefs

# Run 10 trials; a prediction error above 0.8 triggers a change-point reset
simulate_trials_with_pe_based_change_point(num_trials=10, learning_rate=1, pe_threshold=0.8, w=0.5)
