In [1]:
import os
import pandas as pd
import matplotlib.pyplot as plt
import math
import pandas as pd
import numpy as np
from matplotlib.lines import Line2D
import pdb


def read_csv_files(directory):
    """
    Load every CSV in `directory` whose name starts with "sub".

    Each directory entry is reported on stdout, together with whether it is
    read or skipped.

    Parameters:
    - directory: Path of the folder to scan (non-recursive).

    Returns:
    - A list of DataFrames, one per matching file, in the order the
      entries are returned by os.listdir.
    """
    print(f"Checking files in directory: {directory}")
    loaded_frames = []
    for entry in os.listdir(directory):
        print(f"Found file: {entry}")
        is_subject_csv = entry.startswith("sub") and entry.endswith(".csv")
        if not is_subject_csv:
            print(f"Skipping file: {entry}")
            continue
        print(f"Reading CSV file: {entry}")
        loaded_frames.append(pd.read_csv(os.path.join(directory, entry)))
    return loaded_frames

# Folder (relative to the current working directory) that is scanned for CSV files
directory = 'data'

# Load every file accepted by read_csv_files into a list of DataFrames.
# NOTE(review): the recorded output of this cell shows the listed files being
# skipped (none of the visible names starts with "sub"), so `dataframes` may be
# empty and the preview loop below would print nothing — confirm the filename
# filter is the intended one.
dataframes = read_csv_files(directory)

# Print a numbered preview (first rows) of each loaded DataFrame
for i, df in enumerate(dataframes):
    print(f"First few rows of DataFrame {i+1}:")
    print(df.head())
    print("\n")


Checking files in directory: data
Found file: Table_CrypticCreaturesShiftRelative_controls_YaleCohort.csv
Skipping file: Table_CrypticCreaturesShiftRelative_controls_YaleCohort.csv
Found file: .DS_Store
Skipping file: .DS_Store
Found file: CrypticCreaturesBayesianLearner_relativeID_controls.csv
Skipping file: CrypticCreaturesBayesianLearner_relativeID_controls.csv
Found file: avg_confidence_deviation_plot.png
Skipping file: avg_confidence_deviation_plot.png
Found file: CrypticCreaturesBayesianLearner_relativeShift_OCD.csv
Skipping file: CrypticCreaturesBayesianLearner_relativeShift_OCD.csv
Found file: Table_CrypticCreaturesShiftRelative_YaleCohort.csv
Skipping file: Table_CrypticCreaturesShiftRelative_YaleCohort.csv
Found file: test.csv
Skipping file: test.csv
Found file: Table_CrypticCreatures_patients_YaleCohort.csv
Skipping file: Table_CrypticCreatures_patients_YaleCohort.csv
Found file: CrypticCreaturesBayesianLearner_relativeED.csv
Skipping file: CrypticCreaturesBayesianLearner_re

In [2]:
# Enter the data folder only when it is reachable from the current working
# directory. An unconditional os.chdir("data") fails with FileNotFoundError when
# this cell is re-run, because the kernel is then already inside "data".
if os.path.isdir("data"):
    os.chdir("data")

# Load the trial table from Table_CrypticCreatures_YaleCohort.csv
CrypticCreatures = pd.read_csv("Table_CrypticCreatures_YaleCohort.csv")


In [3]:
# 1. Function to dynamically initialize a matrix based on the task ID and characteristics
def initialize_matrix(task_id):
    """
    Build the uniform starting probability hypercube for a task.

    Supported layouts:
    - task 1: 2 dimensions with 2 variants each -> shape (2, 2)
    - task 2: 4 dimensions with 2 variants each -> shape (2, 2, 2, 2)
    - task 3: 4 dimensions with 3 variants each -> shape (3, 3, 3, 3)

    Parameters:
    - task_id: Identifier of the task (1, 2 or 3).

    Returns:
    - A numpy array with one axis per dimension in which every cell holds
      1 / (total number of cells).

    Raises:
    - ValueError: If task_id is not one of the supported IDs.
    """
    # (task ID, number of dimensions, variants per dimension)
    task_layouts = ((1, 2, 2), (2, 4, 2), (3, 4, 3))

    for known_id, num_dimensions, variants_per_dimension in task_layouts:
        if task_id == known_id:
            break
    else:
        # The loop ran to completion without finding a matching layout
        raise ValueError("Unsupported task ID")

    # One axis per dimension, each as long as the number of variants
    hypercube_shape = [variants_per_dimension] * num_dimensions
    cell_count = variants_per_dimension ** num_dimensions

    # Every cell starts with the same probability
    return np.full(hypercube_shape, 1 / cell_count)

# 2. Merged function to identify differing dimensions and create the feature matrix
def identify_and_create_feature_matrix(row):
    """
    Find the dimensions on which the two stimuli of a trial differ.

    For each of 'color', 'hair', 'eyes' and 'bumpiness' the values
    row['stim1_<dim>'] and row['stim2_<dim>'] are compared. Every dimension
    whose two values are unequal becomes a column of the feature matrix,
    holding both values sorted in descending order (larger value in row 0).

    Parameters:
    - row: Mapping-like trial record exposing the 'stim1_*' / 'stim2_*' keys.

    Returns:
    - differing_dimensions: List of differing dimension names, in the fixed
      order given above.
    - feature_matrix_df: DataFrame with one column per differing dimension.
    """
    differing_dimensions = []
    columns_by_dimension = {}

    for dim in ('color', 'hair', 'eyes', 'bumpiness'):
        first_value, second_value = row[f'stim1_{dim}'], row[f'stim2_{dim}']
        if first_value != second_value:
            differing_dimensions.append(dim)
            # Descending order puts the larger value in the first row
            columns_by_dimension[dim] = sorted((first_value, second_value), reverse=True)

    return differing_dimensions, pd.DataFrame(columns_by_dimension)

# 3. Function to map chosen stimulus' features onto the initialised feature matrix
def map_features_to_standardized_matrix(stim_features, feature_matrix):
    """
    Locate the chosen stimulus' features inside the feature matrix.

    For every column (dimension) of `feature_matrix`, the first row whose value
    equals stim_features[dimension] is looked up by position. Each hit is
    recorded as a (row, column) cell and marked with a 1 in the returned matrix.
    A warning is printed for any dimension whose value is not found.

    Parameters:
    - stim_features: Mapping from dimension name to the chosen stimulus' value;
      must contain every column name of `feature_matrix`.
    - feature_matrix: DataFrame with one column per dimension.

    Returns:
    - mapped_cells: List of (row_index, col_index) tuples, one per matched dimension.
    - mapped_matrix: Float numpy array of zeros with a 1 at every mapped cell.
      It is square (rows x rows) whenever the columns fit, and is widened to
      one column per dimension when the feature matrix has more columns than
      rows, so that assigning a mapped cell never raises IndexError.
    """
    mapped_cells = []
    for col_index, dim in enumerate(feature_matrix.columns):
        stim_value = stim_features[dim]

        # Positional search, so the lookup does not depend on the frame's index labels
        row_index = next(
            (i for i, value in enumerate(feature_matrix[dim].tolist()) if value == stim_value),
            None,
        )

        if row_index is None:
            print(f"Warning: Could not find matching row/column for feature {stim_value} in dimension {dim}.")
            continue

        mapped_cells.append((row_index, col_index))

    # Keep the square layout when it is large enough; otherwise add the columns
    # needed to hold every dimension.
    n_rows = len(feature_matrix)
    n_cols = max(n_rows, len(feature_matrix.columns))
    mapped_matrix = np.zeros((n_rows, n_cols))

    for i, j in mapped_cells:
        mapped_matrix[i, j] = 1

    # Return both the cell list and the indicator matrix built from it
    return mapped_cells, mapped_matrix

# 4. Function to calculate entropy of the probability matrix
def calculate_entropy(matrix):
    """
    Compute the Shannon entropy (in bits) of a probability array.

    Higher values mean the probability mass is spread more evenly (more
    uncertainty); lower values mean it is concentrated in fewer cells.

    Parameters:
    - matrix: A numpy array of probabilities; it is flattened, so any number
      of axes is accepted.

    Returns:
    - entropy: H = -sum(p * log2(p)) taken over the strictly positive entries.
    """
    flat = matrix.flatten()
    # Entries that are not strictly positive are dropped: log2(0) is undefined
    positive = flat[flat > 0]
    weighted_logs = positive * np.log2(positive)
    return -weighted_logs.sum()


In [4]:
def update_matrix(matrix, chosen_matrix, feedback):
    """
    Update the probability matrix from the feedback received for the chosen stimulus.

    Cells that the feedback rules out are set to zero and the result is
    renormalized so that it sums to 1. Cells that are already zero stay zero.
    The input `matrix` is not modified; a new array is returned.

    Matrix Structure (as described for the two-dimension, two-feature case):

          D2F1  | D2F2
        ----------------
    D1F1 |  [0, 0]
    D1F2 |  [0, 0]

    Parameters:
    - matrix: numpy array holding the current probabilities.
    - chosen_matrix: Array-like (numpy array or nested list/tuple) of the same
      layout as `matrix`, with 1 for chosen cells and 0 for non-chosen cells.
    - feedback: Truthy if the choice was correct, falsy if it was incorrect.

    Process:
    1. If feedback is positive, every cell where `chosen_matrix` is 0 is set to
       zero (only chosen cells survive).
    2. If feedback is negative, every cell where `chosen_matrix` is 1 is set to
       zero (chosen cells are eliminated).
    3. The result is divided by its sum when that sum is positive. If every
       cell has been eliminated the all-zero array is returned as is.

    Returns:
    - The updated and normalized probability matrix (a new array).
    """
    # Work on a copy so the caller's array is left untouched
    updated = np.array(matrix)
    # Accept nested lists/tuples as well as arrays; a plain list compared with
    # `== 0` would yield a single False and eliminate nothing.
    chosen = np.asarray(chosen_matrix)

    # Only cells that still carry probability mass can be eliminated
    non_zero_mask = updated != 0

    # Positive feedback rules out the non-chosen cells, negative feedback the chosen ones
    ruled_out_value = 0 if feedback else 1
    updated[(chosen == ruled_out_value) & non_zero_mask] = 0

    # Renormalize so that the surviving probabilities sum to 1
    total = np.sum(updated)
    if total > 0:
        updated = updated / total

    return updated


In [5]:
#use it

In [6]:
def process_first_participant(CrypticCreatures, id_tested, task_id_tested):
    """
    Replay one participant's trials for one task, printing the belief update after each trial.

    For every trial the chosen stimulus' features are mapped onto the feature
    matrix, the probability matrix is updated from the trial's feedback, and the
    resulting matrix and its entropy are printed.

    Parameters:
    - CrypticCreatures: DataFrame of trials. The columns read here are 'id',
      'task_id', 'trial', 'ruleChange', 'response', 'chosen_outcome' and the
      'stim<k>_<dimension>' feature columns.
    - id_tested: Participant ID to select.
    - task_id_tested: Task ID to select; also passed to initialize_matrix.

    Returns:
    - None. All results are printed.

    Raises:
    - IndexError: If no row matches the requested participant and task.
    """
    participant_data = CrypticCreatures[
        (CrypticCreatures['id'] == id_tested) & (CrypticCreatures['task_id'] == task_id_tested)
    ]
    if participant_data.empty:
        # Same exception type as the positional lookup below would raise, with a usable message
        raise IndexError(f"No trials found for id={id_tested}, task_id={task_id_tested}")
    first_trial_row = participant_data.iloc[0]

    prob_matrix = initialize_matrix(task_id_tested)

    # Derive the differing dimensions and feature matrix before the loop so they
    # are defined even when the first selected row is not numbered trial 1.
    differing_dimensions, feature_matrix = identify_and_create_feature_matrix(first_trial_row)

    for _, row in participant_data.iterrows():
        if row['ruleChange'] == 1:
            # Keep a copy of the matrix that was in use before the shift
            shift_matrix = feature_matrix.copy()
            print(f"Shift detected at Trial {row['trial']}. Storing matrix:")
            print(shift_matrix)
            # NOTE(review): the matrix is rebuilt from the first trial's row, not the
            # current row, and prob_matrix is not reset here — confirm this is intended.
            differing_dimensions, feature_matrix = identify_and_create_feature_matrix(first_trial_row)

        # Features of whichever stimulus (1 or 2) the participant picked
        chosen_stimulus = int(row['response'])
        stim_features = {dim: row[f'stim{chosen_stimulus}_{dim}'] for dim in differing_dimensions}

        print(f"\nProcessing Trial {row['trial']} (Shift: {row['ruleChange'] == 1})")
        print(f"Feature Matrix: {feature_matrix}")
        print(f"Chosen Stimulus: {chosen_stimulus}")
        print(f"Feedback: {row['chosen_outcome']}")
        print(f"Stimulus Features: {stim_features}")
        print(f"Differing Dimensions: {differing_dimensions}")

        # Indicator matrix marking the chosen features
        mapped_cells, mapped_matrix = map_features_to_standardized_matrix(stim_features, feature_matrix)
        print(f"Mapped Matrix: {mapped_matrix}")

        # Convert feedback to boolean
        feedback = bool(row['chosen_outcome'])

        # Update the belief and measure the remaining uncertainty
        prob_matrix = update_matrix(prob_matrix, mapped_matrix, feedback)
        entropy = calculate_entropy(prob_matrix)

        print(f"Updated Probability Matrix after Trial {row['trial']}:")
        print(f"Prob Matrix:{prob_matrix}")
        print(f"Entropy: {entropy}")
        print("-" * 30)


In [7]:
# Replay the trials of participant 4 on task 1.
# NOTE(review): the recorded output of this cell is "KeyError: 'stim1_color'",
# so the loaded table apparently has no 'stim1_color' column — verify the
# stimulus feature column names before relying on this cell.
process_first_participant(CrypticCreatures, id_tested=4, task_id_tested=1)


KeyError: 'stim1_color'

In [None]:
# List the distinct participant IDs present in the loaded table
print(CrypticCreatures['id'].unique())

In [None]:
#simulations and tests

In [None]:
# Function to loop through the first participant's trials and understand their structure
def process_first_participant_test(CrypticCreatures, id_tested, task_id_tested):
    """
    Print the structure of every trial of one participant on one task.

    For each trial the outcome, the chosen stimulus, the features of both
    stimuli and the matrix of differing dimensions are printed, along with
    whether that matrix is identical to the previous trial's.

    Parameters:
    - CrypticCreatures: DataFrame of trials. The columns read here are 'id',
      'task_id', 'trial', 'chosen_outcome', 'response' and the
      'stim1_<dimension>' / 'stim2_<dimension>' feature columns.
    - id_tested: Participant ID to select.
    - task_id_tested: Task ID to select.

    Returns:
    - None. All results are printed.
    """
    # Keep only the rows of the requested participant and task
    participant_data = CrypticCreatures[
        (CrypticCreatures['id'] == id_tested) & (CrypticCreatures['task_id'] == task_id_tested)
    ]

    dimensions = ('color', 'hair', 'eyes', 'bumpiness')
    previous_trial_features = None

    for _, row in participant_data.iterrows():
        trial_number = row['trial']
        correctness = row['chosen_outcome']  # Assuming 1 for correct, 0 for incorrect
        chosen_stimulus = row['response']

        # Extract the features of stimuli 1 and 2
        stim1_features = {dim: row[f'stim1_{dim}'] for dim in dimensions}
        stim2_features = {dim: row[f'stim2_{dim}'] for dim in dimensions}

        # Matrix restricted to the dimensions on which the two stimuli differ.
        # Uses the merged helper: the separate identify_differing_dimensions /
        # create_feature_matrix functions are not defined in this notebook.
        _, feature_matrix = identify_and_create_feature_matrix(row)
        print(f"\nTrial {trial_number}")
        print(f"Correctness: {correctness}")
        print(f"Chosen Stimulus: {chosen_stimulus}")
        print(f"Stimulus 1: {stim1_features}")
        print(f"Stimulus 2: {stim2_features}")
        print("Dimension Differences Matrix:")
        print(feature_matrix)

        # Compare the feature values with the previous trial
        if previous_trial_features is not None:
            same_features = feature_matrix.equals(previous_trial_features)
            print(f"Feature values remained the same as the previous trial: {same_features}")
        else:
            print("No previous trial to compare.")

        # Remember this trial's matrix for the next comparison
        previous_trial_features = feature_matrix.copy()

        print("-" * 30)

# Inspect the trial structure of one participant/task combination.
# CrypticCreatures must already hold the loaded trial table when this cell runs.
id_tested = 2
task_id_tested = 1
process_first_participant_test(CrypticCreatures,id_tested,task_id_tested)


In [None]:
# Simulate a short sequence of trials to sanity-check update_matrix.
# Each entry is (chosen mask, feedback); a 1 in the mask marks a chosen cell.
trials = [
    # Trial 1: cells (0, 1) and (1, 1) are chosen, feedback is negative
    ([(0, 1), (0, 1)], False),
    # Trial 2: cells (0, 0) and (1, 1) are chosen, feedback is positive
    ([(1, 0), (0, 1)], True),
]
prob_matrix = initialize_matrix(1)
# Iterate through each trial and update the matrix
for i, (chosen_cells, feedback) in enumerate(trials):
    print(f"Trial {i+1}:")
    print("Before update:")
    print(prob_matrix)

    # Convert the nested-list mask to an array so the element-wise comparisons
    # inside update_matrix operate per cell.
    prob_matrix = update_matrix(prob_matrix, np.array(chosen_cells), feedback)

    print("After update:")
    print(prob_matrix)
    print("-" * 30)

# Final matrix after all trials
print("Final matrix after all trials:")
print(prob_matrix)


In [None]:
#backup code