In [13]:
from utils import load_session_data
from utils import calculate_session_duration, count_rewards

%load_ext autoreload
%autoreload 2

The autoreload extension is already loaded. To reload it, use:
  %reload_ext autoreload


In [3]:
# mat_path = '/Users/doug.ollerenshaw/code/aind-workbench/data/m699461d20231217/sorted/session/m699461d20231217_sessionData_behav.mat'
mat_path = '/Volumes/scratch/sueSu/KJ005/mKJ005d20230624/sorted/session/mKJ005d20230624_sessionData_behav.mat'
beh_df_mat, licks_L_mat, licks_R_mat = load_session_data(mat_path)

In [14]:
# Test the session duration calculation
session_duration = calculate_session_duration(beh_df_mat)
print(f"Session duration: {session_duration} seconds ({session_duration/60:.2f} minutes)")

Session duration: 3565.125 seconds (59.42 minutes)


In [15]:
# Analyze reward data in detail
print("=== REWARD ANALYSIS ===")
print(f"DataFrame shape: {beh_df_mat.shape}")
print(f"Columns: {list(beh_df_mat.columns)}")

# Check reward columns
if 'rewardL' in beh_df_mat.columns and 'rewardR' in beh_df_mat.columns:
    reward_l_sum = beh_df_mat['rewardL'].sum()
    reward_r_sum = beh_df_mat['rewardR'].sum()
    total_rewards_sum = reward_l_sum + reward_r_sum
    print(f"\nSum of rewardL: {reward_l_sum}")
    print(f"Sum of rewardR: {reward_r_sum}")
    print(f"Total rewards (sum): {total_rewards_sum}")
    
    # Check individual reward values
    print(f"\nUnique rewardL values: {sorted(beh_df_mat['rewardL'].dropna().unique())}")
    print(f"Unique rewardR values: {sorted(beh_df_mat['rewardR'].dropna().unique())}")
    
    if 'rewardTime' in beh_df_mat.columns:
        unique_reward_times = beh_df_mat['rewardTime'].dropna().nunique()
        all_reward_times = beh_df_mat['rewardTime'].dropna()
        print(f"\nNumber of unique rewardTime values: {unique_reward_times}")
        print(f"Total rewardTime entries (non-null): {len(all_reward_times)}")
        print(f"Sample rewardTime values: {all_reward_times.head(10).tolist()}")
        
        # Check if rewardTime has duplicates
        duplicated_times = all_reward_times[all_reward_times.duplicated()]
        if len(duplicated_times) > 0:
            print(f"\nFound {len(duplicated_times)} duplicated rewardTime values!")
            print(f"Example duplicates: {duplicated_times.head(10).tolist()}")
        else:
            print("\nNo duplicated rewardTime values found.")

=== REWARD ANALYSIS ===
DataFrame shape: (489, 10)
Columns: ['trialType', 'trialEnd', 'CSon', 'rewardL', 'rewardR', 'respondTime', 'rewardTime', 'rewardProbL', 'rewardProbR', 'laser']

Sum of rewardL: 69.0
Sum of rewardR: 170.0
Total rewards (sum): 239.0

Unique rewardL values: [np.float64(0.0), np.float64(1.0)]
Unique rewardR values: [np.float64(0.0), np.float64(1.0)]

Number of unique rewardTime values: 416
Total rewardTime entries (non-null): 416
Sample rewardTime values: [459341.0, 469671.0, 475056.0, 484692.0, 489207.0, 497497.0, 550197.0, 563150.0, 571506.0, 578704.0]

No duplicated rewardTime values found.


In [16]:
# Try the count_rewards function and handle the error
try:
    total_rewards = count_rewards(beh_df_mat)
    print(f"Total rewards counted: {total_rewards}")
except ValueError as e:
    print(f"Validation error: {e}")
    print("\nThis suggests the validation logic needs to be adjusted for the data structure.")

Validation error: Sum of rewardL and rewardR (239) does not match number of unique rewardTime values (416)

This suggests the validation logic needs to be adjusted for the data structure.


In [6]:
beh_df_mat.head(30)

Unnamed: 0,trialType,trialEnd,CSon,rewardL,rewardR,respondTime,rewardTime,rewardProbL,rewardProbR,laser
0,CSplus,468839.0,458393.0,0.0,,459140.0,459341.0,10.0,90.0,0.0
1,CSplus,474260.0,468839.0,0.0,,469470.0,469671.0,10.0,90.0,0.0
2,CSplus,479481.0,474260.0,0.0,,474855.0,475056.0,10.0,90.0,0.0
3,CSplus,483981.0,479481.0,,,,,10.0,90.0,0.0
4,CSplus,488733.0,483981.0,0.0,,484491.0,484692.0,10.0,90.0,0.0
5,CSplus,496730.0,488733.0,0.0,,489006.0,489207.0,10.0,90.0,0.0
6,CSplus,501088.0,496730.0,0.0,,497296.0,497497.0,10.0,90.0,0.0
7,CSplus,507058.0,501088.0,,,,,10.0,90.0,0.0
8,CSplus,511819.0,507058.0,,,,,10.0,90.0,0.0
9,CSplus,549783.0,511819.0,,,,,10.0,90.0,0.0


In [7]:
beh_df_mat.tail()

Unnamed: 0,trialType,trialEnd,CSon,rewardL,rewardR,respondTime,rewardTime,rewardProbL,rewardProbR,laser
484,CSplus,3970363.0,3963035.0,,,3964841.0,,10.0,50.0,0.0
485,CSplus,3976989.0,3970363.0,,,,,10.0,50.0,0.0
486,CSplus,3981566.0,3976989.0,,,,,10.0,50.0,0.0
487,CSminus,3987035.0,3981566.0,,,,,10.0,50.0,0.0
488,CSplus,,3987035.0,,,4023518.0,,10.0,50.0,0.0


In [17]:
# Calculate the mean of non-NaN reward values
reward_l_mean = beh_df_mat['rewardL'].dropna().mean()
reward_r_mean = beh_df_mat['rewardR'].dropna().mean()

print(f"Mean of non-NaN rewardL values: {reward_l_mean:.3f}")
print(f"Mean of non-NaN rewardR values: {reward_r_mean:.3f}")

# Also show the reward probabilities for comparison
print(f"\nReward probabilities in the data:")
print(f"Unique rewardProbL values: {sorted(beh_df_mat['rewardProbL'].dropna().unique())}")
print(f"Unique rewardProbR values: {sorted(beh_df_mat['rewardProbR'].dropna().unique())}")

# Convert probabilities to proportions (assuming they're percentages)
prob_l_values = beh_df_mat['rewardProbL'].dropna().unique()
prob_r_values = beh_df_mat['rewardProbR'].dropna().unique()

print(f"\nAs proportions:")
print(f"rewardProbL as proportions: {sorted(prob_l_values / 100)}")
print(f"rewardProbR as proportions: {sorted(prob_r_values / 100)}")

# Compare means to the most common probability settings
most_common_prob_l = beh_df_mat['rewardProbL'].mode().iloc[0] / 100
most_common_prob_r = beh_df_mat['rewardProbR'].mode().iloc[0] / 100

print(f"\nComparison:")
print(f"Most common rewardProbL: {most_common_prob_l:.3f}, Actual mean rewardL: {reward_l_mean:.3f}")
print(f"Most common rewardProbR: {most_common_prob_r:.3f}, Actual mean rewardR: {reward_r_mean:.3f}")

Mean of non-NaN rewardL values: 0.539
Mean of non-NaN rewardR values: 0.590

Reward probabilities in the data:
Unique rewardProbL values: [np.float64(10.0), np.float64(50.0), np.float64(90.0)]
Unique rewardProbR values: [np.float64(10.0), np.float64(50.0), np.float64(90.0)]

As proportions:
rewardProbL as proportions: [np.float64(0.1), np.float64(0.5), np.float64(0.9)]
rewardProbR as proportions: [np.float64(0.1), np.float64(0.5), np.float64(0.9)]

Comparison:
Most common rewardProbL: 0.100, Actual mean rewardL: 0.539
Most common rewardProbR: 0.500, Actual mean rewardR: 0.590


In [18]:
# Group by probability settings and calculate means
grouped_analysis = beh_df_mat.groupby(['rewardProbL', 'rewardProbR']).agg({
    'rewardL': lambda x: x.dropna().mean(),
    'rewardR': lambda x: x.dropna().mean(),
    'rewardL': [lambda x: x.dropna().mean(), 'count'],
    'rewardR': [lambda x: x.dropna().mean(), 'count']
}).round(3)

print("Reward delivery rates by probability settings:")
print(grouped_analysis)

Reward delivery rates by probability settings:
                           rewardL          rewardR      
                        <lambda_0> count <lambda_0> count
rewardProbL rewardProbR                                  
10.0        50.0             0.147    34      0.448    87
            90.0             0.091    11      0.906    53
50.0        10.0             0.556    18      0.000     2
            50.0               NaN     0      0.500    12
            90.0               NaN     0      0.929    14
90.0        10.0             0.842    38      0.100    50
            50.0             0.778    27      0.667     9
            90.0               NaN     0      0.869    61


In [8]:
beh_df_mat['trialType'].unique()

array([np.str_('CSplus'), np.str_('CSminus')], dtype=object)

In [9]:
(beh_df_mat['trialEnd'].max() - beh_df_mat['trialEnd'].min())/1000/60

np.float64(58.6366)

In [10]:
(beh_df_mat['rewardTime'].max() - beh_df_mat['rewardTime'].min())/1000/60

np.float64(58.205333333333336)

In [11]:
from scipy.io import loadmat

mat = loadmat(mat_path)
beh = mat.get('behSessionData', mat.get('sessionData'))
print(beh.dtype.names)

('trialType', 'trialEnd', 'CSon', 'licksL', 'licksR', 'rewardL', 'rewardR', 'respondTime', 'rewardTime', 'rewardProbL', 'rewardProbR', 'allLicks', 'laser')


In [12]:
mat = loadmat(mat_path)
print(mat.keys())

dict_keys(['__header__', '__version__', '__globals__', 'behSessionData', 'blockSwitch', 'blockSwitchL', 'blockSwitchR'])


In [34]:
mat['behSessionData']

array([[(array(['CSplus'], dtype='<U6'), array([[51200]], dtype=uint16), array([[45641]], dtype=uint16), array([[45877, 46035, 46205]], dtype=uint16), array([], shape=(0, 0), dtype=uint8), array([[0]], dtype=uint8), array([[nan]]), array([[45877]], dtype=uint16), array([[46128]], dtype=uint16), array([[10]], dtype=uint8), array([[90]], dtype=uint8), array([[  45877,   46035,   46205, ..., 4008270, 4008452, 4008611]],
              shape=(1, 4771), dtype=int32), array([[0]], dtype=uint8), array([[1]], dtype=uint8))                                                                                                                                                                                                                                                                                                                                  ,
        (array(['CSplus'], dtype='<U6'), array([[57749]], dtype=uint16), array([[51200]], dtype=uint16), array([[51432, 51655, 51759, 53867, 54033, 54167, 55