In [8]:
import copy
import json
from pathlib import Path

import numpy as np
import pandas as pd

# Accumulator for the per-subject result tables; the subject loop further down
# appends one DataFrame per subject and a later cell concatenates them.
final = []

# Gaussian-smoothing configuration for the variance time course.
TRIAL_N = 75 * 10 # 75 trials = 1 minute, so 750 trials = 10 minutes
# Full width at half maximum of the smoothing kernel
# (presumably expressed in trials — TODO confirm).
FWHM = 9
# Standard FWHM -> standard-deviation conversion: sigma = FWHM / (2 * sqrt(2 * ln 2)).
SIGMA = FWHM / (2 * np.sqrt(2 * np.log(2)))

In [9]:
from scipy.ndimage import gaussian_filter1d

# NOTE(review): these three constants repeat, value for value, the definitions
# in the first code cell of this notebook; keep the two copies in sync (or drop one).
TRIAL_N = 75 * 10 # 75 trials = 1 minute, so 750 trials = 10 minutes
# Full width at half maximum of the smoothing kernel
# (presumably expressed in trials — TODO confirm).
FWHM = 9
# Standard FWHM -> standard-deviation conversion: sigma = FWHM / (2 * sqrt(2 * ln 2)).
SIGMA = FWHM / (2 * np.sqrt(2 * np.log(2)))

def process_responses(trials):
    """Assign at most one response time (RT) to every trial.

    A button press is recorded on the trial during which it physically
    happened, but it may actually be a (late) reaction to the previous trial.
    Presses are therefore attributed in two passes:

    1. Unambiguous presses: a very early press (< 320) is credited to the
       previous trial when that trial was not a mountain; a late press
       (> 560) is credited to the current trial when it is not a mountain.
       When several presses compete for one trial the smallest RT wins.
    2. Ambiguous presses (everything else): credited to whichever of the two
       neighbouring trials still has no RT; if both are free, a neighbouring
       mountain trial pushes the press to the other trial, otherwise the
       press is split at 400.

    A press credited to the previous trial is stored as ``800 + rt`` (800 is
    one trial length, consistent with 75 trials per minute; the RT unit is
    presumably milliseconds — TODO confirm against the recorder).

    Args:
        trials: sequence of dicts, each with the keys ``'responses'`` (list of
            press times within the trial), ``'is_mountain'`` and
            ``'start_timestamp'``. The input is not modified.

    Returns:
        A tuple ``(response_times, start_timestamps, is_mountains)`` of three
        lists with one entry per trial. ``response_times`` holds ``None`` for
        trials that received no press. An empty ``trials`` yields three empty
        lists.
    """
    trial_ms = 800      # offset added when a press is credited to the previous trial
    early_ms = 320      # presses earlier than this belong to the previous trial
    late_ms = 560       # presses later than this belong to the current trial
    split_ms = 400      # tie-break threshold for fully ambiguous presses
    unset = float('inf')

    n_trials = len(trials)
    if n_trials == 0:
        return [], [], []

    # Size the result by the data actually received so the three returned
    # lists always have matching lengths.
    response_times = [unset] * n_trials

    # Edge case: the first trial has no predecessor, take its first press as-is.
    if trials[0]['responses']:
        response_times[0] = trials[0]['responses'][0]

    # Pass 1: unambiguous presses. Presses that cannot be decided yet are kept
    # in a side list, so the caller's trial dicts are never mutated.
    ambiguous = [[] for _ in range(n_trials)]
    for i in range(1, n_trials):
        prev_is_mountain = trials[i - 1]['is_mountain']
        cur_is_mountain = trials[i]['is_mountain']
        for rt in trials[i]['responses']:
            if rt < early_ms and not prev_is_mountain:
                response_times[i - 1] = min(trial_ms + rt, response_times[i - 1])
            elif rt > late_ms and not cur_is_mountain:
                response_times[i] = min(rt, response_times[i])
            else:
                ambiguous[i].append(rt)

    # Pass 2: ambiguous presses. The "free" flags are re-evaluated for every
    # press because an earlier press of the same trial may have filled a slot.
    for i in range(1, n_trials):
        for rt in ambiguous[i]:
            prev_free = response_times[i - 1] == unset
            cur_free = response_times[i] == unset
            if prev_free and not cur_free:
                response_times[i - 1] = trial_ms + rt
            elif cur_free and not prev_free:
                response_times[i] = rt
            elif prev_free and cur_free:
                if trials[i - 1]['is_mountain']:
                    response_times[i] = rt
                elif trials[i]['is_mountain']:
                    response_times[i - 1] = trial_ms + rt
                elif rt < split_ms:
                    response_times[i - 1] = trial_ms + rt
                else:
                    response_times[i] = rt
            # Both neighbours already have an RT: the press is dropped.

    # Replace the inf placeholder with None.
    processed = [None if x == unset else x for x in response_times]
    start_timestamps = [trial['start_timestamp'] for trial in trials]
    is_mountains = [trial['is_mountain'] for trial in trials]
    return processed, start_timestamps, is_mountains

def label(response_times, start_timestamps, is_mountains, sigma=None):
    """Label trials w.r.t. RTV, i.e. the trial-to-trial variation in response time.

    Steps: mask mountain trials, z-normalise the remaining RTs, take the
    absolute deviation as the variance time course (VTC), linearly interpolate
    the gaps, smooth with a Gaussian kernel and split at the median of the
    smoothed VTC.

    Args:
        response_times: per-trial RTs; ``None`` marks a trial without a press.
            The sequence itself is left untouched.
        start_timestamps: per-trial start timestamps (copied into the output).
        is_mountains: per-trial flags; RTs of flagged trials are ignored.
        sigma: standard deviation of the Gaussian smoothing kernel. Defaults
            to the module-level ``SIGMA`` when omitted.

    Returns:
        ``(zone_labels, vtc_smoothed, responses_bool)`` where

        * ``zone_labels`` is a list of tuples
          ``(start_timestamp, zone, smoothed_vtc, is_mountain, pressed,
          trial_index, response_time)``; ``zone`` is 1 when the smoothed VTC
          is at or below its median ("in the zone") and 0 otherwise, and
          ``response_time`` is NaN for mountain trials and trials without RT;
        * ``vtc_smoothed`` is the smoothed VTC as a NumPy array;
        * ``responses_bool`` is a list of booleans, the truthiness of each
          entry of ``response_times`` (so ``None`` and ``0`` count as no press).

    Raises:
        ValueError: if the VTC has no finite value to interpolate from (no
            usable RT at all, or all usable RTs identical).
    """
    if sigma is None:
        sigma = SIGMA

    # Determine if there was a response. This is taken from the argument,
    # before mountain RTs are masked, rather than from any global variable.
    responses_bool = [bool(resp) for resp in response_times]

    # Work on a float copy: None becomes NaN and the caller's data is not changed.
    rts = np.array(response_times, dtype=float)
    rts[np.array(is_mountains, dtype=bool)] = np.nan

    # Z-transform the sequence.
    z_normalized_rt = (rts - np.nanmean(rts)) / np.nanstd(rts)

    # Calculate the variance time course.
    vtc = np.abs(z_normalized_rt - np.nanmean(z_normalized_rt))

    # Linearly interpolate missing values in the VTC.
    missing = np.isnan(vtc)
    if not (~missing).any():
        raise ValueError(
            "cannot compute the variance time course: need at least one "
            "non-mountain response time and a non-zero spread of response times"
        )
    vtc[missing] = np.interp(
        np.flatnonzero(missing), np.flatnonzero(~missing), vtc[~missing]
    )

    # Smooth the VTC.
    vtc_smoothed = gaussian_filter1d(vtc, sigma=sigma)

    # "In the zone" (1) when at or below the median, "out of the zone" (0) otherwise.
    median_vtc = np.median(vtc_smoothed)
    zone_labels = [
        (
            start_timestamps[i],
            1 if value <= median_vtc else 0,
            value,
            is_mountains[i],
            responses_bool[i],
            i,
            rts[i],
        )
        for i, value in enumerate(vtc_smoothed)
    ]

    return zone_labels, vtc_smoothed, responses_bool

In [10]:
# Start from an empty accumulator every time this cell runs; otherwise
# re-executing the cell appends a second copy of every subject to `final`.
final = []

for subject_id in range(2, 12):
    # Load the three sessions of this subject: the reference labels (CSV) and
    # the raw trial recordings (JSON).
    raw = []
    processed = []
    for i in range(1, 4):
        processed.append(pd.read_csv(f'data/subject{subject_id}/gradcpt-session1-{i}.csv'))
        with open(f'data/subject{subject_id}/gradcpt-raw-session-1-{i}.json', 'r') as file:
            raw.append(json.load(file))

    # Recompute the labels from the raw recordings and compare them with the
    # 'in_the_zone' column of the reference CSV.
    labels = []
    vtcs = []
    responses_bools = []
    count = 0
    total = 0
    for i in range(len(raw)):
        # NOTE: the name `responses` is kept on purpose, in case `label` looks
        # it up as a module-level variable.
        responses, start_timestamps, is_mountains = process_responses(raw[i])
        labels_i, vtc_i, rsp_i = label(responses, start_timestamps, is_mountains)
        n = len(labels_i)

        reference_zone = processed[i]['in_the_zone']
        count_i = sum(
            1 for j in range(n) if reference_zone[j] == labels_i[j][1]
        )
        if count_i != n:
            print(f'{i} is not matching')
        count += count_i
        total += n
        labels.append(labels_i)
        vtcs.append(vtc_i)
        responses_bools.append(rsp_i)

    print(f'{count}/{total} matches')

    # One DataFrame per session, tagged with session number and subject id.
    dfs = []
    for idx, sublist in enumerate(labels):
        df = pd.DataFrame(sublist, columns=['Timestamp', 'Zone', 'Smoothed VTC', 'Mountain', 'Button Pressed', 'Trial', 'Response Time'])
        df['Session'] = idx + 1  # sessions are numbered from 1
        df['Subject'] = subject_id
        dfs.append(df)

    # Concatenate the sessions of this subject into one table.
    data = pd.concat(dfs, ignore_index=True)

    # Reorder columns.
    new_order = ['Timestamp', 'Subject', 'Session', 'Trial', 'Mountain', 'Button Pressed', 'Response Time', 'Smoothed VTC', 'Zone']
    data = data.reindex(columns=new_order)

    final.append(data)

0 is not matching
1 is not matching
2 is not matching
2196/2250 matches
0 is not matching
1 is not matching
2 is not matching
2206/2250 matches
0 is not matching
1 is not matching
2 is not matching
2070/2250 matches
0 is not matching
1 is not matching
2 is not matching
2214/2250 matches
0 is not matching
1 is not matching
2 is not matching
2170/2250 matches
0 is not matching
1 is not matching
2 is not matching
2074/2250 matches
0 is not matching
1 is not matching
2 is not matching
2182/2250 matches
0 is not matching
1 is not matching
2 is not matching
2216/2250 matches
0 is not matching
1 is not matching
2 is not matching
2206/2250 matches
0 is not matching
1 is not matching
2 is not matching
2232/2250 matches


In [11]:
# Stack the per-subject tables row-wise into one frame with a fresh 0..N-1 index.
all_subjects_gradcpt = pd.concat(objs=final, axis=0, ignore_index=True)

In [12]:
# Create the output folder first: `to_csv` does not create missing
# directories and fails when 'data/res' is absent.
output_csv = Path('data/res/all_subjects_gradcpt.csv')
output_csv.parent.mkdir(parents=True, exist_ok=True)
all_subjects_gradcpt.to_csv(output_csv, index=False)

In [13]:
# Show the combined table as this cell's output (pandas abbreviates long frames when rendering).
all_subjects_gradcpt

Unnamed: 0,Timestamp,Subject,Session,Trial,Mountain,Button Pressed,Response Time,Smoothed VTC,Zone
0,1.711973e+09,2,1,0,False,True,493.792057,0.726505,0
1,1.711973e+09,2,1,1,False,True,608.792067,0.728058,0
2,1.711973e+09,2,1,2,False,True,492.748976,0.733312,0
3,1.711973e+09,2,1,3,False,True,577.411890,0.745615,0
4,1.711973e+09,2,1,4,False,True,681.189060,0.768141,0
...,...,...,...,...,...,...,...,...,...
22495,1.713257e+09,11,3,745,False,True,644.381046,0.625752,1
22496,1.713257e+09,11,3,746,False,True,756.016970,0.602916,1
22497,1.713257e+09,11,3,747,True,False,,0.579492,1
22498,1.713257e+09,11,3,748,False,True,731.014013,0.560432,1
