# Mousetracking Analyses

## Information on the event tags
- CFS: Cursor Fixation Start (triggered when the mouse hovers over a fixation point, i.e. the target square)
- CFB: Cursor Fixation Break (mouse cursor left the central fixation square before the 1 second required for options to reveal)
- CFD: ?? (always immediately followed by a CFB)
    - I think this is the period where the shape targets are actively being hidden
    - It seems like CFS activates the tile targets, so when there is a CFB after a CFS, there is also a CFD where the code is keeping those targets hidden
    - This is why, when there is a CFB at the start of a choice before any CFS, there is no CFD
- CFE: Cursor Fixation End (end 'fixation'; tile choice options revealed)
- MOS: Mouse Over Square (mouse cursor on a tile)
- MHS: Mouse Hit Square (hit revealed)
- MMS: Mouse Miss Square (miss revealed)
- CGB: Clear Game Board (end of trial, reset the game board)

## Names of files
- YA/MA/OA(number) (e.g., YA01) = subject's raw data
- YA/MA/OA(number)_mousetrack (e.g., YA01_mousetrack) = subject's raw data reorganized into a dataframe with time, x, y, and id columns
- YA/MA/OA(number)_events (e.g., YA01_events) = all of a given subject's "events" (CFS, MMS, MOS, CFD, etc.), with time, x, y, and id columns
- YA/MA/OA(number)_coords (e.g., YA01_coords) = all of a given subject's x-y coordinates with NO events (CFS, MMS, MOS, CFD, etc.), with time, x, y, and id columns

## Important Information
CF coordinates = (199.71440661348086, 159.97512720217148)

## Project Setup

In [1]:
# import packages
import pandas as pd
import os
import numpy as np
#import sklearn as sl
#from sklearn.linear_model import LinearRegression as lr
import statsmodels.api as sm
import statsmodels.formula.api as smf
import matplotlib.pyplot as plt
from scipy.stats import norm
from numpy import isnan
from scipy import stats
from scipy.stats import pearsonr
import seaborn as sns
import statistics

# set working directory
# NOTE: this is an absolute path on one user's machine. The pd.read_csv calls in this
# file use bare file names, so they only resolve after this chdir succeeds.
os.chdir('/Users/agshivers/Library/CloudStorage/Box-Box/Bakkour-Lab/projects/Battleship_task/data/all_raw_data') 

### Other setup for later in the project

In [3]:
# print lists of subject IDs to prevent typing it all out
# YA - print(', '.join("'YA{:02}'".format(i) for i in range(1, 58)), end='')
# MA - print(', '.join("'MA{:02}'".format(i) for i in range(1, 60)), end='')
# OA - print(', '.join("'OA{:02}'".format(i) for i in range(1, 54)), end='')

# list of subject IDs -- 2 lists because YA has different output format than OA and MA
# YA: ['YA01', 'YA02', 'YA03', 'YA04', 'YA05', 'YA06', 'YA07', 'YA08', 'YA09', 'YA10', 'YA11', 'YA12', 'YA13', 'YA14', 'YA15', 'YA16', 'YA17', 'YA18', 'YA19', 'YA20', 'YA21', 'YA22', 'YA23', 'YA24', 'YA25', 'YA26', 'YA27', 'YA28', 'YA29', 'YA30', 'YA31', 'YA32', 'YA33', 'YA34', 'YA35', 'YA36', 'YA37', 'YA38', 'YA39', 'YA40', 'YA41', 'YA42', 'YA43', 'YA44', 'YA45', 'YA46', 'YA47', 'YA48', 'YA49', 'YA50', 'YA51', 'YA52', 'YA53', 'YA54', 'YA55', 'YA56', 'YA57']
# MA/OA:  ['MA01', 'MA02', 'MA03', 'MA04', 'MA05', 'MA06', 'MA07', 'MA08', 'MA09', 'MA10', 'MA11', 'MA12', 'MA13', 'MA14', 'MA15', 'MA16', 'MA17', 'MA18', 'MA19', 'MA20', 'MA21', 'MA22', 'MA23', 'MA24', 'MA25', 'MA26', 'MA27', 'MA28', 'MA29', 'MA30', 'MA31', 'MA32', 'MA33', 'MA34', 'MA35', 'MA36', 'MA37', 'MA38', 'MA39', 'MA40', 'MA41', 'MA42', 'MA43', 'MA44', 'MA45', 'MA46', 'MA47', 'MA48', 'MA49', 'MA50', 'MA51', 'MA52', 'MA53', 'MA54', 'MA55', 'MA56', 'MA57', 'MA58', 'MA59', 'OA01', 'OA02', 'OA03', 'OA04', 'OA05', 'OA06', 'OA07', 'OA08', 'OA09', 'OA10', 'OA11', 'OA12', 'OA13', 'OA14', 'OA15', 'OA16', 'OA17', 'OA18', 'OA19', 'OA20', 'OA21', 'OA22', 'OA23', 'OA24', 'OA25', 'OA26', 'OA27', 'OA28', 'OA29', 'OA30', 'OA31', 'OA32', 'OA33', 'OA34', 'OA35', 'OA36', 'OA37', 'OA38', 'OA39', 'OA40', 'OA41', 'OA42', 'OA43', 'OA44', 'OA45', 'OA46', 'OA47', 'OA48', 'OA49', 'OA50', 'OA51', 'OA52', 'OA53']

# print list of mousetrack file names
#mousetracks = ','.join(["'" + s + "_mousetrack'" for s in ['YA01', 'YA02', 'YA03', 'YA04', 'YA05', 'YA06', 'YA07', 'YA08', 'YA09', 'YA10', 'YA11', 'YA12', 'YA13', 'YA14', 'YA15', 'YA16', 'YA17', 'YA18', 'YA19', 'YA20', 'YA21', 'YA22', 'YA23', 'YA24', 'YA25', 'YA26', 'YA27', 'YA28', 'YA29', 'YA30', 'YA31', 'YA32', 'YA33', 'YA34', 'YA35', 'YA36', 'YA37', 'YA38', 'YA39', 'YA40', 'YA41', 'YA42', 'YA43', 'YA44', 'YA45', 'YA46', 'YA47', 'YA48', 'YA49', 'YA50', 'YA51', 'YA52', 'YA53', 'YA54', 'YA55', 'YA56', 'YA57','MA01', 'MA02', 'MA03', 'MA04', 'MA05', 'MA06', 'MA07', 'MA08', 'MA09', 'MA10', 'MA11', 'MA12', 'MA13', 'MA14', 'MA15', 'MA16', 'MA17', 'MA18', 'MA19', 'MA20', 'MA21', 'MA22', 'MA23', 'MA24', 'MA25', 'MA26', 'MA27', 'MA28', 'MA29', 'MA30', 'MA31', 'MA32', 'MA33', 'MA34', 'MA35', 'MA36', 'MA37', 'MA38', 'MA39', 'MA40', 'MA41', 'MA42', 'MA43', 'MA44', 'MA45', 'MA46', 'MA47', 'MA48', 'MA49', 'MA50', 'MA51', 'MA52', 'MA53', 'MA54', 'MA55', 'MA56', 'MA57', 'MA58', 'MA59', 'OA01', 'OA02', 'OA03', 'OA04', 'OA05', 'OA06', 'OA07', 'OA08', 'OA09', 'OA10', 'OA11', 'OA12', 'OA13', 'OA14', 'OA15', 'OA16', 'OA17', 'OA18', 'OA19', 'OA20', 'OA21', 'OA22', 'OA23', 'OA24', 'OA25', 'OA26', 'OA27', 'OA28', 'OA29', 'OA30', 'OA31', 'OA32', 'OA33', 'OA34', 'OA35', 'OA36', 'OA37', 'OA38', 'OA39', 'OA40', 'OA41', 'OA42', 'OA43', 'OA44', 'OA45', 'OA46', 'OA47', 'OA48', 'OA49', 'OA50', 'OA51', 'OA52', 'OA53']])
#print(mousetracks)
# 'YA01_mousetrack','YA02_mousetrack','YA03_mousetrack','YA04_mousetrack','YA05_mousetrack','YA06_mousetrack','YA07_mousetrack','YA08_mousetrack','YA09_mousetrack','YA10_mousetrack','YA11_mousetrack','YA12_mousetrack','YA13_mousetrack','YA14_mousetrack','YA15_mousetrack','YA16_mousetrack','YA17_mousetrack','YA18_mousetrack','YA19_mousetrack','YA20_mousetrack','YA21_mousetrack','YA22_mousetrack','YA23_mousetrack','YA24_mousetrack','YA25_mousetrack','YA26_mousetrack','YA27_mousetrack','YA28_mousetrack','YA29_mousetrack','YA30_mousetrack','YA31_mousetrack','YA32_mousetrack','YA33_mousetrack','YA34_mousetrack','YA35_mousetrack','YA36_mousetrack','YA37_mousetrack','YA38_mousetrack','YA39_mousetrack','YA40_mousetrack','YA41_mousetrack','YA42_mousetrack','YA43_mousetrack','YA44_mousetrack','YA45_mousetrack','YA46_mousetrack','YA47_mousetrack','YA48_mousetrack','YA49_mousetrack','YA50_mousetrack','YA51_mousetrack','YA52_mousetrack','YA53_mousetrack','YA54_mousetrack','YA55_mousetrack','YA56_mousetrack','YA57_mousetrack','MA01_mousetrack','MA02_mousetrack','MA03_mousetrack','MA04_mousetrack','MA05_mousetrack','MA06_mousetrack','MA07_mousetrack','MA08_mousetrack','MA09_mousetrack','MA10_mousetrack','MA11_mousetrack','MA12_mousetrack','MA13_mousetrack','MA14_mousetrack','MA15_mousetrack','MA16_mousetrack','MA17_mousetrack','MA18_mousetrack','MA19_mousetrack','MA20_mousetrack','MA21_mousetrack','MA22_mousetrack','MA23_mousetrack','MA24_mousetrack','MA25_mousetrack','MA26_mousetrack','MA27_mousetrack','MA28_mousetrack','MA29_mousetrack','MA30_mousetrack','MA31_mousetrack','MA32_mousetrack','MA33_mousetrack','MA34_mousetrack','MA35_mousetrack','MA36_mousetrack','MA37_mousetrack','MA38_mousetrack','MA39_mousetrack','MA40_mousetrack','MA41_mousetrack','MA42_mousetrack','MA43_mousetrack','MA44_mousetrack','MA45_mousetrack','MA46_mousetrack','MA47_mousetrack','MA48_mousetrack','MA49_mousetrack','MA50_mousetrack','MA51_mousetrack','MA52_mousetrack','MA53_mousetrack','MA54_mousetrack','MA55_mousetrack','MA56_mousetrack','MA57_mousetrack','MA58_mousetrack','MA59_mousetrack','OA01_mousetrack','OA02_mousetrack','OA03_mousetrack','OA04_mousetrack','OA05_mousetrack','OA06_mousetrack','OA07_mousetrack','OA08_mousetrack','OA09_mousetrack','OA10_mousetrack','OA11_mousetrack','OA12_mousetrack','OA13_mousetrack','OA14_mousetrack','OA15_mousetrack','OA16_mousetrack','OA17_mousetrack','OA18_mousetrack','OA19_mousetrack','OA20_mousetrack','OA21_mousetrack','OA22_mousetrack','OA23_mousetrack','OA24_mousetrack','OA25_mousetrack','OA26_mousetrack','OA27_mousetrack','OA28_mousetrack','OA29_mousetrack','OA30_mousetrack','OA31_mousetrack','OA32_mousetrack','OA33_mousetrack','OA34_mousetrack','OA35_mousetrack','OA36_mousetrack','OA37_mousetrack','OA38_mousetrack','OA39_mousetrack','OA40_mousetrack','OA41_mousetrack','OA42_mousetrack','OA43_mousetrack','OA44_mousetrack','OA45_mousetrack','OA46_mousetrack','OA47_mousetrack','OA48_mousetrack','OA49_mousetrack','OA50_mousetrack','OA51_mousetrack','OA52_mousetrack','OA53_mousetrack'

# print list of events file names
#events = ','.join(["'" + s + "_events'" for s in ['YA01', 'YA02', 'YA03', 'YA04', 'YA05', 'YA06', 'YA07', 'YA08', 'YA09', 'YA10', 'YA11', 'YA12', 'YA13', 'YA14', 'YA15', 'YA16', 'YA17', 'YA18', 'YA19', 'YA20', 'YA21', 'YA22', 'YA23', 'YA24', 'YA25', 'YA26', 'YA27', 'YA28', 'YA29', 'YA30', 'YA31', 'YA32', 'YA33', 'YA34', 'YA35', 'YA36', 'YA37', 'YA38', 'YA39', 'YA40', 'YA41', 'YA42', 'YA43', 'YA44', 'YA45', 'YA46', 'YA47', 'YA48', 'YA49', 'YA50', 'YA51', 'YA52', 'YA53', 'YA54', 'YA55', 'YA56', 'YA57','MA01', 'MA02', 'MA03', 'MA04', 'MA05', 'MA06', 'MA07', 'MA08', 'MA09', 'MA10', 'MA11', 'MA12', 'MA13', 'MA14', 'MA15', 'MA16', 'MA17', 'MA18', 'MA19', 'MA20', 'MA21', 'MA22', 'MA23', 'MA24', 'MA25', 'MA26', 'MA27', 'MA28', 'MA29', 'MA30', 'MA31', 'MA32', 'MA33', 'MA34', 'MA35', 'MA36', 'MA37', 'MA38', 'MA39', 'MA40', 'MA41', 'MA42', 'MA43', 'MA44', 'MA45', 'MA46', 'MA47', 'MA48', 'MA49', 'MA50', 'MA51', 'MA52', 'MA53', 'MA54', 'MA55', 'MA56', 'MA57', 'MA58', 'MA59', 'OA01', 'OA02', 'OA03', 'OA04', 'OA05', 'OA06', 'OA07', 'OA08', 'OA09', 'OA10', 'OA11', 'OA12', 'OA13', 'OA14', 'OA15', 'OA16', 'OA17', 'OA18', 'OA19', 'OA20', 'OA21', 'OA22', 'OA23', 'OA24', 'OA25', 'OA26', 'OA27', 'OA28', 'OA29', 'OA30', 'OA31', 'OA32', 'OA33', 'OA34', 'OA35', 'OA36', 'OA37', 'OA38', 'OA39', 'OA40', 'OA41', 'OA42', 'OA43', 'OA44', 'OA45', 'OA46', 'OA47', 'OA48', 'OA49', 'OA50', 'OA51', 'OA52', 'OA53']])
#print(events)
# 'YA01_events','YA02_events','YA03_events','YA04_events','YA05_events','YA06_events','YA07_events','YA08_events','YA09_events','YA10_events','YA11_events','YA12_events','YA13_events','YA14_events','YA15_events','YA16_events','YA17_events','YA18_events','YA19_events','YA20_events','YA21_events','YA22_events','YA23_events','YA24_events','YA25_events','YA26_events','YA27_events','YA28_events','YA29_events','YA30_events','YA31_events','YA32_events','YA33_events','YA34_events','YA35_events','YA36_events','YA37_events','YA38_events','YA39_events','YA40_events','YA41_events','YA42_events','YA43_events','YA44_events','YA45_events','YA46_events','YA47_events','YA48_events','YA49_events','YA50_events','YA51_events','YA52_events','YA53_events','YA54_events','YA55_events','YA56_events','YA57_events','MA01_events','MA02_events','MA03_events','MA04_events','MA05_events','MA06_events','MA07_events','MA08_events','MA09_events','MA10_events','MA11_events','MA12_events','MA13_events','MA14_events','MA15_events','MA16_events','MA17_events','MA18_events','MA19_events','MA20_events','MA21_events','MA22_events','MA23_events','MA24_events','MA25_events','MA26_events','MA27_events','MA28_events','MA29_events','MA30_events','MA31_events','MA32_events','MA33_events','MA34_events','MA35_events','MA36_events','MA37_events','MA38_events','MA39_events','MA40_events','MA41_events','MA42_events','MA43_events','MA44_events','MA45_events','MA46_events','MA47_events','MA48_events','MA49_events','MA50_events','MA51_events','MA52_events','MA53_events','MA54_events','MA55_events','MA56_events','MA57_events','MA58_events','MA59_events','OA01_events','OA02_events','OA03_events','OA04_events','OA05_events','OA06_events','OA07_events','OA08_events','OA09_events','OA10_events','OA11_events','OA12_events','OA13_events','OA14_events','OA15_events','OA16_events','OA17_events','OA18_events','OA19_events','OA20_events','OA21_events','OA22_events','OA23_events','OA24_events','OA25_events','OA26_events','OA27_events','OA28_events','OA29_events','OA30_events','OA31_events','OA32_events','OA33_events','OA34_events','OA35_events','OA36_events','OA37_events','OA38_events','OA39_events','OA40_events','OA41_events','OA42_events','OA43_events','OA44_events','OA45_events','OA46_events','OA47_events','OA48_events','OA49_events','OA50_events','OA51_events','OA52_events','OA53_events'

# print list of coordinates file names
#coords = ','.join(["'" + s + "_coords'" for s in ['YA01', 'YA02', 'YA03', 'YA04', 'YA05', 'YA06', 'YA07', 'YA08', 'YA09', 'YA10', 'YA11', 'YA12', 'YA13', 'YA14', 'YA15', 'YA16', 'YA17', 'YA18', 'YA19', 'YA20', 'YA21', 'YA22', 'YA23', 'YA24', 'YA25', 'YA26', 'YA27', 'YA28', 'YA29', 'YA30', 'YA31', 'YA32', 'YA33', 'YA34', 'YA35', 'YA36', 'YA37', 'YA38', 'YA39', 'YA40', 'YA41', 'YA42', 'YA43', 'YA44', 'YA45', 'YA46', 'YA47', 'YA48', 'YA49', 'YA50', 'YA51', 'YA52', 'YA53', 'YA54', 'YA55', 'YA56', 'YA57','MA01', 'MA02', 'MA03', 'MA04', 'MA05', 'MA06', 'MA07', 'MA08', 'MA09', 'MA10', 'MA11', 'MA12', 'MA13', 'MA14', 'MA15', 'MA16', 'MA17', 'MA18', 'MA19', 'MA20', 'MA21', 'MA22', 'MA23', 'MA24', 'MA25', 'MA26', 'MA27', 'MA28', 'MA29', 'MA30', 'MA31', 'MA32', 'MA33', 'MA34', 'MA35', 'MA36', 'MA37', 'MA38', 'MA39', 'MA40', 'MA41', 'MA42', 'MA43', 'MA44', 'MA45', 'MA46', 'MA47', 'MA48', 'MA49', 'MA50', 'MA51', 'MA52', 'MA53', 'MA54', 'MA55', 'MA56', 'MA57', 'MA58', 'MA59', 'OA01', 'OA02', 'OA03', 'OA04', 'OA05', 'OA06', 'OA07', 'OA08', 'OA09', 'OA10', 'OA11', 'OA12', 'OA13', 'OA14', 'OA15', 'OA16', 'OA17', 'OA18', 'OA19', 'OA20', 'OA21', 'OA22', 'OA23', 'OA24', 'OA25', 'OA26', 'OA27', 'OA28', 'OA29', 'OA30', 'OA31', 'OA32', 'OA33', 'OA34', 'OA35', 'OA36', 'OA37', 'OA38', 'OA39', 'OA40', 'OA41', 'OA42', 'OA43', 'OA44', 'OA45', 'OA46', 'OA47', 'OA48', 'OA49', 'OA50', 'OA51', 'OA52', 'OA53']])
#print(coords)
# 'YA01_coords','YA02_coords','YA03_coords','YA04_coords','YA05_coords','YA06_coords','YA07_coords','YA08_coords','YA09_coords','YA10_coords','YA11_coords','YA12_coords','YA13_coords','YA14_coords','YA15_coords','YA16_coords','YA17_coords','YA18_coords','YA19_coords','YA20_coords','YA21_coords','YA22_coords','YA23_coords','YA24_coords','YA25_coords','YA26_coords','YA27_coords','YA28_coords','YA29_coords','YA30_coords','YA31_coords','YA32_coords','YA33_coords','YA34_coords','YA35_coords','YA36_coords','YA37_coords','YA38_coords','YA39_coords','YA40_coords','YA41_coords','YA42_coords','YA43_coords','YA44_coords','YA45_coords','YA46_coords','YA47_coords','YA48_coords','YA49_coords','YA50_coords','YA51_coords','YA52_coords','YA53_coords','YA54_coords','YA55_coords','YA56_coords','YA57_coords','MA01_coords','MA02_coords','MA03_coords','MA04_coords','MA05_coords','MA06_coords','MA07_coords','MA08_coords','MA09_coords','MA10_coords','MA11_coords','MA12_coords','MA13_coords','MA14_coords','MA15_coords','MA16_coords','MA17_coords','MA18_coords','MA19_coords','MA20_coords','MA21_coords','MA22_coords','MA23_coords','MA24_coords','MA25_coords','MA26_coords','MA27_coords','MA28_coords','MA29_coords','MA30_coords','MA31_coords','MA32_coords','MA33_coords','MA34_coords','MA35_coords','MA36_coords','MA37_coords','MA38_coords','MA39_coords','MA40_coords','MA41_coords','MA42_coords','MA43_coords','MA44_coords','MA45_coords','MA46_coords','MA47_coords','MA48_coords','MA49_coords','MA50_coords','MA51_coords','MA52_coords','MA53_coords','MA54_coords','MA55_coords','MA56_coords','MA57_coords','MA58_coords','MA59_coords','OA01_coords','OA02_coords','OA03_coords','OA04_coords','OA05_coords','OA06_coords','OA07_coords','OA08_coords','OA09_coords','OA10_coords','OA11_coords','OA12_coords','OA13_coords','OA14_coords','OA15_coords','OA16_coords','OA17_coords','OA18_coords','OA19_coords','OA20_coords','OA21_coords','OA22_coords','OA23_coords','OA24_coords','OA25_coords','OA26_coords','OA27_coords','OA28_coords','OA29_coords','OA30_coords','OA31_coords','OA32_coords','OA33_coords','OA34_coords','OA35_coords','OA36_coords','OA37_coords','OA38_coords','OA39_coords','OA40_coords','OA41_coords','OA42_coords','OA43_coords','OA44_coords','OA45_coords','OA46_coords','OA47_coords','OA48_coords','OA49_coords','OA50_coords','OA51_coords','OA52_coords','OA53_coords'

### Make dictionaries

#### Make age dictionary

In [3]:
# Subject IDs are generated from the group sizes (57 YA, 59 MA, 53 OA) instead of being
# typed out by hand; the result is the same list: YA01-YA57, MA01-MA59, OA01-OA53.
subject_ids = [
    f'{group}{number:02d}'
    for group, n_subjects in (('YA', 57), ('MA', 59), ('OA', 53))
    for number in range(1, n_subjects + 1)
]

# ages in the same order as subject_ids, one row per group
ages = [
    # YA01-YA57
    20, 19, 19, 18, 20, 22, 21, 20, 19, 19, 20, 19, 20, 24, 20, 19, 21, 19, 21, 19, 20, 22, 20, 19, 20, 26, 19, 19, 18, 20, 20, 19, 21, 20, 18, 19, 20, 20, 22, 20, 20, 21, 22, 19, 19, 21, 19, 19, 20, 20, 20, 20, 21, 20, 20, 21, 21,
    # MA01-MA59
    44, 42, 42, 48, 35, 45, 39, 39, 39, 37, 40, 38, 38, 40, 39, 39, 43, 37, 46, 36, 39, 42, 45, 38, 35, 38, 38, 47, 37, 40, 41, 43, 48, 40, 41, 43, 36, 43, 43, 48, 43, 42, 38, 38, 48, 43, 38, 37, 40, 43, 36, 47, 44, 46, 39, 40, 45, 37, 39,
    # OA01-OA53
    54, 64, 51, 53, 71, 58, 55, 52, 52, 57, 51, 55, 52, 60, 62, 56, 56, 60, 57, 59, 63, 64, 61, 57, 59, 56, 56, 56, 59, 52, 56, 71, 55, 64, 50, 54, 56, 53, 65, 70, 54, 51, 66, 54, 50, 51, 57, 53, 56, 51, 52, 57, 51,
]

# zip() stops at the shorter input without complaint, which would silently leave
# subjects out of the dictionary; fail loudly if the two lists ever disagree in length
if len(subject_ids) != len(ages):
    raise ValueError(
        f'expected one age per subject, got {len(ages)} ages for {len(subject_ids)} subjects'
    )

# create a dictionary mapping subject ids to ages
subject_age_dict = dict(zip(subject_ids, ages))

#### Make changepoint dictionary

In [4]:
# Subject IDs are generated from the group sizes (57 YA, 59 MA, 53 OA) instead of being
# typed out by hand; the result is the same list: YA01-YA57, MA01-MA59, OA01-OA53.
subject_ids = [
    f'{group}{number:02d}'
    for group, n_subjects in (('YA', 57), ('MA', 59), ('OA', 53))
    for number in range(1, n_subjects + 1)
]

# changepoint values in the same order as subject_ids, one row per group
cpts = [
    # YA01-YA57
    84, 33, 49, 67, 39, 66, 36, 68, 64, 11, 58, 36, 81, 24, 31, 26, 77, 0, 65, 84, 49, 24, 21, 51, 72, 12, 69, 60, 58, 24, 51, 0, 76, 22, 58, 36, 72, 52, 59, 53, 55, 22, 62, 30, 44, 51, 42, 9, 0, 66, 27, 67, 38, 40, 3, 49, 33,
    # MA01-MA59
    51, 80, 29, 13, 43, 83, 35, 58, 55, 0, 31, 43, 33, 33, 41, 37, 11, 57, 34, 56, 63, 12, 13, 35, 60, 0, 58, 15, 11, 31, 33, 36, 31, 64, 29, 36, 46, 36, 55, 39, 0, 37, 8, 77, 0, 46, 55, 22, 67, 29, 53, 30, 16, 28, 38, 23, 33, 74, 50,
    # OA01-OA53
    82, 26, 25, 64, 52, 26, 40, 0, 2, 17, 0, 60, 53, 25, 5, 44, 16, 50, 15, 24, 13, 0, 21, 32, 23, 16, 59, 48, 0, 63, 56, 45, 0, 62, 7, 53, 70, 57, 31, 0, 15, 30, 30, 44, 51, 68, 48, 73, 44, 76, 67, 35, 36,
]

# zip() stops at the shorter input without complaint, which would silently leave
# subjects out of the dictionary; fail loudly if the two lists ever disagree in length
if len(subject_ids) != len(cpts):
    raise ValueError(
        f'expected one cpt per subject, got {len(cpts)} cpts for {len(subject_ids)} subjects'
    )

# create a dictionary mapping subject ids to cpts
subject_cpt_dict = dict(zip(subject_ids, cpts))

## Wrangle the Raw Mousetracking Data

In [6]:
# Worked example for a single YA participant.
# Load the raw export for YA01 with pandas and keep it as "output".
output = pd.read_csv('YA01.csv')

# The first entry of the 'last_name' column is one long comma-separated string.
# Break it on the commas and store the pieces, one per row, in a 'time' column of "df".
samples = output.last_name[0].split(',')
df = pd.DataFrame({'time': samples})

# Each piece is itself ';'-separated: expand it into the four columns
# 'time', 'x', 'y', and 'id' (overwriting the temporary 'time' column).
fields = df['time'].str.split(';', expand=True)
df[['time', 'x', 'y', 'id']] = fields

# export new dataframe
#df.to_csv('YA01_mousetrack.csv') # saves this new dataframe "df" as a csv named "YA01_mousetrack"

In [8]:
# display the first 15 rows of df to check the time/x/y/id split
df.head(15)

Unnamed: 0,time,x,y,id
0,1482,170,499,
1,1488,167,466,
2,1496,165,433,
3,1503,164,400,
4,1509,164,374,
5,1516,164,348,
6,1523,167,328,
7,1531,170,310,
8,1537,174,291,
9,1544,178,276,


In [24]:
# now do this in a loop to do all participants at once
# YA and MA/OA adults need two separate loops because their data output is slightly different

# make for loop that will do this for all YA participants
# Repeat the single-participant wrangling for every participant.
# The only difference between the groups is the column that holds the raw string:
# YA files are read from 'last_name', MA and OA files from 'task_data'.
ya_subjects = [f'YA{number:02d}' for number in range(1, 58)]  # YA01-YA57
ma_oa_subjects = (
    [f'MA{number:02d}' for number in range(1, 60)]  # MA01-MA59
    + [f'OA{number:02d}' for number in range(1, 54)]  # OA01-OA53
)

for subjects, raw_column in ((ya_subjects, 'last_name'), (ma_oa_subjects, 'task_data')):
    for s in subjects:
        output = pd.read_csv(s + '.csv')
        # first entry of the raw column, split on commas into one row per piece
        samples = getattr(output, raw_column)[0].split(',')
        df = pd.DataFrame({'time': samples})
        # expand each ';'-separated piece into the time, x, y, and id columns
        df[['time', 'x', 'y', 'id']] = df['time'].str.split(';', expand=True)
        #df.to_csv(s+'_mousetrack.csv')

### Calculate the trial and choice numbers
- For calculating choice number, start counting from 1, count up each time either MHS or MMS is encountered, and restart the count at 1 each time CGB is encountered (i.e., when a new trial starts)
- For calculating trial number, start with 1 and count up each time CGB is encountered

**Fill in trial number column -- THIS WORKS FOR 1**

initialize a new column with NaN values
`df['trial_num'] = None`

initialize count variable
`count = 1`

`for index, row in df.iterrows():` <br>
    `if row['x'] == "CGB":` <br>
        `count += 1  # increase count by 1 when "CGB" is encountered` <br>
        `continue  # skip this row and move to the next iteration` <br>
    `df.loc[index, 'trial_num'] = count` <br>
    
It will look like there is 1 more trial than shown in the trials-completed analyses. This is because the final trial was not completed. If you look at the last CGB occurrence, it should be at the end of the number of trials reported in the other analyses.

In [25]:
# fill in trial number column
# File stems of every participant's mousetrack csv, generated instead of typed out:
# YA01_mousetrack-YA57_mousetrack, MA01_mousetrack-MA59_mousetrack, OA01_mousetrack-OA53_mousetrack
mousetrack_files = [
    f'{group}{number:02d}_mousetrack'
    for group, n_subjects in (('YA', 57), ('MA', 59), ('OA', 53))
    for number in range(1, n_subjects + 1)
]

for s in mousetrack_files:
    df = pd.read_csv(s+'.csv')  # read_csv already returns a DataFrame
    trial_nums = []
    count = 1 # initialize count variable
    # Walk the plain 'x' values rather than df.iterrows() + df.loc: building a Series per
    # row and writing one cell at a time is very slow on long recordings.
    for x in df['x'].tolist():
        if x == "CGB":
            count += 1  # increase count by 1 when "CGB" is encountered
            trial_nums.append(None)  # the "CGB" row itself gets no trial number
        else:
            trial_nums.append(count)
    # dtype=object keeps the values as integers and None (no conversion to float/NaN),
    # which is what the cell-by-cell assignment produced
    df['trial_num'] = pd.Series(trial_nums, index=df.index, dtype=object)
    #df.to_csv(s+'.csv', index=False) # Set index=False to prevent saving the index column
    
# It will look like there is 1 more trial than shown in the trials-completed analyses. This is because the final trial was not completed. If you look at the last CGB occurrence, it should be at the end of the number of trials reported in the other analyses.

In [26]:
# Same trial-number fill as for the full participant list, restricted to
# MA29_mousetrack-MA59_mousetrack and OA01_mousetrack-OA53_mousetrack.
mousetrack_files = (
    [f'MA{number:02d}_mousetrack' for number in range(29, 60)]
    + [f'OA{number:02d}_mousetrack' for number in range(1, 54)]
)

for s in mousetrack_files:
    df = pd.read_csv(s+'.csv')  # read_csv already returns a DataFrame
    trial_nums = []
    count = 1 # initialize count variable
    # Walk the plain 'x' values rather than df.iterrows() + df.loc: building a Series per
    # row and writing one cell at a time is very slow on long recordings.
    for x in df['x'].tolist():
        if x == "CGB":
            count += 1  # increase count by 1 when "CGB" is encountered
            trial_nums.append(None)  # the "CGB" row itself gets no trial number
        else:
            trial_nums.append(count)
    # dtype=object keeps the values as integers and None (no conversion to float/NaN),
    # which is what the cell-by-cell assignment produced
    df['trial_num'] = pd.Series(trial_nums, index=df.index, dtype=object)
    #df.to_csv(s+'.csv', index=False) # Set index=False to prevent saving the index column

**Fill in choice number column -- THIS WORKS FOR 1**

choice numbers within a trial

initialize a new column with NaN values
`df['choice_num'] = None`

initialize count variable
`count = 1`

`for index, row in df.iterrows():` <br>
    `if (row['x'] == "MMS")|(row['x'] == "MHS"):` <br>
        `count += 1  # increase count by 1 when "MMS" or "MHS" is encountered` <br>
        `continue  # skip this row and move to the next iteration` <br>
    `if row['x'] == "CGB":` <br>
        `count = 1  # reset the choice count to 1 when "CGB" occurs, aka when a new trial starts` <br>
        `continue  # skip this row and move to the next iteration` <br>
   ` df.loc[index, 'choice_num'] = count` <br>

In [27]:
# fill in choice number column
# choice numbers within a trial

# File stems of every participant's mousetrack csv, generated instead of typed out:
# YA01_mousetrack-YA57_mousetrack, MA01_mousetrack-MA59_mousetrack, OA01_mousetrack-OA53_mousetrack
mousetrack_files = [
    f'{group}{number:02d}_mousetrack'
    for group, n_subjects in (('YA', 57), ('MA', 59), ('OA', 53))
    for number in range(1, n_subjects + 1)
]

for s in mousetrack_files:
    df = pd.read_csv(s+'.csv')  # read_csv already returns a DataFrame
    choice_nums = []
    count = 1 # initialize count variable
    # Walk the plain 'x' values rather than df.iterrows() + df.loc: building a Series per
    # row and writing one cell at a time is very slow on long recordings.
    for x in df['x'].tolist():
        if x == "MMS" or x == "MHS":
            count += 1  # increase count by 1 when "MMS" or "MHS" is encountered
            choice_nums.append(None)  # the event row itself gets no choice number
        elif x == "CGB":
            count = 1  # reset the choice count to 1 when "CGB" occurs, aka when a new trial starts
            choice_nums.append(None)  # the "CGB" row itself gets no choice number
        else:
            choice_nums.append(count)
    # dtype=object keeps the values as integers and None (no conversion to float/NaN),
    # which is what the cell-by-cell assignment produced
    df['choice_num'] = pd.Series(choice_nums, index=df.index, dtype=object)
    #df.to_csv(s+'.csv', index=False) # Set index=False to prevent saving the index column


In [28]:
# Number each choice within a trial, for the MA participants (MA01-MA59).
# choice_num starts at 1, goes up by one after each "MMS"/"MHS" row, and goes
# back to 1 after each "CGB" row (aka when a new trial starts). The
# "MMS"/"MHS"/"CGB" rows themselves are left empty.
subject_files = [f'MA{n:02d}_mousetrack' for n in range(1, 60)]

for s in subject_files:
    df = pd.read_csv(s+'.csv')
    is_choice = df['x'].isin(['MMS', 'MHS'])  # rows that increase the count
    is_reset = df['x'] == 'CGB'  # rows that reset the count
    # every "CGB" row opens a new block, so rows between two resets share an id
    trial_block = is_reset.cumsum()
    # choices seen so far inside the current block, plus one
    choice_num = is_choice.astype(int).groupby(trial_block).cumsum() + 1
    # nullable integers keep the skipped rows empty without turning counts into floats
    df['choice_num'] = choice_num.where(~(is_choice | is_reset)).astype('Int64')
    #df.to_csv(s+'.csv', index=False) # Set index=False to prevent saving the index column

In [29]:
# Number each choice within a trial, for the OA participants (OA01-OA53).
# choice_num starts at 1, goes up by one after each "MMS"/"MHS" row, and goes
# back to 1 after each "CGB" row (aka when a new trial starts). The
# "MMS"/"MHS"/"CGB" rows themselves are left empty.
subject_files = [f'OA{n:02d}_mousetrack' for n in range(1, 54)]

for s in subject_files:
    df = pd.read_csv(s+'.csv')
    is_choice = df['x'].isin(['MMS', 'MHS'])  # rows that increase the count
    is_reset = df['x'] == 'CGB'  # rows that reset the count
    # every "CGB" row opens a new block, so rows between two resets share an id
    trial_block = is_reset.cumsum()
    # choices seen so far inside the current block, plus one
    choice_num = is_choice.astype(int).groupby(trial_block).cumsum() + 1
    # nullable integers keep the skipped rows empty without turning counts into floats
    df['choice_num'] = choice_num.where(~(is_choice | is_reset)).astype('Int64')
    #df.to_csv(s+'.csv', index=False) # Set index=False to prevent saving the index column

**Make a column for the running total number of choices (counted across the whole file, NOT restarted within each trial) -- THIS VERSION WORKS FOR 1 PARTICIPANT'S DATAFRAME**

initialize a new column with NaN values
`df['tot_choices'] = None`

initialize count variable
`count = 1`

`for index, row in df.iterrows():`<br>
    `if (row['x'] == "MMS")|(row['x'] == "MHS"):`<br>
        `count += 1  # increase count by 1 when "MMS" or "MHS" is encountered`<br>
        `continue  # skip this row and move to the next iteration`<br>
    `df.loc[index, 'tot_choices'] = count`<br>

In [None]:
# count every choice to get total choice numbers for each participant
# tot_choices starts at 1 and goes up by one after each "MMS"/"MHS" row; it is
# never reset. The "MMS"/"MHS" rows themselves are left empty.
subject_files = (
    [f'YA{n:02d}_mousetrack' for n in range(1, 58)]    # YA01-YA57
    + [f'MA{n:02d}_mousetrack' for n in range(1, 60)]  # MA01-MA59
    + [f'OA{n:02d}_mousetrack' for n in range(1, 54)]  # OA01-OA53
)

for s in subject_files:
    df = pd.read_csv(s+'.csv')
    is_choice = df['x'].isin(['MMS', 'MHS'])  # rows that increase the count
    # choices seen so far in the whole file, plus one
    tot_choices = is_choice.cumsum() + 1
    # nullable integers keep the skipped rows empty without turning counts into floats
    df['tot_choices'] = tot_choices.where(~is_choice).astype('Int64')
    #df.to_csv(s+'.csv', index=False) # Set index=False to prevent saving the index column

In [None]:
# count every choice to get total choice numbers for the YA participants (YA01-YA57)
# tot_choices starts at 1 and goes up by one after each "MMS"/"MHS" row; it is
# never reset. The "MMS"/"MHS" rows themselves are left empty.
subject_files = [f'YA{n:02d}_mousetrack' for n in range(1, 58)]

for s in subject_files:
    df = pd.read_csv(s+'.csv')
    is_choice = df['x'].isin(['MMS', 'MHS'])  # rows that increase the count
    # choices seen so far in the whole file, plus one
    tot_choices = is_choice.cumsum() + 1
    # nullable integers keep the skipped rows empty without turning counts into floats
    df['tot_choices'] = tot_choices.where(~is_choice).astype('Int64')
    #df.to_csv(s+'.csv', index=False) # Set index=False to prevent saving the index column

In [None]:
# count every choice to get total choice numbers for the MA participants (MA01-MA59)
# tot_choices starts at 1 and goes up by one after each "MMS"/"MHS" row; it is
# never reset. The "MMS"/"MHS" rows themselves are left empty.
subject_files = [f'MA{n:02d}_mousetrack' for n in range(1, 60)]

for s in subject_files:
    df = pd.read_csv(s+'.csv')
    is_choice = df['x'].isin(['MMS', 'MHS'])  # rows that increase the count
    # choices seen so far in the whole file, plus one
    tot_choices = is_choice.cumsum() + 1
    # nullable integers keep the skipped rows empty without turning counts into floats
    df['tot_choices'] = tot_choices.where(~is_choice).astype('Int64')
    #df.to_csv(s+'.csv', index=False) # Set index=False to prevent saving the index column

In [None]:
# count every choice to get total choice numbers for the OA participants (OA01-OA53)
# tot_choices starts at 1 and goes up by one after each "MMS"/"MHS" row; it is
# never reset. The "MMS"/"MHS" rows themselves are left empty.
subject_files = [f'OA{n:02d}_mousetrack' for n in range(1, 54)]

for s in subject_files:
    df = pd.read_csv(s+'.csv')
    is_choice = df['x'].isin(['MMS', 'MHS'])  # rows that increase the count
    # choices seen so far in the whole file, plus one
    tot_choices = is_choice.cumsum() + 1
    # nullable integers keep the skipped rows empty without turning counts into floats
    df['tot_choices'] = tot_choices.where(~is_choice).astype('Int64')
    #df.to_csv(s+'.csv', index=False) # Set index=False to prevent saving the index column

**Edit columns to be integers instead of floats**

In [None]:
# Rewrite every participant's mousetrack file with the counter columns stored
# as whole numbers instead of floats.
subject_files = (
    [f'YA{n:02d}_mousetrack' for n in range(1, 58)]    # YA01-YA57
    + [f'MA{n:02d}_mousetrack' for n in range(1, 60)]  # MA01-MA59
    + [f'OA{n:02d}_mousetrack' for n in range(1, 54)]  # OA01-OA53
)

for s in subject_files:
    df = pd.read_csv(s+'.csv')
    # These columns contain empty (NaN) rows, and a plain astype(int) raises on
    # NaN. pandas' nullable 'Int64' dtype stores the counts as integers and
    # keeps the empty rows empty.
    for col in ['trial_num', 'choice_num', 'tot_choices']:
        df[col] = df[col].astype('Int64')
    df.to_csv(s+'.csv', index=False) # Set index=False to prevent saving the index column

### Create Separate Dataframes for Events and Position Coordinates
Further wrangle the mousetracking data from above to split the dataframes into two separate dataframes, one for all movement events, and another for all mouse position coordinates

- Input: Mousetrack dataframes created in the above chunk
- Output:
    - YA/MA/OA(number)_events (e.g, YA01_events) = all of a given subject's "events" (CFS, MMS, MOS, CFD, etc.) that has time, x, y, and id columns
    - YA/MA/OA(number)_coords (e.g, YA01_coords) = all of a given subject's x-y coordinates and NO events (CFS, MMS, MOS, CFD, etc.) that has time, x, y, and id columns

In [None]:
# Split each participant's mousetrack file into two files:
#   <subject>_events.csv - rows whose 'x' value is an event tag
#   <subject>_coords.csv - every other row (the x-y position samples)
event_tags = ['CFS','CFB','CFD','CFE','MOS','MHS','MMS','CGB']
subject_files = (
    [f'YA{n:02d}_mousetrack' for n in range(1, 58)]    # YA01-YA57
    + [f'MA{n:02d}_mousetrack' for n in range(1, 60)]  # MA01-MA59
    + [f'OA{n:02d}_mousetrack' for n in range(1, 54)]  # OA01-OA53
)

for s in subject_files:
    # read each file once and reuse the same mask for both outputs
    mousetrack=pd.read_csv(s+'.csv')
    subject = s.replace("_mousetrack", "")
    is_event = mousetrack['x'].isin(event_tags)

    # NOTE(review): to_csv is called without index=False, so the row index is
    # written as an extra unnamed first column -- confirm that the code reading
    # these files expects it before changing this.

    # make events dataframes for participants
    events_df = mousetrack[is_event]
    filename = subject + '_events.csv'
    events_df.to_csv(filename)

    # make x-y coordinate dataframes for participants
    coords_df = mousetrack[~is_event]
    filename = subject + '_coords.csv'
    coords_df.to_csv(filename)

### Create Totals Dataframe
- Total time within each event range
- Total trial number
- Total choice number
- Average number of choices per trial (total choice/total trial)
- Age
- Subject id
- Cpt
- Total information
- Total reward
- Total path length
- Total speed

In [6]:
# combine all mousetracking dataframes into one
subject_files = (
    [f'YA{n:02d}_mousetrack' for n in range(1, 58)]    # YA01-YA57
    + [f'MA{n:02d}_mousetrack' for n in range(1, 60)]  # MA01-MA59
    + [f'OA{n:02d}_mousetrack' for n in range(1, 54)]  # OA01-OA53
)

all_mtrack = []

for s in subject_files:
    mtrack = pd.read_csv(s+'.csv')
    # tag every row with the subject it came from (file name without the suffix)
    mtrack['subject_id'] = s.replace("_mousetrack", "")
    all_mtrack.append(mtrack)

# concatenate all dataframes in the list into one large dataframe
# (ignore_index=True renumbers the rows from 0 instead of keeping per-file indices)
all_mtrack_df = pd.concat(all_mtrack, ignore_index=True)
all_mtrack_df

Unnamed: 0.1,Unnamed: 0,time,x,y,id,subject_id,trial_num,choice_num,tot_choices
0,0,1482,170,499,,YA01,,,
1,1,1488,167,466,,YA01,,,
2,2,1496,165,433,,YA01,,,
3,3,1503,164,400,,YA01,,,
4,4,1509,164,374,,YA01,,,
...,...,...,...,...,...,...,...,...,...
11010384,38581,2929686,MOS,2,4.0,OA53,50.0,8.0,467.0
11010385,38582,2929686,359,194,,OA53,50.0,8.0,467.0
11010386,38583,2929719,360,194,,OA53,50.0,8.0,467.0
11010387,38584,2930189,MHS,2,4.0,OA53,50.0,,


In [7]:
# number of rows in the combined mousetracking dataframe
len(all_mtrack_df)

11010389

In [None]:
# time from each subject's first row to their first CFS row

# group the combined mousetracking rows by subject
sub_grouped = all_mtrack_df.groupby('subject_id')

# function to calculate the time difference
def start_time(group):
    """Return the time between a subject's first row and their first CFS.

    Parameters
    ----------
    group : pandas.DataFrame
        Rows belonging to one subject. Must have a 'time' column and an 'x'
        column in which event rows hold the event tag (e.g. 'CFS').

    Returns
    -------
    The 'time' of the first row whose 'x' equals 'CFS' minus the 'time' of the
    group's first row, or NaN when the group contains no CFS row.
    """
    cfs_times = group.loc[group['x'] == 'CFS', 'time']
    # without this guard .iloc[0] raises IndexError and aborts the whole groupby.apply
    if cfs_times.empty:
        return float('nan')
    return cfs_times.iloc[0] - group['time'].iloc[0]

# run start_time once per subject and keep the result as {subject_id: time}
start_time_dict = sub_grouped.apply(start_time).to_dict()

# one row per subject: the id next to its time-to-first-CFS value
start_time__df = pd.DataFrame(
    {
        'subject_id': list(start_time_dict.keys()),
        'time_to_first_cfs': list(start_time_dict.values()),
    }
)

# look each subject up in subject_age_dict to attach an age column
start_time__df['age'] = start_time__df['subject_id'].map(subject_age_dict)

print(start_time__df)

In [None]:
# CFS to CFE
# per-subject event file stems: YA01-YA57, MA01-MA59, OA01-OA53
# (same names and order as the original hard-coded list)
events_files = [
    f'{group}{number:02d}_events'
    for group, n_subjects in (('YA', 57), ('MA', 59), ('OA', 53))
    for number in range(1, n_subjects + 1)
]

# one dataframe of CFS->CFE times per participant; concatenated once after the loop
cf_frames = []

for s in events_files:
    events = pd.read_csv(s + '.csv')

    is_cfs = events['x'] == 'CFS'
    is_cfe = events['x'] == 'CFE'

    # if the file has more CFS than CFE rows (e.g. it ends with CFS, CFD, CFB, CFS and
    # no closing CFE), remove all rows after the final CFE row
    if is_cfe.sum() < is_cfs.sum():
        last_cfe_index = events[is_cfe].index[-1]
        # label-based slice, inclusive of the last CFE row
        events = events.loc[:last_cfe_index]

    # subset for only CFS and CFE rows; copy so the assignments below do not
    # write into a view of `events`
    cf_events_df = events[events['x'].isin(['CFS', 'CFE'])].copy()

    # forward-fill 'choice_num': propagate the last valid value to the following NaN rows
    cf_events_df['choice_num'] = cf_events_df['choice_num'].ffill()
    cf_events_df['time'] = cf_events_df['time'].astype(int)

    # back-to-back CFS rows (caused by CFBs) break the pairing below, so collapse them:
    # a row is dropped when the row directly before it has the same tag, i.e. the
    # FIRST row of each run is the one that is kept.
    # NOTE(review): the previous comment said the second occurrence is kept; the code
    # has always kept the first - confirm which CFS should start the fixation time.
    duplicates_mask = cf_events_df['x'].shift(1) == cf_events_df['x']
    cf_events_df = cf_events_df[~duplicates_mask].reset_index(drop=True)

    # one row per (trial, choice); assumes exactly one CFS and one CFE remain per choice
    tmp_df = cf_events_df[['trial_num', 'choice_num']].drop_duplicates().copy()

    cfs_times = cf_events_df.loc[cf_events_df['x'] == 'CFS', 'time']
    cfe_times = cf_events_df.loc[cf_events_df['x'] == 'CFE', 'time']

    # add the times positionally (raises if the counts do not match the row count)
    tmp_df['cfs_time'] = cfs_times.values
    tmp_df['cfe_time'] = cfe_times.values

    # calculate cf time
    tmp_df['cf_time'] = tmp_df['cfe_time'] - tmp_df['cfs_time']

    # make subject_id column
    tmp_df['subject_id'] = s.replace("_events", "")

    tmp_df.reset_index(drop=True, inplace=True)
    cf_frames.append(tmp_df)

# stores all CFS->CFE times for each choice and each trial per participant
cfs_to_cfe_df = pd.concat(cf_frames)

cfs_to_cfe_df

In [None]:
# CFE to MMS/MHS (aka reaction time)

# create reaction time that stores all reaction times, not just the mean per participant
# per-subject event file stems: YA01-YA57, MA01-MA59, OA01-OA53
# (same names and order as the original hard-coded list)
events_files = [
    f'{group}{number:02d}_events'
    for group, n_subjects in (('YA', 57), ('MA', 59), ('OA', 53))
    for number in range(1, n_subjects + 1)
]

# one dataframe of reaction times per subject; concatenated once after the loop
rt_frames = []

for s in events_files:
    events = pd.read_csv(s + '.csv')

    # if there are more CFE rows than MHS/MMS rows, remove the last CFE occurrence
    n_outcomes = events['x'].isin(['MHS', 'MMS']).sum()
    is_cfe = events['x'] == 'CFE'
    if n_outcomes < is_cfe.sum():
        last_cfe = events[is_cfe].index[-1]
        events = events.drop(last_cfe)

    # reaction time = hit/miss time minus the CFE time, paired by position
    outcome_times = events.loc[events['x'].isin(['MHS', 'MMS']), 'time'].values.astype('int')
    cfe_times = events.loc[events['x'] == 'CFE', 'time'].values.astype('int')
    rt_df = outcome_times - cfe_times  # rt_df is a numpy array

    # the number of rows is unique to each subject, so build the frame inside the loop
    tmp_all_rts = pd.DataFrame({'subject_id': s.replace("_events", ""), 'rt': rt_df})
    rt_frames.append(tmp_all_rts)

all_rts = pd.concat(rt_frames)

# write once, after every subject has been processed (was rewritten on every iteration)
all_rts.to_csv('all_rts')
all_rts

In [None]:
# mms/mhs to next CFS (next trial, intertrial time)

# per-subject event file stems: YA01-YA57, MA01-MA59, OA01-OA53
# (same names and order as the original hard-coded list)
events_files = [
    f'{group}{number:02d}_events'
    for group, n_subjects in (('YA', 57), ('MA', 59), ('OA', 53))
    for number in range(1, n_subjects + 1)
]

# one dataframe of intertrial times per subject; concatenated once after the loop
it_frames = []

for s in events_files:
    events = pd.read_csv(s + '.csv')

    # forward-fill 'choice_num': propagate the last valid value to the following NaN rows
    events['choice_num'] = events['choice_num'].ffill()
    events['time'] = events['time'].astype(int)

    # if there are fewer CFS rows than MHS/MMS rows, remove all rows after the final CFS row
    is_cfs = events['x'] == 'CFS'
    n_outcomes = events['x'].isin(['MHS', 'MMS']).sum()
    if is_cfs.sum() < n_outcomes:
        last_cfs_index = events[is_cfs].index[-1]
        # label-based slice, inclusive of the last CFS row
        events = events.loc[:last_cfs_index]

    it_events_df = events[events['x'].isin(['CFS', 'MMS', 'MHS'])]

    # back-to-back rows with the same tag (CFS repeated because of CFBs/CFDs) break the
    # pairing below, so collapse them: a row is dropped when the row directly before it
    # has the same tag, i.e. the FIRST row of each run is the one that is kept.
    # NOTE(review): the previous comment said the second occurrence is kept; the code
    # has always kept the first - confirm which one is intended.
    duplicates_mask = it_events_df['x'].shift(1) == it_events_df['x']
    it_events_df = it_events_df[~duplicates_mask].reset_index(drop=True)

    # make df to store results in: one row per (trial, choice)
    tmp_df = it_events_df[['trial_num', 'choice_num']].drop_duplicates().copy()

    cfs_times = it_events_df.loc[it_events_df['x'] == 'CFS', 'time']
    mms_mhs_times = it_events_df.loc[it_events_df['x'].isin(['MMS', 'MHS']), 'time']

    # pad the MMS/MHS times with one NaN when they are shorter than the CFS times
    if len(mms_mhs_times) < len(cfs_times):
        nan_series = pd.Series([float('nan')])
        mms_mhs_times = pd.concat([mms_mhs_times, nan_series], ignore_index=True)

    # add the times positionally (raises if the counts do not match the row count)
    tmp_df['cfs_time'] = cfs_times.values
    tmp_df['mms_mhs_time'] = mms_mhs_times.values

    # intertrial time = the NEXT row's cfs_time minus this row's mms/mhs time
    tmp_df['it_time'] = tmp_df['cfs_time'].shift(-1) - tmp_df['mms_mhs_time']

    # drop the last row (it has no following CFS, so it_time is NaN);
    # copy so the assignment below does not write into a slice
    tmp_df = tmp_df.iloc[:-1].copy()

    # make subject_id column
    tmp_df['subject_id'] = s.replace("_events", "")

    tmp_df.reset_index(drop=True, inplace=True)
    it_frames.append(tmp_df)

it_time_df = pd.concat(it_frames)

it_time_df

In [None]:
# time between the last event label and the last recorded time

# group by subject again; sort=False keeps the groups in order of first appearance

sub_grouped = all_mtrack_df.groupby('subject_id', sort=False)

def last_event_to_end_time(group):
    """Return the time between a subject's last event row and their last row.

    Parameters
    ----------
    group : pandas.DataFrame
        Rows belonging to one subject. Must have a 'time' column and an 'x'
        column in which event rows hold one of the event tags listed below.

    Returns
    -------
    The 'time' of the group's last row minus the 'time' of the last row whose
    'x' is an event tag, or NaN when the group contains no event row.
    """
    event_tags = ['CFS', 'CFB', 'CFD', 'CFE', 'MOS', 'MHS', 'MMS', 'CGB']
    event_times = group.loc[group['x'].isin(event_tags), 'time']
    # without this guard .iloc[-1] raises IndexError and aborts the whole groupby.apply
    if event_times.empty:
        return float('nan')
    return group['time'].iloc[-1] - event_times.iloc[-1]

# run last_event_to_end_time once per subject and keep the result as {subject_id: time}
last_event_to_end_dict = sub_grouped.apply(last_event_to_end_time).to_dict()

# one row per subject: the id next to its last-event-to-end time
last_event_to_end_df = pd.DataFrame(
    {
        'subject_id': list(last_event_to_end_dict.keys()),
        'last_event_to_end_time': list(last_event_to_end_dict.values()),
    }
)

print(last_event_to_end_df)


In [None]:
# total task time 

# largest 'time' value per participant (e.g. how long the task lasted), one row per subject
tot_task_time_df = all_mtrack_df.groupby('subject_id', as_index=False)['time'].max()
tot_task_time_df

# seeing if it differed by age group because tasks were slightly different on different servers...so seeing if that made a difference
# separate by age group, as these were collected on different servers/with different methods
#ya_tot_time = tot_task_time_df[tot_task_time_df['subject_id'].str.startswith('YA')][['subject_id', 'time']].reset_index(drop=True)
#ma_tot_time = tot_task_time_df[tot_task_time_df['subject_id'].str.startswith('MA')][['subject_id', 'time']].reset_index(drop=True)
#oa_tot_time = tot_task_time_df[tot_task_time_df['subject_id'].str.startswith('OA')][['subject_id', 'time']].reset_index(drop=True)

# find mean total task time per group
#mean_ya_tot_time = ya_tot_time['time'].mean()
#mean_ma_tot_time = ma_tot_time['time'].mean()
#mean_oa_tot_time = oa_tot_time['time'].mean()

#print(mean_ya_tot_time) # 3003512.7192982454
#print(mean_ma_tot_time) # 2995265.694915254
#print(mean_oa_tot_time) # 2996712.886792453

# 3000000 ms = 50 minutes
# 3600000 ms = 60 minutes

In [None]:
# total path length/speed

# initialize an empty df
# per-subject mousetrack file stems: YA01-YA57, MA01-MA59, OA01-OA53
# (same names and order as the original hard-coded list)
mousetrack_files = [
    f'{group}{number:02d}_mousetrack'
    for group, n_subjects in (('YA', 57), ('MA', 59), ('OA', 53))
    for number in range(1, n_subjects + 1)
]

# one dataframe of path length / speed per subject; concatenated once after the loop
speed_frames = []

for s in mousetrack_files:
    mtrack = pd.read_csv(s + '.csv')

    # convert 'x' and 'y' columns to numeric; event labels in 'x' become NaN
    mtrack['x'] = pd.to_numeric(mtrack['x'], errors='coerce')
    mtrack['y'] = pd.to_numeric(mtrack['y'], errors='coerce')

    # group by 'trial_num' and 'choice_num'
    grouped = mtrack.groupby(['trial_num', 'choice_num'])

    # row-to-row differences computed within each (trial, choice) group.
    # GroupBy.diff keeps the original row index, so the columns can be assigned
    # straight back; this replaces a groupby.apply that returned whole frames,
    # whose result index differs between pandas versions.
    mtrack['x_diff'] = grouped['x'].diff()
    mtrack['y_diff'] = grouped['y'].diff()
    mtrack['distance'] = np.sqrt(mtrack['x_diff']**2 + mtrack['y_diff']**2)
    mtrack['time_diff'] = grouped['time'].diff()

    # total distance and total elapsed time per (trial, choice);
    # NaN steps (first row of a group, steps next to event rows) are skipped by sum
    tmp_speed_df = mtrack.groupby(['trial_num', 'choice_num']).agg(
        path_length=('distance', 'sum'),
        time_diff=('time_diff', 'sum')
    ).reset_index()

    tmp_speed_df['speed'] = tmp_speed_df['path_length'] / tmp_speed_df['time_diff']
    tmp_speed_df['subject_id'] = s.replace("_mousetrack", "")  # store the proper subject ID

    speed_frames.append(tmp_speed_df)

speed_df = pd.concat(speed_frames, ignore_index=True)

print(speed_df)
speed_df.to_csv('speed_df')

In [None]:
# total CFBs
# (same code down below)

# per-subject event file stems: YA01-YA57, MA01-MA59, OA01-OA53
# (same names and order as the original hard-coded list)
events_files = [
    f'{group}{number:02d}_events'
    for group, n_subjects in (('YA', 57), ('MA', 59), ('OA', 53))
    for number in range(1, n_subjects + 1)
]

# count the CFB rows in every subject's events file
# (rows are collected in a list because DataFrame.append no longer exists in pandas 2.x)
cfb_rows = []
for s in events_files:
    sub_df = pd.read_csv(s + '.csv')
    subject_id = s.replace("_events", "")
    cfb_count = (sub_df['x'] == 'CFB').sum()
    cfb_rows.append({'subject_id': subject_id, 'total_cfb': cfb_count})

total_cfb_df = pd.DataFrame(cfb_rows, columns=['subject_id', 'total_cfb'])

# add age
# NOTE(review): this expects speed_df to already carry an 'age' column - confirm it is
# added before this cell runs
total_cfb_df = total_cfb_df.merge(speed_df[['subject_id', 'age']].drop_duplicates(), on='subject_id', how='left')

# add changepoint
YA_cpts = [84, 33, 49, 67, 39, 66, 36, 68, 64, 11, 58, 36, 81, 24, 31, 26, 77, 0, 65, 84, 49, 24, 21, 51, 72, 12, 69, 60, 58, 24, 51, 0, 76, 22, 58, 36, 72, 52, 59, 53, 55, 22, 62, 30, 44, 51, 42, 9, 0, 66, 27, 67, 38, 40, 3, 49, 33]
MA_cpts = [51, 80, 29, 13, 43, 83, 35, 58, 55, 0, 31, 43, 33, 33, 41, 37, 11, 57, 34, 56, 63, 12, 13, 35, 60, 0, 58, 15, 11, 31, 33, 36, 31, 64, 29, 36, 46, 36, 55, 39, 0, 37, 8, 77, 0, 46, 55, 22, 67, 29, 53, 30, 16, 28, 38, 23, 33, 74, 50]
OA_cpts = [82, 26, 25, 64, 52, 26, 40, 0, 2, 17, 0, 60, 53, 25, 5, 44, 16, 50, 15, 24, 13, 0, 21, 32, 23, 16, 59, 48, 0, 63, 56, 45, 0, 62, 7, 53, 70, 57, 31, 0, 15, 30, 30, 44, 51, 68, 48, 73, 44, 76, 67, 35, 36]

# kept for any code that still reads it; this is in MA, OA, YA order, which is NOT
# the row order of total_cfb_df (YA, MA, OA), so it must not be assigned positionally
all_cpts = MA_cpts + OA_cpts + YA_cpts

# map each changepoint to its subject id so the values cannot be misaligned
# (assumes each list is ordered by subject number, e.g. YA_cpts[0] is YA01 - TODO confirm)
cpt_by_subject = {
    f'{group}{number:02d}': cpt
    for group, cpts in (('YA', YA_cpts), ('MA', MA_cpts), ('OA', OA_cpts))
    for number, cpt in enumerate(cpts, start=1)
}
total_cfb_df['cpt'] = total_cfb_df['subject_id'].map(cpt_by_subject)

total_cfb_df['age'] = pd.to_numeric(total_cfb_df['age'], errors='coerce')
total_cfb_df['total_cfb'] = pd.to_numeric(total_cfb_df['total_cfb'], errors='coerce')

total_cfb_df

In [None]:
# total MOSs
# per-subject event file stems: YA01-YA57, MA01-MA59, OA01-OA53
# (same names and order as the original hard-coded list)
events_files = [
    f'{group}{number:02d}_events'
    for group, n_subjects in (('YA', 57), ('MA', 59), ('OA', 53))
    for number in range(1, n_subjects + 1)
]

# count the MOS rows in every subject's events file
# (rows are collected in a list because DataFrame.append no longer exists in pandas 2.x)
mos_rows = []
for s in events_files:
    sub_df = pd.read_csv(s + '.csv')
    subject_id = s.replace("_events", "")
    mos_count = (sub_df['x'] == 'MOS').sum()
    mos_rows.append({'subject_id': subject_id, 'total_mos': mos_count})

total_mos_df = pd.DataFrame(mos_rows, columns=['subject_id', 'total_mos'])

# add age
# NOTE(review): this expects speed_df to already carry an 'age' column - confirm it is
# added before this cell runs
total_mos_df = total_mos_df.merge(speed_df[['subject_id', 'age']].drop_duplicates(), on='subject_id', how='left')

# add changepoint
YA_cpts = [84, 33, 49, 67, 39, 66, 36, 68, 64, 11, 58, 36, 81, 24, 31, 26, 77, 0, 65, 84, 49, 24, 21, 51, 72, 12, 69, 60, 58, 24, 51, 0, 76, 22, 58, 36, 72, 52, 59, 53, 55, 22, 62, 30, 44, 51, 42, 9, 0, 66, 27, 67, 38, 40, 3, 49, 33]
MA_cpts = [51, 80, 29, 13, 43, 83, 35, 58, 55, 0, 31, 43, 33, 33, 41, 37, 11, 57, 34, 56, 63, 12, 13, 35, 60, 0, 58, 15, 11, 31, 33, 36, 31, 64, 29, 36, 46, 36, 55, 39, 0, 37, 8, 77, 0, 46, 55, 22, 67, 29, 53, 30, 16, 28, 38, 23, 33, 74, 50]
OA_cpts = [82, 26, 25, 64, 52, 26, 40, 0, 2, 17, 0, 60, 53, 25, 5, 44, 16, 50, 15, 24, 13, 0, 21, 32, 23, 16, 59, 48, 0, 63, 56, 45, 0, 62, 7, 53, 70, 57, 31, 0, 15, 30, 30, 44, 51, 68, 48, 73, 44, 76, 67, 35, 36]

# kept for any code that still reads it; this is in MA, OA, YA order, which is NOT
# the row order of total_mos_df (YA, MA, OA), so it must not be assigned positionally
all_cpts = MA_cpts + OA_cpts + YA_cpts

# map each changepoint to its subject id so the values cannot be misaligned
# (assumes each list is ordered by subject number, e.g. YA_cpts[0] is YA01 - TODO confirm)
cpt_by_subject = {
    f'{group}{number:02d}': cpt
    for group, cpts in (('YA', YA_cpts), ('MA', MA_cpts), ('OA', OA_cpts))
    for number, cpt in enumerate(cpts, start=1)
}
total_mos_df['cpt'] = total_mos_df['subject_id'].map(cpt_by_subject)

total_mos_df['age'] = pd.to_numeric(total_mos_df['age'], errors='coerce')
total_mos_df['total_mos'] = pd.to_numeric(total_mos_df['total_mos'], errors='coerce')

total_mos_df

In [None]:
# display the per-subject MOS totals table
total_mos_df

In [None]:
# make dictionaries for trial and choice counts

# subject ids in loading order: YA01-YA57, MA01-MA59, OA01-OA53
# (same ids and order as the original hard-coded list)
subject_ids = [
    f'{group}{number:02d}'
    for group, n_subjects in (('YA', 57), ('MA', 59), ('OA', 53))
    for number in range(1, n_subjects + 1)
]

# Every dictionary below is keyed by the group labels of its own groupby
# (Series.to_dict) rather than zipped positionally with subject_ids, so a value
# can never be attached to the wrong subject if a table is ordered differently
# or is missing a participant.

# total trial count
tot_trials = all_mtrack_df.groupby('subject_id', sort=False)['trial_num'].max()
total_trials_dict = tot_trials.to_dict()

# add total trials using dictionary
# df['tot_trials'] = df['subject_id'].map(total_trials_dict)


# total choice count
tot_choices = all_mtrack_df.groupby('subject_id', sort=False)['tot_choices'].max()
total_choices_dict = tot_choices.to_dict()

# add total choices using dictionary
# df['tot_choices'] = df['subject_id'].map(total_choices_dict)


# cfs to cfe total
tot_cfs_to_cfe_df = cfs_to_cfe_df.groupby('subject_id', sort=False)['cf_time'].sum()
tot_cfs_to_cfe_dict = tot_cfs_to_cfe_df.to_dict()
# df['tot_cfs_to_cfe'] = df['subject_id'].map(tot_cfs_to_cfe_dict)


# cfe to mms/mhs total (aka reaction time)
tot_cfe_to_mms_mhs_df = all_rts.groupby('subject_id', sort=False)['rt'].sum()
tot_cfe_to_mms_mhs_dict = tot_cfe_to_mms_mhs_df.to_dict()
# df['tot_cfe_to_mms_mhs'] = df['subject_id'].map(tot_cfe_to_mms_mhs_dict)


# mms/mhs to next cfs total (aka intertrial time)
tot_mms_mhs_to_n_cfs_df = it_time_df.groupby('subject_id', sort=False)['it_time'].sum()
tot_mms_mhs_to_n_cfs_dict = tot_mms_mhs_to_n_cfs_df.to_dict()
# df['tot_mms_mhs_to_n_cfs'] = df['subject_id'].map(tot_mms_mhs_to_n_cfs_dict)


# last event label to last recorded time
tot_last_event_to_end_df = last_event_to_end_df.groupby('subject_id', sort=False)['last_event_to_end_time'].sum()
tot_last_event_to_end_dict = tot_last_event_to_end_df.to_dict()
# df['tot_last_event_to_end'] = df['subject_id'].map(tot_last_event_to_end_dict)


# path length
tot_path_len_df = speed_df.groupby('subject_id', sort=False)['path_length'].sum()
tot_path_len_dict = tot_path_len_df.to_dict()
# df['tot_path_len'] = df['subject_id'].map(tot_path_len_dict)


# speed (sum of the per-choice speed values)
tot_speed_df = speed_df.groupby('subject_id', sort=False)['speed'].sum()
tot_speed_dict = tot_speed_df.to_dict()
# df['tot_speed'] = df['subject_id'].map(tot_speed_dict)


# information
tot_info_df = info_rew_df.groupby('Subject', sort=False)['Information'].sum()
tot_info_dict = tot_info_df.to_dict()
# df['tot_info'] = df['subject_id'].map(tot_info_dict)


# reward
tot_rew_df = info_rew_df.groupby('Subject', sort=False)['Reward'].sum()
tot_rew_dict = tot_rew_df.to_dict()
# df['tot_rew'] = df['subject_id'].map(tot_rew_dict)


In [None]:
# spot check: total reward recorded for subject MA43
info_rew_df.loc[info_rew_df['Subject'] == 'MA43', 'Reward'].sum()

In [None]:
# One row per subject: demographics, counts, event-time totals, and the same
# totals expressed per trial (t_prop_*) and per choice (c_prop_*).
event_totals_df = pd.DataFrame()
event_totals_df['subject_id'] = subject_ids
subject_col = event_totals_df['subject_id']

# fill in subject information (looked up by subject ID)
subject_info_lookups = [
    ('age', subject_age_dict),
    ('cpt', subject_cpt_dict),
    ('tot_trials', total_trials_dict),
    ('tot_choices', total_choices_dict),
]
for column_name, lookup in subject_info_lookups:
    event_totals_df[column_name] = subject_col.map(lookup)
event_totals_df['avg_choices'] = event_totals_df['tot_choices'] / event_totals_df['tot_trials']

# fill in event time totals
# Entries built with .map() are matched on subject ID. The three entries taken
# directly from another dataframe are aligned on row index instead.
# NOTE(review): that assumes start_time__df, total_cfb_df and total_mos_df
# hold one row per subject in the same order as subject_ids - confirm.
event_time_totals = [
    ('first_time_to_first_cfs', start_time__df['time_to_first_cfs']),
    ('cfs_to_cfe', subject_col.map(tot_cfs_to_cfe_dict)),
    ('cfe_to_mms_mhs', subject_col.map(tot_cfe_to_mms_mhs_dict)),
    ('mms_mhs_to_n_cfs', subject_col.map(tot_mms_mhs_to_n_cfs_dict)),
    ('tot_last_event_to_end', subject_col.map(tot_last_event_to_end_dict)),
    ('tot_path_len', subject_col.map(tot_path_len_df)),
    ('tot_speed', subject_col.map(tot_speed_dict)),
    ('tot_rew', subject_col.map(tot_rew_dict)),
    ('tot_info', subject_col.map(tot_info_dict)),
    ('tot_cfb', total_cfb_df['total_cfb']),
    ('tot_mos', total_mos_df['total_mos']),
]
for column_name, values in event_time_totals:
    event_totals_df[column_name] = values

# add proportion to trials, then proportion to choices
# (respective total column divided by number of trials / number of choices)
total_columns = [column_name for column_name, _ in event_time_totals]
for prefix, denominator in (('t_prop_', 'tot_trials'), ('c_prop_', 'tot_choices')):
    for column_name in total_columns:
        event_totals_df[prefix + column_name] = event_totals_df[column_name] / event_totals_df[denominator]

# overall task time (index-aligned) next to the sum of the five timed segments
event_totals_df['total_task_time'] = tot_task_time_df['time']
event_totals_df['total_event_time'] = (
    event_totals_df['first_time_to_first_cfs']
    + event_totals_df['cfs_to_cfe']
    + event_totals_df['cfe_to_mms_mhs']
    + event_totals_df['mms_mhs_to_n_cfs']
    + event_totals_df['tot_last_event_to_end']
)


# save df
event_totals_df.to_csv('event_totals_df.csv', index=False)

event_totals_df

#### 3-Sigma Rule-Out for Reaction Times
Check whether each RT observation lies 3 or more standard deviations from that subject's mean RT. If it does, exclude it from the dataframe, then calculate a new mean from the remaining (non-outlier) RT observations.

In [None]:
# Two-sided 3-sigma rule-out, applied separately to each subject's RTs.
# Subjects are processed in recording order: YA01-YA57, MA01-MA59, OA01-OA53.
subject_order = [
    f'{group}{number:02d}'
    for group, n_subjects in (('YA', 57), ('MA', 59), ('OA', 53))
    for number in range(1, n_subjects + 1)
]

# Collect the filtered per-subject frames and concatenate once at the end;
# concatenating onto a growing dataframe inside the loop re-copies every
# earlier row on each iteration.
kept_rts = []
for s in subject_order:
    sub_df = all_rts.loc[all_rts['subject_id'] == s]
    mean_rt = sub_df['rt'].mean()
    sd_rt = sub_df['rt'].std(ddof=0)  # population SD, the same value np.std returns

    # drop RTs that sit 3 SD or more above OR below this subject's mean
    too_slow = sub_df['rt'] >= mean_rt + 3 * sd_rt
    too_fast = sub_df['rt'] <= mean_rt - 3 * sd_rt
    sub_df_3sig = sub_df[~(too_slow | too_fast)]
    kept_rts.append(sub_df_3sig)

three_sig_all_rts = pd.concat(kept_rts)

three_sig_all_rts

In [None]:
# One-sided 3-sigma rule-out: drop only the RTs that are 3 SD or more ABOVE
# the subject's mean; unusually fast RTs are kept.
# Subjects are processed in recording order: YA01-YA57, MA01-MA59, OA01-OA53.
subject_order = [
    f'{group}{number:02d}'
    for group, n_subjects in (('YA', 57), ('MA', 59), ('OA', 53))
    for number in range(1, n_subjects + 1)
]

# Collect the filtered per-subject frames and concatenate once at the end;
# concatenating onto a growing dataframe inside the loop re-copies every
# earlier row on each iteration.
kept_rts = []
for s in subject_order:
    sub_df = all_rts.loc[all_rts['subject_id'] == s]
    mean_rt = sub_df['rt'].mean()
    sd_rt = sub_df['rt'].std(ddof=0)  # population SD, the same value np.std returns

    too_slow = sub_df['rt'] >= mean_rt + 3 * sd_rt
    sub_df_3sig = sub_df[~too_slow]
    kept_rts.append(sub_df_3sig)

all_pos_sig_rts = pd.concat(kept_rts)

all_pos_sig_rts

In [None]:
# Per-subject mean and median RT after the two-sided 3-sigma exclusion.
# Only the 'rt' column is aggregated: averaging every column would yield one
# output column per input column, and the two-name column assignment below
# fails as soon as the table carries anything besides 'rt' (for example a
# 'z_score' column).
rt_by_subject = three_sig_all_rts.groupby('subject_id')['rt']
mean_rt = rt_by_subject.mean()
median_rt = rt_by_subject.median()

three_sig_rts = pd.concat([mean_rt, median_rt], axis=1)
three_sig_rts.columns = ['mean_rt', 'median_rt']

# add age
ya_ages = [20, 19, 19, 18, 20, 22, 21, 20, 19, 19, 20, 19, 20, 24, 20, 19, 21, 19, 21, 19, 20, 22, 20, 19, 20, 26, 19, 19, 18, 20, 20, 19, 21, 20, 18, 19, 20, 20, 22, 20, 20, 21, 22, 19, 19, 21, 19, 19, 20, 20, 20, 20, 21, 20, 20, 21, 21]
ma_ages = [44, 42, 42, 48, 35, 45, 39, 39, 39, 37, 40, 38, 38, 40, 39, 39, 43, 37, 46, 36, 39, 42, 45, 38, 35, 38, 38, 47, 37, 40, 41, 43, 48, 40, 41, 43, 36, 43, 43, 48, 43, 42, 38, 38, 48, 43, 38, 37, 40, 43, 36, 47, 44, 46, 39, 40, 45, 37, 39]
oa_ages = [54, 64, 51, 53, 71, 58, 55, 52, 52, 57, 51, 55, 52, 60, 62, 56, 56, 60, 57, 59, 63, 64, 61, 57, 59, 56, 56, 56, 59, 52, 56, 71, 55, 64, 50, 54, 56, 53, 65, 70, 54, 51, 66, 54, 50, 51, 57, 53, 56, 51, 52, 57, 51]

# ages in alphabetical subject order (MA, OA, YA), i.e. the order groupby returns
all_ages = ma_ages + oa_ages + ya_ages

# Look each age up by subject ID rather than by row position: entry i of a
# list belongs to subject <group><i>, e.g. ya_ages[0] -> 'YA01'. A subject
# missing from the table then cannot shift everyone else's age.
age_by_subject = {
    f'{group}{number:02d}': age
    for group, ages in (('YA', ya_ages), ('MA', ma_ages), ('OA', oa_ages))
    for number, age in enumerate(ages, start=1)
}
three_sig_rts['age'] = three_sig_rts.index.map(age_by_subject)

print(three_sig_rts)

In [None]:
# Per-subject mean and median RT after the one-sided (+3 SD) exclusion.
# Only the 'rt' column is aggregated: averaging every column would yield one
# output column per input column, and the two-name column assignment below
# fails as soon as the table carries anything besides 'rt' (for example a
# 'z_score' column).
rt_by_subject = all_pos_sig_rts.groupby('subject_id')['rt']
mean_rt = rt_by_subject.mean()
median_rt = rt_by_subject.median()

pos_sig_rts = pd.concat([mean_rt, median_rt], axis=1)
pos_sig_rts.columns = ['mean_rt', 'median_rt']

# add age
ya_ages = [20, 19, 19, 18, 20, 22, 21, 20, 19, 19, 20, 19, 20, 24, 20, 19, 21, 19, 21, 19, 20, 22, 20, 19, 20, 26, 19, 19, 18, 20, 20, 19, 21, 20, 18, 19, 20, 20, 22, 20, 20, 21, 22, 19, 19, 21, 19, 19, 20, 20, 20, 20, 21, 20, 20, 21, 21]
ma_ages = [44, 42, 42, 48, 35, 45, 39, 39, 39, 37, 40, 38, 38, 40, 39, 39, 43, 37, 46, 36, 39, 42, 45, 38, 35, 38, 38, 47, 37, 40, 41, 43, 48, 40, 41, 43, 36, 43, 43, 48, 43, 42, 38, 38, 48, 43, 38, 37, 40, 43, 36, 47, 44, 46, 39, 40, 45, 37, 39]
oa_ages = [54, 64, 51, 53, 71, 58, 55, 52, 52, 57, 51, 55, 52, 60, 62, 56, 56, 60, 57, 59, 63, 64, 61, 57, 59, 56, 56, 56, 59, 52, 56, 71, 55, 64, 50, 54, 56, 53, 65, 70, 54, 51, 66, 54, 50, 51, 57, 53, 56, 51, 52, 57, 51]

# ages in alphabetical subject order (MA, OA, YA), i.e. the order groupby returns
all_ages = ma_ages + oa_ages + ya_ages

# Look each age up by subject ID rather than by row position: entry i of a
# list belongs to subject <group><i>, e.g. ya_ages[0] -> 'YA01'. A subject
# missing from the table then cannot shift everyone else's age.
age_by_subject = {
    f'{group}{number:02d}': age
    for group, ages in (('YA', ya_ages), ('MA', ma_ages), ('OA', oa_ages))
    for number, age in enumerate(ages, start=1)
}
pos_sig_rts['age'] = pos_sig_rts.index.map(age_by_subject)

print(pos_sig_rts)

#### Z-scoring Reaction Times

In [None]:
# add column to all_rts dataframe that has the absolute z-score of each RT,
# standardised within each subject.
# nan_policy='omit' keeps a single missing RT from turning every z-score of
# that subject into NaN (which the filter below would then discard wholesale);
# with no missing RTs the values are unchanged.
all_rts['z_score'] = all_rts.groupby('subject_id')['rt'].transform(
    lambda x: np.abs(stats.zscore(x, nan_policy='omit'))
)

# remove rows where z-score > 3 (rows whose z-score is NaN are removed too,
# because NaN <= 3 is False). An RT at exactly 3 SD is kept here.
sig_z_score_rts = all_rts[all_rts['z_score'] <= 3]

sig_z_score_rts

In [None]:
# per-subject mean RT after the z-score exclusion
z_score_mean_rts = sig_z_score_rts.groupby('subject_id').agg(
    z_mean_rt=('rt', 'mean')
).reset_index()

# add age
# (The earlier merge of 'age' from speed_df is gone: that column was
# overwritten by the assignment below straight away, so the merge only added
# a dependency on speed_df.)
ya_ages = [20, 19, 19, 18, 20, 22, 21, 20, 19, 19, 20, 19, 20, 24, 20, 19, 21, 19, 21, 19, 20, 22, 20, 19, 20, 26, 19, 19, 18, 20, 20, 19, 21, 20, 18, 19, 20, 20, 22, 20, 20, 21, 22, 19, 19, 21, 19, 19, 20, 20, 20, 20, 21, 20, 20, 21, 21]
ma_ages = [44, 42, 42, 48, 35, 45, 39, 39, 39, 37, 40, 38, 38, 40, 39, 39, 43, 37, 46, 36, 39, 42, 45, 38, 35, 38, 38, 47, 37, 40, 41, 43, 48, 40, 41, 43, 36, 43, 43, 48, 43, 42, 38, 38, 48, 43, 38, 37, 40, 43, 36, 47, 44, 46, 39, 40, 45, 37, 39]
oa_ages = [54, 64, 51, 53, 71, 58, 55, 52, 52, 57, 51, 55, 52, 60, 62, 56, 56, 60, 57, 59, 63, 64, 61, 57, 59, 56, 56, 56, 59, 52, 56, 71, 55, 64, 50, 54, 56, 53, 65, 70, 54, 51, 66, 54, 50, 51, 57, 53, 56, 51, 52, 57, 51]

# ages in alphabetical subject order (MA, OA, YA), i.e. the order groupby returns
all_ages = ma_ages + oa_ages + ya_ages

# Look each age up by subject ID rather than by row position: entry i of a
# list belongs to subject <group><i>, e.g. ya_ages[0] -> 'YA01'. A subject
# missing from the table then cannot shift everyone else's age.
age_by_subject = {
    f'{group}{number:02d}': age
    for group, ages in (('YA', ya_ages), ('MA', ma_ages), ('OA', oa_ages))
    for number, age in enumerate(ages, start=1)
}
z_score_mean_rts['age'] = z_score_mean_rts['subject_id'].map(age_by_subject)

print(z_score_mean_rts)

In [None]:
# Draw the three RT sets as labelled scatter layers, in this order:
#   1. all raw data
#   2. RTs kept by the two-sided rule (3 sig above or below mean removed)
#   3. RTs kept by the one-sided rule (3 sig above mean only removed)
rt_layers = [
    (all_rts, 'raw'),
    (three_sig_all_rts, '+/- 3-sigma'),
    (all_pos_sig_rts, '+ 3-sigma'),
]

drawn_axes = []
for rt_table, legend_label in rt_layers:
    layer_axes = sns.scatterplot(data=rt_table, x='subject_id', y='rt', label=legend_label)
    print(layer_axes)
    drawn_axes.append(layer_axes)

raw_plot, sig_plot, pos_sig_plot = drawn_axes


## Path Length and Mouse Speeds 
Now we are going to look into various measures of mouse movement and speed to better understand what the participants were doing during the task, and to determine whether and how these movements and speeds differed with age.

### Calculate path length 

In [None]:
# total path length/speed

# columns that identify one choice within one subject's file
STEP_GROUP_KEYS = ['trial_num', 'choice_num']


def _add_step_columns(frame, keys=None):
    """Add x_diff, y_diff, distance and time_diff columns to *frame* in place.

    Each value is the change from the previous row. When *keys* is given, the
    differences restart at the first row of every key combination, so no step
    is measured across two different choices.

    Returns the same (mutated) frame.
    """
    rows = frame.groupby(keys) if keys is not None else frame
    frame['x_diff'] = rows['x'].diff()
    frame['y_diff'] = rows['y'].diff()
    frame['distance'] = np.sqrt(frame['x_diff']**2 + frame['y_diff']**2)
    frame['time_diff'] = rows['time'].diff()
    return frame


# define a function to calculate distance
def calculate_distance_and_time(group):
    """Add the step columns to a single, already-isolated group of rows."""
    return _add_step_columns(group)


# one '<subject>_mousetrack' file per subject: YA01-YA57, MA01-MA59, OA01-OA53
mousetrack_files = [
    f'{group}{number:02d}_mousetrack'
    for group, n_subjects in (('YA', 57), ('MA', 59), ('OA', 53))
    for number in range(1, n_subjects + 1)
]

per_subject_speed = []
for s in mousetrack_files:
    mtrack = pd.read_csv(s + '.csv')

    # convert 'x' and 'y' columns to numeric (event labels become NaN)
    mtrack['x'] = pd.to_numeric(mtrack['x'], errors='coerce')
    mtrack['y'] = pd.to_numeric(mtrack['y'], errors='coerce')

    # Per-choice row-to-row differences, computed with grouped column diffs.
    # The previous groupby(...).apply(fn-returning-the-group) form breaks on
    # pandas >= 2.0, where apply prepends the group keys to the index and the
    # groupby below then sees 'trial_num' as both index level and column.
    # NOTE(review): a step into or out of an event row has NaN distance, so it
    # adds nothing to path_length, while time_diff does not depend on x/y -
    # confirm that is the intended speed definition.
    mtrack = _add_step_columns(mtrack, keys=STEP_GROUP_KEYS)

    tmp_speed_df = mtrack.groupby(STEP_GROUP_KEYS).agg(
        path_length=('distance', 'sum'),
        time_diff=('time_diff', 'sum')
    ).reset_index()

    tmp_speed_df['speed'] = tmp_speed_df['path_length']/tmp_speed_df['time_diff']
    tmp_speed_df['subject_id'] = s.replace("_mousetrack", "")  # store the proper subject ID in the tmp df column

    per_subject_speed.append(tmp_speed_df)

# concatenate once instead of growing speed_df inside the loop
speed_df = pd.concat(per_subject_speed, ignore_index=True)

print(speed_df)
speed_df.to_csv('speed_df')

In [None]:
# preview: left-join rts onto every row of speed_df by subject ID
merged_df = pd.merge(speed_df, rts, how='left', on='subject_id')
merged_df

In [None]:
# add age to the speed_df dataframe
# the rts dataframe has subject ID and age in it
# Look the age up by subject ID instead of copying the 'age' column of a merge
# result by row position: a repeated subject in rts would add rows to the merge
# and shift every later age, and re-running the cell after speed_df already has
# an 'age' column would leave the merge without a plain 'age' column at all.
age_lookup = rts.drop_duplicates('subject_id').set_index('subject_id')['age']
speed_df['age'] = speed_df['subject_id'].map(age_lookup)
#speed_df.to_csv('speed_df.csv')

### Make mean speed and path length dataframe 

In [None]:
# per-subject mean and median of the per-choice path length and speed
summary_spec = {
    'mean_path_length': ('path_length', 'mean'),
    'mean_speed': ('speed', 'mean'),
    'med_path_length': ('path_length', 'median'),
    'med_speed': ('speed', 'median'),
}
subject_summary = speed_df.groupby('subject_id').agg(**summary_spec).reset_index()

# attach each subject's age: one row per distinct (subject_id, age) pair in speed_df
subject_ages = speed_df[['subject_id', 'age']].drop_duplicates()
mean_speed_df = pd.merge(subject_summary, subject_ages, how='left', on='subject_id')
mean_speed_df

In [None]:
# add changepoint to dataframe
YA_cpts = [84, 33, 49, 67, 39, 66, 36, 68, 64, 11, 58, 36, 81, 24, 31, 26, 77, 0, 65, 84, 49, 24, 21, 51, 72, 12, 69, 60, 58, 24, 51, 0, 76, 22, 58, 36, 72, 52, 59, 53, 55, 22, 62, 30, 44, 51, 42, 9, 0, 66, 27, 67, 38, 40, 3, 49, 33]
MA_cpts = [51, 80, 29, 13, 43, 83, 35, 58, 55, 0, 31, 43, 33, 33, 41, 37, 11, 57, 34, 56, 63, 12, 13, 35, 60, 0, 58, 15, 11, 31, 33, 36, 31, 64, 29, 36, 46, 36, 55, 39, 0, 37, 8, 77, 0, 46, 55, 22, 67, 29, 53, 30, 16, 28, 38, 23, 33, 74, 50]
OA_cpts = [82, 26, 25, 64, 52, 26, 40, 0, 2, 17, 0, 60, 53, 25, 5, 44, 16, 50, 15, 24, 13, 0, 21, 32, 23, 16, 59, 48, 0, 63, 56, 45, 0, 62, 7, 53, 70, 57, 31, 0, 15, 30, 30, 44, 51, 68, 48, 73, 44, 76, 67, 35, 36]

# changepoints in alphabetical subject order (MA, OA, YA)
all_cpts = MA_cpts + OA_cpts + YA_cpts

# Look each changepoint up by subject ID rather than by row position: entry i
# of a list belongs to subject <group><i>, e.g. YA_cpts[0] -> 'YA01'. The
# result then no longer depends on mean_speed_df holding exactly one row per
# subject in alphabetical order.
cpt_by_subject = {
    f'{group}{number:02d}': cpt
    for group, cpts in (('YA', YA_cpts), ('MA', MA_cpts), ('OA', OA_cpts))
    for number, cpt in enumerate(cpts, start=1)
}
mean_speed_df['cpt'] = mean_speed_df['subject_id'].map(cpt_by_subject)
mean_speed_df

In [None]:
# show the per-subject summary table; the CSV export below is left disabled
mean_speed_df.copy()
#mean_speed_df.to_csv('mean_speed_df')

### Calculate distance from each point to the CF
CF coordinates = (199.71440661348086, 159.97512720217148)

In [None]:
# combine all mousetracking dataframes into one

# one '<subject>_mousetrack' file per subject: YA01-YA57, MA01-MA59, OA01-OA53
mousetrack_files = [
    f'{group}{number:02d}_mousetrack'
    for group, n_subjects in (('YA', 57), ('MA', 59), ('OA', 53))
    for number in range(1, n_subjects + 1)
]

all_mtrack = []
for s in mousetrack_files:
    mtrack = pd.read_csv(s+'.csv')
    mtrack['subject_id'] = s.replace("_mousetrack", "")  # store subject id in temporary dataframe
    all_mtrack.append(mtrack)


# concatenate all dataframes in the list into one large dataframe
# (ignore_index=True gives the combined table a fresh 0..n-1 row index)
all_mtrack_df = pd.concat(all_mtrack, ignore_index=True)

all_mtrack_df

In [None]:
# since there isn't one defined central fixation point, estimate it as the
# mean x and mean y of the rows that come directly after a CFS event
cfs_indices = all_mtrack_df.index[all_mtrack_df['x'] == 'CFS'] # get list of indices where x=CFS
cf_coords_ind = cfs_indices + 1 # make list of indices for the rows after CFS

# a CFS on the very last row has no following row; drop that label so the
# lookup below cannot raise KeyError
cf_coords_ind = cf_coords_ind[cf_coords_ind.isin(all_mtrack_df.index)]

# NOTE(review): the row after a subject's final CFS may be the first row of
# the next subject's file - confirm this cannot happen in the data.
cf_coords = all_mtrack_df.loc[cf_coords_ind]

# rows where the next entry is another event tag carry no coordinates;
# .copy() makes the numeric conversion write to an independent frame
cf_coords = cf_coords[~cf_coords['x'].isin(['CFE', 'CFD'])].copy()
cf_coords['x'] = pd.to_numeric(cf_coords['x'])
cf_coords['y'] = pd.to_numeric(cf_coords['y'])
cf_coords.to_csv('cf_coords')

# find mean x and y CF coordinates
cf_x = cf_coords['x'].mean()
cf_y = cf_coords['y'].mean()

print(cf_coords)
print('CF X:', cf_x)
print('CF Y:', cf_y)

# CF coordinates = (199.71440661348086, 159.97512720217148)

In [None]:
# combine all coordinate dfs into one large df to apply functions on
# removed all event labels

# one '<subject>_coords' file per subject: YA01-YA57, MA01-MA59, OA01-OA53
coords_files = [
    f'{group}{number:02d}_coords'
    for group, n_subjects in (('YA', 57), ('MA', 59), ('OA', 53))
    for number in range(1, n_subjects + 1)
]

all_coords = []
for s in coords_files:
    coords = pd.read_csv(s+'.csv')
    coords['subject_id'] = s.replace("_coords", "")  # store subject id in temporary dataframe
    all_coords.append(coords)

# stack every subject's rows under a fresh 0..n-1 row index
all_coords_df = pd.concat(all_coords, ignore_index=True)
all_coords_df.to_csv('all_coords_df')
all_coords_df

In [None]:
# make function that calculates the distance of x and y to the CF coordinates
def calc_dist_to_cf(row, center=None):
    """Return the Euclidean distance from one sample to the central fixation (CF) point.

    Args:
        row: Mapping-like object with numeric 'x' and 'y' entries, for example
            a dataframe row.
        center: Optional (x, y) pair to measure from. When omitted, the
            module-level cf_x and cf_y values are used.

    Returns:
        The straight-line distance between (row['x'], row['y']) and the centre.
    """
    center_x, center_y = (cf_x, cf_y) if center is None else center
    # squaring removes the sign, so no absolute value is needed first
    return np.sqrt((row['x'] - center_x) ** 2 + (row['y'] - center_y) ** 2)

# compute the distance for every sample in a single vectorised call:
# calc_dist_to_cf only does arithmetic on row['x'] and row['y'], so passing the
# whole dataframe hands it the full columns at once instead of calling it once
# per row through .apply(axis=1)
all_coords_df['dist_to_cf'] = calc_dist_to_cf(all_coords_df)
all_coords_df
all_coords_df.to_csv('all_coords_df')

### Make CF distance dataframes for across trials

#### Make CF distance dataframe (across trials) with mean and median stats


In [None]:
# calculate mean and median CF distance per participant (over all of their samples)
stats_cf_dist_ac = all_coords_df.groupby('subject_id').agg(
    mean_cf_dist = ('dist_to_cf', 'mean'),
    med_cf_dist = ('dist_to_cf', 'median'),
).reset_index()

# add age
stats_cf_dist_ac = stats_cf_dist_ac.merge(speed_df[['subject_id', 'age']].drop_duplicates(), on='subject_id', how='left')

# add changepoint (one value per subject, in subject-number order within each age group)
YA_cpts = [84, 33, 49, 67, 39, 66, 36, 68, 64, 11, 58, 36, 81, 24, 31, 26, 77, 0, 65, 84, 49, 24, 21, 51, 72, 12, 69, 60, 58, 24, 51, 0, 76, 22, 58, 36, 72, 52, 59, 53, 55, 22, 62, 30, 44, 51, 42, 9, 0, 66, 27, 67, 38, 40, 3, 49, 33]
MA_cpts = [51, 80, 29, 13, 43, 83, 35, 58, 55, 0, 31, 43, 33, 33, 41, 37, 11, 57, 34, 56, 63, 12, 13, 35, 60, 0, 58, 15, 11, 31, 33, 36, 31, 64, 29, 36, 46, 36, 55, 39, 0, 37, 8, 77, 0, 46, 55, 22, 67, 29, 53, 30, 16, 28, 38, 23, 33, 74, 50]
OA_cpts = [82, 26, 25, 64, 52, 26, 40, 0, 2, 17, 0, 60, 53, 25, 5, 44, 16, 50, 15, 24, 13, 0, 21, 32, 23, 16, 59, 48, 0, 63, 56, 45, 0, 62, 7, 53, 70, 57, 31, 0, 15, 30, 30, 44, 51, 68, 48, 73, 44, 76, 67, 35, 36]

all_cpts = MA_cpts + OA_cpts + YA_cpts

# Look the change point up by subject id instead of assigning `all_cpts` by
# position: the positional form only lines up while the rows are sorted
# MA < OA < YA and every one of the 169 subjects is present.
cpt_by_subject = {
    f'{group}{num:02d}': cpt
    for group, cpts in (('YA', YA_cpts), ('MA', MA_cpts), ('OA', OA_cpts))
    for num, cpt in enumerate(cpts, start=1)
}
unknown_subjects = sorted(set(stats_cf_dist_ac['subject_id']) - set(cpt_by_subject))
if unknown_subjects:
    raise ValueError(f'no change point listed for subjects: {unknown_subjects}')

stats_cf_dist_ac['cpt'] = stats_cf_dist_ac['subject_id'].map(cpt_by_subject)

stats_cf_dist_ac

#### Make mean 3-sigma CF distance dataframe 

In [None]:
# remove rows of all_coords_df whose CF distance is 3 SD or more above or below
# the mean, on a subject-by-subject basis
subject_ids = [
    f'{group}{num:02d}'
    for group, n_subjects in (('YA', 57), ('MA', 59), ('OA', 53))
    for num in range(1, n_subjects + 1)
]
kept_per_subject = []  # filtered rows of each subject, concatenated once after the loop

for s in subject_ids:
    sub_df = all_coords_df.loc[all_coords_df['subject_id'] == s]
    mean_cf_dist = np.mean(sub_df['dist_to_cf'])
    sd_cf_dist = np.std(sub_df['dist_to_cf'])
    # keep only the values that lie strictly between mean - 3 SD and mean + 3 SD
    sub_df_3sig = sub_df[~((sub_df['dist_to_cf'] >= (mean_cf_dist+(3*sd_cf_dist))) | (sub_df['dist_to_cf'] <= (mean_cf_dist-(3*sd_cf_dist))))]
    kept_per_subject.append(sub_df_3sig)

sig_cf_dist_ac = pd.concat(kept_per_subject)

sig_cf_dist_ac

In [None]:
# calculate mean CF distance per participant from the 3-sigma filtered samples
mean_sig_cf_dist_ac = sig_cf_dist_ac.groupby('subject_id').agg(
    mean_cf_dist = ('dist_to_cf', 'mean'),
).reset_index()

# add age
mean_sig_cf_dist_ac = mean_sig_cf_dist_ac.merge(speed_df[['subject_id', 'age']].drop_duplicates(), on='subject_id', how='left')

# add changepoint (one value per subject, in subject-number order within each age group)
YA_cpts = [84, 33, 49, 67, 39, 66, 36, 68, 64, 11, 58, 36, 81, 24, 31, 26, 77, 0, 65, 84, 49, 24, 21, 51, 72, 12, 69, 60, 58, 24, 51, 0, 76, 22, 58, 36, 72, 52, 59, 53, 55, 22, 62, 30, 44, 51, 42, 9, 0, 66, 27, 67, 38, 40, 3, 49, 33]
MA_cpts = [51, 80, 29, 13, 43, 83, 35, 58, 55, 0, 31, 43, 33, 33, 41, 37, 11, 57, 34, 56, 63, 12, 13, 35, 60, 0, 58, 15, 11, 31, 33, 36, 31, 64, 29, 36, 46, 36, 55, 39, 0, 37, 8, 77, 0, 46, 55, 22, 67, 29, 53, 30, 16, 28, 38, 23, 33, 74, 50]
OA_cpts = [82, 26, 25, 64, 52, 26, 40, 0, 2, 17, 0, 60, 53, 25, 5, 44, 16, 50, 15, 24, 13, 0, 21, 32, 23, 16, 59, 48, 0, 63, 56, 45, 0, 62, 7, 53, 70, 57, 31, 0, 15, 30, 30, 44, 51, 68, 48, 73, 44, 76, 67, 35, 36]

all_cpts = MA_cpts + OA_cpts + YA_cpts

# Look the change point up by subject id instead of assigning `all_cpts` by
# position: the positional form only lines up while the rows are sorted
# MA < OA < YA and every one of the 169 subjects is present.
cpt_by_subject = {
    f'{group}{num:02d}': cpt
    for group, cpts in (('YA', YA_cpts), ('MA', MA_cpts), ('OA', OA_cpts))
    for num, cpt in enumerate(cpts, start=1)
}
unknown_subjects = sorted(set(mean_sig_cf_dist_ac['subject_id']) - set(cpt_by_subject))
if unknown_subjects:
    raise ValueError(f'no change point listed for subjects: {unknown_subjects}')

mean_sig_cf_dist_ac['cpt'] = mean_sig_cf_dist_ac['subject_id'].map(cpt_by_subject)

mean_sig_cf_dist_ac

#### Make mean 3-sigma CF distance dataframe, excluding only outliers 3 SD or more ABOVE the mean

In [None]:
# remove rows of all_coords_df whose CF distance is 3 SD or more ABOVE the mean,
# on a subject-by-subject basis (values far below the mean are kept here)
subject_ids = [
    f'{group}{num:02d}'
    for group, n_subjects in (('YA', 57), ('MA', 59), ('OA', 53))
    for num in range(1, n_subjects + 1)
]
kept_per_subject = []  # filtered rows of each subject, concatenated once after the loop

for s in subject_ids:
    sub_df = all_coords_df.loc[all_coords_df['subject_id'] == s]
    mean_cf_dist = np.mean(sub_df['dist_to_cf'])
    sd_cf_dist = np.std(sub_df['dist_to_cf'])
    # keep only the values that lie strictly below mean + 3 SD
    sub_df_3sig = sub_df[~((sub_df['dist_to_cf'] >= (mean_cf_dist+(3*sd_cf_dist))))]
    kept_per_subject.append(sub_df_3sig)

pos_sig_cf_dist_ac = pd.concat(kept_per_subject)

pos_sig_cf_dist_ac

In [None]:
# calculate mean CF distance per participant from the upper-bound 3-sigma filtered samples
mean_pos_sig_cf_dist_ac = pos_sig_cf_dist_ac.groupby('subject_id').agg(
    mean_cf_dist = ('dist_to_cf', 'mean'),
).reset_index()

# add age
mean_pos_sig_cf_dist_ac = mean_pos_sig_cf_dist_ac.merge(speed_df[['subject_id', 'age']].drop_duplicates(), on='subject_id', how='left')

# add changepoint (one value per subject, in subject-number order within each age group)
YA_cpts = [84, 33, 49, 67, 39, 66, 36, 68, 64, 11, 58, 36, 81, 24, 31, 26, 77, 0, 65, 84, 49, 24, 21, 51, 72, 12, 69, 60, 58, 24, 51, 0, 76, 22, 58, 36, 72, 52, 59, 53, 55, 22, 62, 30, 44, 51, 42, 9, 0, 66, 27, 67, 38, 40, 3, 49, 33]
MA_cpts = [51, 80, 29, 13, 43, 83, 35, 58, 55, 0, 31, 43, 33, 33, 41, 37, 11, 57, 34, 56, 63, 12, 13, 35, 60, 0, 58, 15, 11, 31, 33, 36, 31, 64, 29, 36, 46, 36, 55, 39, 0, 37, 8, 77, 0, 46, 55, 22, 67, 29, 53, 30, 16, 28, 38, 23, 33, 74, 50]
OA_cpts = [82, 26, 25, 64, 52, 26, 40, 0, 2, 17, 0, 60, 53, 25, 5, 44, 16, 50, 15, 24, 13, 0, 21, 32, 23, 16, 59, 48, 0, 63, 56, 45, 0, 62, 7, 53, 70, 57, 31, 0, 15, 30, 30, 44, 51, 68, 48, 73, 44, 76, 67, 35, 36]

all_cpts = MA_cpts + OA_cpts + YA_cpts

# Look the change point up by subject id instead of assigning `all_cpts` by
# position: the positional form only lines up while the rows are sorted
# MA < OA < YA and every one of the 169 subjects is present.
cpt_by_subject = {
    f'{group}{num:02d}': cpt
    for group, cpts in (('YA', YA_cpts), ('MA', MA_cpts), ('OA', OA_cpts))
    for num, cpt in enumerate(cpts, start=1)
}
unknown_subjects = sorted(set(mean_pos_sig_cf_dist_ac['subject_id']) - set(cpt_by_subject))
if unknown_subjects:
    raise ValueError(f'no change point listed for subjects: {unknown_subjects}')

mean_pos_sig_cf_dist_ac['cpt'] = mean_pos_sig_cf_dist_ac['subject_id'].map(cpt_by_subject)

mean_pos_sig_cf_dist_ac

#### Make CF distance dataframes with no CPT=0 (across trials)

In [None]:
# for mean and median: drop the participants whose change point is 0.
# The rows to drop are selected from the same dataframe that is being filtered,
# so the index labels passed to .drop() always refer to that frame's own rows.
stats_cf_dist_nz_ac = stats_cf_dist_ac.drop(stats_cf_dist_ac[stats_cf_dist_ac['cpt'] == 0].index)
stats_cf_dist_nz_ac

# same for the 3-sigma means
mean_sig_cf_dist_nz_ac = mean_sig_cf_dist_ac.drop(mean_sig_cf_dist_ac[mean_sig_cf_dist_ac['cpt'] == 0].index)
mean_sig_cf_dist_nz_ac

### Make CF distance dataframes for within a trial

#### Make CF distance dataframe (within trials) with mean and median stats


In [None]:
# mean and median CF distance for every trial of every participant
per_trial_dist = all_coords_df.groupby(['subject_id', 'trial_num'])['dist_to_cf']
stats_cf_dist_w = per_trial_dist.agg(mean_cf_dist='mean', med_cf_dist='median').reset_index()

# add age
age_lookup = speed_df[['subject_id', 'age']].drop_duplicates()
stats_cf_dist_w = stats_cf_dist_w.merge(age_lookup, on='subject_id', how='left')

# add changepoint
# NOTE(review): cpt is taken from the no-CPT=0 frame, so the left merge leaves
# cpt as NaN for every subject whose change point is 0 — confirm this is intended.
cpt_lookup = mean_sig_cf_dist_nz_ac[['subject_id', 'cpt']].drop_duplicates()
stats_cf_dist_w = stats_cf_dist_w.merge(cpt_lookup, on='subject_id', how='left')

stats_cf_dist_w

In [None]:
# do mean CF distance per participant based on trials
# (both columns summarise the per-trial MEANS: their mean and their median)
# NOTE(review): med_cf_dist is computed from 'mean_cf_dist', not from the
# per-trial 'med_cf_dist' column — confirm this is the intended statistic.
mean_stats_cf_w = stats_cf_dist_w.groupby('subject_id').agg(
    mean_cf_dist = ('mean_cf_dist', 'mean'),
    med_cf_dist = ('mean_cf_dist', 'median'),
).reset_index()
mean_stats_cf_w

# add age
mean_stats_cf_w = mean_stats_cf_w.merge(speed_df[['subject_id', 'age']].drop_duplicates(), on='subject_id', how='left')

# add changepoint (one value per subject, in subject-number order within each age group)
YA_cpts = [84, 33, 49, 67, 39, 66, 36, 68, 64, 11, 58, 36, 81, 24, 31, 26, 77, 0, 65, 84, 49, 24, 21, 51, 72, 12, 69, 60, 58, 24, 51, 0, 76, 22, 58, 36, 72, 52, 59, 53, 55, 22, 62, 30, 44, 51, 42, 9, 0, 66, 27, 67, 38, 40, 3, 49, 33]
MA_cpts = [51, 80, 29, 13, 43, 83, 35, 58, 55, 0, 31, 43, 33, 33, 41, 37, 11, 57, 34, 56, 63, 12, 13, 35, 60, 0, 58, 15, 11, 31, 33, 36, 31, 64, 29, 36, 46, 36, 55, 39, 0, 37, 8, 77, 0, 46, 55, 22, 67, 29, 53, 30, 16, 28, 38, 23, 33, 74, 50]
OA_cpts = [82, 26, 25, 64, 52, 26, 40, 0, 2, 17, 0, 60, 53, 25, 5, 44, 16, 50, 15, 24, 13, 0, 21, 32, 23, 16, 59, 48, 0, 63, 56, 45, 0, 62, 7, 53, 70, 57, 31, 0, 15, 30, 30, 44, 51, 68, 48, 73, 44, 76, 67, 35, 36]

all_cpts = MA_cpts + OA_cpts + YA_cpts

# Look the change point up by subject id instead of assigning `all_cpts` by
# position: the positional form only lines up while the rows are sorted
# MA < OA < YA and every one of the 169 subjects is present.
cpt_by_subject = {
    f'{group}{num:02d}': cpt
    for group, cpts in (('YA', YA_cpts), ('MA', MA_cpts), ('OA', OA_cpts))
    for num, cpt in enumerate(cpts, start=1)
}
unknown_subjects = sorted(set(mean_stats_cf_w['subject_id']) - set(cpt_by_subject))
if unknown_subjects:
    raise ValueError(f'no change point listed for subjects: {unknown_subjects}')

mean_stats_cf_w['cpt'] = mean_stats_cf_w['subject_id'].map(cpt_by_subject)

mean_stats_cf_w

#### Make mean 3-sigma CF distance dataframe (within trials)

In [None]:
def filter_outliers(group):
    """Return the rows of ``group`` whose 'dist_to_cf' is within 3 SD of the group mean.

    The mean and SD (``np.mean`` / ``np.std``) are computed from ``group`` itself.
    Rows at or beyond mean + 3 SD, or at or below mean - 3 SD, are removed.

    A group with zero spread (for example a single sample) is returned unchanged:
    with SD == 0 both bounds equal the mean, so the comparisons would otherwise
    discard every row even though none of them is an outlier.
    """
    mean_cf_dist = np.mean(group['dist_to_cf'])
    sd_cf_dist = np.std(group['dist_to_cf'])
    if sd_cf_dist == 0:
        return group.copy()
    too_high = group['dist_to_cf'] >= (mean_cf_dist + 3 * sd_cf_dist)
    too_low = group['dist_to_cf'] <= (mean_cf_dist - 3 * sd_cf_dist)
    return group[~(too_high | too_low)]

# run the 3-sigma filter separately on each subject's trial, then stack the
# surviving rows under a fresh 0..n-1 index
per_trial_groups = all_coords_df.groupby(['subject_id', 'trial_num'])
sig_cf_dist_w = per_trial_groups.apply(filter_outliers).reset_index(drop=True)
sig_cf_dist_w

In [None]:
# mean CF distance per trial first, then the mean of those trial means per participant
trial_mean_dist = (
    sig_cf_dist_w.groupby(['subject_id', 'trial_num'])['dist_to_cf']
    .mean()
    .rename('mean_cf_dist')
    .reset_index()
)

mean_sig_cf_dist_w = (
    trial_mean_dist.groupby('subject_id')['mean_cf_dist']
    .mean()
    .reset_index()
)
mean_sig_cf_dist_w

In [None]:
# calculate mean CF distance per trial from the 3-sigma filtered samples ...
mean_sig_cf_dist_w = sig_cf_dist_w.groupby(['subject_id', 'trial_num']).agg(
    mean_cf_dist = ('dist_to_cf', 'mean')
).reset_index()

# ... then average those trial means per participant
mean_sig_cf_dist_w = mean_sig_cf_dist_w.groupby('subject_id').agg(
    mean_cf_dist = ('mean_cf_dist', 'mean'),
).reset_index()
mean_sig_cf_dist_w

# add age
mean_sig_cf_dist_w = mean_sig_cf_dist_w.merge(speed_df[['subject_id', 'age']].drop_duplicates(), on='subject_id', how='left')

# add changepoint (one value per subject, in subject-number order within each age group)
YA_cpts = [84, 33, 49, 67, 39, 66, 36, 68, 64, 11, 58, 36, 81, 24, 31, 26, 77, 0, 65, 84, 49, 24, 21, 51, 72, 12, 69, 60, 58, 24, 51, 0, 76, 22, 58, 36, 72, 52, 59, 53, 55, 22, 62, 30, 44, 51, 42, 9, 0, 66, 27, 67, 38, 40, 3, 49, 33]
MA_cpts = [51, 80, 29, 13, 43, 83, 35, 58, 55, 0, 31, 43, 33, 33, 41, 37, 11, 57, 34, 56, 63, 12, 13, 35, 60, 0, 58, 15, 11, 31, 33, 36, 31, 64, 29, 36, 46, 36, 55, 39, 0, 37, 8, 77, 0, 46, 55, 22, 67, 29, 53, 30, 16, 28, 38, 23, 33, 74, 50]
OA_cpts = [82, 26, 25, 64, 52, 26, 40, 0, 2, 17, 0, 60, 53, 25, 5, 44, 16, 50, 15, 24, 13, 0, 21, 32, 23, 16, 59, 48, 0, 63, 56, 45, 0, 62, 7, 53, 70, 57, 31, 0, 15, 30, 30, 44, 51, 68, 48, 73, 44, 76, 67, 35, 36]

all_cpts = MA_cpts + OA_cpts + YA_cpts

# Look the change point up by subject id instead of assigning `all_cpts` by
# position: the positional form only lines up while the rows are sorted
# MA < OA < YA and every one of the 169 subjects is present.
cpt_by_subject = {
    f'{group}{num:02d}': cpt
    for group, cpts in (('YA', YA_cpts), ('MA', MA_cpts), ('OA', OA_cpts))
    for num, cpt in enumerate(cpts, start=1)
}
unknown_subjects = sorted(set(mean_sig_cf_dist_w['subject_id']) - set(cpt_by_subject))
if unknown_subjects:
    raise ValueError(f'no change point listed for subjects: {unknown_subjects}')

mean_sig_cf_dist_w['cpt'] = mean_sig_cf_dist_w['subject_id'].map(cpt_by_subject)

mean_sig_cf_dist_w

#### Make CF distance dataframes with no CPT=0 (within trials)

In [None]:
# within-trial summaries without the participants whose change point is 0
zero_cpt_rows = mean_stats_cf_w.index[mean_stats_cf_w['cpt'] == 0]
mean_stats_cf_w_nz = mean_stats_cf_w.drop(index=zero_cpt_rows)
mean_stats_cf_w_nz

# same for the 3-sigma means
zero_cpt_rows_sig = mean_sig_cf_dist_w.index[mean_sig_cf_dist_w['cpt'] == 0]
mean_sig_cf_dist_w_nz = mean_sig_cf_dist_w.drop(index=zero_cpt_rows_sig)
mean_sig_cf_dist_w_nz

### Mean CFB per choice
Count how many CFBs there are in each choice of each trial, take the mean to find the average number of CFBs per subject, and then regress that against age

1. For each trial, count number of CFBs and put in a table
2. Take mean for each participant
3. Regression

In [None]:
# Count the CFB events of every subject per (choice_num, trial_num).
# File names are <group><two-digit number>_events for YA01-YA57, MA01-MA59 and
# OA01-OA53 (the same names, in the same order, as the previous hand-written list).
events_files = [
    f'{group}{num:02d}_events'
    for group, n_subjects in (('YA', 57), ('MA', 59), ('OA', 53))
    for num in range(1, n_subjects + 1)
]
cfb_frames = []  # one frame of CFB counts per subject, concatenated once after the loop

for s in events_files:
    sub_df = pd.read_csv(s + '.csv')
    subject_id = s.replace("_events", "")
    # number of CFB rows for each choice of each trial; combinations without any
    # CFB row do not appear in the result at all (they are not counted as 0)
    # NOTE(review): rows with a missing choice_num or trial_num are left out by
    # groupby — confirm CFB rows always carry both values.
    cfb_series = sub_df[sub_df['x'] == 'CFB'].groupby(['choice_num', 'trial_num']).size()
    sub_cfb = cfb_series.reset_index()
    sub_cfb.columns = ['choice_num', 'trial_num', 'cfb_count']
    sub_cfb['subject_id'] = subject_id
    cfb_frames.append(sub_cfb)

cfb_count_df = pd.concat(cfb_frames)

cfb_count_df.to_csv('cfb_count_df.csv', index=False)
cfb_count_df

In [None]:
# take mean and median CFB count for each subject for each trial number
# (the earlier per-choice aggregation was removed: its result was overwritten
# straight away by this one and never used)
mean_cfb_count = cfb_count_df.groupby(['subject_id', 'trial_num']).agg(
    mean_cfb_count = ('cfb_count', 'mean'),
    med_cfb_count = ('cfb_count', 'median'),
).reset_index()
mean_cfb_count

# take mean for each subject (to get overall mean of cfb per trial number);
# med_cfb_count is the median of the per-trial medians
mean_cfb_count = mean_cfb_count.groupby('subject_id').agg(
    mean_cfb_count = ('mean_cfb_count', 'mean'),
    med_cfb_count = ('med_cfb_count', 'median'),
).reset_index()
mean_cfb_count

# add age
mean_cfb_count = mean_cfb_count.merge(speed_df[['subject_id', 'age']].drop_duplicates(), on='subject_id', how='left')

# add changepoint (one value per subject, in subject-number order within each age group)
YA_cpts = [84, 33, 49, 67, 39, 66, 36, 68, 64, 11, 58, 36, 81, 24, 31, 26, 77, 0, 65, 84, 49, 24, 21, 51, 72, 12, 69, 60, 58, 24, 51, 0, 76, 22, 58, 36, 72, 52, 59, 53, 55, 22, 62, 30, 44, 51, 42, 9, 0, 66, 27, 67, 38, 40, 3, 49, 33]
MA_cpts = [51, 80, 29, 13, 43, 83, 35, 58, 55, 0, 31, 43, 33, 33, 41, 37, 11, 57, 34, 56, 63, 12, 13, 35, 60, 0, 58, 15, 11, 31, 33, 36, 31, 64, 29, 36, 46, 36, 55, 39, 0, 37, 8, 77, 0, 46, 55, 22, 67, 29, 53, 30, 16, 28, 38, 23, 33, 74, 50]
OA_cpts = [82, 26, 25, 64, 52, 26, 40, 0, 2, 17, 0, 60, 53, 25, 5, 44, 16, 50, 15, 24, 13, 0, 21, 32, 23, 16, 59, 48, 0, 63, 56, 45, 0, 62, 7, 53, 70, 57, 31, 0, 15, 30, 30, 44, 51, 68, 48, 73, 44, 76, 67, 35, 36]

all_cpts = MA_cpts + OA_cpts + YA_cpts

# Look the change point up by subject id instead of assigning `all_cpts` by
# position: the positional form only lines up while the rows are sorted
# MA < OA < YA and every one of the 169 subjects is present (a subject without
# any CFB has no row here at all).
cpt_by_subject = {
    f'{group}{num:02d}': cpt
    for group, cpts in (('YA', YA_cpts), ('MA', MA_cpts), ('OA', OA_cpts))
    for num, cpt in enumerate(cpts, start=1)
}
unknown_subjects = sorted(set(mean_cfb_count['subject_id']) - set(cpt_by_subject))
if unknown_subjects:
    raise ValueError(f'no change point listed for subjects: {unknown_subjects}')

mean_cfb_count['cpt'] = mean_cfb_count['subject_id'].map(cpt_by_subject)

# make sure age and mean CFB count are numeric; unparseable values become NaN
mean_cfb_count['age'] = pd.to_numeric(mean_cfb_count['age'], errors='coerce')
mean_cfb_count['mean_cfb_count'] = pd.to_numeric(mean_cfb_count['mean_cfb_count'], errors='coerce')

mean_cfb_count.to_csv('mean_cfb_count', index=False)

mean_cfb_count

### Time between time = 0 and first central fixation
This is how many seconds it took to acquire central fixation

In [None]:
# split all_mtrack_df into one group of rows per subject_id value
sub_grouped = all_mtrack_df.groupby('subject_id')

# function to calculate the time difference between start and first central fixation
def time_0_cfs(group):
    """Return the 'time' of the first CFS row in ``group``, measured from time 0.

    "First" means the first row, in the order the rows appear in ``group``,
    whose 'x' value is the event tag 'CFS'.

    Returns NaN when ``group`` has no CFS row, so one subject without a central
    fixation does not abort the calculation for every other subject.
    """
    time_0 = 0
    cfs_rows = group[group['x'] == 'CFS']
    if cfs_rows.empty:
        return float('nan')
    first_cfs_time = cfs_rows.iloc[0]['time']
    return first_cfs_time - time_0

# run time_0_cfs on every subject's rows and keep the results as {subject_id: time}
first_cfs_by_subject = sub_grouped.apply(time_0_cfs)
time_0_dict = first_cfs_by_subject.to_dict()

# turn the dictionary into a two-column dataframe
time_0_cfs__df = pd.DataFrame(
    {
        'subject_id': list(time_0_dict.keys()),
        'time_0_to_first_cfs': list(time_0_dict.values()),
    }
)

# look up each subject's age in subject_age_dict
time_0_cfs__df['age'] = time_0_cfs__df['subject_id'].map(subject_age_dict)

print(time_0_cfs__df)

### Time to reveal shape targets
This is the time difference between each pair of CFS --> CFE per choice, per trial, per subject

In [None]:
# Time from CFS to CFE for each choice of each trial, per participant.
# File names are <group><two-digit number>_events for YA01-YA57, MA01-MA59 and
# OA01-OA53 (the same names, in the same order, as the previous hand-written list).
events_files = [
    f'{group}{num:02d}_events'
    for group, n_subjects in (('YA', 57), ('MA', 59), ('OA', 53))
    for num in range(1, n_subjects + 1)
]
cf_time_frames = []  # one frame per participant, concatenated once after the loop

for s in events_files:
    events = pd.read_csv(s + '.csv')

    # when there are fewer CFE rows than CFS rows, remove all rows after the
    # final CFE row so the file cannot end on a CFS that has no CFE
    if len(events.loc[events['x'] == 'CFE', 'time']) < len(events.loc[events['x'] == 'CFS', 'time']):
        # find the index of the last CFE row
        last_cfe_index = events[events['x'] == 'CFE'].index[-1]
        # remove all rows after the last CFE row
        events = events.iloc[:last_cfe_index + 1]

    # subset for only CFS and CFE rows (explicit copy, so the column
    # assignments below act on an independent dataframe)
    cf_events_df = events[events['x'].isin(['CFS', 'CFE'])].copy()

    # fill NaN values in 'choice_num' by propagating the last valid value forward
    cf_events_df['choice_num'] = cf_events_df['choice_num'].ffill()

    cf_events_df['time'] = cf_events_df['time'].astype(int)

    # back-to-back rows with the same tag (e.g. CFS, CFS caused by a CFB in
    # between) break the pairing below: mark every row whose tag repeats the tag
    # of the row before it ...
    duplicates_mask = (cf_events_df['x'].shift(1) == cf_events_df['x'])

    # ... and drop those rows, which keeps the FIRST row of each run
    # NOTE(review): the previous comment said the second occurrence was kept;
    # the code keeps the first one — confirm which CFS should start the interval.
    cf_events_df = cf_events_df[~duplicates_mask].reset_index(drop=True)

    # one row per (trial_num, choice_num) combination
    tmp_df = cf_events_df[['trial_num', 'choice_num']].drop_duplicates().copy()

    # times of all remaining CFS rows and CFE rows, in file order
    cfs_times = cf_events_df.loc[cf_events_df['x'] == 'CFS', 'time']
    cfe_times = cf_events_df.loc[cf_events_df['x'] == 'CFE', 'time']

    # add cfs times and cfe times into the dataframe; this is positional, so it
    # needs exactly one CFS and one CFE per row of tmp_df (otherwise it raises)
    tmp_df['cfs_time'] = cfs_times.values
    tmp_df['cfe_time'] = cfe_times.values

    # calculate cf time
    tmp_df['cf_time'] = tmp_df['cfe_time'] - tmp_df['cfs_time']

    # make subject_id column
    tmp_df['subject_id'] = s.replace("_events", "")

    tmp_df.reset_index(drop=True, inplace=True)

    cf_time_frames.append(tmp_df)

cfs_to_cfe_df = pd.concat(cf_time_frames)

cfs_to_cfe_df
# this is a dataframe that has the cf time for each participant, for each trial, for each choice

#cfs_to_cfe_mean_df = cfs_to_cfe_df.groupby('subject_id').agg(
#    mean_cfs_to_cfe_time = ('cf_time', 'mean'),
#).reset_index()

# add ages using dictionary
#cfs_to_cfe_mean_df['age'] = cfs_to_cfe_mean_df['subject_id'].map(subject_age_dict)

#cfs_to_cfe_mean_df

### Time between tile reveal and next central fixation ON
MMS/MHS --> CFS

In [None]:
# MMS/MHS --> CFS
# Time from each tile reveal (MMS/MHS) to the next CFS, per participant.
# File names are <group><two-digit number>_events for YA01-YA57, MA01-MA59 and
# OA01-OA53 (the same names, in the same order, as the previous hand-written list).
events_files = [
    f'{group}{num:02d}_events'
    for group, n_subjects in (('YA', 57), ('MA', 59), ('OA', 53))
    for num in range(1, n_subjects + 1)
]
it_time_frames = []  # one frame per participant, concatenated once after the loop

for s in events_files:
    events = pd.read_csv(s + '.csv')

    # fill NaN values in 'choice_num' by propagating the last valid value forward
    events['choice_num'] = events['choice_num'].ffill()
    events['time'] = events['time'].astype(int)

    # when there are fewer CFS rows than MMS/MHS rows, remove all rows after the
    # final CFS row
    if len(events.loc[events['x'] == 'CFS', 'time']) < len(events.loc[(events['x'] == 'MHS') | (events['x'] == 'MMS'), 'time']):
        # find the index of the last CFS row
        last_cfs_index = events[events['x'] == 'CFS'].index[-1]
        # remove all rows after the last CFS row
        events = events.iloc[:last_cfs_index + 1]

    it_events_df = events[events['x'].isin(['CFS', 'MMS', 'MHS'])]

    # back-to-back rows with the same tag (e.g. CFS, CFS caused by a CFB in
    # between) break the pairing below: mark every row whose tag repeats the tag
    # of the row before it ...
    duplicates_mask = (it_events_df['x'].shift(1) == it_events_df['x'])

    # ... and drop those rows, which keeps the FIRST row of each run
    # NOTE(review): the previous comment said the second occurrence was kept;
    # the code keeps the first one — confirm that is the intended CFS.
    it_events_df = it_events_df[~duplicates_mask].reset_index(drop=True)

    # one row per (trial_num, choice_num) combination
    tmp_df = it_events_df[['trial_num', 'choice_num']].drop_duplicates().copy()

    # times of all remaining CFS rows and MMS/MHS rows, in file order
    cfs_times = it_events_df.loc[it_events_df['x'] == 'CFS', 'time']
    mms_mhs_times = it_events_df.loc[it_events_df['x'].isin(['MMS', 'MHS']), 'time']

    # add a NaN at the end of the MMS/MHS values if it is shorter, for indexing
    if len(mms_mhs_times) < len(cfs_times):
        nan_series = pd.Series([float('nan')])
        mms_mhs_times = pd.concat([mms_mhs_times, nan_series], ignore_index=True)

    # add cfs times and mms/mhs times into the dataframe; this is positional, so
    # both must have exactly one value per row of tmp_df (otherwise it raises)
    tmp_df['cfs_time'] = cfs_times.values
    tmp_df['mms_mhs_time'] = mms_mhs_times.values

    # calculate the difference between the next 'cfs_time' and the mms/mhs value
    tmp_df['it_time'] = (tmp_df['cfs_time'].shift(-1)) - tmp_df['mms_mhs_time']

    # drop the last row (it has no following CFS, so its it_time is NaN)
    tmp_df = tmp_df.iloc[:-1].copy()

    # make subject_id column
    tmp_df['subject_id'] = s.replace("_events", "")

    tmp_df.reset_index(drop=True, inplace=True)

    it_time_frames.append(tmp_df)

it_time_df = pd.concat(it_time_frames)

it_time_df
# this is a dataframe that has the MMS/MHS --> next CFS time for each participant, for each trial, for each choice

# average MMS/MHS --> CFS time of each participant
it_mean_time_df = (
    it_time_df.groupby('subject_id')['it_time']
    .mean()
    .rename('mean_it_time')
    .reset_index()
)

# look up each participant's age in subject_age_dict
subject_ages = it_mean_time_df['subject_id'].map(subject_age_dict)
it_mean_time_df['age'] = subject_ages
it_mean_time_df