__Data saved:__
-  __recordings_village_corr.csv__ --> participants who left the main square for more than 10% of the total time are removed

# Check how often participants left the main square

In [None]:
import copy  # copy big/deep objects by value
import datetime  # datetime operations
import itertools  # operate with iterators
import json  # read/write from/into json format
import math
import os  # OS operations (read/write files/folders)
import warnings  # hide warnings

# process parallelization
from multiprocessing import Manager, Pool, RawArray, cpu_count

import matplotlib.pyplot as plt  # mother of plots focr Python
import numpy as np  # array/matrix operations (e.g. linear algebra)
import pandas as pd  # operate with dataframes
import pyxdf  # read XDF files (LSL streams recordings)
import seaborn as sns  # matplotlib plotting nice with shortcuts
from IPython.display import Markdown, display  # print nicely
from matplotlib.pyplot import cm
from pandas import DataFrame, to_datetime, to_timedelta
from tqdm.notebook import tqdm  # mother of progressbars

In [None]:
# data locations: raw recordings and processed outputs
PATH_RAW = "./data/raw"
PATH_PROC = "./data/processed"

# render floats in pandas tables with three decimals
pd.set_option("display.float_format", "{:.3f}".format)

# progress bar customized layout (two lines; the first ends with a space)
B_FORMAT = (
    "📄 {n_fmt} of {total_fmt} {desc} processed: {bar} \n"
    "            {percentage:3.0f}% ⏱️{elapsed} ⏳{remaining} ⚙️{rate_fmt}{postfix}"
)

# number of cpu threads available for multiprocessing
CORES = cpu_count()
print(f"Total CPU threads: {CORES}")

In [None]:
# participants table; the first CSV column becomes the index (participant ids)
recordings = pd.read_csv(
    filepath_or_buffer="./recordings_village_corr.csv",
    index_col=0,
)
recordings

__Get the amount of time participants leave the central square__

The participants received the instruction not to leave the main square during the experiment. Surprisingly, a good number of participants, even after repeated reminders, violated this instruction. To correct for this, we calculate the number of samples, and the percentage thereof, during which each participant was off the main square. Participants with more than 10% of their samples located off the main square were excluded from further analysis.

In [None]:
# participants ids
ids = recordings.index.tolist()
idd = ids[:]  # independent copy of the id list to iterate over

cols_os = ["Total Time", "Off Square", "% Off Square", "Face Gazes"]
g_length = 0.250  # >250ms is a gaze (or 24 hits)

# bounding rectangle of the main square (square coordinates defined by Unity)
SQUARE_X_MIN, SQUARE_X_MAX = 515, 631
SQUARE_Z_MIN, SQUARE_Z_MAX = 554, 620
# corner inside the bounding rectangle that also counts as off the square
CORNER_X_MAX, CORNER_Z_MAX = 605, 564


def count_off_square(pos_x, pos_z):
    """Count the samples whose (x, z) position lies off the main square.

    A sample is off the square when it is outside the bounding rectangle
    (strict comparisons, so the border itself is on the square) or when it
    is inside the excluded corner (x < CORNER_X_MAX and z < CORNER_Z_MAX).
    NaN coordinates compare as False everywhere and are therefore not
    counted as off the square.

    Parameters
    ----------
    pos_x, pos_z : array-like
        Player x and z coordinates, one entry per sample, equal length.

    Returns
    -------
    int
        Number of samples located off the main square.
    """
    pos_x = np.asarray(pos_x)
    pos_z = np.asarray(pos_z)
    outside_rect = (
        (pos_x < SQUARE_X_MIN)
        | (pos_x > SQUARE_X_MAX)
        | (pos_z < SQUARE_Z_MIN)
        | (pos_z > SQUARE_Z_MAX)
    )
    in_cut_corner = (pos_x < CORNER_X_MAX) & (pos_z < CORNER_Z_MAX)
    return int(np.count_nonzero(outside_rect | in_cut_corner))


# per-participant statistics, keyed by participant id
off_square_stats = {}
for uid in idd:
    # context manager closes the file even if the JSON is malformed
    with open(f"{PATH_PROC}/Timestamps_overall_{uid}.json", "r") as f:
        times_overall = json.load(f)  # load file content as JSON

    # NOTE(review): CUSTOM_DTYPES is not defined in any cell visible in this
    # notebook; it must exist in the kernel before this cell runs -- confirm
    # where it comes from and define it in the configuration cell.
    beh_df = pd.read_csv(
        f"{PATH_PROC}/Behavior_new_{uid}.csv", index_col=0, dtype=CUSTOM_DTYPES
    )
    hit_df = pd.read_csv(f"{PATH_PROC}/HitEvents_new_{uid}.csv")

    n_samples = len(times_overall)
    if n_samples == 0:
        raise ValueError(f"No timestamps found for participant {uid}")
    if len(beh_df) < n_samples:
        raise ValueError(
            f"Participant {uid}: {len(beh_df)} behavior rows but "
            f"{n_samples} timestamps"
        )

    # gazes longer than the threshold that hit an object named like a face
    gaze = hit_df[hit_df["length"] > g_length]
    face_cond = gaze["HON"].str.contains("face", regex=False, na=False)
    n_face_gazes = int(face_cond.sum())

    # only the first n_samples positions are paired with a timestamp
    off_squ = count_off_square(
        beh_df["HToriginX"].to_numpy()[:n_samples],
        beh_df["HToriginZ"].to_numpy()[:n_samples],
    )

    off_square_stats[uid] = {
        "Total Time": n_samples,
        "Face Gazes": n_face_gazes,
        "Off Square": off_squ,
        "% Off Square": round(off_squ * 100 / n_samples, 3),
    }

# one row per participant, columns ordered as listed in cols_os
time_off_square = (
    pd.DataFrame(off_square_stats).transpose().reindex(columns=cols_os)
)
display(time_off_square.sort_values(by=["% Off Square"]))
time_off_square.to_csv("./time_off_square.csv", index=True)

__Exclude Recordings with more than 10% off Square --> but only run this once!__

In [None]:
# Exclude participants that spent more than MAX_OFF_SQUARE_PCT percent of
# their samples off the main square.
# Safe to re-run: ids that were already removed from `recordings` are ignored
# instead of raising a KeyError.
# Note: this overwrites ./recordings_village_corr.csv, the same file the
# notebook loads `recordings` from.
MAX_OFF_SQUARE_PCT = 10
ts = time_off_square[time_off_square["% Off Square"] > MAX_OFF_SQUARE_PCT].index
recordings = recordings.drop(ts, errors="ignore")
ids = recordings.index.tolist()
recordings.to_csv("./recordings_village_corr.csv", index=True)
display(len(recordings))
display(recordings)
