In [None]:
# cal_dist
import pandas as pd
import matplotlib.pyplot as plt
from matplotlib import image
import numpy as np
import cv2 as cv
import os
from datetime import datetime
import time
from scipy.spatial.distance import pdist, squareform

In [None]:
# laosheng_s1
# Input/output locations. Every immediate sub-directory of `parent_folder`
# is treated as one subject folder by the processing loop.
parent_folder = '/example_data'
save_folder = '/results'
os.makedirs(save_folder, exist_ok=True)

cal_save_path = os.path.join(save_folder, 'cal_dist.csv')
bidi_save_path = os.path.join(save_folder, 'bidi_raw.csv')

# os.scandir() yields entries in arbitrary, filesystem-dependent order.
# Sorting makes the row order of both output CSVs reproducible across runs;
# the `with` block releases the directory handle as soon as listing is done.
with os.scandir(parent_folder) as entries:
    subfolders = sorted(entry.path for entry in entries if entry.is_dir())

# Accumulators filled by the per-subject loop.
results = []                # one summary dict per subject
bidi_raw = pd.DataFrame()   # one row per location, all subjects stacked

def _age_in_years(dob, tested_at):
    """Return the age at test time in years, rounded to one decimal.

    Only the year and month take part in the calculation; days are ignored.

    Args:
        dob: Date of birth as a string. Characters 0-3 are read as the year
            and characters 4-5 as the month (presumably 'YYYYMMDD' --
            TODO confirm against the userinfo files).
        tested_at: ``datetime`` at which the test started.

    Raises:
        ValueError: If the sliced year or month is not an integer.

    >>> _age_in_years('20100915', datetime(2024, 3, 1))
    13.5
    """
    dob_year = int(dob[:4])
    dob_month = int(dob[4:6])
    return round((tested_at.year - dob_year) + (tested_at.month - dob_month) / 12.0, 1)


def _path_length(points):
    """Return the summed straight-line distance between consecutive points.

    Args:
        points: Iterable of mappings that each provide numeric 'x' and 'y'.

    Returns:
        Length of the polyline through the points, in the same unit as the
        coordinates; 0 when there are fewer than two points.

    >>> _path_length([{'x': 0, 'y': 0}, {'x': 3, 'y': 4}])
    5.0
    """
    px = [p['x'] for p in points]
    py = [p['y'] for p in points]
    return sum(
        ((x1 - x0) ** 2 + (y1 - y0) ** 2) ** 0.5
        for x0, x1, y0, y1 in zip(px, px[1:], py, py[1:])
    )


# Timestamp layout shared by 'beginTime' and 'endTime' in userinfo.txt.
time_format = '%Y/%m/%d %H:%M:%S'

# Per-subject location tables are collected here and concatenated once after
# the loop; concatenating inside the loop re-copies every earlier row on each
# iteration.
bidi_frames = []

for subfolder in subfolders:
    subjid = os.path.basename(subfolder)

    # read data
    trackinfo_file_path = os.path.join(subfolder, 'trackinfo.txt')
    dat = pd.read_json(trackinfo_file_path)
    userinfo_file_path = os.path.join(subfolder, 'userinfo.txt')
    # After the transpose every userinfo field is a column; its value is read
    # from row 0 below.
    layout_dat = pd.read_json(userinfo_file_path, orient='index').transpose()

    # basic info
    name = layout_dat['name'][0]
    sex = layout_dat['sex'][0]

    # Test start/end are parsed once and reused for age, date and duration.
    time_start = datetime.strptime(layout_dat['beginTime'][0], time_format)
    time_end = datetime.strptime(layout_dat['endTime'][0], time_format)

    age = _age_in_years(layout_dat['dateOfBirth'][0], time_start)
    test_date = time_start.strftime("%Y/%m/%d")
    time_dur = (time_end - time_start).total_seconds()  # seconds

    # Dist_Avg, Dist_Std: Euclidean distance between each 'localInfos' point
    # and the 'targetInfos' point with the same index (presumably the placed
    # position vs. the true position -- confirm with the data description).
    placed = layout_dat['localInfos'][0]
    targets = layout_dat['targetInfos'][0]
    n_loc = len(placed)

    xs = [point['x'] for point in placed]
    ys = [point['y'] for point in placed]
    # Indexed (not zipped) on purpose: a subject with fewer targets than
    # placed points raises instead of being silently truncated.
    x0s = [targets[i]['x'] for i in range(n_loc)]
    y0s = [targets[i]['y'] for i in range(n_loc)]

    dists = [
        np.sqrt((x - x0) ** 2 + (y - y0) ** 2)
        for x, y, x0, y0 in zip(xs, ys, x0s, y0s)
    ]

    dist_avg = np.mean(dists)
    dist_std = np.std(dists)  # population standard deviation (numpy default ddof=0)

    # bidi_raw for bidiregression
    # NOTE: a per-location 'time' column (number of track points / 10) was
    # sketched in an earlier version of this cell but is disabled.
    bidi_frames.append(pd.DataFrame({
        'Name': [name] * n_loc,
        'xloc': xs,
        'yloc': ys,
        'xorig': x0s,
        'yorig': y0s,
        'distance': dists,
    }))

    # Track_Avg, Track_Std: travelled path length of every recorded track.
    track_dist_list = [
        _path_length(track['localInfos']) for track in dat.trackInfos
    ]

    mean_track_dist = np.mean(track_dist_list)
    std_track_dist = np.std(track_dist_list)

    # save data
    cal_out = {
        'Name': name,
        'Age': age,
        'Sex': sex,
        'Test_date': test_date,
        'Duration': time_dur,
        'Dist_Avg': dist_avg,
        'Dist_Std': dist_std,
        'Track_Avg': mean_track_dist,
        'Track_Std': std_track_dist,
    }

    results.append(cal_out)
    print(f"Processed {subjid}")

# Stack all per-subject tables onto the accumulator in a single pass.
if bidi_frames:
    bidi_raw = pd.concat([bidi_raw, *bidi_frames], ignore_index=True)

# Write both tables as CSV without the index column: first the per-subject
# summary built from `results`, then the per-location rows in `bidi_raw`.
df = pd.DataFrame(results)
for table, out_path in ((df, cal_save_path), (bidi_raw, bidi_save_path)):
    table.to_csv(out_path, index=False)

print(f"Saved combined results to {save_folder}")