In [1]:
import pandas as pd
import matplotlib.pyplot as plt
from matplotlib import image
import numpy as np
import cv2 as cv
import os
from datetime import datetime
import time
from scipy.spatial.distance import pdist, squareform 
#from scipy.stats import spearmanr

In [2]:
# laosheng_s1
# For every subject folder of this cohort, read 'trackinfo.txt' and
# 'userinfo.txt', then write two CSV files into save_folder:
#   * cal_dist.csv - one row per subject (demographics, session duration,
#                    mean/std of placement error, mean/std of track length)
#   * bidi_raw.csv - one row per location (response vs. reference coordinates
#                    and their Euclidean distance), for bidimensional regression
# NOTE(review): the other cohort cells in this notebook repeat this pipeline
# with different paths; consider factoring it into one shared function.

# set dir
parent_folder = '/bigvault/Projects/sea_hero/LOCdata/validation/laosheng_s1'
save_folder = '/bigvault/Projects/sea_hero/LOCdata/article/Remap_article/results/data/laosheng_s1'
os.makedirs(save_folder, exist_ok=True)

cal_save_path = os.path.join(save_folder, 'cal_dist.csv')
bidi_save_path = os.path.join(save_folder, 'bidi_raw.csv')

# Timestamp layout shared by 'beginTime' and 'endTime' in userinfo.txt.
TIME_FORMAT = '%Y/%m/%d %H:%M:%S'

# os.scandir yields entries in arbitrary order; sorting makes the row order of
# the saved CSV files reproducible across runs and file systems.
subfolders = sorted(f.path for f in os.scandir(parent_folder) if f.is_dir())

results = []
bidi_frames = []  # one frame per subject; concatenated once after the loop

for subfolder in subfolders:
    subjid = os.path.basename(subfolder)

    # read data
    dat = pd.read_json(os.path.join(subfolder, 'trackinfo.txt'))
    layout_dat = pd.read_json(os.path.join(subfolder, 'userinfo.txt'), orient='index').transpose()

    # basic info
    name = layout_dat['name'][0]
    sex = layout_dat['sex'][0]

    # Parse the session timestamps once; they feed Age, Test_date and Duration.
    time_start = datetime.strptime(layout_dat['beginTime'][0], TIME_FORMAT)
    time_end = datetime.strptime(layout_dat['endTime'][0], TIME_FORMAT)

    # age: years at test time with month resolution (day of month is ignored);
    # the first four characters of dateOfBirth are read as the year and the
    # next two as the month.
    dob = layout_dat['dateOfBirth'][0]
    dob_year = int(dob[:4])
    dob_month = int(dob[4:6])
    age = round((time_start.year - dob_year) + (time_start.month - dob_month) / 12.0, 1)

    # Test_date
    test_date = time_start.strftime("%Y/%m/%d")

    # Duration (seconds)
    time_dur = (time_end - time_start).total_seconds()

    # Dist_Avg, Dist_Std
    # presumably 'localInfos' holds the participant's responses and
    # 'targetInfos' the reference positions, paired by index - TODO confirm
    local_infos = layout_dat['localInfos'][0]
    target_infos = layout_dat['targetInfos'][0]
    n_loc = len(local_infos)

    xs = [loc['x'] for loc in local_infos]
    ys = [loc['y'] for loc in local_infos]
    # Index targets explicitly so a shorter 'targetInfos' still fails loudly.
    x0s = [target_infos[i_loc]['x'] for i_loc in range(n_loc)]
    y0s = [target_infos[i_loc]['y'] for i_loc in range(n_loc)]

    # Same floating-point expression as before, so saved values are unchanged.
    dists = [
        np.sqrt((x - x0) ** 2 + (y - y0) ** 2)
        for x, y, x0, y0 in zip(xs, ys, x0s, y0s)
    ]

    dist_avg = np.mean(dists)
    dist_std = np.std(dists)  # population std (numpy default ddof=0)

    # bidi_raw for bidiregression
    bidi_frames.append(pd.DataFrame({
        'Name': [name] * n_loc,
        'xloc': xs,
        'yloc': ys,
        'xorig': x0s,
        'yorig': y0s,
        'distance': dists
    }))

    # Track_Avg, Track_Std: path length of each recorded track, i.e. the sum
    # of distances between consecutive sampled points.
    track_dist_list = []
    for track in dat.trackInfos:
        track_dat = track['localInfos']
        x_list = [p['x'] for p in track_dat]
        y_list = [p['y'] for p in track_dat]

        total_dist = sum(
            ((x_next - x_prev) ** 2 + (y_next - y_prev) ** 2) ** 0.5
            for x_prev, x_next, y_prev, y_next
            in zip(x_list, x_list[1:], y_list, y_list[1:])
        )
        track_dist_list.append(total_dist)

    mean_track_dist = np.mean(track_dist_list)
    std_track_dist = np.std(track_dist_list)

    # save data
    results.append({
        'Name': name,
        'Age': age,
        'Sex': sex,
        'Test_date': test_date,
        'Duration': time_dur,
        'Dist_Avg': dist_avg,
        'Dist_Std': dist_std,
        'Track_Avg': mean_track_dist,
        'Track_Std': std_track_dist
    })
    print(f"Processed {subjid}")

# to DataFrame and save file
df = pd.DataFrame(results)
df.to_csv(cal_save_path, index=False)

# A single concat avoids re-copying the accumulated frame for every subject;
# fall back to an empty frame when no subject folder was found.
bidi_raw = pd.concat(bidi_frames, ignore_index=True) if bidi_frames else pd.DataFrame()
bidi_raw.to_csv(bidi_save_path, index=False)

print(f"Saved combined results to {save_folder}")

Processed baoran_20240305-08-08-01
Processed caichenyang_20240119-05-58-17
Processed chenanqi_20240305-07-02-09
Processed chenqiyang_20231220-01-07-40
Processed chenquan_20231220-05-06-46
Processed chenrouhan_20240228-04-38-08
Processed chenxintong_20240302-11-10-13
Processed chenxin_20240229-07-57-28
Processed chenyingqi_20231201-04-29-44
Processed chenzhedong_20240118-03-36-57
Processed daiweitao_20240115-07-27-23
Processed dengrudan_20240118-05-47-21
Processed dengyang_20240229-06-06-00
Processed dingxukai_20240123-04-46-42
Processed duanjinghui_20240117-07-04-49
Processed dufeixuan_20240303-09-09-16
Processed dujunan_20240229-06-17-11
Processed dulijun_20240305-06-11-47
Processed fangjiayi_20231203-12-02-49
Processed fanzhiliang_20240320-06-36-38
Processed fuyihan_20231213-06-07-57
Processed gangchenming_20240302-10-02-03
Processed gaomeiyi_20231201-02-02-27
Processed gaoshuolian_20231203-09-38-11
Processed gaoyushi_20240115-08-15-16
Processed gesangciren_20240115-06-09-00
Processe

In [3]:
# laosheng_s2
# For every subject folder of this cohort, read 'trackinfo.txt' and
# 'userinfo.txt', then write two CSV files into save_folder:
#   * cal_dist.csv - one row per subject (demographics, session duration,
#                    mean/std of placement error, mean/std of track length)
#   * bidi_raw.csv - one row per location (response vs. reference coordinates
#                    and their Euclidean distance), for bidimensional regression
# NOTE(review): the other cohort cells in this notebook repeat this pipeline
# with different paths; consider factoring it into one shared function.

# set dir
parent_folder = '/bigvault/Projects/sea_hero/LOCdata/validation/laosheng_s2'
save_folder = '/bigvault/Projects/sea_hero/LOCdata/article/Remap_article/results/data/laosheng_s2'
os.makedirs(save_folder, exist_ok=True)

cal_save_path = os.path.join(save_folder, 'cal_dist.csv')
bidi_save_path = os.path.join(save_folder, 'bidi_raw.csv')

# Timestamp layout shared by 'beginTime' and 'endTime' in userinfo.txt.
TIME_FORMAT = '%Y/%m/%d %H:%M:%S'

# os.scandir yields entries in arbitrary order; sorting makes the row order of
# the saved CSV files reproducible across runs and file systems.
subfolders = sorted(f.path for f in os.scandir(parent_folder) if f.is_dir())

results = []
bidi_frames = []  # one frame per subject; concatenated once after the loop

for subfolder in subfolders:
    subjid = os.path.basename(subfolder)

    # read data
    dat = pd.read_json(os.path.join(subfolder, 'trackinfo.txt'))
    layout_dat = pd.read_json(os.path.join(subfolder, 'userinfo.txt'), orient='index').transpose()

    # basic info
    name = layout_dat['name'][0]
    sex = layout_dat['sex'][0]

    # Parse the session timestamps once; they feed Age, Test_date and Duration.
    time_start = datetime.strptime(layout_dat['beginTime'][0], TIME_FORMAT)
    time_end = datetime.strptime(layout_dat['endTime'][0], TIME_FORMAT)

    # age: years at test time with month resolution (day of month is ignored);
    # the first four characters of dateOfBirth are read as the year and the
    # next two as the month.
    dob = layout_dat['dateOfBirth'][0]
    dob_year = int(dob[:4])
    dob_month = int(dob[4:6])
    age = round((time_start.year - dob_year) + (time_start.month - dob_month) / 12.0, 1)

    # Test_date
    test_date = time_start.strftime("%Y/%m/%d")

    # Duration (seconds)
    time_dur = (time_end - time_start).total_seconds()

    # Dist_Avg, Dist_Std
    # presumably 'localInfos' holds the participant's responses and
    # 'targetInfos' the reference positions, paired by index - TODO confirm
    local_infos = layout_dat['localInfos'][0]
    target_infos = layout_dat['targetInfos'][0]
    n_loc = len(local_infos)

    xs = [loc['x'] for loc in local_infos]
    ys = [loc['y'] for loc in local_infos]
    # Index targets explicitly so a shorter 'targetInfos' still fails loudly.
    x0s = [target_infos[i_loc]['x'] for i_loc in range(n_loc)]
    y0s = [target_infos[i_loc]['y'] for i_loc in range(n_loc)]

    # Same floating-point expression as before, so saved values are unchanged.
    dists = [
        np.sqrt((x - x0) ** 2 + (y - y0) ** 2)
        for x, y, x0, y0 in zip(xs, ys, x0s, y0s)
    ]

    dist_avg = np.mean(dists)
    dist_std = np.std(dists)  # population std (numpy default ddof=0)

    # bidi_raw for bidiregression
    bidi_frames.append(pd.DataFrame({
        'Name': [name] * n_loc,
        'xloc': xs,
        'yloc': ys,
        'xorig': x0s,
        'yorig': y0s,
        'distance': dists
    }))

    # Track_Avg, Track_Std: path length of each recorded track, i.e. the sum
    # of distances between consecutive sampled points.
    track_dist_list = []
    for track in dat.trackInfos:
        track_dat = track['localInfos']
        x_list = [p['x'] for p in track_dat]
        y_list = [p['y'] for p in track_dat]

        total_dist = sum(
            ((x_next - x_prev) ** 2 + (y_next - y_prev) ** 2) ** 0.5
            for x_prev, x_next, y_prev, y_next
            in zip(x_list, x_list[1:], y_list, y_list[1:])
        )
        track_dist_list.append(total_dist)

    mean_track_dist = np.mean(track_dist_list)
    std_track_dist = np.std(track_dist_list)

    # save data
    results.append({
        'Name': name,
        'Age': age,
        'Sex': sex,
        'Test_date': test_date,
        'Duration': time_dur,
        'Dist_Avg': dist_avg,
        'Dist_Std': dist_std,
        'Track_Avg': mean_track_dist,
        'Track_Std': std_track_dist
    })
    print(f"Processed {subjid}")

# to DataFrame and save file
df = pd.DataFrame(results)
df.to_csv(cal_save_path, index=False)

# A single concat avoids re-copying the accumulated frame for every subject;
# fall back to an empty frame when no subject folder was found.
bidi_raw = pd.concat(bidi_frames, ignore_index=True) if bidi_frames else pd.DataFrame()
bidi_raw.to_csv(bidi_save_path, index=False)

print(f"Saved combined results to {save_folder}")

Processed baoran_20240305-08-23-25
Processed caichenyang_20240119-06-09-29
Processed chenanqi_20240305-07-28-40
Processed chengquan_20231220-05-33-06
Processed chenqiyang_20231220-01-18-34
Processed chenrouhan_20240228-04-54-24
Processed chenxintong_20240302-11-38-19
Processed chenxin_20240229-08-20-58
Processed chenyingqi_20231201-04-43-56
Processed chenzhedong_20240118-03-40-27
Processed daiweitao_20240115-07-37-35
Processed dengrudan_20240118-06-05-14
Processed dengyang_20240229-06-16-15
Processed dingxukai_20240123-05-03-40
Processed duanjinghui_20240117-07-15-43
Processed dufeixuan_20240303-09-41-09
Processed dujunan_20240229-06-41-53
Processed dulijun_20240305-06-22-18
Processed fangjiayi_20231203-12-12-08
Processed fanzhiliang_20240320-06-44-53
Processed fuyihan_20231213-06-25-52
Processed gangchenming_20240302-10-17-49
Processed gaomeiyi_20231201-02-14-06
Processed gaoshuolian_20231203-09-46-55
Processed gaoyushi_20240115-08-25-00
Processed gesangciren_20240115-06-23-11
Process

In [4]:
# freshman_s1
# For every subject folder of this cohort, read 'trackinfo.txt' and
# 'userinfo.txt', then write two CSV files into save_folder:
#   * cal_dist.csv - one row per subject (demographics, session duration,
#                    mean/std of placement error, mean/std of track length)
#   * bidi_raw.csv - one row per location (response vs. reference coordinates
#                    and their Euclidean distance), for bidimensional regression
# NOTE(review): the other cohort cells in this notebook repeat this pipeline
# with different paths; consider factoring it into one shared function.

# set dir
parent_folder = '/bigvault/Projects/sea_hero/LOCdata/BEHAVIOR/bulidmap/freshman_s1'
save_folder = '/bigvault/Projects/sea_hero/LOCdata/article/Remap_article/results/data/freshman'
os.makedirs(save_folder, exist_ok=True)

cal_save_path = os.path.join(save_folder, 'cal_dist.csv')
bidi_save_path = os.path.join(save_folder, 'bidi_raw.csv')

# Timestamp layout shared by 'beginTime' and 'endTime' in userinfo.txt.
TIME_FORMAT = '%Y/%m/%d %H:%M:%S'

# os.scandir yields entries in arbitrary order; sorting makes the row order of
# the saved CSV files reproducible across runs and file systems.
subfolders = sorted(f.path for f in os.scandir(parent_folder) if f.is_dir())

results = []
bidi_frames = []  # one frame per subject; concatenated once after the loop

for subfolder in subfolders:
    subjid = os.path.basename(subfolder)

    # read data
    dat = pd.read_json(os.path.join(subfolder, 'trackinfo.txt'))
    layout_dat = pd.read_json(os.path.join(subfolder, 'userinfo.txt'), orient='index').transpose()

    # basic info
    name = layout_dat['name'][0]
    sex = layout_dat['sex'][0]

    # Parse the session timestamps once; they feed Age, Test_date and Duration.
    time_start = datetime.strptime(layout_dat['beginTime'][0], TIME_FORMAT)
    time_end = datetime.strptime(layout_dat['endTime'][0], TIME_FORMAT)

    # age: years at test time with month resolution (day of month is ignored);
    # the first four characters of dateOfBirth are read as the year and the
    # next two as the month.
    dob = layout_dat['dateOfBirth'][0]
    dob_year = int(dob[:4])
    dob_month = int(dob[4:6])
    age = round((time_start.year - dob_year) + (time_start.month - dob_month) / 12.0, 1)

    # Test_date
    test_date = time_start.strftime("%Y/%m/%d")

    # Duration (seconds)
    time_dur = (time_end - time_start).total_seconds()

    # Dist_Avg, Dist_Std
    # presumably 'localInfos' holds the participant's responses and
    # 'targetInfos' the reference positions, paired by index - TODO confirm
    local_infos = layout_dat['localInfos'][0]
    target_infos = layout_dat['targetInfos'][0]
    n_loc = len(local_infos)

    xs = [loc['x'] for loc in local_infos]
    ys = [loc['y'] for loc in local_infos]
    # Index targets explicitly so a shorter 'targetInfos' still fails loudly.
    x0s = [target_infos[i_loc]['x'] for i_loc in range(n_loc)]
    y0s = [target_infos[i_loc]['y'] for i_loc in range(n_loc)]

    # Same floating-point expression as before, so saved values are unchanged.
    dists = [
        np.sqrt((x - x0) ** 2 + (y - y0) ** 2)
        for x, y, x0, y0 in zip(xs, ys, x0s, y0s)
    ]

    dist_avg = np.mean(dists)
    dist_std = np.std(dists)  # population std (numpy default ddof=0)

    # bidi_raw for bidiregression
    bidi_frames.append(pd.DataFrame({
        'Name': [name] * n_loc,
        'xloc': xs,
        'yloc': ys,
        'xorig': x0s,
        'yorig': y0s,
        'distance': dists
    }))

    # Track_Avg, Track_Std: path length of each recorded track, i.e. the sum
    # of distances between consecutive sampled points.
    track_dist_list = []
    for track in dat.trackInfos:
        track_dat = track['localInfos']
        x_list = [p['x'] for p in track_dat]
        y_list = [p['y'] for p in track_dat]

        total_dist = sum(
            ((x_next - x_prev) ** 2 + (y_next - y_prev) ** 2) ** 0.5
            for x_prev, x_next, y_prev, y_next
            in zip(x_list, x_list[1:], y_list, y_list[1:])
        )
        track_dist_list.append(total_dist)

    mean_track_dist = np.mean(track_dist_list)
    std_track_dist = np.std(track_dist_list)

    # save data
    results.append({
        'Name': name,
        'Age': age,
        'Sex': sex,
        'Test_date': test_date,
        'Duration': time_dur,
        'Dist_Avg': dist_avg,
        'Dist_Std': dist_std,
        'Track_Avg': mean_track_dist,
        'Track_Std': std_track_dist
    })
    print(f"Processed {subjid}")

# to DataFrame and save file
df = pd.DataFrame(results)
df.to_csv(cal_save_path, index=False)

# A single concat avoids re-copying the accumulated frame for every subject;
# fall back to an empty frame when no subject folder was found.
bidi_raw = pd.concat(bidi_frames, ignore_index=True) if bidi_frames else pd.DataFrame()
bidi_raw.to_csv(bidi_save_path, index=False)

print(f"Saved combined results to {save_folder}")

Processed chenshengjie_20231109-06-03-50
Processed chenxinyi_20231026-11-06-43
Processed fangzhiming_20231026-01-29-36
Processed fengloufei_20231024-06-55-10
Processed fengyinglin_20231020-10-17-26
Processed guoduxiao_20231027-01-20-09
Processed heyijun_20231118-08-34-37
Processed huangmingyang_20231118-10-31-35
Processed huangxun_20231102-02-12-57
Processed hugeliang_20231110-02-14-19
Processed huxiaolong_20231027-06-17-48
Processed jiangxinyang_20231110-04-13-47
Processed jiangyu_20231027-03-12-11
Processed jichengyuan_20231103-10-57-33
Processed kongdeyu_20231031-10-51-45
Processed linsiyan_20231102-01-12-33
Processed lishuyi_20231102-03-18-45
Processed luyi_20231027-08-12-26
Processed lvlingtao_20231101-11-01-21
Processed malihu_20231102-04-20-11
Processed qianying_20231103-09-35-35
Processed qinqin_20231101-06-13-22
Processed shenzhanghao_20231103-01-22-06
Processed songyiren_20231027-02-34-42
Processed tumarisi_20231025-10-54-35
Processed wangjiaqi_20231103-10-17-30
Processed wan