In [1]:
import os
import pandas as pd
import numpy as np
import warnings
warnings.filterwarnings("ignore")

In [2]:
# Folder listed by the clean-up cell as the source of raw annotation exports.
# NOTE(review): absolute, machine-specific Windows paths — the notebook only
# runs where this D: layout exists.
raw_gt_dir = r'D:\ShanghaiASD_project\gazefollow_pattern_checkup\ENCU_base_annotations\raw'
# Folder the clean-up cell writes the cleaned per-kid files into, and the
# statistics cell reads ground truth from.
cleaned_gt_dir = r'D:\ShanghaiASD_project\gazefollow_pattern_checkup\ENCU_base_annotations\cleaned'

In [3]:
# Show which raw annotation files are available before cleaning them.
os.listdir(raw_gt_dir)

['109-OK.txt', '41-OK.txt', '42-OK.txt', '62-OK.txt']

In [7]:
def clean_raw_content(txt_path, fps=30):
    """Parse one raw annotation export and return a cleaned DataFrame.

    The raw file is tab-separated text preceded by a free-form preamble. The
    table header is recognized as the first line that starts with '开始时间';
    every line before it is skipped.

    Parameters
    ----------
    txt_path : str
        Path of the raw annotation file (read as UTF-8).
    fps : int or float, default 30
        Frame rate used to turn the second-based timestamps into frame
        indices. The product ``seconds * fps`` is truncated by ``astype(int)``.

    Returns
    -------
    pandas.DataFrame
        Columns ``activity``, ``stimulus``, ``start(ss.msec)``,
        ``end(ss.msec)``, ``start_frame``, ``end_frame``, ``SXC`` and ``SXA``.
        Rows in which both ``SXC`` and ``SXA`` are missing are dropped; the
        remaining rows keep their original row labels.

    Raises
    ------
    pandas.errors.EmptyDataError
        If the file contains no line starting with '开始时间'.
    """
    header_marker = '开始时间'

    # Find the 0-based line number of the table header.
    header_row = None
    with open(txt_path, 'r', encoding='utf-8') as handle:
        for line_no, line in enumerate(handle):
            if line.startswith(header_marker):
                header_row = line_no
                break

    if header_row is None:
        # Without this check pandas raises the same exception type, but with
        # a generic "No columns to parse" message that hides the real cause.
        raise pd.errors.EmptyDataError(
            "No header line starting with %r found in %s" % (header_marker, txt_path)
        )

    raw_content = pd.read_csv(txt_path, sep='\t', skiprows=range(header_row))

    start_sec = raw_content['开始时间 - ss.msec']
    end_sec = raw_content['结束时间 - ss.msec']
    cleaned_content = pd.DataFrame({
        # Labels are compared by exact string equality downstream, so strip
        # the spaces the export puts inside them.
        'activity': raw_content['活动类型'].str.replace(' ', ''),
        'stimulus': raw_content['刺激等级'].str.replace(' ', ''),
        'start(ss.msec)': start_sec,
        'end(ss.msec)': end_sec,
        'start_frame': (start_sec * fps).astype(int),
        'end_frame': (end_sec * fps).astype(int),
        'SXC': raw_content['SXC'],
        'SXA': raw_content['SXA'],
    })

    # Keep a row as soon as at least one of the two label columns is filled.
    return cleaned_content.dropna(subset=['SXC', 'SXA'], how='all')

def collect_lists(gt_df, pred_df):
    """Expand ground-truth and predicted intervals into per-frame lists.

    Every row of either table contributes the half-open frame range
    ``[start_frame, end_frame)``.

    Parameters
    ----------
    gt_df, pred_df : pandas.DataFrame
        Interval tables with integer ``start_frame`` and ``end_frame`` columns.

    Returns
    -------
    tuple of (list, list)
        ``(gt_ls, pred_ls)``: the frame indices covered by the ground-truth
        rows and by the predicted rows, in row order. Overlapping rows yield
        repeated frame indices.
    """
    def _expand(intervals):
        # One entry per frame of every [start_frame, end_frame) row.
        return [
            frame
            for label in intervals.index
            for frame in range(intervals.at[label, 'start_frame'],
                               intervals.at[label, 'end_frame'])
        ]

    return _expand(gt_df), _expand(pred_df)


def obtain_score_per_act(a_pred, a_gt, total_frames=None):
    """Score predicted intervals against ground truth, frame by frame.

    Parameters
    ----------
    a_pred, a_gt : pandas.DataFrame
        Predicted and ground-truth interval tables with ``start_frame`` and
        ``end_frame`` columns (see ``collect_lists``).
    total_frames : iterable of int, optional
        The frames to evaluate. When omitted, the module-level variable named
        ``total_frames`` is used, which is how existing two-argument callers
        supply the window.

    Returns
    -------
    tuple of (float, float, float)
        ``(accuracy, recall, precision)`` as fractions in ``[0, 1]``. A metric
        whose denominator is zero is returned as ``numpy.nan``.

    Raises
    ------
    NameError
        If ``total_frames`` is neither passed nor defined at module level.
    """
    if total_frames is None:
        # Backward compatibility with callers that set a global before calling.
        try:
            total_frames = globals()['total_frames']
        except KeyError:
            raise NameError("name 'total_frames' is not defined") from None

    gt_ls, pred_ls = collect_lists(a_gt, a_pred)
    # Sets give constant-time membership; the per-frame loop below would
    # otherwise rescan both lists for every evaluated frame.
    gt_frames, pred_frames = set(gt_ls), set(pred_ls)

    tp = fp = fn = tn = 0
    for f in total_frames:
        predicted = f in pred_frames
        actual = f in gt_frames
        if predicted and actual:
            tp += 1
        elif predicted:
            fp += 1
        elif actual:
            fn += 1
        else:
            tn += 1

    n_frames = tp + fp + fn + tn
    acc_score = (tp + tn) / n_frames if n_frames else np.nan

    # Guard on the in-window denominators rather than on the raw interval
    # lists: intervals may lie entirely outside the evaluated frames, which
    # leaves the denominator at zero even though the list is not empty.
    recall = tp / (tp + fn) if (tp + fn) else np.nan
    precision = tp / (tp + fp) if (tp + fp) else np.nan

    return acc_score, recall, precision

# Clean up raw files

In [None]:
# Convert every raw annotation export into a cleaned CSV named after the
# zero-padded numeric prefix of the raw file name.
os.makedirs(cleaned_gt_dir, exist_ok=True)  # to_csv does not create missing folders
for txtfile in os.listdir(raw_gt_dir):
    txt_path = '%s/%s'%(raw_gt_dir, txtfile)
    if not txtfile.endswith('.txt'):
        print("[Warning] Having suspicious file under the raw folder. Please Check")
        print(txt_path)
        # Skip it: a file that is not an annotation export must not be parsed
        # or written into the cleaned folder.
        continue

    cleaned_file = clean_raw_content(txt_path)
    # The text before the first '-' is the numeric id: '41-OK.txt' -> '041.txt'.
    cleaned_file_name = '%03d.txt'%(int(txtfile.split('-')[0]))
    cleaned_file.to_csv('%s/%s'%(cleaned_gt_dir, cleaned_file_name), index=False)

# Process 2023-11-21 case

In [9]:
# Root folder under which the per-kid prediction folders are looked up.
# NOTE(review): absolute, machine-specific path; the date in it presumably
# identifies the prediction run — confirm.
pred_files_dir =  r'D:\ShanghaiASD_project\gazefollow_pattern_checkup\pred_stats\2023-11-21'

In [10]:
# Kid whose predictions are evaluated; the id is kept as a string because it
# is interpolated into folder and file names below.
kid_id = '109'
# Folder with one prediction workbook per camera for this kid.
pred_dir = '%s/kid_look_at_teacher/%s'%(pred_files_dir, kid_id)
print(os.listdir(pred_dir))

['109小鸟.xlsx', '109瓢虫1号.xlsx', '109瓢虫2号.xlsx', '109考拉.xlsx', '109袋鼠.xlsx']


In [11]:
# Per-kid table of activity windows; each row is unpacked below as
# (start frame, end frame, activity name).
activity_dir = r'D:\ShanghaiASD_project\gazefollow_pattern_checkup\activity_mapping'
a_file = '%s/%s_activity.csv'%(activity_dir, kid_id)
a_df = pd.read_csv(a_file)

# The clean-up cell names cleaned files with a zero-padded three-digit id
# ('%03d.txt'), so pad kid_id the same way; otherwise ids below 100 are not found.
gt_df = pd.read_csv('%s/%03d.txt'%(cleaned_gt_dir, int(kid_id)))

# Ground-truth rows labelled 'face' in SXC that also carry an activity name.
# This does not depend on the camera, so it is computed once.
gt_kid_at_teacher = gt_df[gt_df.SXC=='face'].dropna(subset=['activity'])

stats_header = ['camera', 'activity', 'acc', 'recall', 'precision']
pd_stats = []
for cam in os.listdir(pred_dir):
    if cam[-4:]!='xlsx':
        continue
    cam_name = cam[:-5]  # file name without the '.xlsx' suffix
    pred_cam_df = pd.read_excel('%s/%s'%(pred_dir, cam))
    for start_f, end_f, act in a_df.itertuples(index=False, name=None):
        # obtain_score_per_act reads the evaluated window from the
        # module-level name total_frames, so set it before every call.
        total_frames = range(start_f, end_f)
        a_gt = gt_kid_at_teacher[gt_kid_at_teacher.activity==act]
        a_pred = pred_cam_df[pred_cam_df.activity==act]
        acc_score, recall, precision = obtain_score_per_act(a_pred, a_gt)
        # Scores are stored as percentages.
        pd_stats.append([cam_name, act, acc_score*100, recall*100, precision*100])


In [12]:
# Tabulate the collected per-camera, per-activity scores (stored as percentages).
pd.DataFrame(pd_stats, columns=stats_header)

Unnamed: 0,camera,activity,acc,recall,precision
0,109小鸟,Bamboo,67.965077,100.0,5.544554
1,109小鸟,Puzzle1,99.871918,60.0,60.000000
2,109小鸟,RJAposterA,100.000000,,
3,109小鸟,RJAposterB,95.294118,,0.000000
4,109小鸟,RJAduck,94.736842,,0.000000
...,...,...,...,...,...
60,109袋鼠,RJAposterC,24.056604,,0.000000
61,109袋鼠,RJAposterD,99.310345,,0.000000
62,109袋鼠,RJAcar,61.001789,,0.000000
63,109袋鼠,IJAcar,72.967374,,0.000000


In [150]:
# Re-run of the per-activity scoring for a single camera, printing each score.
# NOTE(review): pred_cam_df and cam_name are not assigned in this cell; they
# are whatever the kernel last held (the camera loop assigns both), so the
# camera being scored depends on execution order — confirm before trusting
# the numbers.
gt_kid_at_teacher = gt_df[gt_df.SXC=='face'].dropna(subset=['activity'])
stats_required = ['camera', 'activity', 'acc', 'recall', 'precision']
pd_stats = []  # rebinds the list that the camera loop also fills
for i in range(len(a_df)):
    # Each a_df row is unpacked positionally as (start frame, end frame, activity).
    start_f, end_f, act = a_df.iloc[i]
    # obtain_score_per_act reads this module-level name as its frame window.
    total_frames = range(start_f, end_f)
    a_gt = gt_kid_at_teacher[gt_kid_at_teacher.activity==act]
    a_pred = pred_cam_df[pred_cam_df.activity==act]
    acc_score, recall, precision = obtain_score_per_act(a_pred, a_gt)
    print("At activity: %s, Acc: %.2f, Recall: %.2f, Precision: %.2f"%(act, acc_score*100, recall*100, precision*100))
    pd_stats.append([cam_name, act, acc_score*100, recall*100, precision*100])
# Display the window and activity left over from the last loop iteration.
start_f, end_f, act

At activity: Bamboo, Acc: 67.97, Recall: 100.00, Precision: 5.54
At activity: Puzzle1, Acc: 99.87, Recall: 60.00, Precision: 60.00
At activity: RJAposterA, Acc: 100.00, Recall: nan, Precision: nan
At activity: RJAposterB, Acc: 95.29, Recall: nan, Precision: 0.00
At activity: RJAduck, Acc: 94.74, Recall: nan, Precision: 0.00
At activity: IJAduck, Acc: 99.98, Recall: nan, Precision: 0.00
At activity: peekaboo, Acc: 99.50, Recall: nan, Precision: 0.00
At activity: Puzzle2, Acc: 100.00, Recall: nan, Precision: nan
At activity: RJAposterC, Acc: 40.09, Recall: nan, Precision: 0.00
At activity: RJAposterD, Acc: 100.00, Recall: nan, Precision: nan
At activity: RJAcar, Acc: 99.11, Recall: nan, Precision: 0.00
At activity: IJAcar, Acc: 99.64, Recall: nan, Precision: 0.00
At activity: Bubble, Acc: 65.67, Recall: nan, Precision: 0.00


(17900, 22986, 'Bubble')

In [152]:
# Tabulate the scores gathered by the single-camera re-run (percentages).
pd.DataFrame(pd_stats, columns=stats_required)

Unnamed: 0,camera,activity,acc,recall,precision
0,109小鸟,Bamboo,67.965077,100.0,5.544554
1,109小鸟,Puzzle1,99.871918,60.0,60.0
2,109小鸟,RJAposterA,100.0,,
3,109小鸟,RJAposterB,95.294118,,0.0
4,109小鸟,RJAduck,94.736842,,0.0
5,109小鸟,IJAduck,99.975137,,0.0
6,109小鸟,peekaboo,99.501247,,0.0
7,109小鸟,Puzzle2,100.0,,
8,109小鸟,RJAposterC,40.09434,,0.0
9,109小鸟,RJAposterD,100.0,,


In [148]:
def collect_lists(gt_df, pred_df):
    """Expand ground-truth and predicted intervals into per-frame lists.

    Every row of either table contributes the half-open frame range
    ``[start_frame, end_frame)``.

    Parameters
    ----------
    gt_df, pred_df : pandas.DataFrame
        Interval tables with integer ``start_frame`` and ``end_frame`` columns.

    Returns
    -------
    tuple of (list, list)
        ``(gt_ls, pred_ls)``: the frame indices covered by the ground-truth
        rows and by the predicted rows, in row order. Overlapping rows yield
        repeated frame indices.
    """
    def _expand(intervals):
        # One entry per frame of every [start_frame, end_frame) row.
        return [
            frame
            for label in intervals.index
            for frame in range(intervals.at[label, 'start_frame'],
                               intervals.at[label, 'end_frame'])
        ]

    return _expand(gt_df), _expand(pred_df)


def obtain_score_per_act(a_pred, a_gt, total_frames=None):
    """Score predicted intervals against ground truth, frame by frame.

    Parameters
    ----------
    a_pred, a_gt : pandas.DataFrame
        Predicted and ground-truth interval tables with ``start_frame`` and
        ``end_frame`` columns (see ``collect_lists``).
    total_frames : iterable of int, optional
        The frames to evaluate. When omitted, the module-level variable named
        ``total_frames`` is used, which is how existing two-argument callers
        supply the window.

    Returns
    -------
    tuple of (float, float, float)
        ``(accuracy, recall, precision)`` as fractions in ``[0, 1]``. A metric
        whose denominator is zero is returned as ``numpy.nan``.

    Raises
    ------
    NameError
        If ``total_frames`` is neither passed nor defined at module level.
    """
    if total_frames is None:
        # Backward compatibility with callers that set a global before calling.
        try:
            total_frames = globals()['total_frames']
        except KeyError:
            raise NameError("name 'total_frames' is not defined") from None

    gt_ls, pred_ls = collect_lists(a_gt, a_pred)
    # Sets give constant-time membership; the per-frame loop below would
    # otherwise rescan both lists for every evaluated frame.
    gt_frames, pred_frames = set(gt_ls), set(pred_ls)

    tp = fp = fn = tn = 0
    for f in total_frames:
        predicted = f in pred_frames
        actual = f in gt_frames
        if predicted and actual:
            tp += 1
        elif predicted:
            fp += 1
        elif actual:
            fn += 1
        else:
            tn += 1

    n_frames = tp + fp + fn + tn
    acc_score = (tp + tn) / n_frames if n_frames else np.nan

    # Guard on the in-window denominators rather than on the raw interval
    # lists: intervals may lie entirely outside the evaluated frames, which
    # leaves the denominator at zero even though the list is not empty.
    recall = tp / (tp + fn) if (tp + fn) else np.nan
    precision = tp / (tp + fp) if (tp + fp) else np.nan

    return acc_score, recall, precision

In [104]:
# Inspect the ground-truth rows currently bound to a_gt (set by the scoring loops).
a_gt

Unnamed: 0,activity,stimulus,start(ss.msec),end(ss.msec),start_frame,end_frame,SXC,SXA
26,Bamboo,trial-landing,49.727,50.657,1491,1519,face,


In [136]:
# Expand the current a_gt / a_pred intervals into frame lists for inspection.
gt_ls, pred_ls = collect_lists(a_gt, a_pred)

In [137]:
# Display every predicted-positive frame index; the full list is printed.
pred_ls

[352,
 353,
 354,
 355,
 356,
 357,
 358,
 359,
 360,
 361,
 362,
 363,
 364,
 403,
 404,
 405,
 406,
 407,
 408,
 409,
 410,
 411,
 412,
 413,
 414,
 415,
 416,
 417,
 418,
 419,
 420,
 421,
 422,
 423,
 424,
 425,
 426,
 427,
 428,
 429,
 430,
 431,
 432,
 433,
 434,
 435,
 436,
 437,
 438,
 439,
 440,
 441,
 442,
 443,
 444,
 445,
 446,
 447,
 448,
 449,
 450,
 451,
 452,
 453,
 454,
 455,
 456,
 457,
 458,
 459,
 460,
 461,
 462,
 463,
 464,
 465,
 466,
 467,
 468,
 469,
 470,
 471,
 472,
 473,
 474,
 475,
 476,
 477,
 478,
 479,
 480,
 481,
 482,
 483,
 484,
 485,
 486,
 487,
 488,
 489,
 490,
 491,
 492,
 493,
 494,
 495,
 496,
 497,
 498,
 499,
 500,
 501,
 502,
 503,
 504,
 505,
 506,
 507,
 508,
 509,
 510,
 511,
 512,
 513,
 514,
 515,
 516,
 517,
 518,
 519,
 520,
 521,
 522,
 523,
 524,
 525,
 526,
 527,
 528,
 529,
 530,
 531,
 532,
 533,
 534,
 535,
 536,
 537,
 538,
 539,
 540,
 636,
 637,
 638,
 639,
 640,
 641,
 642,
 643,
 644,
 645,
 646,
 647,
 648,
 649,
 650,
 651

In [114]:
# Number of entries in fp_ls.
# NOTE(review): in the visible cells fp_ls is only assigned as a local inside
# obtain_score_per_act, so this cell relies on kernel state from a cell that
# is not shown — confirm, or it will fail on a fresh kernel.
len(fp_ls)

477