In [1]:
import os
import pandas as pd
import numpy as np
from pathlib import Path
from glob import glob

# Directories to analyse, one per dataset split.
ROOT_TRAIN, ROOT_TEST = (
    Path(split_dir)
    for split_dir in (
        r"D:\golfDataset\dataset\train",
        r"D:\golfDataset\dataset\test",
    )
)
# Label sub-directory names scanned under each split directory.
CATEGORIES = ["balanced_true", "false"]

def collect_keypoint_statistics(root_dir, categories=None, num_keypoints=25):
    """Summarise every keypoint CSV under ``root_dir`` as one row per file.

    For each label, the ``*.csv`` files in ``root_dir/<label>/crop_keypoint``
    are read and each file's values are viewed as
    ``(frames, num_keypoints, 3)``; the last axis is used as
    ``(x, y, confidence)``.

    Args:
        root_dir: Split directory (``str`` or ``Path``). Its final path
            component is recorded in the ``split`` column.
        categories: Label sub-directory names to scan. When ``None`` the
            module-level ``CATEGORIES`` list is used.
        num_keypoints: Number of keypoints per frame; every CSV must have
            exactly ``num_keypoints * 3`` columns.

    Returns:
        A ``pandas.DataFrame`` with the columns ``split``, ``label``,
        ``file``, ``frames``, ``mean_x``, ``mean_y``, ``std_x``, ``std_y``,
        ``conf_mean`` and ``conf_std``. The columns are present even when no
        CSV file is found, so the result can always be grouped on
        ``split``/``label``.

    Raises:
        ValueError: If a CSV does not have ``num_keypoints * 3`` columns.
            The message names the offending file.
    """
    root_dir = Path(root_dir)
    labels = CATEGORIES if categories is None else categories
    expected_cols = num_keypoints * 3
    stat_names = ['mean_x', 'mean_y', 'std_x', 'std_y', 'conf_mean', 'conf_std']
    columns = ['split', 'label', 'file', 'frames'] + stat_names

    results = []
    for label in labels:
        keypoint_dir = root_dir / label / 'crop_keypoint'
        # Sort so the row order does not depend on directory listing order.
        for csv_path in sorted(keypoint_dir.glob("*.csv")):
            df = pd.read_csv(csv_path)
            if df.shape[1] != expected_cols:
                # Fail with the file name instead of an opaque reshape error.
                raise ValueError(
                    f"{csv_path}: expected {expected_cols} columns "
                    f"({num_keypoints} keypoints x 3), found {df.shape[1]}"
                )

            n_frames = len(df)
            if n_frames == 0:
                # No frames: keep the file in the table with NaN statistics
                # rather than reducing over an empty array.
                stats = dict.fromkeys(stat_names, float('nan'))
            else:
                arr = df.to_numpy().reshape(n_frames, num_keypoints, 3)
                xs = arr[:, :, 0]
                ys = arr[:, :, 1]
                confs = arr[:, :, 2]
                stats = {
                    'mean_x': xs.mean(),
                    'mean_y': ys.mean(),
                    'std_x': xs.std(),
                    'std_y': ys.std(),
                    'conf_mean': confs.mean(),
                    'conf_std': confs.std(),
                }

            results.append({
                'split': root_dir.name,
                'label': label,
                'file': csv_path.name,
                'frames': n_frames,
                **stats,
            })

    # Passing the columns explicitly keeps the schema stable for empty results.
    return pd.DataFrame(results, columns=columns)

# Per-file statistics for each split, then stacked into a single table.
df_train, df_test = (
    collect_keypoint_statistics(split_root)
    for split_root in (ROOT_TRAIN, ROOT_TEST)
)
df_all = pd.concat([df_train, df_test], ignore_index=True)

# Summary: frame counts get mean and std; every other statistic is averaged
# across the files of each (split, label) group.
averaged_columns = ['mean_x', 'mean_y', 'std_x', 'std_y', 'conf_mean', 'conf_std']
aggregations = {
    'frames': ['mean', 'std'],
    **dict.fromkeys(averaged_columns, 'mean'),
}
grouped = df_all.groupby(['split', 'label'])
summary = grouped.agg(aggregations)
print(summary)


                         frames               mean_x    mean_y     std_x  \
                           mean        std      mean      mean      mean   
split label                                                                
test  balanced_true  169.511111  30.293802  0.091567  0.134576  0.478340   
      false          164.182482  31.547168  0.052550  0.118809  0.430992   
train balanced_true  187.146657  62.088682  0.062349  0.101669  0.456826   
      false          197.764706  67.610741  0.079889  0.117252  0.478723   

                        std_y conf_mean  conf_std  
                         mean      mean      mean  
split label                                        
test  balanced_true  1.331917  0.541361  0.251375  
      false          1.345588  0.535493  0.243918  
train balanced_true  1.341218  0.543732  0.251619  
      false          1.331488  0.564273  0.243516  
