In [1]:
# 总目标：分析VoTT标注结果，汇总质量管理信息
import pandas as pd
import numpy as np
import os
from collections import defaultdict

In [2]:
# 输入数据
csv_dir = "/hdd02/zhangyiyang/dataset_summary/labels"  # csv 文件所在路径，当前目录以及子目录下所有csv文件作为输入
videos_dir = "./renames/step1"  # 视频所在路径
# 输入数据（每个csv的结构都一样，结果如下）
# 每一行有5个元素，分别是 image xmin ymin xmax ymax label
# image 是图片名，或者视频名+时间点（如 `close_2m_stand_dimlight_shortsleeve_frontshot_withshelter_CJ01_P0037.mp4#t=1.933333`）
# 每一个bbox可能对应多个label

# 一共有三类bbox
# 第一类：目标人物中间帧，包括四类标签
# 第二类：目标任务结尾帧，包括三类标签（不包括质量管理）
# 第三类：非目标人物，进包括`人体姿态`标签

# label的类别包括四类：
# 1. 帧种类(单选)：中间帧(medium)，结尾帧(end)
# 2. 人体姿态(单选): 站(stand)、坐(sit)、蹲(squat)、躺(lie)、中间状态(half_lie)
# 3. 要求行为(单选)：原地软倒(stillfall)、行进软倒(walkingfall)、推倒(pushfall)、绊倒(tripfall)、拿手机(takephone)、拿水杯(takecup)、吃饭(eat)、喝水(drink)、吃药(medicine)、磕碰(knock)、关门(close)、开门(open)
# 4. 质量管理(多选，但如果选择了qualified，其他的不能选择)：合格(qualified)、画面缺失(err_camera_lacking)、视频缺失(err_action_lacking)、光线错误(err_light)、姿态错误(err_pose)、衣着错误(err_sleeve)、视角错误(err_view)、遮挡错误(err_shelter)、未知错误(err_unknown)

frame_type = {'medium', 'end'}
pose_type = {'stand', 'sit', 'squat', 'lie', 'half_lie'}
action_type = {
    'stillfall', 'walkingfall', 'pushoverfall', 'tripfall', 'takephone',
    'takecup', 'eating', 'drinking', 'medicine', 'knock', 'close', 'open'
}
quality_type = {
    'qualified', 'err_camera_lacking', 'err_action_lacking', 'err_light',
    'err_pose', 'err_sleeve', 'err_view', 'err_shelter', 'err_unknown'
}
total_type = frame_type | pose_type | action_type | quality_type


In [3]:
# 输出数据
labeling_error_csv = "./labeling_error.csv"  # 输出的 标记错误 集合
not_or_wrong_labeled_csv = "./not_or_wrong_labeled.csv"  # 漏标或错标结果集合
summary_csv = "./summary.csv"  # 汇总结果

# summary 输出数据
# 每行的名称由两个部分组成：pid(形如`P0001`)与camera(2m/3m/4m)，行名总体形式为`P0001_2m`
# 列名即label标签中 `要求行为` 共12个动作
# 元素内容为质量管理标签：要么是合格，要么有其他各种错误，要么是视频缺失，要么是标注缺失

# not_or_wrong_labeled 输出数据
# 每行包括两个数据，`{pid}_{camera}` 和 action_name，表示对应的数据标记错误

# labeling_error_csv 输出数据
# 标记错误的细节，包括两列，第一列为文件名称，第二列为错误细节

In [4]:
# 第一步：拼接多个csv文件，后续只处理摄像头为 2m/3m/4m 的数据

def _search_csv(src_dir):
    df_list = []
    for file_name in os.listdir(src_dir):
        full_file_name = os.path.join(src_dir, file_name)
        if os.path.isdir(full_file_name):
            df_list += _search_csv(full_file_name)
        elif not file_name.endswith(".csv"):
            continue
        else:
            df_list.append(pd.read_csv(full_file_name))
    return df_list

df = pd.concat(_search_csv(csv_dir))
df['camera'] = df['image'].str.split('_').map(lambda x:x[1])
df = df[(df['camera'] == '2m') | (df['camera'] == '3m') | (df['camera'] == '4m')]
df.head()

Unnamed: 0,image,xmin,ymin,xmax,ymax,label,camera
0,close_2m_stand_dimlight_shortsleeve_frontshot_...,71.640909,415.379441,235.757618,1080.0,stand,2m
1,close_2m_stand_dimlight_shortsleeve_frontshot_...,237.335307,397.850329,535.780903,1075.692434,err_unknown,2m
2,close_2m_stand_dimlight_shortsleeve_frontshot_...,237.335307,397.850329,535.780903,1075.692434,stand,2m
3,close_2m_stand_dimlight_shortsleeve_frontshot_...,237.335307,397.850329,535.780903,1075.692434,medium,2m
4,close_2m_stand_dimlight_shortsleeve_frontshot_...,237.335307,397.850329,535.780903,1075.692434,close,2m


In [5]:
# 第二步：综合所有bbox的数据
# 每个bbox可能有多个标签，但csv数据一行只对应一个标签，所以要进行综合
# 输出 df 每行代表一个bbox，包括的列有
# 'image', 'action_labels', 'quality_labels', 'pose_labels', 'unknown_labels', 'is_medium', 'label_err'
# 文件名，要求行为标签，质量标签，姿态标签，未知标签，是否是中间帧，打标签的错误

df['bbox'] = df['image'] + df['xmin'].astype(str) + df['ymin'].astype(
    str) + df['xmax'].astype(str) + df['ymax'].astype(str)
df['is_medium'] = df['label'].map(lambda x: 1 if x == 'medium' else 0)


def labeling_error(action_labels, pose_labels, quality_labels, unknown_labels,
                   frame_labels, is_medium, is_unqualified):
    # 标记可能出现的错误
    # 1. 中间帧不包含4类标签。
    # 2. 存在未知标签。
    result = 0
    if is_unqualified:
        return result
    if len(pose_labels) != 1:
        result |= 0b1
    if len(unknown_labels) > 1:
        result |= 0b10
    if len(frame_labels) > 1:
        result |= 0b100
    if len(frame_labels) == 0:
        return result
    if len(action_labels) != 1:
        result |= 0b1000
    if not is_medium:
        return result
    if len(quality_labels) == 0 | (len(quality_labels) > 1
                                   and 'qualified' in quality_labels):
        result |= 0b10000
    return result


def group_by_bbox(x):
    """
    根据 bbox 进行group操作，获取group后每行的结果
    'image', 'action_labels', 'quality_labels', 'pose_labels', 'unknown_labels', 'is_medium', 'label_err'
    文件名，要求行为标签，质量标签，姿态标签，未知标签，是否是中间帧，打标签的错误

    """
    img_name = x.iloc[0, 0]
    cur_action_labels = {img_name.split("_")[0]} & action_type

    # 获取各类标签
    labels = set(list(x['label']))
    cur_quality_labels = labels & quality_type
    cur_pose_labels = labels & pose_type
    cur_unknown_labels = labels - total_type
    cur_frame_labels = labels & frame_type

    # 当前bbox是否存在问题
    is_unqualified = False
    if len(cur_quality_labels) >= 1 and 'qualified' not in cur_quality_labels:
        is_unqualified = True
    # 当前bbox是否是中间帧

    is_medium = np.array(x['is_medium']).sum()

    # 判断labels是否符合要求
    label_err = labeling_error(cur_action_labels, cur_pose_labels,
                               cur_quality_labels, cur_unknown_labels,
                               cur_frame_labels, is_medium, is_unqualified)
    return pd.Series([
        img_name, 
        ' '.join(cur_action_labels), 
        ' '.join(cur_quality_labels),
        ' '.join(cur_pose_labels), 
        ' '.join(cur_unknown_labels), 
        is_medium,
        label_err
    ])


group_by_bbox_df = df.groupby("bbox").apply(group_by_bbox)
group_by_bbox_df.columns = [
    'image', 'action_labels', 'quality_labels', 'pose_labels',
    'unknown_labels', 'is_medium', 'label_err'
]
group_by_bbox_df.head()

Unnamed: 0_level_0,image,action_labels,quality_labels,pose_labels,unknown_labels,is_medium,label_err
bbox,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1
close_2m_stand_coldlight_longsleeve_frontshot_withshelter_CJ01_P0001.mp4#t=1.266667287.38274666849577169.41176470588258825.9354689290061080.0,close_2m_stand_coldlight_longsleeve_frontshot_...,close,err_shelter,stand,,1,0
close_2m_stand_coldlight_longsleeve_frontshot_withshelter_CJ01_P0001.mp4#t=1.266667405.61174551386637253.2717391304348590.40783034257751080.0,close_2m_stand_coldlight_longsleeve_frontshot_...,close,,stand,,0,0
close_2m_stand_coldlight_longsleeve_frontshot_withshelter_CJ01_P0006.mp4#t=0.1666671099.6002756719504201.300551470588261497.89110957960021065.5652573529412,close_2m_stand_coldlight_longsleeve_frontshot_...,close,qualified,stand,,1,0
close_2m_stand_coldlight_longsleeve_frontshot_withshelter_CJ01_P0011.mp4#t=1.2666671024.1764300482428106.006433823529421533.6181943487252947.7711397058824,close_2m_stand_coldlight_longsleeve_frontshot_...,close,err_camera_lacking,,,0,0
close_2m_stand_coldlight_longsleeve_frontshot_withshelter_CJ01_P0011.mp4#t=2961.9848380427292157.62408088235291634.18332184700221041.7417279411766,close_2m_stand_coldlight_longsleeve_frontshot_...,close,err_action_lacking,,,0,0


In [6]:
# 第三步：获取标注错误，给各位标注的同事


def _get_error_details(x):
    """
    获取标注错误详情
    """
    results = ''
    if x & 0b1:
        results += '人体姿态标签要么没有，要么不止一个。' + ' '
    if x & 0b10:
        results += '存在未知标签，请查看 unknown_labels 列信息。' + ' '
    if x & 0b100:
        results += '帧种类标签要么没有，要么不止一个。' + ' '
    if x & 0b1000:
        results += '中间帧要求行为标签要么没有，要么不止一个。' + ' '
    if x & 0b10000:
        results += '中间帧质量标签要么没标，要么在有qualified标签的情况下还有其他错误标签。' + ' '

    return results


labeling_error_df = group_by_bbox_df[group_by_bbox_df['label_err'] != 0]
labeling_error_df['details'] = labeling_error_df['label_err'].apply(
    _get_error_details)
labeling_error_df.to_csv(labeling_error_csv, index=False, columns=['image', 'details'])

In [7]:
# 第四步：获取反馈结果
editing_df = group_by_bbox_df[(group_by_bbox_df['label_err'] == 0)
                              & group_by_bbox_df['is_medium']]
editing_df['index_name'] = editing_df['image'].str.split('_').map(
    lambda x: x[-1][:5] + '_' + x[1])
# key: {pid}_{camera}
# value: list(actions)
total_samples = defaultdict(list)

def _go_through_videos(src_dir):
    if not os.path.isdir(src_dir):
        return
    for file_name in os.listdir(src_dir):
        cur_file = os.path.join(src_dir, file_name)
        if os.path.isdir(cur_file):
            _go_through_videos(cur_file)
        elif file_name.endswith(".mp4"):
            row = file_name.split("_")
            action = row[0]
            camera = row[1]
            person = row[-1][:5]
            key = "{}_{}".format(person, camera)
            total_samples[key].append(action)

_go_through_videos(videos_dir)
actions = list(action_type)


def _generate_action_results(x):
    """
    获取每行(pid+camera)、每列(12个动作)的质量管理结果
    """
    data = [''] * 12
    img_split = x['image'][0].split("_")
    key = img_split[-1][:5] + "_" + img_split[1]
    for row in x.iterrows():
        data[actions.index(row[1][1])] += row[1][2] + ' '
    for idx, ele in enumerate(data):
        if ele == '':
            if actions[idx] in total_samples[key]:
                data[idx] = 'Not Labeled or Wrong Labeled'
            else:
                data[idx] = 'No Data'
    return pd.Series(data)


summary_df = editing_df.groupby('index_name').apply(_generate_action_results)
summary_df.columns = actions
for column in actions:
    summary_df[column] = summary_df[column].str.strip()
summary_df.head()
summary_df.to_csv(summary_csv)

In [8]:
# 第五步：漏标或错标集合
not_labeled_df = (summary_df == 'Not Labeled or Wrong Labeled')
writer = open(not_or_wrong_labeled_csv, "w")
for row in not_labeled_df.iterrows():
    for action, not_labled_flag in zip(list(row[1].index), list(row[1].values)):
        if not_labled_flag:
            writer.write('{} {}\n'.format(row[0], action))
writer.close()

In [9]:
# 第六步：给定数据的采集/编辑准确率
qualified_cnt = (summary_df == 'qualified').astype(int).sum().sum()
not_labeled = ['No Data', 'Not Labeled or Wrong Labeled']


def test(x):
    cur_cnt = np.sum([1 if e in not_labeled else 0 for e in list(x)])
    return pd.Series([cur_cnt])


total_cnt = len(summary_df) * len(summary_df.columns) - summary_df.apply(test).sum().sum()
qualified_cnt, total_cnt, qualified_cnt * 1.0 / total_cnt

(282, 379, 0.7440633245382586)