# 学習/評価のデータで分割を実施する
2023/12/24時点のアノテーションを用います。  
`crassone-analysis/data/annotation/`にlabelstudioからexportしたjsonファイルを置いてください。

3分割交差検証（3-fold cross-validation）を実施して、  
`crassone-analysis/data/outputs/01.format_and_cv/`に`画像データの相対パス`・`アノテーション`・`撮影環境`・`バリデーション値`を持ったcsvを配置します。  
また、`画像データの相対パスキー` + `アノテーション`のjsonを配置します。

In [1]:
from copy import copy
import glob
import json
import os
import pandas as pd
from sklearn.model_selection import KFold

In [2]:
# --- Run configuration ---
# Date string that must appear in the name of the annotation export to load.
pattern_date = '2023-12-24'
# Number of cross-validation folds.
n_splits = 3
# S3 prefix that is split off image URLs to obtain bucket-relative image paths.
bucket_name = 's3://crassone-annotation-202310-202401/'
# Parent of the current working directory (presumably the repository root when the
# notebook is run from its own sub-directory -- confirm). os.path.dirname is used
# instead of splitting on "/" so the result is also correct on platforms whose
# path separator is not "/".
root_dir = os.path.dirname(os.getcwd())
anno_info_dir = f'{root_dir}/data/annotation'
output_dir = f'{root_dir}/data/outputs/01.format_and_cv'

In [3]:
# Locate the annotation export for `pattern_date`.
# The filter is applied to the file name only: matching against the full path would
# accept every JSON file as soon as a parent directory contains "project" or the date.
# Candidates are sorted so the choice is deterministic when several files match.
_anno_info_candidates = sorted(
    path for path in glob.glob(f'{anno_info_dir}/*.json')
    if pattern_date in os.path.basename(path) and 'project' in os.path.basename(path)
)
if not _anno_info_candidates:
    raise FileNotFoundError(
        f"No annotation export matching '*project*{pattern_date}*.json' found in {anno_info_dir}"
    )
anno_info_path = _anno_info_candidates[0]

# Read the export with a context manager so the file handle is closed, and with an
# explicit encoding so the result does not depend on the platform default.
with open(anno_info_path, encoding='utf-8') as anno_file:
    anno_dict = json.load(anno_file)

In [4]:
# Keep only the tasks whose first annotation entry is not flagged `was_cancelled`.
anno_dict_not_cancelled = []
for anno in anno_dict:
    first_annotation = anno['annotations'][0]
    if not first_annotation['was_cancelled']:
        anno_dict_not_cancelled.append(anno)

In [5]:
# Shoot names stored under 'for-learning' that are counted as outdoor data.
_outdoor_learning_shoots = ('2023-11-19-omaezaki-500', '2023-11-23-mie-safetybelt')


def _from_outdoor_learning_shoot(s3_path):
    """Return True when the path contains one of the outdoor 'for-learning' shoot names."""
    return any(shoot in s3_path for shoot in _outdoor_learning_shoots)


# One row per non-cancelled task: image URL and the first annotation's result list,
# indexed by the task id.
_task_ids = []
_task_rows = []
for anno in anno_dict_not_cancelled:
    _task_ids.append(anno['id'])
    _task_rows.append([anno['data']['image'], anno['annotations'][0]['result']])

anno_dict_not_cancelled_df = pd.DataFrame(
    _task_rows,
    index=_task_ids,
    columns=['s3_path', 'annotation'],
)

# Boolean environment flags, each derived purely from substrings of the S3 path.
# Insertion order of this mapping is the column order of the resulting frame.
_environment_rules = {
    'learning_indoor': lambda x: 'for-learning' in x and not _from_outdoor_learning_shoot(x),
    'learning_outdoor': lambda x: 'for-learning' in x and _from_outdoor_learning_shoot(x),
    'fixed_point_camera_indoor': lambda x: 'fixed-point-camera' in x and 'indoor' in x,
    'fixed_point_camera_outdoor': lambda x: 'fixed-point-camera' in x and 'outdoor' in x,
    'evaluation': lambda x: 'evaluation' in x,
}
_s3_paths = anno_dict_not_cancelled_df['s3_path']
for _flag_column, _rule in _environment_rules.items():
    anno_dict_not_cancelled_df[_flag_column] = _s3_paths.apply(_rule)


def _rows_flagged(flag_column):
    """Return the rows of `anno_dict_not_cancelled_df` whose `flag_column` is True."""
    return anno_dict_not_cancelled_df[anno_dict_not_cancelled_df[flag_column]]


# Per-environment subsets used for the fold split and the evaluation hold-out.
anno_dict_learning_indoor_df = _rows_flagged('learning_indoor')
anno_dict_learning_outdoor_df = _rows_flagged('learning_outdoor')
anno_dict_fixed_point_camera_indoor_df = _rows_flagged('fixed_point_camera_indoor')
anno_dict_fixed_point_camera_outdoor_df = _rows_flagged('fixed_point_camera_outdoor')
anno_dict_evaluation_df = _rows_flagged('evaluation')

In [6]:
# Same seeded, shuffled splitter for every environment subset.
kf = KFold(n_splits=n_splits, shuffle=True, random_state=42)

# Subset to split, keyed by the name under which its fold indices are stored.
# NOTE: the keys are intentionally kept as-is; the fold assembly cell looks them up verbatim.
_frames_to_split = {
    'learning_indoor': anno_dict_learning_indoor_df,
    'anno_dict_learning_outdoor_df': anno_dict_learning_outdoor_df,
    'anno_dict_fixed_point_camera_indoor_df': anno_dict_fixed_point_camera_indoor_df,
    'anno_dict_fixed_point_camera_outdoor_df': anno_dict_fixed_point_camera_outdoor_df,
}

# validation_idx[key][i] holds the positional row indices of fold i for that subset.
validation_idx = {split_key: [] for split_key in _frames_to_split}
for split_key, split_frame in _frames_to_split.items():
    for train_index, test_index in kf.split(split_frame):
        validation_idx[split_key].append(test_index)

In [7]:
# (subset, key into validation_idx) pairs, in the order the rows are concatenated.
_fold_sources = [
    (anno_dict_learning_indoor_df, 'learning_indoor'),
    (anno_dict_learning_outdoor_df, 'anno_dict_learning_outdoor_df'),
    (anno_dict_fixed_point_camera_indoor_df, 'anno_dict_fixed_point_camera_indoor_df'),
    (anno_dict_fixed_point_camera_outdoor_df, 'anno_dict_fixed_point_camera_outdoor_df'),
]

# For every fold, gather that fold's rows from each subset and tag them with the fold id.
anno_dict_add_validation_df_list = []
for i in range(n_splits):
    _anno_dict_add_validation_df = pd.concat(
        [source_df.iloc[validation_idx[source_key][i]] for source_df, source_key in _fold_sources]
    ).assign(validation=i)
    anno_dict_add_validation_df_list.append(_anno_dict_add_validation_df)

# Evaluation rows are not part of any fold; they get the sentinel fold id 999.
_anno_dict_evaluation_df = anno_dict_evaluation_df.assign(validation=999)
anno_dict_add_validation_df_list.append(_anno_dict_evaluation_df)

anno_dict_add_validation_df = pd.concat(anno_dict_add_validation_df_list)
anno_dict_add_validation_df

Unnamed: 0,s3_path,annotation,learning_indoor,learning_outdoor,fixed_point_camera_indoor,fixed_point_camera_outdoor,evaluation,validation
1725,s3://crassone-annotation-202310-202401/for-lea...,"[{'original_width': 5664, 'original_height': 4...",True,False,False,False,False,0
1726,s3://crassone-annotation-202310-202401/for-lea...,"[{'original_width': 5664, 'original_height': 4...",True,False,False,False,False,0
1728,s3://crassone-annotation-202310-202401/for-lea...,"[{'original_width': 5664, 'original_height': 4...",True,False,False,False,False,0
1729,s3://crassone-annotation-202310-202401/for-lea...,"[{'original_width': 5664, 'original_height': 4...",True,False,False,False,False,0
1734,s3://crassone-annotation-202310-202401/for-lea...,"[{'original_width': 5664, 'original_height': 4...",True,False,False,False,False,0
...,...,...,...,...,...,...,...,...
40234,s3://crassone-annotation-202310-202401/evaluat...,[],False,False,False,False,True,999
40292,s3://crassone-annotation-202310-202401/evaluat...,"[{'original_width': 1280, 'original_height': 9...",False,False,False,False,True,999
40293,s3://crassone-annotation-202310-202401/evaluat...,"[{'original_width': 1280, 'original_height': 9...",False,False,False,False,True,999
40294,s3://crassone-annotation-202310-202401/evaluat...,"[{'original_width': 1280, 'original_height': 9...",False,False,False,False,True,999


In [8]:
# Labels for which a per-image presence flag is computed, grouped by theme.
_helmet_labels = [
    "helmet",
    "unsafe-helmet (no chin strap)",
    "unsafe-helmet (inadequate covering)",
    "no-helmet",
    "helmet unknown (small)",
    "helmet unknown (occlusion)",
]
_equipment_labels = ["safety belt"]
# "no person" is a pseudo-label: it is derived from an empty annotation list
# rather than looked up among the rectangle labels.
_person_labels = ["person", "person in high place", "no person"]

labels_list = _helmet_labels + _equipment_labels + _person_labels

In [9]:
def has_label(annotation, target_label):
    """Tell whether any result in `annotation` carries `target_label`.

    Args:
        annotation: iterable of result dicts, each exposing
            ``['value']['rectanglelabels']`` (a collection of label names).
        target_label: label name to look for.

    Returns:
        True if at least one result lists `target_label`, otherwise False
        (including when `annotation` is empty).
    """
    return any(target_label in result['value']['rectanglelabels'] for result in annotation)

# Add one boolean column per label telling whether the image carries that label.
for label in labels_list:
    _annotation_column = anno_dict_add_validation_df["annotation"]
    if label != 'no person':
        _label_flags = _annotation_column.apply(has_label, args=(label,))
    else:
        # 'no person' is true exactly when the annotation list is empty.
        _label_flags = _annotation_column.apply(lambda x: not x)
    anno_dict_add_validation_df[label] = _label_flags

In [10]:
# Report, for every label and shooting environment, how many images have / lack the
# label in each fold, to check that the folds are reasonably balanced.
#
# The fold ids are derived from n_splits so this report stays in sync with the split
# configuration; 999 is the fold id assigned to the evaluation rows.
val_list = list(range(n_splits)) + [999]
for label in labels_list:
    print('###############################')
    print(label)
    print()
    for spot in ['learning_indoor', 'learning_outdoor', 'fixed_point_camera_indoor', 'fixed_point_camera_outdoor', 'evaluation']:
        print(spot)
        print()

        # Row mask for the environment is the same for every fold; compute it once.
        in_spot = anno_dict_add_validation_df[spot]
        for i in val_list:
            print(f'validation: {i}')
            in_fold = anno_dict_add_validation_df['validation'] == i
            display(pd.DataFrame(
                anno_dict_add_validation_df[in_spot & in_fold][label].value_counts()
            ))
            print()

###############################
helmet

learning_indoor

validation: 0


Unnamed: 0_level_0,count
helmet,Unnamed: 1_level_1
False,215
True,208



validation: 1


Unnamed: 0_level_0,count
helmet,Unnamed: 1_level_1
True,220
False,203



validation: 2


Unnamed: 0_level_0,count
helmet,Unnamed: 1_level_1
True,211
False,211



validation: 999


Unnamed: 0_level_0,count
helmet,Unnamed: 1_level_1



learning_outdoor

validation: 0


Unnamed: 0_level_0,count
helmet,Unnamed: 1_level_1
False,202
True,59



validation: 1


Unnamed: 0_level_0,count
helmet,Unnamed: 1_level_1
False,204
True,57



validation: 2


Unnamed: 0_level_0,count
helmet,Unnamed: 1_level_1
False,207
True,53



validation: 999


Unnamed: 0_level_0,count
helmet,Unnamed: 1_level_1



fixed_point_camera_indoor

validation: 0


Unnamed: 0_level_0,count
helmet,Unnamed: 1_level_1
False,50
True,3



validation: 1


Unnamed: 0_level_0,count
helmet,Unnamed: 1_level_1
False,51
True,1



validation: 2


Unnamed: 0_level_0,count
helmet,Unnamed: 1_level_1
False,51
True,1



validation: 999


Unnamed: 0_level_0,count
helmet,Unnamed: 1_level_1



fixed_point_camera_outdoor

validation: 0


Unnamed: 0_level_0,count
helmet,Unnamed: 1_level_1
False,180
True,9



validation: 1


Unnamed: 0_level_0,count
helmet,Unnamed: 1_level_1
False,180
True,9



validation: 2


Unnamed: 0_level_0,count
helmet,Unnamed: 1_level_1
False,173
True,15



validation: 999


Unnamed: 0_level_0,count
helmet,Unnamed: 1_level_1



evaluation

validation: 0


Unnamed: 0_level_0,count
helmet,Unnamed: 1_level_1



validation: 1


Unnamed: 0_level_0,count
helmet,Unnamed: 1_level_1



validation: 2


Unnamed: 0_level_0,count
helmet,Unnamed: 1_level_1



validation: 999


Unnamed: 0_level_0,count
helmet,Unnamed: 1_level_1
False,82
True,8



###############################
unsafe-helmet (no chin strap)

learning_indoor

validation: 0


Unnamed: 0_level_0,count
unsafe-helmet (no chin strap),Unnamed: 1_level_1
False,378
True,45



validation: 1


Unnamed: 0_level_0,count
unsafe-helmet (no chin strap),Unnamed: 1_level_1
False,387
True,36



validation: 2


Unnamed: 0_level_0,count
unsafe-helmet (no chin strap),Unnamed: 1_level_1
False,397
True,25



validation: 999


Unnamed: 0_level_0,count
unsafe-helmet (no chin strap),Unnamed: 1_level_1



learning_outdoor

validation: 0


Unnamed: 0_level_0,count
unsafe-helmet (no chin strap),Unnamed: 1_level_1
False,172
True,89



validation: 1


Unnamed: 0_level_0,count
unsafe-helmet (no chin strap),Unnamed: 1_level_1
False,164
True,97



validation: 2


Unnamed: 0_level_0,count
unsafe-helmet (no chin strap),Unnamed: 1_level_1
False,171
True,89



validation: 999


Unnamed: 0_level_0,count
unsafe-helmet (no chin strap),Unnamed: 1_level_1



fixed_point_camera_indoor

validation: 0


Unnamed: 0_level_0,count
unsafe-helmet (no chin strap),Unnamed: 1_level_1
False,50
True,3



validation: 1


Unnamed: 0_level_0,count
unsafe-helmet (no chin strap),Unnamed: 1_level_1
False,50
True,2



validation: 2


Unnamed: 0_level_0,count
unsafe-helmet (no chin strap),Unnamed: 1_level_1
False,51
True,1



validation: 999


Unnamed: 0_level_0,count
unsafe-helmet (no chin strap),Unnamed: 1_level_1



fixed_point_camera_outdoor

validation: 0


Unnamed: 0_level_0,count
unsafe-helmet (no chin strap),Unnamed: 1_level_1
False,180
True,9



validation: 1


Unnamed: 0_level_0,count
unsafe-helmet (no chin strap),Unnamed: 1_level_1
False,182
True,7



validation: 2


Unnamed: 0_level_0,count
unsafe-helmet (no chin strap),Unnamed: 1_level_1
False,176
True,12



validation: 999


Unnamed: 0_level_0,count
unsafe-helmet (no chin strap),Unnamed: 1_level_1



evaluation

validation: 0


Unnamed: 0_level_0,count
unsafe-helmet (no chin strap),Unnamed: 1_level_1



validation: 1


Unnamed: 0_level_0,count
unsafe-helmet (no chin strap),Unnamed: 1_level_1



validation: 2


Unnamed: 0_level_0,count
unsafe-helmet (no chin strap),Unnamed: 1_level_1



validation: 999


Unnamed: 0_level_0,count
unsafe-helmet (no chin strap),Unnamed: 1_level_1
False,89
True,1



###############################
unsafe-helmet (inadequate covering)

learning_indoor

validation: 0


Unnamed: 0_level_0,count
unsafe-helmet (inadequate covering),Unnamed: 1_level_1
False,422
True,1



validation: 1


Unnamed: 0_level_0,count
unsafe-helmet (inadequate covering),Unnamed: 1_level_1
False,423



validation: 2


Unnamed: 0_level_0,count
unsafe-helmet (inadequate covering),Unnamed: 1_level_1
False,422



validation: 999


Unnamed: 0_level_0,count
unsafe-helmet (inadequate covering),Unnamed: 1_level_1



learning_outdoor

validation: 0


Unnamed: 0_level_0,count
unsafe-helmet (inadequate covering),Unnamed: 1_level_1
False,204
True,57



validation: 1


Unnamed: 0_level_0,count
unsafe-helmet (inadequate covering),Unnamed: 1_level_1
False,215
True,46



validation: 2


Unnamed: 0_level_0,count
unsafe-helmet (inadequate covering),Unnamed: 1_level_1
False,195
True,65



validation: 999


Unnamed: 0_level_0,count
unsafe-helmet (inadequate covering),Unnamed: 1_level_1



fixed_point_camera_indoor

validation: 0


Unnamed: 0_level_0,count
unsafe-helmet (inadequate covering),Unnamed: 1_level_1
True,42
False,11



validation: 1


Unnamed: 0_level_0,count
unsafe-helmet (inadequate covering),Unnamed: 1_level_1
True,42
False,10



validation: 2


Unnamed: 0_level_0,count
unsafe-helmet (inadequate covering),Unnamed: 1_level_1
True,41
False,11



validation: 999


Unnamed: 0_level_0,count
unsafe-helmet (inadequate covering),Unnamed: 1_level_1



fixed_point_camera_outdoor

validation: 0


Unnamed: 0_level_0,count
unsafe-helmet (inadequate covering),Unnamed: 1_level_1
False,117
True,72



validation: 1


Unnamed: 0_level_0,count
unsafe-helmet (inadequate covering),Unnamed: 1_level_1
False,113
True,76



validation: 2


Unnamed: 0_level_0,count
unsafe-helmet (inadequate covering),Unnamed: 1_level_1
False,96
True,92



validation: 999


Unnamed: 0_level_0,count
unsafe-helmet (inadequate covering),Unnamed: 1_level_1



evaluation

validation: 0


Unnamed: 0_level_0,count
unsafe-helmet (inadequate covering),Unnamed: 1_level_1



validation: 1


Unnamed: 0_level_0,count
unsafe-helmet (inadequate covering),Unnamed: 1_level_1



validation: 2


Unnamed: 0_level_0,count
unsafe-helmet (inadequate covering),Unnamed: 1_level_1



validation: 999


Unnamed: 0_level_0,count
unsafe-helmet (inadequate covering),Unnamed: 1_level_1
False,61
True,29



###############################
no-helmet

learning_indoor

validation: 0


Unnamed: 0_level_0,count
no-helmet,Unnamed: 1_level_1
False,362
True,61



validation: 1


Unnamed: 0_level_0,count
no-helmet,Unnamed: 1_level_1
False,354
True,69



validation: 2


Unnamed: 0_level_0,count
no-helmet,Unnamed: 1_level_1
False,342
True,80



validation: 999


Unnamed: 0_level_0,count
no-helmet,Unnamed: 1_level_1



learning_outdoor

validation: 0


Unnamed: 0_level_0,count
no-helmet,Unnamed: 1_level_1
False,245
True,16



validation: 1


Unnamed: 0_level_0,count
no-helmet,Unnamed: 1_level_1
False,240
True,21



validation: 2


Unnamed: 0_level_0,count
no-helmet,Unnamed: 1_level_1
False,250
True,10



validation: 999


Unnamed: 0_level_0,count
no-helmet,Unnamed: 1_level_1



fixed_point_camera_indoor

validation: 0


Unnamed: 0_level_0,count
no-helmet,Unnamed: 1_level_1
False,49
True,4



validation: 1


Unnamed: 0_level_0,count
no-helmet,Unnamed: 1_level_1
False,50
True,2



validation: 2


Unnamed: 0_level_0,count
no-helmet,Unnamed: 1_level_1
False,50
True,2



validation: 999


Unnamed: 0_level_0,count
no-helmet,Unnamed: 1_level_1



fixed_point_camera_outdoor

validation: 0


Unnamed: 0_level_0,count
no-helmet,Unnamed: 1_level_1
False,143
True,46



validation: 1


Unnamed: 0_level_0,count
no-helmet,Unnamed: 1_level_1
False,149
True,40



validation: 2


Unnamed: 0_level_0,count
no-helmet,Unnamed: 1_level_1
False,154
True,34



validation: 999


Unnamed: 0_level_0,count
no-helmet,Unnamed: 1_level_1



evaluation

validation: 0


Unnamed: 0_level_0,count
no-helmet,Unnamed: 1_level_1



validation: 1


Unnamed: 0_level_0,count
no-helmet,Unnamed: 1_level_1



validation: 2


Unnamed: 0_level_0,count
no-helmet,Unnamed: 1_level_1



validation: 999


Unnamed: 0_level_0,count
no-helmet,Unnamed: 1_level_1
False,80
True,10



###############################
helmet unknown (small)

learning_indoor

validation: 0


Unnamed: 0_level_0,count
helmet unknown (small),Unnamed: 1_level_1
False,407
True,16



validation: 1


Unnamed: 0_level_0,count
helmet unknown (small),Unnamed: 1_level_1
False,406
True,17



validation: 2


Unnamed: 0_level_0,count
helmet unknown (small),Unnamed: 1_level_1
False,402
True,20



validation: 999


Unnamed: 0_level_0,count
helmet unknown (small),Unnamed: 1_level_1



learning_outdoor

validation: 0


Unnamed: 0_level_0,count
helmet unknown (small),Unnamed: 1_level_1
False,252
True,9



validation: 1


Unnamed: 0_level_0,count
helmet unknown (small),Unnamed: 1_level_1
False,248
True,13



validation: 2


Unnamed: 0_level_0,count
helmet unknown (small),Unnamed: 1_level_1
False,246
True,14



validation: 999


Unnamed: 0_level_0,count
helmet unknown (small),Unnamed: 1_level_1



fixed_point_camera_indoor

validation: 0


Unnamed: 0_level_0,count
helmet unknown (small),Unnamed: 1_level_1
False,49
True,4



validation: 1


Unnamed: 0_level_0,count
helmet unknown (small),Unnamed: 1_level_1
False,46
True,6



validation: 2


Unnamed: 0_level_0,count
helmet unknown (small),Unnamed: 1_level_1
False,49
True,3



validation: 999


Unnamed: 0_level_0,count
helmet unknown (small),Unnamed: 1_level_1



fixed_point_camera_outdoor

validation: 0


Unnamed: 0_level_0,count
helmet unknown (small),Unnamed: 1_level_1
False,150
True,39



validation: 1


Unnamed: 0_level_0,count
helmet unknown (small),Unnamed: 1_level_1
False,163
True,26



validation: 2


Unnamed: 0_level_0,count
helmet unknown (small),Unnamed: 1_level_1
False,158
True,30



validation: 999


Unnamed: 0_level_0,count
helmet unknown (small),Unnamed: 1_level_1



evaluation

validation: 0


Unnamed: 0_level_0,count
helmet unknown (small),Unnamed: 1_level_1



validation: 1


Unnamed: 0_level_0,count
helmet unknown (small),Unnamed: 1_level_1



validation: 2


Unnamed: 0_level_0,count
helmet unknown (small),Unnamed: 1_level_1



validation: 999


Unnamed: 0_level_0,count
helmet unknown (small),Unnamed: 1_level_1
False,75
True,15



###############################
helmet unknown (occlusion)

learning_indoor

validation: 0


Unnamed: 0_level_0,count
helmet unknown (occlusion),Unnamed: 1_level_1
False,310
True,113



validation: 1


Unnamed: 0_level_0,count
helmet unknown (occlusion),Unnamed: 1_level_1
False,321
True,102



validation: 2


Unnamed: 0_level_0,count
helmet unknown (occlusion),Unnamed: 1_level_1
False,309
True,113



validation: 999


Unnamed: 0_level_0,count
helmet unknown (occlusion),Unnamed: 1_level_1



learning_outdoor

validation: 0


Unnamed: 0_level_0,count
helmet unknown (occlusion),Unnamed: 1_level_1
False,222
True,39



validation: 1


Unnamed: 0_level_0,count
helmet unknown (occlusion),Unnamed: 1_level_1
False,227
True,34



validation: 2


Unnamed: 0_level_0,count
helmet unknown (occlusion),Unnamed: 1_level_1
False,222
True,38



validation: 999


Unnamed: 0_level_0,count
helmet unknown (occlusion),Unnamed: 1_level_1



fixed_point_camera_indoor

validation: 0


Unnamed: 0_level_0,count
helmet unknown (occlusion),Unnamed: 1_level_1
True,30
False,23



validation: 1


Unnamed: 0_level_0,count
helmet unknown (occlusion),Unnamed: 1_level_1
True,29
False,23



validation: 2


Unnamed: 0_level_0,count
helmet unknown (occlusion),Unnamed: 1_level_1
True,30
False,22



validation: 999


Unnamed: 0_level_0,count
helmet unknown (occlusion),Unnamed: 1_level_1



fixed_point_camera_outdoor

validation: 0


Unnamed: 0_level_0,count
helmet unknown (occlusion),Unnamed: 1_level_1
False,135
True,54



validation: 1


Unnamed: 0_level_0,count
helmet unknown (occlusion),Unnamed: 1_level_1
False,133
True,56



validation: 2


Unnamed: 0_level_0,count
helmet unknown (occlusion),Unnamed: 1_level_1
False,135
True,53



validation: 999


Unnamed: 0_level_0,count
helmet unknown (occlusion),Unnamed: 1_level_1



evaluation

validation: 0


Unnamed: 0_level_0,count
helmet unknown (occlusion),Unnamed: 1_level_1



validation: 1


Unnamed: 0_level_0,count
helmet unknown (occlusion),Unnamed: 1_level_1



validation: 2


Unnamed: 0_level_0,count
helmet unknown (occlusion),Unnamed: 1_level_1



validation: 999


Unnamed: 0_level_0,count
helmet unknown (occlusion),Unnamed: 1_level_1
False,66
True,24



###############################
safety belt

learning_indoor

validation: 0


Unnamed: 0_level_0,count
safety belt,Unnamed: 1_level_1
True,350
False,73



validation: 1


Unnamed: 0_level_0,count
safety belt,Unnamed: 1_level_1
True,342
False,81



validation: 2


Unnamed: 0_level_0,count
safety belt,Unnamed: 1_level_1
True,369
False,53



validation: 999


Unnamed: 0_level_0,count
safety belt,Unnamed: 1_level_1



learning_outdoor

validation: 0


Unnamed: 0_level_0,count
safety belt,Unnamed: 1_level_1
False,142
True,119



validation: 1


Unnamed: 0_level_0,count
safety belt,Unnamed: 1_level_1
False,143
True,118



validation: 2


Unnamed: 0_level_0,count
safety belt,Unnamed: 1_level_1
False,131
True,129



validation: 999


Unnamed: 0_level_0,count
safety belt,Unnamed: 1_level_1



fixed_point_camera_indoor

validation: 0


Unnamed: 0_level_0,count
safety belt,Unnamed: 1_level_1
False,53



validation: 1


Unnamed: 0_level_0,count
safety belt,Unnamed: 1_level_1
False,52



validation: 2


Unnamed: 0_level_0,count
safety belt,Unnamed: 1_level_1
False,52



validation: 999


Unnamed: 0_level_0,count
safety belt,Unnamed: 1_level_1



fixed_point_camera_outdoor

validation: 0


Unnamed: 0_level_0,count
safety belt,Unnamed: 1_level_1
False,189



validation: 1


Unnamed: 0_level_0,count
safety belt,Unnamed: 1_level_1
False,189



validation: 2


Unnamed: 0_level_0,count
safety belt,Unnamed: 1_level_1
False,188



validation: 999


Unnamed: 0_level_0,count
safety belt,Unnamed: 1_level_1



evaluation

validation: 0


Unnamed: 0_level_0,count
safety belt,Unnamed: 1_level_1



validation: 1


Unnamed: 0_level_0,count
safety belt,Unnamed: 1_level_1



validation: 2


Unnamed: 0_level_0,count
safety belt,Unnamed: 1_level_1



validation: 999


Unnamed: 0_level_0,count
safety belt,Unnamed: 1_level_1
False,83
True,7



###############################
person

learning_indoor

validation: 0


Unnamed: 0_level_0,count
person,Unnamed: 1_level_1
True,420
False,3



validation: 1


Unnamed: 0_level_0,count
person,Unnamed: 1_level_1
True,418
False,5



validation: 2


Unnamed: 0_level_0,count
person,Unnamed: 1_level_1
True,420
False,2



validation: 999


Unnamed: 0_level_0,count
person,Unnamed: 1_level_1



learning_outdoor

validation: 0


Unnamed: 0_level_0,count
person,Unnamed: 1_level_1
True,250
False,11



validation: 1


Unnamed: 0_level_0,count
person,Unnamed: 1_level_1
True,250
False,11



validation: 2


Unnamed: 0_level_0,count
person,Unnamed: 1_level_1
True,252
False,8



validation: 999


Unnamed: 0_level_0,count
person,Unnamed: 1_level_1



fixed_point_camera_indoor

validation: 0


Unnamed: 0_level_0,count
person,Unnamed: 1_level_1
True,51
False,2



validation: 1


Unnamed: 0_level_0,count
person,Unnamed: 1_level_1
True,48
False,4



validation: 2


Unnamed: 0_level_0,count
person,Unnamed: 1_level_1
True,49
False,3



validation: 999


Unnamed: 0_level_0,count
person,Unnamed: 1_level_1



fixed_point_camera_outdoor

validation: 0


Unnamed: 0_level_0,count
person,Unnamed: 1_level_1
True,148
False,41



validation: 1


Unnamed: 0_level_0,count
person,Unnamed: 1_level_1
True,143
False,46



validation: 2


Unnamed: 0_level_0,count
person,Unnamed: 1_level_1
True,151
False,37



validation: 999


Unnamed: 0_level_0,count
person,Unnamed: 1_level_1



evaluation

validation: 0


Unnamed: 0_level_0,count
person,Unnamed: 1_level_1



validation: 1


Unnamed: 0_level_0,count
person,Unnamed: 1_level_1



validation: 2


Unnamed: 0_level_0,count
person,Unnamed: 1_level_1



validation: 999


Unnamed: 0_level_0,count
person,Unnamed: 1_level_1
False,53
True,37



###############################
person in high place

learning_indoor

validation: 0


Unnamed: 0_level_0,count
person in high place,Unnamed: 1_level_1
False,423



validation: 1


Unnamed: 0_level_0,count
person in high place,Unnamed: 1_level_1
False,423



validation: 2


Unnamed: 0_level_0,count
person in high place,Unnamed: 1_level_1
False,422



validation: 999


Unnamed: 0_level_0,count
person in high place,Unnamed: 1_level_1



learning_outdoor

validation: 0


Unnamed: 0_level_0,count
person in high place,Unnamed: 1_level_1
False,250
True,11



validation: 1


Unnamed: 0_level_0,count
person in high place,Unnamed: 1_level_1
False,250
True,11



validation: 2


Unnamed: 0_level_0,count
person in high place,Unnamed: 1_level_1
False,252
True,8



validation: 999


Unnamed: 0_level_0,count
person in high place,Unnamed: 1_level_1



fixed_point_camera_indoor

validation: 0


Unnamed: 0_level_0,count
person in high place,Unnamed: 1_level_1
False,53



validation: 1


Unnamed: 0_level_0,count
person in high place,Unnamed: 1_level_1
False,52



validation: 2


Unnamed: 0_level_0,count
person in high place,Unnamed: 1_level_1
False,52



validation: 999


Unnamed: 0_level_0,count
person in high place,Unnamed: 1_level_1



fixed_point_camera_outdoor

validation: 0


Unnamed: 0_level_0,count
person in high place,Unnamed: 1_level_1
False,165
True,24



validation: 1


Unnamed: 0_level_0,count
person in high place,Unnamed: 1_level_1
False,157
True,32



validation: 2


Unnamed: 0_level_0,count
person in high place,Unnamed: 1_level_1
False,164
True,24



validation: 999


Unnamed: 0_level_0,count
person in high place,Unnamed: 1_level_1



evaluation

validation: 0


Unnamed: 0_level_0,count
person in high place,Unnamed: 1_level_1



validation: 1


Unnamed: 0_level_0,count
person in high place,Unnamed: 1_level_1



validation: 2


Unnamed: 0_level_0,count
person in high place,Unnamed: 1_level_1



validation: 999


Unnamed: 0_level_0,count
person in high place,Unnamed: 1_level_1
False,62
True,28



###############################
no person

learning_indoor

validation: 0


Unnamed: 0_level_0,count
no person,Unnamed: 1_level_1
False,420
True,3



validation: 1


Unnamed: 0_level_0,count
no person,Unnamed: 1_level_1
False,418
True,5



validation: 2


Unnamed: 0_level_0,count
no person,Unnamed: 1_level_1
False,420
True,2



validation: 999


Unnamed: 0_level_0,count
no person,Unnamed: 1_level_1



learning_outdoor

validation: 0


Unnamed: 0_level_0,count
no person,Unnamed: 1_level_1
False,261



validation: 1


Unnamed: 0_level_0,count
no person,Unnamed: 1_level_1
False,261



validation: 2


Unnamed: 0_level_0,count
no person,Unnamed: 1_level_1
False,260



validation: 999


Unnamed: 0_level_0,count
no person,Unnamed: 1_level_1



fixed_point_camera_indoor

validation: 0


Unnamed: 0_level_0,count
no person,Unnamed: 1_level_1
False,51
True,2



validation: 1


Unnamed: 0_level_0,count
no person,Unnamed: 1_level_1
False,48
True,4



validation: 2


Unnamed: 0_level_0,count
no person,Unnamed: 1_level_1
False,49
True,3



validation: 999


Unnamed: 0_level_0,count
no person,Unnamed: 1_level_1



fixed_point_camera_outdoor

validation: 0


Unnamed: 0_level_0,count
no person,Unnamed: 1_level_1
False,162
True,27



validation: 1


Unnamed: 0_level_0,count
no person,Unnamed: 1_level_1
False,165
True,24



validation: 2


Unnamed: 0_level_0,count
no person,Unnamed: 1_level_1
False,167
True,21



validation: 999


Unnamed: 0_level_0,count
no person,Unnamed: 1_level_1



evaluation

validation: 0


Unnamed: 0_level_0,count
no person,Unnamed: 1_level_1



validation: 1


Unnamed: 0_level_0,count
no person,Unnamed: 1_level_1



validation: 2


Unnamed: 0_level_0,count
no person,Unnamed: 1_level_1



validation: 999


Unnamed: 0_level_0,count
no person,Unnamed: 1_level_1
False,57
True,33





In [11]:
# Bucket-relative image path: everything after the bucket prefix of the S3 URL.
anno_dict_add_validation_df["relative_img_path"] = anno_dict_add_validation_df["s3_path"].apply(
    lambda x: x.split(bucket_name)[-1]
)
# Key for the image: the relative path without its file extension.
# Only the final extension is stripped; cutting at the first "." anywhere in the path
# would truncate (and possibly collide) keys whose directory or file stem contains a dot.
anno_dict_add_validation_df["unique_key"] = anno_dict_add_validation_df["relative_img_path"].apply(
    lambda x: os.path.splitext(x)[0]
)

In [12]:
# Preview the first rows to check the per-label flag columns and the derived
# relative_img_path / unique_key columns.
anno_dict_add_validation_df.head()

Unnamed: 0,s3_path,annotation,learning_indoor,learning_outdoor,fixed_point_camera_indoor,fixed_point_camera_outdoor,evaluation,validation,helmet,unsafe-helmet (no chin strap),unsafe-helmet (inadequate covering),no-helmet,helmet unknown (small),helmet unknown (occlusion),safety belt,person,person in high place,no person,relative_img_path,unique_key
1725,s3://crassone-annotation-202310-202401/for-lea...,"[{'original_width': 5664, 'original_height': 4...",True,False,False,False,False,0,True,False,False,False,False,False,True,True,False,False,for-learning/helmet-on-head/IMAG0006.JPG,for-learning/helmet-on-head/IMAG0006
1726,s3://crassone-annotation-202310-202401/for-lea...,"[{'original_width': 5664, 'original_height': 4...",True,False,False,False,False,0,False,False,False,False,False,True,True,True,False,False,for-learning/helmet-on-head/IMAG0007.JPG,for-learning/helmet-on-head/IMAG0007
1728,s3://crassone-annotation-202310-202401/for-lea...,"[{'original_width': 5664, 'original_height': 4...",True,False,False,False,False,0,False,False,False,False,False,True,True,True,False,False,for-learning/helmet-on-head/IMAG0009.JPG,for-learning/helmet-on-head/IMAG0009
1729,s3://crassone-annotation-202310-202401/for-lea...,"[{'original_width': 5664, 'original_height': 4...",True,False,False,False,False,0,False,False,False,False,False,True,True,True,False,False,for-learning/helmet-on-head/IMAG0010.JPG,for-learning/helmet-on-head/IMAG0010
1734,s3://crassone-annotation-202310-202401/for-lea...,"[{'original_width': 5664, 'original_height': 4...",True,False,False,False,False,0,False,False,False,False,False,True,False,True,False,False,for-learning/helmet-on-head/IMAG0328.JPG,for-learning/helmet-on-head/IMAG0328


In [13]:
def format_bbox(annotations):
    """Turn rectangle results into pixel-space boxes keyed by label name.

    Each result's ``value`` holds ``x``, ``y``, ``width`` and ``height``, which are
    divided by 100 and scaled by ``original_width`` / ``original_height``.
    The left/top edges are clamped to be >= 0 and the right/bottom edges to be
    <= the image width/height.

    Args:
        annotations: iterable of result dicts with ``original_width``,
            ``original_height`` and ``value`` (including ``rectanglelabels``).

    Returns:
        A list with one ``{label: [left, top, right, bottom]}`` dict per label of
        each result; empty when `annotations` is empty.
    """
    format_annos = []
    for anno in annotations:
        rect = anno['value']
        img_w = anno['original_width']
        img_h = anno['original_height']

        left = rect['x'] / 100 * img_w
        top = rect['y'] / 100 * img_h
        right = (rect['x'] + rect['width']) / 100 * img_w
        bottom = (rect['y'] + rect['height']) / 100 * img_h

        # Clamp the box to the image: lower bound on left/top, upper bound on right/bottom.
        bbox = [max(left, 0), max(top, 0), min(right, img_w), min(bottom, img_h)]

        # A result with several labels yields one entry per label, all with the same box.
        format_annos.extend({label_name: bbox} for label_name in rect['rectanglelabels'])
    return format_annos

# Convert every image's raw annotation list into pixel-space, label-keyed boxes.
_raw_annotation_column = anno_dict_add_validation_df['annotation']
anno_dict_add_validation_df['format_annotation'] = _raw_annotation_column.apply(format_bbox)

In [14]:
# Flatten to one row per (image, label, box). Images without any box contribute a
# single row whose label and coordinates are None, so they stay present in the output.
annotation_list = []
_row_columns = ['unique_key', 'relative_img_path', 'format_annotation', 'validation']
for _row in anno_dict_add_validation_df[_row_columns].values:
    unique_key, relative_img_path, format_annotation, validation = _row
    if not format_annotation:
        annotation_list.append([unique_key, relative_img_path] + [None] * 5 + [validation])
        continue
    for anno in format_annotation:
        for key, value in anno.items():
            # key is the label name, value is [left, top, right, bottom].
            annotation_list.append([unique_key, relative_img_path, key, *value, validation])

In [15]:
# Column layout of the flattened rows: image key, image path, label, box corners, fold id.
_annotation_columns = [
    'unique_key',
    'relative_img_path',
    'label',
    'left',
    'top',
    'right',
    'bottom',
    'validation',
]
annotation_df = pd.DataFrame(data=annotation_list, columns=_annotation_columns)
annotation_df

Unnamed: 0,unique_key,relative_img_path,label,left,top,right,bottom,validation
0,for-learning/helmet-on-head/IMAG0006,for-learning/helmet-on-head/IMAG0006.JPG,helmet,2917.367124,1435.063828,3629.453867,2142.732403,0
1,for-learning/helmet-on-head/IMAG0006,for-learning/helmet-on-head/IMAG0006.JPG,safety belt,2867.396125,2121.918622,3741.888616,3970.182430,0
2,for-learning/helmet-on-head/IMAG0006,for-learning/helmet-on-head/IMAG0006.JPG,person,2979.830874,1426.738316,3725.231616,3982.670699,0
3,for-learning/helmet-on-head/IMAG0007,for-learning/helmet-on-head/IMAG0007.JPG,helmet unknown (occlusion),2950.889128,1413.968436,3627.642626,2066.100430,0
4,for-learning/helmet-on-head/IMAG0007,for-learning/helmet-on-head/IMAG0007.JPG,safety belt,2999.664155,2084.384505,3755.677072,4150.484935,0
...,...,...,...,...,...,...,...,...
8532,evaluation/2023-12-16-shizuokahamamatsu/202312...,evaluation/2023-12-16-shizuokahamamatsu/202312...,person,888.666733,532.802013,944.926433,658.215103,999
8533,evaluation/2023-12-16-shizuokahamamatsu/202312...,evaluation/2023-12-16-shizuokahamamatsu/202312...,helmet unknown (occlusion),888.055214,532.802013,905.789250,554.214004,999
8534,evaluation/2023-12-16-shizuokahamamatsu/202312...,evaluation/2023-12-16-shizuokahamamatsu/202312...,unsafe-helmet (inadequate covering),536.432090,497.319285,560.892830,521.790132,999
8535,evaluation/2023-12-16-shizuokahamamatsu/202312...,evaluation/2023-12-16-shizuokahamamatsu/202312...,person,823.924805,505.752075,877.813721,605.081543,999


In [16]:
# Write the flattened annotations as CSV (without the positional index).
# The output directory is created first: to_csv does not create missing directories,
# so the export would otherwise fail on a fresh checkout.
os.makedirs(output_dir, exist_ok=True)
annotation_df.to_csv(f'{output_dir}/annotation-{pattern_date}.csv', index=False)