In [56]:
%run set_environment.py

## vars established by set_environment.py
# base_dir - root directory for working files
# image_folder - fully qualified folder where images are stored
# image_metadata_folder - fully qualified folder where image_metadata is stored
# sqlite3_dbname - fully qualified database file name
# output_folder - default location to write output files

Folder 'D://projects_working_directories//imagery_analysis//20250210_indonesia//' already exists.
Folder 'D://projects_working_directories//imagery_analysis//20250210_indonesia////images' already exists.
Folder 'D://projects_working_directories//imagery_analysis//20250210_indonesia////images_metadata' already exists.
Folder 'D://projects_working_directories//imagery_analysis//20250210_indonesia////output' already exists.


In [57]:
import os

import pandas as pd

In [58]:
# Load the sample image-metadata and detection workbooks from
# image_metadata_folder (a variable established by set_environment.py).
# os.path.join supplies the separator, so the path no longer depends on a
# hand-written '//' or on whether the folder variable ends with a slash.
metadata_file = 'sample_image_metadata.xlsx'
detection_file = 'sample_image_detections.xlsx'
df_metadata = pd.read_excel(os.path.join(image_metadata_folder, metadata_file))
df_detections = pd.read_excel(os.path.join(image_metadata_folder, detection_file))


In [59]:
# Collapse the detections to one row per (image_id, detection_label) by
# totalling percent_of_image, then order by image_id and percent_of_image,
# both descending.
#
# The column to total is selected explicitly before .sum(): GroupBy.sum takes
# `numeric_only` as its first positional parameter, so passing the column name
# to .sum(...) does not select a column -- it is merely treated as a truthy flag.
df_consolidated_detections = (
    df_detections[['image_id', 'detection_label', 'percent_of_image']]
    .groupby(['image_id', 'detection_label'])['percent_of_image']
    .sum()
    .reset_index()
    .sort_values(by=['image_id', 'percent_of_image'], ascending=False)
    .reset_index(drop=True)
)

In [60]:
def separate_detection_levels(detection_label, n_levels=5):
    """Split a '--'-delimited detection label into a fixed number of levels.

    Parameters
    ----------
    detection_label : str
        Hierarchical label such as 'construction--barrier--fence'. A missing
        value (None / NaN / pd.NA) is treated as a label with no levels.
    n_levels : int, default 5
        Number of entries in the returned Series. Must be at least 1.

    Returns
    -------
    pandas.Series
        Exactly ``n_levels`` strings. Labels with fewer parts are padded on
        the right with ''. Labels with more parts keep the surplus joined by
        '--' in the last entry, so nothing is dropped and the length never
        exceeds ``n_levels`` (callers assign the result to a fixed set of
        columns).

    Raises
    ------
    ValueError
        If ``n_levels`` is smaller than 1.
    """
    if n_levels < 1:
        raise ValueError(f"n_levels must be >= 1, got {n_levels}")

    # A missing label has no hierarchy at all: return only padding.
    if not isinstance(detection_label, str) and pd.isna(detection_label):
        return pd.Series([''] * n_levels)

    # maxsplit caps the number of parts at n_levels; any deeper levels stay
    # together in the final part instead of lengthening the result.
    hierarchy = detection_label.split('--', n_levels - 1)
    hierarchy.extend([''] * (n_levels - len(hierarchy)))
    return pd.Series(hierarchy)

# Add one column per hierarchy level (detection_lvl_0 .. detection_lvl_4),
# filled from the Series that separate_detection_levels returns for each label.
detection_level_columns = [f'detection_lvl_{depth}' for depth in range(5)]
df_consolidated_detections[detection_level_columns] = (
    df_consolidated_detections['detection_label'].apply(separate_detection_levels)
)

In [61]:
# Reorder the columns: identifiers first, then the per-level breakdown,
# then the aggregated percent_of_image.
column_order = (
    ['image_id', 'detection_label']
    + [f'detection_lvl_{depth}' for depth in range(5)]
    + ['percent_of_image']
)
df_consolidated_detections = df_consolidated_detections[column_order]

In [62]:
# Show every consolidated detection row belonging to one sample image.
sample_image_rows = df_consolidated_detections['image_id'].eq(1192053898413677)
df_consolidated_detections.loc[sample_image_rows]

Unnamed: 0,image_id,detection_label,detection_lvl_0,detection_lvl_1,detection_lvl_2,detection_lvl_3,detection_lvl_4,percent_of_image
0,1192053898413677,nature--vegetation,nature,vegetation,,,,21.245164
1,1192053898413677,construction--structure--building,construction,structure,building,,,13.577369
2,1192053898413677,void--dynamic,void,dynamic,,,,10.744681
3,1192053898413677,void--static,void,static,,,,9.426015
4,1192053898413677,void--unlabeled,void,unlabeled,,,,9.0353
5,1192053898413677,object--wire-group,object,wire-group,,,,8.595745
6,1192053898413677,void--ground,void,ground,,,,8.546905
7,1192053898413677,construction--barrier--fence,construction,barrier,fence,,,6.593327
8,1192053898413677,nature--sky,nature,sky,,,,5.860735
9,1192053898413677,object--support--pole,object,support,pole,,,4.639749


## different ways to search or aggregate

In [63]:
## return all detection_labels that are in the "construction" category
# The category is the first hierarchy level, so compare detection_lvl_0 exactly.
# A str.contains search on the full label is a regex substring match: it would
# also match "construction" at a deeper level or inside a longer word, and it
# yields NaN (unusable as a boolean mask) for missing labels.
df_consolidated_detections[df_consolidated_detections.detection_lvl_0 == 'construction']

Unnamed: 0,image_id,detection_label,detection_lvl_0,detection_lvl_1,detection_lvl_2,detection_lvl_3,detection_lvl_4,percent_of_image
1,1192053898413677,construction--structure--building,construction,structure,building,,,13.577369
7,1192053898413677,construction--barrier--fence,construction,barrier,fence,,,6.593327
11,1192053898413677,construction--flat--sidewalk,construction,flat,sidewalk,,,1.367505
14,935066530801561,construction--structure--building,construction,structure,building,,,14.391951
17,935066530801561,construction--flat--road,construction,flat,road,,,11.608774
36,817255772895577,construction--structure--building,construction,structure,building,,,20.393683
38,817255772895577,construction--flat--road,construction,flat,road,,,10.99436
44,817255772895577,construction--barrier--wall,construction,barrier,wall,,,3.702764
51,817255772895577,construction--barrier--fence,construction,barrier,fence,,,0.797518
56,817255772895577,construction--flat--sidewalk,construction,flat,sidewalk,,,0.170897


In [64]:
## which detections in image_id x are "fence" or "sidewalk", and what percent does each take up?
is_fence_or_sidewalk = df_consolidated_detections['detection_lvl_2'].isin(['fence', 'sidewalk'])
is_target_image = df_consolidated_detections['image_id'] == 1192053898413677
df_consolidated_detections.loc[is_fence_or_sidewalk & is_target_image]

Unnamed: 0,image_id,detection_label,detection_lvl_0,detection_lvl_1,detection_lvl_2,detection_lvl_3,detection_lvl_4,percent_of_image
7,1192053898413677,construction--barrier--fence,construction,barrier,fence,,,6.593327
11,1192053898413677,construction--flat--sidewalk,construction,flat,sidewalk,,,1.367505


In [65]:
## total percent of image_id x taken up by "fence" or "sidewalk" combined
is_fence_or_sidewalk = df_consolidated_detections['detection_lvl_2'].isin(['fence', 'sidewalk'])
is_target_image = df_consolidated_detections['image_id'] == 1192053898413677
df_consolidated_detections.loc[is_fence_or_sidewalk & is_target_image, 'percent_of_image'].sum()

7.960831721470019

In [68]:
# Save the consolidated detections as an Excel workbook, without the row index.
# os.path.join builds the path instead of a hand-written '//' separator.
# NOTE(review): this writes into image_metadata_folder rather than output_folder,
# which set_environment.py describes as the default location for output files --
# confirm that is intended.
df_consolidated_detections.to_excel(
    os.path.join(image_metadata_folder, 'sample_consolidated_detections.xlsx'),
    index=False,
)

In [71]:
# Look up the consolidated 'nature--vegetation' row for one image, showing
# only the identifying columns and the aggregated percentage.
is_vegetation_in_image = (
    (df_consolidated_detections['image_id'] == 1192053898413677)
    & (df_consolidated_detections['detection_label'] == 'nature--vegetation')
)
df_consolidated_detections.loc[
    is_vegetation_in_image,
    ['image_id', 'detection_label', 'percent_of_image'],
]

Unnamed: 0,image_id,detection_label,percent_of_image
0,1192053898413677,nature--vegetation,21.245164
