## Objectives
### 1) Combine box and plate annotation line into 1 txt file per 1 photo
### 2) Combine box and plate annotation line into 1 detections csv file - 1 annotation 1 line
### 3) Create 1 summary file containing 
####    - total objects detected 
####    - box detected
####    - plate detected 
####    - top confidence 
####    - box confidence 
####    - plate confidence

In [2]:
!pip install pandas

Collecting pandas
  Using cached pandas-1.1.5-cp36-cp36m-manylinux1_x86_64.whl (9.5 MB)
Collecting pytz>=2017.2
  Using cached pytz-2022.1-py2.py3-none-any.whl (503 kB)
Collecting numpy>=1.15.4
  Using cached numpy-1.19.5-cp36-cp36m-manylinux2010_x86_64.whl (14.8 MB)
Installing collected packages: pytz, numpy, pandas
Successfully installed numpy-1.19.5 pandas-1.1.5 pytz-2022.1
You should consider upgrading via the '/usr/local/bin/python -m pip install --upgrade pip' command.[0m


In [85]:
import os
import pandas as pd
import numpy as np
import re
import ast
import shutil

{0: 'Box', 1: 'Plate'}


In [122]:
def copytree(src, dst, symlinks=False, ignore=None):
    for item in os.listdir(src):
        s = os.path.join(src, item)
        d = os.path.join(dst, item)
        if os.path.isdir(s):
            shutil.copytree(s, d, symlinks, ignore)
        else:
            shutil.copy2(s, d)

def combine_annotation(trg_dict, trg_path ,imagefile_path):
    ## get image ids
    image_ids = [re.split('\.',f )[0] for f in os.listdir(imagefile_path) if os.path.isfile(os.path.join(imagefile_path, f))]
    
    ## prepare df schema
    coltype_list = [
        ('image_id', str),
        ('class_id', int),
        ('x_center', float),
        ('y_center', float),
        ('width', float),
        ('height', float),
        ('confidence', float)
        ]
    dtypes = np.dtype(coltype_list)
    trg_cols = [i[0] for i in coltype_list]
#     print(f"trg_cols:{trg_cols}")
#     df_collate = pd.DataFrame(np.empty(0, dtype=dtypes))
    collate_list = []
    image_no_annt_list = []
    ## iterate over image ids X class detection permutation
    for img in image_ids:
        image_lvl_list = []
        image_lvl_w_id_list = []
        ## iterate over box and plate detection of the same photo
        for key,val in trg_dict.items():
            filepath = os.path.join(val['detect_path'], f"{img}.txt")
            ## check whether there is any detection for the photo
            if os.path.exists(filepath):
                ## collect annotation line
                dataline = [ast.literal_eval(i) for i in 
                            re.split(' ',open(filepath, "r").read().splitlines()[0])]
                dataline[0] = val['class_id']
                image_lvl_list.append(dataline)
                
                dataline_w_id = [img]+dataline
#                 print(f"dataline_w_id: {dataline_w_id}")
                image_lvl_w_id_list.append(dataline_w_id)
#                 print(f"image_lvl_w_id_list: {image_lvl_w_id_list}")
                
            
        ## check whether there is at least 1 annotation from box or plate detection
#         print(f"image_lvl_list:{image_lvl_list}")
#         print(f"len(image_lvl_list): {len(image_lvl_list)}")
        if len(image_lvl_list) >0:
            trg_imglvl_filepath = os.path.join(trg_path, f"{img}.txt")
            img_textfile = open(trg_imglvl_filepath, "w")

            for element in image_lvl_list:
                img_textfile.write(str(element) + "\n")
            img_textfile.close()
            
            for element in image_lvl_w_id_list:
                collate_list.append(element)
#                 print(f"collate_list: {collate_list}")
        
        ## if no annoation for the image, collect image id into a list
        else:
#             print("no annotation for this image")
#             print(f"img:{img}")
            for key in trg_dict.keys():
                dataline_no_annt = [img, key, np.nan, np.nan, np.nan, np.nan, np.nan]
#                 print(f"dataline_no_annt head: {dataline_no_annt[:10]}")
                image_no_annt_list.append(dataline_no_annt)
#                 print(f"image_no_annt_list head: {image_no_annt_list[:10]}")

    collate_filepath = os.path.join(trg_path, f"collate.txt")
    collate_textfile = open(collate_filepath, "w")
    for element in collate_list:
        collate_textfile.write(str(element) + "\n")
    collate_textfile.close()
    
    combined_rows = collate_list + image_no_annt_list
#     print(f"image_no_annt_list head: {image_no_annt_list[:10]}")
#     print(f"combined_rows head: {combined_rows[:10]}")
    df_collate = pd.DataFrame(combined_rows,columns=trg_cols)
    df_collate['class_name'] = df_collate['class_id'].apply(lambda x: None if 
                                                         x not in class_dict.keys() else 
                                                         str(class_dict[x]).lower())
    
    ## group and transpose into image level sumry
    aggregations = {
    'detection_cnt':'sum',
    'confidence':'max'
    }
    aggcols = [i for i in aggregations.keys()]

    df_collate['detection_cnt'] = df_collate['confidence'].apply(lambda x: 0 if pd.isnull(x) else 1)

    df_imglvl = df_collate.groupby(
        by=['image_id', 'class_id', 'class_name'],as_index=False)[aggcols].agg(aggregations).pivot_table(
                index=['image_id'],
                columns=['class_name'],
                values=['detection_cnt', 'confidence'],
                aggfunc={'detection_cnt':np.sum, 'confidence':np.max},
                fill_value=0
            ).reset_index()
    
    print(f"df_imglvl columns: {df_imglvl.columns}")
    print(f"df_imglvl head: {df_imglvl.head}")
    df_imglvl[('detection_cnt','sum')] = df_imglvl['detection_cnt'].sum(axis=1)
    df_imglvl[('confidence','max')] = df_imglvl['confidence'].max(axis=1)
    
    ## re-order columns
    conf_cols = [i for i in df_imglvl.columns.values if i[0] == 'confidence']
    detection_cols = [i for i in df_imglvl.columns.values if i[0] == 'detection_cnt']
    rest_cols = [i for i in df_imglvl.columns.values if 
                 all([i not in col_list for col_list in [conf_cols,detection_cols]])]
    ordered_cols = rest_cols + detection_cols + conf_cols
    df_imglvl = df_imglvl[ordered_cols]

    detection_cnt_filepath = os.path.join(trg_path, 'detection_sumry.csv')
    df_imglvl.to_csv(detection_cnt_filepath)

In [124]:
if __name__ == "__main__":
    
    trg_dict = {
        'Box': {
            'detect_path':os.path.join(os.getcwd(),'runs/detect/boxmodel/exp/labels'),
            'class_id':0
        },
        'Plate': {
            'detect_path':os.path.join(os.getcwd(),'runs/detect/platemodel/exp/labels'),
            'class_id':1
        }
    }

    class_dict = {}
    for key,val in trg_dict.items():
        class_dict[val['class_id']] = key

    # print(trg_dict)
    # print(class_dict)
    
    detect_main_path = os.path.join(os.getcwd(),'runs/detect')
    main_dst_path = os.path.join(detect_main_path,'combined')
    if os.path.exists(main_dst_path):
        shutil.rmtree(main_dst_path)
    os.mkdir(main_dst_path)
    
    trg_label_path = os.path.join(main_dst_path,f"labels")
    if os.path.exists(trg_label_path):
        shutil.rmtree(trg_label_path)
    os.mkdir(trg_label_path)
    imagefile_path = 'bangkok_photos'
    
    ## generate combined annotation
    combine_annotation(trg_dict, trg_label_path ,imagefile_path)
    
    ## copy labeled photos model by model
    for key,val in class_dict.items():
        model = str(val).lower() + 'model'
        src = os.path.join(detect_main_path,f"{model}")
        dst = os.path.join(main_dst_path,f"{model}")
        if os.path.exists(dst):
            shutil.rmtree(dst)
        os.mkdir(dst)
        copytree(src=src, dst=dst, symlinks=False, ignore=None)

df_imglvl columns: MultiIndex([(     'image_id',      ''),
            (   'confidence',   'box'),
            (   'confidence', 'plate'),
            ('detection_cnt',   'box'),
            ('detection_cnt', 'plate')],
           names=[None, 'class_name'])
df_imglvl head: <bound method NDFrame.head of                image_id confidence           detection_cnt      
class_name                     box     plate           box plate
0           item_176784   0.768101  0.506685             1     1
1           item_176808   0.716463  0.000000             1     0
2           item_176815   0.384043  0.926138             1     1
3           item_176817   0.894981  0.904000             1     1
4           item_176864   0.952070  0.922956             1     1
..                  ...        ...       ...           ...   ...
986         item_205697   0.604707  0.653710             1     1
987         item_205715   0.824267  0.738132             1     1
988         item_205800   0.923140  0.934128 

In [126]:
# shutil.rmtree('bangkok_photos')