In [1]:
import pandas as pd
import os
import json
from datetime import datetime
import shutil

In [2]:
# load localization to db
# Every input of the import step lives under the same test-case folder,
# so the individual paths are derived from that one prefix.
_petra_meta_root = '../testcases/petra_meta/'
meta_path = _petra_meta_root + 'meta/'
user_path = _petra_meta_root + 'users.csv'
image_path = _petra_meta_root + 'images.csv'
schema_path = _petra_meta_root + 'label_schema.csv'


def generate_annotate_json(meta_path, label_name_data):
    """Parse the localization ``.txt`` files of a directory into region dicts.

    Every entry of ``meta_path`` whose extension is exactly ``.txt`` is read.
    Each non-blank line is split on commas and interpreted as
    ``label_name,prob,L,T,R,B``; leading whitespace of the line is ignored.
    Entries with any other extension are skipped.

    Args:
        meta_path: Directory that holds the localization files.  A trailing
            path separator is optional.
        label_name_data: Mapping from label name to a dict that contains at
            least a ``'label_id'`` entry.

    Returns:
        A dict that maps each file name (without the ``.txt`` extension) to
        the list of regions found in that file.  Each region is a dict with
        the keys ``labelType`` (always ``'AI'``), ``labelID``, ``prob``,
        ``timestamp``, ``L``, ``T``, ``R`` and ``B``.  The values read from
        the file are kept as strings.

    Raises:
        SystemExit: If a line names a label that is not a key of
            ``label_name_data`` (a message is printed first).
        IndexError: If a non-blank line has fewer than six fields.
    """
    # One timestamp, cut to millisecond precision, is shared by all regions.
    current_time = datetime.now().strftime("%m/%d/%Y, %H:%M:%S.%f")[:-3]
    localization_dic = {}
    for entry in os.listdir(meta_path):
        filename, extension = os.path.splitext(entry)
        if extension != '.txt':
            # Not a localization file; opening "<entry>.txt" would only fail.
            continue
        regions = []
        # The context manager closes the file even when parsing fails and
        # lets an error raised by open() itself propagate unchanged.
        with open(os.path.join(meta_path, entry), 'r') as meta_file:
            for line in meta_file:
                line = line.lstrip().rstrip('\n')
                if not line:
                    # Blank lines carry no region.
                    continue
                row = line.split(',')
                if row[0] not in label_name_data:
                    print(f"unknown label name {row[0]}, please check your label_schema.csv!")
                    # Same exception the former exit() call raised, without
                    # depending on the helper injected by the site module.
                    raise SystemExit
                regions.append({
                    'labelType': 'AI',
                    'labelID': label_name_data[row[0]]['label_id'],
                    'prob': row[1],
                    'timestamp': current_time,
                    'L': row[2],
                    'T': row[3],
                    'R': row[4],
                    'B': row[5],
                })
        localization_dic[filename] = regions
    return localization_dic
    
def stratify_schema(label_schema):
    """Index the label schema by label id.

    Reads the ``M_label_id``, ``M_label_type`` and ``M_label_parent``
    columns, addressing the rows by the labels ``0 .. len-1``.

    Returns:
        ``{M_label_id: {'label_id', 'label_type', 'label_parent'}}`` where
        ``label_type`` has been converted to ``int``.
    """
    def describe(row):
        return {
            'label_id': label_schema.at[row, 'M_label_id'],
            'label_type': int(label_schema.at[row, 'M_label_type']),
            'label_parent': label_schema.at[row, 'M_label_parent'],
        }

    return {
        label_schema.at[row, 'M_label_id']: describe(row)
        for row in range(len(label_schema))
    }

def stratify_schema_name(label_schema):
    """Index the label schema by label name.

    Same record layout as the id-keyed variant, but the keys come from the
    ``M_label_name`` column.  Rows are addressed by the labels
    ``0 .. len-1``; ``label_type`` is converted to ``int``.
    """
    by_name = {}
    row = 0
    row_count = len(label_schema)
    while row < row_count:
        record = dict(
            label_id=label_schema.at[row, 'M_label_id'],
            label_type=int(label_schema.at[row, 'M_label_type']),
            label_parent=label_schema.at[row, 'M_label_parent'],
        )
        by_name[label_schema.at[row, 'M_label_name']] = record
        row += 1
    return by_name

def stratify_image(image_data):
    """Index the image table by image id.

    Args:
        image_data: DataFrame with the columns ``M_image_id`` and
            ``M_image_name``.

    Returns:
        ``{M_image_id: {'image_id': ..., 'image_name': ...}}`` where
        ``image_name`` is the file name with only its final extension
        removed.
    """
    image_dic = {}
    for image_id, image_name in zip(image_data['M_image_id'],
                                    image_data['M_image_name']):
        image_dic[image_id] = {
            'image_id': image_id,
            # splitext drops just the last extension, so names that contain
            # further dots ("fig.v2.png", "sub.dir/img.jpg") keep their stem.
            'image_name': os.path.splitext(image_name)[0],
        }
    return image_dic

def _empty_annotation_row(row_id, image_id, username, label_data):
    """Return one annotation row with all flags and label columns unset."""
    row = {
        'id': row_id,
        'image_id': image_id,
        'username': username,
        'annotation_log': "",
        'log_dates': "",
        'is_error_image': 0,
        'need_discuss': 0,
        'marked_fun': 0,
        'marked_OK': 0,
        'checked_caption': 0,
        'checked_paper': 0,
    }
    for label, info in label_data.items():
        label_type = int(info['label_type'])
        if label_type == 1:
            # Type 1: a 0/1 flag column named after the label itself.
            row[label] = 0
        elif label_type == 2:
            # Type 2: the column belongs to the parent label and later
            # receives the id of the chosen child label.
            row[info['label_parent']] = ""
        elif label_type in (3, -1):
            row[label] = ""
        # Type 0 (and any other value) gets no column.
    return row


def _apply_localization(row, regions, label_data):
    """Mark the labels found in ``regions`` on ``row`` and store the regions."""
    for bbox in regions:
        info = label_data[bbox['labelID']]
        if info['label_type'] == 1:
            row[bbox['labelID']] = 1
        elif info['label_type'] == 2:
            row[info['label_parent']] = bbox['labelID']
    row['regions'] = json.dumps(regions)


def load_localization_to_db(replaceDB = True, output_path = '../testcases/petra_example1/annotations.csv'):
    '''
    Build the annotation table from the localization files and write it
    to a CSV file.

    One row is produced per (user, assigned image) pair listed in the users
    CSV. Label columns are pre-filled from the label schema, and images that
    have a localization file get their regions stored as a JSON string in
    the 'regions' column; all other rows get an empty 'regions' value.

    @replaceDB: if the original database will be overwritten. When False
        nothing is read or written and the existing file is left untouched
        (merging into an existing database is not implemented).
    @output_path: where the annotation CSV is written.

    Raises KeyError when a user is assigned an image id that is missing
    from the images CSV.
    '''
    if not replaceDB:
        return

    label_schema = pd.read_csv(schema_path, encoding = 'utf-8')
    label_name_data = stratify_schema_name(label_schema)
    label_data = stratify_schema(label_schema)
    image_data = stratify_image(pd.read_csv(image_path, encoding = 'utf-8'))
    localization_dic = generate_annotate_json(meta_path, label_name_data)

    user_data = pd.read_csv(user_path, encoding = 'utf-8')
    anno_table_list = []
    for username, raw_assignment in zip(user_data['M_username'],
                                        user_data['M_assignment_by_image_id']):
        # read_csv turns an empty cell into NaN: such a user has no images.
        if pd.isna(raw_assignment):
            continue
        for image_id in str(raw_assignment).split(';'):
            if image_id == '':
                # Empty token, e.g. an empty cell or a trailing ';'.
                continue
            # Row ids are consecutive in creation order.
            anno_table_dic = _empty_annotation_row(
                len(anno_table_list), image_id, username, label_data)
            image_name = image_data[image_id]['image_name']
            if image_name in localization_dic:
                _apply_localization(
                    anno_table_dic, localization_dic[image_name], label_data)
            else:
                anno_table_dic['regions'] = ""
            anno_table_list.append(anno_table_dic)

    df = pd.DataFrame(anno_table_list)
    df.to_csv(output_path, index=False)

# Run the import with its defaults; this writes the annotations CSV.
load_localization_to_db()

In [45]:
# export localization from db to .txt files

'''
inputs:
* annotations.csv
* images.csv
'''
# NOTE: besides the two files listed above, this cell also reads users.csv
# and label_schema.csv (user_path and schema_path below).
# These assignments rebind the path names used by the import cell to the
# ./test fixtures.
image_path = './test/images.csv'
annotation_path = './test/annotations.csv'
user_path = './test/users.csv'
schema_path = './test/label_schema.csv'

def stratify_schema(label_schema):
    """Index the exported label schema by label id.

    Reads the ``label_id``, ``label_name``, ``label_type`` and
    ``label_parent`` columns, addressing the rows by the labels
    ``0 .. len-1``.

    Returns:
        ``{label_id: {'label_id', 'label_name', 'label_type',
        'label_parent'}}`` where ``label_type`` has been converted to
        ``int``.
    """
    def describe(row):
        return {
            'label_id': label_schema.at[row, 'label_id'],
            'label_name': label_schema.at[row, 'label_name'],
            'label_type': int(label_schema.at[row, 'label_type']),
            'label_parent': label_schema.at[row, 'label_parent'],
        }

    return {
        label_schema.at[row, 'label_id']: describe(row)
        for row in range(len(label_schema))
    }

def write_file(filename, label_data, regions):
    """Write one localization file, one ``name,prob,L,T,R,B`` line per region.

    Args:
        filename: Path of the file to create or overwrite.
        label_data: Mapping from label id to a dict with a ``'label_name'``
            entry.
        regions: Iterable of region dicts with the keys ``labelID``,
            ``prob``, ``L``, ``T``, ``R`` and ``B``.

    Raises:
        OSError: If the file cannot be opened for writing.
        KeyError: If a region refers to a label id missing from
            ``label_data`` or lacks one of the expected keys.
    """
    # The context manager closes the file on every path and, unlike a
    # try/finally around open(), does not hide a failure of open() itself.
    with open(filename, mode="w") as file:
        for region in regions:
            file.write(f"{label_data[region['labelID']]['label_name']},{region['prob']},{region['L']},{region['T']},{region['R']},{region['B']}\n")

def export_db_to_localization(output_dir = './meta'):
    """Export the regions stored in the annotation CSV to ``.txt`` files.

    For every user listed in the users CSV that owns at least one annotation
    row, a sub-directory named after the user is created inside
    ``output_dir``.  Each annotation row with stored regions is written to
    ``<output_dir>/<user>/<image name without extension>.txt``; directories
    contained in the image name are created as needed.  Rows whose
    ``regions`` cell is empty produce no file.

    Args:
        output_dir: Root directory of the export.  WARNING: an existing
            directory at this path is deleted and recreated on every call.
    """
    image_data = pd.read_csv(image_path, encoding = 'utf-8')
    image_dic = image_data.set_index('image_id')
    annotation_data = pd.read_csv(annotation_path, encoding = 'utf-8')
    label_schema = pd.read_csv(schema_path, encoding = 'utf-8')
    label_data = stratify_schema(label_schema)

    # Always start from an empty output directory.
    if os.path.isdir(output_dir):
        shutil.rmtree(output_dir)
    os.makedirs(output_dir)

    user_data = pd.read_csv(user_path, encoding = 'utf-8')
    for user in user_data['username'].unique():
        annotation_sub_data = annotation_data.loc[annotation_data['username'] == user]
        # Check this user's rows, not the whole table, so that users
        # without annotations do not get an empty directory.
        if annotation_sub_data.empty:
            continue
        # Joining with '' leaves a trailing separator on the user directory.
        root_path = os.path.join(output_dir, str(user), '')
        os.mkdir(root_path)
        for image_id, raw_regions in zip(annotation_sub_data['image_id'],
                                         annotation_sub_data['regions']):
            # An empty regions cell is read back as NaN, which json.loads
            # cannot parse; such a row has nothing to export.
            if pd.isna(raw_regions):
                continue
            image_name = image_dic.loc[image_id]['image_name']
            regions = json.loads(raw_regions)
            file_name = root_path + os.path.splitext(image_name)[0] + '.txt'
            # Image names may contain sub-directories.
            os.makedirs(os.path.dirname(file_name), exist_ok=True)
            write_file(file_name, label_data, regions)

# Run the export; the ./meta directory is removed and recreated by this call.
export_db_to_localization()

In [15]:
# Scratch check: index the images CSV by image_id and look up the file name
# stored for image 'I1'.
image_data = pd.read_csv(image_path, encoding = 'utf-8')
image_dic = image_data.set_index('image_id')
# Last expression of the cell, so its value is what the cell displays.
image_dic.loc['I1']['image_name']

'paper2_01.png'