In [None]:
import matplotlib.pyplot as plt
import matplotlib.patches as patches
from matplotlib import animation
from matplotlib.animation import FuncAnimation
from IPython import display
import numpy as np
import pandas as pd
from ast import literal_eval
from PIL import Image
from os import path, listdir
import re
import json

# Machine-specific locations: the ffmpeg executable used to encode animations and the
# dataset root folder. NOTE(review): both are absolute local paths and must be edited
# before the notebook is run on another machine.
ffmpeg_binpath = r'C:/Program Files/ffmpeg-4.2.2-win64/bin/ffmpeg.exe'
root_path = r"D:/Sources/BenedettiniBBox/2_Points_of_Interest_Recognition/Points Of Interest Recognition/Monastero dei Benedettini/"

# One sub-folder per dataset split, each directly under root_path.
train_path, test_path, val_path = (
    path.join(root_path, split_dir) for split_dir in ("Training/", "Test/", "Validation/")
)

In [None]:
def createDataFrameTrainingClassesMap(train_path):
    """Build the class-name -> integer-label table from the training folder.

    Every entry of ``train_path`` is taken as a class name. Names are split on
    '.' and '_' and must start with two integer fields (e.g. ``"5.1_Name"``);
    classes are ordered by ``first * 100 + second`` and labelled 0, 1, 2, ...
    in that order.

    Parameters:
        train_path: directory whose entries are the class folders.

    Returns:
        pandas.DataFrame indexed by ``class_name`` with one column,
        ``class_label``.

    Raises:
        ValueError: an entry name does not start with two integer fields.
        IndexError: an entry name has fewer than two fields.
    """
    # Raw string: '\.' inside a plain literal is an invalid escape sequence.
    field_separator = re.compile(r'[._]')

    def numeric_order(class_name):
        fields = field_separator.split(class_name)
        return int(fields[0]) * 100 + int(fields[1])

    class_names = sorted(listdir(train_path), key=numeric_order)

    # Label every class that was found; a fixed range(37) silently dropped
    # everything after the 37th class.
    records = [(class_name, class_label) for class_label, class_name in enumerate(class_names)]
    classes_map_df = pd.DataFrame(records, columns=['class_name', 'class_label'])
    return classes_map_df.set_index('class_name')

# Build the class lookup table from the training folder and cache it as a pickle
# in the current working directory.
classes_map_df = createDataFrameTrainingClassesMap(train_path)
classes_map_df.to_pickle('classes_map_df.pickle')

# Alternative to the two lines above: reload the cached table instead of rebuilding it.
#classes_map_df = pd.read_pickle('classes_map_df.pickle')

# Bare expression so the notebook renders the table as this cell's output.
classes_map_df

In [None]:
# Category table shared by every exported dataset: one {'id', 'name'} entry per class,
# in the row order of classes_map_df. The id is taken from the first column of
# classes_map_df, selected explicitly by position with .iloc (integer keys on a
# label-indexed Series, as in item[0], are deprecated in pandas).
D_categories = {
    "categories": [
        {'id': int(class_label), 'name': class_name}
        for class_name, class_label in classes_map_df.iloc[:, 0].items()
    ]
}

In [None]:
import re
import pandas as pd
from os import listdir

def createDataFrameTrainingClassFromCSV(class_path):
    """Load one class's box annotations from its two CSV files.

    The class name is the last component of ``class_path`` (a trailing
    separator is ignored). ``<class_name>_1.csv`` and ``<class_name>_2.csv``
    are read from ``class_path`` and interleaved row by row: the rows of
    ``_2.csv`` take positions 0, 2, 4, ... and the rows of ``_1.csv`` take
    positions 1, 3, 5, ... Rows whose ``region_count`` is 0 are then dropped.

    Parameters:
        class_path: folder of a single class.

    Returns:
        pandas.DataFrame with a fresh 0..n-1 index and the columns
        ``#filename``, ``region_count`` and ``region_shape_attributes``.

    Raises:
        FileNotFoundError: one of the two CSV files is missing; the message
            names the missing file.
        KeyError: a required column is absent from the CSV files.
    """
    # normpath drops a trailing separator, so both "a/b" and "a/b/" give "b".
    class_name = path.basename(path.normpath(class_path))

    def read_half(suffix, first_position):
        """Read <class_name>_<suffix>.csv and index its rows first_position, +2, +4, ..."""
        csv_name = '{}_{}.csv'.format(class_name, suffix)
        try:
            half = pd.read_csv(path.join(class_path, csv_name))
        except FileNotFoundError:
            raise FileNotFoundError("file {} not found".format(csv_name))
        half.index = pd.RangeIndex(first_position, 2 * len(half), 2)
        return half

    # _1.csv is read first so that, when both files are missing, the error names _1.csv.
    # pd.concat replaces DataFrame.append, which no longer exists in pandas 2.x.
    df = pd.concat([read_half(1, 1), read_half(2, 0)])
    df = df[df['region_count'] != 0].sort_index().reset_index(drop=True)
    return df[['#filename', 'region_count', 'region_shape_attributes']]
    
def createDataFrameTrainingClassFromTxt(class_path):
    """Collect one class's per-image ``.txt`` box annotations into a DataFrame.

    Every ``*.txt`` file in ``<class_path>/images`` is paired with the ``.jpg``
    of the same stem. Each non-blank line must hold five numbers: a leading
    value that is ignored, the box centre (x, y) and the box size
    (width, height). Centre and size are multiplied by the image width/height
    here, i.e. they are read as fractions of the image size.

    Parameters:
        class_path: folder of a single class; must contain an ``images`` folder.

    Returns:
        pandas.DataFrame with one row per box and the columns ``#filename``,
        ``region_count`` (number of boxes in that image's file) and
        ``region_shape_attributes`` (a ``{"name":"rect","x":..,"y":..,
        "width":..,"height":..}`` string in pixels, x/y being the top-left
        corner).

    Raises:
        FileNotFoundError: the ``images`` folder does not exist.
        ValueError: an annotation file name has no integer frame number, or a
            line does not hold exactly five numbers.
    """
    images_dir = path.join(class_path, 'images')

    annotations_file_name = [name for name in listdir(images_dir) if name.endswith('.txt')]
    # Order by frame number. Raw string: '\.' in a plain literal is an invalid escape.
    annotations_file_name.sort(key=lambda name: int(re.split(r'frame_000|\.', name)[1]))

    list_df = []
    for annotation_file_name in annotations_file_name:
        filename = annotation_file_name[:-3] + 'jpg'
        try:
            # Context managers close both handles; previously they were left open.
            with Image.open(path.join(images_dir, filename)) as img:
                im_width, im_height = img.size
            with open(path.join(images_dir, annotation_file_name)) as annotation_file:
                # Blank lines carry no box and would break the 5-value unpack below.
                region_shape_attributes_list = [line.split() for line in annotation_file if line.strip()]
        except FileNotFoundError:
            # Best effort, as before: an annotation without its image is skipped.
            continue

        region_count = len(region_shape_attributes_list)
        for region_fields in region_shape_attributes_list:
            _, center_box_w, center_box_h, box_w, box_h = map(float, region_fields)
            # Convert centre/size fractions to a top-left corner and size in pixels.
            region_shape_attributes = '{{"name":"rect","x":{},"y":{},"width":{},"height":{}}}'.format(
                int(center_box_w*im_width - (box_w*im_width)//2),
                int(center_box_h*im_height - (box_h*im_height)//2),
                int(box_w*im_width),
                int(box_h*im_height)
            )
            list_df.append([filename, region_count, region_shape_attributes])

    return pd.DataFrame(list_df, columns=['#filename', 'region_count', 'region_shape_attributes'])

def createTrainingDatasetJSONFromCSV(train_path, classes_map_df):
    """Build the training annotation list, preferring each class's CSV files.

    For every class in ``classes_map_df`` the annotations are loaded with
    ``createDataFrameTrainingClassFromCSV``. When a CSV file is missing, a
    message is printed and the class's ``.txt`` annotations are loaded with
    ``createDataFrameTrainingClassFromTxt`` instead. Any other error while
    reading the CSV files is printed and the class is skipped.

    Parameters:
        train_path: folder containing one sub-folder per class.
        classes_map_df: DataFrame indexed by class name with exactly one
            column holding the class label.

    Returns:
        ``{"annotations": [...]}`` where each item has ``path``
        (``Training/<class>/images/<file>``), ``bbox`` (``[x, y, width,
        height]``) and ``class_id``.

    Raises:
        Whatever the ``.txt`` fallback raises (it is not guarded), and
        ``ValueError``/``SyntaxError`` from ``literal_eval`` when a stored
        region string is not a plain dict literal.
    """
    training_dataset_json = {"annotations": []}

    for class_name, (class_label,) in classes_map_df.iterrows():
        class_path = path.join(train_path, class_name)
        try:
            df = createDataFrameTrainingClassFromCSV(class_path)
        except FileNotFoundError as e:
            print(e, ", loading annotations from .txt files", sep="")
            df = createDataFrameTrainingClassFromTxt(class_path)
        except Exception as e:
            print(e)
            continue

        for img_name, bb_dict_str in zip(df['#filename'], df['region_shape_attributes']):
            # The region string comes from files on disk: parse it as a literal
            # rather than executing it with eval().
            bb_dict = literal_eval(bb_dict_str)

            training_dataset_json["annotations"].append({
                "path": "Training/{}/images/{}".format(class_name, img_name),
                "bbox": [bb_dict['x'], bb_dict['y'], bb_dict['width'], bb_dict['height']],
                "class_id": class_label
            })

    return training_dataset_json

def createTrainingDatasetJSONFromTxt(train_path, classes_map_df):
    """Build the training annotation list from the per-image ``.txt`` files only.

    For every class in ``classes_map_df`` the annotations are loaded with
    ``createDataFrameTrainingClassFromTxt``; if that raises, the error is
    printed and the class is skipped.

    Parameters:
        train_path: folder containing one sub-folder per class.
        classes_map_df: DataFrame indexed by class name with exactly one
            column holding the class label.

    Returns:
        ``{"annotations": [...]}`` where each item has ``path``
        (``Training/<class>/images/<file>``), ``bbox`` (``[x, y, width,
        height]``) and ``class_id``.
    """
    training_dataset_json = {"annotations": []}

    for class_name, (class_label,) in classes_map_df.iterrows():
        class_path = path.join(train_path, class_name)
        try:
            df = createDataFrameTrainingClassFromTxt(class_path)
        except Exception as e:
            print(e)
            continue

        for img_name, bb_dict_str in zip(df['#filename'], df['region_shape_attributes']):
            # Parse the stored dict literal without executing it (was eval()).
            bb_dict = literal_eval(bb_dict_str)

            training_dataset_json["annotations"].append({
                "path": "Training/{}/images/{}".format(class_name, img_name),
                "bbox": [bb_dict['x'], bb_dict['y'], bb_dict['width'], bb_dict['height']],
                "class_id": class_label
            })

    return training_dataset_json

In [None]:
def createTestDatasetJSONFromTxt(test_path, classes_map_df):
    """Build the test annotation list from ``<test_path>/bbox_annotations/<ID>.txt``.

    Folder IDs 100..166 are processed, except the excluded IDs listed below.
    Each annotation file is split on whitespace after its first (header) line
    is dropped. Per row, field 0 is the image file name, field 3 the region
    count, field 5 a dict literal with ``x``/``y``/``width``/``height`` and the
    last field a dict literal with a ``Label`` key. Rows with a region count
    of 0 and rows whose image ``<test_path>/<ID>/<file>`` is missing are
    skipped. A missing annotation file is reported with ``print`` and skipped.

    Parameters:
        test_path: folder containing ``bbox_annotations`` and one image folder
            per ID.
        classes_map_df: DataFrame indexed by class name with a ``class_label``
            column; the row whose name starts with ``<Label>_`` gives the id.

    Returns:
        ``{"annotations": [...]}`` where each item has ``path``
        (``Test/<ID>/<file>``), ``bbox`` (``[x, y, width, height]``) and
        ``class_id``.

    Raises:
        ValueError: a label matches zero or several class names (``.item()``),
            or a row holds a malformed count or dict literal.
    """
    test_dataset_json = {"annotations": []}

    S_unwanted_ID = {106, 121, 127, 128, 131, 133, 141}
    L_folder_ID = [ID for ID in range(100, 167) if ID not in S_unwanted_ID]

    for folder_ID in L_folder_ID:
        annotation_relpath = path.join("bbox_annotations", str(folder_ID)+".txt")
        try:
            # 'with' closes the file; the first line is a header and is dropped.
            with open(path.join(test_path, annotation_relpath)) as annotation_file:
                L_annotations = [line.split() for line in annotation_file][1:]
        except FileNotFoundError:
            print("file {} not found".format(annotation_relpath))
            continue

        for annotation in L_annotations:
            filename = annotation[0]
            if int(annotation[3]) == 0:
                continue

            try:
                # Opened only to confirm that the image exists and is readable;
                # the context manager closes the handle immediately.
                with Image.open(path.join(test_path, str(folder_ID), filename)):
                    pass
            except FileNotFoundError:
                continue

            bb_dict = literal_eval(annotation[5])
            class_name_start = literal_eval(annotation[-1])['Label']
            class_label = classes_map_df[classes_map_df.index.str.startswith(class_name_start+'_')]['class_label'].item()

            test_dataset_json["annotations"].append({
                "path": "Test/{}/{}".format(folder_ID, filename),
                "bbox": [bb_dict['x'], bb_dict['y'], bb_dict['width'], bb_dict['height']],
                "class_id": class_label
            })

    return test_dataset_json

In [None]:
def createValidationDatasetJSONFromTxt(val_path, classes_map_df):
    """Build the validation annotation list from ``<val_path>/bbox_annotations``.

    Every file in that folder is read as comma-separated text whose first line
    is a header. Doubled quotes are collapsed before splitting on ','. Per row,
    field 0 is the image file name (spaces become underscores), field 3 the
    region count, fields 5-9 together form the quoted box dict and the last
    field is a quoted dict with a ``Label`` key. Blank rows and rows with a
    region count of 0 are skipped. Rows that raise ``ValueError`` (unparseable
    values, or a label matching zero or several classes) are reported with
    ``print`` and skipped.

    Parameters:
        val_path: folder containing ``bbox_annotations``.
        classes_map_df: DataFrame indexed by class name with a ``class_label``
            column; the row whose name starts with ``<Label>_`` gives the id.

    Returns:
        ``{"annotations": [...]}`` where each item has ``path``
        (``Validation/images/<file>``), ``bbox`` (``[x, y, width, height]``)
        and ``class_id``.
    """
    val_dataset_json = {"annotations": []}
    annotations_dir = path.join(val_path, "bbox_annotations")

    for annotation_filename in listdir(annotations_dir):
        with open(path.join(annotations_dir, annotation_filename)) as annotation_file:
            # Strip the line ending here so the last field is sliced the same way
            # whether or not the file ends with a newline; [1:] drops the header.
            L_lines = [line.rstrip('\n') for line in annotation_file][1:]

        for line in L_lines:
            if not line.strip():
                continue
            annotation = line.replace('""', '"').split(',')

            # Reset per row: the handler below must not see the previous row's
            # label, nor an unbound name when the failure happens before the label is read.
            class_name_start = None
            try:
                filename = annotation[0].replace(" ", "_")
                if int(annotation[3]) == 0:
                    continue

                # The box dict contains commas, so the split spread it over fields 5..9.
                str_dict = ','.join(annotation[i] for i in range(5, 10))
                bb_dict = literal_eval(str_dict[1:-1])
                bbox = [bb_dict['x'], bb_dict['y'], bb_dict['width'], bb_dict['height']]
                class_name_start = literal_eval(annotation[-1][1:-1])['Label']
                class_label = classes_map_df[classes_map_df.index.str.startswith(class_name_start+'_')]['class_label'].item()

                val_dataset_json["annotations"].append({
                    "path": "Validation/images/{}".format(filename),
                    "bbox": bbox,
                    "class_id": class_label
                })
            except ValueError:
                if class_name_start is None:
                    print('malformed annotation skipped:', line)
                else:
                    print(class_name_start, 'label not recognized')

    return val_dataset_json

In [None]:
import matplotlib.pyplot as plt
import matplotlib.patches as patches
import pandas as pd
from PIL import Image

def imageBBFromJSONObject(root_path, sample_JSONObj):
    """Draw one annotated sample: the image with its bounding box outlined in red.

    Parameters:
        root_path: folder that ``sample_JSONObj['path']`` is relative to.
        sample_JSONObj: one annotation record with the keys ``path``,
            ``bbox`` (``[x, y, width, height]``) and ``class_id``.

    The title shows the first component of ``path``, the class id and the
    image file name. Nothing is returned; the figure is left open for the
    notebook to render.
    """
    # Create figure and axes
    fig, ax = plt.subplots(1)
    fig.set_size_inches(8, 5)

    path_parts = sample_JSONObj['path'].split('/')
    ax.set_title("[{}]  [{}]  {}".format(path_parts[0], sample_JSONObj['class_id'], path_parts[-1]))

    # imshow copies the pixel data, so the file can be closed straight away.
    with Image.open(path.join(root_path, sample_JSONObj['path'])) as img:
        ax.imshow(img)
    ax.axis('off')

    # Outline the annotated region on top of the image.
    x, y, width, height = sample_JSONObj['bbox']
    ax.add_patch(patches.Rectangle((x, y), width, height, linewidth=3, edgecolor='r', facecolor='none'))
    
    
from matplotlib import animation
from matplotlib.animation import FuncAnimation
from IPython import display
from PIL import Image
import numpy as np

# Point matplotlib's animation writer at the ffmpeg executable configured in ffmpeg_binpath.
plt.rcParams['animation.ffmpeg_path'] = ffmpeg_binpath

def videoBB(root_path, dataset_json, num_sample=10, interval=100):
    """Render evenly spaced annotated samples as an inline HTML5 video.

    Parameters:
        root_path: folder that each record's ``path`` is relative to.
        dataset_json: dict with an ``annotations`` list of records holding
            ``path``, ``bbox`` (``[x, y, width, height]``) and ``class_id``.
        num_sample: maximum number of frames; fewer are shown when the
            dataset holds fewer records.
        interval: forwarded to ``FuncAnimation`` as its ``interval`` argument.

    Raises:
        ValueError: ``num_sample`` is smaller than 1 or there are no annotations.

    The cell output is cleared and replaced by the video, then the figure is closed.
    """
    annotations = dataset_json['annotations']
    if num_sample < 1:
        raise ValueError("num_sample must be at least 1")
    if not annotations:
        raise ValueError("dataset_json contains no annotations")

    # Evenly spaced indices. The step is clamped to 1 because len // num_sample
    # is 0 (an invalid arange step) when there are fewer records than num_sample.
    step = max(1, len(annotations) // num_sample)
    samples_idx = np.arange(0, len(annotations), step)[:num_sample]

    fig = plt.figure(figsize=(13,8))
    ax = plt.gca()
    ax.axis('off')

    # White placeholder frame; each animation step swaps in the real image.
    ax_img = ax.imshow(Image.new('RGB', (1280, 720), color = 'white'))

    def animate(frame):
        sample_JSONObj = annotations[samples_idx[frame]]
        path_parts = sample_JSONObj['path'].split('/')
        ax.set_title("[{}]  [{}]  {}".format(path_parts[0], sample_JSONObj['class_id'], path_parts[-1]))

        # The pixel data is copied while the file is open; the handle is then closed.
        with Image.open(path.join(root_path, sample_JSONObj['path'])) as img:
            ax_img.set_array(img)

        # Drop the previous frame's box (iterate over a copy while removing).
        for old_patch in list(ax.patches):
            old_patch.remove()

        x, y, width, height = sample_JSONObj['bbox']
        ax.add_patch(patches.Rectangle((x, y), width, height, linewidth=3, edgecolor='r', facecolor='none'))

    # One frame per selected sample, which may be fewer than num_sample.
    anim = FuncAnimation(fig, animate, frames=len(samples_idx), interval=interval)
    video = anim.to_html5_video(embed_limit=10000)
    html = display.HTML(video)
    display.clear_output(wait=True)
    display.display(html)
    # Close this figure explicitly rather than whichever figure is current.
    plt.close(fig)

In [None]:
# Test split: the annotation records merged with the shared category table.
test_dataset_json = {
    **createTestDatasetJSONFromTxt(test_path, classes_map_df),
    **D_categories,
}

In [None]:
# Optional peek at the first three records:
#print(json.dumps(test_dataset_json['annotations'][:3], indent=2))
# Number of test annotations (shown as the cell output).
len(test_dataset_json['annotations'])

In [None]:
# Training split: the annotation records merged with the shared category table.
training_dataset_json = {
    **createTrainingDatasetJSONFromCSV(train_path, classes_map_df),
    **D_categories,
}

In [None]:
# Optional peek at the first three records:
#print(json.dumps(training_dataset_json['annotations'][:3], indent=2))
# Number of training annotations (shown as the cell output).
len(training_dataset_json['annotations'])

In [None]:
# Validation split: the annotation records merged with the shared category table.
val_dataset_json = {
    **createValidationDatasetJSONFromTxt(val_path, classes_map_df),
    **D_categories,
}

In [None]:
# Optional peek at the first three records:
#print(json.dumps(val_dataset_json['annotations'][:3], indent=2))
# Number of validation annotations (shown as the cell output).
len(val_dataset_json['annotations'])

In [None]:
import json

# Serialise each split to its own JSON file in the current working directory.
with open("test_dataset.json", "w") as output_file:
    output_file.write(json.dumps(test_dataset_json))

with open("train_dataset.json", "w") as output_file:
    output_file.write(json.dumps(training_dataset_json))

with open("val_dataset.json", "w") as output_file:
    output_file.write(json.dumps(val_dataset_json))

# To reload a saved split instead of rebuilding it:
#with open('val_dataset.json') as input_file:
#    val_dataset_json = json.load(input_file)

In [None]:
# Spot-check a single training annotation. The index 12414 is hard-coded and raises
# IndexError when the annotation list has fewer entries.
imageBBFromJSONObject(root_path, sample_JSONObj = training_dataset_json['annotations'][12414])

In [None]:
# Preview 5 evenly spaced training annotations as an inline video (interval=350).
videoBB(root_path, training_dataset_json, num_sample=5, interval=350)