In [22]:
import os, shutil, json ,cv2
import pandas as pd
import torch
import numpy as np
from tqdm import tqdm
from PIL import Image
import numpy as np

from detectron2.config import get_cfg
from detectron2.data.datasets import register_coco_instances
from detectron2.utils.logger import setup_logger
from detectron2.data import (
    DatasetCatalog,
    MetadataCatalog,
    build_detection_test_loader,
)
from detectron2.evaluation import (
    COCOEvaluator,
    inference_on_dataset,
)
from detectron2.engine import DefaultPredictor, DefaultTrainer

import sys
sys.path.append("../")
from  trust.strategies.tactful_smi import TACTFUL_SMI

## Detectron Helper Functions

In [23]:
class CocoTrainer(DefaultTrainer):

    @classmethod
    def build_evaluator(cls, cfg, dataset_name, output_folder=None):

        if output_folder is None:
            os.makedirs("coco_eval", exist_ok=True)
            output_folder = "coco_eval"

        return COCOEvaluator(dataset_name, cfg, False, output_folder)


def create_model(cfg, type="train"):
    if type == "train":
        trainer = CocoTrainer(cfg)
        trainer.resume_or_load(resume=False)
        return trainer
    if type == "test":
        tester = DefaultPredictor(cfg)
        return tester


def crop_object(image, box, ground_truth=False):
    """Crops an object in an image

  Inputs:
    image: PIL image
    box: one box from Detectron2 pred_boxes
    """
    if (not ground_truth):
        x_top_left = box[0]
        y_top_left = box[1]
        x_bottom_right = box[2]
        y_bottom_right = box[3]
    else:
        x_top_left = box[0]
        y_top_left = box[1]
        x_bottom_right = box[0] + box[2]
        y_bottom_right = box[1] + box[3]
    x_center = (x_top_left + x_bottom_right) / 2
    y_center = (y_top_left + y_bottom_right) / 2

    try:
        crop_img = image.crop((int(x_top_left), int(y_top_left),
                               int(x_bottom_right), int(y_bottom_right)))
    except Exception as e:
        pass

    return crop_img


def do_evaluate(cfg, model, output_path):
    results = dict()
    for dataset_name in cfg.DATASETS.TEST:
        data_loader = build_detection_test_loader(cfg, dataset_name)
        evaluator = COCOEvaluator(dataset_name,
                                  output_dir=os.path.join(
                                      output_path, "inference", dataset_name))
        results_i = inference_on_dataset(model.model, data_loader, evaluator)
        results[dataset_name] = results_i
    return results


def remove_dataset(name):
    if name in DatasetCatalog.list():
        DatasetCatalog.remove(name)
        MetadataCatalog.remove(name)


'''
Returns the list of cropped image based on the objects. The method uses the trained object detection\
     model to get bouding box and crop the images.
'''
def crop_images_classwise(model: DefaultPredictor, src_path, dest_path,
                          proposal_budget: int):
    if not os.path.exists(dest_path + '/obj_images'):
        os.makedirs(dest_path + '/obj_images')
    obj_im_dir = dest_path + '/obj_images'
    MAPPING = {
        "0": "text",
        "1": "title",
        "2": "list",
        "3": "table",
        "4": "figure"
    }
    no_of_objects = 0
    for d in tqdm(os.listdir(src_path)):
        image = cv2.imread(os.path.join(src_path, d))
        height, width = image.shape[:2]
        image = torch.as_tensor(image.astype("float32").transpose(2, 0, 1))
        inputs = [{"image": image, "height": height, "width": width}]
        images = model.model.preprocess_image(inputs)

        features = model.model.backbone(images.tensor)
        proposals, _ = model.model.proposal_generator(images, features)
        instances, _ = model.model.roi_heads(images, features,
                                                     proposals)
        boxes = instances[0].pred_boxes
        classes = instances[0].pred_classes.cpu().numpy().tolist()
        max_score_order = torch.argsort(instances[0].scores).tolist()
        
        if (proposal_budget > len(max_score_order)):
            proposal_budget = len(max_score_order)
        
        for singleclass in classes:
            if not os.path.exists(
                    os.path.join(dest_path, 'obj_images',
                                 MAPPING[str(singleclass)])):
                os.makedirs(
                    os.path.join(dest_path, 'obj_images',
                                 MAPPING[str(singleclass)]))

        img = Image.open(os.path.join(src_path, d))
        for idx, box in enumerate(
                list(boxes[max_score_order[:proposal_budget]])):
            no_of_objects += 1
            box = box.detach().cpu().numpy()

            crop_img = crop_object(img, box)
            try:
                crop_img.save(
                    os.path.join(
                        obj_im_dir, MAPPING[str(classes[idx])],
                        os.path.split(os.path.join(src_path, d))[1].replace(
                            ".jpg", "") + "_" + str(idx) + ".jpg"))
            except Exception as e:
                print(e)

    print("Number of objects: " + str(no_of_objects))


'''
Returns the list of cropped images based on the objects. The method make use of ground truth to crop the image.
'''
def crop_images_classwise_ground_truth(train_json_path, src_path, dest_path,
                                       category: str):
    if not os.path.exists(dest_path + '/obj_images'):
        os.makedirs(dest_path + '/obj_images')
    obj_im_dir = dest_path + '/obj_images'

    MAPPING = {"text": 1, "title": 2, "list": 3, "table": 4, "figure": 5}
    no_of_objects = 0
    with open(train_json_path) as f:
        data = json.load(f)
    annotations = data['annotations']
    file_names = os.listdir(src_path)
    file_ids = {
        x['id']: x['file_name']
        for x in data['images'] if x['file_name'] in file_names
    }
    for idx, d in tqdm(file_ids.items()):
        img = cv2.imread(os.path.join(src_path, d))
        if not os.path.exists(
                os.path.join(dest_path, 'obj_images', category)):
            os.makedirs(os.path.join(dest_path, 'obj_images', category))

        img = Image.open(os.path.join(src_path, d))
        boxes = [
            x['bbox'] for x in annotations if x['image_id'] == idx
            and x['category_id'] == MAPPING[category]
        ]
        for idx, box in enumerate(list(boxes)):
            no_of_objects += 1
            box = np.asarray(box, dtype=np.float32)

            crop_img = crop_object(img, box, True)
            crop_img.save(
                os.path.join(
                    obj_im_dir, category,
                    os.path.split(os.path.join(src_path, d))[1].replace(
                        ".jpg", "") + "_" + str(idx) + ".jpg"))

    print("Number of objects: " + str(no_of_objects))


def Random_wrapper(image_list, budget=10):
    rand_idx = np.random.permutation(len(image_list))[:budget]
    rand_idx = rand_idx.tolist()
    Random_results = [image_list[i] for i in rand_idx]

    return Random_results

# Normal Helper Functions

In [24]:
def aug_train_subset(subset_result, train_data_json, lake_data_json, budget, src_dir, dest_dir):
    with open(lake_data_json, mode="r") as f:
        lake_dataset = json.load(f)
    with open(train_data_json, mode="r") as f:
        train_dataset = json.load(f)

    categories = lake_dataset['categories']
    image_list = list(filter(lambda x: x['file_name'] in subset_result, lake_dataset['images']))
    image_id = [image['id'] for image in image_list]
    annotations_shift = list(filter(lambda x: x['image_id'] in image_id, lake_dataset['annotations']))

    train_annotations = train_dataset['annotations'];
    train_image_list = train_dataset['images'];

    # appending the images to train images
    train_image_list += image_list;
    train_annotations += annotations_shift;

    #removing the images lake dataset.
    final_lake_image_list = list(filter(lambda x: x['file_name'] not in subset_result, lake_dataset['images']))
    image_id = [image['id'] for image in image_list]
    final_lake_annotations = list(filter(lambda x: x['image_id'] not in image_id, lake_dataset['annotations']))

    #moving data from lake set to train set.
    change_dir(subset_result, src_dir, dest_dir)

    #changing the coco-file for annotations
    create_labels_update(train_image_list, train_annotations, categories, train_data_json)
    create_labels_update(final_lake_image_list, final_lake_annotations, categories, lake_data_json)

def change_dir(image_results, src_dir, dest_dir):
    names = [names.split("/")[-1].replace(".jpg", "") for names in image_results]
    for index in range(len(names)):
        source_img = os.path.join(src_dir[0], "{}.jpg".format(names[index]))
        destination_img = os.path.join(dest_dir[0], "{}.jpg".format(names[index]))
        if not os.path.exists(dest_dir[0]) or not os.path.exists(dest_dir[1]):
            os.mkdir(dest_dir[0])
            os.mkdir(dest_dir[1])
        
        try:
            shutil.copy(source_img, destination_img)
        except shutil.SameFileError:
            print("Source and destination represents the same file.")
        
        # If there is any permission issue
        except PermissionError:
            print("Permission denied.")
        
        # For other errors
        except Exception as e:
            print("Error occurred while copying file.", e)


        # removing the data from the lake data
        try:
            os.remove(os.path.join(src_dir[0], "{}.jpg".format(names[index])))
        except:
            pass

def create_labels_update(images, annotations, categories, filename):
    labels = {}
    labels['images'] = images
    labels['annotations'] = annotations
    labels['categories'] = categories

    with open(filename, "w") as f:
        json.dump(labels, f)
    

def remove_dir(dir_name):
    try:
        shutil.rmtree(dir_name)
    except:
        pass


def create_dir(dir_name):
    try:
        os.mkdir(dir_name)
    except:
        pass

def get_original_images_path(subset_result:list):
    return ["_".join(x.split("/")[-1].split("_")[:2])+ ".jpg" for x in subset_result]

Copying Dataset

In [25]:
!rm -r publaynet
!rm -r query_data_img
!cp -r ../../dataset/publaynet .
!cp -r ../../tal4dla-exp3/query_data_img/ .

In [26]:
args = {
    "output_path":'sanskrit_com1', 
    "strategy":'com', 
    "total_budget":300, 
    "budget":30, 
    "lake_size":6896, 
    "train_size":2000, 
    "category":'list', 
    "device":1, 
    "proposal_budget":30, 
    "iterations":10
}

query_path = 'query_data_img/' + args['category']
torch.cuda.set_device(args['device'])
train_data_dirs = ("publaynet/train_data_img",
                   "publaynet/train_targeted.json")
lake_data_dirs = ("publaynet/lake_data_img",
                  "publaynet/lake_targeted.json")
test_data_dirs = ("publaynet/test_data_img",
                  "publaynet/test_targeted.json")
val_data_dirs = ("publaynet/val_data_img",
                 "publaynet/val_targeted.json")

train_path = 'model_result'
training_name = args['output_path']
model_path = os.path.join(train_path, training_name)
if (not os.path.exists(model_path)):
    create_dir(model_path)

# train a faster_rcnn model on the initial_set, add respective config file path
output_dir = os.path.join(model_path, "initial_training")
config_file_path = '../../dataset/configs/publaynet/faster_rcnn_R_101_FPN_3x.yaml'
selection_strag = args['strategy']
selection_budget = args['budget']
budget = args['total_budget']
proposal_budget = args['proposal_budget']

#setting up configuration
cfg = get_cfg()
cfg.merge_from_file(config_file_path)
cfg.DATASETS.TRAIN = ("initial_set",)
cfg.DATASETS.TEST = ('test_set', 'val_set')
cfg.DATALOADER.NUM_WORKERS = 4
cfg.MODEL.ROI_HEADS.NUM_CLASSES = 5
cfg.SOLVER.BASE_LR = 0.00025
cfg.SOLVER.WARMUP_ITERS = 1000
cfg.SOLVER.MAX_ITER = 500
cfg.SOLVER.IMS_PER_BATCH = 6
cfg.MODEL.RPN.NMS_THRESH = 0.8
cfg.MODEL.RPN.POST_NMS_TOPK_TEST: 2000
# cfg.TEST.EVAL_PERIOD = 1000
cfg.OUTPUT_DIR = output_dir
cfg.TRAINING_NAME = training_name

logger = setup_logger(os.path.join(output_dir, cfg.TRAINING_NAME))


#clearing data if already exist
remove_dataset("initial_set")
remove_dataset("test_set")
remove_dataset("val_set")

# Registering dataset intial_set for initial training, test_set and val_set for test and validation respectively.
register_coco_instances(
    "initial_set", {}, train_data_dirs[1], train_data_dirs[0])
register_coco_instances("test_set", {}, test_data_dirs[1], test_data_dirs[0])
register_coco_instances("val_set", {}, val_data_dirs[1], val_data_dirs[0])


## Intial Training of model

In [27]:
# # step 1
# train a faster_rcnn model on the initial_set.
logger.info("Starting Initial_set Training")
cfg.MODEL_WEIGHTS = '../../dataset/Initial_model_weight/model_final.pkl'
model = create_model(cfg)
torch.cuda.empty_cache()
model.train()
logger.info("Initial_set training complete")

[32m[03/21 16:55:05 detectron2]: [0mStarting Initial_set Training
[32m[03/21 16:55:05 d2.engine.defaults]: [0mModel:
GeneralizedRCNN(
  (backbone): FPN(
    (fpn_lateral2): Conv2d(256, 256, kernel_size=(1, 1), stride=(1, 1))
    (fpn_output2): Conv2d(256, 256, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
    (fpn_lateral3): Conv2d(512, 256, kernel_size=(1, 1), stride=(1, 1))
    (fpn_output3): Conv2d(256, 256, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
    (fpn_lateral4): Conv2d(1024, 256, kernel_size=(1, 1), stride=(1, 1))
    (fpn_output4): Conv2d(256, 256, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
    (fpn_lateral5): Conv2d(2048, 256, kernel_size=(1, 1), stride=(1, 1))
    (fpn_output5): Conv2d(256, 256, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
    (top_block): LastLevelMaxPool()
    (bottom_up): ResNet(
      (stem): BasicStem(
        (conv1): Conv2d(
          3, 64, kernel_size=(7, 7), stride=(2, 2), padding=(3, 3), bias=False
          (norm

Some model parameters or buffers are not found in the checkpoint:
[34mbackbone.fpn_lateral2.{bias, weight}[0m
[34mbackbone.fpn_lateral3.{bias, weight}[0m
[34mbackbone.fpn_lateral4.{bias, weight}[0m
[34mbackbone.fpn_lateral5.{bias, weight}[0m
[34mbackbone.fpn_output2.{bias, weight}[0m
[34mbackbone.fpn_output3.{bias, weight}[0m
[34mbackbone.fpn_output4.{bias, weight}[0m
[34mbackbone.fpn_output5.{bias, weight}[0m
[34mproposal_generator.rpn_head.anchor_deltas.{bias, weight}[0m
[34mproposal_generator.rpn_head.conv.{bias, weight}[0m
[34mproposal_generator.rpn_head.objectness_logits.{bias, weight}[0m
[34mroi_heads.box_head.fc1.{bias, weight}[0m
[34mroi_heads.box_head.fc2.{bias, weight}[0m
[34mroi_heads.box_predictor.bbox_pred.{bias, weight}[0m
[34mroi_heads.box_predictor.cls_score.{bias, weight}[0m
The checkpoint state_dict contains keys that are not used by the model:
  [35mfc1000.{bias, weight}[0m


[32m[03/21 16:55:06 d2.engine.train_loop]: [0mStarting training from iteration 0
[32m[03/21 16:55:15 d2.utils.events]: [0m eta: 0:03:33  iter: 19  total_loss: 3.317  loss_cls: 1.958  loss_box_reg: 0.02701  loss_rpn_cls: 0.6269  loss_rpn_loc: 0.6771  time: 0.4361  data_time: 0.0250  lr: 9.7405e-06  max_mem: 9737M
[32m[03/21 16:55:24 d2.utils.events]: [0m eta: 0:03:25  iter: 39  total_loss: 1.584  loss_cls: 0.3964  loss_box_reg: 0.05097  loss_rpn_cls: 0.6064  loss_rpn_loc: 0.535  time: 0.4434  data_time: 0.0103  lr: 1.9731e-05  max_mem: 9737M
[32m[03/21 16:55:34 d2.utils.events]: [0m eta: 0:03:17  iter: 59  total_loss: 1.379  loss_cls: 0.1689  loss_box_reg: 0.06887  loss_rpn_cls: 0.5696  loss_rpn_loc: 0.5924  time: 0.4635  data_time: 0.0090  lr: 2.972e-05  max_mem: 9739M
[32m[03/21 16:56:01 d2.utils.events]: [0m eta: 0:03:11  iter: 79  total_loss: 1.195  loss_cls: 0.1781  loss_box_reg: 0.0745  loss_rpn_cls: 0.5207  loss_rpn_loc: 0.3836  time: 0.6954  data_time: 0.0094  lr: 3.97

## Evaluating model and saving first iteration result

In [None]:

iteration = args['iterations']
result_val = []
result_test = []

# del model
torch.cuda.empty_cache()

# step 2
# evaluate the inital model and get worst performing classcfg.MODEL.WEIGHTS = cfg.OUTPUT_DIR + "/model_final.pth
cfg.MODEL.WEIGHTS = os.path.join(cfg.OUTPUT_DIR , "model_final.pth")
model = create_model(cfg, "test")
result = do_evaluate(cfg, model, output_dir)
result_val.append(result['val_set'])
result_test.append(result['test_set'])
category_selection = []

[32m[03/21 16:15:30 d2.checkpoint.c2_model_loading]: [0mFollowing weights matched with model:
| Names in Model                                  | Names in Checkpoint                                                                                  | Shapes                                          |
|:------------------------------------------------|:-----------------------------------------------------------------------------------------------------|:------------------------------------------------|
| backbone.bottom_up.res2.0.conv1.*               | backbone.bottom_up.res2.0.conv1.{norm.bias,norm.running_mean,norm.running_var,norm.weight,weight}    | (64,) (64,) (64,) (64,) (64,64,1,1)             |
| backbone.bottom_up.res2.0.conv2.*               | backbone.bottom_up.res2.0.conv2.{norm.bias,norm.running_mean,norm.running_var,norm.weight,weight}    | (64,) (64,) (64,) (64,) (64,64,3,3)             |
| backbone.bottom_up.res2.0.conv3.*               | backbone.bottom_up.res2.0.conv3.

## Starting of AL rounds

In [None]:
budget

300

In [None]:
i = 0
selection_arg = {"class":args['category'], 'eta':1, "model_path":model_path, 'smi_function':args['strategy']}
try:
    while (i < iteration and budget > 0):
        # step 3
        # get embeddings for initial and lakeset from RESNET101

        if (selection_strag != "random"):

            # creating new query set for under performing class for each iteration
            remove_dir(os.path.join(model_path, "query_images"))
            try:
                os.remove(os.path.join(model_path, "data_query.csv"))
            except:
                pass

            # Cropping object based on ground truth for the query set.
            # The set is part of train set, so no need of using object detection model to find the bounding box.
            crop_images_classwise_ground_truth(train_data_dirs[1], query_path, os.path.join(
                model_path, "query_images"), args['category'])
            
            remove_dir(os.path.join(model_path, "lake_images"))
            try:
                os.remove(os.path.join(model_path, "data.csv"))
            except:
                pass
            crop_images_classwise(
                model, lake_data_dirs[0], os.path.join(model_path, "lake_images"), proposal_budget=proposal_budget)
            
            selection_arg['iteration'] = i
            strategy_sel = TACTFUL_SMI(args = selection_arg)
            lake_image_list, subset_result = strategy_sel.select(proposal_budget)
            subset_result = [lake_image_list[i] for i in subset_result]
            subset_result = list(
                set(get_original_images_path(subset_result)))
            
        else:
            lake_image_list = os.listdir(lake_data_dirs[0])
            subset_result = Random_wrapper(
                lake_image_list, selection_budget)

        # reducing the selection budget
        budget -= len(subset_result)
        if (budget > 0):

            # transferring images from lake set to train set
            aug_train_subset(
                subset_result, train_data_dirs[1], lake_data_dirs[1], budget, lake_data_dirs, train_data_dirs)
        # removing the old training images from the detectron configuration and adding new one
        remove_dataset("initial_set")
        register_coco_instances(
            "initial_set", {}, train_data_dirs[1], train_data_dirs[0])

        del model
        torch.cuda.empty_cache()
        # before starting the model active learning loop, calculating the embedding of the lake datset
        # change iteration as per the requirement
        cfg.MODEL.WEIGHTS = cfg.OUTPUT_DIR + "/model_final.pth"
        cfg.SOLVER.MAX_ITER = 500
        model = create_model(cfg, "train")
        model.train()

        # reevaluating the model train once again
        del model
        torch.cuda.empty_cache()
        # before starting the model active learning loop, calculating the embedding of the lake datset
        cfg.MODEL.WEIGHTS = cfg.OUTPUT_DIR + "/model_final.pth"
        model = create_model(cfg, "test")
        result = do_evaluate(cfg, model, output_dir)
        result_val.append(result['val_set'])
        result_test.append(result['test_set'])

        # increasing the iteration number
        # publishing each iteration result to csv
        i += 1
        print("remaining_budget", budget)
        final_data = []
        temp = []
        for it in result_val:
            print(it)
            for k, val in it.items():
                temp = list(val.keys())
                final_data.append(list(val.values()))
        csv = pd.DataFrame(final_data, columns=temp)
        csv.to_csv(os.path.join(output_dir, '{}'.format(
            "val_scores"+selection_strag+".csv")))
        final_data = []
        for it in result_test:
            print(it)
            for k, val in it.items():
                temp = list(val.keys())
                final_data.append(list(val.values()))
        csv = pd.DataFrame(final_data, columns=temp)
        csv.to_csv(os.path.join(output_dir, '{}'.format(
            "test_scores"+selection_strag+".csv")))
except Exception as e:
    logger.error("Error while training:", e)

finally:
    final_data = []
    temp = []
    for i in result_val:
        print(i)
        for k, val in i.items():
            temp = list(val.keys())
            final_data.append(list(val.values()))
    csv = pd.DataFrame(final_data, columns=temp)
    csv.to_csv(os.path.join(output_dir, '{}'.format(
        "val_scores"+selection_strag+".csv")))
    final_data = []
    for i in result_test:
        print(i)
        for k, val in i.items():
            temp = list(val.keys())
            final_data.append(list(val.values()))
    csv = pd.DataFrame(final_data, columns=temp)
    csv.to_csv(os.path.join(output_dir, '{}'.format(
        "test_scores"+selection_strag+".csv")))
