# In this notebook, we develop the pipeline to perform evaluation on a trained ubteacher model

In [9]:
import os
import json
#import logging
import sys
from datetime import datetime
from pathlib import Path

# Hacky way to resolve project paths
sys.path.append(str(Path(os.getcwd()).parents[0]))
sys.path.append(str(Path(os.getcwd()).parents[1]))

import matplotlib.pyplot as plt
plt.rcParams.update({"font.size": 6})
#matplotlib.use('Agg')
import numpy as np
import torch

from detectron2.data import DatasetCatalog, MetadataCatalog
from detectron2.data import build_detection_test_loader
from detectron2.data.build import build_batch_data_loader, get_detection_dataset_dicts
from detectron2.data import transforms as T
from detectron2.utils.visualizer import Visualizer
from detectron2.utils.logger import setup_logger
setup_logger()
from detectron2.config import get_cfg
from detectron2.engine import DefaultPredictor
from detectron2.evaluation import COCOEvaluator
from detectron2.structures import Boxes
from detectron2.data.datasets.coco import convert_to_coco_dict

from ubteacher.config import add_ubteacher_config
from ubteacher.modeling.meta_arch.rcnn import TwoStagePseudoLabGeneralizedRCNN
from ubteacher.engine.trainer import UBRCNNTeacherTrainer

#import ubteacher.utils.utils_v1 as utils_v1
import ubteacher.utils.ROI_utils as ROI_utils

# Optional imports for ROI_detection2 processing

import ubteacher.utils.eval_utils as eval_utils
from ubteacher.utils.ROI_utils import TrainHelper

In [10]:
print("Paths available in this notebook:")
for n, path in enumerate(sys.path):
    print(f"{n}. {path}")

Paths available in this notebook:
0. /home/chao_lab/SynologyDrive/chaolab_AI_path/unbiased_teacher2/notebooks
1. /home/chao_lab/miniconda3/envs/ssod/lib/python38.zip
2. /home/chao_lab/miniconda3/envs/ssod/lib/python3.8
3. /home/chao_lab/miniconda3/envs/ssod/lib/python3.8/lib-dynload
4. 
5. /home/chao_lab/miniconda3/envs/ssod/lib/python3.8/site-packages
6. /home/chao_lab/SynologyDrive/chaolab_AI_path/unbiased_teacher2
7. /home/chao_lab/SynologyDrive/chaolab_AI_path
8. /home/chao_lab/SynologyDrive/chaolab_AI_path/unbiased_teacher2
9. /home/chao_lab/SynologyDrive/chaolab_AI_path


## Recreate parts of classes and functions necessary for this notebook

In [11]:
def convert_to_coco_json(dataset_name: str, output_dir: str, allow_cached=True):
    """
    Converts dataset into COCO format and saves it to a json file.
    dataset_name must be registered in DatasetCatalog and in detectron2's standard format.

    Args:
    dataset_name -- reference from the config file to the catalogs
                    must be registered in DatasetCatalog and in detectron2's 
                    standard format
    output_file -- path of json file that will be saved to
    allow_cached -- if json file is already present then skip conversion
    """
    logger = setup_logger(name=__name__)
    coco_files = []
    for file in os.scandir(output_dir):
        if "coco" in file.name:
            coco_files.append(file.path)
    if coco_files:
        logger.info("Removing previously cached coco files...")
        for i in coco_files:
            logger.info(f"Removed {i}")
            os.remove(i)
    coco_dict = convert_to_coco_dict(dataset_name)
    coco_json = json.dumps(coco_dict, indent=4)
    coco_json_file = dataset_name + "_coco_format.json"
    print(os.path.join(output_dir, coco_json_file))
    
    with open(os.path.join(output_dir, coco_json_file), "w") as json_file:
        json_file.write(coco_json)

## Set validation data paths, load model

In [36]:
model_path_parent = '/home/chao_lab/SynologyDrive/chaolab_AI_path/unbiased_teacher2/completed_outputs/tcga_mix_v2.8'
model_path = os.path.join(model_path_parent, "model_0039999.pth")
dataseed = '/home/chao_lab/SynologyDrive/chaolab_AI_path/unbiased_teacher2/dataseed/OSCC_TCGA_FullMix_1008.json'
dataset_dir = '/mnt/voyage/Datasets_pathology/GT_2023/LesionFinder' #Temporarily voyunbiased_teacher2/test_eval'age because GT is not sync'd to FN
output_dir = '/mnt/MT8/SynologyDrive/GT_2023' # Major Tom format

#dataseed = os.path.join(sys.path[7], "dataseed/OSCC_TCGA_FullMix_1008.json")
#dataset_dir = "/mnt/d/SynologyDrive/GT_2023/LesionFinder"


In [37]:
# Load ledger from dataseed
with open(dataseed, 'r') as f:
    ledger = json.load(f)
    val_data = ledger['val']
sample_im_path = Path(val_data.get("images")[0])
source_dataset_dir = str(sample_im_path.parent.parent)
print(f"The data on this computer is in: {source_dataset_dir}")

# Adjust data paths for the current computer
for d in val_data:
    updated_paths = [
        i.replace(source_dataset_dir, dataset_dir) for i in val_data[d]
    ]
    val_data[d] = updated_paths
print("\nCheck a few files to see if their data paths are properly adjusted:")
for i in val_data["images"][:5]:
    print("    ", i)

The data on this computer is in: /home/chao_lab/GT_2023/LesionFinder

Check a few files to see if their data paths are properly adjusted:
     /mnt/voyage/Datasets_pathology/GT_2023/LesionFinder/SRI_OSCC/Case 23 G5_1.npy
     /mnt/voyage/Datasets_pathology/GT_2023/LesionFinder/SRI_OSCC/Case 16 J19_2.npy
     /mnt/voyage/Datasets_pathology/GT_2023/LesionFinder/SRI_OSCC/Case 1 J10_1.npy
     /mnt/voyage/Datasets_pathology/GT_2023/LesionFinder/SRI_OSCC/Case 4 F33_3.npy
     /mnt/voyage/Datasets_pathology/GT_2023/LesionFinder/SRI_OSCC/Case 12 G4_0.npy


NOTE: Using register_dataset function (new) in ROI_utils
TODO: Move functions to utils_v1 under ubteacher module

In [38]:
# Dataset registration
reg_name = 'LesionFinder_val'
print(f"Registering {reg_name} to DatasetCatalog...")
# Fake a cat_map since its not currently used
cat_map = {'lesion': 0}
try:
    ROI_utils.TrainHelper().register_dataset(reg_name, val_data, cat_map)
    dataset_name = list(DatasetCatalog.items())[-1][0]
    print(f"The dataset {dataset_name} is now registered")
except:
    print(f"There was a problem with dataset registeration")
# Set prediction class name
ROI_metadata = MetadataCatalog.get(dataset_name).set(thing_classes=['lesion'])


Registering LesionFinder_val to DatasetCatalog...
working on 'ROI_LesionFinder_val'...
The dataset ROI_LesionFinder_val is now registered


In [39]:
reg_name = 'val'
dataset_name = 'ROI_' + reg_name

In [40]:
#model_path_parent = os.path.join(sys.path[7], "output")
#model_path = os.path.join(model_path_parent, "model_0094999.pth")
#output_dir = os.path.join(model_path_parent, "test_eval")
# Set detection configs
print("\nSetting detection configs...")
print("Loading model...")
cfg = get_cfg()
add_ubteacher_config(cfg)

# Added config args.  if the cfg doesn't merge properly

cfg.MODEL.ROI_BOX_HEAD.FED_LOSS_FREQ_WEIGHT_POWER = 0.0
cfg.MODEL.ROI_BOX_HEAD.FED_LOSS_NUM_CLASSES = 1
cfg.MODEL.ROI_BOX_HEAD.USE_FED_LOSS = False
cfg.MODEL.ROI_BOX_HEAD.USE_SIGMOID_CE = False
cfg.SOLVER.BASE_LR_END = 0.005
cfg.SOLVER.NUM_DECAYS = 1
cfg.SOLVER.RESCALE_INTERVAL = False

cfg.PARENTDIR = ""
cfg.DETECTION_MODE = ""
cfg.DATASEED = ""
cfg.MODEL.ROI_BOX_HEAD.BATCH_SIZE_PER_IMAGE = 10
cfg.MODEL.ROI_BOX_HEAD.NUM_CLASSES = 1
cfg.MODEL.RPN.NUM_CLASSES = 1
cfg.merge_from_file(os.path.join(model_path_parent, "config.yaml"))
cfg.MODEL.WEIGHTS = os.path.join(model_path_parent, "model_0039999.pth") #changed by Ben 8pm 10/13
cfg.MODEL.ROI_HEADS.SCORE_THRESH_TEST = 0.5
cfg.DATASETS.TEST = dataset_name


Setting detection configs...
Loading model...


In [41]:
inf_helper = eval_utils.InferenceHelper(cfg)
cat_map = {'lesion': 0}
# Optional registration with ROI_detection2 inference logic

print(f"Registering dataset '{dataset_name}'...")
TrainHelper().register_dataset(
    reg_name, val_data, cat_map
)
ROI_metadata = MetadataCatalog.get(dataset_name)
cfg.DATASETS.TEST = dataset_name
dataloader = build_detection_test_loader(
    cfg, cfg.DATASETS.TEST, mapper=eval_utils.EvalHelper.validation_mapper
)

Registering dataset 'ROI_val'...
working on 'ROI_val'...


AssertionError: Dataset 'ROI_val' is already registered!

In [42]:
# Setup dataloader
# dataloader = UBRCNNTeacherTrainer.build_test_loader(cfg, dataset_name)
# Initialize the model
model = DefaultPredictor(cfg).model
model.eval()
eval_dataset = cfg.DATASETS.TEST
thing_classes = ROI_metadata.thing_classes

Some model parameters or buffers are not found in the checkpoint:
[34mbackbone.bottom_up.res2.0.conv1.norm.{bias, weight}[0m
[34mbackbone.bottom_up.res2.0.conv1.weight[0m
[34mbackbone.bottom_up.res2.0.conv2.norm.{bias, weight}[0m
[34mbackbone.bottom_up.res2.0.conv2.weight[0m
[34mbackbone.bottom_up.res2.0.conv3.norm.{bias, weight}[0m
[34mbackbone.bottom_up.res2.0.conv3.weight[0m
[34mbackbone.bottom_up.res2.0.shortcut.norm.{bias, weight}[0m
[34mbackbone.bottom_up.res2.0.shortcut.weight[0m
[34mbackbone.bottom_up.res2.1.conv1.norm.{bias, weight}[0m
[34mbackbone.bottom_up.res2.1.conv1.weight[0m
[34mbackbone.bottom_up.res2.1.conv2.norm.{bias, weight}[0m
[34mbackbone.bottom_up.res2.1.conv2.weight[0m
[34mbackbone.bottom_up.res2.1.conv3.norm.{bias, weight}[0m
[34mbackbone.bottom_up.res2.1.conv3.weight[0m
[34mbackbone.bottom_up.res2.2.conv1.norm.{bias, weight}[0m
[34mbackbone.bottom_up.res2.2.conv1.weight[0m
[34mbackbone.bottom_up.res2.2.conv2.norm.{bias, weight}

In [43]:
# Prepare coco format json
convert_to_coco_json(eval_dataset, output_dir)

print("Begin evaluation using the COCO API...")
coco_evaluator = COCOEvaluator(
    dataset_name=eval_dataset,
    #tasks = ('bbox'),
    distributed=False,
    output_dir=output_dir,
    #max_dets_per_image=10,
)

[32m[10/15 15:45:14 __main__]: [0mRemoving previously cached coco files...
[32m[10/15 15:45:14 __main__]: [0mRemoved /mnt/MT8/SynologyDrive/GT_2023/coco_instances_results.json
[32m[10/15 15:45:14 __main__]: [0mRemoved /mnt/MT8/SynologyDrive/GT_2023/ROI_val_coco_format.json
[32m[10/15 15:45:14 d2.data.datasets.coco]: [0mConverting dataset dicts into COCO format
[32m[10/15 15:45:14 d2.data.datasets.coco]: [0mConversion finished, #images: 114, #annotations: 56
/mnt/MT8/SynologyDrive/GT_2023/ROI_val_coco_format.json
Begin evaluation using the COCO API...


In [44]:
coco_evaluator.reset()

# Perform inference with stats
predictor = DefaultPredictor(cfg)

for inputs in dataloader:
    #test_img_id = inputs[0]['image_id']
    #print(test_img_id)
    #input_img = inputs[0]['image'].permute(1, 2, 0).numpy() # same as channel_last
    with torch.no_grad():
        preds = model(inputs)
    
    # Accumulate COCO evaluation metrics
    # NOTE If get numpy error, update pycocotools to 2.0.7
    coco_evaluator.process(inputs, preds)
    
coco_results = coco_evaluator.evaluate()

Some model parameters or buffers are not found in the checkpoint:
[34mbackbone.bottom_up.res2.0.conv1.norm.{bias, weight}[0m
[34mbackbone.bottom_up.res2.0.conv1.weight[0m
[34mbackbone.bottom_up.res2.0.conv2.norm.{bias, weight}[0m
[34mbackbone.bottom_up.res2.0.conv2.weight[0m
[34mbackbone.bottom_up.res2.0.conv3.norm.{bias, weight}[0m
[34mbackbone.bottom_up.res2.0.conv3.weight[0m
[34mbackbone.bottom_up.res2.0.shortcut.norm.{bias, weight}[0m
[34mbackbone.bottom_up.res2.0.shortcut.weight[0m
[34mbackbone.bottom_up.res2.1.conv1.norm.{bias, weight}[0m
[34mbackbone.bottom_up.res2.1.conv1.weight[0m
[34mbackbone.bottom_up.res2.1.conv2.norm.{bias, weight}[0m
[34mbackbone.bottom_up.res2.1.conv2.weight[0m
[34mbackbone.bottom_up.res2.1.conv3.norm.{bias, weight}[0m
[34mbackbone.bottom_up.res2.1.conv3.weight[0m
[34mbackbone.bottom_up.res2.2.conv1.norm.{bias, weight}[0m
[34mbackbone.bottom_up.res2.2.conv1.weight[0m
[34mbackbone.bottom_up.res2.2.conv2.norm.{bias, weight}

[32m[10/15 15:45:31 d2.evaluation.coco_evaluation]: [0mPreparing results for COCO format ...
[32m[10/15 15:45:31 d2.evaluation.coco_evaluation]: [0mSaving results to /mnt/MT8/SynologyDrive/GT_2023/coco_instances_results.json
[32m[10/15 15:45:31 d2.evaluation.coco_evaluation]: [0mEvaluating predictions with unofficial COCO API...


In [45]:
for inputs in dataloader:
    test_img_id = inputs[0]['image_id']
    print(test_img_id)
    img = (inputs[0]['image'].permute(1,2,0).numpy())
    print(np.shape(img))
    plt.imshow(img)
    plt.show(); plt.close()
    break

Case 23 G5_1
(2569, 1822, 3)


In [46]:
with torch.no_grad():
    outputs = model([inputs[0]])
instances = outputs[0]["instances"].to('cpu')
print(instances)

Instances(num_instances=0, image_height=2569, image_width=1822, fields=[pred_boxes: Boxes(tensor([], size=(0, 4))), scores: tensor([]), pred_classes: tensor([], dtype=torch.int64), pred_boxes_std: tensor([], size=(0, 4))])
