## 以 rfcos 为例，下载rfcos的权重，用模型进行推理，保存推理的结果

In [None]:
import os
import mim

from mmengine.config import Config
from mmengine.evaluator import DumpResults
from mmengine.runner import Runner

from mmrotate.utils import register_all_modules
from mmdet.utils import register_all_modules as register_all_modules_mmdet


def monkey_patch_of_collections_typehint_for_mmrotate1x():
    import collections
    from collections.abc import Mapping, Sequence, Iterable
    collections.Mapping = Mapping
    collections.Sequence = Sequence
    collections.Iterable = Iterable

monkey_patch_of_collections_typehint_for_mmrotate1x()

register_all_modules_mmdet(init_default_scope=False)
register_all_modules(init_default_scope=False)

model_name = 'rotated-fcos-le90_r50_fpn_1x_dota'
save_root = './eval_rfcos/'
ckpt_fname = mim.download('mmrotate', [model_name], dest_root=os.path.join(save_root, model_name))[0]

cfg_fpath = os.path.join(save_root, model_name, f'{model_name}.py')
ckpt_fpath = os.path.join(save_root, model_name, ckpt_fname)

cfg = Config.fromfile(cfg_fpath)
cfg.launcher = "none"
cfg.work_dir = save_root
cfg.load_from = ckpt_fpath
cfg.test_dataloader.dataset.data_root = "../playground/data/split_ss_dota"

# test on trainval set
runner = Runner.from_cfg(cfg)
runner.test_evaluator.metrics.append(
    DumpResults(out_file_path=os.path.join(save_root, model_name, 'results_trainval.pkl')))
runner.test()

# test on test set
cfg.test_dataloader.dataset.ann_file = ''
cfg.test_dataloader.dataset.data_prefix = dict(img_path='test/images/')
cfg.test_dataloader.dataset.pipeline = cfg.test_pipeline
cfg.test_evaluator = dict(
    type='DOTAMetric',
    format_only=True,
    merge_patches=True,
    outfile_prefix=os.path.join(save_root, model_name, 'test'))
runner = Runner.from_cfg(cfg)
runner.test_evaluator.metrics.append(
    DumpResults(out_file_path=os.path.join(save_root, model_name, 'results_test.pkl')))

runner.test()

## 查看保存的pkl结果的格式

目测就是 datasample dump成字典了

In [8]:
import os
import pickle

model_name = 'rotated-fcos-le90_r50_fpn_1x_dota'
save_root = './eval_rfcos/'
with open(os.path.join(save_root, model_name, 'results_trainval.pkl'), 'rb') as f:
    results_trainval = pickle.load(f)
with open(os.path.join(save_root, model_name, 'results_test.pkl'), 'rb') as f:
    results_test = pickle.load(f)
print(results_trainval[0].keys())
print(results_test[0].keys())

dict_keys(['scale_factor', 'pad_shape', 'batch_input_shape', 'ori_shape', 'img_id', 'img_shape', 'img_path', 'pred_instances', 'ignored_instances', 'gt_instances'])
dict_keys(['img_path', 'img_shape', 'pad_shape', 'scale_factor', 'img_id', 'ori_shape', 'batch_input_shape', 'pred_instances', 'gt_instances', 'ignored_instances'])


## 设定一个初始化评测器的函数

In [9]:
import os
import torch
from mmengine.fileio import load
from mmengine.evaluator import Evaluator
from mmrotate.evaluation import DOTAMetric

model_name = 'rotated-fcos-le90_r50_fpn_1x_dota'
save_root = './eval_rfcos/'

def prepare_dota_evaluator():
    def monkey_patch_of_collections_typehint_for_mmrotate1x():
        import collections
        from collections.abc import Mapping, Sequence, Iterable
        collections.Mapping = Mapping
        collections.Sequence = Sequence
        collections.Iterable = Iterable

    monkey_patch_of_collections_typehint_for_mmrotate1x()

    evaluator = Evaluator(DOTAMetric(metric="mAP"))
    evaluator.dataset_meta = {
        'classes':
        ('plane', 'baseball-diamond', 'bridge', 'ground-track-field',
            'small-vehicle', 'large-vehicle', 'ship', 'tennis-court',
            'basketball-court', 'storage-tank', 'soccer-ball-field', 'roundabout',
            'harbor', 'swimming-pool', 'helicopter'),
        # palette is a list of color tuples, which is used for visualization.
        'palette': [(165, 42, 42), (189, 183, 107), (0, 255, 0), (255, 0, 0),
                    (138, 43, 226), (255, 128, 0), (255, 0, 255),
                    (0, 255, 255), (255, 193, 193), (0, 51, 153),
                    (255, 250, 205), (0, 139, 139), (255, 255, 0),
                    (147, 116, 116), (0, 0, 255)]
    }
    return evaluator

## 该检查点的正常的评测结果

In [16]:
evaluator = prepare_dota_evaluator()

results_trainval = load(os.path.join(save_root, model_name, 'results_trainval.pkl'))   
evaluator.offline_evaluate(data_samples=results_trainval, chunk_size=128)

10/25 16:46:51 - mmengine - [4m[97mINFO[0m - 
---------------iou_thr: 0.5---------------
[2024-10-25 16:46:54,270] [INFO] [real_accelerator.py:203:get_accelerator] Setting ds_accelerator to cuda (auto detect)
[2024-10-25 16:46:54,361] [INFO] [real_accelerator.py:203:get_accelerator] Setting ds_accelerator to cuda (auto detect)
[2024-10-25 16:46:54,403] [INFO] [real_accelerator.py:203:get_accelerator] Setting ds_accelerator to cuda (auto detect)
[2024-10-25 16:46:54,411] [INFO] [real_accelerator.py:203:get_accelerator] Setting ds_accelerator to cuda (auto detect)
10/25 16:47:36 - mmengine - [4m[97mINFO[0m - 
+--------------------+-------+--------+--------+-------+
| class              | gts   | dets   | recall | ap    |
+--------------------+-------+--------+--------+-------+
| plane              | 18788 | 41274  | 0.971  | 0.904 |
| baseball-diamond   | 1087  | 20502  | 0.947  | 0.804 |
| bridge             | 4183  | 74732  | 0.819  | 0.602 |
| ground-track-field | 733   | 15477 

{'dota/mAP': 0.8031923174858093, 'dota/AP50': 0.803}

## 所有score强制为1的时候的评测结果

In [18]:
evaluator = prepare_dota_evaluator()

results_trainval = load(os.path.join(save_root, model_name, 'results_trainval.pkl'))   
for res in results_trainval:
    res["pred_instances"]["scores"] = torch.ones_like(res["pred_instances"]["scores"])
evaluator.offline_evaluate(data_samples=results_trainval, chunk_size=128)

10/25 16:53:36 - mmengine - [4m[97mINFO[0m - 
---------------iou_thr: 0.5---------------
[2024-10-25 16:53:38,885] [INFO] [real_accelerator.py:203:get_accelerator] Setting ds_accelerator to cuda (auto detect)
[2024-10-25 16:53:39,094] [INFO] [real_accelerator.py:203:get_accelerator] Setting ds_accelerator to cuda (auto detect)
[2024-10-25 16:53:39,140] [INFO] [real_accelerator.py:203:get_accelerator] Setting ds_accelerator to cuda (auto detect)
[2024-10-25 16:53:39,153] [INFO] [real_accelerator.py:203:get_accelerator] Setting ds_accelerator to cuda (auto detect)
10/25 16:54:22 - mmengine - [4m[97mINFO[0m - 
+--------------------+-------+--------+--------+-------+
| class              | gts   | dets   | recall | ap    |
+--------------------+-------+--------+--------+-------+
| plane              | 18788 | 41274  | 0.971  | 0.425 |
| baseball-diamond   | 1087  | 20502  | 0.947  | 0.048 |
| bridge             | 4183  | 74732  | 0.819  | 0.038 |
| ground-track-field | 733   | 15477 

{'dota/mAP': 0.16053149104118347, 'dota/AP50': 0.161}

## 用score设定阈值过滤一遍后，把剩下的score设成1的结果

In [24]:
import pandas
results = {}
for threshold in (0.1, 0.2, 0.25, 0.3, 0.35, 0.4, 0.5, 0.7):
    for hack_score in [True, False]:
        print("".join(["="*10, f" score threshold={threshold} ", "="*10]))

        evaluator = prepare_dota_evaluator()

        results_trainval = load(os.path.join(save_root, model_name, 'results_trainval.pkl'))   
        for res in results_trainval:
            keep = res["pred_instances"]["scores"] > threshold
            res["pred_instances"]["scores"] = res["pred_instances"]["scores"][keep]
            res["pred_instances"]["labels"] = res["pred_instances"]["labels"][keep]
            res["pred_instances"]["bboxes"] = res["pred_instances"]["bboxes"][keep]
            
            if hack_score:
                res["pred_instances"]["scores"] = torch.ones_like(res["pred_instances"]["scores"])
                
        map = evaluator.offline_evaluate(data_samples=results_trainval, chunk_size=128)['dota/mAP']
        
        if threshold not in results:
            results[threshold] = {}
        if hack_score:
            results[threshold]["score=1"] = map
        else:
            results[threshold]["score=score"] = map
            
print(pandas.DataFrame.from_dict(results).T)



10/25 17:12:54 - mmengine - [4m[97mINFO[0m - 
---------------iou_thr: 0.5---------------
[2024-10-25 17:12:57,466] [INFO] [real_accelerator.py:203:get_accelerator] Setting ds_accelerator to cuda (auto detect)
[2024-10-25 17:12:57,602] [INFO] [real_accelerator.py:203:get_accelerator] Setting ds_accelerator to cuda (auto detect)
[2024-10-25 17:12:57,713] [INFO] [real_accelerator.py:203:get_accelerator] Setting ds_accelerator to cuda (auto detect)
[2024-10-25 17:12:57,741] [INFO] [real_accelerator.py:203:get_accelerator] Setting ds_accelerator to cuda (auto detect)
10/25 17:13:36 - mmengine - [4m[97mINFO[0m - 
+--------------------+-------+-------+--------+-------+
| class              | gts   | dets  | recall | ap    |
+--------------------+-------+-------+--------+-------+
| plane              | 18788 | 21623 | 0.960  | 0.777 |
| baseball-diamond   | 1087  | 3720  | 0.940  | 0.257 |
| bridge             | 4183  | 10496 | 0.787  | 0.229 |
| ground-track-field | 733   | 2450  | 0.79

结论：
1. 在有score的情况下，设定阈值过滤掉低分的目标会导致掉点，mAP似乎会容忍低分的目标作为一些gt的填补，是否是会更在乎recall而容忍低分和虚警？
2. 如果score置成1的话，不应该不进行任何筛选，但也不应该进行过于严格的筛选
3. 这里面实验结果最好的是 0.25 的阈值下

我目前的印象里，score会影响的似乎主要是最一开始的一个sort（还要再回一下），全给1的话说不定这个sort就会摆烂或者相当于随机，这可能扰乱了map的sort导致了掉点
重要的是，这说明很可能这个下降的程度要取决于sort被影响的程度，或者说，这个掉点的量很可能是有随机性的
todo：后面可以探索一下这个随机性对于map的值的影响，试试看能不能衡量出一个平均值和方差看看这个掉点大概是啥样的，或者说还有没有可能会有小丑涨点的状况

## 对test也做了过滤和置1的操作，用trainval里效果最好的那个阈值

In [27]:
model_name = 'rotated-fcos-le90_r50_fpn_1x_dota'

evaluator = Evaluator(
    DOTAMetric(format_only=True, 
               merge_patches=True, 
               outfile_prefix=os.path.join(save_root, model_name, 'test_0.2above-score=1'))
)

evaluator.dataset_meta = {
    'classes':
    ('plane', 'baseball-diamond', 'bridge', 'ground-track-field',
        'small-vehicle', 'large-vehicle', 'ship', 'tennis-court',
        'basketball-court', 'storage-tank', 'soccer-ball-field', 'roundabout',
        'harbor', 'swimming-pool', 'helicopter'),
    # palette is a list of color tuples, which is used for visualization.
    'palette': [(165, 42, 42), (189, 183, 107), (0, 255, 0), (255, 0, 0),
                (138, 43, 226), (255, 128, 0), (255, 0, 255),
                (0, 255, 255), (255, 193, 193), (0, 51, 153),
                (255, 250, 205), (0, 139, 139), (255, 255, 0),
                (147, 116, 116), (0, 0, 255)]
}

results_test = load(os.path.join(save_root, model_name, 'results_test.pkl'))   
for res in results_test:
    keep = res["pred_instances"]["scores"] > 0.25
    res["pred_instances"]["scores"] = res["pred_instances"]["scores"][keep]
    res["pred_instances"]["labels"] = res["pred_instances"]["labels"][keep]
    res["pred_instances"]["bboxes"] = res["pred_instances"]["bboxes"][keep]
    res["pred_instances"]["scores"] = torch.ones_like(res["pred_instances"]["scores"])
    
evaluator.offline_evaluate(data_samples=results_test, chunk_size=128)

10/25 17:37:28 - mmengine - [4m[97mINFO[0m - The submission file save at ./eval_rfcos/rotated-fcos-le90_r50_fpn_1x_dota/test_0.2above-score=1/test_0.2above-score=1.zip


{}

送给 dota evaluation 查看实验结果

rotated fcos baseline:

```python
mAP: 0.7123079788356041
ap of each class: plane:0.8916182684809109, baseball-diamond:0.7191643582273857, bridge:0.48042269222436457, ground-track-field:0.6203818542325392, small-vehicle:0.7925815334022364, large-vehicle:0.7362002200598536, ship:0.8573985143996952, tennis-court:0.9090227613465995, basketball-court:0.8066071522146891, storage-tank:0.8431662211826575, soccer-ball-field:0.5939472468454459, roundabout:0.6290973735033039, harbor:0.6204171011301067, swimming-pool:0.6981793541941752, helicopter:0.48641503109009765
COCO style result:
AP50: 0.7123079788356041
AP75: 0.3700845169417177
mAP: 0.3940536185583186
```

Rotated FCOS 的测试 设定了0.25的score阈值，并把剩下的score全重置为1

```python
mAP: 0.5891609083502711
ap of each class: plane:0.7450649608789142, baseball-diamond:0.6488683433830279, bridge:0.3317678182505998, ground-track-field:0.4458891577699675, small-vehicle:0.6044638167353917, large-vehicle:0.5844260350465345, ship:0.7432801107048516, tennis-court:0.8874432749891452, basketball-court:0.7096262298134566, storage-tank:0.6887741046831954, soccer-ball-field:0.45957920409052394, roundabout:0.46982105429960336, harbor:0.49046957808843145, swimming-pool:0.5657295197554624, helicopter:0.4622104167649613
COCO style result:
AP50: 0.5891609083502711
AP75: 0.27821359548046143
mAP: 0.31064270094235924
```

## 尝试把置1的操作换成seed不同的randomness

In [10]:
import pandas
import random
results = {}
for threshold in (0.1, 0.2, 0.25, 0.3, 0.35, 0.4, 0.5, 0.7):
    for hack_score in ["score", "1", 
                       "rnd:42", "rnd:666", "rnd:6666", "rnd:66666", "rnd:666666", "rnd:2024", 
                       "rnd:9968", "rnd:0", "rnd:1", "rnd:123"]:
        print("".join(["="*10, f" score threshold={threshold} ", "="*10]))

        evaluator = prepare_dota_evaluator()

        results_trainval = load(os.path.join(save_root, model_name, 'results_trainval.pkl'))   
        for res in results_trainval:
            keep = res["pred_instances"]["scores"] > threshold
            res["pred_instances"]["scores"] = res["pred_instances"]["scores"][keep]
            res["pred_instances"]["labels"] = res["pred_instances"]["labels"][keep]
            res["pred_instances"]["bboxes"] = res["pred_instances"]["bboxes"][keep]
            
            if hack_score == "1":
                res["pred_instances"]["scores"] = torch.ones_like(res["pred_instances"]["scores"])
            elif hack_score.startswith("rnd:"):
                seed = hack_score.split(":")[-1]
                random.seed(seed)
                torch.manual_seed(seed)
                res["pred_instances"]["scores"] = torch.rand_like(res["pred_instances"]["scores"])
                
        map = evaluator.offline_evaluate(data_samples=results_trainval, chunk_size=128)['dota/mAP']
        
        if threshold not in results:
            results[threshold] = {}
        results[threshold][hack_score] = map
            
    random_values = [v for k, v in results[threshold].items() if k.startswith("rnd:")]
    results[threshold]["rnd:max"] = max(random_values)
    results[threshold]["rnd:min"] = min(random_values)
    results[threshold]["rnd:mean"] = sum(random_values) / len(random_values)
            
print(pandas.DataFrame.from_dict(results).T)



10/29 16:37:36 - mmengine - [4m[97mINFO[0m - 
---------------iou_thr: 0.5---------------
[2024-10-29 16:37:39,709] [INFO] [real_accelerator.py:203:get_accelerator] Setting ds_accelerator to cuda (auto detect)
[2024-10-29 16:37:39,879] [INFO] [real_accelerator.py:203:get_accelerator] Setting ds_accelerator to cuda (auto detect)
[2024-10-29 16:37:39,889] [INFO] [real_accelerator.py:203:get_accelerator] Setting ds_accelerator to cuda (auto detect)
[2024-10-29 16:37:39,922] [INFO] [real_accelerator.py:203:get_accelerator] Setting ds_accelerator to cuda (auto detect)
10/29 16:38:20 - mmengine - [4m[97mINFO[0m - 
+--------------------+-------+-------+--------+-------+
| class              | gts   | dets  | recall | ap    |
+--------------------+-------+-------+--------+-------+
| plane              | 18788 | 21623 | 0.960  | 0.904 |
| baseball-diamond   | 1087  | 3720  | 0.940  | 0.804 |
| bridge             | 4183  | 10496 | 0.787  | 0.582 |
| ground-track-field | 733   | 2450  | 0.79

In [12]:
for threshold in (0.1, 0.2, 0.25, 0.3, 0.35, 0.4, 0.5, 0.7):
    random_values = [v for k, v in results[threshold].items() if k.startswith("rnd:")]
    # results[threshold]["rnd:mean"] = sum(random_values) / len(random_values)
    results[threshold]["rnd:std"] = sum((v - results[threshold]["rnd:mean"])**2 for v in random_values) / len(random_values)

df = pandas.DataFrame.from_dict(results).T
df.to_csv(os.path.join(save_root, model_name, 'results_trainval_score_threshold.csv'))
print(df)

         score         1    rnd:42   rnd:666  rnd:6666  rnd:66666  rnd:666666  \
0.10  0.796567  0.473128  0.485691  0.470060  0.475408   0.460204    0.494141   
0.20  0.764042  0.627359  0.636019  0.637143  0.631079   0.629365    0.637885   
0.25  0.737540  0.652667  0.658215  0.656413  0.653548   0.652213    0.654050   
0.30  0.698156  0.648651  0.649108  0.649977  0.649677   0.649729    0.650873   
0.35  0.639825  0.611792  0.611264  0.610505  0.612490   0.612733    0.612868   
0.40  0.561349  0.547297  0.549103  0.545866  0.547176   0.548946    0.547503   
0.50  0.353396  0.350561  0.350529  0.350633  0.350528   0.350815    0.350507   
0.70  0.090824  0.090796  0.090824  0.090806  0.090823   0.090855    0.090807   

      rnd:2024  rnd:9968     rnd:0     rnd:1   rnd:123   rnd:max   rnd:min  \
0.10  0.475788  0.483125  0.480347  0.480462  0.464385  0.494141  0.460204   
0.20  0.629501  0.633367  0.632105  0.631837  0.628300  0.637885  0.628300   
0.25  0.652948  0.655909  0.657424  

说明随机性确实会影响结果，但是结果似乎没有造成很大的方差。有意思的是，效果最好时候（0.25阈值）的时候标准差最大，也就是最不稳定。但是每一列又是0.25的地方最好

奇怪，这里还是有nms的，测试集在 merge results 的时候

TODO：想想怎么解释下
https://github.com/open-mmlab/mmrotate/blob/d50ab767fd0a6852e7931ac8e3a759f9605d659a/mmrotate/evaluation/metrics/dota_metric.py#L163-L172

## 其他权重的测试集结果

In [2]:
# Downloaded checkpoints for DOTA dataset
tasks = (
    "rotated-fcos-le90_r50_fpn_1x_dota", 
    "rotated-retinanet-rbox-le90_r50_fpn_1x_dota",
    # "sasm-reppoints-qbox_r50_fpn_1x_dota",
    # "cfa-qbox_r50_fpn_1x_dota",
    # "r3det-oc_r50_fpn_1x_dota",
    # "rotated_atss_obb_r50_fpn_1x_dota_le90",
    # "rotated-faster-rcnn-le90_r50_fpn_1x_dota",
    # "oriented-rcnn-le90_r50_fpn_1x_dota",
    # "roi-trans-le90_r50_fpn_1x_dota",
    # "roi-trans-le90_swin-tiny_fpn_1x_dota",
)

import os
import mim
import pandas

import torch

from mmengine.fileio import load
from mmengine.config import Config
from mmengine.evaluator import DumpResults, Evaluator
from mmengine.runner import Runner

from mmrotate.evaluation import DOTAMetric
from mmrotate.utils import register_all_modules
from mmdet.utils import register_all_modules as register_all_modules_mmdet


def monkey_patch_of_collections_typehint_for_mmrotate1x():
    import collections
    from collections.abc import Mapping, Sequence, Iterable
    collections.Mapping = Mapping
    collections.Sequence = Sequence
    collections.Iterable = Iterable

monkey_patch_of_collections_typehint_for_mmrotate1x()

register_all_modules_mmdet(init_default_scope=False)
register_all_modules(init_default_scope=False)


def prepare_dota_evaluator():
    evaluator = Evaluator(DOTAMetric(metric="mAP"))
    evaluator.dataset_meta = {
        'classes':
        ('plane', 'baseball-diamond', 'bridge', 'ground-track-field',
         'small-vehicle', 'large-vehicle', 'ship', 'tennis-court',
         'basketball-court', 'storage-tank', 'soccer-ball-field', 'roundabout',
         'harbor', 'swimming-pool', 'helicopter'),
    }
    return evaluator


def download_and_inference_pretrained_model(model_name, save_root):
    ckpt_fname = mim.download('mmrotate', [model_name], dest_root=os.path.join(save_root, model_name))[0]

    cfg_fpath = os.path.join(save_root, model_name, f'{model_name}.py')
    ckpt_fpath = os.path.join(save_root, model_name, ckpt_fname)

    cfg = Config.fromfile(cfg_fpath)
    cfg.launcher = "none"
    cfg.work_dir = save_root
    cfg.load_from = ckpt_fpath
    cfg.test_dataloader.dataset.data_root = "../playground/data/split_ss_dota"

    # inference on trainval set
    runner = Runner.from_cfg(cfg)
    runner.test_evaluator.metrics.append(
        DumpResults(out_file_path=os.path.join(save_root, model_name, 'results_trainval.pkl')))
    runner.test()

    # inference on test set
    cfg.test_dataloader.dataset.ann_file = ''
    cfg.test_dataloader.dataset.data_prefix = dict(img_path='test/images/')
    cfg.test_dataloader.dataset.pipeline = cfg.test_pipeline
    cfg.test_evaluator = dict(
        type='DOTAMetric',
        format_only=True,
        merge_patches=True,
        outfile_prefix=os.path.join(save_root, model_name, 'test'))
    runner = Runner.from_cfg(cfg)
    runner.test_evaluator.metrics.append(
        DumpResults(out_file_path=os.path.join(save_root, model_name, 'results_test.pkl')))

    runner.test()
    

def get_trainval_results_of_different_settings(model_name, save_root):
    results = {}
    for threshold in (0.1, 0.2, 0.25, 0.3, 0.35, 0.4, 0.5, 0.7):
        for hack_score in [True, False]:
            print("".join(["="*10, f" score threshold={threshold} ", "="*10]))

            evaluator = prepare_dota_evaluator()

            results_trainval = load(os.path.join(save_root, model_name, 'results_trainval.pkl'))   
            for res in results_trainval:
                keep = res["pred_instances"]["scores"] > threshold
                res["pred_instances"]["scores"] = res["pred_instances"]["scores"][keep]
                res["pred_instances"]["labels"] = res["pred_instances"]["labels"][keep]
                res["pred_instances"]["bboxes"] = res["pred_instances"]["bboxes"][keep]
                
                if hack_score:
                    res["pred_instances"]["scores"] = torch.ones_like(res["pred_instances"]["scores"])
                    
            map = evaluator.offline_evaluate(data_samples=results_trainval, chunk_size=128)['dota/mAP']
            
            if threshold not in results:
                results[threshold] = {}
            if hack_score:
                results[threshold]["score=1"] = map
            else:
                results[threshold]["score=score"] = map
                
    results = pandas.DataFrame.from_dict(results).T
    print(results)
    results.to_csv(os.path.join(save_root, model_name, 'results_trainval_score_threshold.csv'))
    return results
    
# for model_name in tasks:  # TODO: de-comment these lines to download and inference the pretrained model
#     save_root = f"./eval_mmrotate/{model_name}"
#     download_and_inference_pretrained_model(model_name, save_root)
    
for model_name in tasks:
    save_root = f"./eval_mmrotate/{model_name}"
    get_trainval_results_of_different_settings(model_name, save_root)

processing rotated-fcos-le90_r50_fpn_1x_dota...


Output()

[32mSuccessfully downloaded rotated_fcos_r50_fpn_1x_dota_le90-d87568ed.pth to /home/lqy/florence-dota/scripts_py/eval_mmrotate/rotated-fcos-le90_r50_fpn_1x_dota/rotated-fcos-le90_r50_fpn_1x_dota[0m
[32mSuccessfully dumped rotated-fcos-le90_r50_fpn_1x_dota.py to /home/lqy/florence-dota/scripts_py/eval_mmrotate/rotated-fcos-le90_r50_fpn_1x_dota/rotated-fcos-le90_r50_fpn_1x_dota[0m
01/04 13:51:47 - mmengine - [4m[97mINFO[0m - 
------------------------------------------------------------
System environment:
    sys.platform: linux
    Python: 3.10.12 (main, Jul  5 2023, 18:54:27) [GCC 11.2.0]
    CUDA available: True
    MUSA available: False
    numpy_random_seed: 1500387243
    GPU 0,1,2,3,4: NVIDIA GeForce RTX 4090
    CUDA_HOME: /usr/local/cuda-12.1
    NVCC: Cuda compilation tools, release 12.1, V12.1.105
    GCC: gcc (Ubuntu 9.5.0-1ubuntu1~22.04) 9.5.0
    PyTorch: 2.3.0+cu121
    PyTorch compiling details: PyTorch built with:
  - GCC 9.3
  - C++ Version: 201703
  - Intel(R) o

  return _VF.meshgrid(tensors, **kwargs)  # type: ignore[attr-defined]


01/04 13:51:55 - mmengine - [4m[97mINFO[0m - Epoch(test) [   50/21046]    eta: 0:23:09  time: 0.0662  data_time: 0.0181  memory: 434  
01/04 13:51:56 - mmengine - [4m[97mINFO[0m - Epoch(test) [  100/21046]    eta: 0:15:40  time: 0.0236  data_time: 0.0018  memory: 434  
01/04 13:51:58 - mmengine - [4m[97mINFO[0m - Epoch(test) [  150/21046]    eta: 0:12:54  time: 0.0213  data_time: 0.0017  memory: 434  
01/04 13:51:59 - mmengine - [4m[97mINFO[0m - Epoch(test) [  200/21046]    eta: 0:11:29  time: 0.0211  data_time: 0.0016  memory: 434  
01/04 13:52:00 - mmengine - [4m[97mINFO[0m - Epoch(test) [  250/21046]    eta: 0:10:36  time: 0.0207  data_time: 0.0015  memory: 434  
01/04 13:52:01 - mmengine - [4m[97mINFO[0m - Epoch(test) [  300/21046]    eta: 0:10:00  time: 0.0208  data_time: 0.0015  memory: 434  
01/04 13:52:02 - mmengine - [4m[97mINFO[0m - Epoch(test) [  350/21046]    eta: 0:09:37  time: 0.0214  data_time: 0.0015  memory: 434  
01/04 13:52:03 - mmengine - [4m[



01/04 14:00:50 - mmengine - [4m[97mINFO[0m - Distributed training is not used, all SyncBatchNorm (SyncBN) layers in the model will be automatically reverted to BatchNormXd layers if they are used.
01/04 14:00:50 - mmengine - [4m[97mINFO[0m - Hooks will be executed in the following order:
before_run:
(VERY_HIGH   ) RuntimeInfoHook                    
(BELOW_NORMAL) LoggerHook                         
 -------------------- 
before_train:
(VERY_HIGH   ) RuntimeInfoHook                    
(NORMAL      ) IterTimerHook                      
(VERY_LOW    ) CheckpointHook                     
 -------------------- 
before_train_epoch:
(VERY_HIGH   ) RuntimeInfoHook                    
(NORMAL      ) IterTimerHook                      
(NORMAL      ) DistSamplerSeedHook                
 -------------------- 
before_train_iter:
(VERY_HIGH   ) RuntimeInfoHook                    
(NORMAL      ) IterTimerHook                      
 -------------------- 
after_train_iter:
(VERY_HIGH   ) Runti

Output()

[32mSuccessfully downloaded rotated_retinanet_obb_r50_fpn_1x_dota_le90-c0097bc4.pth to /home/lqy/florence-dota/scripts_py/eval_mmrotate/rotated-retinanet-rbox-le90_r50_fpn_1x_dota/rotated-retinanet-rbox-le90_r50_fpn_1x_dota[0m
[32mSuccessfully dumped rotated-retinanet-rbox-le90_r50_fpn_1x_dota.py to /home/lqy/florence-dota/scripts_py/eval_mmrotate/rotated-retinanet-rbox-le90_r50_fpn_1x_dota/rotated-retinanet-rbox-le90_r50_fpn_1x_dota[0m
01/04 14:05:16 - mmengine - [4m[97mINFO[0m - 
------------------------------------------------------------
System environment:
    sys.platform: linux
    Python: 3.10.12 (main, Jul  5 2023, 18:54:27) [GCC 11.2.0]
    CUDA available: True
    MUSA available: False
    numpy_random_seed: 681878584
    GPU 0,1,2,3,4: NVIDIA GeForce RTX 4090
    CUDA_HOME: /usr/local/cuda-12.1
    NVCC: Cuda compilation tools, release 12.1, V12.1.105
    GCC: gcc (Ubuntu 9.5.0-1ubuntu1~22.04) 9.5.0
    PyTorch: 2.3.0+cu121
    PyTorch compiling details: PyTorch buil



01/04 14:05:27 - mmengine - [4m[97mINFO[0m - Distributed training is not used, all SyncBatchNorm (SyncBN) layers in the model will be automatically reverted to BatchNormXd layers if they are used.
01/04 14:05:27 - mmengine - [4m[97mINFO[0m - Hooks will be executed in the following order:
before_run:
(VERY_HIGH   ) RuntimeInfoHook                    
(BELOW_NORMAL) LoggerHook                         
 -------------------- 
before_train:
(VERY_HIGH   ) RuntimeInfoHook                    
(NORMAL      ) IterTimerHook                      
(VERY_LOW    ) CheckpointHook                     
 -------------------- 
before_train_epoch:
(VERY_HIGH   ) RuntimeInfoHook                    
(NORMAL      ) IterTimerHook                      
(NORMAL      ) DistSamplerSeedHook                
 -------------------- 
before_train_iter:
(VERY_HIGH   ) RuntimeInfoHook                    
(NORMAL      ) IterTimerHook                      
 -------------------- 
after_train_iter:
(VERY_HIGH   ) Runti



01/04 14:15:30 - mmengine - [4m[97mINFO[0m - Distributed training is not used, all SyncBatchNorm (SyncBN) layers in the model will be automatically reverted to BatchNormXd layers if they are used.
01/04 14:15:30 - mmengine - [4m[97mINFO[0m - Hooks will be executed in the following order:
before_run:
(VERY_HIGH   ) RuntimeInfoHook                    
(BELOW_NORMAL) LoggerHook                         
 -------------------- 
before_train:
(VERY_HIGH   ) RuntimeInfoHook                    
(NORMAL      ) IterTimerHook                      
(VERY_LOW    ) CheckpointHook                     
 -------------------- 
before_train_epoch:
(VERY_HIGH   ) RuntimeInfoHook                    
(NORMAL      ) IterTimerHook                      
(NORMAL      ) DistSamplerSeedHook                
 -------------------- 
before_train_iter:
(VERY_HIGH   ) RuntimeInfoHook                    
(NORMAL      ) IterTimerHook                      
 -------------------- 
after_train_iter:
(VERY_HIGH   ) Runti

In [7]:
# Downloaded checkpoints for DIOR dataset

tasks = (
    "rotated-retinanet-rbox-le90_r50_fpn_1x_dior", 
)

import os
import mim
import pandas

import torch

from mmengine.fileio import load
from mmengine.config import Config
from mmengine.evaluator import DumpResults, Evaluator
from mmengine.runner import Runner

from mmrotate.evaluation import DOTAMetric
from mmrotate.utils import register_all_modules
from mmdet.utils import register_all_modules as register_all_modules_mmdet


def monkey_patch_of_collections_typehint_for_mmrotate1x():
    import collections
    from collections.abc import Mapping, Sequence, Iterable
    collections.Mapping = Mapping
    collections.Sequence = Sequence
    collections.Iterable = Iterable

monkey_patch_of_collections_typehint_for_mmrotate1x()

register_all_modules_mmdet(init_default_scope=False)
register_all_modules(init_default_scope=False)


def prepare_evaluator():
    evaluator = Evaluator(DOTAMetric(metric="mAP"))
    evaluator.dataset_meta = {
        'classes':
        ('airplane', 'airport', 'baseballfield', 'basketballcourt', 'bridge',
         'chimney', 'expressway-service-area', 'expressway-toll-station',
         'dam', 'golffield', 'groundtrackfield', 'harbor', 'overpass', 'ship',
         'stadium', 'storagetank', 'tenniscourt', 'trainstation', 'vehicle', 'windmill'),
    }
    return evaluator


def download_and_inference_pretrained_model(model_name, save_root):
    ckpt_fname = mim.download('mmrotate', [model_name], dest_root=os.path.join(save_root, model_name))[0]

    cfg_fpath = os.path.join(save_root, model_name, f'{model_name}.py')
    ckpt_fpath = os.path.join(save_root, model_name, ckpt_fname)

    cfg = Config.fromfile(cfg_fpath)
    cfg.launcher = "none"
    cfg.work_dir = save_root
    cfg.load_from = ckpt_fpath
    cfg.val_dataloader.dataset.data_root = "../playground/data/DIOR"
    cfg.test_dataloader.dataset.data_root = "../playground/data/DIOR"

    # inference on test set
    runner = Runner.from_cfg(cfg)
    runner.test_evaluator.metrics.append(
        DumpResults(out_file_path=os.path.join(save_root, model_name, 'results_test.pkl')))
    runner.test()

    # inference on trainval set
    cfg.test_dataloader = cfg.val_dataloader
    cfg.test_evaluator = cfg.val_evaluator
    runner = Runner.from_cfg(cfg)
    runner.test_evaluator.metrics.append(
        DumpResults(out_file_path=os.path.join(save_root, model_name, 'results_trainval.pkl')))
    runner.test()
    

def get_trainval_results_of_different_settings(model_name, save_root):
    results = {}
    for threshold in (0.1, 0.2, 0.25, 0.3, 0.35, 0.4, 0.5, 0.7):
        for hack_score in [True, False]:
            print("".join(["="*10, f" score threshold={threshold} ", "="*10]))

            evaluator = prepare_evaluator()

            results_test = load(os.path.join(save_root, model_name, 'results_test.pkl'))   
            for res in results_test:
                keep = res["pred_instances"]["scores"] > threshold
                res["pred_instances"]["scores"] = res["pred_instances"]["scores"][keep]
                res["pred_instances"]["labels"] = res["pred_instances"]["labels"][keep]
                res["pred_instances"]["bboxes"] = res["pred_instances"]["bboxes"][keep]
                
                if hack_score:
                    res["pred_instances"]["scores"] = torch.ones_like(res["pred_instances"]["scores"])
                    
            map = evaluator.offline_evaluate(data_samples=results_test, chunk_size=128)['dota/mAP']
            
            if threshold not in results:
                results[threshold] = {}
            if hack_score:
                results[threshold]["score=1"] = map
            else:
                results[threshold]["score=score"] = map
                
    results = pandas.DataFrame.from_dict(results).T
    print(results)
    results.to_csv(os.path.join(save_root, model_name, 'results_test_score_threshold.csv'))
    return results
    
for model_name in tasks:  # TODO: de-comment these lines to download and inference the pretrained model
    save_root = f"./eval_mmrotate/{model_name}"
    download_and_inference_pretrained_model(model_name, save_root)
    
for model_name in tasks:
    save_root = f"./eval_mmrotate/{model_name}"
    get_trainval_results_of_different_settings(model_name, save_root)

01/04 16:43:05 - mmengine - [4m[97mINFO[0m - 
---------------iou_thr: 0.5---------------
[2025-01-04 16:43:08,228] [INFO] [real_accelerator.py:203:get_accelerator] Setting ds_accelerator to cuda (auto detect)
[2025-01-04 16:43:08,268] [INFO] [real_accelerator.py:203:get_accelerator] Setting ds_accelerator to cuda (auto detect)
[2025-01-04 16:43:08,299] [INFO] [real_accelerator.py:203:get_accelerator] Setting ds_accelerator to cuda (auto detect)
[2025-01-04 16:43:08,334] [INFO] [real_accelerator.py:203:get_accelerator] Setting ds_accelerator to cuda (auto detect)
01/04 16:43:29 - mmengine - [4m[97mINFO[0m - 
+-------------------------+-------+-------+--------+-------+
| class                   | gts   | dets  | recall | ap    |
+-------------------------+-------+-------+--------+-------+
| airplane                | 8212  | 7426  | 0.555  | 0.341 |
| airport                 | 666   | 3264  | 0.339  | 0.051 |
| baseballfield           | 3434  | 5778  | 0.771  | 0.355 |
| basketballc

In [None]:
# Checkpoints trained on localhost

tasks = (
    dict(
        work_dir="../playground/mmrotate_workdir/rotated-fcos-le90_r50_fpn_1x_dior-2",
        cfg_fpath="../playground/mmrotate_workdir/rotated-fcos-le90_r50_fpn_1x_dior-2/rotated-fcos-le90_r50_fpn_1x_dior.py",
        ckpt_fpath="../playground/mmrotate_workdir/rotated-fcos-le90_r50_fpn_1x_dior-2/epoch_12.pth",
    ), 
    dict(
        work_dir="../playground/mmrotate_workdir/rotated-fcos-le90_r50_fpn_1x_fair",
        cfg_fpath="../playground/mmrotate_workdir/rotated-fcos-le90_r50_fpn_1x_fair/rotated-fcos-le90_r50_fpn_1x_fair.py",
        ckpt_fpath="../playground/mmrotate_workdir/rotated-fcos-le90_r50_fpn_1x_fair/epoch_12.pth",
    ),
    dict(
        work_dir="../playground/mmrotate_workdir/rotated-retinanet-rbox-le90_r50_fpn_1x_dota",
        cfg_fpath="../playground/mmrotate_workdir/rotated-retinanet-rbox-le90_r50_fpn_1x_dota/rotated-retinanet-rbox-le90_r50_fpn_1x_dota.py",
        ckpt_fpath="../playground/mmrotate_workdir/rotated-retinanet-rbox-le90_r50_fpn_1x_dota/epoch_12.pth",
    ), 
    dict(
        work_dir="../playground/mmrotate_workdir/rotated-retinanet-rbox-le90_r50_fpn_1x_fair",
        cfg_fpath="../playground/mmrotate_workdir/rotated-retinanet-rbox-le90_r50_fpn_1x_fair/rotated-retinanet-rbox-le90_r50_fpn_1x_fair.py",
        ckpt_fpath="../playground/mmrotate_workdir/rotated-retinanet-rbox-le90_r50_fpn_1x_fair/epoch_12.pth",
    ),
    dict(
        work_dir="../playground/mmrotate_workdir/rotated-fcos-le90_r50_fpn_6x_srsdd",
        cfg_fpath="../playground/mmrotate_workdir/rotated-fcos-le90_r50_fpn_6x_srsdd/rotated-fcos-le90_r50_fpn_6x_srsdd.py",
        ckpt_fpath="../playground/mmrotate_workdir/rotated-fcos-le90_r50_fpn_6x_srsdd/epoch_72.pth",
    ), 
    dict(
        work_dir="../playground/mmrotate_workdir/rotated-retinanet-rbox-le90_r50_fpn_6x_srsdd",
        cfg_fpath="../playground/mmrotate_workdir/rotated-retinanet-rbox-le90_r50_fpn_6x_srsdd/rotated-retinanet-rbox-le90_r50_fpn_6x_srsdd.py",
        ckpt_fpath="../playground/mmrotate_workdir/rotated-retinanet-rbox-le90_r50_fpn_6x_srsdd/epoch_72.pth",
    ),
)

import os
import mim
import pandas

import torch

from mmengine.fileio import load
from mmengine.config import Config
from mmengine.evaluator import DumpResults, Evaluator
from mmengine.runner import Runner

from mmrotate.utils import register_all_modules
from mmdet.utils import register_all_modules as register_all_modules_mmdet


def monkey_patch_of_collections_typehint_for_mmrotate1x():
    import collections
    from collections.abc import Mapping, Sequence, Iterable
    collections.Mapping = Mapping
    collections.Sequence = Sequence
    collections.Iterable = Iterable

monkey_patch_of_collections_typehint_for_mmrotate1x()

register_all_modules_mmdet(init_default_scope=False)
register_all_modules(init_default_scope=False)


def prepare_evaluator(dataset_name):
    if dataset_name == "dota":
        from mmrotate.evaluation import DOTAMetric
        evaluator = Evaluator(DOTAMetric(metric="mAP"))
        evaluator.dataset_meta = {
            'classes':
            ('plane', 'baseball-diamond', 'bridge', 'ground-track-field',
             'small-vehicle', 'large-vehicle', 'ship', 'tennis-court',
             'basketball-court', 'storage-tank', 'soccer-ball-field', 'roundabout',
             'harbor', 'swimming-pool', 'helicopter'),
        }
    elif dataset_name == "fair":
        from lmmrotate.modules.fair_metric import FAIRMetric
        evaluator = Evaluator(FAIRMetric(metric="mAP"))
        evaluator.dataset_meta = {
            'classes':
            ('Boeing737', 'Boeing747', 'Boeing777', 'Boeing787', 'C919', 'A220',
            'A321', 'A330', 'A350', 'ARJ21', 'Passenger Ship', 'Motorboat',
            'Fishing Boat', 'Tugboat', 'Engineering Ship', 'Liquid Cargo Ship',
            'Dry Cargo Ship', 'Warship', 'Small Car', 'Bus', 'Cargo Truck',
            'Dump Truck', 'Van', 'Trailer', 'Tractor', 'Excavator',
            'Truck Tractor', 'Basketball Court', 'Tennis Court', 'Football Field',
            'Baseball Field', 'Intersection', 'Roundabout', 'Bridge'),
        }
    elif dataset_name == "dior":
        from mmrotate.evaluation import DOTAMetric
        evaluator = Evaluator(DOTAMetric(metric="mAP"))
        evaluator.dataset_meta = {
            'classes':
            ('airplane', 'airport', 'baseballfield', 'basketballcourt', 'bridge',
             'chimney', 'expressway-service-area', 'expressway-toll-station',
             'dam', 'golffield', 'groundtrackfield', 'harbor', 'overpass', 'ship',
             'stadium', 'storagetank', 'tenniscourt', 'trainstation', 'vehicle', 'windmill'),
        }
    elif dataset_name == "srsdd":
        from mmrotate.evaluation import RotatedCocoMetric
        evaluator = Evaluator(RotatedCocoMetric(metric='bbox', classwise=True))
        evaluator.dataset_meta = {
            'classes':
            ('Cell-Container', 'Container', 'Dredger', 'Fishing', 'LawEnforce', 'ore-oil'),
        }
    return evaluator


def inference_localhost_model(cfg_fpath, ckpt_fpath, work_dir, **kwargs):
    cfg = Config.fromfile(cfg_fpath)
    cfg.launcher = "none"
    cfg.load_from = ckpt_fpath
    cfg.val_dataloader.dataset.data_root = os.path.join("../", cfg.val_dataloader.dataset.data_root)
    cfg.test_dataloader.dataset.data_root = os.path.join("../", cfg.test_dataloader.dataset.data_root)

    if "dior" in cfg_fpath or "srsdd" in cfg_fpath:
        # inference on test set
        runner = Runner.from_cfg(cfg)
        runner.test_evaluator.metrics.append(
            DumpResults(out_file_path=os.path.join(work_dir, 'results_test.pkl')))
        runner.test()

    elif "fair" in cfg_fpath:
        from lmmrotate.modules.fair_dataset import FAIRDOTADataset
        from lmmrotate.modules.fair_metric import FAIRMetric

        # inference on test set
        cfg.test_evaluator.outfile_prefix = os.path.join(work_dir, 'test')
        runner = Runner.from_cfg(cfg)
        runner.test_evaluator.metrics.append(
            DumpResults(out_file_path=os.path.join(work_dir, 'results_test.pkl')))
        runner.test()

        # inference on trainval set
        cfg.test_dataloader = cfg.val_dataloader
        cfg.test_evaluator = cfg.val_evaluator
        runner = Runner.from_cfg(cfg)
        runner.test_evaluator.metrics.append(
            DumpResults(out_file_path=os.path.join(work_dir, 'results_trainval.pkl')))
        runner.test()

    elif "dota" in cfg_fpath:
        # inference on trainval set
        runner = Runner.from_cfg(cfg)
        runner.test_evaluator.metrics.append(
            DumpResults(out_file_path=os.path.join(work_dir, 'results_trainval.pkl')))
        runner.test()

        # inference on test set
        cfg.test_dataloader.dataset.ann_file = ''
        cfg.test_dataloader.dataset.data_prefix = dict(img_path='test/images/')
        cfg.test_dataloader.dataset.pipeline = cfg.test_pipeline
        cfg.test_evaluator = dict(
            type='DOTAMetric',
            format_only=True,
            merge_patches=True,
            outfile_prefix=os.path.join(work_dir, 'test', 'dota_Task1'))
        runner = Runner.from_cfg(cfg)
        runner.test_evaluator.metrics.append(
            DumpResults(out_file_path=os.path.join(work_dir, 'results_test.pkl')))
        runner.test()
    else:
        raise NotImplementedError
    

def get_results_of_different_settings(work_dir, **kwargs):
    results = {}

    if "dior" in work_dir:
        split, dataset_name = "test", "dior"
    elif "srsdd" in work_dir:
        split, dataset_name = "test", "srsdd"
    elif "fair" in work_dir:
        split, dataset_name = "trainval", "fair"
    elif "dota" in work_dir:
        split, dataset_name = "trainval", "dota"
    else:
        raise NotImplementedError

    for threshold in (0.1, 0.2, 0.25, 0.3, 0.35, 0.4, 0.5, 0.7):
        for hack_score in [True, False]:
            print("".join(["="*10, f" score threshold={threshold} ", "="*10]))

            evaluator = prepare_evaluator(dataset_name)

            results_test = load(os.path.join(work_dir, f'results_{split}.pkl'))   
            for res in results_test:
                keep = res["pred_instances"]["scores"] > threshold
                res["pred_instances"]["scores"] = res["pred_instances"]["scores"][keep]
                res["pred_instances"]["labels"] = res["pred_instances"]["labels"][keep]
                res["pred_instances"]["bboxes"] = res["pred_instances"]["bboxes"][keep]
                
                if hack_score:
                    res["pred_instances"]["scores"] = torch.ones_like(res["pred_instances"]["scores"])
                    
            mAP = evaluator.offline_evaluate(data_samples=results_test, chunk_size=128)
            # import ipdb; ipdb.set_trace()
            mAP = mAP.get('dota/mAP', mAP.get('fair1m/mAP', mAP.get('r_coco/bbox_mAP_50')))
            
            if threshold not in results:
                results[threshold] = {}
            if hack_score:
                results[threshold]["score=1"] = mAP
            else:
                results[threshold]["score=score"] = mAP
                
    results = pandas.DataFrame.from_dict(results).T
    print(results)
    results.to_csv(os.path.join(work_dir, f'results_{split}_score_threshold.csv'))
    return results
    

for task in tasks:  # TODO: de-comment these lines to inference the trained model
    inference_localhost_model(**task)
    
for task in tasks:
    get_results_of_different_settings(**task)

In [None]:
from mmrotate.evaluation import DOTAMetric
work_dir = "../playground/mmrotate_workdir/rotated-retinanet-rbox-le90_r50_fpn_1x_dota"
cfg_fpath = "../playground/mmrotate_workdir/rotated-retinanet-rbox-le90_r50_fpn_1x_dota/rotated-retinanet-rbox-le90_r50_fpn_1x_dota.py"
ckpt_fpath = "../playground/mmrotate_workdir/rotated-retinanet-rbox-le90_r50_fpn_1x_dota/epoch_12.pth"

evaluator = Evaluator(
    DOTAMetric(format_only=True, merge_patches=True, outfile_prefix=os.path.join(work_dir, 'test_0.35above-score=1'))
)

evaluator.dataset_meta = {
    'classes':
    ('plane', 'baseball-diamond', 'bridge', 'ground-track-field',
    'small-vehicle', 'large-vehicle', 'ship', 'tennis-court',
    'basketball-court', 'storage-tank', 'soccer-ball-field', 'roundabout',
    'harbor', 'swimming-pool', 'helicopter'),
}

results_test = load(os.path.join(work_dir, 'results_test.pkl'))   
for res in results_test:
    keep = res["pred_instances"]["scores"] > 0.35
    res["pred_instances"]["scores"] = res["pred_instances"]["scores"][keep]
    res["pred_instances"]["labels"] = res["pred_instances"]["labels"][keep]
    res["pred_instances"]["bboxes"] = res["pred_instances"]["bboxes"][keep]
    res["pred_instances"]["scores"] = torch.ones_like(res["pred_instances"]["scores"])
    
evaluator.offline_evaluate(data_samples=results_test, chunk_size=128)

In [None]:
tasks = (
    dict(work_dir="../playground/mmrotate_workdir/rotated-fcos-le90_r50_fpn_1x_fair"),
    dict(work_dir="../playground/mmrotate_workdir/rotated-retinanet-rbox-le90_r50_fpn_1x_fair"),
)

def eval_for_fair(work_dir, **kwargs):
    if "fcos" in work_dir:
        threshold = 0.25
    elif "retinanet" in work_dir:
        threshold = 0.35
    else:
        raise NotImplementedError

    evaluator = Evaluator(
        DOTAMetric(format_only=True, merge_patches=True, outfile_prefix=os.path.join(work_dir, f'test_{threshold}above-score=1'))
    )

    evaluator.dataset_meta = {
        'classes':
        ('Boeing737', 'Boeing747', 'Boeing777', 'Boeing787', 'C919', 'A220',
         'A321', 'A330', 'A350', 'ARJ21', 'Passenger Ship', 'Motorboat',
         'Fishing Boat', 'Tugboat', 'Engineering Ship', 'Liquid Cargo Ship',
         'Dry Cargo Ship', 'Warship', 'Small Car', 'Bus', 'Cargo Truck',
         'Dump Truck', 'Van', 'Trailer', 'Tractor', 'Excavator',
         'Truck Tractor', 'Basketball Court', 'Tennis Court', 'Football Field',
         'Baseball Field', 'Intersection', 'Roundabout', 'Bridge'),
    }

    results_test = load(os.path.join(work_dir, 'results_test.pkl'))   
    for res in results_test:
        keep = res["pred_instances"]["scores"] > threshold
        res["pred_instances"]["scores"] = res["pred_instances"]["scores"][keep]
        res["pred_instances"]["labels"] = res["pred_instances"]["labels"][keep]
        res["pred_instances"]["bboxes"] = res["pred_instances"]["bboxes"][keep]
        res["pred_instances"]["scores"] = torch.ones_like(res["pred_instances"]["scores"])
        
    evaluator.offline_evaluate(data_samples=results_test, chunk_size=128)

for task in tasks:
    eval_for_fair(**task)
