In [1]:
import json
import os
import warnings

import numpy as np
import pandas as pd
import torch
from PIL import Image
from scipy.spatial.distance import pdist
from sklearn.feature_extraction import DictVectorizer
from sklearn.metrics import top_k_accuracy_score
from sklearn.metrics.pairwise import cosine_similarity
from tqdm import tqdm
from ultralytics import YOLO

In [2]:
def jsonConvert(json_list, key_field):
    """Index a list of JSON objects by the value stored under ``key_field``.

    Args:
        json_list: Iterable of dict-like objects.
        key_field: Name of the field whose value becomes the dictionary key.

    Returns:
        A dict mapping each object's ``key_field`` value to a shallow copy of
        that object without the ``key_field`` entry. When several objects
        share a key value, the last one wins.

    Raises:
        KeyError: If an object has no ``key_field`` entry, or stores ``None``
            under it.
    """
    indexed = {}
    for entry in json_list:
        entry_key = entry.get(key_field)
        # Guard first: a missing key and an explicit None are both rejected.
        if entry_key is None:
            raise KeyError(f"Key '{key_field}' not found in JSON object: {entry}")
        indexed[entry_key] = {
            field: value for field, value in entry.items() if field != key_field
        }
    return indexed

In [3]:
def getImageSize(text_file_path, image_dir='../datasets/DOTAv1.5/images/val/', image_ext='.jpg'):
    """Return ``(width, height)`` of the image that a label file belongs to.

    The image is located by file stem: the label file's base name, with its
    final extension replaced by ``image_ext``, is looked up inside
    ``image_dir``. Resolving by base name (instead of string-replacing one
    specific label directory) keeps the lookup correct no matter which
    directory or path spelling the label file is passed with, and only the
    trailing extension is swapped.

    Args:
        text_file_path: Path to a label ``.txt`` file.
        image_dir: Directory holding the matching images. Defaults to the
            DOTAv1.5 validation image directory used by this notebook.
        image_ext: Extension (including the dot) of the image files.

    Returns:
        Tuple ``(width, height)`` as reported by ``PIL.Image.size``.

    Raises:
        Whatever ``PIL.Image.open`` raises when the image cannot be opened
        (for example ``FileNotFoundError``).
    """
    stem, _ = os.path.splitext(os.path.basename(text_file_path))
    image_path = os.path.join(image_dir, stem + image_ext)
    with Image.open(image_path) as img:
        width, height = img.size
        return width, height

In [4]:
# Integer class id -> class name. genYOLODesc looks up the id found in column 0
# of each label file here. The position of a name in the tuple is its id.
# NOTE(review): the validation output recorded in this notebook lists 16
# per-class scores, so ids 16 and 17 are presumably unused for DOTAv1.5 -
# TODO confirm.
class_mapping = {
    class_id: class_name
    for class_id, class_name in enumerate((
        'plane',
        'ship',
        'storage-tank',
        'baseball-diamond',
        'tennis-court',
        'basketball-court',
        'ground-track-field',
        'harbor',
        'bridge',
        'large-vehicle',
        'small-vehicle',
        'helicopter',
        'roundabout',
        'soccer-ball-field',
        'swimming-pool',
        'container-crane',
        'airport',
        'helipad',
    ))
}

In [5]:
def genYOLODesc(filename):
    """Build a text description and per-class properties from a label file.

    The file is read as space-separated values without a header. Column 0 is
    used as the class id (looked up in ``class_mapping``); columns 1, 3, 5, 7
    are treated as x coordinates and columns 2, 4, 6, 8 as y coordinates of
    four box corners, expressed as fractions of the image width and height
    returned by ``getImageSize``.

    For every class present, the number of boxes is reported. When a class has
    more than one box, the mean pairwise distance (in pixels) between box
    centroids is reported as well.

    Args:
        filename: Path to the label ``.txt`` file.

    Returns:
        Tuple ``(text, prop)``. ``text`` is the sentence description; ``prop``
        is a list of ``{'class', 'count', 'avg_dist'}`` dicts, where
        ``avg_dist`` is ``None`` for classes with a single box. If anything
        goes wrong while reading or processing the file, a warning is emitted
        and the best-effort fallback ``('A satellite image.', [])`` is
        returned.
    """
    try:
        size_x, size_y = getImageSize(filename)
        bb_file = pd.read_csv(filename, sep=' ', header=None)
        counts = bb_file[0].value_counts()
        text = 'A remote sensing image containing '
        prop = []
        for label, count in counts.items():
            class_name = class_mapping[int(label)]
            class_prop = {'class': class_name, 'count': count}
            if count > 1:
                class_info = bb_file[bb_file[0] == label].copy()
                # Scale the fractional corner coordinates to pixels.
                class_info[[1, 3, 5, 7]] = class_info[[1, 3, 5, 7]] * size_x
                class_info[[2, 4, 6, 8]] = class_info[[2, 4, 6, 8]] * size_y
                # Centroid of a box = mean of its four corners.
                class_info['x_centroid'] = class_info[[1, 3, 5, 7]].sum(axis=1) / 4
                class_info['y_centroid'] = class_info[[2, 4, 6, 8]].sum(axis=1) / 4
                avg_dist = pdist(class_info[['x_centroid', 'y_centroid']].values).mean()
                class_prop['avg_dist'] = avg_dist
                dist_text = f'with average distance {round(avg_dist,2)}px, '
            else:
                # A single box has no pairwise distance.
                class_prop['avg_dist'] = None
                dist_text = ', '
            # NOTE(review): the plural 's ' is always appended, so a single
            # instance renders as e.g. "1 ships , ". Left unchanged in case
            # descriptions generated elsewhere share this exact format.
            text = text + str(count) + ' ' + class_name.replace('-', ' ') + 's ' + dist_text
            prop.append(class_prop)
        # Drop the trailing ", " separator and close the sentence.
        text = text[:-2] + "."
        return text, prop
    except Exception as exc:
        # Best-effort fallback, but no longer silent: a bare `except:` would
        # also swallow KeyboardInterrupt/SystemExit and hide real failures.
        warnings.warn(
            f'genYOLODesc: falling back to generic description for {filename!r}: {exc!r}',
            stacklevel=2,
        )
        return 'A satellite image.', []


In [6]:
# Use the GPU when torch reports CUDA as available; otherwise fall back to the CPU.
# `device` is passed to the validation and prediction calls below.
device = 'cuda' if torch.cuda.is_available() else 'cpu'
print('Torch device: ', device)

Torch device:  cuda


In [7]:
# Image size for this run: it selects the checkpoint directory, is passed as
# `imgsz` to validation and prediction, and names the pred{max_imgsz}.json output.
max_imgsz = 960
# Load the best checkpoint saved under the matching run directory.
model = YOLO(f'./runs/obb/img{max_imgsz}/weights/best.pt')

In [8]:
# Validate the loaded checkpoint on the dataset configured in DOTAv1.5.yaml,
# at the same image size the checkpoint directory is named after.
metrics = model.val(data='DOTAv1.5.yaml', imgsz=max_imgsz, batch=8, device=device)

Ultralytics YOLOv8.2.75 🚀 Python-3.11.9 torch-2.4.0+cu121 CUDA:0 (NVIDIA GeForce RTX 3090, 24260MiB)
YOLOv8n-obb summary (fused): 187 layers, 3,080,339 parameters, 0 gradients, 8.3 GFLOPs


[34m[1mval: [0mScanning /home/ezraf/projects/PrioriSat/datasets/DOTAv1.5/labels/val.cache... 458 images, 0 backgrounds, 0 corrupt: 100%|██████████| 458/458 [00:00<?, ?it/s]
                 Class     Images  Instances      Box(P          R      mAP50  mAP50-95): 100%|██████████| 58/58 [00:13<00:00,  4.22it/s]


                   all        458      69565      0.724      0.474       0.53      0.392
                 plane         71       2550      0.913      0.731      0.808      0.655
                  ship        138      10765      0.903      0.529      0.645      0.465
          storage tank         60       2940      0.874       0.36      0.492       0.34
      baseball diamond         53        213      0.801      0.662      0.718      0.505
          tennis court         93        763      0.931       0.91      0.944      0.899
      basketball court         39        143      0.703      0.465      0.517      0.435
    ground track field         71        145      0.493       0.49      0.484      0.393
                harbor        116       2102      0.819      0.796      0.803      0.482
                bridge         75        466      0.664      0.215      0.288      0.117
         large vehicle        158       5139      0.839      0.711      0.758      0.592
         small vehicl

In [9]:
# Summary detection metrics from the `metrics` object returned by model.val().
print(metrics.box.map)  # mAP50-95
print(metrics.box.map50)  # mAP50
print(metrics.box.map75)  # mAP75
print(metrics.box.maps)  # per-category mAP50-95 values

0.39175053755241257
0.5301804798622533
0.43350317361873025
[     0.6546     0.46452     0.34028     0.50518     0.89853     0.43469     0.39333     0.48207     0.11737     0.59187     0.18508      0.2716     0.23622     0.36623     0.32644           0]


In [10]:
# Run inference on every validation image; save_txt=True writes one label file per image.
# NOTE(review): the description cell below reads from './runs/obb/predict/labels/'.
# Presumably a re-run writes to a new, incremented directory, leaving that path
# pointing at stale labels - confirm where this call saves before re-running.
results = model.predict('../datasets/DOTAv1.5/images/val', imgsz=max_imgsz, save_txt=True,  device=device)


image 1/458 /home/ezraf/projects/PrioriSat/YOLO/../datasets/DOTAv1.5/images/val/P0003.jpg: 864x960 31.4ms
image 2/458 /home/ezraf/projects/PrioriSat/YOLO/../datasets/DOTAv1.5/images/val/P0004.jpg: 960x544 29.8ms
image 3/458 /home/ezraf/projects/PrioriSat/YOLO/../datasets/DOTAv1.5/images/val/P0007.jpg: 960x672 33.3ms
image 4/458 /home/ezraf/projects/PrioriSat/YOLO/../datasets/DOTAv1.5/images/val/P0019.jpg: 544x960 32.0ms
image 5/458 /home/ezraf/projects/PrioriSat/YOLO/../datasets/DOTAv1.5/images/val/P0027.jpg: 736x960 31.8ms
image 6/458 /home/ezraf/projects/PrioriSat/YOLO/../datasets/DOTAv1.5/images/val/P0047.jpg: 960x512 31.9ms
image 7/458 /home/ezraf/projects/PrioriSat/YOLO/../datasets/DOTAv1.5/images/val/P0053.jpg: 960x960 3.4ms
image 8/458 /home/ezraf/projects/PrioriSat/YOLO/../datasets/DOTAv1.5/images/val/P0056.jpg: 896x960 32.6ms
image 9/458 /home/ezraf/projects/PrioriSat/YOLO/../datasets/DOTAv1.5/images/val/P0060.jpg: 640x960 31.6ms
image 10/458 /home/ezraf/projects/PrioriSat/YO

In [11]:
# Build one description record per predicted label file.
pred_descriptions = []
pred_dir = './runs/obb/predict/labels/'
# os.listdir returns entries in arbitrary order and includes anything in the
# directory; sort for a reproducible order and keep only label files.
pred_files = sorted(f for f in os.listdir(pred_dir) if f.endswith('.txt'))

for f in pred_files:
    src_path = os.path.join(pred_dir, f)
    desc, prop = genYOLODesc(src_path)
    # Named `record`, not `dict`: rebinding `dict` would shadow the builtin
    # for every cell executed afterwards in this kernel.
    record = {
        'filename': f,
        'description': desc,
        'properties': prop
    }
    pred_descriptions.append(record)

In [12]:
# Persist the generated descriptions as JSON; the file name carries the image
# size so runs at different sizes do not overwrite each other.
with open(f"./pred{max_imgsz}.json", "w") as outfile:
    json.dump(pred_descriptions, outfile)

In [13]:
# Ground-truth descriptions for the validation split.
test_data = pd.read_json('../datasets/DOTAv1.5/descriptions/val.json')
# Predicted properties only: the generated sentence is dropped and 'properties'
# is renamed to 'pred' so it does not collide with the ground-truth column.
pred = pd.read_json(f'./pred{max_imgsz}.json').drop('description', axis=1).rename(columns={'properties': 'pred'})
# Inner join: only files present in both the ground truth and the predictions are kept.
test_data = pd.merge(test_data, pred, on='filename', how='inner')

In [14]:
# Parse ground-truth and predicted properties into one {class: {other fields}}
# dict per image.
y_true = []
y_pred = []
for _, row in test_data.iterrows():
    # The str()/replace round-trip rewrites a Python-repr style value as JSON
    # and maps None to 0 so every field is numeric.
    # NOTE(review): this breaks if any value contains an apostrophe or the
    # substring "None"; confirm the stored format before relying on it.
    json_truth = json.loads(str(row['properties']).replace("'", '"').replace("None", "0"))
    y_true.append(jsonConvert(json_truth, 'class'))
    json_pred = json.loads(str(row['pred']).replace("'", '"').replace("None", "0"))
    y_pred.append(jsonConvert(json_pred, 'class'))

# similarity_mat[i][j]: similarity between prediction i and ground truth j,
# averaged over the classes present in ground truth j.
similarity_mat = []
for y_p in tqdm(y_pred):
    avg_sim = []

    for y_t in y_true:
        total_sim = 0
        # Iterate the dict itself (insertion order) rather than a set, so the
        # floating-point summation order is the same on every run.
        for key in y_t:
            # A class missing from the prediction contributes 0 to the sum.
            if key not in y_p:
                continue
            feats = DictVectorizer().fit_transform([y_t[key], y_p[key]])
            total_sim += cosine_similarity(feats[0], feats[1])[0][0]
        # A ground truth with no classes has nothing to match: score it 0
        # instead of dividing by zero.
        avg_sim.append(total_sim / len(y_t) if y_t else 0.0)
    similarity_mat.append(avg_sim)

100%|██████████| 450/450 [01:02<00:00,  7.16it/s]


In [15]:
# Stack the per-prediction rows into one 2-D score matrix
# (rows: predictions, columns: ground-truth descriptions).
similarity = np.stack(similarity_mat)
# Prediction i should retrieve ground-truth description i, so the target
# label of row i is simply i.
y_desc_true = np.arange(len(y_true))

# Description-wise
top_k_stats = [
    {'k': k, 'score': top_k_accuracy_score(y_desc_true, similarity, k=k)}
    for k in (1, 3, 5, 10, 20, 30)
]

In [16]:
# Display the top-k retrieval accuracy for each evaluated k.
top_k_stats

[{'k': 1, 'score': 0.19333333333333333},
 {'k': 3, 'score': 0.31333333333333335},
 {'k': 5, 'score': 0.38222222222222224},
 {'k': 10, 'score': 0.5266666666666666},
 {'k': 20, 'score': 0.6444444444444445},
 {'k': 30, 'score': 0.7266666666666667}]

In [17]:
# Display the similarity matrix (rows: predictions, columns: ground truths).
similarity

array([[    0.66615,     0.24969,     0.49929, ...,           0,     0.24981,     0.49998],
       [          0,        0.25,           0, ...,           0,        0.25,           0],
       [    0.39224,           0,     0.99978, ...,           0,     0.29411,     0.99285],
       ...,
       [          0,   0.0013191,           0, ...,           1,           0,           0],
       [    0.33654,           0,     0.99483, ...,           0,     0.75222,     0.99769],
       [    0.34112,           0,     0.99527, ...,           0,     0.25606,           1]])