In [None]:
import pandas as pd
import os
import json
from scipy.spatial.distance import pdist

In [39]:
def genDesc(filename):
    try:
        bb_file = pd.read_csv(filename, sep=' ', header=None)
        counts = bb_file[8].value_counts()
        text = 'A remote sensing image containing '
        prop = []
        for label, count in counts.items():
            text = text + str(count) + ' ' + label.replace('-', ' ') + 's, '
            class_prop = {'class':label, 'count': count}
            if count >1:
                class_info = bb_file[bb_file[8]==label].drop(9, axis=1)
                class_info['x_centroid'] = class_info[[0,2,4,6]].sum(axis=1)/4
                class_info['y_centroid'] = class_info[[1,3,5,7]].sum(axis=1)/4
                avg_spread = pdist(class_info[['x_centroid', 'y_centroid']].values).mean()
                class_prop['avg_spread'] = avg_spread
            else:
                class_prop['avg_spread'] = None
        text = text[:-2] + "."
        return text, prop
    except:
        return 'A satellite image.', []


In [40]:
train_descriptions = []
val_descriptions = []
train_dir = '../datasets/DOTAv2/labels/DOTA-v2.0_train_original_format/'
val_dir = '../datasets/DOTAv2/labels/DOTA-v2.0_val_original_format/'
train_files = [f for f in os.listdir(train_dir)]
val_files = [f for f in os.listdir(val_dir)]

for f in train_files:
    src_path = os.path.join(train_dir, f)
    desc, prop = genDesc(src_path)
    dict = {
        'filename': f,
        'description': desc,
        'properties': prop
    }
    train_descriptions.append(dict)

for f in val_files:
    src_path = os.path.join(val_dir, f)
    desc, prop = genDesc(src_path)
    dict = {
        'filename': f,
        'description': desc,
        'properties': prop
    }
    val_descriptions.append(dict)

In [None]:
with open("../datasets/DOTAv2/descriptions/train.json", "w") as outfile:
    json.dump(train_descriptions, outfile)
with open("../datasets/DOTAv2/descriptions/val.json", "w") as outfile:
    json.dump(val_descriptions, outfile)

In [38]:
val_descriptions

[{'filename': 'P0613.txt',
  'description': 'A remote sensing image containing 6 harbors, 2 ships.',
  'properties': [{'class': 'harbor',
    'count': 6,
    'avg_spread': 423.4217873956969},
   {'class': 'ship', 'count': 2, 'avg_spread': 405.80668119191927}]},
 {'filename': 'P0964.txt',
  'description': 'A remote sensing image containing 12 harbors, 3 small vehicles, 2 ships, 2 swimming pools.',
  'properties': [{'class': 'harbor',
    'count': 12,
    'avg_spread': 1160.9920300272904},
   {'class': 'small-vehicle', 'count': 3, 'avg_spread': 93.83282925426185},
   {'class': 'ship', 'count': 2, 'avg_spread': 762.6201954446262},
   {'class': 'swimming-pool', 'count': 2, 'avg_spread': 539.706227961101}]},
 {'filename': 'P2236.txt',
  'description': 'A remote sensing image containing 29 small vehicles, 21 planes, 3 large vehicles, 1 helicopters.',
  'properties': [{'class': 'small-vehicle',
    'count': 29,
    'avg_spread': 220.04642936407205},
   {'class': 'plane', 'count': 21, 'avg_spr

In [33]:
bb_file = pd.read_csv('../datasets/DOTAv2/labels/DOTA-v2.0_train_original_format/P0002.txt', sep=' ', header=None)
class_info = bb_file[bb_file[8]=='large-vehicle'].drop(9, axis=1)
class_info['x_centroid'] = class_info[[0,2,4,6]].sum(axis=1)/4
class_info['y_centroid'] = class_info[[1,3,5,7]].sum(axis=1)/4
avg_spread = pdist(class_info[['x_centroid', 'y_centroid']].values).mean()

In [37]:
avg_spread

912.3875113022985