In [1]:
import glob
import json
import os

import geopandas as gpd
import pandas as pd
import shapely
from pyproj import Transformer
from rasterio.crs import CRS
from shapely.geometry import Polygon
from tqdm import tqdm

root = "/workspace/storage/data/fmow-groundtruth"

In [2]:
metadatas, splits = [], []
for split in ["train", "val", "test_gt"]:
    files = glob.glob(os.path.join(root, split, "**", "**", "*ms.json"))
    metadatas.extend(files)
    splits.extend([split] * len(files))

In [3]:
data = {}
geoms = []
for path, split in tqdm(zip(metadatas, splits, strict=False), total=len(metadatas)):
    with open(path) as f:
        metadata = json.load(f)

    key = f"{split}_{metadata['img_filename']}"
    data[key] = {
        "split": split,
        "image": metadata["img_filename"],
        "label": metadata["img_filename"].rsplit("_")[0],
        "date": metadata["timestamp"],
        "height": metadata["img_height"],
        "width": metadata["img_width"],
        "gsd": metadata["gsd"],
        "platform": metadata["sensor_platform_name"],
    }

    poly = shapely.from_wkt(metadata["raw_location"])
    if not metadata["epsg"] == "4326":
        src_crs = CRS.from_epsg(metadata["epsg"])
        dst_crs = CRS.from_epsg("4326")
        t = Transformer.from_crs(src_crs, dst_crs)
        poly = Polygon(t.transform(*poly.exterior.coords))

    geoms.append(poly)

df = gpd.GeoDataFrame(pd.DataFrame(data).T, geometry=geoms, crs="EPSG:4326")
df.to_parquet("../data/fmow.parquet")

100%|██████████| 470085/470085 [00:33<00:00, 14115.07it/s]


In [4]:
df = gpd.read_parquet("../data/fmow.parquet")
df.head()

Unnamed: 0,split,image,label,date,height,width,gsd,platform,geometry
train_crop_field_1123_0_ms.tif,train,crop_field_1123_0_ms.tif,crop,2016-07-28T10:19:00Z,181,256,1.954873,WORLDVIEW02,"POLYGON ((8.45780 45.32947, 8.46230 45.32947, ..."
train_crop_field_1123_1_ms.tif,train,crop_field_1123_1_ms.tif,crop,2013-09-09T10:52:02Z,190,268,1.868486,WORLDVIEW02,"POLYGON ((8.45780 45.32946, 8.46229 45.32946, ..."
train_crop_field_5093_2_ms.tif,train,crop_field_5093_2_ms.tif,crop,2016-12-20T04:06:38Z,229,234,1.948818,GEOEYE01,"POLYGON ((100.60908 14.06644, 100.61358 14.066..."
train_crop_field_5093_1_ms.tif,train,crop_field_5093_1_ms.tif,crop,2017-01-15T04:14:46Z,392,402,1.25131,WORLDVIEW03_VNIR,"POLYGON ((100.60908 14.06644, 100.61358 14.066..."
train_crop_field_6001_0_ms.tif,train,crop_field_6001_0_ms.tif,crop,2016-06-09T11:18:33Z,309,386,1.304175,WORLDVIEW03_VNIR,"POLYGON ((-4.07591 37.20586, -4.07142 37.20586..."
