In [1]:
import os
import sys
import math
import torch
import numpy as np
import matplotlib as mtp
import matplotlib.pyplot as plt
import pandas as pd

In [2]:
datasets_dir = os.path.join("/homeLocal/jpulzdeoliveira/datasets")
pastis_dir = os.path.join(datasets_dir, "PASTIS")

In [3]:
backend_device = "cuda" if torch.cuda.is_available() else "cpu"
print(f"Using {backend_device} as backend device")

Using cuda as backend device


In [11]:
import geopandas as gpd
import multiprocessing as mp
from skimage.measure import label, regionprops
from skimage.morphology import flood_fill
from torch.utils.data import Dataset
from tqdm import tqdm

def get_parcels(args):
    param, meta_patches = args
    parcels_data = {
        'id': [],
        'bbox': [],
        'label': [],
        'tmps_count': [],
    }
    for _, patch in meta_patches.iterrows():
        patch_id_path = os.path.join(param['root'], "ANNOTATIONS", f"ParcelIDs_{patch.ID_PATCH}.npy")
        target_path = os.path.join(param['root'], "ANNOTATIONS", f"TARGET_{patch.ID_PATCH}.npy")
        parcels = np.load(patch_id_path)
        target = np.load(target_path)[0].astype(int)
        parcels_label = label(parcels)
        regions = regionprops(parcels_label)

        count = 0
        for props in regions:
            minr, minc, maxr, maxc = props.bbox
            width = maxc - minc
            height = maxr - minr
            real_area = width * height
            aspect_ratio = width / height
            if real_area >= param['min_area'] and props.extent >= param['min_extent'] \
                and (param['max_ratio'] <= aspect_ratio <= param['min_ratio'] \
                or 1 / param['max_ratio'] >= aspect_ratio >= 1 / param['min_ratio']):
                count += 1
                parcels_data['id'].append(patch.ID_PATCH)
                parcels_data['bbox'].append(props.bbox)
                mask = parcels_label == props.label
                b_label = target[mask][0]
                parcels_data['label'].append(b_label)
                parcels_data['tmps_count'].append(patch.tmps_count)

    return pd.DataFrame(data=parcels_data)

def generate_parcels(
    root,
    cloud_aux,
    min_area=2304.0,
    min_ratio=1,
    max_ratio=3/5,
    min_extent=0.05,
    num_threads=-1
    ):
    data_root = os.path.join(root, "DATA_S2")
    
    print("Reading patch metadata...")
    meta_patch = gpd.read_file(os.path.join(root, "metadata.geojson"))
    meta_patch.sort_index(inplace=True)

    print("Reading cloud analisys metadata...")
    cloud_aux = pd.read_csv(cloud_aux)
    cloud_aux.rename(columns={'id': 'ID_PATCH'}, inplace=True)
    meta_patch = meta_patch.merge(cloud_aux, on='ID_PATCH')
    meta_patch.dropna(subset=['tmps_count'])
    
    num_processes = mp.cpu_count() if num_threads == -1 else num_threads
    chuncks = np.array_split(meta_patch, num_processes)
    param = {
        'root': root,
        'min_area': min_area,
        'min_ratio': min_ratio,
        'max_ratio': max_ratio,
        'min_extent': min_extent,
    }
    
    print("Generating parcels...")

    with mp.Pool(num_processes) as pool:
        result = pool.map(get_parcels, zip([param]*num_processes, chuncks,))
    parcels_df = pd.concat(result)

    print("Done.")

    return parcels_df

save_file = os.path.join("./pastis_parcels_cloud_analisys.csv")
cloud_aux = os.path.join("./cloud_aux.csv")
generate_parcels(
    pastis_dir,
    cloud_aux,
    num_threads=mp.cpu_count() * 3 // 5
    ).to_csv(save_file, index=False)

Reading patch metadata...
Reading cloud analisys metadata...
Generating parcels...


  return bound(*args, **kwds)
  return bound(*args, **kwds)
  return bound(*args, **kwds)
  return bound(*args, **kwds)
  return bound(*args, **kwds)
  return bound(*args, **kwds)
  return bound(*args, **kwds)
  return bound(*args, **kwds)
  return bound(*args, **kwds)
  return bound(*args, **kwds)
  return bound(*args, **kwds)
  return bound(*args, **kwds)
  return bound(*args, **kwds)


Done.
