# Validate WILDS Metadata Extension

In [1]:
import json
import pathlib
import os
import tempfile

import pandas as pd
from wilds import get_dataset
import geemap
import ee
import numpy as np

# Resolve the project layout from the current working directory:
# os.path.abspath('') expands the empty path to the process's cwd
# (presumably the folder holding this notebook -- verify when running from
# a different location).
file_path = os.path.abspath('')

# The project root is taken to be two levels above that directory and is
# expected to contain a "data" folder.
PROJECT_ROOT = pathlib.Path(file_path).parent.parent.resolve()
DATA_DIR = PROJECT_ROOT / "data"

# Fail early and name the offending path. is_dir() is used instead of a plain
# existence check so that a regular file with the same name is rejected too.
for required_dir in (PROJECT_ROOT, DATA_DIR):
    if not required_dir.is_dir():
        raise NotADirectoryError(
            f"Expected an existing directory at: {required_dir}"
        )

# Earth Engine cloud project passed to ee.Initialize below.
EE_PROJECT_NAME = 'seeing-the-big-picture'

try:
    ee.Authenticate()
    ee.Initialize(project=EE_PROJECT_NAME)
except Exception:
    # Print a hint for the user, then re-raise so the original error and
    # traceback stay visible.
    print("Please authenticate Earth Engine: earthengine authenticate")
    raise

  from pkg_resources import parse_version


In [8]:
# WILDS dataset object for "fmow"; indexed further down to fetch one sample.
dataset = get_dataset(dataset="fmow")

# Extended metadata table, read from the project's data directory.
extended_metadata_csv = DATA_DIR / "rgb_metadata_wilds_extended.csv"
metadata = pd.read_csv(extended_metadata_csv)

In [10]:
# Set to True to inspect a random sample instead of the one named below.
CHOOSE_RANDOM = False

# Identify one specific sample; only used when CHOOSE_RANDOM is False.
split = "train"
category = "airport"
sample_id = "32_0"

if CHOOSE_RANDOM:
    # Draw uniformly from the rows whose split is not "seq". Filtering first
    # (rather than resampling until a non-"seq" row comes up) cannot loop
    # forever when no such row exists; DataFrame.sample raises instead.
    sample_meta_df = metadata.loc[metadata["split"] != "seq"].sample()
else:
    img_filename = f"{category}_{sample_id}_rgb.jpg"
    # Boolean row mask: matching image filename AND matching split.
    cond_df = (
        (metadata["img_filename"] == img_filename)
        & (metadata["split"] == split)
    )
    sample_meta_df = metadata.loc[cond_df]
    if sample_meta_df.empty:
        raise IndexError(
            f"No metadata row with img_filename={img_filename!r} "
            f"and split={split!r}"
        )
    if len(sample_meta_df) > 1:
        raise ValueError(
            f"Expected exactly one metadata row for img_filename="
            f"{img_filename!r} and split={split!r}, "
            f"found {len(sample_meta_df)}"
        )

# Row label of the selected sample in `metadata`.
sample_idx = sample_meta_df.index[0]
# Turn the one-row DataFrame into a Series keyed by column name.
sample_meta = sample_meta_df.squeeze()

# NOTE(review): this assumes the row labels of `metadata` coincide with the
# integer indices of `dataset` -- confirm, e.g. by comparing the returned
# label/metadata against `sample_meta`.
img_pil, y_tensor, _ = dataset[sample_idx]  # Triple: (PIL, label, metadata)

In [None]:
# Centre and extent of the sample image, as stored in the metadata row
# (presumably in decimal degrees, judging by the column names -- confirm).
img_center_lon = sample_meta["img_center_lon"]
img_center_lat = sample_meta["img_center_lat"]
img_span_deg = sample_meta["img_span_deg"]

# Bounds of the fmow image itself: the span is centred on the image centre,
# so each edge lies half a span away. Laid out as
# ((lat_min, lon_min), (lat_max, lon_max)) to fit geemap.ImageOverlay bounds.
half_span = img_span_deg / 2
image_bounds = (
    (img_center_lat - half_span, img_center_lon - half_span),
    (img_center_lat + half_span, img_center_lon + half_span),
)

# Enlarged bounds for the Landsat8 download: the same centre with the half
# span scaled by buffer_factor. Laid out as
# [lon_min, lat_min, lon_max, lat_max] to fit ee.Geometry.Rectangle.
buffer_factor = 6.0
buffered_half_span = half_span * buffer_factor
extended_bounds = [
    img_center_lon - buffered_half_span,
    img_center_lat - buffered_half_span,
    img_center_lon + buffered_half_span,
    img_center_lat + buffered_half_span,
]

((np.float64(-1.0140575646754744), np.float64(-79.48694022837181)), (np.float64(-0.9691395896577617), np.float64(-79.4420222533541)))


In [12]:
def scale_l8(image):
    """Return the SR_B2/SR_B3/SR_B4 bands of *image*, linearly rescaled.

    Each selected band is multiplied by 0.0000275 and then shifted by -0.2.
    These values appear to be the scale factor and offset published for
    Landsat Collection 2 Level-2 surface reflectance -- confirm against the
    dataset documentation.

    Args:
        image: Object exposing ``select``, ``multiply`` and ``add``; used
            below as the mapper for an ``ee.ImageCollection``.

    Returns:
        The result of ``select(...).multiply(...).add(...)`` on *image*.
    """
    rgb_bands = image.select(['SR_B2', 'SR_B3', 'SR_B4'])
    scaled = rgb_bands.multiply(0.0000275)
    return scaled.add(-0.2)


# Interactive map centred on the sample image, plus the enlarged rectangle
# used to query Landsat imagery around it.
m = geemap.Map(center=[img_center_lat, img_center_lon], zoom=17)
region = ee.Geometry.Rectangle(extended_bounds)

# Landsat 8 collection restricted to scenes touching the region, with every
# scene reduced to its rescaled RGB bands by scale_l8.
landsat8_collection = ee.ImageCollection('LANDSAT/LC08/C02/T1_L2')
l8 = landsat8_collection.filterBounds(region).map(scale_l8)

# First scene when ordered by the CLOUD_COVER property.
least_cloudy = l8.sort('CLOUD_COVER').first()

# Per-pixel validity of that scene: take each RGB band's mask and keep the
# minimum across bands, so a pixel only counts when all three are unmasked.
least_cloudy_rgb = least_cloudy.select(['SR_B2', 'SR_B3', 'SR_B4'])
rgb_mask = least_cloudy_rgb.mask().reduce(ee.Reducer.min())

# Mean of the validity mask over the region, i.e. the share of the region
# covered by valid RGB pixels.
coverage_dict = rgb_mask.reduceRegion(
    reducer=ee.Reducer.mean(), geometry=region, scale=30, maxPixels=1e7
)

# The value is read back under the key 'min' (presumably the band name the
# min reducer produced above). Require at least 99% coverage.
least_cloudy_coverage = ee.Number(coverage_dict.get('min'))
rgb_ok = least_cloudy_coverage.gte(0.99)

# Use the single least-cloudy scene when it covers the region well enough,
# otherwise fall back to the per-pixel median over the whole collection.
composite = l8.median()
context = ee.Image(ee.Algorithms.If(rgb_ok, least_cloudy, composite))

landsat_vis = {'bands': ['SR_B4', 'SR_B3', 'SR_B2'], 'min': 0, 'max': 0.3}
m.addLayer(context, landsat_vis, 'Landsat8')
m.addLayer(region)

# Write the sample image to a temporary PNG and lay it over the map at the
# image's own bounds.
# NOTE(review): the file is saved by name while the temp handle is still
# open -- confirm this works on every platform this notebook must run on.
# NOTE(review): the file is removed when the with-block ends, so geemap
# presumably reads it while the overlay is constructed -- verify.
with tempfile.NamedTemporaryFile(suffix=".png") as temp:
    png_path = temp.name
    img_pil.save(png_path)
    overlay = geemap.ImageOverlay(
        url=png_path, bounds=image_bounds, name="PIL overlay"
    )
    m.add_layer(overlay)

# Last expression of the cell: renders the map widget.
m

Map(center=[np.float64(-0.991598577166618), np.float64(-79.46448124086295)], controls=(WidgetControl(options=[…