# Validate Landsat8 Download
This notebook is meant to combine the download of images based on selected metadata (from `validate_extended_metadata.ipynb`) with the actual cloud masking performed for the Landsat 8 download, to offer a quick investigation of the surrounding region for specific samples.

In [22]:
import pathlib
import os

import pandas as pd
from wilds import get_dataset
import geemap
import ee
import numpy as np
from geopy.distance import geodesic
from geopy.point import Point

# Resolve the project layout relative to the directory the notebook is run from.
file_path = os.path.abspath('')
PROJECT_ROOT = pathlib.Path(file_path).parent.parent.resolve()
DATA_DIR = PROJECT_ROOT / "data" / "fmow_landsat"
# Edge length (km) handed to `compute_img_span` for the Landsat 8 download region.
# NOTE(review): the origin of 29.896... and the reason for halving it are not
# visible in this notebook - TODO confirm against the download script.
DOWNLOAD_SPAN_KM = 29.896581425173256 / 2

# Fail early and name the offending path. `is_dir()` also rejects a regular
# file sitting at the expected location, which a bare existence check lets through.
for required_dir in (PROJECT_ROOT, DATA_DIR):
    if not required_dir.is_dir():
        raise NotADirectoryError(f"Required directory not found: {required_dir}")

EE_PROJECT_NAME = 'seeing-the-big-picture'

try:
    ee.Authenticate()
    ee.Initialize(project=EE_PROJECT_NAME)
except Exception:
    print("Please authenticate Earth Engine: earthengine authenticate")
    # Bare `raise` re-raises the active exception with its original traceback.
    raise

In [23]:
# WILDS "fmow" dataset object; a later cell indexes it with `sample_idx` to
# obtain the sample's (PIL image, label, metadata) triple.
dataset = get_dataset(dataset="fmow")
# Extended per-sample metadata table. Later cells read its "split",
# "img_filename", "img_center_lon", "img_center_lat" and "img_span_km" columns.
metadata = pd.read_csv(DATA_DIR / "rgb_metadata_extended.csv")

In [1]:
# Selection mode. CHOOSE_RANDOM takes precedence over CHOOSE_BY_IDX; if both
# are False the sample is looked up by (split, category, sample_id).
CHOOSE_RANDOM = False
CHOOSE_BY_IDX = True

# Used only for the lookup by file name.
split = "train"
category = "airport"
sample_id = "230_2"

# Used only when CHOOSE_BY_IDX is set (positional row index into `metadata`).
sample_idx = 183491

if CHOOSE_RANDOM:
    # Draw uniformly from all rows whose split is not "seq". Filtering first is
    # equivalent to re-drawing until a non-"seq" row comes up, but cannot loop.
    sample_meta_df = metadata.loc[metadata["split"] != "seq"].sample()
    sample_idx = sample_meta_df.index[0]
    # Turn the one-row frame into a Series.
    sample_meta = sample_meta_df.iloc[0]
elif CHOOSE_BY_IDX:
    # `iloc` with a scalar already returns a Series.
    sample_meta = metadata.iloc[sample_idx]
else:
    img_filename = f"{category}_{sample_id}_rgb.jpg"
    cond_df = (
        (metadata["img_filename"] == img_filename)
        & (metadata["split"] == split)
    )
    sample_meta_df = metadata.loc[cond_df]
    if sample_meta_df.empty:
        raise LookupError(
            f"No metadata row with img_filename={img_filename!r} and split={split!r}"
        )
    sample_idx = sample_meta_df.index[0]
    # Take the first match explicitly so `sample_meta` is always a Series that
    # agrees with `sample_idx`, even if several rows satisfy the condition.
    sample_meta = sample_meta_df.iloc[0]

# NOTE(review): `sample_idx` is an index label of `metadata` in two branches and
# a position in the third; this assumes the default RangeIndex from `read_csv`
# and that `dataset` is ordered like `metadata` - TODO confirm.
img_pil, y_tensor, _ = dataset[sample_idx]  # Triple: (PIL, label, metadata)

NameError: name 'metadata' is not defined

In [25]:
def compute_img_span(img_center_lon: float, img_center_lat: float, img_span_km: float) -> tuple[float, float]:
    """Convert a square footprint's edge length from kilometers to degrees.

    Walks half the span from the center towards north, east, south and west
    along the geodesic and measures the resulting extent in degrees.

    Args:
        img_center_lon (float): Longitude of the image center.
        img_center_lat (float): Latitude of the image center.
        img_span_km (float): Full span (edge length) of the image in kilometers.

    Returns:
        tuple[float, float]: Span of the image in degrees longitude and
            degrees latitude, in that order.
    """
    img_center = Point(img_center_lat, img_center_lon)
    half_span = geodesic(kilometers=(img_span_km / 2))

    # geopy bearings are measured clockwise from north: 0=N, 90=E, 180=S, 270=W.
    img_upper = half_span.destination(img_center, 0)
    img_right = half_span.destination(img_center, 90)
    img_lower = half_span.destination(img_center, 180)
    img_left = half_span.destination(img_center, 270)

    span_lat = np.abs(img_upper.latitude - img_lower.latitude)
    span_lon = np.abs(img_right.longitude - img_left.longitude)
    # Destination longitudes are normalized to [-180, 180]. A footprint that
    # straddles the antimeridian therefore yields a difference close to 360
    # degrees; take the short way around instead.
    if span_lon > 180:
        span_lon = 360 - span_lon
    return (span_lon, span_lat)

In [26]:
# Pull the footprint description of the selected sample out of its metadata row.
img_center_lon = sample_meta["img_center_lon"]
img_center_lat = sample_meta["img_center_lat"]
img_span_km = sample_meta["img_span_km"]

# Spans in degrees (longitude, latitude) for the fmow image and for the download region.
img_span_lon, img_span_lat = compute_img_span(img_center_lon, img_center_lat, img_span_km)
download_span_lon, download_span_lat = compute_img_span(img_center_lon, img_center_lat, DOWNLOAD_SPAN_KM)

# fmow image bounds as ((lat_min, lon_min), (lat_max, lon_max)) -
# the layout expected for geemap.ImageOverlay bounds.
image_bounds = (
    (img_center_lat - (img_span_lat / 2), img_center_lon - (img_span_lon / 2)),
    (img_center_lat + (img_span_lat / 2), img_center_lon + (img_span_lon / 2)),
)

# Landsat8 download bounds as [lon_min, lat_min, lon_max, lat_max] -
# the layout must fit ee.Geometry.Rectangle.
extended_bounds = [
    img_center_lon - (download_span_lon / 2),
    img_center_lat - (download_span_lat / 2),
    img_center_lon + (download_span_lon / 2),
    img_center_lat + (download_span_lat / 2),
]

In [27]:
# Map center as [lat, lon]; passed to geemap.Map further down.
center_coords = [img_center_lat, img_center_lon]
# Rectangle around the sample built from `extended_bounds`; used to filter the
# Landsat collection and as the geometry for the validity statistics.
region = ee.Geometry.Rectangle(extended_bounds)

# Bands rescaled by `scale_l8`, checked for validity and rendered on the map.
# NOTE(review): presumably red/green/blue for Landsat 8 - confirm against the band table.
optical_bands = ['SR_B4', 'SR_B3', 'SR_B2']
# Display range for the optical bands; layers are added after `scale_l8` has
# been applied, so min/max refer to the rescaled values.
vis_params = {'bands': optical_bands, 'min': 0, 'max': 0.3}

def scale_l8(image):
    """Return `image` with its optical bands linearly rescaled.

    Each band listed in `optical_bands` is replaced by
    ``value * 0.0000275 - 0.2``; all other bands are passed through unchanged.
    """
    rescaled = image.select(optical_bands).multiply(0.0000275).add(-0.2)
    # The third argument (overwrite=True) replaces the existing bands of the
    # same name instead of appending duplicates.
    return image.addBands(rescaled, optical_bands, True)

# Every image of the 'LANDSAT/LC08/C02/T1_L2' collection that intersects
# `region`, with the optical bands rescaled by `scale_l8`. No date filter is
# applied, so acquisitions from all dates are candidates.
l8 = (ee.ImageCollection('LANDSAT/LC08/C02/T1_L2')
       .filterBounds(region)
       .map(scale_l8))

In [28]:
def mask_l8_clouds(image: ee.Image) -> ee.Image:
    """Mask out every pixel that is flagged in the low bits of 'QA_PIXEL'.

    A pixel stays visible only if the five least significant bits of its
    'QA_PIXEL' value are all zero. For the meaning of the individual flags see:
        https://www.usgs.gov/landsat-missions/landsat-collection-2-quality-assessment-bands

    Args:
        image (ee.Image): Image providing a 'QA_PIXEL' band.

    Returns:
        ee.Image: The same image with its mask updated.
    """
    low_five_bits = 0b11111
    is_unflagged = image.select('QA_PIXEL').bitwiseAnd(low_five_bits).eq(0)
    return image.updateMask(is_unflagged)


def compute_validity_fraction(image: ee.Image, lazy: bool = True,
                              geometry: "ee.Geometry | None" = None) -> ee.Number:
    """Computes the fraction of valid pixels of an image.

    Valid pixels are those, which are not masked away, i.e. whose
    mask value is equal to one. A pixel counts as valid only if it is
    unmasked in all of the `optical_bands` (minimum over the band masks).

    Args:
        image (ee.Image): Image to compute the validity for.
        lazy (bool): Specifies whether the API call should be stacked (True)
            or executed directly (False).
        geometry (ee.Geometry | None): Area to average over. Defaults to the
            notebook-level `region` when omitted.

    Returns:
        ee.Number: Fraction of pixels, which are not masked. When `lazy` is
            False the already evaluated client-side value is returned instead.
    """
    if geometry is None:
        # Backward-compatible default: the notebook-level region.
        geometry = region

    validity = ee.Number(
        image.select(optical_bands)
        .mask()
        .reduce(ee.Reducer.min())
        .reduceRegion(
            reducer=ee.Reducer.mean(),
            geometry=geometry,
            scale=30,
            maxPixels=1e7)
        .get('min')
    )
    return validity if lazy else validity.getInfo()


def add_validity(collection: ee.ImageCollection,
                 geometry: "ee.Geometry | None" = None) -> ee.ImageCollection:
    """Stacks API call to compute the fraction of valid pixels for each image.

    Args:
        collection (ee.ImageCollection): Images to annotate.
        geometry (ee.Geometry | None): Area the validity is computed over.
            Defaults to the notebook-level `region` when omitted.

    Returns:
        ee.ImageCollection: The collection with a 'validity' property set on
            every image.
    """
    return collection.map(
        lambda img: img.set('validity', compute_validity_fraction(img, geometry=geometry))
    )


def get_least_cloudy_single_image(collection: ee.ImageCollection, region: ee.Geometry) -> ee.Image:
    """Return least cloudy image of the collection for the region.

    Args:
        collection (ee.ImageCollection): Cloud-masked candidate images.
        region (ee.Geometry): Area over which the validity is evaluated.

    Returns:
        ee.Image: Image with the highest 'validity' property.
    """
    # Pass `region` on explicitly so the ranking honors the argument rather
    # than whatever the notebook-level `region` happens to be.
    return add_validity(collection, geometry=region).sort('validity', False).first()


# Mask flagged pixels in every scene, then pick the scene with the highest
# fraction of valid pixels inside the region.
l8_cloud_masked = l8.map(mask_l8_clouds)
least_cloudy = get_least_cloudy_single_image(l8_cloud_masked, region)
least_cloudy_validity = least_cloudy.get('validity').getInfo()
print(least_cloudy_validity)

# Layers are added to the map in list order.
layers = [
    (least_cloudy.mask(), 'Least Cloudy Single Image Mask'),
    (least_cloudy, 'Least Cloudy Single Image'),
]

# A single scene is considered sufficient when more than 99% of its pixels are
# valid; otherwise a mosaic over all cloud-masked scenes is inspected as well.
was_single_used = least_cloudy_validity > 0.99
if not was_single_used:
    composite_mosaic = l8_cloud_masked.mosaic()
    print(compute_validity_fraction(composite_mosaic, lazy=False))
    layers.append((composite_mosaic, 'Mosaic'))

m = geemap.Map(center=center_coords, zoom=13)
for ee_object, layer_name in layers:
    m.addLayer(ee_object, vis_params, layer_name)

m.addLayer(region, None, 'Region')
m.addLayerControl()
m

1


Map(center=[np.float64(37.4110144791), np.float64(126.9044917405614)], controls=(WidgetControl(options=['posit…

# Problems

## Seasons
- Sample `67417` - the image selected as least cloudy is from `2019-03-18`, which appears to be snowy in southern Russia

## Optical issues
- Sample `344352` - green sprinkles overlaying everything 
- Sample `4158` - white everywhere

## Further
`93920`, `215411`, `86183`,  
`183491`