# The Big Picture - Context Retrieval

This notebook combines a high-resolution FMoW satellite image with a broader but lower-resolution Landsat image from Google Earth Engine by patching the high-resolution image into the low-resolution one.

In [19]:
"""
Requirements:
- earthengine-api
- geemap
- rasterio
- numpy
- boto3 (for AWS S3 access)
- Pillow
- pyproj
"""

import ee
import geemap
import boto3
from botocore import UNSIGNED
from botocore.config import Config
import tempfile
import os
import json

In [22]:
# Browse the top of the FMoW RGB dataset in the public SpaceNet bucket.
bucket_name = 'spacenet-dataset'
prefix = 'Hosted-Datasets/fmow/fmow-rgb/'

# Requests are sent unsigned, so no AWS credentials are needed.
s3 = boto3.client('s3', region_name='us-east-1', config=Config(signature_version=UNSIGNED))

# Only the first 100 keys under the prefix are requested.
response = s3.list_objects_v2(Bucket=bucket_name, Prefix=prefix, MaxKeys=100)
response

{'ResponseMetadata': {'RequestId': 'KAQH0D3M83WXYBYE',
  'HostId': 'b0g+JnkPRGUlC2bwE4ez+VJdSxG26DNIVNz5+E3GOytqwqIbY5Ek09wQQnbubQJ7kQCt3cyxM+I=',
  'HTTPStatusCode': 200,
  'HTTPHeaders': {'x-amz-id-2': 'b0g+JnkPRGUlC2bwE4ez+VJdSxG26DNIVNz5+E3GOytqwqIbY5Ek09wQQnbubQJ7kQCt3cyxM+I=',
   'x-amz-request-id': 'KAQH0D3M83WXYBYE',
   'date': 'Thu, 13 Nov 2025 12:55:44 GMT',
   'x-amz-bucket-region': 'us-east-1',
   'content-type': 'application/xml',
   'transfer-encoding': 'chunked',
   'server': 'AmazonS3'},
  'RetryAttempts': 0},
 'IsTruncated': True,
 'Contents': [{'Key': 'Hosted-Datasets/fmow/fmow-rgb/CHANGELOG.md',
   'LastModified': datetime.datetime(2019, 8, 15, 22, 57, 17, tzinfo=tzutc()),
   'ETag': '"6f566b599066484cef89a275f5d46b7a"',
   'Size': 1707,
   'StorageClass': 'STANDARD'},
  {'Key': 'Hosted-Datasets/fmow/fmow-rgb/IARPA-fMoW.pdf',
   'LastModified': datetime.datetime(2019, 8, 15, 22, 57, 17, tzinfo=tzutc()),
   'ETag': '"738b61c620dfac03e9cd24b190244858"',
   'Size': 6926

In [9]:
def download_fmow_image(split, category, index=0):
    """Download one FMoW RGB image and its JSON metadata from the public S3 bucket.

    Lists up to 100 objects under
    ``Hosted-Datasets/fmow/fmow-rgb/{split}/{category}/`` in the
    ``spacenet-dataset`` bucket using unsigned requests, selects one of the
    ``.jpg``/``.png``/``.tif`` keys, and downloads it together with the
    ``.json`` object sharing its base name into a fresh temporary directory.

    Args:
        split: Dataset split folder name, e.g. ``"train"``.
        category: Category folder name, e.g. ``"airport"``.
        index: Position of the image among the listed image keys. Values past
            the end are clamped to the last listed image.

    Returns:
        dict: The parsed contents of the image's metadata JSON file.

    Raises:
        FileNotFoundError: If no image keys are listed under the prefix.
    """
    s3 = boto3.client(
        's3',
        region_name='us-east-1',
        config=Config(signature_version=UNSIGNED)
    )
    bucket_name = 'spacenet-dataset'
    prefix = f'Hosted-Datasets/fmow/fmow-rgb/{split}/{category}/'
    response = s3.list_objects_v2(Bucket=bucket_name, Prefix=prefix, MaxKeys=100)

    # The 'Contents' key is absent when nothing matches the prefix, so default
    # to an empty listing instead of failing with a bare KeyError.
    image_files = [obj['Key'] for obj in response.get('Contents', [])
                   if obj['Key'].endswith(('.jpg', '.png', '.tif'))]
    if not image_files:
        raise FileNotFoundError(
            f"No images found under s3://{bucket_name}/{prefix}"
        )
    image_key = image_files[min(index, len(image_files) - 1)]

    # Create the download directory only once there is something to download.
    temp_dir = tempfile.mkdtemp()

    # Keep the object's real extension so a .png/.tif is not saved as .jpg.
    base_path, image_ext = os.path.splitext(image_key)
    image_path = os.path.join(temp_dir, f"fmow_image{image_ext}")

    print("Downloading image")
    s3.download_file(bucket_name, image_key, image_path)
    print(f"Downloaded successfully to {image_path}")

    # The metadata object shares the image key's base name.
    metadata_key = base_path + '.json'
    metadata_path = os.path.join(temp_dir, "fmow_metadata.json")

    s3.download_file(bucket_name, metadata_key, metadata_path)
    with open(metadata_path, 'r', encoding='utf-8') as f:
        metadata = json.load(f)
    print("Metadata downloaded successfully")

    return metadata

# Fetch the first listed image of the "airport" category from the training split
# and print its metadata dictionary.
metadata = download_fmow_image(split="train", category="airport")
print(metadata)

Downloading image
Downloaded successfully to /tmp/tmpmlp_e459/fmow_image.jpg
Metadata downloaded successfully
{'img_filename': 'airport_0_0_msrgb.jpg', 'bounding_boxes': [{'category': 'airport', 'ID': 144, 'visible': 'True', 'box': [174, 363, 1429, 661]}], 'gsd': 2.45140385628, 'img_width': 2050, 'img_height': 1578, 'mean_pixel_height': 2.1906917870723e-05, 'mean_pixel_width': 2.1910128878048e-05, 'utm': '36S', 'country_code': 'TUR', 'cloud_cover': 0, 'timestamp': '2002-02-07T08:43:59Z', 'scan_direction': 'Forward', 'approximate_wavelengths': [661, 545, 477], 'pan_resolution_dbl': 0.613170742989, 'pan_resolution_start_dbl': 0.613170742989, 'pan_resolution_end_dbl': 0.613170742989, 'pan_resolution_min_dbl': 0.613170742989, 'pan_resolution_max_dbl': 0.613170742989, 'multi_resolution_dbl': 2.45140385628, 'multi_resolution_start_dbl': 2.45140385628, 'multi_resolution_end_dbl': 2.45140385628, 'multi_resolution_min_dbl': 2.45140385628, 'multi_resolution_max_dbl': 2.45140385628, 'target_azimu

In [17]:
# Ground footprint of the image: ground sample distance times pixel count.
# NOTE(review): presumably `gsd` is metres per pixel, making these metres — confirm.
# Both products are named because a cell only displays its last expression;
# a missing 'gsd' key surfaces as a KeyError on the first line.
footprint_height = metadata['gsd'] * metadata['img_height']
footprint_width = metadata['gsd'] * metadata['img_width']
footprint_width

5025.377905374

In [33]:
import tarfile
import io

# Client that sends unsigned requests, so no AWS credentials are required.
s3 = boto3.client(
    's3',
    region_name='us-east-1',
    config=Config(signature_version=UNSIGNED),
)

# Location of the ground-truth archive inside the bucket.
bucket, key = 'spacenet-dataset', 'Hosted-Datasets/fmow/fmow-rgb/groundtruth.tar.bz2'

print("Downloading groundtruth.tar.bz2 ...")
obj = s3.get_object(Bucket=bucket, Key=key)
# The whole response body is read into an in-memory buffer.
stream = io.BytesIO(obj['Body'].read())
print("Download successful")

Downloading groundtruth.tar.bz2 ...
Download successful


In [31]:
# Query the archive's object headers and print its reported ContentLength.
# Relies on `s3`, `bucket` and `key` defined in the archive download cell.
# NOTE(review): ContentLength is presumably in bytes — confirm against the S3 docs.
response = s3.head_object(Bucket=bucket, Key=key)
size = response['ContentLength']
print(size)

245218563


In [34]:
# Read a single metadata JSON straight out of the in-memory archive.
# tarfile expects `fileobj` to be positioned at 0; rewinding first keeps this
# cell re-runnable after the buffer has already been read once.
stream.seek(0)
with tarfile.open(fileobj=stream, mode='r:bz2') as tar:
    file = tar.extractfile('val/golf_course/golf_course_13/golf_course_13_3_rgb.json')
    print(json.load(file))

{'img_filename': 'golf_course_13_3_rgb.jpg', 'bounding_boxes': [{'raw_category': 'Golf Course', 'category': 'golf_course', 'raw_location': 'POLYGON ((-69.9573118949585933 41.6878386408654507, -69.9529339536472747 41.6878386408654507, -69.9529339536472747 41.6848813904204221, -69.9573118949585933 41.6848813904204221, -69.9573118949585933 41.6878386408654507))', 'ID': -1, 'spatial_reference': 'GCS_WGS_1984', 'epsg': '4326', 'crowd_rank': 0.819959, 'box': [20, 77, 1464, 989]}, {'category': 'golf_course', 'ID': 16296, 'visible': 'True', 'box': [20, 77, 1464, 989]}], 'gsd': 0.333779722452, 'img_width': 1502, 'img_height': 1126, 'mean_pixel_height': 2.990142007106e-06, 'mean_pixel_width': 2.990397070572e-06, 'utm': '19T', 'country_code': 'USA', 'cloud_cover': 0, 'timestamp': '2016-06-27T15:55:29Z', 'scan_direction': 'Forward', 'approximate_wavelengths': [661, 545, 477], 'pan_resolution_dbl': 0.333779722452, 'pan_resolution_start_dbl': 0.332446455956, 'pan_resolution_end_dbl': 0.335012316704,