In [2]:
import eotdl


In this notebook we generate the dataset for the use case

1. Generate the list of Satellogic images to be used (including each image's bounding box and acquisition time)
2. Explore available S1/S2 images with different criteria
	- bounding box overlap
	- acquisition time overlap
3. Download matching S1/S2 at given resolution
4. Generate metadata and ingest into EOTDL

In [10]:
import geopandas as gpd

# Load the Satellogic EarthView item index into a GeoDataFrame.
# Columns previewed for this file: json_path, zone, region, date, geometry.
# Alternative input that additionally carries a `matches` column:
#   '~/DeLocal_Data/satellogic-earthview-items-with-matches.parquet'
# NOTE(review): later cells read `gdf.matches`, which the preview of this file does not show —
# confirm which of the two parquet files is meant to be loaded here.
gdf = gpd.read_parquet(
	'~/Desktop/EarthPulse_Local_Data/satellogic-earthview-items.parquet'
)

# Preview the first rows
gdf.head()


Unnamed: 0,json_path,zone,region,date,geometry
0,data/json/zone=04N/region=603411_2346301/date=...,04N,603411_2346301,2022-09-15,"POLYGON ((-157.99991 21.21181, -157.99988 21.2..."
1,data/json/zone=04N/region=603411_2346685/date=...,04N,603411_2346685,2022-09-15,"POLYGON ((-157.99988 21.21528, -157.99986 21.2..."
2,data/json/zone=04N/region=603411_2347069/date=...,04N,603411_2347069,2022-09-15,"POLYGON ((-157.99986 21.21875, -157.99984 21.2..."
3,data/json/zone=04N/region=603411_2347453/date=...,04N,603411_2347453,2022-09-15,"POLYGON ((-157.99984 21.22221, -157.99981 21.2..."
4,data/json/zone=04N/region=603411_2347837/date=...,04N,603411_2347837,2022-09-15,"POLYGON ((-157.99981 21.22568, -157.99979 21.2..."


In [4]:
# (number of rows, number of columns) of the loaded item table
gdf.shape

NameError: name 'gdf' is not defined

In [5]:
# Record, per row, how many entries its `matches` value holds (len() of each cell).
# This adds the `num_matches` column to `gdf` in place.
gdf['num_matches'] = gdf['matches'].map(len)

# Distribution: how many rows have 0, 1, 2, ... matches
gdf.num_matches.value_counts()

NameError: name 'gdf' is not defined

In [6]:
# Peek at the `matches` entry of one randomly chosen row (no seed, so it differs on every run)
gdf['matches'].sample(1).values

NameError: name 'gdf' is not defined

In [7]:
# Draw a single random row to work with. No random_state is given, so a different
# row is picked on every run.
sample = gdf.sample(n=1)

# `.values` on the one-row selection yields a length-1 array wrapping that row's entry.
s1_matches = sample['s1_matches'].values
s2_matches = sample['s2_matches'].values

# Print the Sentinel-1 candidates, then the Sentinel-2 candidates
print(f"s1_matches: {s1_matches}\n")
print(s1_matches)
print(f"s2_matches {s2_matches}\n")
print(s2_matches)

NameError: name 'gdf' is not defined

In [9]:
from eotdl.tools import bbox_from_centroid
from eotdl.access import download_sentinel_imagery
from download_images import download_sat_image, filter_matches
import shutil
import os

# Size, in pixels, of the Sentinel patch requested around the Satellogic centroid
# (combined with pixel_size=10 in the bbox_from_centroid call below).
WIDTH = 38
HEIGHT = 38

# Start from an empty scratch directory. ignore_errors=True keeps the very first run
# (when 'sample' does not exist yet) from failing with FileNotFoundError.
shutil.rmtree('sample', ignore_errors=True)
os.makedirs('sample', exist_ok=True)


# starting from HR data (Satellogic), find available S1/S2 that matches in time/area
# take all matches, remove ones with >= 10% cloud cover,

print("collecting...")

# in short, satellogic segment downloads from url using metadata, and saves in chunks to local sample dir
# the sentinel 2 segment downloads image from local path (/fastdata/Satellogic/data/),

# sat (hr) image is huge. sample var is the details of the sat image, including its metadata and where to download it.
# we download the sentinel image of a small box.

# satellogic (HR DATA)

print("downloading sat (hr) image...")
date = sample.date.iloc[0]
json_path = sample.json_path.iloc[0]
centroid = sample.geometry.iloc[0].centroid  # centre point of the Satellogic footprint

# only downloads if doesn't exist in fastdata
dst_path_sat = download_sat_image(json_path=json_path, output_path="sample/satellogic")  # todo: copy instead of download

# get best fit match by filtering clouds and date.
s1_match = filter_matches(matches=s1_matches, date=date)
s2_match = filter_matches(matches=s2_matches, date=date)


# sentinel data
# NOTE(review): centroid.y is passed as `x` and centroid.x as `y` — confirm this is the
# axis order bbox_from_centroid expects.
custom_bbox = bbox_from_centroid(x=centroid.y, y=centroid.x, pixel_size=10, width=WIDTH, height=HEIGHT)

# Output names reuse the metadata file name with a per-sensor suffix
sent1_name = json_path.split('/')[-1].replace('_metadata.json', '_S1GRD')
sent2_name = json_path.split('/')[-1].replace('_metadata.json', '_S2L2A')

print("downloading S1 match...")
download_sentinel_imagery(
	output="sample/sentinel1",
	time_interval=s1_match["properties"]["datetime"],  # was the undefined name `sent1_match`
	bounding_box=custom_bbox,
	collection_id="sentinel-1-grd",
	name=sent1_name,
)

print("downloading S2 match...")
download_sentinel_imagery(
	output="sample/sentinel2",
	time_interval=s2_match["properties"]["datetime"],  # was the undefined name `sent2_match`
	bounding_box=custom_bbox,
	collection_id="sentinel-2-l2a",  # fixed typo: was "sentinal-2-l2a"
	name=sent2_name,
)

# Paths of the downloaded rasters, used by the plotting cell
dst_path_sentinel1 = "sample/sentinel1/" + sent1_name + '.tif'
dst_path_sentinel2 = "sample/sentinel2/" + sent2_name + '.tif'

# todo: add checking for sentinel file already in fast data before downloading.
    

ModuleNotFoundError: No module named 'sentinelhub'

In [25]:
import matplotlib.pyplot as plt
import rasterio as rio

# One row, three panels: Satellogic | Sentinel-1 | Sentinel-2.
# Every raster is read inside a `with` block so its file handle is closed right away
# (the bare rio.open(...) calls used before were never closed).
fig, axs = plt.subplots(1, 3, figsize=(12, 4))

# Satellogic (HR): first three bands, divided by 3000 and clipped to [0, 1] for display
with rio.open(dst_path_sat) as src:
	sat_img = src.read()
axs[0].imshow((sat_img[:3, ...].transpose(1, 2, 0) / 3000).clip(0, 1))
axs[0].set_title("Satellogic (HR)")
axs[0].axis('off')

# Sentinel-1: try array indices (3, 2, 1) as a colour composite; if the raster has
# fewer than four bands the indexing raises IndexError and the first band is shown in grayscale.
with rio.open(dst_path_sentinel1) as src:
	s1_img = src.read()
try:
	axs[1].imshow((s1_img[(3, 2, 1), ...].transpose(1, 2, 0) / 3000).clip(0, 1))
except IndexError as e:
	print(f"s1 image loading error: {e}")
	axs[1].imshow(s1_img[0], cmap='gray')  # same data as read(1): the first band
axs[1].set_title("Sentinel-1")
axs[1].axis('off')

# Sentinel-2 (RGB: bands 4, 3, 2, i.e. array indices 3, 2, 1)
with rio.open(dst_path_sentinel2) as src:
	s2_img = src.read()
axs[2].imshow((s2_img[(3, 2, 1), ...].transpose(1, 2, 0) / 3000).clip(0, 1))
axs[2].set_title("Sentinel-2 (RGB)")
axs[2].axis('off')

plt.tight_layout()
plt.show()


ValueError: Key backend: 'module://matplotlib_inline.backend_inline' is not a valid value for backend; supported values are ['gtk3agg', 'gtk3cairo', 'gtk4agg', 'gtk4cairo', 'macosx', 'nbagg', 'notebook', 'qtagg', 'qtcairo', 'qt5agg', 'qt5cairo', 'tkagg', 'tkcairo', 'webagg', 'wx', 'wxagg', 'wxcairo', 'agg', 'cairo', 'pdf', 'pgf', 'ps', 'svg', 'template']

In [29]:
import matplotlib.pyplot as plt


# Bar chart of how many images fall in each zone, drawn on a wide, short figure
fig = plt.figure(figsize=(15, 3))
gdf['zone'].value_counts().plot.bar(ax=fig.gca(), grid=True)
plt.show()


ModuleNotFoundError: No module named 'matplotlib'

In [None]:
# (number of distinct zones, number of distinct regions)
len(gdf['zone'].unique()), len(gdf['region'].unique())

In [None]:
# Row count of the least-populated zone
gdf['zone'].value_counts().min()

In [None]:
# Bar chart of how many images were acquired in each calendar month
# (all images are from 2022, so the month alone identifies the period)
fig = plt.figure(figsize=(15, 3))
gdf['date'].dt.month.value_counts().plot.bar(ax=fig.gca(), grid=True)
plt.show()


In [None]:
# Restrict to a single zone: the fifth one (index 4) in sorted order.
zones = sorted(gdf['zone'].unique())
zone_gdf = gdf.loc[gdf['zone'] == zones[4]]

# Take five rows from that zone; the fixed random_state makes the draw repeatable.
# Note this rebinds `sample`, replacing any earlier value of that name.
sample = zone_gdf.sample(n=5, random_state=2025)
sample

Download the images

In [7]:
!rm -rf /fastdata/Satellogic/data/tifs/satellogic/*
!rm -rf /fastdata/Satellogic/data/tifs/sentinel2/*

/bin/bash: line 1: /usr/bin/rm: Argument list too long
/bin/bash: line 1: /usr/bin/rm: Argument list too long


In [None]:
from download_images import download_matches

# now supports S2 + S1
# Download the Sentinel-1 / Sentinel-2 / Satellogic images for every sampled row and
# collect the resulting paths, one entry per row.
matches = []
for _, item in sample.iterrows():
	print(item.json_path)

	# download_matches receives its inputs packed into a single tuple
	result = download_matches((item.matches, item.date, item.json_path, item.geometry.centroid))

	# The plotting cell skips entries that are None, so presumably download_matches can
	# return None when nothing usable was found — TODO confirm. Keep a None placeholder
	# instead of crashing on the tuple unpacking below.
	if result is None:
		matches.append(None)
		continue

	sent1_path, sent2_path, sat_path = result
	matches.append((sent1_path, sent2_path, sat_path))

In [None]:
# Inspect the collected (sent1_path, sent2_path, sat_path) entries, one per sampled row
matches

In [None]:
import rasterio as rio

# Draw one three-panel figure (Satellogic | Sentinel-1 | Sentinel-2) per downloaded match.
# Every raster is read inside a `with` block so its file handle is closed right away
# (the bare rio.open(...) calls used before were never closed).
for match in matches:
	# entries without a usable match are skipped
	if match is None:
		continue

	s1_path, s2_path, sat_path = match

	fig, axs = plt.subplots(1, 3, figsize=(12, 4))

	# Satellogic (HR image): first three bands, divided by 4000 and clipped to [0, 1]
	with rio.open(sat_path) as src:
		sat_img = src.read()
	axs[0].imshow((sat_img[:3, ...].transpose(1, 2, 0) / 4000).clip(0, 1))
	axs[0].set_title("Satellogic")
	axs[0].axis('off')

	# Sentinel-1: try array indices (3, 2, 1) as a colour composite; if the raster has
	# fewer than four bands the indexing raises IndexError and band 1 is shown in grayscale.
	with rio.open(s1_path) as src:
		s1_img = src.read()
	try:
		axs[1].imshow((s1_img[(3, 2, 1), ...].transpose(1, 2, 0) / 4000).clip(0, 1))
	except IndexError as e:
		print(f"s1 image loading error: {e}")
		axs[1].imshow(s1_img[0], cmap='gray')  # same data as read(1): the first band
	axs[1].set_title("Sentinel-1")
	axs[1].axis('off')

	# Sentinel-2 (Bands 4,3,2 — RGB, i.e. array indices 3, 2, 1)
	with rio.open(s2_path) as src:
		s2_img = src.read()
	axs[2].imshow((s2_img[(3, 2, 1), ...].transpose(1, 2, 0) / 4000).clip(0, 1))
	axs[2].set_title("Sentinel-2")
	axs[2].axis('off')
	plt.show()

