In [0]:
%pip install pystac_client planetary_computer rioxarray numpy==1.26.4 geopy
%restart_python

In [0]:
import pystac_client
from geopy.geocoders import Nominatim

# Set up the geocoder (Nominatim identifies callers by this user agent string).
geolocator = Nominatim(user_agent="my_stac_project")

# Geocode the place name to get the raw Nominatim record.
place_name = "Otway Forest, Victoria, Australia"
location = geolocator.geocode(place_name)

if location is None:
    # Fail here rather than letting a later cell hit a NameError on `bbox`,
    # or silently reuse a stale `bbox` left over from a previous run.
    raise ValueError(f"Location not found: {place_name!r}")

location_raw = location.raw

# Nominatim's "boundingbox" is [min_lat, max_lat, min_lon, max_lon] as strings.
south, north, west, east = (float(value) for value in location_raw["boundingbox"])

# Reorder to [west, south, east, north] for the STAC search in the next cell.
bbox = [west, south, east, north]
print("Bounding box:", bbox)

In [0]:
import pystac_client
import planetary_computer
import rioxarray

# Connect to the Planetary Computer STAC API. The `modifier` callback is applied
# to results returned by the client so asset hrefs are signed.
catalog = pystac_client.Client.open(
    "https://planetarycomputer.microsoft.com/api/stac/v1",
    modifier=planetary_computer.sign_inplace
)

# Search for data (example: Sentinel-2) over the geocoded bounding box.
search = catalog.search(
    collections=["sentinel-2-l2a"],
    bbox=bbox,
    datetime="2023-01-01/2023-01-31",
    query={"eo:cloud_cover": {"lt": 10}},  # Only images with <10% cloud
    limit=10,      # page size requested from the API
    max_items=10,  # hard cap on the total number of items fetched
)

# `items()` is the supported replacement for the deprecated `get_items()`.
items = list(search.items())
if not items:
    # Without this guard, `items[0]` raises an uninformative IndexError.
    raise RuntimeError(
        "No Sentinel-2 scenes matched the search; widen the date range or "
        "relax the cloud-cover filter."
    )
item = items[0]

# Sign the item for authentication. Kept so `signed_item` is available to
# later cells, even though the client modifier above already signs results.
signed_item = planetary_computer.sign(item)

# Download specific assets as GeoTIFFs into the current working directory.
b04_href = signed_item.assets["B04"].href  # Red band
b03_href = signed_item.assets["B03"].href  # Green band
b02_href = signed_item.assets["B02"].href  # Blue band

for href, tif_path in (
    (b04_href, "sentinel2_red.tif"),
    (b03_href, "sentinel2_green.tif"),
    (b02_href, "sentinel2_blue.tif"),
):
    rioxarray.open_rasterio(href).rio.to_raster(tif_path)

In [0]:
import matplotlib.pyplot as plt
import rasterio

# Read band 1 of each GeoTIFF written by the download cell. Relative paths are
# used (the same ones the download cell wrote to) instead of a hard-coded
# per-user absolute workspace path, so the notebook runs for any user.
band_files = {
    "red": "sentinel2_red.tif",
    "green": "sentinel2_green.tif",
    "blue": "sentinel2_blue.tif",
}

bands = {}
for band_name, tif_path in band_files.items():
    # The context manager closes the dataset as soon as the pixels are read.
    with rasterio.open(tif_path) as src:
        bands[band_name] = src.read(1)

# Keep the original variable names available for interactive use.
red, green, blue = bands["red"], bands["green"], bands["blue"]

# Show each band as its own single-channel image.
for band_name, band_data in bands.items():
    plt.imshow(band_data)
    plt.title(f"Sentinel-2 {band_name} band")
    plt.axis('off')
    plt.show()

In [0]:
import rioxarray
import numpy as np
import matplotlib.pyplot as plt

# Load TIFF files; squeeze() removes length-1 dimensions so each band is 2-D.
red = rioxarray.open_rasterio("sentinel2_red.tif").squeeze()
green = rioxarray.open_rasterio("sentinel2_green.tif").squeeze()
blue = rioxarray.open_rasterio("sentinel2_blue.tif").squeeze()

# Plot true color RGB composite (basic visualization)
rgb_stack = np.stack([red.values, green.values, blue.values], axis=-1)

# Min-max normalize to [0, 1] for display. Done in float32 to limit memory,
# with a guard so a constant image does not divide by zero.
rgb_min = float(rgb_stack.min())
rgb_range = float(rgb_stack.max()) - rgb_min
if rgb_range == 0:
    rgb_range = 1.0
rgb_norm = (rgb_stack.astype(np.float32) - rgb_min) / rgb_range

plt.figure(figsize=(8, 8))
plt.imshow(rgb_norm)
plt.title("Sentinel-2 RGB True Color Composite")
plt.axis('off')
plt.show()

# Load NIR band (B08). This is read directly from the signed remote asset
# href; unlike the RGB bands it is not saved to a local file first.
nir = rioxarray.open_rasterio(signed_item.assets["B08"].href).squeeze()

# NDVI Calculation: (NIR - Red) / (NIR + Red)
# Cast to float before doing arithmetic: if the rasters are unsigned integers
# (typical for Sentinel-2 L2A — confirm with `red.dtype`), `nir - red` wraps
# around to a large positive value wherever red > nir and corrupts the index.
nir_f = nir.astype("float32")
red_f = red.astype("float32")
band_sum = nir_f + red_f

# Mask zero denominators up front so those pixels become NaN without
# triggering divide-by-zero warnings.
ndvi = (nir_f - red_f) / band_sum.where(band_sum != 0)

plt.figure(figsize=(8, 8))
plt.imshow(ndvi, cmap='RdYlGn')
plt.colorbar(label="NDVI")
plt.title("NDVI")
plt.axis('off')
plt.show()


In [0]:
import rioxarray
import pandas as pd
# Re-open the three locally saved band GeoTIFFs (red, green, blue order) and
# squeeze away length-1 dimensions.
red, green, blue = (
    rioxarray.open_rasterio(tif_path).squeeze()
    for tif_path in ("sentinel2_red.tif", "sentinel2_green.tif", "sentinel2_blue.tif")
)

In [0]:
# Convert each raster band to a long-format pandas DataFrame (one row per pixel).
# to_dataframe() also emits any scalar coordinates left behind by squeeze()
# (e.g. `band`, `spatial_ref`) as extra columns. Keep only y, x and the band
# value so the later merge on x/y does not produce suffixed duplicate columns.
df_red = red.to_dataframe(name="red").reset_index()[["y", "x", "red"]]
df_green = green.to_dataframe(name="green").reset_index()[["y", "x", "green"]]
df_blue = blue.to_dataframe(name="blue").reset_index()[["y", "x", "blue"]]

In [0]:
# Inner-join the three per-band frames on pixel coordinates so each row holds
# the red, green and blue values for one (x, y) location.
join_keys = ["x", "y"]
df = pd.merge(
    pd.merge(df_red, df_green, on=join_keys),
    df_blue,
    on=join_keys,
)
print(df.head())

In [0]:
# Persist the merged pixel table as a Parquet file at the volume path below.
df.to_parquet(
    path="/Volumes/danny_catalog/demo_schema/demo_vol/raster_data.parquet",
)

In [0]:
# Load the Parquet file written by the previous cell into a Spark DataFrame.
spark_df = (
    spark.read
    .format("parquet")
    .load("/Volumes/danny_catalog/demo_schema/demo_vol/raster_data.parquet")
)

In [0]:
# Render the Spark DataFrame with the notebook's built-in display() helper.
display(spark_df)

In [0]:
# Save the pixel table as a managed table. The default save mode raises if the
# table already exists, which breaks every re-run of this notebook; overwrite
# makes the cell idempotent.
spark_df.write.mode("overwrite").saveAsTable("danny_catalog.demo_schema.raster_data")

In [0]:
%sql
-- Query every column of the table written by the saveAsTable cell.
SELECT * FROM danny_catalog.demo_schema.raster_data

Databricks data profile. Run in Databricks to view.