# Visualize the Tsukuba AlphaEarth Mosaic

Use this notebook to explore the clipped Tsukuba AlphaEarth embeddings (GeoTIFF) alongside the city boundary from the GeoPackage. Adjust the file paths if you stored the data outside the repository.


In [5]:
import os
from pathlib import Path
import rasterio

# Use the PROJ/GDAL support files that sit inside the rasterio package
# directory (presumably to avoid a mismatched system-wide install -- confirm).
_rasterio_dir = Path(rasterio.__file__).parent
proj_data = _rasterio_dir / "proj_data"
gdal_data = _rasterio_dir / "gdal_data"

# Fail early with the offending path instead of a cryptic CRS error later on.
if not proj_data.exists():
    raise FileNotFoundError(f"Missing proj_data dir: {proj_data}")
if not gdal_data.exists():
    raise FileNotFoundError(f"Missing gdal_data dir: {gdal_data}")

os.environ["PROJ_LIB"] = str(proj_data)
# PROJ 9.1+ looks up PROJ_DATA first and only falls back to the legacy
# PROJ_LIB name, so set both; otherwise a pre-existing PROJ_DATA would win.
os.environ["PROJ_DATA"] = str(proj_data)
os.environ["GDAL_DATA"] = str(gdal_data)

print("PROJ_LIB =", os.environ["PROJ_LIB"])
print("GDAL_DATA =", os.environ["GDAL_DATA"])

PROJ_LIB = f:\tsukubasolars\.venv\Lib\site-packages\rasterio\proj_data
GDAL_DATA = f:\tsukubasolars\.venv\Lib\site-packages\rasterio\gdal_data


In [6]:
import os
from pathlib import Path
import rasterio

# PROJ_LIB is defaulted before fiona/geemap are imported; this ordering is
# kept from the original cell. setdefault leaves an existing value untouched.
proj_data = Path(rasterio.__file__).parent / "proj_data"
os.environ.setdefault("PROJ_LIB", str(proj_data))

import fiona
from rasterio.crs import CRS
from rasterio.warp import transform_geom
import geemap

gdal_data = Path(rasterio.__file__).parent / "gdal_data"
os.environ.setdefault("GDAL_DATA", str(gdal_data))


# Update these paths if your files live elsewhere
CANDIDATE_VECTORS = [
    Path('map/tsukuba_gp.gpkg'),
    Path.cwd() / 'map/tsukuba_gp.gpkg',
    Path('F:/tsukubasolars/map/tsukuba_gp.gpkg'),
    Path('E:/tsukubasolars/map/tsukuba_gp.gpkg'),
]

# The first existing candidate wins; otherwise search below the working directory.
VECTOR_PATH = next(
    (candidate for candidate in CANDIDATE_VECTORS if candidate.exists()),
    None,
)
if VECTOR_PATH is None:
    # Sort the matches so the chosen file does not depend on filesystem
    # enumeration order when several copies exist.
    candidates = sorted(Path.cwd().rglob('tsukuba_gp.gpkg'))
    if not candidates:
        raise FileNotFoundError('Could not find tsukuba_gp.gpkg. Update VECTOR_PATH manually.')
    VECTOR_PATH = candidates[0]
print(f'Using vector file: {VECTOR_PATH}')

MOSAIC_PATH = Path('E:/data/interim/tsukuba_alphaearth_2024_mosaic_clipped.tif')
LAYER_NAME = 'tsukuba'
RGB_BANDS = (1, 16, 9)  # approximate RGB mapping of embedding dimensions
VIS_PARAMS = {'min': -0.3, 'max': 0.3}

Using vector file: F:\tsukubasolars\map\tsukuba_gp.gpkg


In [7]:
def load_boundary_geojson(vector_path: Path, layer: str) -> dict:
    """Read a vector layer and return it as a GeoJSON FeatureCollection in EPSG:4326.

    Args:
        vector_path: Path of the vector file to open with Fiona.
        layer: Name of the layer to read from that file.

    Returns:
        ``{'type': 'FeatureCollection', 'features': [...]}``. Each feature
        carries plain-dict ``properties`` and a geometry that was passed
        through ``transform_geom`` whenever the source CRS is known.

    Raises:
        RuntimeError: If the layer yields no feature that has a geometry.
    """
    target_crs = 'EPSG:4326'
    features = []
    with fiona.open(vector_path, layer=layer) as src:
        source_crs = src.crs_wkt or src.crs
        for feat in src:
            geom = feat['geometry']
            if geom is None:
                # A record without geometry cannot contribute to a boundary and
                # would break both reprojection and the bounds computation.
                continue
            # NOTE(review): when source_crs is a WKT string it never equals the
            # 'EPSG:4326' literal, so the transform presumably always runs for
            # layers with a known CRS -- harmless, but confirm if it matters.
            if source_crs and source_crs != target_crs:
                geom = transform_geom(source_crs, target_crs, geom)
            # Some Fiona versions yield geometry objects rather than dicts;
            # normalise the untransformed case to a plain GeoJSON mapping.
            if not isinstance(geom, dict) and hasattr(geom, '__geo_interface__'):
                geom = geom.__geo_interface__

            props = feat.get('properties', {})
            if props is None:
                # GeoJSON allows null properties; expose them as an empty dict.
                props = {}
            elif hasattr(props, 'items'):
                props = dict(props)
            elif isinstance(props, (list, tuple)):
                props = {f'value_{idx}': value for idx, value in enumerate(props)}
            else:
                props = {'value': props}

            features.append({
                'type': 'Feature',
                'properties': props,
                'geometry': geom,
            })
    if not features:
        raise RuntimeError(f"No features found in {vector_path} (layer {layer}).")
    return {'type': 'FeatureCollection', 'features': features}


def feature_collection_bounds(fc: dict) -> tuple[float, float, float, float]:
    """Compute the bounding box of every coordinate in a GeoJSON FeatureCollection.

    Args:
        fc: Mapping with a ``'features'`` list; each feature may hold a
            geometry with nested ``'coordinates'``, a ``GeometryCollection``
            with ``'geometries'``, or no geometry at all.

    Returns:
        ``(min_x, min_y, max_x, max_y)`` built from the first two values of
        every position (longitude and latitude for EPSG:4326 input).

    Raises:
        ValueError: If no coordinate is found in the collection.
    """
    lons = []
    lats = []

    def _collect(coords):
        # Empty or missing coordinate arrays (e.g. an empty Polygon) add nothing.
        if coords is None or len(coords) == 0:
            return
        if isinstance(coords[0], (float, int)):
            # A single position: only the first two ordinates are used.
            lon, lat = coords[:2]
            lons.append(lon)
            lats.append(lat)
        else:
            for part in coords:
                _collect(part)

    def _visit(geometry):
        if geometry is None:
            return
        if geometry.get('type') == 'GeometryCollection':
            # Collections nest member geometries instead of coordinates.
            for member in geometry.get('geometries') or ():
                _visit(member)
        else:
            _collect(geometry.get('coordinates'))

    for feature in fc['features']:
        _visit(feature.get('geometry'))

    if not lons:
        raise ValueError('Cannot compute bounds: the feature collection contains no coordinates.')
    return min(lons), min(lats), max(lons), max(lats)


# Load the boundary once, then derive its extent and the map centre from it.
boundary_fc = load_boundary_geojson(VECTOR_PATH, LAYER_NAME)
min_lon, min_lat, max_lon, max_lat = feature_collection_bounds(boundary_fc)

# Midpoint of the bounding box on each axis.
center_lon = (min_lon + max_lon) * 0.5
center_lat = (min_lat + max_lat) * 0.5

extent_msg = f"Boundary extent: ({min_lon:.4f}, {min_lat:.4f}) to ({max_lon:.4f}, {max_lat:.4f})"
print(extent_msg)


Boundary extent: (139.9958, 35.9473) to (140.1732, 36.2368)


In [8]:
# Interactive map centred on the boundary midpoint; ee_initialize=False is
# passed so that no Earth Engine session is requested.
map_center = (center_lat, center_lon)
m = geemap.Map(center=map_center, zoom=12, ee_initialize=False)
m.add_basemap('SATELLITE')

# Pseudo-RGB rendering: three embedding bands plus the shared value stretch.
# NOTE(review): confirm that the installed geemap's add_raster honours a
# `vis_params` keyword; some releases expose band/stretch selection through
# other arguments (e.g. `indexes`, `vmin`, `vmax`) -- verify against the docs.
embedding_vis = {"bands": list(RGB_BANDS)}
embedding_vis.update(VIS_PARAMS)
m.add_raster(
    str(MOSAIC_PATH),
    layer_name="Tsukuba Embeddings",
    vis_params=embedding_vis,
)

# Draw the city outline on top: yellow stroke, no fill.
boundary_style = {"color": "yellow", "weight": 2, "fillOpacity": 0}
m.add_geojson(
    boundary_fc,
    layer_name="Tsukuba Boundary",
    style=boundary_style,
    info_mode=None,
)

m


Map(center=[36.092073999965294, 140.08447945496306], controls=(WidgetControl(options=['position', 'transparent…

In [1]:
from pathlib import Path
import numpy as np
import rasterio

tif_path = Path("E:/data/interim/tsukuba_alphaearth_2024_mosaic_clipped.tif")

with rasterio.open(tif_path) as src:
    # One-line summary of the raster before the per-band statistics.
    print(
        f"CRS: {src.crs}, shape: {src.width} x {src.height}, bands: {src.count}")

    # The whole raster is loaded at once as (bands, rows, cols); switch to
    # windowed reads if the file is too large for memory.
    data = src.read()

    # Iterating the first axis yields one 2-D array per band.
    for idx, band in enumerate(data):
        print(
            f"Band {idx + 1}: min={band.min():.4f}, max={band.max():.4f}, "
            f"mean={band.mean():.4f}, sample pixel={band[0, 0]:.4f}"
        )

        # Example: inspect a specific (row, col) location instead of (0, 0):
        # row, col = 1500, 1200
        # print(f"Band {idx + 1} @ ({row}, {col}) = {band[row, col]}")

CRS: GEOGCS["WGS 84",DATUM["World Geodetic System 1984",SPHEROID["WGS 84",6378137,298.257223563]],PRIMEM["Greenwich",0],UNIT["degree",0.0174532925199433,AUTHORITY["EPSG","9122"]],AXIS["Latitude",NORTH],AXIS["Longitude",EAST]], shape: 1975 x 3224, bands: 64
Band 1: min=-0.3189, max=0.2599, mean=-0.0385, sample pixel=0.0000
Band 2: min=-0.4873, max=0.2442, mean=-0.1079, sample pixel=0.0000
Band 3: min=-0.1417, max=0.3937, mean=0.0768, sample pixel=0.0000
Band 4: min=-0.2520, max=0.3460, mean=0.0020, sample pixel=0.0000
Band 5: min=-0.2069, max=0.2520, mean=-0.0056, sample pixel=0.0000
Band 6: min=-0.3278, max=0.3189, mean=-0.0229, sample pixel=0.0000
Band 7: min=-0.3839, max=0.2141, mean=-0.0679, sample pixel=0.0000
Band 8: min=-0.2761, max=0.4656, mean=0.0157, sample pixel=0.0000
Band 9: min=-0.3743, max=0.3743, mean=0.0281, sample pixel=0.0000
Band 10: min=-0.2761, max=0.2844, mean=-0.0011, sample pixel=0.0000
Band 11: min=-0.3369, max=0.3101, mean=-0.0072, sample pixel=0.0000
Band 12:

In [2]:
from pathlib import Path
import numpy as np
import rasterio
# Import Window explicitly: `rasterio.windows` is only reachable as an
# attribute if something else happened to import that submodule already.
from rasterio.windows import Window

tif_path = Path("E:/data/interim/tsukuba_alphaearth_2024_mosaic_clipped.tif")

with rasterio.open(tif_path) as src:
    # Pick one pixel uniformly at random (unseeded, so it differs on every run).
    row = np.random.randint(0, src.height)
    col = np.random.randint(0, src.width)

    # A 1x1 window reads only that pixel; the result has shape (bands, 1, 1),
    # which the trailing index flattens to one value per band.
    pixel_window = Window(col_off=col, row_off=row, width=1, height=1)
    values = src.read(window=pixel_window)[:, 0, 0]

    print(f"Random pixel at row={row}, col={col}")
    for band_idx, value in enumerate(values, start=1):
        print(f"Band {band_idx}: {repr(value)}")

Random pixel at row=1463, col=1825
Band 1: np.float64(-0.16000000000000003)
Band 2: np.float64(-0.03844675124951941)
Band 3: np.float64(0.07111111111111111)
Band 4: np.float64(-0.002214532871972318)
Band 5: np.float64(0.0629911572472126)
Band 6: np.float64(0.03844675124951941)
Band 7: np.float64(0.017777777777777778)
Band 8: np.float64(0.186082276047674)
Band 9: np.float64(0.05536332179930796)
Band 10: np.float64(0.04822760476739715)
Band 11: np.float64(-0.08882737408688965)
Band 12: np.float64(-0.006151480199923107)
Band 13: np.float64(-0.21413302575932336)
Band 14: np.float64(0.0271280276816609)
Band 15: np.float64(-0.08421376393694734)
Band 16: np.float64(-0.15378700499807765)
Band 17: np.float64(-0.24415224913494812)
Band 18: np.float64(0.12456747404844293)
Band 19: np.float64(-0.14769703960015382)
Band 20: np.float64(0.041584006151480196)
Band 21: np.float64(-0.2069357939254133)
Band 22: np.float64(0.01384083044982699)
Band 23: np.float64(-0.04822760476739715)
Band 24: np.float64(

In [3]:
from pathlib import Path
import numpy as np
import rasterio
# Import Window explicitly: `rasterio.windows` is only reachable as an
# attribute if something else happened to import that submodule already.
from rasterio.windows import Window

tif_path = Path("E:/data/interim/tsukuba_alphaearth_2024_mosaic_clipped.tif")

# Update this list to match your raster's band order.
band_names = [f"A{idx:02d}" for idx in range(1, 65)]

with rasterio.open(tif_path) as src:
    # Refuse to label values when the name list and the raster disagree.
    if len(band_names) != src.count:
        raise ValueError(
            f"Band name list ({len(band_names)}) must match raster bands ({src.count}).")

    # Pick one pixel uniformly at random (unseeded, so it differs on every run).
    row = np.random.randint(0, src.height)
    col = np.random.randint(0, src.width)

    # A 1x1 window reads only that pixel; the result has shape (bands, 1, 1),
    # which the trailing index flattens to one value per band.
    pixel = src.read(window=Window(col, row, 1, 1))[:, 0, 0]

    print(f"Random pixel at row={row}, col={col}")
    for name, value in zip(band_names, pixel):
        print(f"{name}: {repr(value)}")

Random pixel at row=1819, col=582
A01: np.float64(-0.03543252595155709)
A02: np.float64(-0.08421376393694734)
A03: np.float64(0.14173010380622836)
A04: np.float64(0.044844290657439445)
A05: np.float64(-0.029773164167627833)
A06: np.float64(-0.12456747404844293)
A07: np.float64(-0.12456747404844293)
A08: np.float64(0.33685505574778934)
A09: np.float64(0.1929104190695886)
A10: np.float64(-0.029773164167627833)
A11: np.float64(-0.03254133025759323)
A12: np.float64(-0.05173394848135333)
A13: np.float64(-0.3188927335640138)
A14: np.float64(0.0271280276816609)
A15: np.float64(-0.022206843521722412)
A16: np.float64(-0.08421376393694734)
A17: np.float64(-0.13588619761630144)
A18: np.float64(0.03844675124951941)
A19: np.float64(-0.12456747404844293)
A20: np.float64(-0.04822760476739715)
A21: np.float64(-0.003936947327950788)
A22: np.float64(0.13016532103037293)
A23: np.float64(0.08421376393694734)
A24: np.float64(0.01384083044982699)
A25: np.float64(0.004982698961937716)
A26: np.float64(0.18608