In [18]:
import geopandas as gpd
import pandas as pd
import numpy as np
import os
import mercantile
from shapely.geometry import box, Point
import ee
import rasterio
from rasterio.transform import xy

In [None]:
# Initialise the Earth Engine client; the ee.* calls made by
# export_satellite_embeddings below depend on this having run first.
# NOTE(review): presumably relies on credentials already stored on this machine
# (and, on recent earthengine-api versions, a configured project) — confirm.
ee.Initialize()

def export_satellite_embeddings(bbox_coords, filename, resolution=250, year=2021):
    """
    Start an Earth Engine task that exports Google satellite embeddings to Drive.

    Parameters
    ----------
    bbox_coords : sequence of four numbers
        [min_lon, min_lat, max_lon, max_lat]. If the two latitudes arrive in
        the opposite order they are swapped, so the rectangle handed to Earth
        Engine always lists its southern edge first.
    filename : str
        Used as the export task description.
    resolution : number, default 250
        Passed to the export as ``scale``.
    year : int, default 2021
        Images are filtered with ``filterDate`` from ``{year}-01-01`` to
        ``{year+1}-01-01``.

    Returns
    -------
    The export task returned by ``ee.batch.Export.image.toDrive``, already started.

    Raises
    ------
    ValueError
        If bbox_coords does not contain exactly four values.
    """
    # Validate locally so a malformed box fails here with a readable message
    # instead of surfacing later as a remote Earth Engine error.
    coords = list(bbox_coords)
    if len(coords) != 4:
        raise ValueError(
            "bbox_coords must be [min_lon, min_lat, max_lon, max_lat]; "
            f"got {len(coords)} value(s)"
        )
    west, lat_a, east, lat_b = coords

    # Latitudes have no wrap-around, so ordering them is always safe.
    # Longitudes are passed through untouched: west > east may be an intentional
    # box across the antimeridian.
    # NOTE(review): confirm how ee.Geometry.Rectangle treats that case.
    south, north = min(lat_a, lat_b), max(lat_a, lat_b)

    # Load collection and create bounding box
    dataset = ee.ImageCollection('GOOGLE/SATELLITE_EMBEDDING/V1/ANNUAL')
    bbox = ee.Geometry.Rectangle([west, south, east, north])

    # Get mosaic without any resampling (to avoid rainbow corruption)
    mosaic = (
        dataset
        .filterDate(f'{year}-01-01', f'{year+1}-01-01')
        .filterBounds(bbox)
        .mosaic()
    )

    # Export task - let GEE handle scaling during export
    task = ee.batch.Export.image.toDrive(
        image=mosaic,
        description=filename,
        folder='GEE_Exports',
        scale=resolution,
        region=bbox,
        maxPixels=1e9,
        crs='EPSG:4326'
    )

    task.start()
    print(f"Export task started: {filename}")
    return task



In [None]:
def raster_to_geodataframe(raster_path):
    """
    Convert a multi-band raster into a GeoDataFrame with one row per pixel.

    Parameters
    ----------
    raster_path : str or path-like
        Path handed to ``rasterio.open``.

    Returns
    -------
    geopandas.GeoDataFrame
        Rows are in row-major pixel order (row 0, col 0 first). Columns:
        - ``band_1`` ... ``band_N``: the pixel's value in each band, in band order
        - ``x`` / ``y``: coordinates returned by ``rasterio.transform.xy`` for
          the pixel (its centre with that function's default offset)
        - ``row`` / ``col``: zero-based pixel indices
        - ``geometry``: a point at (x, y)
        The frame's CRS is the raster's CRS.
    """
    # Only the array, transform and CRS are needed, so the dataset is closed
    # as soon as they have been read.
    with rasterio.open(raster_path) as src:
        data = src.read()  # Shape: (bands, height, width)
        transform = src.transform
        crs = src.crs

    n_bands, height, width = data.shape

    # Row/column index of every pixel, flattened in the same row-major order
    # that the reshape below uses for the band values.
    rows, cols = np.indices((height, width))
    rows_flat = rows.ravel()
    cols_flat = cols.ravel()

    # Convert pixel indices to coordinates in the raster's CRS
    x_coords, y_coords = xy(transform, rows_flat, cols_flat)

    # (bands, height, width) -> (height*width, bands): pixels as rows, bands as columns
    pixel_data = data.reshape(n_bands, -1).T
    band_columns = [f'band_{i}' for i in range(1, n_bands + 1)]
    df = pd.DataFrame(pixel_data, columns=band_columns)

    # Coordinate and index columns (optional, useful for reference)
    df['x'] = x_coords
    df['y'] = y_coords
    df['row'] = rows_flat
    df['col'] = cols_flat

    # Build all point geometries in one vectorised call rather than creating a
    # shapely Point per pixel in a Python loop.
    geometries = gpd.points_from_xy(x_coords, y_coords)

    return gpd.GeoDataFrame(df, geometry=geometries, crs=crs)

In [15]:
# Bounding box in the order export_satellite_embeddings documents:
# [min_lon, min_lat, max_lon, max_lat]. The smaller latitude (5.15) must come
# before the larger one (11.05).
test_bbox = [-1.32, 5.15, 1.7, 11.05]
file_name = 'satellite_embeddings_test_large'
# Starts the Drive export task; the returned task is kept in export_task.
export_task = export_satellite_embeddings(test_bbox, file_name, resolution=250, year=2021)

Export task started: satellite_embeddings_test_large


In [20]:
# Load the test embedding raster into a per-pixel GeoDataFrame
# (one row per pixel, one column per band).
raster_path = "../../unsynced-data/embeddings/satellite_embeddings_test.tif"
test_gdf = raster_to_geodataframe(raster_path)


 1345, 2629 dimensions

In [22]:
# Load two rasters as per-pixel GeoDataFrames; the print marks when the first
# one has finished.
# NOTE(review): togo_ghana reads satellite_embeddings_test.tif, the same file
# already loaded into test_gdf, so the two frames hold identical data —
# confirm whether a different raster was intended here.
togo_ghana = raster_to_geodataframe("../../unsynced-data/embeddings/satellite_embeddings_test.tif")
print ("tg done")
test_large = raster_to_geodataframe("../../unsynced-data/embeddings/satellite_embeddings_test_large.tif")

tg done


In [23]:
# Summary statistics for the numeric columns of togo_ghana
# (the band columns plus x, y, row and col).
togo_ghana.describe()

Unnamed: 0,band_1,band_2,band_3,band_4,band_5,band_6,band_7,band_8,band_9,band_10,...,band_59,band_60,band_61,band_62,band_63,band_64,x,y,row,col
count,5772.0,5772.0,5772.0,5772.0,5772.0,5772.0,5772.0,5772.0,5772.0,5772.0,...,5772.0,5772.0,5772.0,5772.0,5772.0,5772.0,5772.0,5772.0,5772.0,5772.0
mean,0.064913,-0.100201,0.1645,-0.028126,-0.183644,-0.066567,-0.024771,-0.003763,0.151803,-0.175354,...,-0.320892,-0.221799,-0.048244,0.015349,-0.041269,0.031422,40.161431,7.182031,36.5,38.5
std,0.028272,0.03635,0.032751,0.037684,0.053913,0.026498,0.045719,0.032853,0.050179,0.040636,...,0.045612,0.046893,0.033269,0.033089,0.035307,0.042517,0.050568,0.047974,21.36186,22.516761
min,-0.013841,-0.236463,-0.000554,-0.147697,-0.310096,-0.166336,-0.16,-0.172795,-0.14173,-0.27614,...,-0.487259,-0.364721,-0.214133,-0.119093,-0.206936,-0.113741,40.074968,7.100059,0.0,0.0
25%,0.044844,-0.124567,0.14173,-0.051734,-0.228897,-0.084214,-0.055363,-0.024606,0.119093,-0.206936,...,-0.355309,-0.251965,-0.071111,-0.004983,-0.059116,0.006151,40.117638,7.140484,18.0,19.0
50%,0.062991,-0.098424,0.166336,-0.029773,-0.19291,-0.06699,-0.024606,-0.004983,0.153787,-0.172795,...,-0.327812,-0.228897,-0.048228,0.017778,-0.038447,0.038447,40.161431,7.182031,36.5,38.5
75%,0.079723,-0.075356,0.186082,-0.003014,-0.14173,-0.051734,0.004983,0.013841,0.186082,-0.147697,...,-0.292872,-0.199862,-0.027128,0.038447,-0.017778,0.059116,40.205223,7.223578,55.0,58.0
max,0.179377,0.001538,0.27614,0.14173,0.013841,0.041584,0.166336,0.153787,0.27614,-0.038447,...,-0.135886,-0.000246,0.093564,0.14173,0.084214,0.147697,40.247893,7.264002,73.0,77.0


In [24]:
# Summary statistics for the numeric columns of test_gdf
# (the band columns plus x, y, row and col).
test_gdf.describe()

Unnamed: 0,band_1,band_2,band_3,band_4,band_5,band_6,band_7,band_8,band_9,band_10,...,band_59,band_60,band_61,band_62,band_63,band_64,x,y,row,col
count,5772.0,5772.0,5772.0,5772.0,5772.0,5772.0,5772.0,5772.0,5772.0,5772.0,...,5772.0,5772.0,5772.0,5772.0,5772.0,5772.0,5772.0,5772.0,5772.0,5772.0
mean,0.064913,-0.100201,0.1645,-0.028126,-0.183644,-0.066567,-0.024771,-0.003763,0.151803,-0.175354,...,-0.320892,-0.221799,-0.048244,0.015349,-0.041269,0.031422,40.161431,7.182031,36.5,38.5
std,0.028272,0.03635,0.032751,0.037684,0.053913,0.026498,0.045719,0.032853,0.050179,0.040636,...,0.045612,0.046893,0.033269,0.033089,0.035307,0.042517,0.050568,0.047974,21.36186,22.516761
min,-0.013841,-0.236463,-0.000554,-0.147697,-0.310096,-0.166336,-0.16,-0.172795,-0.14173,-0.27614,...,-0.487259,-0.364721,-0.214133,-0.119093,-0.206936,-0.113741,40.074968,7.100059,0.0,0.0
25%,0.044844,-0.124567,0.14173,-0.051734,-0.228897,-0.084214,-0.055363,-0.024606,0.119093,-0.206936,...,-0.355309,-0.251965,-0.071111,-0.004983,-0.059116,0.006151,40.117638,7.140484,18.0,19.0
50%,0.062991,-0.098424,0.166336,-0.029773,-0.19291,-0.06699,-0.024606,-0.004983,0.153787,-0.172795,...,-0.327812,-0.228897,-0.048228,0.017778,-0.038447,0.038447,40.161431,7.182031,36.5,38.5
75%,0.079723,-0.075356,0.186082,-0.003014,-0.14173,-0.051734,0.004983,0.013841,0.186082,-0.147697,...,-0.292872,-0.199862,-0.027128,0.038447,-0.017778,0.059116,40.205223,7.223578,55.0,58.0
max,0.179377,0.001538,0.27614,0.14173,0.013841,0.041584,0.166336,0.153787,0.27614,-0.038447,...,-0.135886,-0.000246,0.093564,0.14173,0.084214,0.147697,40.247893,7.264002,73.0,77.0
