# Setup
## Libraries

In [1]:
import os, sys
import ee
import pandas as pd
import geopandas as gpd
import numpy as np
import xarray as xr
import xvec
import rioxarray as rxr
from dask.distributed import Client

sys.path.append('..')
from utils.constants import *

In [2]:
# Trigger the authentication flow. Authorization codes produced by this step
# are secrets: never paste them into the notebook.
ee.Authenticate()
# Initialize the library exactly once. A second ee.Initialize() call without
# `project` would re-initialise the client and discard the project chosen by
# the first call, so the project and the high-volume endpoint are passed together.
ee.Initialize(
    project=GEE_PROJECT_NAME,
    opt_url='https://earthengine-highvolume.googleapis.com',
)

In [3]:
# Local Dask cluster used for the lazy raster computations below:
# two workers, two threads each, with a 16GB memory limit setting.
client = Client(
    n_workers=2,
    threads_per_worker=2,
    memory_limit='16GB',
)
# Leave the client as the last expression so the notebook renders its summary.
client

0,1
Connection method: Cluster object,Cluster type: distributed.LocalCluster
Dashboard: http://127.0.0.1:8787/status,

0,1
Dashboard: http://127.0.0.1:8787/status,Workers: 2
Total threads: 4,Total memory: 29.80 GiB
Status: running,Using processes: True

0,1
Comm: tcp://127.0.0.1:38327,Workers: 2
Dashboard: http://127.0.0.1:8787/status,Total threads: 4
Started: Just now,Total memory: 29.80 GiB

0,1
Comm: tcp://127.0.0.1:44077,Total threads: 2
Dashboard: http://127.0.0.1:33983/status,Memory: 14.90 GiB
Nanny: tcp://127.0.0.1:42659,
Local directory: /tmp/dask-scratch-space/worker-8nr6oxxn,Local directory: /tmp/dask-scratch-space/worker-8nr6oxxn

0,1
Comm: tcp://127.0.0.1:43049,Total threads: 2
Dashboard: http://127.0.0.1:36661/status,Memory: 14.90 GiB
Nanny: tcp://127.0.0.1:37067,
Local directory: /tmp/dask-scratch-space/worker-sosrljey,Local directory: /tmp/dask-scratch-space/worker-sosrljey


## Directory and File Paths

In [4]:
# Location of the LIFE raster (arable scenario, file "arable_0.25.tif") below
# RASTER_INPUT_DIR. NOTE(review): RASTER_INPUT_DIR is presumably a pathlib.Path
# supplied by the wildcard import of utils.constants -- confirm.
life_data_path = RASTER_INPUT_DIR / "Eyres_et_al_LIFE/arable_0.25.tif"


# Data Processing
## Google Earth Engine Variables

In [10]:
# Shared request settings for every Earth Engine collection opened below.
scale_factor = 1000          # `scale` argument forwarded to the 'ee' backend
project_crs = "EPSG:3857"    # CRS requested from Earth Engine
# Dimension names registered with rioxarray as the spatial axes.
x_dim = 'X'
y_dim = 'Y'


def open_ee_dataset(asset_id, scale=scale_factor, crs=project_crs, x_dim=x_dim, y_dim=y_dim):
    """Open an Earth Engine asset lazily as an xarray Dataset.

    Parameters
    ----------
    asset_id : str
        Earth Engine asset id without the ``ee://`` prefix,
        e.g. ``"JRC/GHSL/P2023A/GHS_POP"``.
    scale, crs :
        Forwarded unchanged to ``xr.open_dataset(..., engine='ee')``.
        Default to the notebook-wide ``scale_factor`` / ``project_crs``.
    x_dim, y_dim : str
        Names of the dataset dimensions to register as the rioxarray
        spatial dimensions.

    Returns
    -------
    xarray.Dataset
        The opened dataset with its spatial dimensions set.
    """
    dataset = xr.open_dataset(f"ee://{asset_id}", engine='ee', scale=scale, crs=crs)
    return dataset.rio.set_spatial_dims(x_dim=x_dim, y_dim=y_dim)


# Disabled for now: worldcover_ds = open_ee_dataset("ESA/WorldCover/v100")
esri_land_ds = open_ee_dataset("projects/sat-io/open-datasets/landcover/ESRI_Global-LULC_10m_TS")
ghs_built_s_ds = open_ee_dataset("JRC/GHSL/P2023A/GHS_BUILT_S")
ghs_built_v_ds = open_ee_dataset("JRC/GHSL/P2023A/GHS_BUILT_V")
ghs_smod_ds = open_ee_dataset("JRC/GHSL/P2023A/GHS_SMOD")
ghs_pop_ds = open_ee_dataset("JRC/GHSL/P2023A/GHS_POP")
alos_dsm_ds = open_ee_dataset("JAXA/ALOS/AW3D30/V3_2")

In [12]:
def encode_smod_dataset(dataset, crs = None):
    """One-hot encode the ``smod_code`` variable of ``dataset``.

    For every code listed in ``ghs_smod_categories`` a new 0/1 variable named
    ``smod_<label>`` is created (1 where ``smod_code`` equals that code, 0
    elsewhere). The new variables are merged with ``dataset`` and the original
    ``smod_code`` variable is dropped; all other variables of ``dataset`` are
    kept.

    Parameters
    ----------
    dataset : xarray.Dataset
        Dataset containing a ``smod_code`` variable. It is not modified.
    crs : optional
        When truthy, written onto the result with ``rio.write_crs``.

    Returns
    -------
    xarray.Dataset
        The encoded dataset, carrying a copy of ``dataset.attrs``.
    """
    ghs_smod_categories = {10.: 'water', 11.: 'very_low_density_rural', 12.: 'low_density_rural', 13.: 'rural_cluster', 
                        21.: 'suburban', 22.: 'semidense_urban', 23.: 'dense_urban', 30.: 'urban_centre', -200.: 'no_data'}

    # Encode the dataset that was passed in, not the notebook-level
    # `ghs_smod_ds`, so clipped or otherwise pre-processed inputs are respected.
    smod_code = dataset['smod_code']
    one_hot_vars = [
        xr.where(smod_code == code, 1, 0).rename(f'smod_{label}')
        for code, label in ghs_smod_categories.items()
    ]

    res = xr.merge([dataset] + one_hot_vars).drop_vars('smod_code')
    # Attributes are reset explicitly from the input dataset.
    res.attrs = dataset.attrs.copy()

    if crs:
        res = res.rio.write_crs(crs)

    return res

In [13]:
# Combine the four GHSL datasets (built-up surface, built-up volume,
# settlement model, population) into a single dataset, then register the
# spatial dimensions again on the merged result.
ghsl_datasets = [ghs_built_s_ds, ghs_built_v_ds, ghs_smod_ds, ghs_pop_ds]
merged_ds = xr.merge(ghsl_datasets).rio.set_spatial_dims(x_dim=x_dim, y_dim=y_dim)
merged_ds

## LIFE Variable

In [14]:
# Raster band index -> variable name given to that band in the dataset.
life_bands = {1: "amphibians", 2: "birds", 3: "mammals", 4: "reptiles"}
# Single-entry time axis attached to the raster.
life_time = pd.date_range('2025-01-01', periods=1)
# Read the raster, split its bands into named variables, add the time
# dimension and tag the result with EPSG:4326.
life_data_ds = (
    rxr.open_rasterio(life_data_path)
    .to_dataset(dim='band')
    .rename(life_bands)
    .rio.set_spatial_dims(x_dim='x', y_dim='y')
    .expand_dims(time=life_time)
    .rio.write_crs("EPSG:4326")
)
life_data_ds

# Nigeria Example

In [14]:
# Alternative boundary source, currently unused:
#   xr.open_dataset("ee://WM/geoLab/geoBoundaries/600/ADM0", engine='ee')
gaul = ee.FeatureCollection("FAO/GAUL/2015/level0")

# Fetch the features whose ADM0_NAME is 'Nigeria' as a GeoDataFrame.
nigeria_filter = ee.Filter.eq('ADM0_NAME', 'Nigeria')
nigeria_boundary_gdf = ee.data.computeFeatures(
    {
        'expression': gaul.filter(nigeria_filter),
        'fileFormat': 'GEOPANDAS_GEODATAFRAME',
    }
)

# Assign the CRS reported by Earth Engine for the first GAUL feature's geometry.
nigeria_boundary_gdf.crs = gaul.first().geometry().projection().getInfo()['crs']

# Bounding box of the boundary, expressed in the CRS assigned above.
minx, miny, maxx, maxy = nigeria_boundary_gdf.total_bounds

In [15]:
# The bounding box is expressed in the CRS of `nigeria_boundary_gdf`, which
# need not match the CRS of `merged_ds` (requested as `project_crs`). Passing
# `crs=` lets rioxarray convert the box to the dataset CRS instead of reading
# the bounds as if they were already in dataset units.
nigeria_clipped_ds = merged_ds.rio.clip_box(
    minx, miny, maxx, maxy, crs=nigeria_boundary_gdf.crs
)
# Use the notebook-wide x_dim / y_dim names rather than hard-coded 'lon'/'lat',
# so this cell agrees with how the source datasets were opened.
nigeria_merged_ds = encode_smod_dataset(nigeria_clipped_ds).rio.set_spatial_dims(x_dim=x_dim, y_dim=y_dim)
nigeria_merged_ds

# Download Data

In [28]:
# Preview of the ESRI land-cover variable 'b1': select the entries labelled
# '2020-01-01' on the time axis, then take index 0 along the leading dimension
# of that selection. NOTE(review): which dimension is leading is not visible
# here -- confirm it is the intended one.
esri_land_ds['b1'].sel(time='2020-01-01')[0]

In [33]:
# Clip the ESRI land-cover dataset to the Nigeria bounding box. The box is in
# the CRS of `nigeria_boundary_gdf`, so it is passed via `crs=` and rioxarray
# converts it to the dataset CRS before clipping.
nigeria_land_cover = esri_land_ds.rio.clip_box(
    minx, miny, maxx, maxy, crs=nigeria_boundary_gdf.crs
)['b1'].isel(time=1)  # second entry along the time axis
x = nigeria_land_cover  # original single-letter name kept for any later cells

# Order the dimensions as (y, x) before writing the GeoTIFF, using the
# notebook-wide dimension names instead of hard-coded 'lat'/'lon'.
nigeria_land_cover.transpose(y_dim, x_dim).rio.to_raster('ejemplo.tif')