# Setup
## Libraries

In [2]:
import os, sys
import ee
import geopandas as gpd
import numpy as np
import xarray as xr
import xvec
import rioxarray as rxr
from dask.distributed import Client

sys.path.append('..')
from utils.constants import *

In [3]:
# Trigger the authentication flow (interactive the first time it runs).
ee.Authenticate()
# Initialize the library exactly once, passing both the project and the
# high-volume endpoint. A second ee.Initialize(opt_url=...) call would
# re-initialize the client without the explicit project argument.
# GEE_PROJECT_NAME is expected to come from utils.constants (star import).
ee.Initialize(
    project=GEE_PROJECT_NAME,
    opt_url='https://earthengine-highvolume.googleapis.com',
)
# NOTE: do not paste authorization codes or tokens into the notebook; they
# end up in version control together with the source.

In [4]:
# Start a local Dask cluster: 2 worker processes, 2 threads each, with a
# memory limit of 16GB per worker.
client = Client(
    n_workers=2,
    threads_per_worker=2,
    memory_limit='16GB',
)
# Bare expression so the notebook renders the cluster summary / dashboard link.
client

0,1
Connection method: Cluster object,Cluster type: distributed.LocalCluster
Dashboard: http://127.0.0.1:8787/status,

0,1
Dashboard: http://127.0.0.1:8787/status,Workers: 2
Total threads: 4,Total memory: 29.80 GiB
Status: running,Using processes: True

0,1
Comm: tcp://127.0.0.1:46353,Workers: 2
Dashboard: http://127.0.0.1:8787/status,Total threads: 4
Started: Just now,Total memory: 29.80 GiB

0,1
Comm: tcp://127.0.0.1:41259,Total threads: 2
Dashboard: http://127.0.0.1:34705/status,Memory: 14.90 GiB
Nanny: tcp://127.0.0.1:43927,
Local directory: /tmp/dask-scratch-space/worker-bqo_kb2e,Local directory: /tmp/dask-scratch-space/worker-bqo_kb2e

0,1
Comm: tcp://127.0.0.1:40131,Total threads: 2
Dashboard: http://127.0.0.1:41975/status,Memory: 14.90 GiB
Nanny: tcp://127.0.0.1:39445,
Local directory: /tmp/dask-scratch-space/worker-rm70g3iz,Local directory: /tmp/dask-scratch-space/worker-rm70g3iz


## Directory and File Paths

# Data Processing

In [5]:
# Open the Earth Engine collections through the 'ee' xarray backend.
# Every dataset is opened with the same engine and scale.
# NOTE(review): scale=.25 is presumably the pixel size in CRS units -- confirm
# against the xee documentation.
# Alternative land-cover source, currently unused: "ee://ESA/WorldCover/v100".
ee_open_kwargs = dict(engine='ee', scale=.25)

esri_land_ds = xr.open_dataset(
    "ee://projects/sat-io/open-datasets/landcover/ESRI_Global-LULC_10m_TS",
    **ee_open_kwargs,
).rio.set_spatial_dims(x_dim='lon', y_dim='lat')

ghs_built_s_ds = xr.open_dataset("ee://JRC/GHSL/P2023A/GHS_BUILT_S", **ee_open_kwargs)
ghs_built_v_ds = xr.open_dataset("ee://JRC/GHSL/P2023A/GHS_BUILT_V", **ee_open_kwargs)
ghs_smod_ds = xr.open_dataset("ee://JRC/GHSL/P2023A/GHS_SMOD", **ee_open_kwargs)
ghs_pop_ds = xr.open_dataset("ee://JRC/GHSL/P2023A/GHS_POP", **ee_open_kwargs)

alos_dsm_ds = xr.open_dataset("ee://JAXA/ALOS/AW3D30/V3_2", **ee_open_kwargs)

In [12]:
def encode_smod_dataset(dataset):
    """One-hot encode the ``smod_code`` variable of ``dataset``.

    For every code in the category mapping below, a 0/1 indicator variable
    named ``smod_<label>`` is created (1 where ``smod_code`` equals the code,
    0 elsewhere). The indicators are merged into ``dataset`` and the original
    ``smod_code`` variable is dropped.

    Parameters
    ----------
    dataset : xarray.Dataset
        Dataset that contains a ``smod_code`` data variable. All of its other
        variables are carried through to the result.

    Returns
    -------
    xarray.Dataset
        ``dataset`` without ``smod_code`` plus one ``smod_<label>`` indicator
        variable per category.
    """
    ghs_smod_categories = {10.: 'water', 11.: 'very_low_density_rural', 12.: 'low_density_rural', 13.: 'rural_cluster', 
                        21.: 'suburban', 22.: 'semidense_urban', 23.: 'dense_urban', 30.: 'urban_centre', -200.: 'no_data'}

    # Read from the argument, not from the notebook-level ``ghs_smod_ds``:
    # otherwise a clipped/merged input is ignored and its other variables
    # never reach the result.
    smod_code = dataset['smod_code']

    one_hot_vars = [
        xr.where(smod_code == code, 1, 0).rename(f'smod_{label}')
        for code, label in ghs_smod_categories.items()
    ]

    return xr.merge([dataset] + one_hot_vars).drop_vars('smod_code')

In [10]:
# Combine the four GHSL datasets into one Dataset and tell rioxarray which
# dimensions are the spatial x / y axes.
ghsl_datasets = [ghs_built_s_ds, ghs_built_v_ds, ghs_smod_ds, ghs_pop_ds]
merged_ds = xr.merge(ghsl_datasets).rio.set_spatial_dims(x_dim='lon', y_dim='lat')
# Display the merged dataset. A CRS is not written explicitly here
# (.rio.write_crs('EPSG:4326') was left disabled).
merged_ds

In [8]:
# Country boundary for Nigeria from the FAO GAUL 2015 level-0 collection.
# Alternative boundary source, currently unused:
# "ee://WM/geoLab/geoBoundaries/600/ADM0".
gaul = ee.FeatureCollection("FAO/GAUL/2015/level0")
nigeria_features = gaul.filter(ee.Filter.eq('ADM0_NAME', 'Nigeria'))

# Request the filtered features as a GeoDataFrame.
nigeria_boundary_gdf = ee.data.computeFeatures({
    'expression': nigeria_features,
    'fileFormat': 'GEOPANDAS_GEODATAFRAME',
})

# Assign the CRS reported by the projection of the collection's first feature.
gaul_projection = gaul.first().geometry().projection().getInfo()
nigeria_boundary_gdf.crs = gaul_projection['crs']

# Bounding box of the boundary, used later to clip the raster data.
minx, miny, maxx, maxy = nigeria_boundary_gdf.total_bounds

In [14]:
# Clip the merged dataset to Nigeria's bounding box, then replace the
# categorical smod_code variable with its one-hot indicators.
nigeria_clipped_ds = merged_ds.rio.clip_box(minx=minx, miny=miny, maxx=maxx, maxy=maxy)
nigeria_merged_ds = encode_smod_dataset(nigeria_clipped_ds)

In [15]:
# Inspect the slice of the Nigeria dataset selected by the time label "2025-01".
# NOTE(review): presumably this relies on partial datetime-string indexing of
# the time coordinate -- confirm the dataset has an entry matching this label.
nigeria_merged_ds.sel(time="2025-01")