# 1: Setup and Data Exploration

Author: Daniel Lusk

## Imports

In [None]:
import glob
import os

import ee
import rioxarray as riox
import utils.geodata as geodata
# import utils.gee as gee
import utils.gdal as gdal
from PreprocessingConfig import PreprocessingConfig
from tqdm.notebook import tqdm
from utils.datasets import Dataset, Unit, resample_dataset

from utils.visualize import plot_traits, plot_rasterio

# Enable IPython's autoreload extension; mode 2 re-imports all loaded modules
# before each cell runs, so edits to the local `utils` package take effect
# without restarting the kernel.
%load_ext autoreload
%autoreload 2

# Earth Engine authentication/initialization is disabled by default.
# NOTE(review): the MODIS export cell uses `ee.ImageCollection`, which
# presumably needs an initialized session — enable these before setting
# `config.gee_export`.
# ee.Authenticate() # Uncomment if not already authenticated
# ee.Initialize()

# Load configuration: supplies the dataset directories/names/file lists and the
# `plot_traits`, `resamp_to_disk` and `gee_export` switches used by later cells.
config = PreprocessingConfig()

## TRY/iNaturalist trait maps

### Visualize TRY/iNaturalist trait maps

First, let's look at the trait maps generated from TRY/iNaturalist observations by [Wolf et al. (2022)](https://doi.org/10.1038/s41559-022-01904-x).

In [None]:
# Optional visual check of the trait maps; skipped unless the config enables it.
if config.plot_traits:
    # Trait-map files selected with the "2" argument (presumably the 2-degree
    # products, going by the variable name — confirm against PreprocessingConfig).
    inat_fns_2deg = config.iNat_fns("2")
    plot_traits(inat_fns_2deg, 4)

### Convert to GeoDataFrames and merge

In [None]:
# Convert every trait raster to a GeoDataFrame and merge the results into a
# single frame.
# NOTE(review): the plotting cell calls `config.iNat_fns("2")` as a method,
# whereas here `config.iNat_fns` is iterated as a plain attribute — confirm
# which form PreprocessingConfig actually exposes.
trait_fns = config.iNat_fns
trait_gdfs = geodata.merge_gdfs([geodata.tif2gdf(fn) for fn in trait_fns])

# Preview the first rows of the merged frame.
trait_gdfs.head(5)

## WorldClim data

Load the tifs and resample to all resolutions needed

In [None]:
# Describe the WorldClim collection: where it lives on disk and the
# resolution/unit it is registered with (0.5 degrees).
wc = Dataset(
    parent_dir=config.WC_dir,
    collection_name=config.WC_name,
    res=0.5,
    unit=Unit.DEGREE,
    # bio_ids=config.WC_bio_ids,
)

# Resample to 2 degrees only when the config asks for it.
if config.resamp_to_disk:
    resample_dataset(dataset=wc, unit=Unit.DEGREE, resolution=2)

Convert to GeoDataFrames and merge

In [None]:
bio_fns = config.WC_fns

# Open each WorldClim raster with nodata masked and label it with its file
# name (extension stripped) so the layers stay distinguishable after merging.
bios = []
for bio_fn in bio_fns:
    stem, _ext = os.path.splitext(os.path.basename(bio_fn))
    bio = riox.open_rasterio(bio_fn, masked=True)
    bio.name = stem
    bios.append(bio)

# One GeoDataFrame per raster, merged into a single frame.
bio_frames = [geodata.tif2gdf(raster) for raster in bios]
bio_gdfs = geodata.merge_gdfs(bio_frames)
bio_gdfs.head(5)

Compute Precipitation Annual Range by subtracting BIO14 (precipitation of the driest month) from BIO13 (precipitation of the wettest month)

In [None]:
# Precipitation annual range = BIO13 - BIO14, stored under the column name below.
range_col = "wc2.1_10m_bio_13-14"


def _single_bio_column(columns, token, exclude):
    """Return the only column whose name contains *token*.

    The derived range column (*exclude*) is skipped: its name also contains
    "bio_13", so a plain substring match would pick it up when this cell is
    re-run and the subtraction would no longer be column-vs-column.

    Raises:
        ValueError: if zero or more than one column matches.
    """
    matches = [col for col in columns if col != exclude and token in col]
    if len(matches) != 1:
        raise ValueError(
            f"Expected exactly one column containing {token!r}, found {matches}"
        )
    return matches[0]


# 1-D value arrays for the two source variables (the original boolean-mask
# selection produced 2-D (n, k) arrays).
bio_13 = bio_gdfs[_single_bio_column(bio_gdfs.columns, "bio_13", range_col)].values
bio_14 = bio_gdfs[_single_bio_column(bio_gdfs.columns, "bio_14", range_col)].values
bio_gdfs[range_col] = bio_13 - bio_14

## MODIS Terra Surface Reflectance bands 1-5

1. Get bands 1-5 of MODIS Terra Surface Reflectance dataset
2. Mask clouds
3. Aggregate into monthly collections
4. Export to Google Drive with a target resolution of ~1 km at the equator (0.008983152841195, -0.008983152841195 degrees)

In [None]:
# Export monthly, cloud-masked MODIS Terra surface reflectance (bands 1-5) to
# Google Drive. Runs only when `config.gee_export` is set.
if config.gee_export:
    # The module-level `import utils.gee as gee` is commented out, so import the
    # helpers here; otherwise every `gee.*` call below raises NameError.
    import utils.gee as gee

    # Get MODIS Terra Surface Reflectance image collection for its first year of
    # operation (2000-03-01 through 2001-03-01). `filterDate` treats the end date
    # as exclusive, hence the end value of 2001-03-02.
    ds, de = "2000-03-01", "2001-03-02"
    bands = ["sur_refl_b01", "sur_refl_b02", "sur_refl_b03", "sur_refl_b04", "sur_refl_b05"]
    modis_tsr = ee.ImageCollection("MODIS/061/MOD09GA").filterDate(ds, de)

    # Mask clouds using the 1 km state QA band
    qa_band = "state_1km"
    modis_tsr_masked = gee.mask_clouds(modis_tsr, qa_band)

    # Aggregate the image collection into monthly averages for each band
    tsr_bands_monthly = [
        gee.aggregate_ic(modis_tsr_masked.select(band), ds, de) for band in bands
    ]

    # Export images to Google Drive (one monthly collection per band)
    for monthly_collection in tsr_bands_monthly:
        gee.export_collection(monthly_collection, "MODIS")

5. Merge semi-global observations for each band/month (See `scripts/merge_files.py`)

6. Downsample to match resolution of trait maps (0.5 deg)

In [None]:
# Describe the merged MODIS collection: where it lives on disk and the
# resolution/unit it is registered with (0.5 degrees).
modis = Dataset(
    parent_dir=config.MODIS_dir,
    collection_name=config.MODIS_name,
    res=0.5,
    unit=Unit.DEGREE,
)

# Resample to 2 degrees only when the config asks for it.
if config.resamp_to_disk:
    resample_dataset(dataset=modis, unit=Unit.DEGREE, resolution=2)

## ISRIC soil data

1. Download soil data from [ISRIC](https://files.isric.org/soilgrids/latest/data/)
2. Reproject to WGS84, and resample to ~1km resolution

See `get_soil_data_multi.py`

3. Reproject and downsample to Wolf trait maps CRS

In [None]:
# Describe the ISRIC soil collection: where it lives on disk and the
# resolution/unit it is registered with (0.5 degrees).
soil = Dataset(
    parent_dir=config.soil_dir,
    collection_name=config.soil_name,
    res=0.5,
    unit=Unit.DEGREE,
)

# Resample to 2 degrees only when the config asks for it.
if config.resamp_to_disk:
    resample_dataset(dataset=soil, unit=Unit.DEGREE, resolution=2)

4. Convert to GeoDataFrames and merge

In [None]:
soil_fns = config.soil_fns

# Open each soil raster with nodata masked and label it with its file name
# (extension stripped) so the layers stay distinguishable after merging.
soils = []
for soil_fn in soil_fns:
    stem, _ext = os.path.splitext(os.path.basename(soil_fn))
    obs = riox.open_rasterio(soil_fn, masked=True)
    obs.name = stem
    soils.append(obs)

# One GeoDataFrame per raster, merged into a single frame.
soil_frames = [geodata.tif2gdf(raster) for raster in soils]
soil_gdfs = geodata.merge_gdfs(soil_frames)
soil_gdfs.head(5)

# Combine all into a single GeoDataFrame

In [None]:
# Merge the trait, WorldClim, MODIS and soil frames into one GeoDataFrame.
# NOTE(review): `modis_gdfs` is not created by any cell shown in this notebook
# (the MODIS section stops after resampling) — it must be built the same way
# as `bio_gdfs`/`soil_gdfs` before this cell can run.
frames_to_merge = [trait_gdfs, bio_gdfs, modis_gdfs, soil_gdfs]
all_gdfs = geodata.merge_gdfs(frames_to_merge)
print(f"Combined dataframe shape: {all_gdfs.shape}")
all_gdfs.head()