# 1: Setup and Data Exploration

Author: Daniel Lusk

## Imports

In [None]:
import glob
import os

import geemap
import geopandas as gpd
import matplotlib.pyplot as plt
import numpy as np
import pandas as pd
import rioxarray as rx
import utils
from PreprocessingConfig import Config

from pprint import pprint
from rasterio.enums import Resampling
from tqdm.notebook import tqdm
from visualize import plot_traits

%load_ext autoreload
%autoreload 2

## Load configuration

In [None]:
# Instantiate the preprocessing configuration. Later cells in this notebook call
# its iNat_fns() and WC_fns() methods to look up the input rasters.
config = Config()

## TRY/iNaturalist trait maps

### Visualize TRY/iNaturalist trait maps

First, let's look at the trait maps generated from TRY/iNaturalist observations by [Wolf et al. (2022)](https://doi.org/10.1038/s41559-022-01904-x).

In [None]:
# Trait-map inputs for the "2" variant (presumably the 2-degree maps, going by
# the variable name — confirm against Config.iNat_fns).
inat_fns_2deg = config.iNat_fns("2")

# NOTE(review): the meaning of the second argument (4) is defined by
# visualize.plot_traits — possibly the number of grid columns; confirm.
plot_traits(inat_fns_2deg, 4)

### Convert to GeoDataFrames and merge

In [None]:
# Turn every trait raster into a GeoDataFrame and merge them into one frame.
trait_fns = config.iNat_fns()
trait_gdfs = utils.merge_gdfs([utils.tif2gdf(fn) for fn in trait_fns])

# Preview the first rows of the merged frame.
trait_gdfs.head(5)

## WorldClim data

Load the tifs and resample to 0.5 degrees

In [None]:
# WorldClim bioclimatic variables to load (BIO numbers).
bio_vars = [1, 4, 7, 12, 13, 14, 15]
bio_fns = config.WC_fns(bio_vars)

# Factor handed to utils.resample_raster for every layer.
# NOTE(review): the notebook text says the target is 0.5 degrees; presumably the
# inputs are the 10 arc-minute layers, so 1/3 yields 0.5 deg — confirm against
# utils.resample_raster.
scale_factor = 1/3

bios = []
for bio_fn in bio_fns:
    raster = rx.open_rasterio(bio_fn, masked=True)
    # Name each layer after its file (base name without extension).
    raster.name = os.path.splitext(os.path.basename(bio_fn))[0]
    bios.append(utils.resample_raster(raster, scale_factor))

Convert to GeoDataFrames and merge

In [None]:
# One GeoDataFrame per resampled WorldClim layer, merged into a single frame.
bio_frames = [utils.tif2gdf(raster) for raster in bios]
bio_gdfs = utils.merge_gdfs(bio_frames)

# Preview the first rows of the merged frame.
bio_gdfs.head(5)

Compute Precipitation Annual Range by subtracting BIO14 (precipitation of the driest month) from BIO13 (precipitation of the wettest month)

In [None]:
# Precipitation annual range: BIO13 minus BIO14, stored as a new column.
range_col = "wc2.1_10m_bio_13-14"

# The derived column's own name contains "bio_13". Without excluding it, running
# this cell a second time would select two columns for BIO13 and break the
# subtraction/assignment. Excluding it keeps the cell safe to re-run while
# selecting exactly the same columns as before on the first run.
bio_13_mask = [col != range_col and "bio_13" in col for col in bio_gdfs.columns]
bio_14_mask = [col != range_col and "bio_14" in col for col in bio_gdfs.columns]

bio_13 = bio_gdfs.loc[:, bio_13_mask].values
bio_14 = bio_gdfs.loc[:, bio_14_mask].values
bio_gdfs[range_col] = bio_13 - bio_14

## MODIS Terra Surface Reflectance bands 1-5

1. Get bands 1-5 of the MODIS Terra Surface Reflectance dataset
2. Mask clouds
3. Aggregate into monthly collections
4. Export to Google Drive with a target resolution of ~1 km at the equator (0.008983152841195, -0.008983152841195)

In [None]:
# Initialize Google Earth Engine
import ee

# Start the Earth Engine session.
# ee.Authenticate() # Uncomment if not already authenticated
ee.Initialize()

# First year of MODIS Terra Surface Reflectance (2000-03-01 to 2001-03-01).
# filterDate treats the end date as exclusive, so "2001-03-02" makes
# 2001-03-01 the last day included in the filtered collection.
ds, de = "2000-03-01", "2001-03-02"

# Surface reflectance bands 1-5: "sur_refl_b01" ... "sur_refl_b05".
bands = [f"sur_refl_b0{band_no}" for band_no in range(1, 6)]

# Cloud masking is driven by the QA band named here.
qa_band = "state_1km"
modis_tsr = ee.ImageCollection("MODIS/061/MOD09GA").filterDate(ds, de)
modis_tsr_masked = utils.mask_clouds(modis_tsr, qa_band)

# One monthly aggregate per band over the same date window.
# NOTE(review): described as monthly averages in the original notes — confirm
# against utils.aggregate_ic.
tsr_bands_monthly = [
    utils.aggregate_ic(modis_tsr_masked.select(band_name), ds, de)
    for band_name in bands
]

# Export each band's monthly collection to Google Drive.
for monthly_collection in tsr_bands_monthly:
    utils.export_collection(monthly_collection, "MODIS")

5. Merge semi-global observations for each band/month

6. Load exported MODIS data from disk

In [None]:
# Load MODIS data

7. Downsample to match resolution of trait maps (0.5 deg)

In [None]:
# Downsample and mask MODIS data

## ISRIC soil data

1. Download soil data from [ISRIC](https://files.isric.org/soilgrids/latest/data/)
2. Reproject to WGS84, and resample to ~1km resolution

See `get_soil_data_multi.py`

3. Reproject to the CRS of the Wolf et al. trait maps and downsample

In [None]:
# Load soil data