# 2: Train XGBoost Model

Author: Daniel Lusk

## Imports and configuration

In [None]:
import os

import rioxarray as riox

import utils
from TrainModelConfig import Config

%load_ext autoreload
%autoreload 2

Load configuration

In [None]:
# Project configuration object; the cells below query it for the raster file
# names to load (config.iNat_fns() and config.WC_fns(...)).
config = Config()

## Load data

### iNaturalist trait maps

In [None]:
# Convert each iNaturalist trait raster to a GeoDataFrame and merge the
# results with utils.merge_gdfs.
trait_gdfs = utils.merge_gdfs(
    [utils.tif2gdf(trait_fn) for trait_fn in config.iNat_fns()]
)
trait_gdfs.head(2)

### WorldClim

Load the tifs and resample to 0.5 degrees

In [None]:
# WorldClim BIO variable numbers to load, and the factor passed to
# utils.resample_raster (per the notebook text, this yields 0.5 degrees).
bio_vars = [1, 4, 7, 12, 13, 14, 15]
scale_factor = 1/3

# Phase 1: open every raster and label it with its file stem
# (file name without directory or extension).
bios = []
for bio_fn in config.WC_fns(bio_vars):
    raster = riox.open_rasterio(bio_fn, masked=True)
    file_name = os.path.split(bio_fn)[1]
    raster.name = os.path.splitext(file_name)[0]
    bios.append(raster)

# Phase 2: resample all opened rasters with the shared scale factor.
bios = [utils.resample_raster(raster, scale_factor) for raster in bios]

Convert to GeoDataFrames and merge

In [None]:
# One GeoDataFrame per resampled WorldClim raster, then merged with
# utils.merge_gdfs.
bio_gdfs = [utils.tif2gdf(raster) for raster in bios]
bio_gdfs = utils.merge_gdfs(bio_gdfs)
bio_gdfs.head(2)

Compute Precipitation Annual Range by subtracting BIO14 (precipitation of the driest month) from BIO13 (precipitation of the wettest month)

In [None]:
# Name of the derived column. It contains the substring "bio_13", so it is
# explicitly excluded from the column masks below; otherwise re-running this
# cell would match two columns for BIO13 and the assignment would fail.
range_col = "wc2.1_10m_bio_13-14"

# Boolean masks over the columns, selected by substring as before.
is_bio_13 = [col != range_col and "bio_13" in col for col in bio_gdfs.columns]
is_bio_14 = [col != range_col and "bio_14" in col for col in bio_gdfs.columns]

bio_13 = bio_gdfs.loc[:, is_bio_13].values
bio_14 = bio_gdfs.loc[:, is_bio_14].values
bio_gdfs[range_col] = bio_13 - bio_14
bio_gdfs.head(2)

## XGBoost