In [1]:
%pip install geopandas rasterio matplotlib scikit-learn pandas pyimpute xgboost lightgbm


Note: you may need to restart the kernel to use updated packages.


# Species Distribution Data Loading

In [5]:
import os 
os.mkdir("inputs")
os.mkdir("outputs")

In [8]:
import geopandas as gpd
import shutil
import glob

for f in sorted(glob.glob('data/*[!.csv]')):
    shutil.copy(f,'inputs/')

pa = gpd.GeoDataFrame.from_file("inputs/CardellinaPusilla.shp")
pa.sample(5) # GeoDataFrame for the species

Unnamed: 0,eventDate,sciName,lat,lon,indivCount,geometry
160597,1977-08-23,"Cardellina pusilla (A.Wilson, 1811)",42.05589,-87.70588,2,POINT (42.05589 -87.70588)
26162,2022-05-23,"Cardellina pusilla (A.Wilson, 1811)",40.83686,-119.33664,1,POINT (40.83686 -119.33664)
23600,2022-05-01,"Cardellina pusilla (A.Wilson, 1811)",35.720463,-120.870285,2,POINT (35.72046 -120.87028)
158921,2020-04-14,"Cardellina pusilla (A.Wilson, 1811)",14.638289,-90.576485,1,POINT (14.63829 -90.57649)
36773,2008-05-11,"Cardellina pusilla (A.Wilson, 1811)",32.723286,-117.24206,2,POINT (32.72329 -117.24206)


# Climate Data Loading and Preprocessing

Load Climate Data

In [None]:
# grab climate features - cropped to joshua tree study area
for f in sorted(glob.glob('data/bioclim/bclim*.asc')):
    shutil.copy(f,'inputs/')
raster_features = sorted(glob.glob(
    'inputs/bclim*.asc'))
# check number of features 
print('\nThere are', len(raster_features), 'raster features.')


Model Training and Assessment

In [None]:
from pyimpute import load_training_vector, load_targets

# Assuming 'raster_features' is a list of paths to your climate raster files
train_xs, train_y = load_training_vector(species_distribution, raster_features, response_field='CLASS')


Train ML Classifiers

In [None]:
# import machine learning classifiers
from sklearn.ensemble import RandomForestClassifier
from sklearn.ensemble import ExtraTreesClassifier
from xgboost import XGBClassifier
from lightgbm import LGBMClassifier

CLASS_MAP = {
    'rf': (RandomForestClassifier()),
    'et': (ExtraTreesClassifier()),
    'xgb': (XGBClassifier()),
    'lgbm': (LGBMClassifier())
    }
from pyimpute import impute
from sklearn import model_selection
# model fitting and spatial range prediction
for name, (model) in CLASS_MAP.items():
    # cross validation for accuracy scores (displayed as a percentage)
    k = 5 # k-fold
    kf = model_selection.KFold(n_splits=k)
    accuracy_scores = model_selection.cross_val_score(model, train_xs, train_y, cv=kf, scoring='accuracy')
    print(name + " %d-fold Cross Validation Accuracy: %0.2f (+/- %0.2f)"
          % (k, accuracy_scores.mean() * 100, accuracy_scores.std() * 200))

    # spatial prediction
    model.fit(train_xs, train_y)
    os.mkdir('outputs/' + name + '-images')
    impute(target_xs, model, raster_info, outdir='outputs/' + name + '-images',
           class_prob=True, certainty=True)

Map Species–Environment Relationship

In [None]:
from pyimpute import impute

# Prepare target raster grids for prediction
target_xs, raster_info = load_targets(raster_features)
impute(target_xs, model, raster_info, outdir='/content/drive/My Drive/outputs', class_prob=True)


Visualize Model Predictions

In [None]:
import matplotlib.pyplot as plt

# Example for visualizing one of the output probability maps
with rasterio.open('/content/drive/My Drive/outputs/probability_1.0.tif') as src:
    probability_map = src.read(1)

plt.imshow(probability_map, cmap='viridis')
plt.colorbar()
plt.title('Predicted Species Distribution')
plt.show()
