# Generate background pseudo-absence data

In [None]:
############### CONFIGURATION PARAMETERS ###############
# Modify these variables to customize the analysis for your species and regions

# Species selection - choose the target species for distribution modeling
# specie = 'leptocybe-invasa' # 'thaumastocoris-peregrinus' # 

# Geographic regions for training and testing
# training = 'east-asia' # 'australia' #  # Region used for model training
# interest = 'south-east-asia'  # Region of interest for prediction/testing

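# Pseudo-absence point generation parameters
# pseudoabsence = 'random'  # Sampling method: 'random', 'biased-land-cover' or 'biased'
# count = 10000  # Number of background points to generate
# iteration = 0  # Run identifier used in presence-CSV and figure file names (example value; confirm expected format)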

# Spatial resolution settings
# # 100m ~ 0.001 deg (high resolution)
# # 1000m ~ 0.01 deg (medium resolution)
# ref_res = (0.01, 0.01) # deg  # Reference resolution for raster operations

# Output options
# savefig = True  # Whether to save generated figures to disk

# Alternative configuration example:
# specie = 'leptocybe-invasa' # 'thaumastocoris-peregrinus' # 
# training = 'south-east-asia'  # Use same region for training and testing
# interest = 'south-east-asia'

###########################################################

In [None]:
# =============================================================================
# IMPORT REQUIRED LIBRARIES
# =============================================================================

# Standard library imports
import os  # File system operations for path handling and directory management

# Core scientific computing libraries
import numpy as np  # Numerical computing - arrays, mathematical operations
import pandas as pd  # Data manipulation and analysis - DataFrames, CSV handling
import geopandas as gpd  # Geospatial data handling - vector data, shapefiles
import rioxarray as rioxr  # Raster I/O for xarray - geospatial raster operations

# Specialized geospatial processing libraries
import geowombat as gw  # High-performance geospatial processing library
from geowombat.data import rgbn  # Sample data for geowombat (not used in this notebook)
from geocube.api.core import make_geocube  # Convert vector data to raster format

# Species distribution modeling library
import elapid as ela  # Species distribution modeling library - MaxEnt, sampling functions

# Visualization libraries
import matplotlib.pyplot as plt  # Plotting and visualization - figures, plots
import matplotlib as mpl  # Matplotlib configuration - colormaps, styling

# Cartographic and mapping libraries
import cartopy.crs as ccrs  # Cartographic projections - coordinate reference systems
import cartopy.feature as cfeature  # Cartographic features - coastlines, borders, land
import cartopy.io.shapereader as shapereader  # Shapefile reading utilities

# Warning suppression for cleaner output
import warnings
warnings.filterwarnings("ignore")  # Suppress warning messages for cleaner output

# Matplotlib styling: use the 'x-large' font size for legend, axis-label,
# title and tick-label text so the figures stay readable.
params = {
    key: 'x-large'
    for key in (
        'legend.fontsize',
        'axes.labelsize',
        'axes.titlesize',
        'xtick.labelsize',
        'ytick.labelsize',
    )
}
plt.rcParams.update(params)

In [None]:
# Set the default cartographic projection for mapping
projection = ccrs.PlateCarree()  # Equirectangular projection (lat/lon coordinates)

def make_map(projection, res, figsize=None, ncols=None):
    """
    Create a figure whose axes carry basic cartographic features.

    Parameters:
    -----------
    projection : cartopy.crs
        Cartographic projection to use for the map
    res : str
        Natural Earth scale for the cartographic features
        ('10m', '50m' or '110m')
    figsize : tuple, optional
        Figure size in inches (width, height)
    ncols : int, optional
        Number of side-by-side map panels. When omitted, a single map
        is created.

    Returns:
    --------
    fig : matplotlib.figure.Figure
        The created figure object
    axes : list or numpy.ndarray
        One-dimensional sequence of map axes: a one-element list when
        ``ncols`` is None, otherwise an array of length ``ncols``.
    """

    if ncols is None:
        # Single plot layout
        fig = plt.figure(figsize=figsize)
        axes = [plt.axes(projection=projection)]
    else:
        # Multi-column layout. squeeze=False keeps the result an array even
        # for ncols=1 (a bare Axes object would not be iterable below);
        # ravel() restores the familiar 1-D shape for ncols >= 2.
        fig, axes = plt.subplots(figsize=figsize, ncols=ncols, squeeze=False,
                                 subplot_kw={'projection': projection})
        axes = axes.ravel()

    # Add cartographic features to each axis
    for ax in axes:
        # Country borders
        ax.add_feature(cfeature.BORDERS.with_scale(res))

        # State/province boundaries (dotted gray lines)
        ax.add_feature(cfeature.STATES.with_scale(res),
                       linestyle=':', edgecolor='gray', linewidth=0.5)

        # Land areas (light gray fill)
        ax.add_feature(cfeature.LAND.with_scale(res), color='lightgray')

        # Coastlines
        ax.add_feature(cfeature.COASTLINE.with_scale(res))

    return fig, axes

In [None]:
# =============================================================================
# DIRECTORY STRUCTURE SETUP
# =============================================================================
# Every project folder is resolved relative to the parent of the current
# working directory.

# Alternative data path (commented out - for server environments)
# data_path = os.path.join(os.sep, 'scratch', 'aciar-fst', 'data')

project_root = os.path.dirname(os.getcwd())

# Input datasets (land cover, species data, etc.)
data_path = os.path.join(project_root, 'data')

# Generated figures and documentation
figs_path = os.path.join(project_root, 'figs')
docs_path = os.path.join(project_root, 'docs')

# Species-specific outputs: out/<specie>/input/{train,test}
out_path = os.path.join(project_root, 'out', specie)
input_path = os.path.join(out_path, 'input')
train_path = os.path.join(input_path, 'train')
test_path = os.path.join(input_path, 'test')

In [None]:
# =============================================================================
# GEOGRAPHIC REGIONS DEFINITION
# =============================================================================
# Dictionary defining different geographic regions for species distribution modeling
# Each key represents a region name, and the value is a list of countries in that region

# Region name -> list of country names belonging to that region.
# NOTE(review): 'india-sri-lanka' lists only Sri Lanka - confirm that leaving
# out India is intentional.
regions = {
    'east-asia': [
        'China', 'Taiwan', 'Japan', 'North Korea', 'South Korea',
    ],
    'indo': [
        'Indonesia', 'Malaysia', 'Singapore', 'Brunei', 'East Timor',
    ],
    'sea': [
        'Myanmar', 'Cambodia', 'Laos', 'Philippines', 'Thailand', 'Vietnam',
    ],
    'south-east-asia': [
        'Brunei', 'Myanmar', 'Cambodia', 'East Timor', 'Indonesia', 'Laos',
        'Malaysia', 'Philippines', 'Singapore', 'Thailand', 'Vietnam',
    ],
    'australia': ['Australia'],
    'australasia': ['Australia', 'New Zealand'],
    'india-sri-lanka': ['Sri Lanka'],
    'all': [
        'Australia', 'France', 'Italy', 'Portugal', 'South Africa',
        'United States of America', 'Madagascar', 'Spain', 'Greece', 'Cyprus',
        'Mexico', 'Kenya', 'Algeria', 'Israel', 'Egypt', 'Ethiopia', 'Ghana',
        'Malawi', 'Mauritius', 'Morocco', 'Mozambique', 'Rwanda',
        'Sierra Leone', 'United Republic of Tanzania', 'Tunisia', 'Uganda',
        'Zimbabwe', 'China', 'India', 'Iran', 'Iraq', 'Jordan', 'Sri Lanka',
        'Syria', 'Taiwan', 'Turkey', 'Malta', 'Montenegro', 'United Kingdom',
        'Argentina', 'Brazil', 'Chile', 'Paraguay', 'Uruguay',
    ],
}

In [None]:
# =============================================================================
# LOAD GEOGRAPHIC BOUNDARIES
# =============================================================================
# Load shapefiles containing country boundaries for the study regions

# Alternative approach: Load separate shapefiles for train and test regions
# This would create separate GeoDataFrames for each region
# gdf_countries = {}
# for mode in ['train', 'test']:
#     if mode == 'train':
#         region = training
#         file_path = train_path
#     else:  # mode == 'test'
#         region = interest
#         file_path = test_path
#     gdf_countries[mode] = gpd.read_file(os.path.join(file_path, '%s.shp' %region))

# Current approach: read only the training region's boundary shapefile and
# keep it as the reference boundary for the analysis.
training_shapefile = os.path.join(input_path, f'{training}.shp')
gdf_countries = gpd.read_file(training_shapefile)

## Load Presence/Occurrence Data
This section loads the species occurrence data (presence points) that will be used to generate pseudo-absence points. The presence data represents known locations where the species has been observed or recorded.

In [None]:
# Progress marker only: the presence CSV files themselves are read further
# down, inside the cell of the selected pseudo-absence method.
print('load presence')

In [None]:
# =============================================================================
# PREPARE RASTER TEMPLATES FOR SAMPLING
# =============================================================================
# Rasterise each region's boundary shapefile into a template grid. The
# sampling cells below open it again as <train|test path>/<region>.tif.

occurences = {}  # Presence points per mode ('train'/'test'); filled by later cells

# Process both training and testing regions
for mode in ['train', 'test']:
    # Determine region and output folder based on mode
    if mode == 'train':
        region = training
        file_path = train_path
    else:  # mode == 'test'
        region = interest
        file_path = test_path

    # Read the boundaries of the region that belongs to this mode. Both passes
    # used to read and write the *training* region, so the template of the
    # region of interest was never produced whenever the two regions differ.
    # Assumes <input_path>/<region>.shp exists for both regions, like the
    # training shapefile loaded above - TODO confirm.
    shpfile = gpd.read_file(os.path.join(input_path, '%s.shp' % region))

    # Rasterise at 0.05 degree resolution (≈ 5.5 km at the equator).
    # NOTE(review): geocube documents `resolution` as (y, x) with the y step
    # usually negative - confirm that (0.05, -0.05) is the intended order.
    grid = make_geocube(shpfile, resolution=(0.05, -0.05))

    # Save the template under the region's own name, zstd-compressed
    grid.rio.to_raster(os.path.join(file_path, '%s.tif' % region), compress='zstd')

## 1. Random Pseudo-Absence Generation
This method generates pseudo-absence points by randomly sampling locations within the study area. This is the simplest approach and serves as a baseline for comparison with more sophisticated methods.

In [None]:
if pseudoabsence == 'random':
    print('random')

    # =============================================================================
    # RANDOM PSEUDO-ABSENCE POINT GENERATION
    # =============================================================================
    # Draw `count` background points from each region's template raster, load
    # the matching presence records and store the background points as CSV.

    pseudoabsence_random = {}  # mode -> background points
    raster_files = {}          # mode -> template raster path

    for mode in ['train', 'test']:
        # Region name and working folder for this mode
        region, file_path = (training, train_path) if mode == 'train' else (interest, test_path)

        # Sample background points from the region's template raster
        raster_files[mode] = os.path.join(file_path, f'{region}.tif')
        pseudoabsence_random[mode] = ela.sample_raster(raster_files[mode], count=count)

        # Presence records for the same region, turned into point geometries
        occurences_file_name = os.path.join(file_path, f'{specie}_presence_{region}_{iteration}.csv')
        df = pd.read_csv(occurences_file_name)
        occurences[mode] = gpd.GeoDataFrame(df, geometry=gpd.points_from_xy(df.lon, df.lat))

        # Summary
        print(f'Number of presences in {region} is: {len(occurences[mode])}')
        print(f'Number of random selected background points in {region} is: {len(pseudoabsence_random[mode])}')

        # Turn the sampled geometries into a table with explicit lon/lat columns
        background = pseudoabsence_random[mode].to_frame(name='geometry')
        background['lon'] = background['geometry'].x
        background['lat'] = background['geometry'].y
        pseudoabsence_random[mode] = background

        # Only the coordinates are written to disk
        background_csv = os.path.join(file_path, f'{specie}_background_random_{region}.csv')
        background.to_csv(background_csv, columns=['lon', 'lat'], index=False)

In [None]:
if pseudoabsence == 'random':
    # =============================================================================
    # VISUALIZE RANDOM PSEUDO-ABSENCE POINTS
    # =============================================================================
    # Two map panels: training region on the left, testing region on the right,
    # each showing background points (red dots) and presence points (stars).

    resolution = '50m'  # Cartographic feature resolution

    fig, axes = make_map(projection, resolution, figsize=(18,10), ncols=2)

    # (mode, presence colour) for the left and right panel respectively
    panels = (('train', 'tab:blue'), ('test', 'tab:green'))

    for ax, (panel_mode, presence_colour) in zip(axes, panels):
        pseudoabsence_random[panel_mode].plot(
            ax=ax, markersize=1, color='tab:red', label='Background points'
        )
        occurences[panel_mode].plot(
            ax=ax, marker='*', markersize=100, color=presence_colour, label='Presence points'
        )
        # Labelled grid lines for spatial reference
        ax.gridlines(color='grey', linestyle=':', draw_labels=True, rotate_labels=False)

In [None]:
# Write the random-background figure to the figures folder when requested
if pseudoabsence == 'random' and savefig:
    figure_file = os.path.join(figs_path, f'02_background_random_{specie}.png')
    fig.savefig(figure_file, transparent=True)

## 2. Biased Pseudo-Absence Generation
This section implements more sophisticated methods for generating pseudo-absence points by biasing the selection towards areas that are environmentally similar to where the species is known to occur. This approach can improve model performance by focusing on ecologically relevant areas.

### 2.1 Land Cover-Based Biased Sampling
This method uses the Copernicus Global Land Cover dataset to bias pseudo-absence point selection towards forest types where the species is more likely to occur. The approach assigns different weights to different forest types based on their suitability for the target species.

**Data Source**: Copernicus Global Land Cover (https://land.copernicus.eu/global/products/lc)

In [None]:
if pseudoabsence == 'biased-land-cover':
    print('biased-land-cover')
    raster_files = {}

    # Global Copernicus forest-type layer
    land_cover_file = os.path.join(data_path, 'land-cover', 'PROBAV_LC100_global_v3.0.1_2019-nrt_Forest-Type-layer_EPSG-4326.tif')
    # NOTE(review): `dask` is not a named parameter of rioxarray.open_rasterio;
    # it is forwarded as an extra open keyword. If chunked (dask-backed)
    # reading was intended, `chunks=` is the documented way - confirm.
    land_cover_glo = rioxr.open_rasterio(land_cover_file, masked=True,cache=False,dask=True)

    # -------------------------------------------------------------------------
    # Step 1: cut a bounding box (template extent + 1 degree margin) around
    # each region out of the global dataset and store it on disk.
    # -------------------------------------------------------------------------
    for mode in ['train', 'test']:
        if mode == 'train':
            region = training
            file_path = train_path
        else:  # mode == 'test'
            region = interest
            file_path = test_path

        # The region's template raster is only needed for its extent
        raster_files[mode] = os.path.join(file_path, '%s.tif' %region)
        raster = rioxr.open_rasterio(raster_files[mode], masked=True,lock=False,cache=False,dask=True)

        print("raster opened")

        land_cover_box = land_cover_glo.rio.clip_box(
            minx=raster.x.min() - 1,
            miny=raster.y.min() - 1,
            maxx=raster.x.max() + 1,
            maxy=raster.y.max() + 1,
        )
        # Release the template handle; it was previously left open
        raster.close()

        print("land_cover_box")

        land_cover_box_file = os.path.join(input_path, 'land-cover_box_%s.tif' %region)

        print("lcb_path_and_dir_created")

        land_cover_box.rio.to_raster(land_cover_box_file,compress='zstd',windowed=True)

        print("land_cover_box_to_raster_done")

        land_cover_box.close()
    land_cover_glo.close()

    print('regrid regional datasets to coarser grid')

    # -------------------------------------------------------------------------
    # Step 2: regrid each regional box to `ref_res`, clip it to the region's
    # boundaries and load the result.
    # -------------------------------------------------------------------------
    land_cover_regions = {}

    for mode in ['train', 'test']:
        if mode == 'train':
            region = training
            file_path = train_path
        else:  # mode == 'test'
            region = interest
            file_path = test_path

        land_cover_box_file = os.path.join(input_path, 'land-cover_box_%s.tif' %region)
        land_cover_output_file = os.path.join(input_path, 'land-cover_%s.tif' %region)

        # Clip with the boundaries of the region being processed. The training
        # boundaries were previously used for both modes, which is wrong when
        # the region of interest differs from the training region.
        # Assumes <input_path>/<region>.shp exists for that region - TODO confirm.
        if region == training:
            region_boundaries = gdf_countries
        else:
            region_boundaries = gpd.read_file(os.path.join(input_path, '%s.shp' %region))
        shapes = list(region_boundaries.geometry)

        with gw.config.update(ref_res=ref_res):
            # Forest type is a categorical code, so resample with "nearest":
            # interpolating ("bilinear") would blend class codes into values
            # that match no forest type in the weighting step.
            with gw.open(land_cover_box_file, resampling="nearest", nodata=255, chunks=1024) as src:
                data = src.isel(band=0).rio.clip(geometries=shapes)
                data.rio.to_raster(land_cover_output_file,compress='zstd')

        # load forest type for regions
        land_cover_regions[mode] = rioxr.open_rasterio(land_cover_output_file, mask_and_scale=True).isel(band=0)

In [None]:
if pseudoabsence == 'biased-land-cover':
    print('Processing forest type land cover dataset...')

    # =============================================================================
    # FOREST TYPE WEIGHTING SYSTEM
    # =============================================================================
    # Forest-type code -> [abbreviation, description, weight, plot colour].
    # The weight (0-1) expresses how likely the species is to occur in that
    # forest type (0 = never, 1 = always).

    forest_dict = {
        0: ['N/K', 'Unknown', 0, 'white'],
        1: ['ENF', 'Evergreen Needleleaf Forest', 0.8, 'red'],
        2: ['EBF', 'Evergreen Broadleaf Forest', 1.0, 'orange'],
        3: ['DNF', 'Deciduous Needleleaf Forest', 0.2, 'green'],
        4: ['DBF', 'Deciduous Broadleaf Forest', 0.4, 'blue'],
        5: ['MF', 'Mixed Forest', 0.5, 'purple'],
    }

    # Print the weighting scheme (heading in bold) for reference
    print("\033[1mForest Type Weights (0=unsuitable, 1=highly suitable):\033[0m")
    for _abbreviation, description, weight, _colour in forest_dict.values():
        print(f"{description}: {weight}")

In [None]:
if pseudoabsence == 'biased-land-cover':
    # Translate the forest-type codes of each region into the weights defined
    # in `forest_dict` and store the result as the sampling-bias raster.
    land_cover_bias_file_names = {}

    for mode in ['train', 'test']:
        if mode == 'train':
            region = training
            file_path = train_path
        else:  # mode == 'test'
            region = interest
            file_path = test_path

        # Work on a separate array: editing the values of a shallow copy in
        # place can also overwrite `land_cover_regions[mode]`, which is still
        # needed (unchanged) for the forest-type map further down.
        forest_codes = land_cover_regions[mode].values
        weights = forest_codes.copy()

        # Always compare against the untouched codes so that a weight written
        # for one class (e.g. 1.0) can never be mistaken for another class code.
        for key, val in forest_dict.items():
            weights[forest_codes == key] = val[2]

        # Same coordinates/attributes as the class raster, weights as data
        land_cover_region_norm = land_cover_regions[mode].copy(data=weights)

        # save biased raster to file
        land_cover_bias_file_names[mode] = 'land-cover_biased_%s.tif' %region
        land_cover_region_norm.rio.to_raster(os.path.join(file_path, land_cover_bias_file_names[mode]),compress='zstd')

In [None]:
if pseudoabsence == 'biased-land-cover':
    # Draw `count` background points per region from the land-cover bias
    # raster, load the matching presence records and store the points as CSV.
    pseudoabsence_bias_land_cover = {}

    for mode in ['train', 'test']:
        # Region name and working folder for this mode
        region, file_path = (training, train_path) if mode == 'train' else (interest, test_path)

        bias_raster = os.path.join(file_path, land_cover_bias_file_names[mode])
        pseudoabsence_bias_land_cover[mode] = ela.sample_bias_file(bias_raster, count=count)

        # Presence records for the same region, turned into point geometries
        occurences_file_name = os.path.join(file_path, f'{specie}_presence_{region}_{iteration}.csv')
        df = pd.read_csv(occurences_file_name)
        occurences[mode] = gpd.GeoDataFrame(df, geometry=gpd.points_from_xy(df.lon, df.lat))

        print(f'Number of presences in {region} is: {len(occurences[mode])}')
        print(f'Number of random selected background points in {region} is: {len(pseudoabsence_bias_land_cover[mode])}')

        # Table with explicit lon/lat columns; only those are written to disk
        background = pseudoabsence_bias_land_cover[mode].to_frame(name='geometry')
        background['lon'] = background['geometry'].x
        background['lat'] = background['geometry'].y
        pseudoabsence_bias_land_cover[mode] = background

        background_csv = os.path.join(file_path, f'{specie}_background_biased-land-cover_{region}.csv')
        background.to_csv(background_csv, columns=['lon', 'lat'], index=False)

In [None]:
if pseudoabsence == 'biased-land-cover':
    # 2 x 2 figure: rows = training / testing region,
    # left column = forest-type map, right column = sampled points.
    nrows, ncols = 2, 2
    fig, axes = plt.subplots(figsize=(18,8), ncols=ncols, nrows=nrows, constrained_layout=True)

    # Colour bar tick labels: forest-type abbreviations, in code order
    labels = [entry[0] for entry in forest_dict.values()]

    # One colour per forest-type code; class boundaries sit halfway between
    # consecutive integer codes (-0.5, 0.5, ..., 5.5).
    cmap_lc = mpl.colors.ListedColormap([entry[3] for entry in forest_dict.values()])
    norm_lc = mpl.colors.BoundaryNorm(np.arange(-0.5,6), cmap_lc.N)
    fmt = mpl.ticker.FuncFormatter(lambda x, pos: labels[norm_lc(x)])

    # Country outlines as backdrop on every panel.
    # NOTE(review): `gdf_countries` holds the training-region boundaries, so
    # the testing row shows them too - confirm this is intended when the
    # region of interest differs from the training region.
    for ax in axes.flat:
        gdf_countries.plot(ax=ax, facecolor='lightgray', edgecolor='k')

    # Left column: forest-type maps. Each colour bar is attached explicitly to
    # the panel it describes instead of relying on the "current" axes.
    for row, panel_mode in enumerate(('train', 'test')):
        pcol = land_cover_regions[panel_mode].plot(ax=axes[row, 0], cmap=cmap_lc, norm=norm_lc, add_colorbar=False, add_labels=False)
        fig.colorbar(pcol, ax=axes[row, 0], format=fmt, ticks=np.linspace(0,5,6))
    axes[0, 0].set_title('Forest Type based on Copernicus Land Cover dataset')

    # Right column: background (red dots) and presence points (stars)
    pseudoabsence_bias_land_cover['train'].plot(ax=axes[0, 1], markersize=1, color='tab:red', label='background')
    occurences['train'].plot(ax=axes[0, 1], marker='*', markersize=100, color='tab:blue', label='presence')
    pseudoabsence_bias_land_cover['test'].plot(ax=axes[1, 1], markersize=1, color='tab:red', label='background')
    occurences['test'].plot(ax=axes[1, 1], marker='*', markersize=100, color='tab:green', label='presence')
    axes[0, 1].set_title('Background and presence points')
    axes[0, 1].legend(bbox_to_anchor=(1.05, 1.0), loc='upper left')

In [None]:
# Write the land-cover-biased figure to the figures folder when requested
if pseudoabsence == 'biased-land-cover' and savefig:
    figure_file = os.path.join(
        figs_path,
        f'02_background_biased-land-cover_{specie}_{training}_{iteration}.png',
    )
    fig.savefig(figure_file, transparent=True)

### 2.2 Planted Forest-Based Biased Sampling
This method uses a specialized dataset of planted forests in East Asia to bias pseudo-absence point selection towards areas with planted Eucalyptus forests, which are known to be suitable habitats for the target species.

**Data Source**: Planted Forest Dataset for East Asia (Abbasi et al. 2023)
**Reference**: https://www.nature.com/articles/s41597-023-02383-w

In [None]:
# Check if planted forest-based biased sampling is selected
if pseudoabsence == 'biased':
    print('Generating planted forest-biased pseudo-absence points...')
    
    # Use training region for this analysis; the remaining 'biased' cells
    # read `region` to build their input and output file names.
    region = training

In [None]:
if pseudoabsence == 'biased':
    # =============================================================================
    # PLANTED FOREST TYPE WEIGHTING
    # =============================================================================
    # Weight per planted-forest category; a higher weight means a higher
    # likelihood of species occurrence.

    # Both biomes carry weight 0 (no preference) ...
    forest_type_dict = dict.fromkeys(['Tropical Forest and Savanna', 'Temperate Forest'], 0)
    # ... while Eucalyptus, which the target species prefers, carries weight 1
    forest_type_dict['Eucalyptus'] = 1

In [None]:
if pseudoabsence == 'biased':
    # Build (or reuse) the planted-forest bias raster of the training region,
    # then sample `count` background points from it.
    forest_type_bias_file = os.path.join(train_path, 'forest-type_biased_%s.tif' %region)
    if not os.path.isfile(forest_type_bias_file):

        # Plain path to the zipped dataset. The former leading 'zip://'
        # component was silently dropped by os.path.join() because `data_path`
        # is absolute, so this is the path that was effectively used already.
        planted_forest_file = os.path.join(data_path, 'planted-forest-east-asia', 'planted-forest-east-asia.zip')

        # Read both biomes, keeping only the attributes needed for weighting
        fields = ['Biome', 'Genus', 'Type']
        tropical_forest = gpd.read_file(planted_forest_file, where="Biome='Tropical Forest and Savanna'", include_fields=fields)
        temperate_forest = gpd.read_file(planted_forest_file, where="Biome='Temperate Forest'", include_fields=fields)
        forest_type_region = pd.concat([tropical_forest, temperate_forest])

        # Likelihood (0-1) that the species occurs in each polygon: start at 0,
        # apply the per-biome weights, and let the 'Eucalyptus' entry
        # additionally match on the Genus column.
        forest_type_region['norm'] = 0
        for key, val in forest_type_dict.items():
            forest_type_region.loc[forest_type_region.Biome == key, 'norm'] = val
            if key == 'Eucalyptus':
                forest_type_region.loc[forest_type_region.Genus == key, 'norm'] = val

        # rasterize vector data
        forest_type_raster = make_geocube(
            vector_data=forest_type_region,
            measurements=['norm'],
            resolution=(0.01, -0.01),
        )

        # save raster to file
        forest_type_raster.rio.to_raster(forest_type_bias_file,compress='zstd')
    else:
        forest_type_raster = rioxr.open_rasterio(forest_type_bias_file)

    # Load the training-region presence points here: in 'biased' mode no
    # earlier cell fills `occurences`, so the summary below used to fail with
    # a KeyError on the leftover loop variable `mode`.
    occurences_file_name = os.path.join(train_path, '%s_presence_%s_%s.csv' %(specie, region, iteration))
    df = pd.read_csv(occurences_file_name)
    occurences['train'] = gpd.GeoDataFrame(df, geometry=gpd.points_from_xy(df.lon, df.lat))
    # Keep the leftover loop variable in step for cells that still index
    # `occurences[mode]`.
    mode = 'train'

    pseudoabsence_bias_forest = ela.sample_bias_file(forest_type_bias_file, count=count)

    print('Number of presences in %s is: %s' %(region, len(occurences['train'])))
    print('Number of background points with a bias towards forests in %s is: %s' %(region, len(pseudoabsence_bias_forest)))

In [None]:
if pseudoabsence == 'biased':
    # Turn the sampled geometries into a table with explicit lon/lat columns
    background = pseudoabsence_bias_forest.to_frame(name='geometry')
    background['lon'] = background['geometry'].x
    background['lat'] = background['geometry'].y
    pseudoabsence_bias_forest = background

    # Only the coordinates are written to disk
    background_csv = os.path.join(train_path, f'{specie}_background_biased_forest-type_{region}.csv')
    pseudoabsence_bias_forest.to_csv(background_csv, columns=['lon', 'lat'], index=False)

In [None]:
if pseudoabsence == 'biased':
    # Left panel: weighted forest-type raster; right panel: sampled background
    # points together with the training-region presence points.
    fig, axes = plt.subplots(figsize=(18,5), ncols=2)

    # Three weight classes between 0 and 1; masked cells are drawn transparent
    cmap_ft = mpl.colors.ListedColormap(['white', 'orange', 'green'])
    cmap_ft.set_bad('k', alpha=0)
    bounds=[0,.333,.667,1]
    norm_ft = mpl.colors.BoundaryNorm(bounds, cmap_ft.N)

    # Presence points of the training region. Load them if no earlier cell
    # has done so; indexing with the leftover loop variable `mode` raised a
    # KeyError in 'biased' mode.
    if 'train' not in occurences:
        df = pd.read_csv(os.path.join(train_path, '%s_presence_%s_%s.csv' %(specie, region, iteration)))
        occurences['train'] = gpd.GeoDataFrame(df, geometry=gpd.points_from_xy(df.lon, df.lat))

    gdf_countries.plot(ax=axes[0], facecolor='lightgray', edgecolor='k')

    # A freshly rasterised result carries the weights in a 'norm' variable,
    # whereas a raster re-opened from disk is plotted directly. Choosing the
    # layer up front avoids wrapping the plot call in a broad try/except.
    if 'norm' in getattr(forest_type_raster, 'data_vars', ()):
        bias_layer = forest_type_raster['norm']
    else:
        bias_layer = forest_type_raster
    pcol = bias_layer.plot(ax=axes[0], cmap=cmap_ft, norm=norm_ft, add_colorbar=False, add_labels=False)

    # Attach the colour bar explicitly to the raster panel
    fig.colorbar(pcol, ax=axes[0])
    axes[0].set_title('Forest Type normalised and weighted, based on Abbasi et al. 2023')
    axes[0].set_xlim(72, 146)

    gdf_countries.plot(ax=axes[1], facecolor='lightgray', edgecolor='k')
    pseudoabsence_bias_forest.plot(ax=axes[1], markersize=1, color='tab:red', label='background')
    occurences['train'].plot(ax=axes[1], marker='*', markersize=100, color='tab:blue', label='presence')
    axes[1].legend()

    plt.tight_layout()

In [None]:
# Write the planted-forest-biased figure to the figures folder when requested
if pseudoabsence == 'biased' and savefig:
    figure_file = os.path.join(
        figs_path,
        f'02_background_biased-forest-type_{specie}_{region}_{iteration}.png',
    )
    fig.savefig(figure_file, transparent=True)