# Pseudo-Absence Generation

This notebook generates pseudo-absence data using the techniques described in [this paper](https://www.sciencedirect.com/science/article/abs/pii/S030438001500215X).


In [5]:
# install packages

# install.packages('comprehenr')
# install.packages("raster", dependencies=TRUE)
# install.packages('stringr')
# install.packages('ncdf4')
# install.packages('dplyr')

In [1]:
library(raster)
library(ncdf4)
library(rgdal)
library(comprehenr)
library(glue)
library(stringr)
library(mopa)
library(dplyr)

Loading required package: sp

Please note that rgdal will be retired by the end of 2023,
plan transition to sf/stars/terra functions using GDAL and PROJ
at your earliest convenience.

rgdal: version: 1.5-25, (SVN revision 1143)
Geospatial Data Abstraction Library extensions to R successfully loaded
Loaded GDAL runtime: GDAL 2.4.0, released 2018/12/14
Path to GDAL shared files: /usr/share/gdal
GDAL binary built with GEOS: TRUE 
Loaded PROJ runtime: Rel. 5.2.0, September 15th, 2018, [PJ_VERSION: 520]
Path to PROJ shared files: (autodetected)
Linking to sp version:1.4-5


Attaching package: ‘glue’


The following object is masked from ‘package:raster’:

    trim



Attaching package: ‘dplyr’


The following object is masked from ‘package:glue’:

    collapse


The following objects are masked from ‘package:raster’:

    intersect, select, union


The following objects are masked from ‘package:stats’:

    filter, lag


The following objects are masked from ‘package:base’:

    intersect, 

In [2]:
# Names of the NetCDF variables to read from the GLDAS_NOAH025_3H files.
# Each active entry is passed as `varname` when the files are stacked in
# aggregateSM_NASA(). Only the uncommented entries are used; the commented-out
# names are kept as candidates that can be re-enabled.
env_profilers <- list(
#     'Swnet_tavg', 
#     'Lwnet_tavg', 
#     'Qle_tavg', 
#     'Qh_tavg', 
#     'Qg_tavg', 
#     'Snowf_tavg', 
#     'Rainf_tavg', 
#     'Evap_tavg', 
#     'Qs_acc', 
#     'Qsb_acc', 
#     'Qsm_acc', 
#     'AvgSurfT_inst', 
#     'Albedo_inst', 
#     'SWE_inst', 
#     'SnowDepth_inst', 
    'SoilMoi0_10cm_inst'
#     'SoilMoi10_40cm_inst', 
#     'SoilTMP0_10cm_inst', 
#     'SoilTMP10_40cm_inst', 
#     'SoilTMP40_100cm_inst', 
#     'SoilTMP100_200cm_inst', 
#     'PotEvap_tavg', 
#     'ECanop_tavg', 
#     'Tveg_tavg', 
#     'ESoil_tavg', 
#     'RootMoist_inst', 
#     'CanopInt_inst', 
#     'Wind_f_inst', 
#     'Rainf_f_tavg', 
#     'Tair_f_inst'
#     'Qair_f_inst', 
#     'Psurf_f_inst', 
#     'SWdown_f_tavg', 
#     'LWdown_f_tavg'
)

In [3]:
length(env_profilers)

In [4]:
# Download the level-0 GADM boundary for one or more countries and return them
# as a single spatial object.
#
# country: either a character vector or a list of country identifiers. Each
#          entry is passed to getData("GADM", country = ..., level = 0); for a
#          list, the first element of each entry is used.
# Returns: the boundary of the single country, or all boundaries combined with
#          raster::bind() in the order given.
# Errors:  stops with an explicit message when `country` is neither a list nor
#          a character vector, or when it is empty (previously these cases
#          failed with an unrelated "object not found" / subscript error).
get_countrySPDF <- function(country)
{
    if (!is.list(country) && !is.character(country)) {
        stop("`country` must be a character vector or a list of countries", call. = FALSE)
    }
    if (length(country) == 0) {
        stop("`country` must contain at least one country", call. = FALSE)
    }

    # Normalise both accepted forms to a flat list holding one identifier per
    # entry, so a character vector of several countries is downloaded one by
    # one instead of being handed to getData() as a whole.
    if (is.list(country)) {
        identifiers <- lapply(country, function(entry) entry[[1]])
    } else {
        identifiers <- as.list(country)
    }

    boundaries <- lapply(identifiers, function(identifier) {
        getData("GADM", country = identifier, level = 0)
    })

    # With a single boundary Reduce() returns it unchanged; otherwise the
    # boundaries are bound left to right.
    Reduce(raster::bind, boundaries)
}


# Restrict a raster to the given country (or countries).
#
# baseRaster: raster object to clip.
# country:    country specification accepted by get_countrySPDF().
# Returns:    baseRaster cropped to the extent of the country boundary and then
#             masked with that boundary.
cropCountry <- function(baseRaster, country)
{
    boundary <- get_countrySPDF(country)
    # First cut down to the bounding box, then mask with the polygon itself.
    clipped <- crop(baseRaster, extent(boundary))
    mask(clipped, boundary)
}


# Build per-dekad mean rasters of the requested GLDAS variables for one month.
#
# Files are looked up as
#   <basePath>/GLDAS_NOAH025_3H.A<year><2-digit month><2-digit day>*.nc4
# and grouped into three dekads: days 1-10, 11-20 and 21-31.
#
# year, month:   calendar year and month (month is zero-padded to two digits).
# basePath:      directory containing the .nc4 files.
# env_profilers: list/vector of variable names; each is passed as `varname`
#                when stacking the files.
# Returns: a raster stack with one layer per variable and dekad, named
#          "<variable>_dekad1", "<variable>_dekad2", "<variable>_dekad3" and
#          ordered dekad1, dekad2, dekad3.
# Errors:  stops with an explicit message when a dekad has no matching files,
#          instead of failing later inside stack() with an unrelated error.
aggregateSM_NASA <- function(year, month, basePath, env_profilers){
    month_tag <- formatC(as.integer(month), width = 2, flag = "0")
    base_name <- paste0(basePath, "/GLDAS_NOAH025_3H.A", year, month_tag)
    dekad_days <- list(dekad1 = 1:10, dekad2 = 11:20, dekad3 = 21:31)

    dekad_stacks <- lapply(names(dekad_days), function(dekad) {
        patterns <- paste0(base_name, sprintf("%02d", dekad_days[[dekad]]), "*.nc4")
        files <- Sys.glob(patterns)
        if (length(files) == 0) {
            stop("No GLDAS files found for ", dekad, " matching ", base_name,
                 "<day>*.nc4", call. = FALSE)
        }

        # One mean layer per variable across every file of the dekad.
        layers <- lapply(env_profilers, function(env_variable) {
            calc(stack(files, varname = env_variable), mean)
        })
        dekad_stack <- stack(unname(layers))
        names(dekad_stack) <- paste0(env_profilers, "_", dekad)
        dekad_stack
    })

    stack(dekad_stacks[[1]], dekad_stacks[[2]], dekad_stacks[[3]])
}
                          
# Keep only the records of `data` that fall inside the requested countries.
#
# country: country specification accepted by get_countrySPDF(); the values are
#          also compared against the NAME_0 attribute of the boundary polygons,
#          so they need to match those names for rows to be kept.
# data:    data frame with longitude in column X and latitude in column Y.
# Returns: the matching rows of `data` with an added `country` column holding
#          the NAME_0 value of the polygon each point falls in.
subsetCountry <- function(country, data){
    countrySPDF <- get_countrySPDF(country)

    locs <- data.frame(x=data$X, y=data$Y)
    coordinates(locs) <- c("x","y")
    projection(locs) <- CRS("+proj=longlat +init=epsg:4326")
    projection(countrySPDF) <- CRS("+proj=longlat +init=epsg:4326")

    # Points outside every polygon get NA here.
    countryID <- over(locs, countrySPDF)
    data_subset <- data
    data_subset['country'] <- countryID$NAME_0

    # %in% never yields NA, so points without a country are dropped. Filtering
    # with `==` would turn each NA into an all-NA row in the result, and would
    # silently recycle when `country` has more than one element.
    keep <- data_subset$country %in% country
    data_subset[keep, , drop = FALSE]
}


# Turn generated (or already known) points into a flat table with date columns.
#
# pa_generation: when no_generation is TRUE, an object with x, y and v
#                components; otherwise an object whose points are read from
#                $species1$PA01[[1]] (x, y, v).
# data:          source records providing year, month and day.
# no_generation: TRUE when no pseudo-absences were generated.
# Returns: data frame with columns x, y, presence, year, month, day. Rows with
#          presence != 0 take their day from `data`; when pseudo-absences were
#          generated, rows with presence != 1 get a random day between 1 and 28.
finalize_data <- function(pa_generation, data, no_generation){
    # Locate the table that carries the coordinates and the presence flag.
    if (no_generation){
        pts <- pa_generation
    } else {
        pts <- pa_generation$species1$PA01[[1]]
    }
    final_data <- data.frame(x=pts$x, y=pts$y, presence=pts$v)

    final_data["year"] <- data$year
    final_data["month"] <- data$month

    # Start every row at day 0, then copy the observed days onto the rows that
    # are not pseudo-absences.
    final_data["day"] <- 0
    final_data[final_data$presence != 0, "day"] <- data$day

    # (A commented-out variant that looked up the country of every point with
    # get_countrySPDF()/over() used to live here; it is not part of the output.)

    if (!no_generation){
        not_presence <- final_data$presence != 1
        random_days <- sampleInt(28, length(final_data[not_presence, "day"]), replace=TRUE)
        final_data[not_presence, "day"] <- random_days
    }
    final_data
}


# Sample a raster at a set of point locations.
#
# locs:        coordinates accepted by SpatialPoints().
# raster_data: raster object to sample.
# Returns:     the values returned by extract() with bilinear interpolation.
extract_values <- function(locs, raster_data){
    extract(raster_data, SpatialPoints(locs), method='bilinear')
}

In [5]:
# Assemble the clay / sand / silt rasters (0-5 cm and 5-15 cm) for a country.
#
# basePath:      directory containing the *_mean.tif soil rasters.
# raster_extent: extent assigned to every layer after clipping.
# country:       country specification accepted by cropCountry().
# Returns: the six layers stacked in the order clay 0-5, clay 5-15, sand 0-5,
#          sand 5-15, silt 0-5, silt 5-15, divided by 1000, with NA cells
#          replaced by 0.
get_soil_profile <- function(basePath, raster_extent, country) {
    layer_files <- c(
        'clay_0-5cm_mean.tif',
        'clay_5-15cm_mean.tif',
        'sand_0-5cm_mean.tif',
        'sand_5-15cm_mean.tif',
        'silt_0-5cm_mean.tif',
        'silt_5-15cm_mean.tif'
    )

    layers <- lapply(layer_files, function(file_name) {
        layer <- cropCountry(raster(glue('{basePath}/{file_name}')), country)
        # Force every layer onto the same extent so they can be stacked.
        extent(layer) <- raster_extent
        layer
    })

    soil_profile <- stack(layers) / 1000
    soil_profile[is.na(soil_profile)] <- 0
    soil_profile
}

In [6]:
# Load the hopper observations and derive year / month / day columns from
# STARTDATE.
dataset <- read.csv(file = 'Hoppers.csv')
# STARTDATE values look like "1985/12/30 00:00:00+00". Parse them explicitly as
# UTC so the derived date parts do not depend on the time zone of the session
# running the notebook; the trailing "+00" lies beyond the format string.
date <- as.POSIXct(dataset[['STARTDATE']], format = "%Y/%m/%d %H:%M:%S", tz = "UTC")
dataset[['yearmonth']] <- format(date, format="%Y%m")
dataset[['year']] <- as.numeric(format(date, format="%Y"))
dataset[['month']] <- as.numeric(format(date, format="%m"))
dataset[['day']] <- as.numeric(format(date, format="%d"))
head(dataset)

Unnamed: 0_level_0,X,Y,OBJECTID,STARTDATE,TmSTARTDAT,FINISHDATE,TmFINISHDA,EXACTDATE,PARTMONTH,LOCNAME,⋯,CTLAPPVEHI,CTLAPPAIR,CTLAPPMECH,CTLAPPUNK,CTLESTKILL,CAT,yearmonth,year,month,day
Unnamed: 0_level_1,<dbl>,<dbl>,<int>,<fct>,<fct>,<fct>,<fct>,<fct>,<fct>,<fct>,⋯,<int>,<int>,<int>,<int>,<int>,<fct>,<chr>,<dbl>,<dbl>,<dbl>
1,37.333333,19.31667,1,1985/12/30 00:00:00+00,12:00,1985/12/30 00:00:00+00,12:00,No,Late,Khor Hambokeib,⋯,0,0,0,0,0,Hopper,198512,1985,12,30
2,37.35,19.23333,2,1985/12/30 00:00:00+00,12:00,1985/12/30 00:00:00+00,12:00,No,Late,Khor Handub,⋯,0,0,0,0,0,Hopper,198512,1985,12,30
3,-14.917778,16.95167,3,1985/10/18 00:00:00+00,12:00,1985/10/18 00:00:00+00,12:00,No,Middle,,⋯,0,0,0,0,0,Hopper,198510,1985,10,18
4,-0.38165,18.66083,4,1985/10/15 00:00:00+00,12:00,1985/10/15 00:00:00+00,12:00,Yes,,,⋯,0,0,0,0,0,Hopper,198510,1985,10,15
5,1.522778,20.93833,5,1985/11/06 00:00:00+00,12:00,1985/11/06 00:00:00+00,12:00,No,Early,,⋯,0,0,0,0,0,Hopper,198511,1985,11,6
6,4.216111,18.635,6,1985/11/15 00:00:00+00,12:00,1985/11/15 00:00:00+00,12:00,Yes,,,⋯,0,0,0,0,0,Hopper,198511,1985,11,15


In [7]:
# Temporal split: 2000 <= year < 2015 for training/validation, year >= 2015 for
# testing. which() drops rows whose year is NA (unparseable STARTDATE); a bare
# logical index would turn each of them into an all-NA row in both splits.
train_val <- dataset[which(dataset$year >= 2000 & dataset$year < 2015), ]
test <- dataset[which(dataset$year >= 2015), ]

In [8]:
c(max(train_val$year), max(test$year))

In [9]:
# countries = ccodes()
# african_countries <- countries[countries$continent == 'Africa',]$NAME
# print(african_countries)

In [14]:
# Two-letter codes of further candidate countries; not used by the statements
# below. Alternatives tried earlier: as.list(african_countries),
# list('Mauritania', 'Mali').
country_ids <- list('NG', 'OM', 'SA', 'SO', 'SU', 'IN', 'PA', 'ER', 'ET', 'IR', 'MR' )
l <- ccodes()

# Countries processed in this run. Alternatives tried earlier: adding
# 'Saudi Arabia', or as.list(l[l$ISO2 %in% country,]$NAME).
country <- list('Mauritania', 'Mali')

# Soil layers clipped to the selected countries and aligned to their extent.
country_extent <- extent(get_countrySPDF(country))
soil_profile <- get_soil_profile('full_isric_data', country_extent, country)

# Restrict both splits to observations inside the selected countries.
train_val_subset <- subsetCountry(country, train_val)
test_subset <- subsetCountry(country, test)

In [15]:
unique(train_val_subset$country)

In [16]:
train_val_subset

Unnamed: 0_level_0,X,Y,OBJECTID,STARTDATE,TmSTARTDAT,FINISHDATE,TmFINISHDA,EXACTDATE,PARTMONTH,LOCNAME,⋯,CTLAPPAIR,CTLAPPMECH,CTLAPPUNK,CTLESTKILL,CAT,yearmonth,year,month,day,country
Unnamed: 0_level_1,<dbl>,<dbl>,<int>,<fct>,<fct>,<fct>,<fct>,<fct>,<fct>,<fct>,⋯,<int>,<int>,<int>,<int>,<fct>,<chr>,<dbl>,<dbl>,<dbl>,<chr>
4512,-13.89944,19.15694,4512,2000/11/27 00:00:00+00,00:00,2000/11/27 00:00:00+00,00:00,Yes,,,⋯,0,0,0,0,Hopper,200011,2000,11,27,Mauritania
4513,-13.91028,19.14444,4513,2000/11/27 00:00:00+00,00:00,2000/11/27 00:00:00+00,00:00,Yes,,,⋯,0,0,0,0,Hopper,200011,2000,11,27,Mauritania
4514,-13.92306,19.14667,4514,2000/11/27 00:00:00+00,00:00,2000/11/27 00:00:00+00,00:00,Yes,,,⋯,0,0,0,0,Hopper,200011,2000,11,27,Mauritania
4515,-12.94583,19.14639,4515,2000/11/28 00:00:00+00,00:00,2000/11/28 00:00:00+00,00:00,Yes,,,⋯,0,0,0,0,Hopper,200011,2000,11,28,Mauritania
4516,-12.94972,19.13306,4516,2000/11/28 00:00:00+00,00:00,2000/11/28 00:00:00+00,00:00,Yes,,,⋯,0,0,0,0,Hopper,200011,2000,11,28,Mauritania
4517,-13.94778,19.17194,4517,2000/11/29 00:00:00+00,00:00,2000/11/29 00:00:00+00,00:00,Yes,,,⋯,0,0,0,0,Hopper,200011,2000,11,29,Mauritania
4518,-13.95194,19.19611,4518,2000/11/29 00:00:00+00,00:00,2000/11/29 00:00:00+00,00:00,Yes,,,⋯,0,0,0,0,Hopper,200011,2000,11,29,Mauritania
4519,-13.87833,19.15500,4519,2000/11/29 00:00:00+00,00:00,2000/11/29 00:00:00+00,00:00,Yes,,,⋯,0,0,0,0,Hopper,200011,2000,11,29,Mauritania
4520,-13.86917,19.15833,4520,2000/11/29 00:00:00+00,00:00,2000/11/29 00:00:00+00,00:00,Yes,,,⋯,0,0,0,0,Hopper,200011,2000,11,29,Mauritania
4521,-12.93889,19.13000,4521,2000/11/29 00:00:00+00,00:00,2000/11/29 00:00:00+00,00:00,Yes,,,⋯,0,0,0,0,Hopper,200011,2000,11,29,Mauritania


In [17]:
unique(train_val_subset$year)

In [18]:
# Private helper: produce the presence / pseudo-absence points for one month.
#
# With at most `no_generation_limit` presences the presences are returned
# unchanged with v = 1. Otherwise a background grid is built from `env_data`
# and pseudo-absences are drawn with mopa; environmental profiling
# (OCSVMprofiling) is only used for the "ep_*" methods when there are more than
# `random_generation_limit` presences.
.generate_pa_points <- function(method, geo_locs, env_data,
                                no_generation_limit, random_generation_limit){
    if (nrow(geo_locs) <= no_generation_limit){
        geo_locs['v'] <- 1
        return(geo_locs)
    }

    # Only needed (and only computed) when pseudo-absences are generated.
    bg <- backgroundGrid(env_data)

    use_profiling <- method != 'random' && nrow(geo_locs) > random_generation_limit
    if (!use_profiling){
        # generate data randomly
        bg_extents <- backgroundRadius(xy = geo_locs, background = bg$xy,
                                       start = 0.083*5, by = 0.083*20,
                                       unit = "decimal degrees")
        # NOTE(review): "ep_random" samples from the km50 background of the
        # first species only, while the other methods pass the whole
        # backgroundRadius() result - kept as in the original; confirm intended.
        if (method == 'ep_random'){
            background <- bg_extents[[1]]$km50
        } else {
            background <- bg_extents
        }
        return(pseudoAbsences(xy = geo_locs, background = background,
                              exclusion.buffer = 0.083*1,
                              prevalence = 0.5, kmeans = FALSE))
    }

    # generate data using environmental profiling
    bg_profiled <- OCSVMprofiling(xy = geo_locs, varstack = env_data,
                                  background = bg$xy)
    start_radius <- if (method == 'ep_kmeans') 0.083*10 else 0.083*5
    bg_extents <- backgroundRadius(xy = geo_locs, background = bg_profiled$absence,
                                   start = start_radius, by = 0.083*20,
                                   unit = "decimal degrees")
    pseudoAbsences(xy = geo_locs, background = bg_extents,
                   exclusion.buffer = 0.083*1,
                   prevalence = 0.5, kmeans = (method == 'ep_kmeans'),
                   varstack = env_data)
}


# Generate presence / pseudo-absence / true-absence records with environmental
# values for every requested month.
#
# basePath:      directory with the GLDAS .nc4 files (see aggregateSM_NASA()).
# country_data:  observations with columns yearmonth, LOCPRESENT (1 = presence,
#                2 = true absence), X, Y, year, month and day.
# yearmonths:    numeric YYYYMM values to process.
# method:        one of "random", "ep_random", "ep_kmeans".
# env_profilers: variable names forwarded to aggregateSM_NASA().
# Returns: all monthly tables combined with dplyr::bind_rows(); each row holds
#          x, y, presence, year, month, day plus the raster values at the point.
#
# Months without presence records are skipped, as are months for which building
# the raster stack or generating the points fails (the error message is
# printed).
#
# NOTE: the function reads `country` and `soil_profile` from the enclosing
# environment; both must be defined before it is called.
nasa_pa_generation <- function(basePath, country_data, yearmonths, method, env_profilers){
    supported_methods <- c('random', 'ep_random', 'ep_kmeans')
    if (!is.character(method) || length(method) != 1 || !(method %in% supported_methods)){
        # Previously an unknown method left `generated_data` undefined and only
        # failed later with "object not found".
        stop("`method` must be one of: ", paste(supported_methods, collapse = ", "),
             call. = FALSE)
    }

    no_generation_limit <- 3 # 0-3
    random_generation_limit <- 50 # 3 - 50
    dataframe_chunks <- list()

    for (yearmonth in yearmonths){
        print(yearmonth)
        month <- yearmonth %% 100
        year <- yearmonth %/% 100

        # %in% instead of ==: NA values in yearmonth / LOCPRESENT must not be
        # turned into all-NA rows by logical indexing.
        data_subset <- country_data[country_data$yearmonth %in% as.character(yearmonth), , drop = FALSE]
        presence <- data_subset[data_subset$LOCPRESENT %in% 1, , drop = FALSE]
        true_absence <- data_subset[data_subset$LOCPRESENT %in% 2, , drop = FALSE]
        geo_locs <- data.frame(x=presence$X, y=presence$Y)
        geo_locs_len <- nrow(geo_locs)

        # Nothing to generate for this month: skip before the expensive raster
        # work instead of after it.
        if (geo_locs_len < 1) next

        env_data <- tryCatch({
            sm <- cropCountry(aggregateSM_NASA(year, month, basePath, env_profilers), country)
            stack(sm, resample(soil_profile, sm))
        }, error = function(e){
            print(paste('Error: ', conditionMessage(e)))
            NULL
        })
        if (is.null(env_data)) next

        generated_data <- tryCatch(
            .generate_pa_points(method, geo_locs, env_data,
                                no_generation_limit, random_generation_limit),
            error = function(e){
                print(paste('Error: ', conditionMessage(e)))
                NULL
            })
        if (is.null(generated_data)) next

        generated_data <- finalize_data(generated_data, presence, geo_locs_len <= no_generation_limit)
        true_absence_geo_locs <- data.frame(x=true_absence$X, y=true_absence$Y)
        if (nrow(true_absence_geo_locs) > 0){
            true_absence_geo_locs['v'] <- 2
            true_absence <- finalize_data(true_absence_geo_locs, true_absence, TRUE)
            final_data <- dplyr::bind_rows(generated_data, true_absence)
        } else {
            final_data <- generated_data
        }

        # Attach the environmental values at every point.
        raster_data <- extract_values(data.frame(x=final_data$x, y=final_data$y), env_data)
        final_data <- cbind(final_data, raster_data)

        dataframe_chunks[[length(dataframe_chunks) + 1L]] <- final_data
    }
    dplyr::bind_rows(dataframe_chunks)
}

In [19]:
basePath <- '/mnt/disks/nasa/NASA'
# basePath <- 'NASA Data'

### Generate Random

In [20]:
as.numeric(unique(test_subset$yearmonth))

In [21]:
as.numeric(unique(train_val_subset$yearmonth))

In [22]:
test_gen <- nasa_pa_generation(basePath, test_subset, as.numeric(unique(test_subset$yearmonth)), 'random', env_profilers)

[1] 201510
[1] "creating background point-grids for species 1 out of 1"


[2021-09-21 07:11:47] Generating pseudo-absences for species 1

:::[2021-09-21 07:11:47] Realization 1



[1] 201511
[1] "creating background point-grids for species 1 out of 1"


[2021-09-21 07:13:02] Generating pseudo-absences for species 1

:::[2021-09-21 07:13:02] Realization 1

Background km50 is too small for sampling and will be ignored



[1] 201512
[1] "creating background point-grids for species 1 out of 1"


[2021-09-21 07:14:21] Generating pseudo-absences for species 1

:::[2021-09-21 07:14:21] Realization 1



[1] 201509
[1] "creating background point-grids for species 1 out of 1"


[2021-09-21 07:15:41] Generating pseudo-absences for species 1

:::[2021-09-21 07:15:41] Realization 1



[1] 201610
[1] "creating background point-grids for species 1 out of 1"


[2021-09-21 07:17:04] Generating pseudo-absences for species 1

:::[2021-09-21 07:17:04] Realization 1

Background km50 is too small for sampling and will be ignored



[1] 201603
[1] "creating background point-grids for species 1 out of 1"


[2021-09-21 07:18:49] Generating pseudo-absences for species 1

:::[2021-09-21 07:18:49] Realization 1



[1] 201609
[1] "creating background point-grids for species 1 out of 1"


[2021-09-21 07:20:17] Generating pseudo-absences for species 1

:::[2021-09-21 07:20:17] Realization 1



[1] 201611
[1] "creating background point-grids for species 1 out of 1"


[2021-09-21 07:21:45] Generating pseudo-absences for species 1

:::[2021-09-21 07:21:45] Realization 1

Background km50 is too small for sampling and will be ignored



[1] 201604
[1] "creating background point-grids for species 1 out of 1"


[2021-09-21 07:23:16] Generating pseudo-absences for species 1

:::[2021-09-21 07:23:16] Realization 1

Background km50 is too small for sampling and will be ignored



[1] 201605
[1] "creating background point-grids for species 1 out of 1"


[2021-09-21 07:24:45] Generating pseudo-absences for species 1

:::[2021-09-21 07:24:45] Realization 1



[1] 201602
[1] "creating background point-grids for species 1 out of 1"


[2021-09-21 07:26:12] Generating pseudo-absences for species 1

:::[2021-09-21 07:26:12] Realization 1



[1] 201601
[1] "creating background point-grids for species 1 out of 1"


[2021-09-21 07:27:43] Generating pseudo-absences for species 1

:::[2021-09-21 07:27:43] Realization 1



[1] 201606
[1] 201607
[1] 201608
[1] "creating background point-grids for species 1 out of 1"


[2021-09-21 07:32:16] Generating pseudo-absences for species 1

:::[2021-09-21 07:32:16] Realization 1



[1] 201612
[1] "creating background point-grids for species 1 out of 1"


[2021-09-21 07:33:52] Generating pseudo-absences for species 1

:::[2021-09-21 07:33:52] Realization 1

Background km50 is too small for sampling and will be ignored



[1] 201701
[1] "creating background point-grids for species 1 out of 1"


[2021-09-21 07:35:25] Generating pseudo-absences for species 1

:::[2021-09-21 07:35:25] Realization 1



[1] 201702
[1] 201709
[1] "creating background point-grids for species 1 out of 1"


[2021-09-21 07:38:25] Generating pseudo-absences for species 1

:::[2021-09-21 07:38:25] Realization 1



[1] 201712
[1] 201710
[1] "creating background point-grids for species 1 out of 1"


[2021-09-21 07:41:39] Generating pseudo-absences for species 1

:::[2021-09-21 07:41:39] Realization 1



[1] 201711
[1] 201707
[1] 201810
[1] "creating background point-grids for species 1 out of 1"


[2021-09-21 07:46:40] Generating pseudo-absences for species 1

:::[2021-09-21 07:46:40] Realization 1



[1] 201811
[1] "creating background point-grids for species 1 out of 1"


[2021-09-21 07:48:17] Generating pseudo-absences for species 1

:::[2021-09-21 07:48:17] Realization 1



[1] 201812
[1] "creating background point-grids for species 1 out of 1"


[2021-09-21 07:50:06] Generating pseudo-absences for species 1

:::[2021-09-21 07:50:06] Realization 1



[1] 201904
[1] "creating background point-grids for species 1 out of 1"


[2021-09-21 07:55:26] Generating pseudo-absences for species 1

:::[2021-09-21 07:55:26] Realization 1



[1] 201908
[1] 201910
[1] "creating background point-grids for species 1 out of 1"


[2021-09-21 07:59:05] Generating pseudo-absences for species 1

:::[2021-09-21 07:59:05] Realization 1



[1] 201911
[1] "creating background point-grids for species 1 out of 1"


[2021-09-21 08:00:55] Generating pseudo-absences for species 1

:::[2021-09-21 08:00:55] Realization 1



[1] 201912
[1] "creating background point-grids for species 1 out of 1"


[2021-09-21 08:02:50] Generating pseudo-absences for species 1

:::[2021-09-21 08:02:50] Realization 1

Background km50 is too small for sampling and will be ignored



[1] 202001
[1] 202010
[1] "creating background point-grids for species 1 out of 1"


[2021-09-21 08:06:49] Generating pseudo-absences for species 1

:::[2021-09-21 08:06:49] Realization 1



[1] 202011
[1] "creating background point-grids for species 1 out of 1"


[2021-09-21 08:08:45] Generating pseudo-absences for species 1

:::[2021-09-21 08:08:45] Realization 1

Background km50 is too small for sampling and will be ignored



[1] 202012
[1] "creating background point-grids for species 1 out of 1"


[2021-09-21 08:10:45] Generating pseudo-absences for species 1

:::[2021-09-21 08:10:45] Realization 1



[1] 202101


In [23]:
write.csv(test_gen,'test_random_v1.csv')
test_gen

x,y,presence,year,month,day,SoilMoi0_10cm_inst_dekad1,SoilMoi0_10cm_inst_dekad2,SoilMoi0_10cm_inst_dekad3,clay_0.5cm_mean,clay_5.15cm_mean,sand_0.5cm_mean,sand_5.15cm_mean,silt_0.5cm_mean,silt_5.15cm_mean
<dbl>,<dbl>,<dbl>,<dbl>,<dbl>,<dbl>,<dbl>,<dbl>,<dbl>,<dbl>,<dbl>,<dbl>,<dbl>,<dbl>,<dbl>
-14.525278,18.11000,1,2015,10,19,6.509741,6.264692,7.191211,0.2237804,0.2206993,0.6134937,0.6184359,0.1627266,0.1608605
-14.534167,18.12389,1,2015,10,19,6.052448,5.890046,6.985578,0.2256825,0.2226590,0.6084904,0.6133977,0.1658286,0.1639382
-14.369444,19.74750,1,2015,10,23,11.631600,11.093846,12.787071,0.1451429,0.1492571,0.6091758,0.6049553,0.2450131,0.2451067
-14.021944,19.62639,1,2015,10,27,9.991642,9.434800,10.192380,0.1639236,0.1687236,0.6156020,0.6125561,0.2195952,0.2178435
-14.083333,19.77583,1,2015,10,27,11.938381,11.392351,12.442991,0.1533333,0.1568739,0.6139158,0.6104353,0.2324357,0.2323789
-14.365000,19.77028,1,2015,10,25,11.707907,11.191413,12.861095,0.1439763,0.1479170,0.6082985,0.6041855,0.2470803,0.2472422
-14.361389,19.76972,1,2015,10,21,11.709399,11.192108,12.860637,0.1441209,0.1480607,0.6084434,0.6043412,0.2468004,0.2469524
-14.290556,19.91750,1,2015,10,29,12.644180,12.186436,13.501651,0.1401176,0.1433034,0.6054943,0.6013461,0.2540506,0.2550118
-13.967500,19.02111,1,2015,10,31,10.673976,9.567369,10.251418,0.2093382,0.2182331,0.5794448,0.5721163,0.2108617,0.2092813
-12.766667,18.12056,1,2015,10,31,8.466154,7.715625,9.076551,0.2638299,0.2567824,0.5295042,0.5359720,0.2066835,0.2072403


In [None]:
train_val_gen <- nasa_pa_generation(basePath, train_val_subset, as.numeric(unique(train_val_subset$yearmonth)), 'random', env_profilers)

[1] 200011
[1] "creating background point-grids for species 1 out of 1"


[2021-09-21 08:14:45] Generating pseudo-absences for species 1

:::[2021-09-21 08:14:45] Realization 1

Background km50 is too small for sampling and will be ignored



[1] 200001
[1] "creating background point-grids for species 1 out of 1"


[2021-09-21 08:16:49] Generating pseudo-absences for species 1

:::[2021-09-21 08:16:49] Realization 1



[1] 200012
[1] "creating background point-grids for species 1 out of 1"


[2021-09-21 08:18:51] Generating pseudo-absences for species 1

:::[2021-09-21 08:18:51] Realization 1



[1] 200010
[1] "creating background point-grids for species 1 out of 1"


[2021-09-21 08:20:56] Generating pseudo-absences for species 1

:::[2021-09-21 08:20:56] Realization 1



[1] 200003
[1] "creating background point-grids for species 1 out of 1"


[2021-09-21 08:23:02] Generating pseudo-absences for species 1

:::[2021-09-21 08:23:02] Realization 1

Background km50 is too small for sampling and will be ignored



[1] 200002
[1] "creating background point-grids for species 1 out of 1"


[2021-09-21 08:25:03] Generating pseudo-absences for species 1

:::[2021-09-21 08:25:03] Realization 1



[1] 200008
[1] "creating background point-grids for species 1 out of 1"


[2021-09-21 08:27:11] Generating pseudo-absences for species 1

:::[2021-09-21 08:27:11] Realization 1



[1] 200004
[1] "creating background point-grids for species 1 out of 1"


[2021-09-21 08:29:15] Generating pseudo-absences for species 1

:::[2021-09-21 08:29:15] Realization 1



[1] 200009
[1] "creating background point-grids for species 1 out of 1"


[2021-09-21 08:31:22] Generating pseudo-absences for species 1

:::[2021-09-21 08:31:22] Realization 1



[1] 200101
[1] "creating background point-grids for species 1 out of 1"


[2021-09-21 08:33:30] Generating pseudo-absences for species 1

:::[2021-09-21 08:33:30] Realization 1



[1] 200110
[1] "creating background point-grids for species 1 out of 1"


[2021-09-21 08:35:41] Generating pseudo-absences for species 1

:::[2021-09-21 08:35:41] Realization 1



[1] 200111
[1] "creating background point-grids for species 1 out of 1"


[2021-09-21 08:37:45] Generating pseudo-absences for species 1

:::[2021-09-21 08:37:45] Realization 1



[1] 200112
[1] "creating background point-grids for species 1 out of 1"


[2021-09-21 08:39:57] Generating pseudo-absences for species 1

:::[2021-09-21 08:39:57] Realization 1



[1] 200102
[1] "creating background point-grids for species 1 out of 1"


[2021-09-21 08:41:56] Generating pseudo-absences for species 1

:::[2021-09-21 08:41:56] Realization 1



[1] 200108
[1] 200109
[1] 200211
[1] "creating background point-grids for species 1 out of 1"


[2021-09-21 08:48:29] Generating pseudo-absences for species 1

:::[2021-09-21 08:48:29] Realization 1



[1] 200212
[1] "creating background point-grids for species 1 out of 1"


[2021-09-21 08:50:50] Generating pseudo-absences for species 1

:::[2021-09-21 08:50:50] Realization 1



[1] 200210
[1] "creating background point-grids for species 1 out of 1"


[2021-09-21 08:53:14] Generating pseudo-absences for species 1

:::[2021-09-21 08:53:14] Realization 1



[1] 200201
[1] 200208
[1] 200209
[1] "creating background point-grids for species 1 out of 1"


[2021-09-21 09:00:22] Generating pseudo-absences for species 1

:::[2021-09-21 09:00:22] Realization 1



[1] 200311
[1] "creating background point-grids for species 1 out of 1"


[2021-09-21 09:02:47] Generating pseudo-absences for species 1

:::[2021-09-21 09:02:47] Realization 1

Background km50 is too small for sampling and will be ignored



[1] 200312
[1] "creating background point-grids for species 1 out of 1"


[2021-09-21 09:05:22] Generating pseudo-absences for species 1

:::[2021-09-21 09:05:22] Realization 1

Background km50 is too small for sampling and will be ignored



[1] 200301
[1] 200302
[1] "creating background point-grids for species 1 out of 1"


[2021-09-21 09:10:09] Generating pseudo-absences for species 1

:::[2021-09-21 09:10:09] Realization 1



[1] 200308
[1] "creating background point-grids for species 1 out of 1"


[2021-09-21 09:12:41] Generating pseudo-absences for species 1

:::[2021-09-21 09:12:41] Realization 1



[1] 200309
[1] "creating background point-grids for species 1 out of 1"


[2021-09-21 09:15:08] Generating pseudo-absences for species 1

:::[2021-09-21 09:15:08] Realization 1



[1] 200310
[1] "creating background point-grids for species 1 out of 1"


[2021-09-21 09:17:41] Generating pseudo-absences for species 1

:::[2021-09-21 09:17:41] Realization 1



[1] 200401
[1] "creating background point-grids for species 1 out of 1"


[2021-09-21 09:20:13] Generating pseudo-absences for species 1

:::[2021-09-21 09:20:13] Realization 1

Background km50 is too small for sampling and will be ignored



[1] 200303
[1] 200307
[1] 200412
[1] "creating background point-grids for species 1 out of 1"


[2021-09-21 09:27:47] Generating pseudo-absences for species 1

:::[2021-09-21 09:27:47] Realization 1



[1] 200411
[1] "creating background point-grids for species 1 out of 1"


[2021-09-21 09:30:18] Generating pseudo-absences for species 1

:::[2021-09-21 09:30:18] Realization 1



[1] 200410
[1] "creating background point-grids for species 1 out of 1"


[2021-09-21 09:32:53] Generating pseudo-absences for species 1

:::[2021-09-21 09:32:53] Realization 1



[1] 200406
[1] "creating background point-grids for species 1 out of 1"


[2021-09-21 09:35:30] Generating pseudo-absences for species 1

:::[2021-09-21 09:35:30] Realization 1



[1] 200408
[1] "creating background point-grids for species 1 out of 1"


[2021-09-21 09:38:09] Generating pseudo-absences for species 1

:::[2021-09-21 09:38:09] Realization 1



[1] 200402
[1] "creating background point-grids for species 1 out of 1"


[2021-09-21 09:40:37] Generating pseudo-absences for species 1

:::[2021-09-21 09:40:37] Realization 1

Background km50 is too small for sampling and will be ignored



[1] 200404
[1] "creating background point-grids for species 1 out of 1"


[2021-09-21 09:43:17] Generating pseudo-absences for species 1

:::[2021-09-21 09:43:17] Realization 1

Background km50 is too small for sampling and will be ignored



[1] 200403
[1] "creating background point-grids for species 1 out of 1"


[2021-09-21 09:46:01] Generating pseudo-absences for species 1

:::[2021-09-21 09:46:01] Realization 1

Background km50 is too small for sampling and will be ignored



[1] 200409
[1] "creating background point-grids for species 1 out of 1"


[2021-09-21 09:48:47] Generating pseudo-absences for species 1

:::[2021-09-21 09:48:47] Realization 1

Background km50 is too small for sampling and will be ignored



[1] 200405
[1] "creating background point-grids for species 1 out of 1"


[2021-09-21 09:51:32] Generating pseudo-absences for species 1

:::[2021-09-21 09:51:32] Realization 1

Background km50 is too small for sampling and will be ignored



[1] 200407
[1] 200512
[1] "creating background point-grids for species 1 out of 1"


[2021-09-21 09:57:10] Generating pseudo-absences for species 1

:::[2021-09-21 09:57:10] Realization 1

Background km50 is too small for sampling and will be ignored



[1] 200511
[1] "creating background point-grids for species 1 out of 1"


[2021-09-21 09:59:58] Generating pseudo-absences for species 1

:::[2021-09-21 09:59:58] Realization 1

Background km50 is too small for sampling and will be ignored



[1] 200508
[1] 200509
[1] "creating background point-grids for species 1 out of 1"


[2021-09-21 10:05:38] Generating pseudo-absences for species 1

:::[2021-09-21 10:05:38] Realization 1



[1] 200510
[1] "creating background point-grids for species 1 out of 1"


[2021-09-21 10:08:30] Generating pseudo-absences for species 1

:::[2021-09-21 10:08:30] Realization 1



[1] 200601
[1] "creating background point-grids for species 1 out of 1"


[2021-09-21 10:11:22] Generating pseudo-absences for species 1

:::[2021-09-21 10:11:22] Realization 1

Background km50 is too small for sampling and will be ignored



[1] 200507
[1] 200612
[1] "creating background point-grids for species 1 out of 1"


[2021-09-21 10:17:09] Generating pseudo-absences for species 1

:::[2021-09-21 10:17:09] Realization 1



[1] 200610
[1] "creating background point-grids for species 1 out of 1"


[2021-09-21 10:20:02] Generating pseudo-absences for species 1

:::[2021-09-21 10:20:02] Realization 1



[1] 200611
[1] "creating background point-grids for species 1 out of 1"


[2021-09-21 10:22:54] Generating pseudo-absences for species 1

:::[2021-09-21 10:22:54] Realization 1

Background km50 is too small for sampling and will be ignored



[1] 200608
[1] "creating background point-grids for species 1 out of 1"


[2021-09-21 10:25:55] Generating pseudo-absences for species 1

:::[2021-09-21 10:25:55] Realization 1



[1] 200609
[1] "creating background point-grids for species 1 out of 1"


[2021-09-21 10:28:46] Generating pseudo-absences for species 1

:::[2021-09-21 10:28:46] Realization 1

Background km50 is too small for sampling and will be ignored



[1] 200604
[1] "creating background point-grids for species 1 out of 1"


[2021-09-21 10:31:43] Generating pseudo-absences for species 1

:::[2021-09-21 10:31:43] Realization 1

Background km50 is too small for sampling and will be ignored



[1] 200602
[1] "creating background point-grids for species 1 out of 1"


[2021-09-21 10:34:25] Generating pseudo-absences for species 1

:::[2021-09-21 10:34:25] Realization 1



[1] 200603
[1] "creating background point-grids for species 1 out of 1"


[2021-09-21 10:37:26] Generating pseudo-absences for species 1

:::[2021-09-21 10:37:26] Realization 1



[1] 200712
[1] "creating background point-grids for species 1 out of 1"


[2021-09-21 10:40:28] Generating pseudo-absences for species 1

:::[2021-09-21 10:40:28] Realization 1



[1] 200701
[1] 200711
[1] "creating background point-grids for species 1 out of 1"


[2021-09-21 10:46:28] Generating pseudo-absences for species 1

:::[2021-09-21 10:46:28] Realization 1



[1] 200709
[1] "creating background point-grids for species 1 out of 1"


[2021-09-21 10:49:25] Generating pseudo-absences for species 1

:::[2021-09-21 10:49:25] Realization 1



[1] 200710
[1] "creating background point-grids for species 1 out of 1"


[2021-09-21 10:52:29] Generating pseudo-absences for species 1

:::[2021-09-21 10:52:29] Realization 1



[1] 200801
[1] "creating background point-grids for species 1 out of 1"


[2021-09-21 10:55:38] Generating pseudo-absences for species 1

:::[2021-09-21 10:55:38] Realization 1



[1] 200702
[1] "creating background point-grids for species 1 out of 1"


[2021-09-21 10:58:26] Generating pseudo-absences for species 1

:::[2021-09-21 10:58:26] Realization 1



[1] 200812
[1] "creating background point-grids for species 1 out of 1"


[2021-09-21 11:01:31] Generating pseudo-absences for species 1

:::[2021-09-21 11:01:31] Realization 1

Background km50 is too small for sampling and will be ignored



[1] 200811
[1] "creating background point-grids for species 1 out of 1"


[2021-09-21 11:04:32] Generating pseudo-absences for species 1

:::[2021-09-21 11:04:32] Realization 1



[1] 200804
[1] "creating background point-grids for species 1 out of 1"


[2021-09-21 11:07:33] Generating pseudo-absences for species 1

:::[2021-09-21 11:07:33] Realization 1

Background km50 is too small for sampling and will be ignored



[1] 200803
[1] "creating background point-grids for species 1 out of 1"


[2021-09-21 11:10:43] Generating pseudo-absences for species 1

:::[2021-09-21 11:10:43] Realization 1



[1] 200805
[1] "creating background point-grids for species 1 out of 1"


[2021-09-21 11:13:54] Generating pseudo-absences for species 1

:::[2021-09-21 11:13:54] Realization 1



[1] 200802
[1] "creating background point-grids for species 1 out of 1"


[2021-09-21 11:16:53] Generating pseudo-absences for species 1

:::[2021-09-21 11:16:53] Realization 1



[1] 200810
[1] "creating background point-grids for species 1 out of 1"


[2021-09-21 11:20:05] Generating pseudo-absences for species 1

:::[2021-09-21 11:20:05] Realization 1



[1] 200809
[1] "creating background point-grids for species 1 out of 1"


[2021-09-21 11:23:12] Generating pseudo-absences for species 1

:::[2021-09-21 11:23:12] Realization 1



[1] 200808
[1] "creating background point-grids for species 1 out of 1"


[2021-09-21 11:26:25] Generating pseudo-absences for species 1

:::[2021-09-21 11:26:25] Realization 1



[1] 200901
[1] "creating background point-grids for species 1 out of 1"


[2021-09-21 11:29:37] Generating pseudo-absences for species 1

:::[2021-09-21 11:29:37] Realization 1



[1] 200910
[1] "creating background point-grids for species 1 out of 1"


[2021-09-21 11:32:51] Generating pseudo-absences for species 1

:::[2021-09-21 11:32:51] Realization 1

Background km50 is too small for sampling and will be ignored



[1] 200909
[1] "creating background point-grids for species 1 out of 1"


[2021-09-21 11:36:00] Generating pseudo-absences for species 1

:::[2021-09-21 11:36:00] Realization 1



In [30]:
# Persist `train_val_gen` to CSV, then leave the object as the cell's final
# expression so the notebook renders it.
# NOTE(review): write.csv() is called with its defaults, so row names are
# written as an extra leading column -- confirm downstream readers expect it.
write.csv(x = train_val_gen, file = "train_val_random_v1.csv")
train_val_gen

x,y,presence,year,month,day,SoilMoi0_10cm_inst_dekad1,SoilMoi0_10cm_inst_dekad2,SoilMoi0_10cm_inst_dekad3,clay_0.5cm_mean,clay_5.15cm_mean,sand_0.5cm_mean,sand_5.15cm_mean,silt_0.5cm_mean,silt_5.15cm_mean
<dbl>,<dbl>,<dbl>,<dbl>,<dbl>,<dbl>,<dbl>,<dbl>,<dbl>,<dbl>,<dbl>,<dbl>,<dbl>,<dbl>,<dbl>
-15.45583,20.58194,1,2000,1,11,8.912033,8.880046,10.657040,0.1347601,0.1405453,0.6511468,0.6479411,0.2140997,0.2115278
-10.93333,25.38333,1,2000,1,29,10.845142,10.790489,11.725162,0.1615377,0.1532316,0.6154153,0.6215836,0.2230543,0.2251888
-16.04528,19.47111,1,2000,1,4,4.783819,5.027987,7.008550,0.1788870,0.1824752,0.5549563,0.5544556,0.2293090,0.2269686
-14.76083,18.96639,1,2000,1,2,6.515716,6.332933,6.969298,0.1957404,0.1997934,0.5749793,0.5656550,0.2291442,0.2344186
-15.44028,20.64306,1,2000,1,17,9.319436,9.276956,11.089324,0.1344158,0.1405118,0.6536095,0.6509237,0.2119822,0.2085770
-15.58278,20.85333,1,2000,1,16,9.341120,9.259498,11.697745,0.1424898,0.1469887,0.6479274,0.6469692,0.2095990,0.2060225
-15.48306,20.59444,1,2000,1,14,9.107726,9.074269,10.909244,0.1345819,0.1403452,0.6509386,0.6474940,0.2144860,0.2121725
-11.05639,25.47250,1,2000,1,12,11.407526,11.267804,12.192828,0.1635954,0.1557374,0.6096864,0.6164168,0.2267271,0.2278491
-15.43361,20.59389,1,2000,1,12,8.971724,8.937010,10.684989,0.1345469,0.1405091,0.6523396,0.6494226,0.2131207,0.2100839
-15.60167,20.86306,1,2000,1,6,9.353610,9.268899,11.764984,0.1433194,0.1476657,0.6472426,0.6463960,0.2094563,0.2059162


### Generate with Environmental Profiling + Random

In [None]:
# Generate the train/validation set with the "ep_random" option
# (environmental profiling + random, per this section's heading).
# The third argument is the set of distinct `yearmonth` values present in
# `train_val_subset`, coerced to numeric.
# This reassigns `train_val_gen`, replacing the value written out earlier.
# NOTE(review): `nasa_pa_generation` is defined outside this cell; confirm the
# meaning of each positional argument against its definition.
train_val_gen <- nasa_pa_generation(
  basePath,
  train_val_subset,
  train_val_subset$yearmonth %>% unique() %>% as.numeric(),
  "ep_random",
  env_profilers
)

In [None]:
# Generate the test set with the "ep_random" option (environmental
# profiling + random, per this section's heading).
# The third argument is the set of distinct `yearmonth` values present in
# `test_subset`, coerced to numeric.
# NOTE(review): `nasa_pa_generation` is defined outside this cell; confirm the
# meaning of each positional argument against its definition.
test_gen <- nasa_pa_generation(
  basePath,
  test_subset,
  test_subset$yearmonth %>% unique() %>% as.numeric(),
  "ep_random",
  env_profilers
)

In [None]:
# Save both "ep_random" results to CSV in the working directory.
# NOTE(review): write.csv() defaults are used, so row names are written as an
# extra leading column -- confirm downstream readers expect it.
write.csv(x = train_val_gen, file = "train_val_gen_ep_random_v1.csv")
write.csv(x = test_gen, file = "test_ep_random_v1.csv")

### Generate with Environmental Profiling + KMeans

In [None]:
# Generate the train/validation set with the "ep_kmeans" option
# (environmental profiling + k-means, per this section's heading).
# The third argument is the set of distinct `yearmonth` values present in
# `train_val_subset`, coerced to numeric.
# This reassigns `train_val_gen`, replacing the previous section's result.
# NOTE(review): `nasa_pa_generation` is defined outside this cell; confirm the
# meaning of each positional argument against its definition.
train_val_gen <- nasa_pa_generation(
  basePath,
  train_val_subset,
  train_val_subset$yearmonth %>% unique() %>% as.numeric(),
  "ep_kmeans",
  env_profilers
)

In [None]:
# Generate the test set with the "ep_kmeans" option (environmental
# profiling + k-means, per this section's heading).
# The third argument is the set of distinct `yearmonth` values present in
# `test_subset`, coerced to numeric.
# NOTE(review): `nasa_pa_generation` is defined outside this cell; confirm the
# meaning of each positional argument against its definition.
test_gen <- nasa_pa_generation(
  basePath,
  test_subset,
  test_subset$yearmonth %>% unique() %>% as.numeric(),
  "ep_kmeans",
  env_profilers
)

In [None]:
# Save both "ep_kmeans" results to CSV in the working directory.
# NOTE(review): write.csv() defaults are used, so row names are written as an
# extra leading column -- confirm downstream readers expect it.
write.csv(x = train_val_gen, file = "train_val_gen_ep_kmeans_v1.csv")
write.csv(x = test_gen, file = "test_ep_kmeans_v1.csv")