# Presence only data modelling - Generate Random Points and Plots
This notebook generates the background data points required for presence only models. For more on background points, read [this](https://rspatial.org/raster/sdm/3_sdm_absence-background.html).

## Downloads and imports

In [None]:
# Install if you aren't running through Docker
# install.packages(c("maxnet","tictoc","MLmetrics","dplyr","sp","rworldmap"),repos = "http://cran.us.r-project.org")
# Manually install raster if you have problems with terra install
# url <- "https://cran.r-project.org/src/contrib/Archive/raster/raster_3.4-13.tar.gz"
# download.file(url, basename(url),  mode="wb")
# install.packages(basename(url), repos=NULL, type="source")

In [None]:
library(maxnet)
library(glmnet)
library(sp)
library(rworldmap)
library(raster)
require(maps)

## Get data

In [None]:
# Read the training and test CSV files.
train <- read.csv(file = "../data/train_val_random.csv", header = TRUE)
test <- read.csv(file = "../data/test_random.csv", header = TRUE)

# Presence-only training data: keep the rows whose `presence` equals "1".
is_presence <- train$presence == "1"
train_only_presence <- train[is_presence, ]

In [None]:
# Observation dates of the presence records, kept as a one-column data frame.
training_data_dates <- train_only_presence[, "observation_date", drop = FALSE]

## Get countries from coords 
https://stackoverflow.com/questions/14334970/convert-latitude-and-longitude-coordinates-to-country-name-in-r

In [None]:
# Look up, for each point, the rworldmap country polygon that contains it.
#
# Args:
#   points: data.frame in which
#     - column 1 contains the longitude in degrees
#     - column 2 contains the latitude in degrees
#
# Returns:
#   The ADMIN value of the polygon matched to each point.
#
# Side effect:
#   Prints the unique (ADMIN, REGION) pairs found among the points.
coords2country <- function(points) {
  world_polygons <- getMap(resolution = "low")

  # Give the points the same CRS as the rworldmap polygons.
  world_crs <- CRS(proj4string(world_polygons))
  located_points <- SpatialPoints(points, proj4string = world_crs)

  # over() returns, per point, the attributes of the polygon containing it.
  matches <- over(located_points, world_polygons)

  admin_region <- cbind(
    as.character(matches$ADMIN),
    as.character(matches$REGION)
  )
  print(unique(admin_region))

  matches$ADMIN
}

#### List of countries in train data

In [None]:
# Distinct countries containing the training coordinates
# (column x is passed as longitude, column y as latitude).
train_countries <- unique(coords2country(train[, c("x", "y")]))

In [None]:
# Display the countries found in the training data.
train_countries

#### Get bounding box for relevant countries

In [None]:
# Countries whose altitude rasters make up the study region, in the order
# they are handed to mosaic().
study_countries <- c(
  "Mauritania", "Mali", "Somalia",
  "Egypt", "Morocco", "Algeria", "Sudan", "Niger", "Eritrea", "Senegal",
  "Libya",
  # "Western Sahara",  # disabled
  "Tunisia", # check
  "Cape Verde", "Chad", "Ethiopia",
  "Djibouti", # check
  "Kenya"
)

# Fetch one masked altitude raster per country; the list is named by country
# so an individual raster can still be inspected afterwards.
country_rasters <- lapply(study_countries, function(country) {
  getData("alt", country = country, level = 0, mask = TRUE)
})
names(country_rasters) <- study_countries

# mosaic() takes the rasters as separate positional arguments, so the list is
# passed unnamed through do.call(). Overlapping cells keep the minimum value.
combined_raster <- do.call(
  mosaic,
  c(unname(country_rasters), list(fun = min, tolerance = 0.35))
)

In [None]:
# Plot the combined raster to inspect the region it covers.
plot(combined_raster)

In [None]:
# Draw the outlines of the listed countries from the "world" database;
# exact = TRUE matches the region names exactly.
map(
  database = "world",
  regions = c(
    "Mauritania", "Mali", "Somalia", "Egypt", "Morocco", "Algeria", "Sudan",
    "Eritrea", "Senegal", "Libya", "Western Sahara", "Tunisia", "Niger",
    "Chad", "Ethiopia", "Djibouti", "Kenya"
  ),
  exact = TRUE
)

# Generate Background Points
Randomly generate background points.

## Randomly Generate x and y within country boundaries

In [None]:
library(dismo)
library(tictoc)

# Fix the RNG seed so the sampled points are reproducible.
set.seed(42)

# Time the sampling step.
tic()

# Draw as many random points from the combined raster as there are
# presence records in the training data.
background_points <- randomPoints(
  mask = combined_raster,
  n = nrow(train_only_presence)
)
nrow(background_points)
plot(background_points)

toc()

## Generate random dates within date range of training data 

In [None]:
# Parse the observation dates once; random dates are drawn from their range.
observation_dates <- as.Date(
  training_data_dates$observation_date,
  format = "%Y-%m-%d"
)

# Fail early with a clear message instead of a cryptic seq() error.
if (all(is.na(observation_dates))) {
  stop("No valid observation dates found in training_data_dates.",
       call. = FALSE)
}

# na.rm = TRUE: a single missing or unparsable date must not turn the whole
# range into NA.
min_date <- min(observation_dates, na.rm = TRUE)
max_date <- max(observation_dates, na.rm = TRUE)

# Attach one random day (sampled with replacement) to every background point.
background <- as.data.frame(background_points)
candidate_days <- seq(min_date, max_date, by = "day")
background$date <- sample(candidate_days, nrow(background), replace = TRUE)

In [None]:
# Preview the first sampled coordinates.
# NOTE(review): this previews `background_points` (coordinates only); the
# random `date` column is stored in `background` - confirm which was intended.
head(background_points)

In [None]:
# Save the background points and their random dates as CSV. Row names are
# written as well because `row.names` is left at its default.
write.csv(
  background,
  file = "../data/presence_only/background/background_points.csv"
)

## Plot Different Absences
Plotting Pseudo absences.

In [None]:
# Plot presence and pseudo-absence points over country outlines and save the
# figure as a PDF.
#
# Args:
#   whole_region: not used by this function; kept so existing calls keep
#     working.
#   data: data.frame with columns `x`, `y` and `presence`, where "1" marks a
#     presence record and "0" a pseudo-absence record.
#   method: label of the sampling method; used in the output file name.
#   cex: point size passed to points(); also used in the output file name.
#   regions: region names drawn by map() from the "world" database.
#
# Side effect:
#   Writes ../images/<method>_sampling_size_<cex>.pdf.
#
# Returns:
#   The value of dev.off() for the PDF device.
plot_absence_presence <- function(whole_region, data, method, cex = 3.5,
                                  regions = c(
                                    "Mauritania", "Mali", "Somalia", "Egypt",
                                    "Morocco", "Algeria", "Sudan", "Eritrea",
                                    "Senegal", "Libya", "Western Sahara",
                                    "Tunisia", "Niger", "Chad", "Ethiopia",
                                    "Djibouti", "Kenya"
                                  )) {
  # which() skips rows whose `presence` is NA instead of turning them into
  # all-NA rows.
  pseudo_absence <- data[which(data$presence == "0"), c("x", "y")]
  presence <- data[which(data$presence == "1"), c("x", "y")]

  pdf_path <- sprintf("../images/%s_sampling_size_%s.pdf", method, cex)
  pdf(file = pdf_path)
  pdf_device <- dev.cur()
  # Close the PDF device even if plotting fails, so no device is left open
  # and the file is not left half-written.
  on.exit(if (pdf_device %in% dev.list()) dev.off(pdf_device), add = TRUE)

  map(database = "world", regions = regions, exact = TRUE)
  # Pseudo-absences in red first, presences in blue on top.
  points(pseudo_absence, col = "red", pch = ".", cex = cex)
  points(presence, col = "blue", pch = ".", cex = cex)

  dev.off(pdf_device)
}

In [None]:
cex <- 1
# Save the "RS" plot; the combined raster is passed as the region argument.
plot_absence_presence(
  combined_raster,
  read.csv("../data/train_val_random.csv", header = TRUE),
  "RS",
  cex = cex
)

In [None]:
cex <- 1
# Save the "RSEP" plot; the combined raster is passed as the region argument.
plot_absence_presence(
  combined_raster,
  read.csv("../data/train_val_ep_random.csv", header = TRUE),
  "RSEP",
  cex = cex
)

Plot background data

In [None]:
cex <- 1

# Read already generated background data and label every row as absence (0).
background <- read.csv(
  "../data/presence_only/background/background_updated_full_data.csv",
  header = TRUE
)
background[, "presence"] <- 0

train <- read.csv("../data/train_val_random.csv", header = TRUE)

# Presence-only train data
train_presence <- train[train$presence == "1", ]

# Combine presence and background records. Note that this overwrites `train`
# with a frame holding only the x, y and presence columns.
train <- rbind(
  train_presence[c("x", "y", "presence")],
  background[c("x", "y", "presence")]
)

# Save the "BD" plot; the combined raster is passed as the region argument.
plot_absence_presence(combined_raster, train, "BD", cex = cex)

Plot all data together

In [None]:
# Each plot_absence_presence() call opens its own PDF device with pdf(), and
# graphical parameters are per device, so no par(mfrow = ...) layout is set
# here: it would not affect the saved files.
cex <- 2
plot_absence_presence(
  combined_raster,
  read.csv("../data/train_val_random.csv", header = TRUE),
  "RS",
  cex = cex
)
plot_absence_presence(
  combined_raster,
  read.csv("../data/train_val_ep_random.csv", header = TRUE),
  "RSEP",
  cex = cex
)
# plot_absence_presence(combined_raster,read.csv("../data/train_val_ep_kmeans.csv", header=TRUE),"RSEP-KM",cex=cex)


# Read already generated background data and label every row as absence (0).
background <- read.csv(
  "../data/presence_only/background/background_updated_full_data.csv",
  header = TRUE
)
background[, "presence"] <- 0

train <- read.csv("../data/train_val_random.csv", header = TRUE)

# Presence-only train data
train_presence <- train[train$presence == "1", ]

# Combine presence and background records. Note that this overwrites `train`
# with a frame holding only the x, y and presence columns.
train <- rbind(
  train_presence[c("x", "y", "presence")],
  background[c("x", "y", "presence")]
)
plot_absence_presence(combined_raster, train, "BD", cex = cex)