# 🐢 Turtles sightings 🐢
This notebook illustrates the computation of a _density map_ using observation locations. 

Dataset: 
> Marine Turtles	National Biodiversity Network Trust. Marine Turtles. National Biodiversity Network Trust, Newark, UK.      
https://doi.org/10.15468/fyt9hw,       
https://portal.obis.org/dataset/1cfc4d23-9fcd-42b2-95bf-9c4ee9bc50f6

In [None]:
using DIVAnd
using Makie, CairoMakie, GeoMakie
using Statistics
using DelimitedFiles
using LinearAlgebra
using Random
include("../config.jl")

## Read the data retrieved from OBIS
The dataset consists of locations (longitude, latitude) of turtle sightings.

In [None]:
# Make sure the sightings file is available (download_check comes from ../config.jl),
# then read it as a delimited table.
download_check(turtlefile, turtlefileURL)
AA = readdlm(turtlefile)
@show size(AA);

# First column: longitudes; second column: latitudes.
lon, lat = AA[:, 1], AA[:, 2]
@show mean(lon), mean(lat);

### Observation map

In [None]:
# Map of all the sightings on a Mercator-projected geographic axis.
fig = Figure()
ga = GeoAxis(
    fig[1, 1];
    dest = "+proj=merc",
    title = "Location of the observations",
    xticks = -20:5:5,
    yticks = 40:5.0:65.0,
)
# Target the axis explicitly, as the density-plot cells do, instead of relying on
# Makie's implicit "current axis".
scatter!(ga, lon, lat; markersize = 3)
fig

## Simple heatmap without land mask
### Domain and data section

In [None]:
# Number of grid cells along x (longitude) and y (latitude).
NX, NY = 300, 250

# Extent of the box, in the same units as lon/lat.
LX, LY = 18.0, 15.0

# Lower-left corner of the bounding box; the opposite corner follows from the extent.
xleft, ybot = -14.0, 47.0
xright, ytop = xleft + LX, ybot + LY

# Keep only the sightings lying strictly inside the box.
sel = (xleft .< lon .< xright) .& (ybot .< lat .< ytop)
xo = lon[sel]
yo = lat[sel]

# Inflation vector: one entry equal to 1 per retained observation.
inflation = ones(size(xo));

### Observation map

In [None]:
# Map of the sightings retained inside the grid box.
fig = Figure()
ga = GeoAxis(
    fig[1, 1];
    dest = "+proj=merc",
    title = "Location of the observations\nin grid box",
    xticks = -20:5:5,
    yticks = 40:5.0:65.0,
)
# Target the axis explicitly, as the density-plot cells do, instead of relying on
# Makie's implicit "current axis".
scatter!(ga, xo, yo; markersize = 3)
fig

### Heatmap computation
#### Define grid and metrics 

In [None]:
# Grid spacing in each direction.
dx = LX / NX
dy = LY / NY

# Cell-centre coordinates: the grid on which the heatmap is computed.
xg = (xleft + dx / 2):dx:(xleft + LX)
yg = (ybot + dy / 2):dy:(ybot + LY)

# Cell-edge coordinates, passed as x/y to the heatmap plots further down.
xp = xleft:dx:(xleft + LX)
yp = ybot:dy:(ybot + LY)

# Mask, metrics and coordinate arrays of the rectangular domain, for both grids.
maskp, (pmp, pnp), (xip, yip) = DIVAnd.DIVAnd_rectdom(xp, yp)
mask, (pm, pn), (xi, yi) = DIVAnd.DIVAnd_rectdom(xg, yg);

# Optional: mask out part of the domain by hand (left disabled)
#mask[(xi.+0.25)./0.95 .+ (yi.-2.4)./1.1 .<1 ].=false
#mask[2*xi.+yi .<3.4 ].=false

#### Perform computation
The function requires
- the mask;
- the metrics `pm`, `pn`;
- the grid coordinates `xi`, `yi`;
- the observation coordinates `xo`, `yo`;
- the inflation vector `inflation`;
- the length scales `Labs`; if set to zero, an empirical estimate is computed. 

In [None]:
# A zero length scale requests an empirical estimate of the bandwidth.
Labs = 0

# Heatmap on the unmasked domain, with one adaptive-bandwidth iteration.
@time dens1, LHM, LCV, LSCV = DIVAnd_heatmap(
    mask, (pm, pn), (xi, yi), (xo, yo), inflation, Labs; Ladaptiveiterations = 1
);

The outputs are:
- `dens1`, the 2D field storing the _heatmap_ (data density field);
- `LHM`, the tuple of _bandwidths_ (either the input values or the calculated ones);
- `LCV`, the _Likelihood Cross Validation_ estimator value (the higher the better);
- `LSCV`, the _Least Square Cross Validation_ estimator value (the lower the better).

### Plot results

In [None]:
# Density field with the sightings overlaid as semi-transparent white dots.
fig = Figure()
ga = GeoAxis(
    fig[1, 1];
    title = "Density of observations",
    dest = "+proj=merc",
    xticks = -20:5:5,
    yticks = 40:5.0:65.0,
)
hm = heatmap!(ga, xp, yp, dens1)
scatter!(ga, xo, yo; color = :white, alpha = 0.5, markersize = 3)

# Colour scale in the second layout column; fixed aspect for the map column.
Colorbar(fig[1, 2], hm)
colsize!(fig.layout, 1, Aspect(1, 0.65))
fig

### Plot logarithm

In [None]:
# Same map as before, but showing the natural logarithm of the density.
fig = Figure()
ga = GeoAxis(
    fig[1, 1];
    title = "Density (log)",
    dest = "+proj=merc",
    xticks = -20:5:5,
    yticks = 40:5.0:65.0,
)
hm = heatmap!(ga, xp, yp, log.(dens1))
scatter!(ga, xo, yo; color = :white, alpha = 0.25, markersize = 3)

# Colour scale in the second layout column; fixed aspect for the map column.
Colorbar(fig[1, 2], hm)
colsize!(fig.layout, 1, Aspect(1, 0.65))
fig

## Heatmap with land-sea mask
The previous case did not take into account the coastline.
### Prepare land mask from topography

In [None]:
# Make sure the GEBCO file is available, then load the bathymetry using the
# analysis grid vectors (xg, yg) and plot it.
bathname = gebco04file
download_check(bathname, gebco04fileURL)
bx, by, b = load_bath(bathname, true, xg, yg)
plot_bathy(bx, by, b[:, :, 1])

In [None]:
# A grid point is kept (true) in the mask wherever the loaded bathymetry is non-negative.
for j in axes(b, 2), i in axes(b, 1)
    mask[i, j] = b[i, j] >= 0
end

plot_mask(bx, by, mask)

### First heatmap with uniform and automatic bandwidth

In [None]:
# Masked domain, length scale 0 (automatic estimate), no adaptive iteration.
@time dens1, LHM, LCV, LSCV = DIVAnd_heatmap(
    mask, (pm, pn), (xi, yi), (xo, yo), inflation, 0; Ladaptiveiterations = 0
)

In [None]:
# Log-density obtained with the uniform bandwidth, sightings overlaid in red.
fig = Figure()
ga = GeoAxis(
    fig[1, 1];
    title = "Density (log)",
    dest = "+proj=merc",
    xticks = -20:5:5,
    yticks = 40:5.0:65.0,
)
hm = heatmap!(ga, xp, yp, log.(dens1))
scatter!(ga, xo, yo; color = :red, alpha = 0.25, markersize = 3)
Colorbar(fig[1, 2], hm)
colsize!(fig.layout, 1, Aspect(1, 0.65))
display(fig)

# Cross-validation indicators and mean bandwidth in each direction.
@show LCV, LSCV, mean(LHM[1]), mean(LHM[2])

### Now with adapted bandwidth
The parameter `Ladaptiveiterations` is set to 1.

In [None]:
# Same analysis as the uniform case, now with one adaptive-bandwidth iteration.
@time dens1, LHM, LCV, LSCV = DIVAnd_heatmap(
    mask, (pm, pn), (xi, yi), (xo, yo), inflation, 0; Ladaptiveiterations = 1
)

In [None]:
# Log-density obtained with the adapted bandwidth, sightings overlaid in red.
fig = Figure()
ga = GeoAxis(
    fig[1, 1];
    title = "Density (log)",
    dest = "+proj=merc",
    xticks = -20:5:5,
    yticks = 40:5.0:65.0,
)
hm = heatmap!(ga, xp, yp, log.(dens1))
scatter!(ga, xo, yo; color = :red, alpha = 0.25, markersize = 3)
Colorbar(fig[1, 2], hm)
colsize!(fig.layout, 1, Aspect(1, 0.65))
display(fig)

# Cross-validation indicators and mean bandwidth in each direction.
@show LCV, LSCV, mean(LHM[1]), mean(LHM[2])

### But how many iterations? Cross-validation indicators can help
We test different values (from 0 to 5) for the parameter `Ladaptiveiterations`.

In [None]:
# Run the analysis for 0 to 5 adaptive-bandwidth iterations and draw one figure per
# run, so that the cross-validation indicators can be compared.
for iii = 0:5
    dens1, LHM, LCV, LSCV = DIVAnd_heatmap(
        mask,
        (pm, pn),
        (xi, yi),
        (xo, yo),
        inflation,
        0;
        Ladaptiveiterations = iii,
    )

    fig = Figure()
    ga = GeoAxis(
        fig[1, 1];
        dest = "+proj=merc",
        # Label every value: without the iteration count and the indicator names the
        # six figures cannot be told apart or compared.
        title = "Ladaptiveiterations = $iii, mean(LHM[1]) = $(mean(LHM[1]))\n" *
                "LCV = $LCV\nLSCV = $LSCV",
        xticks = -20:5:5,
        yticks = 40:5.0:65.0,
    )
    hm = heatmap!(ga, xp, yp, log.(dens1))
    scatter!(ga, xo, yo, markersize = 3, color = :red, alpha = 0.25)
    Colorbar(fig[1, 2], hm)
    colsize!(fig.layout, 1, Aspect(1, 0.65))
    display(fig)
end

## Best number of iterations
4 iterations yield the highest likelihood (`LCV`) and the lowest least-squares estimator (`LSCV`).

In [None]:
# Recompute the heatmap with the selected number of adaptive iterations (4) BEFORE
# plotting. Previously the figure was drawn first and therefore showed whatever the
# preceding cell had left in dens1, LHM, LCV and LSCV, not the 4-iteration result.
dens1, LHM, LCV, LSCV = DIVAnd_heatmap(
    mask,
    (pm, pn),
    (xi, yi),
    (xo, yo),
    inflation,
    0;
    Ladaptiveiterations = 4,
);

# Log-density of the 4-iteration analysis; the title lists the mean bandwidth along
# the first dimension, LCV and LSCV (one per line).
fig = Figure()
ga = GeoAxis(
    fig[1, 1];
    dest = "+proj=merc",
    title = "$(mean(LHM[1]))\n$LCV\n$LSCV",
    xticks = -20:5:5,
    yticks = 40:5.0:65.0,
)
hm = heatmap!(ga, xp, yp, log.(dens1))
scatter!(ga, xo, yo, markersize = 3, color = :red, alpha = 0.25)
Colorbar(fig[1, 2], hm)
colsize!(fig.layout, 1, Aspect(1, 0.65))
display(fig)

In [None]:
# Logarithm of the product of the two bandwidth fields, sightings overlaid in red.
fig = Figure()
ga = GeoAxis(
    fig[1, 1];
    title = "Surface of bandwidth (log)",
    dest = "+proj=merc",
    xticks = -20:5:5,
    yticks = 40:5.0:65.0,
)
hm = heatmap!(ga, xp, yp, log.(LHM[1] .* LHM[2]))
scatter!(ga, xo, yo; color = :red, alpha = 0.25, markersize = 3)

# Colour scale in the second layout column; fixed aspect for the map column.
Colorbar(fig[1, 2], hm)
colsize!(fig.layout, 1, Aspect(1, 0.65))
fig

<div class="alert alert-block alert-info">
ℹ️ There is no information used on the effort of looking for turtles.<br>
Obviously more are seen close to coastlines because of easier spotting.
</div>