# MARIS seawater data 

> A Spatial analysis treatment

Refs:  
- https://darribas.org/gds_course/content/bG/lab_G.html
- https://geographicdata.science/book/notebooks/00_toc.html
- https://geodacenter.github.io/documentation.html

In [None]:
%load_ext autoreload
%autoreload 2

In [None]:
from pathlib import Path

import numpy as np
import pandas as pd
import geopandas as gpd

from shapely.geometry import Point

from pysal.explore import esda
from pysal.lib import weights

import matplotlib.pyplot as plt
import seaborn
import contextily

from splot import esda as esdaplot

## Utilities

In [None]:
def quick_plot(db, column='activity', alpha=0.5, ms=3):
    """Map `column` of `db` in five quantile classes over a Positron basemap.

    Parameters
    ----------
    db : object exposing `.plot(...)` and `.crs` (a GeoDataFrame in this notebook)
    column : name of the column used to colour the markers
    alpha : opacity forwarded to `db.plot`
    ms : marker size, forwarded to `db.plot` as `markersize`
    """
    fig, axis = plt.subplots(1, figsize=(9, 9))

    # Styling shared by every quick look: viridis, 5 quantile bins,
    # legend anchored at location code 2.
    layer_style = dict(
        column=column,
        cmap="viridis",
        scheme="quantiles",
        k=5,
        edgecolor="white",
        linewidth=0.0,
        alpha=alpha,
        legend=True,
        legend_kwds={"loc": 2},
        markersize=ms,
    )
    db.plot(ax=axis, **layer_style)

    # Basemap tiles are requested in the same CRS as the data
    contextily.add_basemap(
        axis, crs=db.crs, source=contextily.providers.CartoDB.Positron
    )
    axis.set_axis_off()

## Data loading

In [None]:
# CSV export to analyse, located under the current user's home directory
fname = Path.home() / 'pro/data/maris/maris-seawater-lamer-2023-07-25.csv'
df = pd.read_csv(fname)
# Show (rows, columns) as the cell output
df.shape

In [None]:
# Filtering out North Atlantic Ocean
#df = df[df['area_name'] != 'North Atlantic Ocean']

In [None]:
# Convert the 'begperiod' column in place with pandas' datetime parser
df['begperiod'] = pd.to_datetime(df['begperiod'])

In [None]:
df.head()

## Preview

In [None]:
# Build the point geometries from the lon/lat columns in one vectorized call
# rather than instantiating one shapely Point per row in a Python loop.
geometry = gpd.points_from_xy(df['lon'], df['lat'])

# Coordinates are declared as EPSG:4326, reprojected to EPSG:3857, then only
# the columns used downstream are kept and rows with missing values dropped.
db = (
    gpd.GeoDataFrame(df, geometry=geometry, crs="EPSG:4326")
    .to_crs(epsg=3857)[["area_name", "begperiod", "activity", "geometry"]]
    .dropna()
)
db.info()

In [None]:
# Plotting activity.
# This cell used to repeat the body of `quick_plot` verbatim; the helper's
# defaults (column='activity', alpha=0.5, ms=3) reproduce the same figure.
quick_plot(db)

## Global spatial autocorrelation

In [None]:
# Generate W from the GeoDataFrame: k-nearest-neighbour weights with
# 8 neighbours per observation
w = weights.KNN.from_dataframe(db, k=8)
# Row-standardization
w.transform = "R"

In [None]:
# Spatial lag of the raw activity under the weights `w`.
# Uses `weights.lag_spatial`, the same spelling as the later lag computation
# in this notebook, instead of the longer `weights.spatial_lag.lag_spatial`.
db["activity_lag"] = weights.lag_spatial(w, db["activity"])

In [None]:
db.head()

### Smoothing nature of spatial lag

In [None]:
f, axs = plt.subplots(1, 2, figsize=(20, 10))
ax1, ax2 = axs

# Left panel: the observed values; right panel: their spatial lag.
# Both panels share exactly the same styling, so draw them in a loop.
panels = (
    (ax1, "activity", "Activity"),
    (ax2, "activity_lag", "Activity Lag"),
)
for panel, field, heading in panels:
    db.plot(
        column=field,
        cmap="viridis",
        scheme="quantiles",
        k=5,
        edgecolor="white",
        linewidth=0.0,
        alpha=0.5,
        legend=True,
        markersize=3,
        ax=panel,
    )
    panel.set_axis_off()
    panel.set_title(heading)
    contextily.add_basemap(
        panel,
        crs=db.crs,
        source=contextily.providers.CartoDB.Positron,
    )

### Moran Plot and Moran’s I

In [None]:
# Base-10 log of activity.
# NOTE(review): log10 yields -inf for 0 and NaN for negative input — confirm
# that `activity` is strictly positive in this dataset.
db['activity_log'] = np.log10(db.activity)

In [None]:
# Centre the log-activity on its mean. Despite the `_std` suffix the values
# are NOT divided by the standard deviation, so this is mean-centring only.
db["activity_std"] = db["activity_log"] - db["activity_log"].mean()
# Spatial lag of the centred variable under the weights `w`
db["activity_lag_std"] = weights.lag_spatial(
    w, db["activity_std"]
)

In [None]:
db.head()

In [None]:
f, ax = plt.subplots(1, figsize=(6, 6))

# Centred variable on x, its spatial lag on y, plus a fitted regression line
fit_style = {"color": "r", "lw":1.5}
point_style = {"alpha": 0.3, "s": 2}
seaborn.regplot(
    data=db,
    x="activity_std",
    y="activity_lag_std",
    ci=None,
    scatter_kws=point_style,
    line_kws=fit_style,
)

# Reference lines through zero split the plot into four quadrants
for draw_reference in (ax.axvline, ax.axhline):
    draw_reference(0, c="k", alpha=0.5)

ax.set_xlabel("activity")
ax.set_ylabel("activity lag")
ax.set_title("Moran Plot - Activity (log-transformed and standardized)");

The plot displays a positive relationship between both variables. This indicates the presence of positive spatial autocorrelation: similar values tend to be located close to each other. This means that the overall trend is for high values to be close to other high values, and for low values to be surrounded by other low values. This, however, does not mean that this is the only case in the dataset: there can of course be particular situations where high values are surrounded by low ones, and vice versa. But it means that, if we had to summarize the main pattern of the data in terms of how clustered similar values are, the best way would be to say they are positively correlated and, hence, clustered over space. 

In [None]:
# `w.transform` is set to "R" again here; it was already set to "R" right
# after `w` was created, so this line only re-asserts that setting.
w.transform = "R"
# Global Moran statistic of the log-transformed activity under `w`
moran = esda.moran.Moran(db["activity_log"], w)

In [None]:
moran.I

In [None]:
moran.p_sim

#### By IHO region

In [None]:
db.area_name.unique()

In [None]:
# Moran's I computed separately for every `area_name` group.
morans = {'area_name': [], 'moran.I': [], 'moran.p_sim': [], 'n': []}
knn = 8
# Loop-local names (`region`, `moran_iho`) are used so that the notebook-level
# `df` (raw data) and `moran` (global statistic) are not overwritten.
for name, region in db.groupby('area_name'):
    # Every observation needs `knn` neighbours other than itself, so a region
    # must hold strictly more than `knn` rows. The previous guard
    # (`len(...) > 1`) let undersized regions through to KNN.
    if len(region) <= knn:
        print(f'area_name: {name} # of measurements <= # of knn: {knn}')
        continue

    w_iho = weights.KNN.from_dataframe(region, k=knn, silence_warnings=True)
    w_iho.transform = "R"
    moran_iho = esda.moran.Moran(region["activity_log"], w_iho)

    morans['area_name'].append(name)
    morans['moran.I'].append(moran_iho.I)
    morans['moran.p_sim'].append(moran_iho.p_sim)
    morans['n'].append(len(region))

# One row per region, strongest autocorrelation first
df_morans = pd.DataFrame(morans)
df_morans.sort_values(by=['moran.I'], ascending=False)

In [None]:
# Map the log-activity of a single region with opaque, larger markers
area_name = "Irish Sea and St. George's Channel"
in_area = db.area_name == area_name
quick_plot(db[in_area], column='activity_log', alpha=1, ms=5)

In [None]:
# Map the log-activity of a single region with opaque, larger markers
area_name = "Kattegat"
in_area = db.area_name == area_name
quick_plot(db[in_area], column='activity_log', alpha=1, ms=5)

In [None]:
# Map the log-activity of a single region with opaque, larger markers
area_name = "English Channel"
in_area = db.area_name == area_name
quick_plot(db[in_area], column='activity_log', alpha=1, ms=5)

In [None]:
# One small Moran scatterplot per `area_name` group.
# `activity_std` / `activity_lag_std` were computed on the whole of `db`, so
# each panel shows a regional subset of the global Moran plot.
# The loop variable is named `region` so the notebook-level `df` holding the
# raw data is not overwritten.
for name, region in db.groupby('area_name'):
    f, ax = plt.subplots(1, figsize=(3, 3))
    seaborn.regplot(
        x="activity_std",
        y="activity_lag_std",
        ci=None,
        data=region,
        line_kws={"color": "r", "lw":1.5},
        scatter_kws={"alpha": 0.5, "s": 2},
        # Draw on the axes created above rather than on the implicit current one
        ax=ax,
    )
    # Reference lines through zero split the plot into four quadrants
    ax.axvline(0, c="k", alpha=0.5)
    ax.axhline(0, c="k", alpha=0.5)
    ax.set_xlabel("activity")
    ax.set_ylabel("activity lag")
    ax.set_title(f"Moran Plot: {name} - Activity (log-transformed and standardized)")

## Local spatial autocorrelation

Moran’s I does not indicate areas within the map where specific types of values (e.g., high, low) are clustered, or instances of explicit dispersion. In other words, Moran’s I can tell us whether values in our map cluster together (or disperse) overall, but it will not inform us about where specific clusters (or outliers) are.

In [None]:
# Local Moran statistics of the log-transformed activity, using the same
# weights `w` as the global statistic
lisa = esda.moran.Moran_Local(db["activity_log"], w)

In [None]:
# Distribution of the local statistics: a density curve with a rug of the
# individual values drawn on the same axes
ax = seaborn.kdeplot(lisa.Is)
seaborn.rugplot(lisa.Is, ax=ax, alpha=0.2);

In [None]:
# Set up a 2x2 figure; flatten the axes array once so that panels can be
# addressed with a single index
f, axs = plt.subplots(nrows=2, ncols=2, figsize=(12, 12))
axs = axs.flatten()

# Subplot 1 #
# Map of the local statistics in five quantile classes (first axis)
ax = axs[0]
# `assign` attaches the statistics as a temporary column without touching `db`
db.assign(
    Is=lisa.Is
).plot(
    column="Is",
    cmap="plasma",
    scheme="quantiles",
    k=5,
    edgecolor="white",
    linewidth=0.1,
    alpha=0.5,
    legend=True,
    markersize=3,
    ax=ax,
)

# Subplot 2 #
# Quadrant categories (second axis).
# To ensure every observation is assigned a quadrant, we "trick" the function
# by setting the significance level to 1 so all observations are treated as
# "significant" and thus get a quadrant colour.
ax = axs[1]
esdaplot.lisa_cluster(lisa, db, p=1, ax=ax, markersize=0.1, alpha=0.5)

# Subplot 3 #
# Significance map (third axis)
ax = axs[2]
# Flag observations whose p_sim is below 0.05 ...
labels = pd.Series(
    1 * (lisa.p_sim < 0.05),  # 1 if significant, 0 otherwise
    index=db.index  # Use the index of the original data
    # ... and recode 1 to "Significant" and 0 to "Non-Significant"
).map({1: "Significant", 0: "Non-Significant"})
# Attach the labels as a temporary column and map the two categories
db.assign(
    cl=labels
).plot(
    column="cl",
    categorical=True,
    k=2,
    cmap="Paired",
    linewidth=0.1,
    edgecolor="white",
    legend=True,
    alpha=0.8,
    markersize=1,
    ax=ax,
)

# Subplot 4 #
# Cluster map (fourth axis): quadrant colours at a 5% significance level, so
# only observations in statistically significant clusters are highlighted
ax = axs[3]
esdaplot.lisa_cluster(lisa, db, p=0.05, ax=ax, markersize=0.5, alpha=0.5)

# Figure styling #
# Titles are listed once, in panel order, and paired with the already
# flattened axes
titles = [
    "Local Statistics",
    "Scatterplot Quadrant",
    "Statistical Significance",
    "Moran Cluster Map",
]
for ax, title in zip(axs, titles):
    ax.set_axis_off()
    ax.set_title(title, y=0)
# Tight layout to minimize in-between white space
f.tight_layout()

## Point pattern analysis

In [None]:
# Extract the point coordinates once and reuse them for both columns.
# `db` was reprojected to EPSG:3857, so despite the column names these are
# projected x/y values rather than degrees of longitude/latitude.
coords = db.get_coordinates()
db['longitude'] = coords.x
db['latitude'] = coords.y

In [None]:
# Joint plot of the observation locations, drawn with very small markers
seaborn.jointplot(x="longitude", y="latitude", data=db, s=0.5);

In [None]:
# Same joint plot as before, kept in a variable so its central axes can be
# handed to contextily
joint_axes = seaborn.jointplot(data=db, x="longitude", y="latitude", s=0.5)
# Label-free basemap underneath the scatter, requested in the CRS of the data
tiles = contextily.providers.CartoDB.PositronNoLabels
contextily.add_basemap(joint_axes.ax_joint, crs=db.crs, source=tiles);

In [None]:
# Figure and axis for the hexagonal binning map
f, ax = plt.subplots(1, figsize=(12, 9))

# Hexbin settings: gridsize of 50, no borderlines, half transparency,
# reverse viridis colormap
hex_style = dict(gridsize=50, linewidths=0, alpha=0.5, cmap="viridis_r")
hb = ax.hexbin(db["longitude"], db["latitude"], **hex_style)

# Basemap underneath the bins
contextily.add_basemap(ax, source=contextily.providers.CartoDB.Positron)
# Colorbar for the hexbin layer
plt.colorbar(hb)
# Hide the axes frame and ticks
ax.set_axis_off()

In [None]:
# Set up figure and axis
f, ax = plt.subplots(1, figsize=(9, 9))
# Filled 2-D KDE with 50 contour levels, an alpha of 0.55,
# and the reverse viridis colormap.
# `fill` replaces the deprecated `shade` flag, `levels` is the current name of
# the contour-count argument, and the target axes is passed explicitly rather
# than relying on the implicit current axes.
seaborn.kdeplot(
    x="longitude",
    y="latitude",
    data=db,
    levels=50,
    fill=True,
    alpha=0.55,
    cmap="viridis_r",
    ax=ax,
)
# Add basemap
contextily.add_basemap(
    ax, source=contextily.providers.CartoDB.Positron
)
# Remove axes
ax.set_axis_off()