From 1d9e1e6b5bd5e8b310cf4bbbfef2596d28e069b7 Mon Sep 17 00:00:00 2001
From: Leonardo Uieda <leouieda@gmail.com>
Date: Wed, 26 Oct 2022 16:12:27 +0100
Subject: [PATCH] Deprecate the scatter method of BaseGridder (#357)

The method is a bit useless since we rarely want to predict on random
points. Copy the code to the `CheckerBoard` class since that's where
it is most useful. Issue a `FutureWarning` from the base `scatter`
method.
---
 doc/gallery_src/checkerboard.py |  4 +-
 verde/base/base_classes.py      | 12 +++++
 verde/synthetic.py              | 84 ++++++++++++++++++++++++++++++++-
 verde/tests/test_base.py        |  3 +-
 verde/tests/test_synthetic.py   | 33 +++++++++++++
 5 files changed, 132 insertions(+), 4 deletions(-)
 create mode 100644 verde/tests/test_synthetic.py

diff --git a/doc/gallery_src/checkerboard.py b/doc/gallery_src/checkerboard.py
index c17a8324a..277e0aca6 100644
--- a/doc/gallery_src/checkerboard.py
+++ b/doc/gallery_src/checkerboard.py
@@ -9,8 +9,8 @@
 =====================
 
 The :class:`verde.synthetic.CheckerBoard` class generates synthetic data in a
-checkerboard pattern. It has the same data generation methods that most
-gridders have: predict, grid, scatter, and profile.
+checkerboard pattern. It has different data generation methods, some of which
+are shared with most other gridders: predict, grid, profile, and scatter.
 """
 import matplotlib.pyplot as plt
 
diff --git a/verde/base/base_classes.py b/verde/base/base_classes.py
index fa74970da..30414da2c 100644
--- a/verde/base/base_classes.py
+++ b/verde/base/base_classes.py
@@ -531,6 +531,12 @@ def scatter(
         dimensions and the data field(s) in the output
         :class:`pandas.DataFrame`. Default names are provided.
 
+        .. warning::
+
+            The ``scatter`` method is deprecated and will be removed in Verde
+            2.0.0. Use :func:`verde.scatter_points` and the ``predict`` method
+            instead.
+
         Parameters
         ----------
         region : list = [W, E, S, N]
@@ -570,6 +576,12 @@ def scatter(
             The interpolated values on a random set of points.
 
         """
+        warnings.warn(
+            "The 'scatter' method is deprecated and will be removed in Verde "
+            "2.0.0. Use 'verde.scatter_points' and the 'predict' method "
+            "instead.",
+            FutureWarning,
+        )
         dims = self._get_dims(dims)
         region = get_instance_region(self, region)
         coordinates = scatter_points(region, size, random_state=random_state, **kwargs)
diff --git a/verde/synthetic.py b/verde/synthetic.py
index 762e9966b..3303c06de 100644
--- a/verde/synthetic.py
+++ b/verde/synthetic.py
@@ -5,9 +5,12 @@
 # This code is part of the Fatiando a Terra project (https://www.fatiando.org)
 #
 import numpy as np
+import pandas as pd
 
 from .base import BaseGridder
-from .coordinates import check_region
+from .base.base_classes import get_instance_region, project_coordinates
+from .base.utils import check_data
+from .coordinates import check_region, scatter_points
 
 
 class CheckerBoard(BaseGridder):
@@ -113,3 +116,82 @@ def predict(self, coordinates):
             * np.cos((2 * np.pi / self.w_north_) * northing)
         )
         return data
+
+    def scatter(
+        self,
+        region=None,
+        size=300,
+        random_state=0,
+        dims=None,
+        data_names=None,
+        projection=None,
+        **kwargs,
+    ):
+        """
+        Generate values on a random scatter of points.
+
+        Point coordinates are generated by :func:`verde.scatter_points`. Other
+        arguments for this function can be passed as extra keyword arguments
+        (``kwargs``) to this method.
+
+        By default, the region specified when creating the class instance will
+        be used if ``region=None``.
+
+        Use the *dims* and *data_names* arguments to set custom names for the
+        dimensions and the data field(s) in the output
+        :class:`pandas.DataFrame`. Default names are provided.
+
+        Parameters
+        ----------
+        region : list = [W, E, S, N]
+            The west, east, south, and north boundaries of a given region.
+        size : int
+            The number of points to generate.
+        random_state : numpy.random.RandomState or an int seed
+            A random number generator used to define the state of the random
+            permutations. Use a fixed seed to make sure computations are
+            reproducible. Use ``None`` to choose a seed automatically
+            (resulting in different numbers with each run).
+        dims : list or None
+            The names of the northing and easting data dimensions,
+            respectively, in the output dataframe. Default is determined from
+            the ``dims`` attribute of the class. Must be defined in the
+            following order: northing dimension, easting dimension.
+            **NOTE: This is an exception to the "easting" then
+            "northing" pattern but is required for compatibility with xarray.**
+        data_names : str, list or None
+            The name(s) of the data variables in the output dataframe. Defaults
+            to ``'scalars'`` for scalar data,
+            ``['east_component', 'north_component']`` for 2D vector data, and
+            ``['east_component', 'north_component', 'vertical_component']`` for
+            3D vector data.
+        projection : callable or None
+            If not None, then should be a callable object
+            ``projection(easting, northing) -> (proj_easting, proj_northing)``
+            that takes in easting and northing coordinate arrays and returns
+            projected easting and northing coordinate arrays. This function
+            will be used to project the generated scatter coordinates before
+            passing them into ``predict``. For example, you can use this to
+            generate a geographic scatter from a Cartesian gridder.
+
+        Returns
+        -------
+        table : pandas.DataFrame
+            The synthetic data values on a random set of points.
+
+        """
+        dims = self._get_dims(dims)
+        region = get_instance_region(self, region)
+        coordinates = scatter_points(region, size, random_state=random_state, **kwargs)
+        if projection is None:
+            data = check_data(self.predict(coordinates))
+        else:
+            data = check_data(
+                self.predict(project_coordinates(coordinates, projection))
+            )
+        data_names = self._get_data_names(data, data_names)
+        columns = [(dims[0], coordinates[1]), (dims[1], coordinates[0])]
+        extra_coords_names = self._get_extra_coords_names(coordinates)
+        columns.extend(zip(extra_coords_names, coordinates[2:]))
+        columns.extend(zip(data_names, data))
+        return pd.DataFrame(dict(columns), columns=[c[0] for c in columns])
diff --git a/verde/tests/test_base.py b/verde/tests/test_base.py
index 45fa26e0f..0f55ca378 100644
--- a/verde/tests/test_base.py
+++ b/verde/tests/test_base.py
@@ -150,7 +150,8 @@ def test_basegridder():
     # Grid on profile
     prof = grd.profile((0, -10), (10, -10), 30)
     # Grid on scatter
-    scat = grd.scatter(region=region, size=1000, random_state=0)
+    with pytest.warns(FutureWarning):
+        scat = grd.scatter(region=region, size=1000, random_state=0)
 
     for grid in grids:
         npt.assert_allclose(grid.scalars.values, data_true)
diff --git a/verde/tests/test_synthetic.py b/verde/tests/test_synthetic.py
new file mode 100644
index 000000000..5f4bd1f39
--- /dev/null
+++ b/verde/tests/test_synthetic.py
@@ -0,0 +1,33 @@
+# Copyright (c) 2017 The Verde Developers.
+# Distributed under the terms of the BSD 3-Clause License.
+# SPDX-License-Identifier: BSD-3-Clause
+#
+# This code is part of the Fatiando a Terra project (https://www.fatiando.org)
+#
+"""
+Test the synthetic data generation functions and classes.
+"""
+import numpy.testing as npt
+
+from ..synthetic import CheckerBoard
+
+
+def test_checkerboard_scatter_projection():
+    "Test generating scattered points when passing in a projection"
+
+    # Let's say the projection is doubling the coordinates
+    def proj(lon, lat, inverse=False):
+        "Double the coordinates, or halve them if inverse is True"
+        if inverse:
+            return (lon / 2, lat / 2)
+        return (lon * 2, lat * 2)
+
+    region = (0, 10, -10, -5)
+    region_proj = (0, 5, -5, -2.5)
+    checker = CheckerBoard(region=region)
+    checker_proj = CheckerBoard(region=region_proj)
+    scatter = checker.scatter(region, 1000, random_state=0, projection=proj)
+    scatter_proj = checker_proj.scatter(region, 1000, random_state=0)
+    npt.assert_allclose(scatter.scalars, scatter_proj.scalars)
+    npt.assert_allclose(scatter.easting, scatter_proj.easting)
+    npt.assert_allclose(scatter.northing, scatter_proj.northing)