From 1d9e1e6b5bd5e8b310cf4bbbfef2596d28e069b7 Mon Sep 17 00:00:00 2001 From: Leonardo Uieda Date: Wed, 26 Oct 2022 16:12:27 +0100 Subject: [PATCH] Deprecate the scatter method of BaseGridder (#357) The method is a bit useless since we rarely want to predict on random points. Copy the code to the `CheckerBoard` function since that's where it is most useful. Issue a `FutureWarning` from the base `scatter` method. --- doc/gallery_src/checkerboard.py | 4 +- verde/base/base_classes.py | 12 +++++ verde/synthetic.py | 84 ++++++++++++++++++++++++++++++++- verde/tests/test_base.py | 3 +- verde/tests/test_synthetic.py | 33 +++++++++++++ 5 files changed, 132 insertions(+), 4 deletions(-) create mode 100644 verde/tests/test_synthetic.py diff --git a/doc/gallery_src/checkerboard.py b/doc/gallery_src/checkerboard.py index c17a8324a..277e0aca6 100644 --- a/doc/gallery_src/checkerboard.py +++ b/doc/gallery_src/checkerboard.py @@ -9,8 +9,8 @@ ===================== The :class:`verde.synthetic.CheckerBoard` class generates synthetic data in a -checkerboard pattern. It has the same data generation methods that most -gridders have: predict, grid, scatter, and profile. +checkerboard pattern. It has different data generation methods, some of which +are shared with most other gridders: predict, grid, profile, and scatter. """ import matplotlib.pyplot as plt diff --git a/verde/base/base_classes.py b/verde/base/base_classes.py index fa74970da..30414da2c 100644 --- a/verde/base/base_classes.py +++ b/verde/base/base_classes.py @@ -531,6 +531,12 @@ def scatter( dimensions and the data field(s) in the output :class:`pandas.DataFrame`. Default names are provided. + .. warning:: + + The ``scatter`` method is deprecated and will be removed in Verde + 2.0.0. Use :func:`verde.scatter_points` and the ``predict`` method + instead. + Parameters ---------- region : list = [W, E, S, N] @@ -570,6 +576,12 @@ def scatter( The interpolated values on a random set of points. 
""" + warnings.warn( + "The 'scatter' method is deprecated and will be removed in Verde " + "2.0.0. Use 'verde.scatter_points' and the 'predict' method " + "instead.", + FutureWarning, + ) dims = self._get_dims(dims) region = get_instance_region(self, region) coordinates = scatter_points(region, size, random_state=random_state, **kwargs) diff --git a/verde/synthetic.py b/verde/synthetic.py index 762e9966b..3303c06de 100644 --- a/verde/synthetic.py +++ b/verde/synthetic.py @@ -5,9 +5,12 @@ # This code is part of the Fatiando a Terra project (https://www.fatiando.org) # import numpy as np +import pandas as pd from .base import BaseGridder -from .coordinates import check_region +from .base.base_classes import get_instance_region, project_coordinates +from .base.utils import check_data +from .coordinates import check_region, scatter_points class CheckerBoard(BaseGridder): @@ -113,3 +116,82 @@ def predict(self, coordinates): * np.cos((2 * np.pi / self.w_north_) * northing) ) return data + + def scatter( + self, + region=None, + size=300, + random_state=0, + dims=None, + data_names=None, + projection=None, + **kwargs, + ): + """ + Generate values on a random scatter of points. + + Point coordinates are generated by :func:`verde.scatter_points`. Other + arguments for this function can be passed as extra keyword arguments + (``kwargs``) to this method. + + By default, the region specified when creating the class instance will + be used if ``region=None``. + + Use the *dims* and *data_names* arguments to set custom names for the + dimensions and the data field(s) in the output + :class:`pandas.DataFrame`. Default names are provided. + + Parameters + ---------- + region : list = [W, E, S, N] + The west, east, south, and north boundaries of a given region. + size : int + The number of points to generate. + random_state : numpy.random.RandomState or an int seed + A random number generator used to define the state of the random + permutations. 
Use a fixed seed to make sure computations are + reproducible. Use ``None`` to choose a seed automatically + (resulting in different numbers with each run). + dims : list or None + The names of the northing and easting data dimensions, + respectively, in the output dataframe. Default is determined from + the ``dims`` attribute of the class. Must be defined in the + following order: northing dimension, easting dimension. + **NOTE: This is an exception to the "easting" then + "northing" pattern but is required for compatibility with xarray.** + data_names : str, list or None + The name(s) of the data variables in the output dataframe. Defaults + to ``'scalars'`` for scalar data, + ``['east_component', 'north_component']`` for 2D vector data, and + ``['east_component', 'north_component', 'vertical_component']`` for + 3D vector data. + projection : callable or None + If not None, then should be a callable object + ``projection(easting, northing) -> (proj_easting, proj_northing)`` + that takes in easting and northing coordinate arrays and returns + projected easting and northing coordinate arrays. This function + will be used to project the generated scatter coordinates before + passing them into ``predict``. For example, you can use this to + generate a geographic scatter from a Cartesian gridder. + + Returns + ------- + table : pandas.DataFrame + The synthetic data values on a random set of points. 
+ + """ + dims = self._get_dims(dims) + region = get_instance_region(self, region) + coordinates = scatter_points(region, size, random_state=random_state, **kwargs) + if projection is None: + data = check_data(self.predict(coordinates)) + else: + data = check_data( + self.predict(project_coordinates(coordinates, projection)) + ) + data_names = self._get_data_names(data, data_names) + columns = [(dims[0], coordinates[1]), (dims[1], coordinates[0])] + extra_coords_names = self._get_extra_coords_names(coordinates) + columns.extend(zip(extra_coords_names, coordinates[2:])) + columns.extend(zip(data_names, data)) + return pd.DataFrame(dict(columns), columns=[c[0] for c in columns]) diff --git a/verde/tests/test_base.py b/verde/tests/test_base.py index 45fa26e0f..0f55ca378 100644 --- a/verde/tests/test_base.py +++ b/verde/tests/test_base.py @@ -150,7 +150,8 @@ def test_basegridder(): # Grid on profile prof = grd.profile((0, -10), (10, -10), 30) # Grid on scatter - scat = grd.scatter(region=region, size=1000, random_state=0) + with pytest.warns(FutureWarning): + scat = grd.scatter(region=region, size=1000, random_state=0) for grid in grids: npt.assert_allclose(grid.scalars.values, data_true) diff --git a/verde/tests/test_synthetic.py b/verde/tests/test_synthetic.py new file mode 100644 index 000000000..5f4bd1f39 --- /dev/null +++ b/verde/tests/test_synthetic.py @@ -0,0 +1,33 @@ +# Copyright (c) 2017 The Verde Developers. +# Distributed under the terms of the BSD 3-Clause License. +# SPDX-License-Identifier: BSD-3-Clause +# +# This code is part of the Fatiando a Terra project (https://www.fatiando.org) +# +""" +Test the synthetic data generation functions and classes. 
+""" +import numpy.testing as npt + +from ..synthetic import CheckerBoard + + +def test_checkerboard_scatter_projection(): + "Test generating scattered points when passing in a projection" + + # Let's say the projection is doubling the coordinates + def proj(lon, lat, inverse=False): + "Double the coordinates (halve them instead if inverse=True)" + if inverse: + return (lon / 2, lat / 2) + return (lon * 2, lat * 2) + + region = (0, 10, -10, -5) + region_proj = (0, 5, -5, -2.5) + checker = CheckerBoard(region=region) + checker_proj = CheckerBoard(region=region_proj) + scatter = checker.scatter(region, 1000, random_state=0, projection=proj) + scatter_proj = checker_proj.scatter(region, 1000, random_state=0) + npt.assert_allclose(scatter.scalars, scatter_proj.scalars) + npt.assert_allclose(scatter.easting, scatter_proj.easting) + npt.assert_allclose(scatter.northing, scatter_proj.northing)