Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

[SEDONA-493] Update default behavior of RS_NormalizeAll #1234

Merged
merged 6 commits into from
Feb 9, 2024
Merged
Show file tree
Hide file tree
Changes from 3 commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Jump to
Jump to file
Failed to load files.
Diff view
Diff view
Original file line number Diff line number Diff line change
Expand Up @@ -443,37 +443,37 @@ public static double[] normalize(double[] bandValues) {
}

public static GridCoverage2D normalizeAll(GridCoverage2D rasterGeom) {
return normalizeAll(rasterGeom, 0d, 255d, null, null, null, true);
return normalizeAll(rasterGeom, 0d, 255d, true, null, null, null);
}

public static GridCoverage2D normalizeAll(GridCoverage2D rasterGeom, double minLim, double maxLim) {
return normalizeAll(rasterGeom, minLim, maxLim, null, null, null, true);
return normalizeAll(rasterGeom, minLim, maxLim, true, null, null, null);
}

public static GridCoverage2D normalizeAll(GridCoverage2D rasterGeom, double minLim, double maxLim, double noDataValue) {
return normalizeAll(rasterGeom, minLim, maxLim, noDataValue, null, null, true);
public static GridCoverage2D normalizeAll(GridCoverage2D rasterGeom, double minLim, double maxLim, boolean normalizeAcrossBands) {
return normalizeAll(rasterGeom, minLim, maxLim, normalizeAcrossBands, null, null, null);
}

public static GridCoverage2D normalizeAll(GridCoverage2D rasterGeom, double minLim, double maxLim, Double noDataValue, boolean normalizeAcrossBands) {
return normalizeAll(rasterGeom, minLim, maxLim, noDataValue, null, null, normalizeAcrossBands);
public static GridCoverage2D normalizeAll(GridCoverage2D rasterGeom, double minLim, double maxLim, boolean normalizeAcrossBands, Double noDataValue) {
return normalizeAll(rasterGeom, minLim, maxLim, normalizeAcrossBands, noDataValue, null, null);
}

public static GridCoverage2D normalizeAll(GridCoverage2D rasterGeom, double minLim, double maxLim, Double noDataValue, Double minValue, Double maxValue) {
return normalizeAll(rasterGeom, minLim, maxLim, noDataValue, minValue, maxValue, true);
return normalizeAll(rasterGeom, minLim, maxLim, true, noDataValue, minValue, maxValue);
}

/**
*
* @param rasterGeom Raster to be normalized
* @param minLim Lower limit of normalization range
* @param maxLim Upper limit of normalization range
* @param normalizeAcrossBands flag to determine the normalization method
* @param noDataValue NoDataValue used in raster
* @param minValue Minimum value in raster
* @param maxValue Maximum value in raster
* @param normalizeAcrossBands flag to determine the normalization method
* @return a raster with all values in all bands normalized between minLim and maxLim
*/
public static GridCoverage2D normalizeAll(GridCoverage2D rasterGeom, double minLim, double maxLim, Double noDataValue, Double minValue, Double maxValue, boolean normalizeAcrossBands) {
public static GridCoverage2D normalizeAll(GridCoverage2D rasterGeom, double minLim, double maxLim, boolean normalizeAcrossBands, Double noDataValue, Double minValue, Double maxValue) {
if (minLim > maxLim) {
throw new IllegalArgumentException("minLim cannot be greater than maxLim");
}
Expand All @@ -491,6 +491,10 @@ public static GridCoverage2D normalizeAll(GridCoverage2D rasterGeom, double minL
Arrays.fill(minValues, Double.MAX_VALUE);
Arrays.fill(maxValues, -Double.MAX_VALUE);

// Trigger safe mode if noDataValue is null - noDataValue is set to maxLim and data values are normalized to range [minLim, maxLim-1].
// This is done to prevent setting valid data as noDataValue.
double safetyTrigger = (noDataValue == null) ? 1 : 0;

// Compute global min and max values across all bands if necessary and not provided
if (minValue == null || maxValue == null) {
for (int bandIndex = 0; bandIndex < numBands; bandIndex++) {
Expand Down Expand Up @@ -530,7 +534,7 @@ public static GridCoverage2D normalizeAll(GridCoverage2D rasterGeom, double minL
} else {
for (int i = 0; i < bandValues.length; i++) {
if (bandValues[i] != bandNoDataValue) {
double normalizedValue = minLim + ((bandValues[i] - currentMin) * (maxLim - minLim)) / (currentMax - currentMin);
double normalizedValue = minLim + ((bandValues[i] - currentMin) * (maxLim - safetyTrigger - minLim)) / (currentMax - currentMin);
bandValues[i] = castRasterDataType(normalizedValue, rasterDataType);
} else {
bandValues[i] = noDataValue;
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -362,20 +362,26 @@ public void testNormalizeAll() throws FactoryException {
raster3 = RasterBandEditors.setBandNoDataValue(raster3, 1, 16.0);
raster3 = RasterBandEditors.setBandNoDataValue(raster3, 2, 1.0);

GridCoverage2D normalizedRaster1 = MapAlgebra.normalizeAll(raster1, 0, 255, -9999.0, false);
GridCoverage2D normalizedRaster2 = MapAlgebra.normalizeAll(raster1, 256d, 511d, -9999.0, false);
GridCoverage2D normalizedRaster1 = MapAlgebra.normalizeAll(raster1, 0, 255, false, -9999.0);
GridCoverage2D normalizedRaster2 = MapAlgebra.normalizeAll(raster1, 256d, 511d, false, -9999.0);
GridCoverage2D normalizedRaster3 = MapAlgebra.normalizeAll(raster2);
GridCoverage2D normalizedRaster4 = MapAlgebra.normalizeAll(raster3, 0, 255, 95.0);
GridCoverage2D normalizedRaster5 = MapAlgebra.normalizeAll(raster4, 0, 255);
GridCoverage2D normalizedRaster4 = MapAlgebra.normalizeAll(raster3, 0, 255, true, 95.0);
GridCoverage2D normalizedRaster5 = MapAlgebra.normalizeAll(raster4, 0, 255, true, 255.0);
GridCoverage2D normalizedRaster6 = MapAlgebra.normalizeAll(raster5, 0.0, 255.0, -9999.0, 0.0, 30.0);
GridCoverage2D normalizedRaster7 = MapAlgebra.normalizeAll(raster5, 0, 255, -9999.0, false);
GridCoverage2D normalizedRaster7 = MapAlgebra.normalizeAll(raster5, 0, 255, false, -9999.0);
GridCoverage2D normalizedRaster8 = MapAlgebra.normalizeAll(raster3, 0, 255);
GridCoverage2D normalizedRaster9 = MapAlgebra.normalizeAll(raster3, 0, 255, false);

double[] expected1 = {0.0, 17.0, 34.0, 51.0, 68.0, 85.0, 102.0, 119.0, 136.0, 153.0, 170.0, 187.0, 204.0, 221.0, 238.0, 255.0};
double[] expected2 = {256.0, 273.0, 290.0, 307.0, 324.0, 341.0, 358.0, 375.0, 392.0, 409.0, 426.0, 443.0, 460.0, 477.0, 494.0, 511.0};
double[] expected3 = {0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0};
double[] expected4 = {0.0, 17.0, 34.0, 51.0, 68.0, 85.0, 102.0, 119.0, 136.0, 153.0, 170.0, 187.0, 204.0, 221.0, 238.0, 95.0};
double[] expected5 = {95.0, 17.0, 34.0, 51.0, 68.0, 85.0, 102.0, 119.0, 136.0, 153.0, 170.0, 187.0, 204.0, 221.0, 238.0, 255.0};
double[] expected6 = {0.0, 18.214285714285715, 36.42857142857143, 54.642857142857146, 72.85714285714286, 91.07142857142857, 109.28571428571429, 127.5, 145.71428571428572, 163.92857142857142, 182.14285714285714, 200.35714285714286, 218.57142857142858, 236.78571428571428, 255.0, 255.0};
double[] expected7 = {0.0, 16.0, 33.0, 50.0, 67.0, 84.0, 101.0, 118.0, 135.0, 152.0, 169.0, 186.0, 203.0, 220.0, 237.0, 255.0};
double[] expected8 = {255.0, 16.0, 33.0, 50.0, 67.0, 84.0, 101.0, 118.0, 135.0, 152.0, 169.0, 186.0, 203.0, 220.0, 237.0, 254.0};
double[] expected9 = {0.0, 18.0, 36.0, 54.0, 72.0, 90.0, 108.0, 127.0, 145.0, 163.0, 181.0, 199.0, 217.0, 235.0, 254.0, 255.0};
double[] expected10 = {255.0, 0.0, 18.0, 36.0, 54.0, 72.0, 90.0, 108.0, 127.0, 145.0, 163.0, 181.0, 199.0, 217.0, 235.0, 254.0};

// Step 3: Validate the results for each band
for (int band = 1; band <= 2; band++) {
Expand All @@ -402,6 +408,10 @@ public void testNormalizeAll() throws FactoryException {
assertEquals(Arrays.toString(expected3), Arrays.toString(MapAlgebra.bandAsArray(normalizedRaster3, 1)));
assertEquals(Arrays.toString(expected4), Arrays.toString(MapAlgebra.bandAsArray(normalizedRaster4, 1)));
assertEquals(Arrays.toString(expected5), Arrays.toString(MapAlgebra.bandAsArray(normalizedRaster4, 2)));
assertEquals(Arrays.toString(expected7), Arrays.toString(MapAlgebra.bandAsArray(normalizedRaster8, 1)));
assertEquals(Arrays.toString(expected8), Arrays.toString(MapAlgebra.bandAsArray(normalizedRaster8, 2)));
assertEquals(Arrays.toString(expected9), Arrays.toString(MapAlgebra.bandAsArray(normalizedRaster9, 1)));
assertEquals(Arrays.toString(expected10), Arrays.toString(MapAlgebra.bandAsArray(normalizedRaster9, 2)));
}

@Test
Expand All @@ -428,7 +438,7 @@ private void testNormalizeAll2(int width, int height, String pixelType) throws F
// Check the normalized values and data type
double[] normalizedBandValues = MapAlgebra.bandAsArray(normalizedRaster, 1);
for (int i = 0; i < bandValues.length; i++) {
double expected = (bandValues[i] - 0) * (255 - 0) / (99 - 0);
double expected = (bandValues[i] - 0) * (254 - 0) / (99 - 0);
double actual = normalizedBandValues[i];
switch (normalizedRaster.getRenderedImage().getSampleModel().getDataType()) {
case DataBuffer.TYPE_BYTE:
Expand Down
12 changes: 7 additions & 5 deletions docs/api/sql/Raster-operators.md
Original file line number Diff line number Diff line change
Expand Up @@ -2482,14 +2482,16 @@ SELECT RS_Normalize(band)

### RS_NormalizeAll

Introduction: Normalizes values in all bands of a raster between a given normalization range. The function maintains the data type of the raster values by ensuring that the normalized values are cast back to the original data type of each band in the raster. By default, the values are normalized to range [0, 255]. RS_NormalizeAll can take upto 6 of the following arguments.
Introduction: Normalizes values in all bands of a raster between a given normalization range. The function maintains the data type of the raster values by ensuring that the normalized values are cast back to the original data type of each band in the raster. By default, the values are normalized to range [0, 255]. RS_NormalizeAll can take up to 7 of the following arguments.

- `raster`: The raster to be normalized.
- `minLim` and `maxLim` (Optional): The lower and upper limits of the normalization range. By default, normalization range is set to [0, 255].
- `noDataValue` (Optional): Defines the value to be used for missing or invalid data in raster bands. By default, noDataValue is set to `maxLim`.
- `noDataValue` (Optional): Defines the value to be used for missing or invalid data in raster bands. If not given, `noDataValue` is set to `maxLim` and safety mode is triggered.
- `minValue` and `maxValue` (Optional): Optionally, specific minimum and maximum values of the input raster can be provided. If not provided, these values are computed from the raster data.
- `normalizeAcrossBands` (Optional): A boolean flag to determine the normalization method. If set to true (default), normalization is performed across all bands based on global min and max values. If false, each band is normalized individually based on its own min and max values.

Safety mode is triggered when `noDataValue` is not given: `noDataValue` is set to `maxLim` and data values are normalized to the range [minLim, maxLim-1]. This avoids replacing valid data that might coincide with the new `noDataValue`.
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Does your current implementation allow null as the input of a Sedona Spark (not just sedona-common)? Do we have a test?

If yes, then the sentence here should be `when noDataValue is null or not given`. If no, it should be `when noDataValue is not given`.

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

This is an important consideration, thanks!
Current implementation allows noDataValue to be set to null in Sedona Spark as well. Confirmed this by testing it in Sedona-spark.
Have added tests for the same under Sedona-common. Would it be better to add tests under Sedona-spark as well?

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Accordingly updated the documentation too

Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

@prantogg https://github.com/apache/sedona/blob/master/spark/common/src/main/scala/org/apache/spark/sql/sedona_sql/expressions/raster/MapAlgebra.scala#L182

InferredExpression is null safe, which means it returns null directly if one of the input is null. Please double check.

In addition, RS_NormalizeAll should not be put in MapAlgebra.scala. It should be put in RasterEditors.scala.

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

My bad, I misunderstood

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

InferredExpression is indeed null safe. Have updated the docs accordingly and moved RS_NormalizeAll to RasterEditors.


!!! Warning
Using a noDataValue that falls within the normalization range can lead to loss of valid data. If any data value within a raster band matches the specified noDataValue, it will be replaced and cannot be distinguished or recovered later. Exercise caution in selecting a noDataValue to avoid unintentional data alteration.

Expand All @@ -2501,16 +2503,16 @@ RS_NormalizeAll (raster: Raster)`
RS_NormalizeAll (raster: Raster, minLim: Double, maxLim: Double)
```
```
RS_NormalizeAll (raster: Raster, minLim: Double, maxLim: Double, noDataValue: Double)
RS_NormalizeAll (raster: Raster, minLim: Double, maxLim: Double, normalizeAcrossBands: Boolean)
```
```
RS_NormalizeAll (raster: Raster, minLim: Double, maxLim: Double, noDataValue: Double, normalizeAcrossBands: Boolean)
RS_NormalizeAll (raster: Raster, minLim: Double, maxLim: Double, normalizeAcrossBands: Boolean, noDataValue: Double)
```
```
RS_NormalizeAll (raster: Raster, minLim: Double, maxLim: Double, noDataValue: Double, minValue: Double, maxValue: Double)
```
```
RS_NormalizeAll (raster: Raster, minLim: Double, maxLim: Double, noDataValue: Double, minValue: Double, maxValue: Double, normalizeAcrossBands: Boolean)
RS_NormalizeAll (raster: Raster, minLim: Double, maxLim: Double, normalizeAcrossBands: Boolean, noDataValue: Double, minValue: Double, maxValue: Double)
```

Since: `v1.6.0`
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -220,7 +220,7 @@ class rasteralgebraTest extends TestBaseScala with BeforeAndAfter with GivenWhen
var df = sparkSession.read.format("binaryFile").load(resourceFolder + "raster/test1.tiff")
df = df.selectExpr("RS_FromGeoTiff(content) as raster")
val result1 = df.selectExpr("RS_NormalizeAll(raster, 0, 255) as normalized").first().get(0)
val result2 = df.selectExpr("RS_NormalizeAll(raster, 0, 255, 0) as normalized").first().get(0)
val result2 = df.selectExpr("RS_NormalizeAll(raster, 0, 255, false) as normalized").first().get(0)
assert(result1.isInstanceOf[GridCoverage2D])
assert(result2.isInstanceOf[GridCoverage2D])
}
Expand Down