/
lsma.py
855 lines (728 loc) · 35.8 KB
/
lsma.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
343
344
345
346
347
348
349
350
351
352
353
354
355
356
357
358
359
360
361
362
363
364
365
366
367
368
369
370
371
372
373
374
375
376
377
378
379
380
381
382
383
384
385
386
387
388
389
390
391
392
393
394
395
396
397
398
399
400
401
402
403
404
405
406
407
408
409
410
411
412
413
414
415
416
417
418
419
420
421
422
423
424
425
426
427
428
429
430
431
432
433
434
435
436
437
438
439
440
441
442
443
444
445
446
447
448
449
450
451
452
453
454
455
456
457
458
459
460
461
462
463
464
465
466
467
468
469
470
471
472
473
474
475
476
477
478
479
480
481
482
483
484
485
486
487
488
489
490
491
492
493
494
495
496
497
498
499
500
501
502
503
504
505
506
507
508
509
510
511
512
513
514
515
516
517
518
519
520
521
522
523
524
525
526
527
528
529
530
531
532
533
534
535
536
537
538
539
540
541
542
543
544
545
546
547
548
549
550
551
552
553
554
555
556
557
558
559
560
561
562
563
564
565
566
567
568
569
570
571
572
573
574
575
576
577
578
579
580
581
582
583
584
585
586
587
588
589
590
591
592
593
594
595
596
597
598
599
600
601
602
603
604
605
606
607
608
609
610
611
612
613
614
615
616
617
618
619
620
621
622
623
624
625
626
627
628
629
630
631
632
633
634
635
636
637
638
639
640
641
642
643
644
645
646
647
648
649
650
651
652
653
654
655
656
657
658
659
660
661
662
663
664
665
666
667
668
669
670
671
672
673
674
675
676
677
678
679
680
681
682
683
684
685
686
687
688
689
690
691
692
693
694
695
696
697
698
699
700
701
702
703
704
705
706
707
708
709
710
711
712
713
714
715
716
717
718
719
720
721
722
723
724
725
726
727
728
729
730
731
732
733
734
735
736
737
738
739
740
741
742
743
744
745
746
747
748
749
750
751
752
753
754
755
756
757
758
759
760
761
762
763
764
765
766
767
768
769
770
771
772
773
774
775
776
777
778
779
780
781
782
783
784
785
786
787
788
789
790
791
792
793
794
795
796
797
798
799
800
801
802
803
804
805
806
807
808
809
810
811
812
813
814
815
816
817
818
819
820
821
822
823
824
825
826
827
828
829
830
831
832
833
834
835
836
837
838
839
840
841
842
843
844
845
846
847
848
849
850
851
852
853
854
855
'''
Adjustments to the pysptools library to support linear spectral mixture
analysis (LSMA). Includes classes and functions:
* `PPI`
* `NFINDR`
* `FCLSAbundanceMapper`
* `combine_endmembers_and_normalize()`
* `convex_hull_graham()`
* `endmembers_by_maximum_angle()`
* `endmembers_by_maximum_area()`
* `endmembers_by_maximum_volume()`
* `endmembers_by_query()`
* `hall_rectification()`
* `iterate_endmember_combinations()`
* `normalize_reflectance_within_image()`
* `predict_spectra_from_abundance()`
* `point_to_pixel_geometry()`
* `ravel()`
* `ravel_and_filter()`
* `report_raster_dynamic_range()`
* `subtract_endmember_and_normalize()`
'''
import itertools
import json
import os
import re
import numpy as np
from concurrent.futures import ProcessPoolExecutor
from functools import reduce, partial, wraps
from unmixing.transform import mnf_rotation
from unmixing.utils import array_to_raster, as_array, as_raster, dump_raster, partition, xy_to_pixel, pixel_to_xy, spectra_at_xy, rmse
from lxml import etree
from osgeo import gdal, ogr, osr
from pykml.factory import KML_ElementMaker as KML
import pysptools.eea as sp_extract
import pysptools.abundance_maps as sp_abundance
import pysptools.classification as sp_classify
import pysptools.material_count as sp_matcount
class AbstractAbundanceMapper(object):
    '''
    Base class for abundance mappers. Stores the mixed raster (and its
    HSI-form transpose, as used by pysptools) together with the
    georeferencing metadata that subclasses need during unmixing.
    Arguments:
        mixed_raster    The (p x m x n) raster array to be unmixed
        gt              A GDAL GeoTransform tuple for mixed_raster
        wkt             Projection information as Well-Known Text
        nodata          The NoData value for mixed_raster
        processes       The number of processes to use in mixture analysis
    '''
    def __init__(self, mixed_raster, gt, wkt, nodata=-9999, processes=1):
        assert np.all(np.greater(mixed_raster.shape, 0)), 'Raster array cannot have any zero-length axis'
        self.mixed_raster = mixed_raster
        self.shp = mixed_raster.shape
        # Transposing (p x m x n) yields the (n x m x p) HSI form
        self.hsi = mixed_raster.T
        self.gt = gt
        self.wkt = wkt
        self.nodata = nodata
        self.num_processes = processes
class AbstractExtractor(object):
    '''
    Mixin for endmember extractors (see `PPI`, `NFINDR` below); provides
    export of extracted endmember pixel locations to KML and Shapefile.
    Expects a `get_idx()` method returning pixel coordinates, provided by
    the pysptools extractor class it is mixed into.
    '''
    def get_idx_as_kml(self, path, gt, wkt, data_dict=None):
        '''
        Exports a KML file containing the locations of the extracted endmembers
        as point markers. Arguments:
            path        The output KML file path
            gt          The GDAL GeoTransform of the source raster
            wkt         The projection of the source raster as Well-Known Text
            data_dict   (Optional) Metadata dictionary; only the
                        'wavelength units' key is used here, to label the
                        placemarks
        '''
        # Despite that the HSI cube is the transpose of our raster array, the
        # coordinates returned by `get_idx()` are already in the right order
        # (longitude, latitude) because the (m by n) == (y by x) order
        # transposed is (n by m) == (x by y); the row index is the latitude
        # and the column index is the longitude.
        coords = pixel_to_xy(self.get_idx(), gt=gt, wkt=wkt, dd=True)
        # An endmember at pixel (0,0) is suspicious: it may be photometric
        # shade rather than a real material
        if any(map(lambda x: x[0] == 0 and x[1] == 0, self.get_idx())):
            print('Warning: Target endmember chosen at (0,0)')
            print('One or more endmembers may be photometric shade')
        if data_dict is None:
            data_dict = {
                'wavelength': range(1, len(coords) + 1),
                'wavelength units': 'MNF Component',
                'z plot titles': ['', '']
            }
        # Numbered paddle icons distinguish the endmembers on the map
        ico = 'http://maps.google.com/mapfiles/kml/paddle/%i.png'
        pmarks = []
        for i, pair in enumerate(coords):
            pmarks.append(KML.Placemark(
                KML.Style(
                    KML.IconStyle(
                        KML.Icon(KML.href(ico % (i + 1))))),
                KML.name(data_dict['wavelength units'] + ' %d' % (i + 1)),
                KML.Point(KML.coordinates('%f,%f' % pair))))
        doc = KML.kml(KML.Folder(*pmarks))
        with open(path, 'wb') as source:
            source.write(etree.tostring(doc, pretty_print=True))
    def get_idx_as_shp(self, path, gt, wkt):
        '''
        Exports a Shapefile containing the locations of the extracted
        endmembers. Assumes the coordinates are in decimal degrees.
        Arguments:
            path    The output Shapefile path
            gt      The GDAL GeoTransform of the source raster
            wkt     The projection of the source raster as Well-Known Text
        '''
        coords = pixel_to_xy(self.get_idx(), gt=gt, wkt=wkt, dd=True)
        driver = ogr.GetDriverByName('ESRI Shapefile')
        ds = driver.CreateDataSource(path)
        srs = osr.SpatialReference()
        srs.ImportFromEPSG(4326) # WGS84, i.e., decimal degrees
        layer = ds.CreateLayer(path.split('.')[0], srs, ogr.wkbPoint)
        for pair in coords:
            feature = ogr.Feature(layer.GetLayerDefn())
            # Create the point from the Well Known Text
            point = ogr.CreateGeometryFromWkt('POINT(%f %f)' % pair)
            feature.SetGeometry(point) # Set the feature geometry
            layer.CreateFeature(feature) # Create the feature in the layer
            feature.Destroy() # Destroy the feature to free resources
        # Destroy the data source to free resources
        ds.Destroy()
class PPI(sp_extract.PPI, AbstractExtractor):
    '''
    Pixel Purity Index (PPI) endmember extractor from pysptools, extended
    with `AbstractExtractor` so that the extracted endmember locations can
    be exported as KML or Shapefile.
    '''
    pass
class NFINDR(sp_extract.NFINDR, AbstractExtractor):
    '''
    N-FINDR endmember extractor from pysptools, extended with
    `AbstractExtractor` so that the extracted endmember locations can be
    exported as KML or Shapefile.
    '''
    pass
class FCLSAbundanceMapper(AbstractAbundanceMapper):
    '''
    A class for generating an abundance map, containing both the raw spectral
    (mixed) data and the logic to unmix the data into an abundance map with
    the fully constrained least-squares (FCLS) approach. The "full"
    constraints are the sum-to-one and non-negativity constraints. Given
    q endmembers and p spectral bands, the mapper is forced to find
    abundances within a simplex in a (q-1)-dimensional subspace.
    NOTE: The mixed_raster provided in instantiation is assumed to correspond
    exactly to the mixing space used to induce endmembers; if mixed_raster
    was MNF- or PCA-transformed, for instance, the endmember spectra provided
    in, e.g., map_abundances(), must match this transformation exactly.
    Arguments:
        mixed_raster    The raster to be unmixed; should NOT be in HSI form
                        but should be MNF- or PCA-transformed to match any
                        endmember spectra provided
        gt              A GDAL GeoTransform tuple for the mixed_raster
        wkt             Projection information as Well-Known Text for the
                        mixed_raster
        nodata          The NoData value for mixed_raster
        processes       The number of processes to create in mixture analysis
    '''
    def __init__(self, *args, **kwargs):
        super(FCLSAbundanceMapper, self).__init__(*args, **kwargs)
        # pysptools' fully constrained least-squares solver
        self.mapper = sp_abundance.FCLS()
    def __lsma__(self, cases, endmembers):
        # For regular LSMA with single endmember spectra
        # c is number of pixels, k is number of bands
        c, k = cases.shape if len(cases.shape) > 1 else (1, cases.shape[0])
        # The (1 x c x k) reshape presents the chunk to pysptools as an
        # HSI cube with a single row of c pixels
        return self.mapper.map(cases.reshape((1, c, k)), endmembers,
            normalize = False)
    def __mesma__(self, array_pairs):
        # For multiple endmember spectra, in chunks
        cases, endmembers = array_pairs
        # c is number of pixels, k is number of bands
        c, k = cases.shape if len(cases.shape) > 1 else (1, cases.shape[0])
        # Each pixel in the chunk is unmixed against its own endmember array
        return [
            self.mapper.map(cases[i,...].reshape((1, 1, k)), endmembers[i,...],
                normalize = False) for i in range(0, c)
        ]
    def __mesma2__(self, array_pairs):
        # For multiple endmember spectra, pixel-wise
        # NOTE: This pixel-wise implementation might be slower than __mesma__
        # for large arrays
        cases, endmembers = array_pairs
        # c is number of pixels, k is number of bands
        c, k = cases.shape if len(cases.shape) > 1 else (1, cases.shape[0])
        return self.mapper.map(cases.reshape((1, c, k)), endmembers,
            normalize = False)
    def map_abundance(self, endmembers, pixelwise=False):
        '''
        Arguments:
            endmembers  A numpy.ndarray of endmembers; either (q x p) array
                        of q endmembers and p bands (for regular LSMA) or a
                        (c x q x p) array, where c = m*n, for multiple
                        endmember spectra for each pixel.
            pixelwise   True to unmix one pixel at a time when multiple
                        endmember spectra are provided
        Returns: An (m x n x q) numpy.ndarray (in HSI form) that contains
            the abundances for each of q endmember types.
        '''
        q = endmembers.shape[-2]
        # FCLS with the sum-to-one constraint has an extra degree of freedom so it
        # is able to form a simplex of q corners in (q-1) dimensions:
        # q <= n (Settle and Drake, 1993)
        k = q - 1 # Find q corners of simplex in (q-1) dimensions
        endmembers = endmembers[...,0:k]
        shp = self.hsi.shape
        # Flatten the HSI cube to a (pixels x bands) matrix, keeping only
        # the first k bands
        base_array = self.hsi[:,:,0:k].reshape((shp[0] * shp[1], k))
        # Get indices for each process' work range
        work = partition(base_array, self.num_processes, axis=0)
        with ProcessPoolExecutor(max_workers = self.num_processes) as executor:
            # We're working with multiple endmembers
            if endmembers.ndim == 3 and pixelwise:
                result = executor.map(self.__mesma2__, [ # Work done pixel-wise
                    (base_array[i,...], endmembers[i,...]) for i in range(0, base_array.shape[0])
                ])
            elif endmembers.ndim == 3:
                result = executor.map(self.__mesma__, [
                    (base_array[i:j,...], endmembers[i:j,...]) for i, j in work
                ])
            # We're working with a single endmember per class
            else:
                # Curry an unmixing function with the present endmember array
                unmix = partial(self.__lsma__, endmembers = endmembers)
                result = executor.map(unmix, [
                    base_array[i:j,...] for i, j in work
                ])
            combined_result = list(result) # Executes the multiprocess suite
        if endmembers.ndim == 3 and not pixelwise:
            # When chunking with multiple endmembers, we get list of lists
            ext_array = [y for x in combined_result for y in x] # Flatten once
            return np.concatenate(ext_array, axis = 1)\
                .reshape((shp[0], shp[1], q))
        # Chunk results are stacked along the pixel axis, then reshaped to
        # the image's (m x n x q) HSI form
        return np.concatenate(combined_result, axis = 1)\
            .reshape((shp[0], shp[1], q))
    def validate_by_forward_model(
            self, ref_image, abundances, ref_spectra=None,
            ref_em_locations=None, dd=False, nodata=-9999, r=10000,
            as_pct=True, convert_nodata=False):
        '''
        Validates LSMA result in the forward model of reflectance, i.e.,
        compares the observed reflectance in the original (mixed) image to
        the reflectance predicted by a forward model using the estimated
        abundances and the provided endmember spectra. NOTE: Does not apply
        in the case of multiple endmember spectra; requires only one spectral
        profile per endmember type.
        Arguments:
            ref_image   A raster array of the reference spectra (not MNF-
                        transformed data).
            abundances  A raster array of abundances; a (q x m x n) array for
                        q abundance types (q endmembers).
            ref_spectra With single endmember spectra, user can provide the
                        reference spectra, e.g., the observed reflectance for
                        each endmember (not MNF spectra).
            ref_em_locations With single endmember spectra, user can provide
                        the coordinates of each endmember, so that reference
                        spectra can be extracted for validation.
            dd          True if ref_em_locations provided and the coordinates
                        are in decimal degrees.
            nodata      The NoData value to use.
            r           The number of random samples to take in calculating
                        RMSE.
            as_pct      Report normalized RMSE (as a percentage).
            convert_nodata
                        True to convert all NoData values to zero (as in zero
                        reflectance)
        Returns: A percentage string if as_pct is True, otherwise a float.
        '''
        rastr = ref_image.copy()
        assert (ref_spectra is not None) or (ref_em_locations is not None), 'When single endmember spectra are used, either ref_spectra or ref_em_locations must be provided'
        if ref_spectra is not None:
            assert ref_spectra.shape[0] == abundances.shape[0], 'One reference spectra must be provided for each endmember type in abundance map'
        else:
            # Get the spectra for each endmember from the reference dataset
            ref_spectra = spectra_at_xy(ref_image, ref_em_locations,
                self.gt, self.wkt, dd = dd)
        # Convert the NoData values to zero reflectance
        if convert_nodata:
            rastr[rastr == nodata] = 0
            ref_spectra[ref_spectra == nodata] = 0
        shp = rastr.shape # Reshape the arrays
        arr = rastr.reshape((shp[0], shp[1]*shp[2]))
        # Generate random sampling indices
        idx = np.random.choice(np.arange(0, arr.shape[1]), r)
        # Get the predicted reflectances
        preds = predict_spectra_from_abundance(ravel(abundances), ref_spectra)
        assert preds.shape == arr.shape, 'Prediction and observation matrices are not the same size'
        # Take the mean RMSE (sum of RMSE divided by number of pixels), after
        # the residuals are normalized by the number of endmembers
        rmse_value = rmse(arr, preds, idx, n = ref_spectra.shape[0], nodata = nodata).sum() / r
        norm = 1
        if as_pct:
            # Divide by the range of the measured data; minimum is zero
            norm = arr.max()
            return str(round(rmse_value / norm * 100, 2)) + '%'
        return round(rmse_value / norm, 2)
def combine_endmembers_and_normalize(
        abundances, es=(1, 2), at_end=True, nodata=-9999):
    '''
    Combines two endmembers from a fraction image into a single endmember.
    If the original endmember abundances summed to one, they will sum to one
    in the resulting image as well. Arguments:
        abundances  The (p x m x n) raster array of endmember abundances
        es          A two-element tuple of the endmember indices to combine
        at_end      Place the combined endmember at the end of the array?
        nodata      The NoData value to ignore (replaced with zero before
                    summing)
    Returns: A ((p-1) x m x n) raster array.
    '''
    shp = abundances.shape
    rast = abundances.copy() # Copy raster array
    rast[rast == nodata] = 0 # Replace NoData values
    # Sum the two chosen endmembers into a single (1 x m x n) band; the
    # original relied on broadcasting `c0 + c1.reshape(...)` -- the explicit
    # grouping here makes the intent clear and gives identical values
    combined = (rast[es[0], ...] + rast[es[1], ...]).reshape((1, shp[1], shp[2]))
    # Stack the remaining (uncombined) endmember bands
    abunds = [rast[e, ...] for e in range(0, shp[0]) if e not in es]
    # FIX: If only the two combined endmembers were present, np.vstack()
    # would fail on the empty remainder list
    if not abunds:
        return combined
    if at_end:
        comps = (abunds, combined)
    else:
        comps = (combined, abunds)
    return np.vstack(comps)
def convex_hull_graham(points, indices=False):
    '''
    Returns points on convex hull of an array of points in CCW order according
    to Graham's scan algorithm. By Tom Switzer <thomas.switzer@gmail.com>.
    Arguments:
        points  The points for which a convex hull is sought
        indices True to return a tuple of (indices, hull)
    '''
    TURN_LEFT = 1
    def cross_sign(p, q, r):
        # Sign of the z-component of (q - p) x (r - p):
        # +1 = left turn, -1 = right turn, 0 = collinear
        det = (q[0] - p[0]) * (r[1] - p[1]) - (r[0] - p[0]) * (q[1] - p[1])
        return (det > 0) - (det < 0)
    def keep_left(hull, point):
        # Pop vertices that would make a clockwise (or straight) turn
        while len(hull) > 1 and cross_sign(hull[-2], hull[-1], point) != TURN_LEFT:
            hull.pop()
        if not len(hull) or hull[-1] != point:
            hull.append(point)
        return hull
    ordered = sorted(points)
    lower = reduce(keep_left, ordered, [])
    upper = reduce(keep_left, reversed(ordered), [])
    # Join the two half-hulls; the endpoints of the upper hull duplicate
    # the endpoints of the lower hull, so they are dropped
    hull = lower + upper[1:-1]
    if indices:
        return ([points.index(h) for h in hull], hull)
    return hull
def endmembers_by_maximum_angle(
        rast, targets, ref_target, gt=None, wkt=None, dd=False):
    '''
    Locates endmembers in (2-dimensional) feature space as the triad (3-corner
    simplex) that maximizes the angle formed with a reference endmember target.
    Returns the endmember coordinates in feature (not geographic) space.
    Arguments:
        rast        The raster that describes the feature space
        targets     The coordinates (in feature space) of all other points
        ref_target  The coordinates (in feature space) of a point held fixed
        gt          The GDAL GeoTransform
        wkt         The GDAL WKT projection
        dd          True for coordinates in decimal degrees
    Angle calculation from:
    http://stackoverflow.com/questions/2827393/angles-between-two-n-dimensional-vectors-in-python/13849249#13849249
    '''
    def unit_vector(vector):
        # Returns the unit vector of the vector.
        return vector / np.linalg.norm(vector)
    def angle_between(v1, v2):
        # Returns the angle in radians between vectors 'v1' and 'v2'
        v1_u = unit_vector(v1)
        v2_u = unit_vector(v2)
        # Clip guards against dot products just outside [-1, 1] from
        # floating-point error
        return np.arccos(np.clip(np.dot(v1_u, v2_u), -1.0, 1.0))
    # Can accept either a gdal.Dataset or numpy.array instance
    if not isinstance(rast, np.ndarray):
        rastr = rast.ReadAsArray()
        gt = rast.GetGeoTransform()
        wkt = rast.GetProjection()
    else:
        assert gt is not None and wkt is not None, 'gt and wkt arguments required'
        rastr = rast.copy()
    # Get the spectra for these targets; this works in two dimensions only
    ref_spec = spectra_at_xy(rast, (ref_target,), gt, wkt)[...,0:2].reshape((2,))
    target_specs = spectra_at_xy(rast, targets, gt, wkt)[...,0:2]
    # All combinations of 2 of the targets
    combos = list(itertools.combinations(range(max(target_specs.shape)), 2))
    spec_map = [
        [target_specs[i,:] for i in triad] for triad in combos
    ]
    coord_map = [
        [targets[i] for i in triad] for triad in combos
    ]
    # Find vectors from ref_spec, not from origin (by vector subtraction)
    # If (cx) is the ref_spec vector (line from origin to ref_spec),
    # and (ca) and (cb) are the vectors to the points that form the angle
    # (axb), then [(cx) - (ca)] and [(cx) - (cb)] are the vectors from point
    # x to the points a and b, respectively.
    vectors = [(ref_spec - a, ref_spec - b) for a, b in spec_map]
    angles = [angle_between(v1, v2) for v1, v2 in vectors]
    # The pair forming the widest angle at the reference point is optimal
    idx = angles.index(max(angles))
    specs = spec_map[idx] # The optimized spectra
    locs = coord_map[idx] # The optimized coordinates
    specs.insert(0, ref_spec) # Add the reference target
    locs.insert(0, ref_target) # Add the reference coordinates
    return (np.array(specs), locs)
def endmembers_by_maximum_area(
        rast, targets, area_dim=2, gt=None, wkt=None, dd=False):
    '''
    Finds three (3) endmembers by finding the maximum area of the mixing
    space defined by every possible combination of endmembers, where each
    endmember type appears only once. Arguments:
        rast        The raster whose mixing space is to be investigated
        targets     The pseudo-invariant feature sets, a dictionary
        area_dim    The number of dimensions to use in area calculation;
                    only 2 or 3 dimensions are supported
        gt          The GDAL GeoTransform
        wkt         The GDAL WKT projection
        dd          True for coordinates in decimal degrees
    Returns: A tuple of (spectra, locations) for the optimal triad.
    '''
    def area(a, b, c) :
        # Courtesy of Corentin Lapeyre
        # (http://code.activestate.com/recipes/576896-3-point-area-finder/)
        return 0.5 * np.linalg.norm(np.cross(b-a, c-a))
    # Every candidate combination of three endmembers (ndim=3)
    spec_map, coord_map = iterate_endmember_combinations(rast, targets,
        ref_target=None, ndim=3, gt=gt, wkt=wkt, dd=dd)
    # Area of the triangle spanned by each triad, using the first
    # area_dim components of each spectrum
    area_totals = [area(*[f[0:area_dim] for f in each]) for each in spec_map]
    idx = area_totals.index(max(area_totals)) # Which set is optimal?
    specs = spec_map[idx] # The optimized spectra
    locs = list(coord_map[idx])
    return (np.array(specs), locs)
def endmembers_by_maximum_volume(
        rast, targets, ref_target=None, ndim=3, gt=None, wkt=None, dd=False):
    '''
    Finds the combination of endmember candidates that spans the mixing
    space of maximum volume, where volume is proportional to the absolute
    determinant of the matrix of endmember spectra. Arguments:
        rast        The raster that describes the feature space
        targets     The coordinates (in feature space) of all other points
        ref_target  (Optional) Can constrain the volume formed by one point
        ndim        The number of dimensions in which to calculate the solution
        gt          The GDAL GeoTransform
        wkt         The GDAL WKT projection
        dd          True for coordinates in decimal degrees
    NOTE: Argument ndim determines how many endmembers are returned; the
    production of combinations depends on a square matrix, so ndim must also
    be equal to or less than the dimensionality of the image array.
    '''
    spec_map, coord_map = iterate_endmember_combinations(
        rast, targets, ref_target, ndim, gt, wkt, dd)
    # |det| of each candidate matrix is proportional to the true volume of
    # the mixing space spanned by those endmembers
    volumes = [np.abs(np.linalg.det(m)) for m in spec_map]
    best = volumes.index(max(volumes))
    specs = spec_map[best] # The optimized spectra
    locs = list(coord_map[best]) # The optimized coordinates
    if ref_target is not None:
        locs.insert(0, ref_target) # Add the reference coordinates
    return (np.array(specs), locs)
def endmembers_by_query(rast, query, gt, wkt, dd=False):
    '''
    Returns a list of endmember locations based on a provided query, e.g.:
    > query = rast[1,...] < -25 # Band 2 should be less than -25
    > endmembers_by_query(rast, query, gt, wkt)
    Arguments:
        rast    The raster array to find endmembers within
        query   A NumPy boolean array representing a query in the feature space
        gt      The GDAL GeoTransform
        wkt     The GDAL WKT projection
        dd      True for coordinates in decimal degrees
    '''
    assert isinstance(rast, np.ndarray), 'Requires a NumPy array'
    nrows, ncols = rast.shape[-2], rast.shape[-1]
    grid = np.indices((nrows, ncols))
    # Select the (row, col) index pairs that satisfy the query, then flip
    # each to (col, row) == (x, y) pixel order for coordinate conversion
    matches = grid[:, query].T
    pairs = [(col, row) for row, col in matches]
    return list(pixel_to_xy(pairs, gt, wkt, dd=dd))
def hall_rectification(
        reference, subject, out_path, ref_set, sub_set, dd=False,
        nodata=-9999, dtype=np.int32, keys=('High/Bright', 'Low/Dark'),
        verbose=False):
    '''
    Performs radiometric rectification after Hall et al. (1991) in Remote
    Sensing of Environment. Assumes first raster is the reference image and
    that none of the targets are NoData pixels in the reference image (they
    are filtered out in the subject images). Arguments:
        reference   The reference image, a gdal.Dataset
        subject     The subject image, a gdal.Dataset
        out_path    Path to a directory where the rectified images should be stored
        ref_set     Sequence of two sequences: "bright" radiometric control set,
                    then "dark" radiometric control set for reference image
        sub_set     As with ref_set, a sequence of sequences (e.g., list of two
                    lists): [[<bright targets>], [<dark targets]]
        dd          Coordinates are in decimal degrees?
        dtype       Data type (NumPy dtype) for the array; default is 32-bit Int
        nodata      The NoData value to use for all the rasters
        keys        The names of the dictionary keys for the bright, dark sets,
                    respectively
    '''
    # Unpack bright, dark control sets for subject image
    bright_targets, dark_targets = (sub_set[keys[0]], sub_set[keys[1]])
    # Calculate the mean reflectance in each band for bright, dark targets
    brights = spectra_at_xy(reference, ref_set[keys[0]], dd=dd)
    # NoData rows are identified by their first band's value
    bright_ref = brights[brights[:,0] != nodata].mean(axis = 0)
    darks = spectra_at_xy(reference, ref_set[keys[1]], dd=dd)
    dark_ref = darks[darks[:,0] != nodata].mean(axis = 0)
    if verbose:
        print('-- Bright reference:', str(np.round(bright_ref, 0).tolist()))
        print('-- Dark reference:', str(np.round(dark_ref, 0).tolist()))
    # Calculate transformation for the target image
    brights = spectra_at_xy(subject, bright_targets, dd=dd) # Prepare to filter NoData pixels
    darks = spectra_at_xy(subject, dark_targets, dd=dd)
    # Get the "subject" image means for each radiometric control set
    mean_bright = brights[brights[:,0] != nodata].mean(axis = 0)
    mean_dark = darks[darks[:,0] != nodata].mean(axis = 0)
    if verbose:
        print('-- Bright subject mean:', str(np.round(mean_bright, 0).tolist()))
        print('-- Dark subject mean:', str(np.round(mean_dark, 0).tolist()))
    # Calculate the coefficients of the linear transformation (per band):
    # slope m and intercept b that map the subject's control means onto
    # the reference's control means
    m = (bright_ref - dark_ref) / (mean_bright - mean_dark)
    b = (dark_ref * mean_bright - mean_dark * bright_ref) / (mean_bright - mean_dark)
    arr = subject.ReadAsArray()
    shp = arr.shape # Remember the original shape
    mask = arr.copy() # Save the NoData value locations
    # Broadcast m and b to the flattened (bands x pixels) image shape
    m = m.reshape((shp[0], 1))
    b = b.reshape((shp[0], 1)).T.repeat(shp[1] * shp[2], axis=0).T
    arr2 = ((arr.reshape((shp[0], shp[1] * shp[2])) * m) + b).reshape(shp)
    arr2[mask == nodata] = nodata # Re-apply NoData values
    # Dump the raster to a file
    out_path = os.path.join(out_path, 'rect_%s' % os.path.basename(subject.GetDescription()))
    dump_raster(
        array_to_raster(
            arr2, subject.GetGeoTransform(), subject.GetProjection(),
            dtype=dtype), out_path)
def iterate_endmember_combinations(
        rast, targets, ref_target=None, ndim=3, gt=None, wkt=None, dd=False):
    '''
    Creates all possible combinations of endmembers from a common pool or from
    among groups of possible endmembers (when `targets` is a dictionary). When
    a dictionary is provided, endmember combinations will contain (only) one
    endmember from each group, where the groups are defined by dictionary keys,
    i.e., `{'group1': [(x, y), ...], 'group2': [(x, y), ...], ...}`.
    Arguments:
        rast        Input raster array
        targets     Possible endmembers; as a list or dictionary
        ref_target  (Optional) Constrain the optimization to always include
                    this endmember
        ndim        Number of dimensions to limit the search to
        gt          (Optional) A GDAL GeoTransform, required for array input
        wkt         (Optional) A GDAL WKT projection, required for array input
        dd          True if the target coordinates are in decimal degrees
    Returns: A tuple of (spec_map, coord_map): candidate spectra sets and
        their corresponding target coordinates.
    '''
    # Can accept either a gdal.Dataset or numpy.array instance
    if not isinstance(rast, np.ndarray):
        rastr = rast.ReadAsArray()
        gt = rast.GetGeoTransform()
        wkt = rast.GetProjection()
    else:
        assert gt is not None and wkt is not None, 'gt and wkt arguments required'
        rastr = rast.copy()
    # `targets` is a dictionary
    if isinstance(targets, dict):
        # Get the spectra for these targets; this works in two dimensions only
        target_specs = {}
        for label, cases in targets.items():
            target_specs[label] = spectra_at_xy(rast, targets[label], gt, wkt)[...,0:ndim]
        ncom = ndim # Determinant only defined for square matrices
        if ref_target is not None:
            assert ndim == len(targets.keys()) + 1, 'Number of groups among target endmembers should be one less than the dimensionality when ref_target is used'
            ncom -= 1 # If reference target used, form combinations with 1 fewer
            ref_spec = spectra_at_xy(rast, (ref_target,), gt, wkt,
                dd=dd)[...,0:ndim].reshape((ndim,))
        # Cartesian product: one endmember drawn from each group
        spec_map = list(itertools.product(*[target_specs[label] for label in target_specs.keys()]))
        coord_map = list(itertools.product(*[t[1] for t in targets.items()]))
    else:
        # Get the spectra for these targets; this works in two dimensions only
        target_specs = spectra_at_xy(rast, targets, gt, wkt)[...,0:ndim]
        ncom = ndim # Determinant only defined for square matrices
        if ref_target is not None:
            ncom -= 1 # If reference target used, form combinations with 1 fewer
            ref_spec = spectra_at_xy(rast, (ref_target,), gt, wkt,
                dd=dd)[...,0:ndim].reshape((ndim,))
        # Find all possible combinations of (ncom) of these spectra
        combos = list(itertools.combinations(range(max(target_specs.shape)), ncom))
        spec_map = [[target_specs[i,:] for i in triad] for triad in combos]
        coord_map = [[targets[i] for i in triad] for triad in combos]
    # Add the reference target to each combination
    if ref_target is not None:
        spec_map = list(map(list, spec_map))
        for spec in spec_map:
            # FIXME Cannot use insert with tuples when dictionary input is provided
            spec.insert(0, ref_spec)
    return (spec_map, coord_map)
def normalize_reflectance_within_image(
        rast, band_range=(0, 5), nodata=-9999, scale=100):
    '''
    Following Wu (2004, Remote Sensing of Environment), normalizes the
    reflectances in each pixel by the average reflectance *across bands.*
    This is an attempt to mitigate within-endmember variability. Arguments:
        rast        A gdal.Dataset or numpy.array instance
        band_range  The (first, last) bands, inclusive, to average over
        nodata      The NoData value to use (and value to ignore)
        scale       (Optional) Wu's definition scales the normalized
                    reflectance by 100 for some reason; another reasonable
                    value would be 10,000 (approximating scale of Landsat
                    reflectance units); set to None for no scaling.
    '''
    # Can accept either a gdal.Dataset or numpy.array instance
    if isinstance(rast, np.ndarray):
        arr = rast.copy()
    else:
        arr = rast.ReadAsArray()
    p, m, n = arr.shape
    first, last = band_range
    # Per-pixel mean reflectance over the selected bands (the +1 makes the
    # upper bound inclusive)
    band_mean = arr[first:(last + 1), ...].mean(axis=0)
    normalized = arr.reshape((p, m * n)) / band_mean.reshape((1, m * n)).repeat(p, axis=0)
    # Recover original shape; scale if necessary
    normalized = normalized.reshape((p, m, n))
    if scale is not None:
        normalized = normalized * scale
    # Fill in the NoData areas from the original raster
    np.place(normalized, arr == nodata, nodata)
    return normalized
def point_to_pixel_geometry(
        points, source_epsg=None, target_epsg=None, pixel_side_length=30):
    '''
    Where points is a list of X,Y tuples and X and Y are coordinates in
    meters, returns a series of OGR Polygons where each Polygon is the
    pixel extent with a given point at its center. Assumes square pixels.
    Arguments:
        points      Sequence of X,Y numeric pairs or OGR Point geometries
        source_epsg The EPSG code of the source projection (Optional)
        target_epsg The EPSG code of the target projection (Optional)
        pixel_side_length   The length of one side of the intended pixel
    Returns: A list of ogr.Geometry polygons, one per input point.
    '''
    polys = []
    # Convert points to atomic X,Y pairs if necessary
    if isinstance(points[0], ogr.Geometry):
        points = [(p.GetX(), p.GetY()) for p in points]
    source_ref = target_ref = None
    # Reprojection is only set up when BOTH EPSG codes are given
    if all((source_epsg, target_epsg)):
        source_ref = osr.SpatialReference()
        target_ref = osr.SpatialReference()
        source_ref.ImportFromEPSG(source_epsg)
        target_ref.ImportFromEPSG(target_epsg)
        transform = osr.CoordinateTransformation(source_ref, target_ref)
    for p in points:
        r = pixel_side_length / 2 # Half the pixel width
        ring = ogr.Geometry(ogr.wkbLinearRing) # Create a ring
        vertices = [
            (p[0] - r, p[1] + r), # Top-left
            (p[0] + r, p[1] + r), # Top-right, clockwise from here...
            (p[0] + r, p[1] - r),
            (p[0] - r, p[1] - r),
            (p[0] - r, p[1] + r) # Add top-left again to close ring
        ]
        # Coordinate transformation
        # NOTE(review): TransformPoint axis order depends on the GDAL
        # version/axis-mapping strategy in use -- confirm for GDAL 3+
        if all((source_ref, target_ref)):
            vertices = [transform.TransformPoint(*v)[0:2] for v in vertices]
        for vert in vertices:
            ring.AddPoint(*vert)
        poly = ogr.Geometry(ogr.wkbPolygon)
        poly.AddGeometry(ring)
        polys.append(poly)
    return polys
def predict_spectra_from_abundance(abundances, endmembers):
    '''
    Predicts the mixed image from the endmember spectra and the fractional
    abundances, i.e. R = AS + e, where R is the reflectance of a given pixel,
    S is the matrix of endmember spectra, and A is the matrix of abundances.
    Endmembers should be in (p x q) form, where each column corresponds to the
    spectra of a given endmember. Return either a (p x 1) vector of the
    predicted (mixed) spectra or a (p x (m*n)) matrix of the predicted (mixed)
    spectra for (m*n) total abundance estimates. Arguments:
        abundances  (c x q) vector of abundances for q abundance types for
                    c total pixels (1 or more)
        endmembers  (q x p) vector of endmember spectra for p spectral bands
    '''
    # A (c x q) times S (q x p) gives (c x p); swap the first two axes so
    # that bands index the rows of the result
    predicted = np.dot(abundances, endmembers)
    return predicted.swapaxes(0, 1)
def ravel(arr):
    '''
    Reshapes a (p, m, n) array to ((m*n), p) where p is the number of
    dimensions. Assumes the first axis is the shortest. A convenience
    wrapper for `ravel_and_filter()` with NoData filtering disabled.
    Arguments:
        arr     A NumPy array with shape (p, m, n)
    '''
    return ravel_and_filter(arr, filter=False)
def ravel_and_filter(arr, filter=True, nodata=-9999):
    '''
    Reshapes a (p, m, n) array to ((m*n), p) where p is the number of
    dimensions and, optionally, filters out the NoData values. Assumes the
    first axis is the shortest. Arguments:
        arr     A NumPy array with shape (p, m, n)
        filter  True to filter out NoData values (otherwise, only ravels)
        nodata  The NoData value; only used in filtering
    '''
    shp = arr.shape
    # If the array has already been raveled (1D), there is nothing to
    # reshape; FIX: previously, a 1D array with filter=False fell through
    # to the multi-band branch and raised IndexError on shp[1]
    if len(shp) == 1:
        return arr[arr != nodata] if filter else arr
    # If a "single-band" image
    if len(shp) == 2:
        arr = arr.reshape(1, shp[-2]*shp[-1]).swapaxes(0, 1)
        if filter:
            return arr[arr != nodata]
    # For multi-band images
    else:
        arr = arr.reshape(shp[0], shp[1]*shp[2]).swapaxes(0, 1)
        if filter:
            # A row (pixel) is NoData if its first band is NoData
            return arr[arr[:,0] != nodata]
    return arr
def report_raster_dynamic_range(
        path, bands=(1,2,3,4,5,7), tpl='HDF4_EOS:EOS_GRID:"%s":Grid:sr_band%d', lj=40):
    '''
    Prints out the dynamic range of a given raster, averaged across the bands.
    Arguments:
        path    The file path to a raster
        bands   The bands within which dynamic range should be calculated
        tpl     The template for the GDAL subdataset names
        lj      The left justification (for text output)
    '''
    # ComputeStatistics() returns the (0) minimum, (1) maximum, (2) mean and
    # (3) standard deviation; DR is max - min
    dr = lambda stats: stats[1] - stats[0]
    stddev = lambda stats: stats[3]
    dr_by_band = [] # Dynamic range by band
    stddev_by_band = [] # Standard deviation by band
    rast, gt0, wkt = as_raster(path)
    # If the dataset is an HDF or some other linked dataset
    if rast.GetRasterBand(1) is None:
        for b in bands:
            subrast = gdal.Open(tpl % (path, b))
            try:
                stats = subrast.GetRasterBand(1).ComputeStatistics(False)
            except AttributeError:
                # FIX: skip missing subdatasets entirely; previously `stats`
                # could be unbound or stale from a prior iteration
                continue
            dr_by_band.append(dr(stats))
            # FIX: was appending dr(stats), so the reported "std. dev." was
            # actually the dynamic range
            stddev_by_band.append(stddev(stats))
    # If, instead, the dataset is a flat stack like a GeoTIFF
    else:
        for i, b in enumerate(bands):
            stats = rast.GetRasterBand(i + 1).ComputeStatistics(False)
            dr_by_band.append(dr(stats))
            stddev_by_band.append(stddev(stats)) # FIX: was dr(stats)
    print('{:.2f} ({:.0f} max std. dev.) -- {:s}'.format(
        np.mean(dr_by_band), max(stddev_by_band),
        os.path.basename(path).ljust(lj)))
def subtract_endmember_and_normalize(abundances, e):
    '''
    Subtract the endmember at index e from the raster cube and normalize
    the remaining endmembers so that they sum to one. Arguments:
        abundances  A (p x m x n) raster array cube of abundance estimates
        e           The index of the endmember to subtract
    Returns: A ((p-1) x m x n) raster array.
    '''
    shp = abundances.shape
    # Drop the endmember band at index e, then flatten to (bands x pixels)
    stack = np.delete(abundances, e, axis=0)\
        .reshape((shp[0] - 1, shp[1]*shp[2]))
    # Normalize each pixel (column) by its total abundance; vectorized
    # broadcast equivalent of the much slower
    # np.apply_along_axis(lambda x: x / x.sum(), 0, stack)
    return (stack / stack.sum(axis=0))\
        .reshape((shp[0] - 1, shp[1], shp[2]))