# Bug replication: setting statistics via update_tags

This notebook was written to test RasterIO's functionality, and while working with it I appear to have exposed a bug.  The code below attempts to replicate the issue from scratch.

In [13]:
import rasterio
import os

In [14]:
# Fetch a small (1227x1343) mosaic'ed LANDSAT image to use as test input.
# The download is skipped when a local copy is already present.
fname = 'Midwest_Mosaic.tif'
url = 'https://github.com/ebo/pangeo-tutorials/raw/master/data/Landsat_Mosaics/'+fname

have_local_copy = os.path.exists(fname)
if not have_local_copy:
    print("downloading test image file '%s'"%fname)
    import urllib.request
    urllib.request.urlretrieve(url, fname)

In [15]:
def to_tiff(data, fname, template=None, **kwargs):
    """Write a 2D or 3D array to a GeoTIFF and tag it with summary statistics.

    Parameters
    ----------
    data : numpy.ndarray
        A single 2D surface or a 3D stack.  A 2D array is promoted to a
        one-band stack; for 3D input axis 0 is used as the band axis,
        axis 1 as the height and axis 2 as the width.
    fname : str
        Output path.  An existing file at this path and an existing
        ``fname + ".xml"`` file are removed before writing.
    template : str, optional
        Path of a raster whose profile and dataset-level tags seed the
        output file.
    **kwargs
        Merged over the template profile and forwarded to
        ``rasterio.open``.  Two keys get extra handling: ``meta`` is merged
        with any ``meta`` entry of the template profile, and ``tags`` is
        written as dataset-level tags after everything else.

    Returns
    -------
    None
        Input that is neither 2D nor 3D is reported with a printed
        message and nothing is written.
    """
    import numpy as np

    def nan_stats(values):
        # NaN-aware summary used for both the per-band and the overall tags.
        # not sure what the proper name for per band stats is in QGIS
        return {
            'STATISTICS_MINIMUM': np.nanmin(values),
            'STATISTICS_MAXIMUM': np.nanmax(values),
            'STATISTICS_MEAN': np.nanmean(values),
            'STATISTICS_STDDEV': np.nanstd(values)}

    # check and promote the number of dimensions for consistency: the rest
    # of the function always indexes data as a stack of surfaces
    if data.ndim == 2:
        data = np.expand_dims(data, axis=0)
    elif data.ndim != 3:
        print("Error: to_tiff can only currently deal with 2D and 3D data")
        return

    profile = {}
    tags = {}
    if template:
        # the context manager closes the template as soon as its profile
        # and tags have been copied, instead of leaving the handle open
        with rasterio.open(template, 'r') as tmpl:
            profile = tmpl.profile.copy()
            tags = tmpl.tags()

    # the metadata should be appended.  Cache here to
    # simplify variable replacement below.
    meta = {}
    if 'meta' in profile:
        meta.update(profile['meta'])
    if 'meta' in kwargs:
        meta.update(kwargs['meta'])

    # overwrite anything inherited from the template with
    # user supplied args
    profile.update(kwargs)

    # overwrite bits that write the array as geotiff and
    # save the cached metadata
    profile['driver'] = 'GTiff'
    profile['count'] = data.shape[0]
    profile['width'] = data.shape[2]
    profile['height'] = data.shape[1]
    profile['meta'] = meta
    # NOTE(review): 'meta' and any 'tags' kwarg stay in the profile and are
    # therefore forwarded to rasterio.open as keyword arguments -- confirm
    # that this is intended.

    if 'dtype' not in profile:
        # take the dtype from the array itself; this also works when the
        # array has no elements to sample
        profile['dtype'] = data.dtype.name

    # if you do not remove the previously associated .xml file,
    # then the tags and metadata can get corrupted.  Each file is removed
    # independently so a missing output file does not leave a stale .xml
    # behind.
    # NOTE(review): GDAL's own sidecar files are usually named
    # "<file>.aux.xml" -- confirm that ".xml" is the suffix meant here.
    for stale in (fname, fname + ".xml"):
        try:
            os.remove(stale)
        except FileNotFoundError:
            pass  # nothing to clean up
        except OSError as err:
            # stay best-effort, but do not hide the reason
            print("Warning: could not remove '%s': %s" % (stale, err))

    with rasterio.open(fname, 'w', **profile) as out:
        for b in range(data.shape[0]):
            print("\nprocessing band %d"%(b+1))
            band = data[b]
            # rasterio band indexes are 1-based
            out.write(band.astype(profile['dtype']), b+1)

            stats = nan_stats(band)
            out.update_tags(b+1, **stats)
            print("  stats= %s"%str(stats))

        # dataset-level statistics over all bands together
        stats = nan_stats(data)
        out.update_tags(**stats)
        print("\n  overall stats= %s\n"%str(stats))

        # template tags first, then user tags, so user values win
        out.update_tags(**tags)
        if 'tags' in kwargs:
            out.update_tags(**kwargs['tags'])

In [16]:
# The output file sits next to the input, with "_out" appended to the stem.
outname = os.path.splitext(fname)[0]+"_out.tif"

# Read every band up front; the context manager closes the input file
# before the output is written.
with rasterio.open(fname,'r') as src:
    pixels = src.read()

print ("input file: %s"%fname)
print ("output file: %s"%outname)

tags={'status':'testing...'}

# output a new GeoTIFF from the array, and use the profile
# from the input file, and include some additional metadata
to_tiff(pixels, outname, template=fname, tags=tags)

input file: Midwest_Mosaic.tif
output file: Midwest_Mosaic_out.tif

processing band 1
  stats= {'STATISTICS_MINIMUM': 0, 'STATISTICS_MAXIMUM': 255, 'STATISTICS_MEAN': 105.5556099695302, 'STATISTICS_STDDEV': 36.07316866813157}

processing band 2
  stats= {'STATISTICS_MINIMUM': 0, 'STATISTICS_MAXIMUM': 255, 'STATISTICS_MEAN': 146.58559732890092, 'STATISTICS_STDDEV': 33.19759538004106}

processing band 3
  stats= {'STATISTICS_MINIMUM': 0, 'STATISTICS_MAXIMUM': 255, 'STATISTICS_MEAN': 102.17084511375656, 'STATISTICS_STDDEV': 28.76794259426698}

  overall stats= {'STATISTICS_MINIMUM': 0, 'STATISTICS_MAXIMUM': 255, 'STATISTICS_MEAN': 118.10401747072923, 'STATISTICS_STDDEV': 38.529111337074234}



  transform = guard_transform(transform)


In [19]:
# Re-open the file that was just written and print the tags stored on each
# band, so they can be compared with the statistics printed while writing.
with rasterio.open(outname,'r') as out:
    print (out.shape)
    # iterate over the dataset's own 1-based band indexes instead of
    # assuming exactly three bands
    for b in out.indexes:
        print("band[%d].tags = %s\n"%(b,out.tags(b)))
    

(1227, 1343)
band[1].tags = {'STATISTICS_MAXIMUM': '4', 'STATISTICS_MEAN': '0.0037230081906179', 'STATISTICS_MINIMUM': '-3', 'STATISTICS_STDDEV': '1.0458122260225'}

band[2].tags = {'STATISTICS_MAXIMUM': '255', 'STATISTICS_MEAN': '146.58559732890092', 'STATISTICS_MINIMUM': '0', 'STATISTICS_STDDEV': '33.19759538004106'}

band[3].tags = {'STATISTICS_MAXIMUM': '255', 'STATISTICS_MEAN': '102.17084511375656', 'STATISTICS_MINIMUM': '0', 'STATISTICS_STDDEV': '28.76794259426698'}



In [18]:
# clean up
#os.remove(fname)
#os.remove(outname)