In [34]:
from joblib import Parallel

import os
import rasterio as rio
import numpy as np

from datetime import datetime
from skimage.segmentation import felzenszwalb

from skimage import measure
from skimage.measure import label

import geopandas as gpd

import gdal
import logging as logger

from joblib import Parallel, delayed

In [47]:
np.iinfo(np.int32)

iinfo(min=-2147483648, max=2147483647, dtype=int32)

In [59]:
def segmentImage(loc_NAIPFile, seg_img_dir, overwrite=False):
    print("\tStarting segmentation of NAIP File: %s" % loc_NAIPFile)

    ofile_name = os.path.basename(loc_NAIPFile)[:-4] + "_Segmented.tif"

    o_file = os.path.join(seg_img_dir, ofile_name)

    if not os.path.exists(o_file) or overwrite:
        seg_start = datetime.now()
        #time.sleep(1)
        start = datetime.now()
        print("Segmented file doesn't exist or overwrite set. Creating at %s" % o_file)

        with rio.open(loc_NAIPFile) as inras:
            prof = inras.profile

            #initialize = rio.open(o_file, 'w', **prof).close()  # prevent parallel process from working on same file

            ras_array = inras.read()

            ras_array = np.moveaxis(ras_array, 0, -1)  # scikit-image wants array in (height, width, bands). Rasterio returns (bands, height, width)
            bands_array_seg = segmentArray(ras_array)
        
        #print("DTYPE: ", bands_array_seg.dtype)
        
        #print("SHAPE: ", bands_array_seg.shape)

        #return bands_array_seg.astype(np.int32)
    
        prof.update(
            count=1,
            dtype=np.int32
        )

        with rio.open(o_file, 'w', **prof) as oras:
            oras.write_band(1, bands_array_seg.astype(np.int32))

        seg_end = datetime.now()
        print("\tSegmentation took {}".format(seg_end-seg_start))

    with rio.open(o_file) as ras:
        seg_array = ras.read(1).astype(np.int32)


    return seg_array


def segmentArray(image_array, seg_type="felzenswalb"):
    print("Beginning image segmentation on array")
    seg_start = datetime.now()
    if seg_type == "felzenswalb":
        segments_fz = felzenszwalb(image_array, scale=25, sigma=0.5, min_size=3, multichannel=True)
    else:
        print("Unknown segmentation algorithm. Exiting...")
        raise ValueError

    seg_end = datetime.now()
    print(
        "Felzenszwalb number of segments: {}. \n\t{} elapsed.".format(len(np.unique(segments_fz)), seg_end - seg_start))

    return segments_fz


def calcSegmentMean(labeled_array, regs, in_band):
    mean_start = datetime.now()
    # label_im = label(segments, connectivity=1) + 1
    #regions = measure.regionprops(labeled_array.astype(np.int64), intensity_image=in_band)

    def loopRegions(region, in_array):
        #for r in regions:
        segment = region.label
        bbox = region.bbox
        min_row, max_row, min_col, max_col = bbox


        sub_array = in_array[min_row:min_col, max_row:max_col]
        #print(sub_array)

        sub_array[sub_array == segment] = r.mean_intensity
        #print(sub_array)

        in_array[min_row:min_col, max_row:max_col] = sub_array

        return in_array
    
    print("\tBeginning mean calculation on segments...")
    mean_array = np.copy(labeled_array)
    mean_array = Parallel(n_jobs=4)(delayed(loopRegions)(r, mean_array) for r in regs)
    """
    for r in regs:
        segment = r.label
        bbox = r.bbox
        min_row, max_row, min_col, max_col = bbox

        sub_array = mean_array[min_row:min_col, max_row:max_col]
        # print(sub_array)

        sub_array[sub_array == segment] = r.mean_intensity
        # print(sub_array)

        mean_array[min_row:min_col, max_row:max_col] = sub_array
    """
    print("\t...Mean calculation complete.\n\t{} elapsed.".format(datetime.now() - mean_start))

    return mean_array



def vegIndexCalc(naip_array_list, indicies):
    print("Beginning VegIndexCalcs")
    bandRed = naip_array_list[0].astype(float)
    bandGreen = naip_array_list[1].astype(float)
    bandBlue = naip_array_list[2].astype(float)
    bandNIR = naip_array_list[3].astype(float)

    # IMPORTANT: Because of the soil line value in the SAVI indicies,
    #  all band value must be normalized between 0 and 1.
    bandRed /= 255.0
    bandNIR /= 255.0
    bandBlue /= 255.0

    # Allow division by zero
    np.seterr(divide='ignore', invalid='ignore')

    vi_calcs = {}
    for veg_index in indicies:

        if veg_index == "NDVI":
            vi = (bandNIR - bandRed) / (bandNIR + bandRed)

        elif veg_index == "SAVI":
            l = 0.5
            vi = ((bandNIR - bandRed) / (bandNIR + bandRed + l)) * (1 + l)

        elif veg_index == "MSAVI2":
            vi = ((2 * bandNIR + 1) - np.sqrt(np.square(2 * bandNIR + 1) - (8 * (bandNIR - bandRed)))) / 2

        elif veg_index == "EVI2":
            g = 2.5  # gain factor
            l = 1.0  # soil adjustment factor
            c_one = 6.0  # coefficient
            c_two = 7.5  # coefficient
            #vi = 2.5 * ((bandNIR - bandRed) / (bandNIR + (2.4 * bandRed) + 1))
            vi = g * ((bandNIR - bandRed) / ((bandNIR + c_one) * (bandRed - c_two) * (bandBlue + l)))

        elif veg_index == "OSAVI":
            vi = (bandNIR - bandRed) / (bandNIR + bandRed + 0.16)

        vi_calcs[veg_index] = vi * 1000  # multiply by 1000 so we can convert to int16 later without loosing precision

        del vi

    return vi_calcs


def getSubSetLandsat(naip_path, landsat_file, opath, overwrite=False):
    ssl_start = datetime.now()
    ofile = "Landsat8_" + os.path.basename(naip_path)

    landsat_opath = os.path.join(opath, ofile)

    if not os.path.exists(landsat_opath) or overwrite:
        start = datetime.now()
        reference_f = gdal.Open(naip_path)
        geo_transform = reference_f.GetGeoTransform()
        resx = geo_transform[1]
        resy = geo_transform[5]
        proj = reference_f.GetProjectionRef()
        minx = geo_transform[0]
        maxy = geo_transform[3]
        maxx = minx + (resx * reference_f.RasterXSize)
        miny = maxy + (resy * reference_f.RasterYSize)

        # build landsat tile from naip extent

        if "ndsi" in opath.lower() or "ndwi" in opath.lower():
            resampletype = "bilinear"
        else:
            resampletype = "bilinear"
            # resampletype = "near"

        gdal_warp = "gdalwarp -overwrite -tap -r %s -t_srs %s -tr %s %s -te_srs %s -te %s %s %s %s %s %s" % (
            resampletype, proj, resx, resy, proj, str(minx), str(miny), str(maxx), str(maxy), landsat_file,
            landsat_opath)
        logger.info("Executing gdal_warp operation on %s for footprint of naip file %s" % (landsat_file, naip_path))
        os.system(gdal_warp)

        logger.info("\tFinished qquad for %s landsat in %s" % (landsat_file, str(datetime.now() - ssl_start)))

    with rio.open(landsat_opath) as lras:
        lras_array = lras.read()

    if "ndsi" in opath.lower() or "ndwi" in opath.lower():
        lras_array = lras_array * 1000

    return lras_array.astype(np.int16)


def compoundArrays(array_stack, tiff_tags, arrays_dict):
    for name, array in arrays_dict.items():
        tiff_tags[len(tiff_tags) + 1] = name

        array_stack.append(array.astype(np.int16))

    return array_stack, tiff_tags

In [None]:
"""
start = datetime.now()

seg_mean_band_arrays = []

with rio.open(in_naip) as ras:
    prof = ras.profile
    n_bands = prof["count"]
    for b in range(1, n_bands+1):
        print(b)
        if b == 1:
            ras.update_tags(b,NAME = "RED")
        
        band_array = ras.read(b)
       
        band_array_seg = segmentImage(band_array)

        # Write out segmented image
        #prof.update(
        #    dtype=rio.uint32,
        #    count=1
        #)
        #out_seg = in_naip[:-4] + "_fzseg.tif
        #with rio.open(r"Q:/AARMP_TestBed/NAIP/m_3211042_nw_12_1_20150530_band1_fz_segs.tif", 'w', **prof) as outras:
        #    outras.write_band(1, band_array_seg.astype(rio.uint32))

        output_band_array = calcSegmentMean(in_band=band_array, segments=band_array_seg)

        seg_mean_band_arrays.append(output_band_array)

#print(seg_mean_band_arrays)
veg_arrays_dict = vegIndexCalc(seg_mean_band_arrays, veg_indicies)

for vi_name in veg_indicies:
    seg_mean_band_arrays.append(veg_arrays_dict[vi_name])
"""

In [6]:
def generateStack(loc_NAIPFile): #, training_stack_dir, naip_band_order, veg_indicies):
    print("Starting on NAIP File: %s" % loc_NAIPFile)
    ofile_name = os.path.basename(loc_NAIPFile)[:-4] + "_TrainingStack.tif"

    o_file = os.path.join(training_stack_dir, ofile_name)

    if not os.path.exists(o_file):
        start = datetime.now()
        print("Training stack doesn't exist. Creating at %s" % o_file)

        with rio.open(loc_NAIPFile) as ras:
            prof = ras.profile

            initialize = rio.open(o_file, 'w', **prof).close()  # prevent parallel process for working on same file

            n_bands = prof["count"]

            output_array_stack = []

            tags = dict(naip_band_order)
            ras_array = ras.read()
            ras_array = np.moveaxis(ras_array, 0,
                                    -1)  # scikit-image wants array in (height, width, bands). Rasterio returns (bands, height, width)
            bands_array_seg = segmentImage(ras_array)

            # Identify segments as individual labels
            label_im = label(bands_array_seg, connectivity=1).astype(np.float32) + 1

            del bands_array_seg

            # Iterate through R,G,B,NIR bands and use the segmented image to calculate mean in zones/labels
            for b in range(1, n_bands + 1):
                print(b)

                band_array = ras.read(b)

                seg_mean_band = calcSegmentMean(in_band=band_array, labeled_array=label_im, characteristic="mean")

                output_array_stack.append(seg_mean_band.astype(np.int16))

            # CREATED BANDS FOR SEGMENT SIZE CHARACERISTICS (Area/Perimeter of segment, and % area of bounding box)
            geometric_arrays = calcSegmentMean(in_band=band_array, labeled_array=label_im, characteristic="geometry")
            del label_im

            output_array_stack, tags = compoundArrays(output_array_stack, tags, geometric_arrays)

            # CREATE VEG INDEX ARRAYS
            veg_arrays_dict = vegIndexCalc(output_array_stack, veg_indicies)
            output_array_stack, tags = compoundArrays(output_array_stack, tags, veg_arrays_dict)

            # CREATE LANDSAT INDEX ARRAYS AND APPEND TO MAIN ARRAY WITH TAGS
            # GET ALL LANDSAT BAND ARRAYS AND APPEND TO MAIN ARRAY
            landsat_qquad_array = getSubSetLandsat(loc_NAIPFile, landsat_file, landsat_qquad_dir, overwrite=False)
            for i in range(len(landsat_qquad_array)):
                output_array_stack.append(landsat_qquad_array[i].astype(np.int16))
                tags[len(tags) + 1] = "L8_" + str(i + 1)

            # APPEND NDSI ARRAY TO MAIN ARRAY
            landsat_ndsi_array = getSubSetLandsat(loc_NAIPFile, ndsi_file, ndsi_qquad_dir, overwrite=False)
            output_array_stack.append(landsat_ndsi_array[
                                          0])  # full read has shape (1, 7500, 6900). Must be 7500,6900 for equal shape of other arrays
            tags[len(tags) + 1] = "L8_NDSI"

            # APPEND NDWI ARRAY TO MAIN ARRAY
            landsat_ndwi_array = getSubSetLandsat(loc_NAIPFile, ndwi_file, ndwi_qquad_dir, overwrite=False)
            output_array_stack.append(landsat_ndwi_array[0])
            tags[len(tags) + 1] = "L8_NDWI"

            # naip_end = datetime.now()

            out_array_stack_np = np.stack(output_array_stack, axis=0)
            # print(out_array_stack_np.shape)
            # print(out_array_stack_np.dtype)

        prof.update(
            dtype=rio.int16,
            count=len(out_array_stack_np)
        )

        with rio.open(o_file, 'w', **prof) as outras:
            for n, tag in tags.items():
                outras.update_tags(n, NAME=tag)
            outras.write(out_array_stack_np.astype(rio.int16))

        print("FINISHED")

        end = datetime.now()
        print("\tElapsed %s" % (str(end - start)))

In [5]:
logger.basicConfig(level=logger.INFO)

veg_indicies = ["NDVI", "SAVI", "OSAVI", "MSAVI2", "EVI2"]

naip_band_order = {1: "RED", 2: "GREEN", 3: "BLUE", 4: "NIR"}

base_datadir = os.path.abspath(r"M:\Data")
base_landsatdir = os.path.join(base_datadir, "Landsat8")
ndsi_qquad_dir = os.path.join(base_datadir, "NDSI")
ndwi_qquad_dir = os.path.join(base_datadir, "NDWI")
landsat_qquad_dir = os.path.join(base_landsatdir, "byNAIPDOY_QQuads")

landsat_file = os.path.os.path.join(base_landsatdir, "Landsat1to8_TOA_NAIPAcquiDate_merge_rectified.tif")
# LOCATION OF THE NDSI FILE
ndsi_file = os.path.join(ndsi_qquad_dir, "LandsatOLI_NDSI_30m.tif")
# LOCATION OF THE NDWI FILE
ndwi_file = os.path.join(ndwi_qquad_dir, "LandsatOLI_NDWI_30m.tif")

In [4]:

training_stack_dir = os.path.join(base_datadir, "TrainingImageStack")
if not os.path.exists(training_stack_dir):
    os.mkdir(training_stack_dir)

# Allow division by zero
np.seterr(divide='ignore', invalid='ignore')

print("Reading in class_points_file...")
loc_class_points = os.path.abspath(
    r"Q:\GoogleDrive\AridRiparianProject\WorkingDirectory\classificationPoints_join.shp")
training_data_df = gpd.read_file(loc_class_points, crs={'init': 'epsg:26912'})

Reading in class_points_file...


In [None]:
label_im = label(bands_array_seg, connectivity=1).astype(np.float64) + 1

In [None]:
with rio.open(loc_NAIPFile) as ras:
    band_array = ras.read(1)
    
    seg_mean_band1 = calcSegmentMean(in_band=band_array, labeled_array=label_im, characteristic="mean")
seg_mean_band1

In [None]:
with rio.open(loc_NAIPFile) as ras:
    band_array = ras.read(2)
    
    seg_mean_band2 = calcSegmentMean(in_band=band_array, labeled_array=label_im, characteristic="mean")
seg_mean_band2

In [None]:
with rio.open(loc_NAIPFile) as ras:
    band_array = ras.read(3)
    
    seg_mean_band3 = calcSegmentMean(in_band=band_array, labeled_array=label_im, characteristic="mean")

In [None]:
with rio.open(loc_NAIPFile) as ras:
    band_array = ras.read(4)
    
    seg_mean_band4 = calcSegmentMean(in_band=band_array, labeled_array=label_im, characteristic="mean")

In [60]:
segmentedImagesDir = r"Q:\Arid Riparian Project\Data\test"
bands_array_seg = segmentImage(r"Q:\Arid Riparian Project\Data\NAIP_2015_Compressed\m_3211337_se_12_1_20150725.tif",
                               segmentedImagesDir, overwrite=True)
start = datetime.now()
with rio.open(r"Q:\Arid Riparian Project\Data\NAIP_2015_Compressed\m_3211337_se_12_1_20150725.tif") as ras:
    prof = ras.profile
    #initialize = rio.open(o_file, 'w', **prof).close() # prevent parallel process for working on same file
    n_bands = ras.count

    output_array_stack = []

    tags = dict(naip_band_order)
    #ras_array = ras.read()
    #ras_array = np.moveaxis(ras_array, 0, -1)  # scikit-image wants array in (height, width, bands). Rasterio returns (bands, height, width)
    
     # Identify segments as individual labels
    label_im = label(bands_array_seg, connectivity=1).astype(np.float32) + 1

    # Iterate through R,G,B,NIR bands and use the segmented image to calculate mean in zones/labels
    for b in range(1, n_bands + 1):
        print(b)

        band_array = ras.read(b)

        #bands_array_seg = segmentImage(band_array)
        regions = measure.regionprops(label_im.astype(np.int32), intensity_image=band_array)

        # Identify segments as individual labels
        #label_im = label(bands_array_seg, connectivity=1).astype(np.float32) + 1

        seg_mean_band = calcSegmentMean(in_band=band_array, regs=regions, labeled_array=label_im)

        output_array_stack.append(seg_mean_band.astype(np.int16))

out_array_stack_np = np.stack(output_array_stack, axis=0)
            # print(out_array_stack_np.shape)
            # print(out_array_stack_np.dtype)

prof.update(
    dtype=rio.int16,
    count=len(out_array_stack_np)
) 

with rio.open(r"Q:\Arid Riparian Project\Data\test\seg_test_m_3211337_se_12_1_20150725_4band_mean.tif", 'w', **prof) as outras:
    #for n, tag in tags.items():
    #    outras.update_tags(n, NAME=tag)
    outras.write(out_array_stack_np.astype(rio.int16))
end = datetime.now()

print(end-start)

	Starting segmentation of NAIP File: Q:\Arid Riparian Project\Data\NAIP_2015_Compressed\m_3211337_se_12_1_20150725.tif
Segmented file doesn't exist or overwrite set. Creating at Q:\Arid Riparian Project\Data\test\m_3211337_se_12_1_20150725_Segmented.tif
Beginning image segmentation on array


  min_size=min_size)


Felzenszwalb number of segments: 3052247. 
	0:02:48.317097 elapsed.
	Segmentation took 0:02:54.461148
1
	Beginning mean calculation on segments...


NameError: name 'r' is not defined

In [32]:
with rio.open(r"Q:\Arid Riparian Project\Data\test\seg_test_m_3211337_se_12_1_20150725_4band_mean.tif", 'w', **prof) as outras:
    #for n, tag in tags.items():
    #    outras.update_tags(n, NAME=tag)
    outras.write(out_array_stack_np.astype(rio.int16))

print(end-start)

-1 day, 23:26:16.842730


In [28]:
for n, tag in tags.items():
    print(n,tag)

1 RED
2 GREEN
3 BLUE
4 NIR


In [None]:
count = 0
naip_files = []
for loc_NAIPFile, group in training_data_df.groupby("NAIP_FILE"):
    # print(loc_NAIPFile)
    count += 1
    # loc_NAIPFile.replace("\\", "/")  # normalize for windows paths
    # print(count, " - ", os.path.basename(loc_NAIPFile))
    naip_files.append(loc_NAIPFile)

    print("Starting on NAIP File: %s" % loc_NAIPFile)
    ofile_name = os.path.basename(loc_NAIPFile)[:-4] + "_TrainingStack.tif"

    o_file = os.path.join(training_stack_dir, ofile_name)

    if not os.path.exists(o_file):
        start = datetime.now()
        print("Training stack doesn't exist. Creating at %s" % o_file)

        with rio.open(loc_NAIPFile) as ras:
            prof = ras.profile
            initialize = rio.open(o_file, 'w', **prof).close() # prevent parallel process for working on same file
            n_bands = prof["count"]

            output_array_stack = []

            tags = dict(naip_band_order)
            ras_array = ras.read()
            ras_array = np.moveaxis(ras_array, 0,
                                    -1)  # scikit-image wants array in (height, width, bands). Rasterio returns (bands, height, width)
            bands_array_seg = segmentImage(ras_array)

            # Identify segments as individual labels
            label_im = label(bands_array_seg, connectivity=1).astype(np.float32) + 1

            # Iterate through R,G,B,NIR bands and use the segmented image to calculate mean in zones/labels
            for b in range(1, n_bands + 1):
                print(b)

                band_array = ras.read(b)

                seg_mean_band = calcSegmentMean(in_band=band_array, labeled_array=label_im, characteristic="mean")

                output_array_stack.append(seg_mean_band.astype(np.int16))

            # CREATED BANDS FOR SEGMENT SIZE CHARACERISTICS (Area/Perimeter of segment, and % area of bounding box)
            #geometric_arrays = calcSegmentMean(in_band=band_array, labeled_array=label_im, characteristic="geometry")

            # CREATE VEG INDEX ARRAYS
            #veg_arrays_dict = vegIndexCalc(output_array_stack, veg_indicies)

            #output_array_stack, tags = compoundArrays(output_array_stack, tags, geometric_arrays)
            #output_array_stack, tags = compoundArrays(output_array_stack, tags, veg_arrays_dict)


            # CREATE LANDSAT INDEX ARRAYS AND APPEND TO MAIN ARRAY WITH TAGS
            # GET ALL LANDSAT BAND ARRAYS AND APPEND TO MAIN ARRAY
            landsat_qquad_array = getSubSetLandsat(loc_NAIPFile, landsat_file, landsat_qquad_dir, overwrite=False)
            for i in range(len(landsat_qquad_array)):
                output_array_stack.append(landsat_qquad_array[i].astype(np.int16))
                tags[len(tags) + 1] = "L8_" + str(i + 1)

            # APPEND NDSI ARRAY TO MAIN ARRAY
            landsat_ndsi_array = getSubSetLandsat(loc_NAIPFile, ndsi_file, ndsi_qquad_dir, overwrite=False)
            output_array_stack.append(landsat_ndsi_array[0])  # full read has shape (1, 7500, 6900). Must be 7500,6900 for equal shape of other arrays
            tags[len(tags) + 1] = "L8_NDSI"

            # APPEND NDWI ARRAY TO MAIN ARRAY
            landsat_ndwi_array = getSubSetLandsat(loc_NAIPFile, ndwi_file, ndwi_qquad_dir, overwrite=False)
            output_array_stack.append(landsat_ndwi_array[0])
            tags[len(tags) + 1] = "L8_NDWI"

            # naip_end = datetime.now()

            out_array_stack_np = np.stack(output_array_stack, axis=0)
            # print(out_array_stack_np.shape)
            # print(out_array_stack_np.dtype)

        prof.update(
            dtype=rio.int16,
            count=len(out_array_stack_np)
        ) 

        with rio.open(o_file, 'w', **prof) as outras:
            for n, tag in tags.items():
                outras.update_tags(n, NAME=tag)
            outras.write(out_array_stack_np.astype(rio.int16))

        print("FINISHED")

        end = datetime.now()
        print("\tElapsed %s" % (str(end - start)))
        
    raise ValueError


In [None]:
out_array_stack_np.shape

In [None]:
#with rio.open(o_file+"_1")
prof.update(count=4)

In [None]:
prof.update(count=1)
ofile = o_file[:-4] + "_1.tif"
initialize = rio.open(ofile, 'w', **prof).close()

In [None]:
prof.update(count=14)
with rio.open(ofile, 'w', **prof) as outras:
    for n, tag in tags.items():
        outras.update_tags(n, NAME=tag)
    outras.write(out_array_stack_np.astype(rio.int16))

In [None]:
print(bands_array_seg.shape)
print(label_im.shape)

In [6]:
with rio.open(r"M:\Data\SegmentedNAIPImages\m_3110930_nw_12_1_20150621_Segmented.tif") as ras:
    prof = ras.profile
    bands_array_seg = ras.read(1)
    print(bands_array_seg.shape)

(7650, 6660)


In [7]:
#bands_array_seg = np.moveaxis(bands_array_seg, 0, -1)
print(bands_array_seg.shape)

(7650, 6660)


In [29]:
label_im = label(bands_array_seg, connectivity=1).astype(np.float32) + 1

#empty_ar = np.zeros(bands_array_seg.shape).astype(np.byte)

In [18]:
label_im

array([[  1.00000000e+00,   1.00000000e+00,   1.00000000e+00, ...,
          2.11700000e+03,   2.11700000e+03,   2.11700000e+03],
       [  1.00000000e+00,   1.00000000e+00,   1.00000000e+00, ...,
          2.11700000e+03,   2.11700000e+03,   2.11700000e+03],
       [  1.00000000e+00,   1.00000000e+00,   2.00000000e+00, ...,
          2.11700000e+03,   2.11700000e+03,   2.11700000e+03],
       ..., 
       [  3.98373700e+06,   3.98373700e+06,   3.98373700e+06, ...,
          3.98508700e+06,   3.98508700e+06,   3.98508700e+06],
       [  3.98373700e+06,   3.98373700e+06,   3.98373700e+06, ...,
          3.98508700e+06,   3.98508700e+06,   3.98508700e+06],
       [  3.98373700e+06,   3.98373700e+06,   3.98074900e+06, ...,
          3.98508700e+06,   3.98508700e+06,   3.98508700e+06]], dtype=float32)

In [10]:
regions = measure.regionprops(label_im.astype(np.int32))#, intensity_image=empty_ar)
print("got regions")

got regions


In [11]:
print(label_im.shape)


(7650, 6660)


In [30]:
area_array = np.copy(label_im)

for r in regions:
    segment = r.label
    bbox = r.bbox
    min_row, max_row, min_col, max_col = bbox
    sub_array = np.copy(label_im[min_row:min_col, max_row:max_col]) # BEN
    #print("SUB ARRAY SHAPE: ", sub_array.shape)
    area = r.area
    #print(area)


    sub_array[sub_array == segment] = area
    #print(sub_array.astype(np.int))

    area_array[min_row:min_col, max_row:max_col] = sub_array
    

#area_array = np.moveaxis(area_array, 0, -1)
with rio.open(r"M:\Data\SegmentedNAIPImages\Area1.tif", 'w', **prof) as ras:
    ras.write_band(1, area_array.astype(np.int32))


  transform = guard_transform(transform)


In [27]:
label_im

array([[  60.,   60.,   60., ...,  138.,  138.,  138.],
       [  60.,   60.,   60., ...,  138.,  138.,  138.],
       [  60.,   60.,   18., ...,  138.,  138.,  138.],
       ..., 
       [  20.,   20.,   20., ...,   28.,   28.,   28.],
       [  20.,   20.,   20., ...,   28.,   28.,   28.],
       [  20.,   20.,   91., ...,   28.,   28.,   28.]], dtype=float32)

In [33]:
label_im = label(bands_array_seg, connectivity=1).astype(np.float32) + 1

#empty_ar = np.zeros(bands_array_seg.shape).astype(np.byte)

def calculateGeometry1(seg_array, regs, attrib):
    
    labeled_array = label(seg_array, connectivity=1).astype(np.float32) + 1
    
    out_array = np.copy(labeled_array)

    for r in regs:
        segment = r.label
        bbox = r.bbox
        min_row, max_row, min_col, max_col = bbox
        sub_array = labeled_array[min_row:min_col, max_row:max_col]
        
        if attrib.lower() == "area":
            value = r.area
        elif attrib.lower() == "perim":
            value = r.perimeter
        elif attrib.lower() == "perc_area":
            value = r.extent * 100
            
        sub_array[sub_array == segment] = value
        
        out_array[min_row:min_col, max_row:max_col] = sub_array
    
    return out_array
        
def calculateGeometry(labeled_array, regs):
    start = datetime.now()
    # regions = measure.regionprops(labeled_array.astype(np.int32))  #, intensity_image=empty_ar)
    # print("got regions")

    area_array = np.copy(labeled_array)
    perim_array = np.copy(labeled_array)
    ratio_area_array = np.copy(labeled_array)  # ratio area of pixels in bounding box

    array_dict = {"area": area_array,
                  "perim": perim_array,
                  "perc_area": ratio_area_array}

    for r in regs:
        segment = r.label
        bbox = r.bbox
        min_row, max_row, min_col, max_col = bbox
        sub_array = np.copy(labeled_array[min_row:min_col, max_row:max_col])  # BEN
        #print("SUB ARRAY SHAPE: ", sub_array.shape)

        values = {"area": r.area,
                  "perim": r.perimeter,
                  "perc_area": (r.extent * 100)}
        
        for characteristic, array in array_dict.items():
            ch_sub_array = np.copy(sub_array)

            ch_sub_array[ch_sub_array == segment] = values[characteristic]

            array[min_row:min_col, max_row:max_col] = ch_sub_array
            #print("label img:",labeled_array)
            #print(characteristic, array)
            
            #sub_array[sub_array == segment] = area
            #print(sub_array.astype(np.int))

            #area_array[min_row:min_col, max_row:max_col] = sub_array
        #raise ValueError

    end = datetime.now()
    #print("Elapsed: ", end - start)
    return array_dict

In [22]:
ars = calculateGeometry(label_im, regions)

In [19]:
label_im

array([[  1.00000000e+00,   1.00000000e+00,   1.00000000e+00, ...,
          2.11700000e+03,   2.11700000e+03,   2.11700000e+03],
       [  1.00000000e+00,   1.00000000e+00,   1.00000000e+00, ...,
          2.11700000e+03,   2.11700000e+03,   2.11700000e+03],
       [  1.00000000e+00,   1.00000000e+00,   2.00000000e+00, ...,
          2.11700000e+03,   2.11700000e+03,   2.11700000e+03],
       ..., 
       [  3.98373700e+06,   3.98373700e+06,   3.98373700e+06, ...,
          3.98508700e+06,   3.98508700e+06,   3.98508700e+06],
       [  3.98373700e+06,   3.98373700e+06,   3.98373700e+06, ...,
          3.98508700e+06,   3.98508700e+06,   3.98508700e+06],
       [  3.98373700e+06,   3.98373700e+06,   3.98074900e+06, ...,
          3.98508700e+06,   3.98508700e+06,   3.98508700e+06]], dtype=float32)

In [34]:
for att in ["area", "perim", "perc_area"]:
    print("Starting %s" % att)
    oa = calculateGeometry1(bands_array_seg, regions, att)


    o_file = os.path.join("M:\Data\TrainingImageStack", att + "2.tif")
    with rio.open(o_file, 'w', **prof) as ras:
        ras.write_band(1, oa.astype(np.int32))

Starting area


  transform = guard_transform(transform)


Starting perim
Starting perc_area


In [62]:
label_im

array([[  1.00000000e+00,   1.00000000e+00,   1.00000000e+00, ...,
          2.11700000e+03,   2.11700000e+03,   2.11700000e+03],
       [  1.00000000e+00,   1.00000000e+00,   1.00000000e+00, ...,
          2.11700000e+03,   2.11700000e+03,   2.11700000e+03],
       [  1.00000000e+00,   1.00000000e+00,   2.00000000e+00, ...,
          2.11700000e+03,   2.11700000e+03,   2.11700000e+03],
       ..., 
       [  3.98373700e+06,   3.98373700e+06,   3.98373700e+06, ...,
          3.98508700e+06,   3.98508700e+06,   3.98508700e+06],
       [  3.98373700e+06,   3.98373700e+06,   3.98373700e+06, ...,
          3.98508700e+06,   3.98508700e+06,   3.98508700e+06],
       [  3.98373700e+06,   3.98373700e+06,   3.98074900e+06, ...,
          3.98508700e+06,   3.98508700e+06,   3.98508700e+06]], dtype=float32)

In [50]:
for k,v in ars.items():
    print(k,v.shape)
    o_file = os.path.join("M:\Data\TrainingImageStack",k + ".tif")
    with rio.open(o_file, 'w', **prof) as ras:
        ras.write_band(1, v.astype(np.int32))

area (7650, 6660)


  transform = guard_transform(transform)


perc_area (7650, 6660)
perim (7650, 6660)


In [15]:

#area_array = np.moveaxis(area_array, 0, -1)
with rio.open(r"M:\Data\SegmentedNAIPImages\Area1.tif", 'w', **prof) as ras:
    ras.write_band(1, ar.astype(np.int32))


  transform = guard_transform(transform)


In [32]:
gdal.Open(r"M:\Data\ValleyBottoms\Watersheds\1407\HRNHDPlusRasters1407\fac.tif")