In [32]:
%load_ext chime

import rasterio as rio
from rasterio.windows import from_bounds, transform
from rasterio.features import shapes
from rasterio.mask import mask
import numpy as np
import geopandas as gpd
import pandas as pd
import os
from shapely.geometry import Point, box, Polygon
import math
from rasterio.features import shapes
from datetime import datetime
import itertools
from datetime import datetime
from joblib import Parallel, delayed
from matplotlib import pyplot
from utils import *

rasterDataDir = os.path.abspath(r"R:/ProjectData/PAG2019")

landcover2015 = r"C:/Users/BenJames/Documents/PAG-Data/DirtRoads/pima_landcover_noroads/pima_landcover_noroads.img"
ortho2019Seg_loc = os.path.join(rasterDataDir, r"EPCExtent_30cm/Orthos_Segmentedv3/Ortho2019SegmentedV3.vrt")
landcoverExtent_loc = r"../Vectors/LULC2015_Footprint.gpkg"
#ortho30cmvrt_loc = os.path.join(rasterDataDir, r"EPCExtent_30cm/Orthos/EPC_30cmOrtho_2019.vrt")

with rio.open(landcover2015) as src:
    bnds = src.bounds
    landcover2015_extent = box(*bnds)

subIndex_loc = "../Vectors/Ortho_5kSubIndex.gpkg"
targetboxes_loc = "../Vectors/targetBoxes2019.gpkg"
targetboxes = gpd.read_file(targetboxes_loc)
tileIndex = gpd.read_file(subIndex_loc)
landcoverTileIndex = gpd.read_file(landcoverExtent_loc)

hag_tindex_loc = "../vectors/HAG2015_tindex.gpkg"
hag_tindex = gpd.read_file(hag_tindex_loc)
targetboxes = targetboxes[targetboxes.within(hag_tindex.unary_union)]

The chime extension is already loaded. To reload it, use:
  %reload_ext chime


  for f in features_lst:


In [15]:
classes = ["PondsLakes", "Pool", "Dense Vegetation", "Sparse Vegetation", "Barren", "Irrigated Lands/Turf",  "Asphault", "Other Impervious", "Structure"]

- Water - Deep and/or turbid water. Ponds & Lakes
- Pools - Clear and shallow water with concrete substrate. Backyard pools and public pools.
- Dense Vegetation - High vegetation index value - Trees, bushes
- Sparse Vegetation - Low vegetation index values - shrubs, cacti
- Irrigated Lands/Turf - ground-level, high vi value - Grasses, fields, agriculture
- Barren - ground-level, dirt or rock
- Asphault - generally newer asphault associated with roads
- Other Impervious - concrete, dirty asphault, older asphault
- Structures - impervious surfaces elevated off the ground - generally fixed built structures. Could be cars/trailers/RVs

In [81]:
def delOSMColumns(df, exception):
    mandatory = ['osm_id', 'osm_way_id', 'name', 'type', "other_tags", "geometry"]
    mandatory.append(exception)
    for col in df.columns.tolist():
        if col not in mandatory:
            del df[col]
    return df


def pullTagValues(other_tags, target_tag):
    return None if other_tags is None or target_tag not in other_tags else ",".join([tag.split("=>")[1].replace('"','') for tag in other_tags.split(",") if target_tag in tag])


def getFeaturesInBounds(landcover_ras, bnds, landcoverValue=None, msaviUpperLimit=None, msaviLowerLimit=None):
    polys = []
    try:
        with rio.open(landcover_ras) as src:
            twin = from_bounds(bnds[0], bnds[1], bnds[2], bnds[3], transform=src.transform)
            wtrans = transform(twin, src.transform)
            nd = src.nodata
            window_array = src.read(1, window=twin)

        allMask = np.ones(window_array.shape).astype(bool)

        if msaviUpperLimit or msaviLowerLimit:
            with rio.open(ortho2019Seg_loc) as src:
                twin = from_bounds(*bnds, transform=src.transform)
                msavi_array = src.read(5, window=twin, out_shape=window_array.shape)
                if (msaviUpperLimit <= 255) & (msavi_array.dtype == np.uint16):
                    #print("converting limits to uint16")
                    msaviUpperLimit = (msaviUpperLimit/np.iinfo(np.uint8).max)*np.iinfo(np.uint16).max
                    msaviLowerLimit = (msaviLowerLimit/np.iinfo(np.uint8).max)*np.iinfo(np.uint16).max
                vegMask = (msavi_array <= msaviUpperLimit) & (msavi_array > msaviLowerLimit)
        else:
            vegMask = None

        if landcoverValue:
            landcoverMask = window_array == landcoverValue
        else:
            landcoverMask == None

        if landcoverMask is not None:
            allMask = landcoverMask & allMask
        if vegMask is not None:
            allMask = vegMask & allMask

        targetFeatures = shapes(window_array, allMask, transform=wtrans)
        targetFeatures = [feat for feat in targetFeatures]

        if len(targetFeatures) != 0:
            for feat in targetFeatures:
                polys.append(Polygon(feat[0]["coordinates"][0]))
    except Exception as e:
        print(f"Error on bounds {bnds}. Returning empty list\n{e}")
        
    return polys

In [33]:
osmPolygons_regional_loc = "../vectors/osmPolygons_regional.gpkg"
if not os.path.exists(osmPolygons_regional_loc):
    print(f"Creating {osmPolygons_regional_loc}...")
    osmPoly_loc = r"../Vectors/arizona-latest.osm.20201215.gpkg"
    osm_polygons = gpd.read_file(osmPoly_loc, layer="multipolygons").to_crs("epsg:2868")
    osm_polygons.geometry = osm_polygons.geometry.buffer(0)
    osm_polygons = osm_polygons[osm_polygons.intersects(landcoverTileIndex.unary_union)]
    osm_polygons.to_file(osmPolygons_regional_loc)
else:
    print(f"Reading in {osmPolygons_regional_loc}...")
    osm_polygons = gpd.read_file(osmPolygons_regional_loc)
    
%chime

Reading in ../vectors/osmPolygons_regional.gpkg...


  for f in features_lst:


----------------------------------------------
# PondsLakes & Pools

In [41]:
from rasterio.plot import show
# Get Water# natural, man-made, leisure (pool), landuse (basin, reservoir)
osm_water = "../vectors/osmwater_2020.gpkg"

if not os.path.exists(osm_water):
    print(f"Creating {osm_water}")
    osm_polyWater = osm_polygons[osm_polygons.natural == 'water'].copy().reset_index()
    osm_polyWater = delOSMColumns(osm_polyWater, "natural")
    osm_polyWater["water"] = osm_polyWater.other_tags.apply(lambda ot: pullTagValues(ot, "water"))
    display(osm_polyWater.head())

    osm_polyWater["intermittent"] = osm_polyWater.other_tags.apply(lambda ot: pullTagValues(ot, "intermittent"))
    osm_polyWater = osm_polyWater[(~pd.isnull(osm_polyWater.water)) & (osm_polyWater.intermittent != "yes")
                                 & (osm_polyWater.water.str.lower() != "river") & (osm_polyWater.water.str.lower() != "wash")]
    
    osm_polyWater.reset_index(drop=True, inplace=True)

    if "review" not in osm_polyWater.columns.tolist():
        osm_polyWater["review"] = None

    for i, row in osm_polyWater.copy().iterrows():
        if row.review is not None:
            continue
        buffered = row.geometry.centroid.buffer(100)
        bndbox = buffered.bounds
        with rio.open(ortho30cmvrt_loc) as src:
            winb = from_bounds(bndbox[0], bndbox[1], bndbox[2], bndbox[3], transform=src.transform)
            raster = src.read(window=winb)
        if 0 not in raster.shape:    
            show(raster[:3])
            result = input(f"Enter eval for {i} of {len(osm_polyWater)} (if water enter 0, to review enter 1, if not water enter 666)\n")
        else:
            result = 666

        osm_polyWater.at[i, 'review'] = result

    osm_polyWater.to_file(osm_water, driver="GPKG")
    print(f"Wrote out to {osm_water}")
else:
    print(f"Reading in {osm_water}")
    osm_polyWater = gpd.read_file(osm_water)
    osm_polyWater = osm_polyWater[osm_polyWater.review!='666']
        
%chime

Reading in ../vectors/osmwater_2020.gpkg


  for f in features_lst:


In [60]:
water_2015_loc = r"../vectors/water_2015.gpkg"
pondsLakes_2015_loc = r"../vectors/pondsLakes_2015.gpkg"
poolsPoly_loc  = r"../vectors/poolsPolys.gpkg"

water_val_2015 = 1
if not os.path.exists(poolsPoly_loc):
    osmPools_2020 = osm_polyWater[(osm_polyWater.review == '0') & (osm_polyWater.water == "pool")]
    print(f"Creating {poolsPoly_loc}")
    if not os.path.exists(water_2015_loc):
        print(f"Creating {water_2015_loc}")
        t1 = datetime.now()
        water_polys = Parallel(n_jobs=10, verbose=10)(delayed(getFeaturesInBounds)(landcover, row.geometry.bounds, landcoverValue=water_val_2015) for i, row in tileIndex.iterrows())
        %chime
        print(datetime.now()-t1)
        allPolys = list(itertools.chain.from_iterable(water_polys))
        water_2015 = gpd.GeoDataFrame(geometry=allPolys, crs="epsg:2868")
        water_2015["Area"] = water_2015.geometry.area
        water_2015.to_file(water_2015_loc, driver="GPKG")
    else:
        print(f"Reading in {water_2015_loc}")
        water_2015 = gpd.read_file(water_2015_loc)

    
    non_osmPondsLakes = water_2015[~water_2015.intersects(osm_polyWater.unary_union)].copy()
    # merge adjacent geometries
    non_osmPondsLakes.geometry = non_osmPondsLakes.geometry.buffer(3)#.explode().buffer(-3)
    non_osmPondsLakes["Type"] = "water"
    non_osmPondsLakes = non_osmPondsLakes.dissolve(by="Type")
    non_osmPondsLakes = gpd.GeoDataFrame(geometry = [g for g in non_osmPondsLakes.geometry.values[0]], crs = non_osmPondsLakes.crs)
    non_osmPondsLakes.geometry = non_osmPondsLakes.geometry.buffer(-3)
    non_osmPondsLakes["Area"] = non_osmPondsLakes.geometry.area

    pools_2015 = non_osmPondsLakes[(non_osmPondsLakes.Area < 500) & (non_osmPondsLakes.Area > 200)].copy()

    pools = pd.concat([osmPools_2020, pools_2015])
    
    pools.to_file(poolsPoly_loc, driver="GPKG")
else:
    print(f"Reading in {poolsPoly_loc}")
    pools = gpd.read_file(poolsPoly_loc)

%chime

Creating ../vectors/poolsPolys.gpkg
Reading in ../vectors/water_2015.gpkg


In [74]:
pondsLakesPoly_loc = r"../vectors/pondsLakesPolys.gpkg"

if not os.path.exists(pondsLakesPoly_loc):
    print(f"Creating {pondsLakesPoly_loc}...")
    
    if not os.path.exists(pondsLakes_2015_loc):
        pondsLakes_2015 = non_osmPondsLakes[non_osmPondsLakes.area>3000]
        pondsLakes_2015.reset_index(drop=True, inplace=True)
        if "review" not in pondsLakes_2015.columns.tolist():
            print("HERE")
            pondsLakes_2015["review"] = None

        for i, row in pondsLakes_2015.copy().iterrows():
            if row.review is not None:
                continue
            buffered = row.geometry.centroid.buffer(100)
            bndbox = buffered.bounds
            with rio.open(ortho30cmvrt_loc) as src:
                winb = from_bounds(bndbox[0], bndbox[1], bndbox[2], bndbox[3], transform=src.transform)
                raster = src.read(window=winb)
            if 0 not in raster.shape:    
                plt = show(raster[:3])
                result = input(f"Enter eval for {i} of {len(osm_polyWater)} (if water enter 0, to review enter 1, if not water enter 666)\n")
            else:
                result = 666
            pondsLakes_2015.at[i, 'review'] = result

        pondsLakes_2015 = pondsLakes_2015[pondsLakes_2015.review=='0']
        pondsLakes_2015.to_file(pondsLakes_2015_loc, driver="GPKG")
    else:
        print(f"Reading in {pondsLakes_2015_loc}")
        pondsLakes_2015 = gpd.read_file(pondsLakes_2015_loc)

    osmPondsLakes_2020 = osm_polyWater[(osm_polyWater.review == '0') & (osm_polyWater.water != "pool")]

    pondsLakes = pd.concat([osmPondsLakes_2020, pondsLakes_2015])
    del pondsLakes["Area"]
    pondsLakes["Area"] = pondsLakes.geometry.area
    pondsLakes.to_file(pondsLakesPoly_loc, driver="GPKG")
else:
    print(f"Reading in {pondsLakesPoly_loc}...")
    pondsLakes = gpd.read_file(pondsLakesPoly_loc)
    
%chime

Creating ../vectors/pondsLakesPolys.gpkg...
Reading in ../vectors/pondsLakes_2015.gpkg


-------------------------------------------

# Buildings

In [75]:
structuresPoly_loc = r"../vectors/StructuresPoly.gpkg"

structures_2015_loc = r"../vectors/structures_2015.gpkg"
osmBuildings_loc = "../vectors/osmBuildings_2020.gpkg"

if not os.path.exists(structuresPoly_loc):
    if not os.path.exists(osmBuildings_loc):
        print(f"Creating {osmBuildings_loc}")
        osm_polyBuildings = osm_polygons[~(pd.isnull(osm_polygons.building))].copy().reset_index()
        osm_polyBuildings = delOSMColumns(osm_polyBuildings, "building")
        osm_polyBuildings["buildingTag"] = osm_polyBuildings.other_tags.apply(lambda ot: pullTagValues(ot, "building"))
        osm_polyBuildings["area"] = osm_polyBuildings.geometry.area
        osm_polyBuildings.to_file(osmBuildings_loc, driver="GPKG")
    else:
        print(f"Reading in {osmBuildings_loc}")
        osm_polyBuildings = gpd.read_file(osmBuildings_loc)

    print(osm_polyBuildings.building.unique())
    # remove parking (elevated outdoor may unnecessarily confuse model), ruins, bunkers, collapsed, construction, stable, bridge
    bad_buildings = ["parking", "ruins", "bunker", "collapsed", "construction", "stable", "bridge"]
    osm_polyBuildings = osm_polyBuildings[~osm_polyBuildings["building"].isin(bad_buildings)]
    print(osm_polyBuildings.shape)

    structureVal = 7

    if not os.path.exists(structures_2015_loc):
        print(f"Creating {structures_2015_loc}")
        t1 = datetime.now()

        structures_polys = Parallel(n_jobs=10, verbose=5)(delayed(getFeaturesInBounds)(landcover, row.geometry.bounds, landcoverValue=structureVal, msaviUpperLimit=None, msaviLowerLimit=None) for i, row in targetboxes.iterrows())

        allPolys = list(itertools.chain.from_iterable(structures_polys))
        structures_2015 = gpd.GeoDataFrame(geometry=allPolys, crs="epsg:2868")
        structures_2015["area"] = structures_2015.geometry.area
        structures_2015.to_file(structures_2015_loc, driver="GPKG")
        print(datetime.now()-t1)
    else:
        print(f"Reading in {structures_2015_loc}")
        structures_2015 = gpd.read_file(structures_2015_loc)

    # osm buildings are offset (different imagery), so filter out anything below 1500 square feet
    osm_polyBuildings = osm_polyBuildings[osm_polyBuildings.area>1500]
    # filtering spatial join much faster than usual intersect
    osmBuildings_non2015 = gpd.sjoin(osm_polyBuildings, structures_2015, op="intersects", how="left")
    osmBuildings_non2015 = osmBuildings_non2015[pd.isnull(osmBuildings_non2015.index_right)]

    structuresPoly = pd.concat([structures_2015, osmBuildings_non2015])
    structuresPoly["area"] = structuresPoly.geometry.area
    structuresPoly.to_file(structuresPoly_loc, driver="GPKG")
else:
    print(f"Reading in {structuresPoly_loc}...")
    structuresPoly = gpd.read_file(structuresPoly_loc)

%chime

Creating ../vectors/osmBuildings_2020.gpkg
['office' 'university' 'yes' 'school' 'hospital' 'industrial' 'church'
 'apartments' 'commercial' 'roof' 'hotel' 'stadium' 'house' 'dormitory'
 'residential' 'shed' 'train_station' 'public' 'retail' 'carport'
 'college' 'kindergarten' 'parking' 'bridge' 'storage_tank'
 'central_office' 'terrace' 'garages' 'garage' 'ruins' 'Commercial'
 'detached' 'manufacture' 'barn' 'grandstand' 'stable' 'warehouse'
 'greenhouse' 'collapsed' 'ses' 'hangar' 'bunker' 'water_tower'
 'government' 'static_caravan' 'service' 'construction'
 'semidetached_house' 'pavilion']
(141425, 9)
Creating ../vectors/structures_2015.gpkg


[Parallel(n_jobs=10)]: Using backend LokyBackend with 10 concurrent workers.
[Parallel(n_jobs=10)]: Done  52 tasks      | elapsed:    2.4s
[Parallel(n_jobs=10)]: Done 125 out of 125 | elapsed:    4.0s finished


0:00:16.219002


---------------------

# Dense Veg

In [140]:
denseVeg_2015_loc = r"../vectors/TrainingData/denseVeg_2015.gpkg"
treesShrubs_val = 2

if not os.path.exists(denseVeg_2015_loc):
    print(f"Creating {denseVeg_2015_loc}")
    t1 = datetime.now()

    denseVeg_polys = Parallel(n_jobs=10, verbose=5)(delayed(getFeaturesInBounds)(landcover2015, row.geometry.bounds, landcoverValue=treesShrubs_val, msaviUpperLimit=255, msaviLowerLimit=135) for i, row in targetboxes.iterrows())

    allPolys = list(itertools.chain.from_iterable(denseVeg_polys))
    denseVeg_2015 = gpd.GeoDataFrame(geometry=allPolys, crs="epsg:2868")
    denseVeg_2015["area"] = denseVeg_2015.geometry.area
    denseVeg_2015.to_file(denseVeg_2015_loc, driver="GPKG")
    print(datetime.now()-t1)
else:
    print(f"Reading in {denseVeg_2015_loc}")
    denseVeg_2015 = gpd.read_file(denseVeg_2015_loc)

%chime

Creating ../vectors/TrainingData/denseVeg_2015.gpkg


[Parallel(n_jobs=10)]: Using backend LokyBackend with 10 concurrent workers.
[Parallel(n_jobs=10)]: Done  52 tasks      | elapsed:  3.2min
[Parallel(n_jobs=10)]: Done  99 out of  99 | elapsed:  5.5min finished


0:06:38.155543


# SPARSE VEG AND BARREN

In [82]:
ruralRegions_loc = r"../vectors/DesertBarrenVegetation_2015.gpkg"
rural_regions = gpd.read_file(ruralRegions_loc)
rural_regions = rural_regions[rural_regions.within(hag_tindex.unary_union)]

tileIndexBuff = tileIndex.copy()
tileIndexBuff["geometry"] = tileIndexBuff["geometry"].apply(lambda g: box(*g.buffer(-50).bounds))

rural_regions = gpd.overlay(rural_regions, tileIndexBuff, how="union")
rural_regions = rural_regions[~pd.isnull(rural_regions["Area"])]

rural_regions.to_file(ruralRegions_loc)

  for f in features_lst:


In [135]:
from shapely.geometry import box
from rasterio import features
from rasterio.windows import transform as wtransform

In [137]:
barrenSparse_2015Urban_loc = r"../vectors/BarrenPolys_MidTown2015.gpkg"
def barrenFrom2015(polygon):
    with rio.open("C:/Users/BenJames/Documents/PAG-Data/DirtRoads/pima_landcover_noroads/pima_landcover_noroads.img") as src:
        win = from_bounds(*polygon.bounds, src.transform)
        win_trans = wtransform(win, src.transform)
        a = src.read(1, window=win)
        mask = a == 5
        shapes = [Polygon(poly[0]["coordinates"][0]) for poly in features.shapes(a, mask=mask, transform=win_trans)]
        shapes_gdf = gpd.GeoDataFrame({"geometry":shapes}, geometry="geometry", crs=src.crs)
        
    return shapes_gdf
        
        
inPoly = box(984278,436946,1009295,456063)
barrenPolys = barrenFrom2015(inPoly)
barrenPolys.to_file(barrenSparse_2015Urban_loc, driver="GPKG")

In [545]:
def generatePoints(df, totalPointsDF, minPerFeature):
    
    df["Area"] = df.geometry.area
    remainderPoints = totalPointsDF - (minPerFeature * len(df))
    total_area = df.Area.sum()
    #df["POT"] = df["Area"].apply(lambda a: a/total_area)
    #df["NumPoints"] = df["POT"].apply(lambda pot: int(minPerFeature+(pot*remainderPoints)))

    allPoints = []
    for i, row in df.iterrows():
        bnds = row.geometry.bounds
        featurePoints = []
        while (len(featurePoints) < row.NumPoints) :
            #print(f"WHILE {len(featurePoints), row.NumPoints}")
            x = random.uniform(bnds[0], bnds[2])
            y = random.uniform(bnds[1], bnds[3])
            point = Point(x,y)
            if point.intersects(row.geometry):
                featurePoints.append(point)
        allPoints += featurePoints
    return allPoints

In [338]:
barrenSparseV_loc = "../OtherData/TrainingData/barrenSparseVPoints.gpkg"

if not os.path.exists(barrenSparseV_loc):
    # masking raster with polygon and turning into shapes takes way to long. Since we know these area contain only one of three values denseV, sparseV, or barren, justsparseVegPoints_loc and filter those
    print(f"Creating {barrenSparseV_loc}...")

    totalPoints = 10 * 1000000
    minPoints = 5000
    barrenSparsePoints = generatePoints(rural_regions, totalPoints, minPoints)
    print(f"Created {len(barrenSparsePoints)} random points")
    
    subSize = 10000
    t1 = datetime.now()
    allValues=[]
    with rio.open(ortho2019Seg_loc) as src:
        for i in range(0, len(barrenSparsePoints), subSize):
            if i % 500000 == 0 and i != 0:
                print(i)
            pointsSubset = barrenSparsePoints[i:i+subSize]
            xys = [(point.x, point.y) for point in pointsSubset]
            values = [value[0] for value in src.sample(xys, indexes=5)]
            allValues += values

    t2 = datetime.now()
    print(t2-t1)

    barrenSparseV = gpd.GeoDataFrame({"MSAVI":allValues}, geometry=barrenSparsePoints, crs="epsg:2868")
    barrenSparseV.to_file(barrenSparseV_loc, driver="GPKG")
    

else:
    print(f"Reading in {barrenSparseV_loc}...")
    barrenSparseV = gpd.read_file(barrenSparseV_loc)

%chime

500000
1000000
1500000
2000000
2500000
3000000
3500000
4000000
4500000
5000000
5500000
6000000
6500000
7000000
7500000
8000000
8500000
9000000
9500000
3:36:29.633577
6747567 2682605 569726


___________________________
# Irrigated Land

2015 irrigated classification with MSAVI values greater than the 135 cutoff (dense veg)

In [80]:
irrigatedLand_2015_loc = r"../vectors/irrigatedLand_2015.gpkg"
irrigatedVal = 3

if not os.path.exists(irrigatedLand_2015_loc):
    print(f"Creating {irrigatedLand_2015_loc}")
    targetboxes["geometry"] = targetboxes["geometry"].apply(lambda g: box(*g.buffer(-50).bounds))
    t1 = datetime.now()

    irrigated_polys = Parallel(n_jobs=10, verbose=5)(delayed(getFeaturesInBounds)(landcover2015, row.geometry.bounds, irrigatedVal, msaviUpperLimit=255, msaviLowerLimit=135) for i, row in targetboxes.iterrows())

    allPolys = list(itertools.chain.from_iterable(irrigated_polys))
    irrigated_2015 = gpd.GeoDataFrame(geometry=allPolys, crs="epsg:2868")
    irrigated_2015["Area"] = irrigated_2015.geometry.area
    
    irrigated_2015["Area"] = irrigated_2015.geometry.area
    irrigated_2015.to_file(irrigatedLand_2015_loc, driver="GPKG")
    print(datetime.now()-t1)
else:
    print(f"Reading in {irrigatedLand_2015_loc}")
    irrigated_2015 = gpd.read_file(irrigatedLand_2015_loc)

print(irrigated_2015["Area"].sum())
%chime

Creating ../vectors/irrigatedLand_2015.gpkg


[Parallel(n_jobs=10)]: Using backend LokyBackend with 10 concurrent workers.
[Parallel(n_jobs=10)]: Done  52 tasks      | elapsed:  2.2min
[Parallel(n_jobs=10)]: Done  99 out of  99 | elapsed:  4.5min finished


0:04:33.527592
22109376.0


___________________________
# Asphault

Drop points along osm road lines to ensure they fall on asphault
Buffer osm roads to create polygons, drop points within polygons

In [9]:
censusPlaces_loc = r"Q:\Projects\2019ImageryAnalysis\vectors\tl_2021_04_place.shp"
osm_roads_loc = "../vectors/osm_arizona_20210406_lines_pima_roads.gpkg"
asphaultPoly_loc = "../vectors/asphaultPoly.gpkg"
target_place = gpd.read_file(censusPlaces_loc).to_crs(tileIndex.crs)
target_place = target_place[target_place.NAME.isin(["Tucson", "Oro Valley", "Casas Adobes", "Drexel Heights", "Vail", "Green Valley", "Sahuarita"])]
urbanboxes = tileIndex[(tileIndex.intersects(target_place.unary_union)) & (tileIndex.within(landcoverTileIndex.unary_union))].sample(n=75)
#urbanboxes = gpd.read_file("../vectors/UrbanBoxes.gpkg")

buffer_size = 5

if not os.path.exists(asphaultPoly_loc):
    print(f"Creating {asphaultPoly_loc}...")
    osm_roads_all = gpd.read_file(osm_roads_loc)
    roadboxes = pd.concat([targetboxes, urbanboxes])
    roadboxes["geometry"] = roadboxes["geometry"].apply(lambda g: box(*g.buffer(-50).bounds))
    bad_roads = ["abandonded", "bridleway", "construction", "elevator", "footway", "living_street", "path", "pedestrian", "proposed", "steps","unclassified"]
    osm_roads = osm_roads_all[
        (osm_roads_all.other_tags.str.contains("asphault")) 
        | (osm_roads_all.other_tags.str.contains("parking_aisle"))
        | (
            (~osm_roads_all.highway.isin(bad_roads))
        )
        | (
            (osm_roads_all.highway == "residential")
            & (~pd.isnull(osm_roads_all.name)) 
            & (osm_roads_all.intersects(urbanboxes.to_crs(osm_roads_all.crs).unary_union))
        )
    ].copy()
    #osm_roads.to_file("../pavedRoads_20211228.gpkg", driver="GPKG")
    osm_roads.to_crs(roadboxes.crs, inplace=True)
    osm_roads["geometry"] = osm_roads.buffer(buffer_size)
    roads_targets = gpd.clip(osm_roads, roadboxes)
    roads_targets = gpd.overlay(roads_targets, roadboxes[roadboxes.intersects(roads_targets.unary_union)], how="union")
    roads_targets = roads_targets[~pd.isnull(roads_targets["osm_id"])]
    roads_targets = roads_targets.dissolve(by=["name", "path", "row"]).reset_index().explode().reset_index(drop=True)
    roads_targets["Area"] = roads_targets.geometry.area
    roads_targets.to_file(asphaultPoly_loc, driver="GPKG")
else:
    print(f"Reading in {asphaultPoly_loc}...")
    road_targets = gpd.read_file(asphaultPoly_loc)

%chime


Reading in ../vectors/asphaultPoly.gpkg...


  for f in features_lst:


num_asphault = 2.5 * 1000000
minPoints = 1
remainderPoints = num_asphault - (minPoints * len(roads_targets))
total_area = roads_targets.Area.sum()
roads_targets["POT"] = roads_targets["Area"].apply(lambda a: a/total_area)
roads_targets["NumPoints"] = roads_targets["POT"].apply(lambda pot: int(minPoints+(pot*remainderPoints)))

allPoints = []
for i, row in roads_targets.iterrows():
    if i%1000 == 0 and i != 0:
        print(f"index:{i} of {len(roads_targets)}")
    bnds = row.geometry.bounds
    featurePoints = []
    while len(featurePoints) < row.NumPoints:
        x = random.uniform(bnds[0], bnds[2])
        y = random.uniform(bnds[1], bnds[3])
        point = Point(x,y)
        if point.intersects(row.geometry):
            featurePoints.append(point)
    allPoints += featurePoints
print(len(allPoints))

asphaultPoints = gpd.GeoDataFrame(geometry=allPoints, crs=roads_targets.crs)
asphaultPoints.to_file("../OtherData/TrainingData/asphaultPoints.gpkg", driver="GPKG")

%chime

____________________________________
# Impervious

In [130]:
%%time
amexSidewalks_loc = "../vectors/Amex_ADAAssetsRawCalcs_COT.gpkg"
cotSidewalks = gpd.read_file(amexSidewalks_loc, layer="Amex_ADAAssetsRawCalcs_COT_Sidewalks")
cotSidewalks = cotSidewalks[cotSidewalks["Material"].str.lower() == "concrete"]
cotSidewalks = gpd.overlay(cotSidewalks[cotSidewalks.intersects(targetboxes.unary_union)], tileIndexBuff, how="union")
cotSidewalks = cotSidewalks[cotSidewalks.Material.notnull()]

impervious_loc = "../vectors/Impervious.gpkg"
imperviousPoly_loc = "../vectors/ImperviousPoly.gpkg"
imperviousPoly = gpd.read_file(impervious_loc, layer="ImperviousPoly")
imperviousPoly["Area"] = imperviousPoly.geometry.area
imperviousPoly["Area"].sum()/4

tileIndex = gpd.read_file(subIndex_loc)
tileIndex["geometry"] = tileIndex["geometry"].apply(lambda g: box(*g.buffer(-50).bounds))
imperviousPoly = gpd.overlay(imperviousPoly, tileIndex, how="union")
imperviousPoly = imperviousPoly[imperviousPoly.Area.notnull()]

impervious = pd.concat([imperviousPoly, cotSidewalks])
impervious = impervious[tileIndex.columns].dissolve(by=["path", "row"], as_index=False)

impervious.to_file(imperviousPoly_loc, driver="GPKG")

  for f in features_lst:
  for f in features_lst:


Wall time: 1min 15s


In [390]:
def getSnappedPointsLocation(geometry, rasterBnds, rasterRes):
    """ Returns set of upper-right snapped pixel locations in set as (x, y)"""

    geom_left, geom_bottom, geom_right, geom_top = geometry.bounds
    
    pix_diff_x_left = (geom_left - rasterBnds.left) / rasterRes % 1
    pix_diff_y_bottom = (geom_bottom - rasterBnds.bottom) / rasterRes % 1
    pix_diff_x_right = (geom_right - rasterBnds.right) / rasterRes % 1
    pix_diff_y_top = (geom_top - rasterBnds.top) / rasterRes % 1
    
    geom_left -= pix_diff_x_left
    geom_right += 1-pix_diff_x_left
    geom_bottom -= pix_diff_y_bottom
    geom_top += 1-pix_diff_y_top
    
    sizex = ceil((geom_right - geom_left)/rasterRes)
    sizey = ceil((geom_top - geom_bottom)/rasterRes)
    
    points = []
    for x in range(0, sizex):
        xp = geom_left + (x*rasterRes) + (rasterRes/2)
        for y in range(0, sizey):
            yp = geom_bottom + (y*rasterRes) + (rasterRes/2)
            points.append(Point(xp,yp))
    
    return [point for point in points if point.intersects(geometry)]

In [135]:
with rio.open(ortho30cmvrt_loc) as src:
    rasbnds = src.bounds
    reso = src.res[0]
    
impervious_points = [getSnappedPointsLocation(geom, rasbnds, reso) for geom in impervious.geometry]
impervious_points = list(itertools.chain.from_iterable(impervious_points))

impervious_pnts = gpd.GeoDataFrame(geometry=impervious_points, crs = "epsg:2868")
impervious_pnts.to_file("../OtherData/TrainingData/ImperviousPoints.gpkg", driver="GPKG")
%chime

In [132]:
subSize = 10000
t1 = datetime.now()
allValues=[]

with rio.open(ortho2019Seg_loc) as src:
    for i in range(0, len(allPoints), subSize):
        if i%1000000 == 0:
            print(i)
        pointsSubset = allPoints[i:i+subSize]
        xys = [(point.x, point.y) for point in pointsSubset]
        values = [value[0] for value in src.sample(xys, indexes=5)]
        allValues += values
        
        
t2 = datetime.now()
print(t2-t1)

barrenSparsV = gpd.GeoDataFrame({"MSAVI":allValues}, geometry=allPoints, crs="epsg:2868")
barrenSparsV.to_file("../OtherData/TrainingData/barrenSparsV.gpkg", driver="GPKG")
barrenSparsV.head()
barrenPts = barrenSparsV[barrenSparsV.MSAVI <= 115]
sparsePts = barrenSparsV[(barrenSparsV.MSAVI > 115) & (barrenSparsV.MSAVI <= 135)]
densePts = barrenSparsV[barrenSparsV.MSAVI > 135]

print(len(barrenPts), len(sparsePts), len(densePts))

%chime

31275316.208916515

In [106]:
bad_roads = ["service","footway","pedestrian","living_street","steps","unknown","bridleway"]
osm_roads = osm_roads[(~osm_roads.fclass.isin(bad_roads)) | (~osm_roads.fclass.str.contains("track"))]
osm_roads.fclass.unique()

array(['motorway_link', 'motorway', 'secondary', 'residential',
       'tertiary', 'service', 'track', 'track_grade4', 'path',
       'unclassified', 'footway', 'trunk', 'track_grade2', 'cycleway',
       'trunk_link', 'track_grade3', 'pedestrian', 'primary_link',
       'secondary_link', 'track_grade5', 'living_street', 'primary',
       'tertiary_link', 'track_grade1', 'steps', 'unknown', 'bridleway'],
      dtype=object)

_______________________________