In [1]:
from PIL import Image
import rasterio
import multiprocessing as mp
import numpy as np
import glob
import os
import geopandas as gpd
from rasterio.mask import mask
import matplotlib.pyplot as plt
from scipy.spatial import KDTree
from queue import Queue
from skimage import io, img_as_bool, measure, morphology

from shapely.geometry import box, Polygon, Point
import pyproj
from shapely.ops import transform, unary_union, orient
import shapely
import pickle
from pyproj import Geod
from skimage.segmentation import find_boundaries
import multiprocessing as mp

# Phase 2: Processing TIFs in San Jose
1. Blackout image based on zoning information
2. Filter building footprints
    a) Only those within residential zones
    b) Only those >= 15m^2 (can change later)
3. Generate loss_weights based on footprint size
    a) Binary case
    b) Continuous function (need to solidify)


### Load in zoning information
- ZONINGABBREV R-* means residential
- ZONINGABBREV A(PD) but PDUSE Res should be included -- I did an inspection of some of the bounds when including this condition, and they look good
    
- R-1: SFR
- R-2: double family
- R-M: multiple family

In [3]:
oak_fp = '/oak/stanford/groups/deho/building_compliance/'

In [20]:
zoning = gpd.read_file(os.path.join(oak_fp, 'san_jose_suppl', 'san_jose_Zoning_Districts.geojson'))

In [21]:
res_zones = zoning[zoning['ZONINGABBREV'].str.contains('R-')]

In [24]:
sj_footprints = gpd.read_file('../../all_buildings/data/input/OSM/san_jose.geojson')

### Make phase2 directory

In [6]:
if not os.path.exists(os.path.join(oak_fp, 'san_jose_naip_512', 'phase2')):
    os.mkdir(os.path.join(oak_fp, 'san_jose_naip_512', 'phase2'))
    for i in ['train', 'test', 'val']:
        os.mkdir(os.path.join(oak_fp, 'san_jose_naip_512', 'phase2', i))
        os.mkdir(os.path.join(oak_fp, 'san_jose_naip_512', 'phase2', i, 'images'))
        os.mkdir(os.path.join(oak_fp, 'san_jose_naip_512', 'phase2', i, 'masks'))
        os.mkdir(os.path.join(oak_fp, 'san_jose_naip_512', 'phase2', i, 'masks_wt'))
#         os.mkdir(os.path.join(oak_fp, 'san_jose_naip_512', 'phase2', i, 'masks_loss'))

In [7]:
def plot_sample(lr, sr):
    plt.figure(figsize=(20, 10))

    images = [lr, sr]

    for i, img in enumerate(images):
        plt.subplot(1, 2, i+1)
        plt.imshow(img)
        plt.xticks([])
        plt.yticks([])
        
    plt.show()

### Driver

Need to create loss weights

In [22]:
def make_global(oak_fp, partition):
    global oak_fp_global
    global partition_global

    oak_fp_global = oak_fp
    partition_global = partition

In [23]:
def driver(im):
    fn = im.split('.')[0].replace('_mask', '')
    
    masks = np.load(os.path.join(oak_fp_global, 'san_jose_naip_512', partition_global, 'masks', im))
    image = np.load(os.path.join(oak_fp_global, 'san_jose_naip_512', partition_global, 
                                 'images', im.replace('_mask.npy', '.npy')))

    with rasterio.open(os.path.join(oak_fp_global, 'san_jose_naip_512', 'raw_tif', 
                                    im.replace('_mask.npy', '.tif'))) as inds:
        # get TIF bounds
        bounds = inds.bounds
        geom = box(*bounds)

        # prepare to convert TIF bounds to standard 4326
        wgs84 = pyproj.CRS('EPSG:26910')
        utm = pyproj.CRS('EPSG:4326')

        project = pyproj.Transformer.from_crs(wgs84, utm, always_xy=True).transform

        # convert
        utm_geom = shapely.ops.transform(project, geom)

        # clipping instead of using the mask (line above) clips the residential zones to the square, which 
        # cuts off the buildings at the edge because we impose an IOU condition
#         df = gpd.clip(zoning, utm_geom)

        df = gpd.read_file(os.path.join(oak_fp_global, 'san_jose_suppl', 'san_jose_Zoning_Districts.geojson'),
                           mask=utm_geom)
        df = df[(df['ZONINGABBREV'].str.contains('R-')) | \
               ((df['ZONINGABBREV'] == 'A(PD)') & (df['PDUSE'] == 'Res'))]

        if len(df) == 0:
            with open(os.path.join(oak_fp_global, 'san_jose_naip_512', 
                                     'phase2', 'no_res.txt'), 'a') as w:
                w.write(f'{fn}\n')
        else:
            # filter masks based on the residential zones
            res_zone = unary_union(df['geometry'])
            
            # COPIED FILE TO OAK
            # /oak/stanford/groups/deho/building_compliance/san_jose_suppl/san_jose_OSM_footprints.geojson
            
            df_masks = gpd.read_file('../../all_buildings/data/input/OSM/san_jose.geojson', 
                                     mask=res_zone)

            if len(df_masks) == 0:
                with open(os.path.join(oak_fp_global, 'san_jose_naip_512', 
                                     'phase2', 'no_res_buildings.txt'), 'a') as w:
                    w.write(f'{fn}\n')

            else:
                df_masks = gpd.clip(df_masks, res_zone)

                # there are some buildings that only partially intersect the residneital zones -- filter out
                df_masks['iou'] = df_masks['geometry'].apply(lambda x: x.intersection(res_zone).area/x.area)

                # filter masks based on size
                geod = Geod(ellps="WGS84")
                # apply orient() before passing to Geod so that the area is not negative
                df_masks['area'] = df_masks['geometry'].apply(lambda x: geod.geometry_area_perimeter(orient(x))[0])


                df_masks = df_masks[(df_masks['area'] >= 15) & (df_masks['iou'] >= 0.8)] # CAN CHANGE THIS ARBITRARILY
                
                if len(df_masks) == 0:
                    with open(os.path.join(oak_fp_global, 'san_jose_naip_512', 
                                     'phase2', 'no_res_buildings.txt'), 'a') as w:
                        w.write(f'{fn}\n')
                else:
                    df_masks['binary_wts'] = df_masks['area'].apply(lambda x: 1 if x >= 15 and x <= 115 else 0)
                    df_masks['continuous_wts'] = df_masks['area'].apply(lambda x: 1 if x >= 15 and x <= 115 else 0)

                    df = df.to_crs('epsg:26910')
                    df_masks = df_masks.to_crs('epsg:26910')

                    mask_im, trans = mask(inds, list(df['geometry']))
                    mask_im = np.rollaxis(mask_im, 0, 3)

                    mask_im_footprints, _ = mask(inds, list(df_masks['geometry']))
                    mask_im_footprints = np.rollaxis(mask_im_footprints, 0, 3)
                    mask_footprints = np.zeros((mask_im_footprints.shape[0], mask_im_footprints.shape[1]))
                    mask_footprints[np.sum(mask_im_footprints == 0, axis=2) < 4] = 1


                    ### ---- MAKE LOSS_WEIGHTS ----
                    # make a pixel lat/lon map
                    band1 = inds.read(1)
                    height = band1.shape[0]
                    width = band1.shape[1]
                    cols, rows = np.meshgrid(np.arange(width), np.arange(height))
                    xs, ys = rasterio.transform.xy(inds.transform, rows, cols)
                    lons = np.array(xs)[0]
                    lats = [i[0] for i in np.array(ys)]

                    # segmentation from scikit-image.measure.label
                    labels = measure.label(mask_footprints)

                    # labels image in clusters #1-num_clusters (cluster 0 is the background, so we exclude in the loop)
                    clusters = []
                    for i in range(1, len(np.unique(labels))):
                        clusters.append(np.column_stack(np.where(labels == i)))

                    loss_weights_mask = np.zeros_like(mask_footprints, dtype=float)
                    for cluster in clusters:
                        # for each cluster, just take the first point and find the associated area of building
                        first = cluster[0]
                        point = Point(lons[first[1]], lats[first[0]])
                        building = df_masks[df_masks.contains(point)]

                        assert not building.empty

                        area = building['area'].values[0]

                        for c in cluster:
                            loss_weights_mask[c[0], c[1]] = area.round(2)

                    # save blocked image, mask_footprints, loss_weights
                    np.save(os.path.join(oak_fp_global, 'san_jose_naip_512', 
                                         'phase2', partition_global, 'images', f'{fn}.npy'), mask_im)

                    np.save(os.path.join(oak_fp_global, 'san_jose_naip_512', 
                                         'phase2', partition_global, 'masks', f'{fn}_mask.npy'), loss_weights_mask)
                    

In [27]:
partition = 'train'
done = [i.replace('.npy', '') for i in os.listdir(os.path.join(oak_fp, 'san_jose_naip_512', 
                                     'phase2', partition, 'images'))]
masks = os.listdir(os.path.join(oak_fp, 'san_jose_naip_512', partition, 'masks'))
masks = [i.replace('_mask.npy', '') for i in masks]

leftover = list(set(masks) - set(done))
leftover = [f'{i}_mask.npy' for i in leftover]

In [26]:
nprocs = mp.cpu_count()

p = mp.Pool(processes=nprocs, initializer=make_global, initargs=(oak_fp, partition, ))
p.map(driver, leftover)

[None,
 None,
 None,
 None,
 None,
 None,
 None,
 None,
 None,
 None,
 None,
 None,
 None,
 None,
 None,
 None,
 None,
 None,
 None,
 None,
 None,
 None,
 None,
 None,
 None,
 None,
 None,
 None,
 None,
 None,
 None,
 None,
 None,
 None,
 None,
 None,
 None,
 None,
 None,
 None,
 None,
 None,
 None,
 None,
 None,
 None,
 None,
 None,
 None,
 None,
 None,
 None,
 None,
 None,
 None,
 None,
 None,
 None,
 None,
 None,
 None,
 None,
 None,
 None,
 None,
 None,
 None,
 None,
 None,
 None,
 None,
 None,
 None,
 None,
 None,
 None,
 None,
 None,
 None,
 None,
 None,
 None,
 None,
 None,
 None,
 None,
 None,
 None,
 None,
 None,
 None,
 None,
 None,
 None,
 None,
 None,
 None,
 None,
 None,
 None,
 None,
 None,
 None,
 None,
 None,
 None,
 None,
 None,
 None,
 None,
 None,
 None,
 None,
 None,
 None,
 None,
 None,
 None,
 None,
 None,
 None,
 None,
 None,
 None,
 None,
 None,
 None,
 None,
 None,
 None,
 None,
 None,
 None,
 None,
 None,
 None,
 None,
 None,
 None,
 None,
 None,
 None,
 None,

In [24]:
partition = 'test'
done = [i.replace('.npy', '') for i in os.listdir(os.path.join(oak_fp, 'san_jose_naip_512', 
                                     'phase2', partition, 'images'))]
masks = os.listdir(os.path.join(oak_fp, 'san_jose_naip_512', partition, 'masks'))
masks = [i.replace('_mask.npy', '') for i in masks]

leftover = list(set(masks) - set(done))
leftover = [f'{i}_mask.npy' for i in leftover]

nprocs = mp.cpu_count()

p = mp.Pool(processes=nprocs, initializer=make_global, initargs=(oak_fp, partition, ))
p.map(driver, leftover)

  out_image = dataset.read(
  out_image = dataset.read(
  out_image = dataset.read(
  out_image = dataset.read(
  out_image = dataset.read(
  out_image = dataset.read(
  out_image = dataset.read(
  out_image = dataset.read(
  out_image = dataset.read(
  out_image = dataset.read(
  out_image = dataset.read(
  out_image = dataset.read(
  out_image = dataset.read(
  out_image = dataset.read(
  out_image = dataset.read(
  out_image = dataset.read(
  out_image = dataset.read(
  out_image = dataset.read(
  out_image = dataset.read(
  out_image = dataset.read(
  out_image = dataset.read(
  out_image = dataset.read(
  out_image = dataset.read(
  out_image = dataset.read(
  out_image = dataset.read(
  out_image = dataset.read(
  out_image = dataset.read(
  out_image = dataset.read(
  out_image = dataset.read(
  out_image = dataset.read(
  out_image = dataset.read(
  out_image = dataset.read(
  out_image = dataset.read(
  out_image = dataset.read(
  out_image = dataset.read(
  out_image = datase

  out_image = dataset.read(
  out_image = dataset.read(
  out_image = dataset.read(
  out_image = dataset.read(
  out_image = dataset.read(
  out_image = dataset.read(
  out_image = dataset.read(
  out_image = dataset.read(
  out_image = dataset.read(
  out_image = dataset.read(
  out_image = dataset.read(
  out_image = dataset.read(
  out_image = dataset.read(
  out_image = dataset.read(
  out_image = dataset.read(
  out_image = dataset.read(
  out_image = dataset.read(
  out_image = dataset.read(
  out_image = dataset.read(
  out_image = dataset.read(
  out_image = dataset.read(
  out_image = dataset.read(
  out_image = dataset.read(
  out_image = dataset.read(
  out_image = dataset.read(
  out_image = dataset.read(
  out_image = dataset.read(
  out_image = dataset.read(
  out_image = dataset.read(
  out_image = dataset.read(
  out_image = dataset.read(
  out_image = dataset.read(
  out_image = dataset.read(
  out_image = dataset.read(
  out_image = dataset.read(
  out_image = datase

  out_image = dataset.read(
  out_image = dataset.read(
  out_image = dataset.read(
  out_image = dataset.read(
  out_image = dataset.read(
  out_image = dataset.read(
  out_image = dataset.read(
  out_image = dataset.read(
  out_image = dataset.read(
  out_image = dataset.read(
  out_image = dataset.read(
  out_image = dataset.read(
  out_image = dataset.read(
  out_image = dataset.read(
  out_image = dataset.read(
  out_image = dataset.read(
  out_image = dataset.read(
  out_image = dataset.read(
  out_image = dataset.read(
  out_image = dataset.read(
  out_image = dataset.read(
  out_image = dataset.read(
  out_image = dataset.read(
  out_image = dataset.read(
  out_image = dataset.read(
  out_image = dataset.read(
  out_image = dataset.read(
  out_image = dataset.read(
  out_image = dataset.read(
  out_image = dataset.read(
  out_image = dataset.read(
  out_image = dataset.read(
  out_image = dataset.read(
  out_image = dataset.read(
  out_image = dataset.read(
  out_image = datase

  out_image = dataset.read(
  out_image = dataset.read(
  out_image = dataset.read(
  out_image = dataset.read(
  out_image = dataset.read(
  out_image = dataset.read(
  out_image = dataset.read(
  out_image = dataset.read(
  out_image = dataset.read(
  out_image = dataset.read(
  out_image = dataset.read(
  out_image = dataset.read(
  out_image = dataset.read(
  out_image = dataset.read(
  out_image = dataset.read(
  out_image = dataset.read(
  out_image = dataset.read(
  out_image = dataset.read(
  out_image = dataset.read(
  out_image = dataset.read(
  out_image = dataset.read(
  out_image = dataset.read(
  out_image = dataset.read(
  out_image = dataset.read(
  out_image = dataset.read(
  out_image = dataset.read(
  out_image = dataset.read(
  out_image = dataset.read(
  out_image = dataset.read(
  out_image = dataset.read(
  out_image = dataset.read(
  out_image = dataset.read(
  out_image = dataset.read(
  out_image = dataset.read(
  out_image = dataset.read(
  out_image = datase

  out_image = dataset.read(
  out_image = dataset.read(
  out_image = dataset.read(
  out_image = dataset.read(
  out_image = dataset.read(
  out_image = dataset.read(
  out_image = dataset.read(
  out_image = dataset.read(
  out_image = dataset.read(
  out_image = dataset.read(
  out_image = dataset.read(
  out_image = dataset.read(
  out_image = dataset.read(
  out_image = dataset.read(
  out_image = dataset.read(
  out_image = dataset.read(
  out_image = dataset.read(
  out_image = dataset.read(
  out_image = dataset.read(
  out_image = dataset.read(
  out_image = dataset.read(
  out_image = dataset.read(
  out_image = dataset.read(
  out_image = dataset.read(
  out_image = dataset.read(
  out_image = dataset.read(
  out_image = dataset.read(
  out_image = dataset.read(
  out_image = dataset.read(
  out_image = dataset.read(
  out_image = dataset.read(
  out_image = dataset.read(
  out_image = dataset.read(
  out_image = dataset.read(
  out_image = dataset.read(
  out_image = datase

  out_image = dataset.read(
  out_image = dataset.read(
  out_image = dataset.read(
  out_image = dataset.read(
  out_image = dataset.read(
  out_image = dataset.read(
  out_image = dataset.read(
  out_image = dataset.read(
  out_image = dataset.read(
  out_image = dataset.read(
  out_image = dataset.read(
  out_image = dataset.read(
  out_image = dataset.read(
  out_image = dataset.read(
  out_image = dataset.read(
  out_image = dataset.read(
  out_image = dataset.read(
  out_image = dataset.read(
  out_image = dataset.read(
  out_image = dataset.read(
  out_image = dataset.read(
  out_image = dataset.read(
  out_image = dataset.read(
  out_image = dataset.read(
  out_image = dataset.read(
  out_image = dataset.read(
  out_image = dataset.read(
  out_image = dataset.read(
  out_image = dataset.read(
  out_image = dataset.read(
  out_image = dataset.read(
  out_image = dataset.read(
  out_image = dataset.read(
  out_image = dataset.read(
  out_image = dataset.read(
  out_image = datase

  out_image = dataset.read(
  out_image = dataset.read(
  out_image = dataset.read(
  out_image = dataset.read(
  out_image = dataset.read(
  out_image = dataset.read(
  out_image = dataset.read(
  out_image = dataset.read(
  out_image = dataset.read(
  out_image = dataset.read(
  out_image = dataset.read(
  out_image = dataset.read(
  out_image = dataset.read(
  out_image = dataset.read(
  out_image = dataset.read(
  out_image = dataset.read(
  out_image = dataset.read(
  out_image = dataset.read(
  out_image = dataset.read(
  out_image = dataset.read(
  out_image = dataset.read(
  out_image = dataset.read(
  out_image = dataset.read(
  out_image = dataset.read(
  out_image = dataset.read(
  out_image = dataset.read(
  out_image = dataset.read(
  out_image = dataset.read(
  out_image = dataset.read(
  out_image = dataset.read(
  out_image = dataset.read(
  out_image = dataset.read(
  out_image = dataset.read(
  out_image = dataset.read(
  out_image = dataset.read(
  out_image = datase

  out_image = dataset.read(
  out_image = dataset.read(
  out_image = dataset.read(
  out_image = dataset.read(
  out_image = dataset.read(
  out_image = dataset.read(
  out_image = dataset.read(
  out_image = dataset.read(
  out_image = dataset.read(
  out_image = dataset.read(
  out_image = dataset.read(
  out_image = dataset.read(
  out_image = dataset.read(
  out_image = dataset.read(
  out_image = dataset.read(
  out_image = dataset.read(
  out_image = dataset.read(
  out_image = dataset.read(
  out_image = dataset.read(
  out_image = dataset.read(
  out_image = dataset.read(
  out_image = dataset.read(
  out_image = dataset.read(
  out_image = dataset.read(
  out_image = dataset.read(
  out_image = dataset.read(
  out_image = dataset.read(
  out_image = dataset.read(
  out_image = dataset.read(
  out_image = dataset.read(
  out_image = dataset.read(
  out_image = dataset.read(
  out_image = dataset.read(
  out_image = dataset.read(
  out_image = dataset.read(
  out_image = datase

  out_image = dataset.read(
  out_image = dataset.read(
  out_image = dataset.read(
  out_image = dataset.read(
  out_image = dataset.read(
  out_image = dataset.read(
  out_image = dataset.read(
  out_image = dataset.read(
  out_image = dataset.read(
  out_image = dataset.read(
  out_image = dataset.read(
  out_image = dataset.read(
  out_image = dataset.read(
  out_image = dataset.read(
  out_image = dataset.read(
  out_image = dataset.read(
  out_image = dataset.read(
  out_image = dataset.read(
  out_image = dataset.read(
  out_image = dataset.read(
  out_image = dataset.read(
  out_image = dataset.read(
  out_image = dataset.read(
  out_image = dataset.read(
  out_image = dataset.read(
  out_image = dataset.read(
  out_image = dataset.read(
  out_image = dataset.read(
  out_image = dataset.read(
  out_image = dataset.read(
  out_image = dataset.read(


[None,
 None,
 None,
 None,
 None,
 None,
 None,
 None,
 None,
 None,
 None,
 None,
 None,
 None,
 None,
 None,
 None,
 None,
 None,
 None,
 None,
 None,
 None,
 None,
 None,
 None,
 None,
 None,
 None,
 None,
 None,
 None,
 None,
 None,
 None,
 None,
 None,
 None,
 None,
 None,
 None,
 None,
 None,
 None,
 None,
 None,
 None,
 None,
 None,
 None,
 None,
 None,
 None,
 None,
 None,
 None,
 None,
 None,
 None,
 None,
 None,
 None,
 None,
 None,
 None,
 None,
 None,
 None,
 None,
 None,
 None,
 None,
 None,
 None,
 None,
 None,
 None,
 None,
 None,
 None,
 None,
 None,
 None,
 None,
 None,
 None,
 None,
 None,
 None,
 None,
 None,
 None,
 None,
 None,
 None,
 None,
 None,
 None,
 None,
 None,
 None,
 None,
 None,
 None,
 None,
 None,
 None,
 None,
 None,
 None,
 None,
 None,
 None,
 None,
 None,
 None,
 None,
 None,
 None,
 None,
 None,
 None,
 None,
 None,
 None,
 None,
 None,
 None,
 None,
 None,
 None,
 None,
 None,
 None,
 None,
 None,
 None,
 None,
 None,
 None,
 None,
 None,
 None,