In [1]:
import geopandas as gpd
import pandas as pd
import numpy as np
import os

In [8]:
# NAIP 2016 tile folders (judging by the folder names: `fp` = raw tiles, `fp_out` = masks).
# Both live under the same parent, so they are generated from one template.
# NOTE: `fp` is rebound further down to the county footprint directory.
fp, fp_out = (
    f'/oak/stanford/groups/deho/building_compliance/los_angeles_naip/2016_rgb_footprint_512/{subdir}/'
    for subdir in ('raw_tif', 'mask')
)

In [9]:
# Load the entries of no_buildings.txt, one per line, with trailing
# whitespace (including the newline) stripped.
with open('data/los_angeles/no_buildings.txt', 'r') as f:
    no_buildings = [entry.rstrip() for entry in f]

In [10]:
# Number of entries listed in no_buildings.txt.
len(no_buildings)

3412

In [11]:
# Number of directory entries currently present in the mask output folder (`fp_out`).
len(os.listdir(fp_out))

2783

## Investigate the LA footprint dataset from 2017

The change file contains the building footprints from 2014 that were modified, so it is not needed here.

In [2]:
# Directory holding the LA County 2017 building-footprint files.
# NOTE(review): this rebinds `fp`, which an earlier cell set to the NAIP raw_tif
# directory; every read_file call below relies on this later value, so the cells
# must be run in notebook order.
fp = '/oak/stanford/groups/deho/building_compliance/LA_County_Building_Footprints_2017/'

In [9]:
# Load the complete countywide building-outline GeoJSON (roughly 3.26 million
# features according to the SOURCE counts shown below), so this cell is expensive.
original = gpd.read_file(os.path.join(fp, 'Countywide_Building_Outlines_(2017).geojson'))

In [10]:
# List the attribute columns available on each footprint.
original.columns

Index(['OBJECTID', 'CODE', 'BLD_ID', 'HEIGHT', 'ELEV', 'SOURCE', 'DATE_',
       'STATUS', 'OLD_BLD_ID', 'AREA', 'SHAPE_Length', 'SHAPE_Area',
       'geometry'],
      dtype='object')

- LARIAC2: 2008 acquisition -- the majority of buildings have not changed
- LARIAC4: 2014
- LARIAC5: 2017

- STATUS: "New" in LARIAC5 does not necessarily mean that the building is newly built, but rather that the footprint was newly added to the dataset (although it could also mean the former)

In [53]:
# Count footprints per SOURCE value (the acquisition / provider each outline came from).
original['SOURCE'].value_counts()

LARIAC2                   2525887
LARIAC5                    380885
LARIAC4                    172877
Pasadena                   102449
Glendale: Merrick & Co      39939
LARIAC 2006 4-in            32504
Glendale: LARIAC2            8099
Name: SOURCE, dtype: int64

In [54]:
# Show only the footprints whose SOURCE is LARIAC5 (the 2017 acquisition).
original.loc[original['SOURCE'].eq('LARIAC5')]

Unnamed: 0,OBJECTID,CODE,BLD_ID,HEIGHT,ELEV,SOURCE,DATE_,STATUS,OLD_BLD_ID,AREA,SHAPE_Length,SHAPE_Area,geometry
191,192,Building,201700191458,14.0764,2441.607267,LARIAC5,2017,New,,479.228247,0.000270,4.384342e-09,"MULTIPOLYGON (((-118.28347 34.76224, -118.2835..."
192,193,Building,201700191459,21.8400,2451.560000,LARIAC5,2017,Modified,476077099936,3402.833449,0.000912,3.113170e-08,"MULTIPOLYGON (((-118.28494 34.76243, -118.2849..."
193,194,Building,201700191460,9.1749,2364.069897,LARIAC5,2017,New,,369.857773,0.000234,3.383713e-09,"MULTIPOLYGON (((-117.95152 34.76276, -117.9515..."
194,195,Building,201700191461,10.8125,2317.453179,LARIAC5,2017,New,,296.054795,0.000210,2.708560e-09,"MULTIPOLYGON (((-118.16211 34.76287, -118.1621..."
195,196,Building,201700191462,9.4900,2431.180000,LARIAC5,2017,Modified,477460100035,321.684025,0.000222,2.943015e-09,"MULTIPOLYGON (((-118.28038 34.76260, -118.2804..."
...,...,...,...,...,...,...,...,...,...,...,...,...,...
3262635,3262636,Building,201700194794,18.3300,3643.940000,LARIAC5,2017,Modified,528284985731,3454.282781,0.000821,3.148455e-08,"MULTIPOLYGON (((-118.11071 34.44893, -118.1106..."
3262636,3262637,Building,201700194795,13.9250,3150.420684,LARIAC5,2017,New,,307.492951,0.000213,2.802999e-09,"MULTIPOLYGON (((-118.16682 34.45817, -118.1668..."
3262637,3262638,Building,201700194796,20.2400,3060.570000,LARIAC5,2017,Modified,2014175190000,9163.545106,0.001727,8.353382e-08,"MULTIPOLYGON (((-118.16446 34.46047, -118.1644..."
3262638,3262639,Building,201700194797,15.6000,3332.572777,LARIAC5,2017,New,,314.902875,0.000222,2.871037e-09,"MULTIPOLYGON (((-118.02339 34.47245, -118.0234..."


Some BLD_ID values are duplicated; all of the duplicates appear to come from 2014. They correspond to **different** buildings, so do not discard them.

In [49]:
# Occurrences of each BLD_ID; the next cell filters this to find duplicated IDs.
o = original['BLD_ID'].value_counts()

In [50]:
# BLD_ID values that occur more than once.
o.loc[o.gt(1)]

2014151251030000    2
2014151253310000    2
2014151253180000    2
2014151253220000    2
2014151244540000    2
                   ..
2014151256280000    2
2014151258760000    2
2014151258790000    2
2014151258800000    2
2014151256290000    2
Name: BLD_ID, Length: 950, dtype: int64

In [None]:
# Inspect the rows sharing one of the duplicated BLD_IDs listed above.
# NOTE(review): the ID is compared as a string -- presumably BLD_ID is stored as
# text; confirm, because against an integer column this would match nothing.
# This cell has no execution count, so it was never run in the saved notebook.
original[original['BLD_ID'] == '2014151258760000']

# See if footprint data actually captures ADUs

It does! Pretty well too

In [3]:
from shapely.geometry import box, Point

In [62]:
# Small rectangle used for the ADU spot check. shapely's box takes
# (minx, miny, maxx, maxy); the values look like lon/lat degrees.
small_box = box(-118.32214889946084, 33.95636717424377, -118.31886643950786, 33.95801022890701)

In [64]:
# Read only the footprints that fall within `small_box` (bbox filter) instead of
# the whole county, to check whether ADUs show up in the footprint data.
test_adu = gpd.read_file(os.path.join(fp, 'Countywide_Building_Outlines_(2017).geojson'), bbox=small_box)

In [67]:
# Write the spot-check footprints to disk as 'test_adu_la'.
# NOTE(review): no driver or file extension is given, so geopandas presumably
# falls back to its default ESRI Shapefile driver, unlike the explicit GeoJSON
# export at the end of the notebook -- confirm this is intended.
test_adu.to_file('test_adu_la')

  test_adu.to_file('test_adu_la')


In [65]:
# Number of footprints returned for the spot-check rectangle.
len(test_adu)

104

# Subset building footprints to 4 areas
- South LA
- Torrance/Long Beach
- Valley
- Mid-Wilshire ish

In [4]:
# Study-area rectangles. shapely's box takes (minx, miny, maxx, maxy); the bounds
# are spelled out by keyword so the coordinate order (values look like lon/lat
# degrees) is unambiguous.
south_la = box(
    minx=-118.36618406252838, miny=33.92399439434224,
    maxx=-118.24340101311658, maxy=34.01043519525057,
)
mid_wilshire = box(
    minx=-118.42264979853323, miny=34.020425261173386,
    maxx=-118.3274039386137, maxy=34.051457250941674,
)
torrance = box(
    minx=-118.38894671860481, miny=33.76190036981278,
    maxx=-118.26456955803374, maxy=33.837507964798,
)
valley = box(
    minx=-118.5947617037504, miny=34.1518934898276,
    maxx=-118.40286108520485, maxy=34.278004449211494,
)

In [5]:
from shapely.ops import unary_union

# Combine the four study-area rectangles into one geometry that can be passed as
# a single read mask. Their latitude ranges do not overlap, so the union is four
# disjoint rectangles.
multipol = unary_union([south_la, mid_wilshire, torrance, valley])

In [6]:
# Read only the footprints that intersect the combined study-area geometry
# (`mask=multipol`) rather than the whole county.
# Only `select_footprints` is bound here: `test_adu` must keep pointing at the
# small ADU spot-check sample and must not be aliased to this much larger subset.
select_footprints = gpd.read_file(
    os.path.join(fp, 'Countywide_Building_Outlines_(2017).geojson'),
    mask=multipol,
)

In [7]:
# Number of footprints that intersect the four study areas.
len(select_footprints)

587652

In [8]:
# Export the study-area footprints as GeoJSON under the local data/ folder.
# NOTE(review): the original note read "MOVED TO OAK" -- presumably the written
# file was afterwards relocated to the /oak storage used elsewhere in this
# notebook; confirm the current location before relying on this relative path.
select_footprints.to_file('data/los_angeles/la_select_footprints.geojson', driver='GeoJSON')