In [1]:
import pandas as pd
import numpy as np
import geopandas as gpd
import psycopg2
from geoalchemy2 import Geometry, WKTElement
from sqlalchemy import *
from shapely.geometry import MultiPolygon
from zipfile import ZipFile
import requests 
import sys

In [2]:
import yaml

with open('../config/postgres.yaml') as f:
    engine_configs = yaml.load(f, Loader=yaml.FullLoader)
    
try:
    engine = create_engine('postgresql://{username}:{password}@{host}:{port}/{dbname}'.format(**engine_configs))
except Exception as e:
    print("Uh oh, can't connect. Invalid dbname, user or password?")
    print(e)

In [3]:
def process_geometry_SQL_insert(gdf):
    gdf['geom'] = gdf['geometry'].apply(lambda x: WKTElement((MultiPolygon([x]) if x.geom_type == 'Polygon' else x).wkt, srid=4326))
    gdf = gdf.drop('geometry', 1)
    return gdf

In [4]:
CITY='LA'
NEIGHBORHOOD_SIZE = 805 # 805 OR 1609

In [51]:
bounds_gdf = gpd.read_file('zip://../../data/LA/boundary/boundary.zip')
bounds_gdf = bounds_gdf[(bounds_gdf.CITY_NAME == 'Los Angeles') & (bounds_gdf.FEAT_TYPE == 'Land')]
bounds_gdf = bounds_gdf[['geometry']]
bounds_gdf['city'] = CITY
bounds_gdf.head()

Unnamed: 0,geometry,city
332,"MULTIPOLYGON (((-118.24364 33.76163, -118.2434...",LA1m


In [52]:
insert_gdf = process_geometry_SQL_insert(bounds_gdf)
insert_gdf.to_sql('boundary', engine, if_exists='append', index=False, dtype={'geom': Geometry('MultiPolygon', srid=4326)})

## Spatial groups and blocks_group

In [53]:
block_groups_gdf = gpd.read_file('zip://../../data/LA/blocks_group/cb_2013_06_bg_500k.zip')
block_groups_gdf = block_groups_gdf[['GEOID', 'geometry']]
block_groups_gdf = block_groups_gdf.to_crs({'init': 'epsg:4326'}) 
block_groups_gdf.head()

  return _prepare_from_string(" ".join(pjargs))


Unnamed: 0,GEOID,geometry
0,60830030071,"POLYGON ((-119.82937 34.42047, -119.82287 34.4..."
1,60730178132,"POLYGON ((-117.34455 33.14537, -117.34356 33.1..."
2,60590524172,"POLYGON ((-117.75759 33.71208, -117.75095 33.7..."
3,60230101021,"POLYGON ((-124.04615 41.46185, -124.04192 41.4..."
4,60375773001,"POLYGON ((-118.14651 33.75791, -118.14617 33.7..."


In [54]:
block_groups_gdf = gpd.sjoin(block_groups_gdf, bounds_gdf, how="inner", op='intersects').drop('index_right', axis=1)

  "(%s != %s)" % (left_df.crs, right_df.crs)


In [55]:
block_groups_gdf = block_groups_gdf.rename(columns={'GEOID': 'original_id'})
block_groups_gdf['city'] = CITY

In [56]:
insert_gdf = process_geometry_SQL_insert(block_groups_gdf)
insert_gdf.to_sql('temptable_{}'.format(CITY.lower()), engine, if_exists='replace', index=False, dtype={'geom': Geometry('MultiPolygon', srid=4326)})

In [60]:
sql = """
INSERT INTO blocks_group (original_id, city, geom) 
SELECT s.original_id, s.city, ST_Multi(ST_Intersection(s.geom, b.geom))
FROM temptable_{tempname} as s
INNER JOIN boundary b ON ST_Intersects(s.geom, b.geom) AND (NOT ST_Touches(s.geom, b.geom)) AND b.city=s.city
where s.city='{city}' and ST_Area(ST_Intersection(s.geom, b.geom))/ST_Area(s.geom) > 0.5;
""".format(city=CITY, tempname=CITY.lower())

result = engine.execute(text(sql))

## Neighborhoods

In [61]:
sql = """INSERT INTO spatial_groups (city, core_geom, core_id, lower_ids, spatial_name, approx_geom)
SELECT  a.city, a.geom as core_geom, a.bid as core_id, array_agg(b.bid), 'ego', ST_multi(ST_Union(b.geom))
FROM blocks_group a
INNER JOIN blocks_group b ON a.city = b.city AND (a.bid = b.bid OR ST_DWithin(a.geom::geography, ST_Centroid(b.geom)::geography, {distance}) OR st_touches(a.geom, b.geom)) 
where a.city='{city}'
GROUP BY a.bid, a.geom, a.city;
delete from spatial_groups where ST_Area(approx_geom::geography) < 250000 and spatial_name='ego';
""".format(city=CITY, tempname=CITY.lower(), distance=NEIGHBORHOOD_SIZE)

result = engine.execute(text(sql))

## Blocks

In [62]:
block_gdf = gpd.read_file('zip://../../data/LA/block/CENSUS_BLOCKS_2010.zip')
block_gdf = block_gdf[['geometry']]
block_gdf = block_gdf.to_crs({'init': 'epsg:4326'}) 
block_gdf.head()

  return _prepare_from_string(" ".join(pjargs))


Unnamed: 0,geometry
0,"POLYGON ((-118.31961 33.88241, -118.31960 33.8..."
1,"POLYGON ((-118.31547 33.82443, -118.31499 33.8..."
2,"POLYGON ((-118.38992 33.77734, -118.39009 33.7..."
3,"POLYGON ((-118.33019 33.82080, -118.33018 33.8..."
4,"POLYGON ((-118.34505 33.82928, -118.34505 33.8..."


In [63]:
insert_gdf = process_geometry_SQL_insert(block_gdf)
insert_gdf.to_sql('temptable_{}'.format(CITY.lower()), engine, if_exists='replace', index=False, dtype={'geom': Geometry('MultiPolygon', srid=4326)})

In [64]:
sql = """
insert into block (sp_id, geom, city, geog, greater_1sm) select bid, geom, city, geom::geography, ST_AREA(geom::geography)>2.59e+6 
from(
    SELECT bid, st_multi(geom) as geom, city, ROW_NUMBER() OVER (PARTITION BY geom ORDER by area DESC) AS r
    from (
        select b.bid, c.geom, b.city, ST_Area(ST_Intersection(b.geom, c.geom)) as area
        from temptable_{tempname} as c
        inner join blocks_group as b on ST_Intersects(b.geom, c.geom)
        where b.city = '{city}'
    ) as dtable
) x
WHERE x.r = 1;
""".format(city=CITY, tempname=CITY.lower())

result = engine.execute(text(sql))

In [65]:
sql = """
UPDATE block AS b SET geom=ST_Multi(ST_Intersection(b.geom, s.geom))
FROM boundary AS s
WHERE ST_Intersects(b.geom, s.geom) AND b.city=s.city AND s.city='{city}' AND NOT ST_Contains(s.geom, b.geom);
""".format(city=CITY, tempname=CITY.lower())

result = engine.execute(text(sql))

In [66]:
## Some blocks_group do not have blocks
sql = """
DELETE FROM blocks_group bg
WHERE NOT EXISTS(SELECT * FROM block b WHERE b.sp_id = bg.bid) AND bg.city='{city}';
""".format(city=CITY, tempname=CITY.lower())

result = engine.execute(text(sql))

## Census

In [67]:
zip_file = ZipFile('../../data/LA/employment/ACS_13_5YR_B23025.zip')
zip_file.infolist()

[<ZipInfo filename='ACS_13_5YR_B23025_with_ann.csv' compress_type=deflate file_size=1011951 compress_size=202616>,
 <ZipInfo filename='ACS_13_5YR_B23025_metadata.csv' compress_type=deflate file_size=830 compress_size=218>,
 <ZipInfo filename='ACS_13_5YR_B23025.txt' compress_type=deflate file_size=3712 compress_size=1360>,
 <ZipInfo filename='aff_download_readme_ann.txt' compress_type=deflate file_size=1062 compress_size=463>]

In [68]:
employment_df = pd.read_csv(zip_file.open('ACS_13_5YR_B23025_with_ann.csv'))
employment_df = employment_df.rename(columns={'GEO.id2': 'original_id', 'HD01_VD02': 'inforce', 'HD01_VD04': 'employed', 'HD01_VD06': 'armed'})
employment_df = employment_df[['original_id', 'inforce', 'employed', 'armed']]
# Skip first header line
employment_df = employment_df[employment_df['original_id'] != 'Id2']

employment_df['inforce'] = employment_df['inforce'].astype(int)
employment_df['employed'] = employment_df['employed'].astype(int)
employment_df['armed'] = employment_df['armed'].astype(int)
employment_df.head()

Unnamed: 0,original_id,inforce,employed,armed
1,60371011101,1043,900,0
2,60371011102,927,859,0
3,60371011103,622,562,0
4,60371011221,1492,1379,0
5,60371011222,742,667,0


In [69]:
zip_file = ZipFile('../../data/LA/population/ACS_13_5YR_B01003.zip')
zip_file.infolist()

[<ZipInfo filename='ACS_13_5YR_B01003_with_ann.csv' compress_type=deflate file_size=731365 compress_size=90938>,
 <ZipInfo filename='ACS_13_5YR_B01003_metadata.csv' compress_type=deflate file_size=114 compress_size=91>,
 <ZipInfo filename='ACS_13_5YR_B01003.txt' compress_type=deflate file_size=3662 compress_size=1319>,
 <ZipInfo filename='aff_download_readme_ann.txt' compress_type=deflate file_size=1062 compress_size=463>]

In [70]:
pop_df = pd.read_csv(zip_file.open('ACS_13_5YR_B01003_with_ann.csv'))
pop_df = pop_df.rename(columns={'GEO.id2': 'original_id', 'HD01_VD01': 'population'})
pop_df = pop_df[['original_id', 'population']]
# Skip first header line
pop_df = pop_df[pop_df['original_id'] != 'Id2']

pop_df['population'] = pop_df['population'].astype(int)
pop_df.head()

Unnamed: 0,original_id,population
1,60371011101,2286
2,60371011102,1601
3,60371011103,1125
4,60371011221,2199
5,60371011222,1305


In [71]:
zip_file = ZipFile('../../data/LA/population/ACS_13_5YR_B25001.zip')
zip_file.infolist()

[<ZipInfo filename='ACS_13_5YR_B25001_with_ann.csv' compress_type=deflate file_size=722516 compress_size=85167>,
 <ZipInfo filename='ACS_13_5YR_B25001_metadata.csv' compress_type=deflate file_size=114 compress_size=91>,
 <ZipInfo filename='ACS_13_5YR_B25001.txt' compress_type=deflate file_size=3656 compress_size=1315>,
 <ZipInfo filename='aff_download_readme_ann.txt' compress_type=deflate file_size=1062 compress_size=463>]

In [72]:
dwellings_df = pd.read_csv(zip_file.open('ACS_13_5YR_B25001_with_ann.csv'), dtype={'GEO.id2': str})
dwellings_df = dwellings_df.rename(columns={'GEO.id2': 'original_id', 'HD01_VD01': 'dwellings'})
dwellings_df = dwellings_df[['original_id', 'dwellings']]

dwellings_df['dwellings'] = dwellings_df['dwellings'].astype(int)
dwellings_df.head()

Unnamed: 0,original_id,dwellings
0,60371011101,738
1,60371011102,635
2,60371011103,384
3,60371011221,851
4,60371011222,505


In [73]:
print(len(pop_df))
census_df = pd.merge(employment_df, pop_df, on='original_id')
census_df = pd.merge(census_df, dwellings_df, on='original_id')
print(len(census_df))
census_df.head()

6425
6425


Unnamed: 0,original_id,inforce,employed,armed,population,dwellings
0,60371011101,1043,900,0,2286,738
1,60371011102,927,859,0,1601,635
2,60371011103,622,562,0,1125,384
3,60371011221,1492,1379,0,2199,851
4,60371011222,742,667,0,1305,505


In [74]:
census_df.to_sql('temptable_{}'.format(CITY.lower()), engine, if_exists='replace', index=False)

In [75]:
sql = """
insert into census (bid, population, employed, inforce, tot_survey, dwellings, city) 
select b.bid, c.population, c.employed+c.armed, c.inforce, c.population, c.dwellings, '{city}' 
from temptable_{tempname} c 
inner join blocks_group b on b.original_id = c.original_id
where b.city='{city}';
""".format(city=CITY, tempname=CITY.lower())

result = engine.execute(text(sql))

### Residential stability

In [76]:
zip_file = ZipFile('../../data/LA/residential_stability/ACS_13_5YR_B07201.zip')
zip_file.infolist()

[<ZipInfo filename='ACS_13_5YR_B07201.csv' compress_type=deflate file_size=1227768 compress_size=225538>,
 <ZipInfo filename='ACS_13_5YR_B07201_metadata.csv' compress_type=deflate file_size=3381 compress_size=400>,
 <ZipInfo filename='ACS_13_5YR_B07201.txt' compress_type=deflate file_size=4042 compress_size=1513>,
 <ZipInfo filename='aff_download_readme.txt' compress_type=deflate file_size=1951 compress_size=802>]

In [77]:
stab_df = pd.read_csv(zip_file.open('ACS_13_5YR_B07201.csv'))
stab_df = stab_df.rename(columns={'GEO.id2': 'original_id', 'HD01_VD01': 'total', 'HD01_VD02': 'stable'})
stab_df = stab_df[['original_id', 'total', 'stable']]
# Skip first header line
stab_df = stab_df[stab_df['original_id'] != 'Id2']

stab_df['total'] = stab_df['total'].astype(int)
stab_df['stable'] = stab_df['stable'].astype(int)
stab_df.head()

Unnamed: 0,original_id,total,stable
1,60371011101,2277,2125
2,60371011102,1560,1308
3,60371011103,1092,1034
4,60371011221,2199,1883
5,60371011222,1295,1203


In [78]:
zip_file = ZipFile('../../data/LA/tenure/ACS_13_5YR_B25003.zip')
zip_file.infolist()

[<ZipInfo filename='ACS_13_5YR_B25003.csv' compress_type=deflate file_size=811352 compress_size=130647>,
 <ZipInfo filename='ACS_13_5YR_B25003_metadata.csv' compress_type=deflate file_size=312 compress_size=140>,
 <ZipInfo filename='ACS_13_5YR_B25003.txt' compress_type=deflate file_size=3658 compress_size=1310>,
 <ZipInfo filename='aff_download_readme.txt' compress_type=deflate file_size=1951 compress_size=802>]

In [79]:
tenure_df = pd.read_csv(zip_file.open('ACS_13_5YR_B25003.csv'))
tenure_df = tenure_df.rename(columns={'GEO.id2': 'original_id', 'HD01_VD01': 'total2', 'HD01_VD02': 'owner'})
tenure_df = tenure_df[['original_id', 'total2', 'owner']]
# Skip first header line
tenure_df = tenure_df[tenure_df['original_id'] != 'Id2']

tenure_df['total2'] = tenure_df['total2'].astype(int)
tenure_df['owner'] = tenure_df['owner'].astype(int)
tenure_df.head()

Unnamed: 0,original_id,total2,owner
1,60371011101,705,332
2,60371011102,603,257
3,60371011103,374,270
4,60371011221,782,631
5,60371011222,446,414


In [80]:
res_stability_df = pd.merge(stab_df, tenure_df, on='original_id')
res_stability_df.head()

Unnamed: 0,original_id,total,stable,total2,owner
0,60371011101,2277,2125,705,332
1,60371011102,1560,1308,603,257
2,60371011103,1092,1034,374,270
3,60371011221,2199,1883,782,631
4,60371011222,1295,1203,446,414


In [81]:
res_stability_df.to_sql('temptable_{}'.format(CITY.lower()), engine, if_exists='replace', index=False)

In [82]:
sql = """
INSERT INTO residential_stability (bid, city, total, stable, total2, owner) 
SELECT b.bid, '{city}', c.total, c.stable, c.total2, c.owner 
FROM temptable_{tempname} c 
INNER JOIN blocks_group b ON b.original_id = c.original_id
where b.city='{city}';
""".format(city=CITY, tempname=CITY.lower())

result = engine.execute(text(sql))

### Ethnic diversity

In [83]:
zip_file = ZipFile('../../data/LA/ethnic_diversity/ACS_13_5YR_B02001.zip')
zip_file.infolist()

[<ZipInfo filename='ACS_13_5YR_B02001.csv' compress_type=deflate file_size=1093189 compress_size=227763>,
 <ZipInfo filename='ACS_13_5YR_B02001_metadata.csv' compress_type=deflate file_size=1373 compress_size=336>,
 <ZipInfo filename='ACS_13_5YR_B02001.txt' compress_type=deflate file_size=3650 compress_size=1305>,
 <ZipInfo filename='aff_download_readme.txt' compress_type=deflate file_size=1951 compress_size=802>]

In [84]:
eth_df = pd.read_csv(zip_file.open('ACS_13_5YR_B02001.csv'))
eth_df = eth_df.rename(columns={'GEO.id2': 'original_id', 'HD01_VD02': 'white', 'HD01_VD03': 'black', 'HD01_VD04': 'native', 'HD01_VD05': 'asian', 'HD01_VD06': 'native2', 
                               'HD01_VD08': 'o1', 'HD01_VD09': 'o2', 'HD01_VD10': 'o3'})
eth_df = eth_df[['original_id', 'white', 'black', 'asian', 'native', 'native2', 'o1', 'o2', 'o3']]
# Skip first header line
eth_df = eth_df[eth_df['original_id'] != 'Id2']

for x in ['white', 'black', 'asian', 'native', 'native2', 'o1', 'o2', 'o3']:
    eth_df[x] = eth_df[x].astype(int)
    
eth_df['other'] = eth_df['o1'] + eth_df['o2'] + eth_df['o3']
eth_df = eth_df.drop(['o1', 'o2', 'o3'], axis=1)
eth_df.head()

Unnamed: 0,original_id,white,black,asian,native,native2,other
1,60371011101,1691,18,194,0,0,186
2,60371011102,1009,18,147,7,0,82
3,60371011103,940,8,86,0,0,0
4,60371011221,1431,0,378,6,0,368
5,60371011222,1049,30,89,0,0,166


In [85]:
eth_df.to_sql('temptable_{}'.format(CITY.lower()), engine, if_exists='replace', index=False)

In [86]:
sql = """
INSERT INTO ethnic_diversity (bid, city, race1, race2, race3, race4, race5, race6) 
SELECT b.bid, '{city}', c.white, c.black, c.native, c.asian, c.native2, c.other
FROM temptable_{tempname} c 
INNER JOIN blocks_group b ON b.original_id = c.original_id
where b.city='{city}';
""".format(city=CITY, tempname=CITY.lower())

result = engine.execute(text(sql))

### Poverty

In [87]:
zip_file = ZipFile('../../data/LA/poverty/ACS_13_5YR_C17002.zip')
zip_file.infolist()

[<ZipInfo filename='ACS_13_5YR_C17002_with_ann.csv' compress_type=deflate file_size=1034254 compress_size=223724>,
 <ZipInfo filename='ACS_13_5YR_C17002_metadata.csv' compress_type=deflate file_size=759 compress_size=222>,
 <ZipInfo filename='ACS_13_5YR_C17002.txt' compress_type=deflate file_size=3732 compress_size=1372>,
 <ZipInfo filename='aff_download_readme_ann.txt' compress_type=deflate file_size=1062 compress_size=463>]

In [88]:
pov_df = pd.read_csv(zip_file.open('ACS_13_5YR_C17002_with_ann.csv'))
pov_df = pov_df.rename(columns={'GEO.id2': 'original_id', 'HD01_VD01': 'total', 'HD01_VD02': 'p50', 'HD01_VD03': 'p99'})
pov_df = pov_df[['original_id', 'total', 'p50', 'p99']]
# Skip first header line
pov_df = pov_df[pov_df['original_id'] != 'Id2']

for x in ['total', 'p50', 'p99']:
    pov_df[x] = pov_df[x].astype(int)
    
pov_df['poors'] = pov_df['p50'] + pov_df['p99']
pov_df = pov_df.drop(['p50', 'p99'], axis=1)
pov_df.head()

Unnamed: 0,original_id,total,poors
1,60371011101,2278,778
2,60371011102,1531,112
3,60371011103,1125,43
4,60371011221,2194,137
5,60371011222,1305,11


In [89]:
pov_df.to_sql('temptable_{}'.format(CITY.lower()), engine, if_exists='replace', index=False)

In [90]:
sql = """
INSERT INTO poverty_index (bid, city, total, poors) 
SELECT b.bid, '{city}', c.total, c.poors
FROM temptable_{tempname} c 
INNER JOIN blocks_group b ON b.original_id = c.original_id
where b.city='{city}';
""".format(city=CITY, tempname=CITY.lower())

result = engine.execute(text(sql))

## Buildings

In [91]:
#bld_gdf = gpd.read_file('../../data/LA/buildings/LARIAC_Buildings_2014.gdb', driver="OpenFileGDB", layer='LARIAC4_BUILDINGS_2014')
bld_gdf = gpd.read_file('../../data/LA/buildings/LARIAC4Buildings_converted.gpkg', driver="GPKG")

bld_gdf = bld_gdf.rename(columns={'HEIGHT': 'height'})
bld_gdf = bld_gdf[bld_gdf['CODE'] == 'Building']

bld_gdf = bld_gdf[['height', 'geometry', 'YearBuilt1']]
bld_gdf = bld_gdf.to_crs({'init': 'epsg:4326'}) 

bld_gdf.head()

  return _prepare_from_string(" ".join(pjargs))


Unnamed: 0,height,geometry,YearBuilt1
0,25.63,"MULTIPOLYGON (((-118.21884 33.78390, -118.2190...",
1,20.43,"MULTIPOLYGON (((-118.21852 33.78928, -118.2182...",1989.0
2,10.65,"MULTIPOLYGON (((-118.21823 33.78878, -118.2182...",1955.0
3,15.08,"MULTIPOLYGON (((-118.21863 33.78758, -118.2183...",1958.0
4,35.87,"MULTIPOLYGON (((-118.21815 33.78762, -118.2177...",1970.0


In [92]:
len(bld_gdf)

3088397

In [93]:
insert_gdf = process_geometry_SQL_insert(bld_gdf)
insert_gdf.to_sql('temptable_b_{}'.format(CITY.lower()), engine, if_exists='replace', index=False, dtype={'geom': Geometry('MultiPolygon', srid=4326)})

In [94]:
sql = """
UPDATE temptable_b_{tempname} p SET geom=ST_Multi(ST_buffer(p.geom, 0.0)) 
WHERE (NOT ST_IsValid(p.geom))
""".format(city=CITY, tempname=CITY.lower())

result = engine.execute(text(sql))

In [95]:
sql = """
INSERT INTO building (bid, city, geom, height, area) 
SELECT bid, '{city}', geom, height, barea
FROM (
    SELECT bid, geom, height, barea, ROW_NUMBER() OVER (PARTITION BY geom ORDER BY area DESC) AS r
    from (
        SELECT p.geom, p.height, ST_Area(p.geom::geography) as barea, d.bid, ST_Area(ST_Intersection(p.geom, d.geom)) as area
        FROM temptable_b_{tempname} as p
        INNER JOIN blocks_group as d on ST_Intersects(p.geom, d.geom) AND NOT ST_Touches(p.geom, d.geom)
        WHERE d.city = '{city}' AND ST_Area(p.geom::geography) >= 40
        ) as dtable
) x
WHERE x.r = 1;
""".format(city=CITY, tempname=CITY.lower())

result = engine.execute(text(sql))

## Property age

In [96]:
sql = """
INSERT INTO property_age (bid, age, area, city) 
SELECT bid, age, area_building, '{city}'
FROM (
    SELECT bid, age, area_building, ROW_NUMBER() OVER (PARTITION BY geom ORDER BY area DESC) AS r
    from (
        SELECT p.geom, p."YearBuilt1"::int as age, ST_Area(p.geom::geography) as area_building, d.bid, ST_Area(ST_Intersection(p.geom, d.geom)) as area
        FROM temptable_b_{tempname} as p
        INNER JOIN blocks_group as d on ST_Intersects(p.geom, d.geom) AND NOT ST_Touches(p.geom, d.geom)
        WHERE d.city = '{city}' AND ST_Area(p.geom::geography) >= 40 AND p."YearBuilt1" IS NOT NULL
        ) as dtable
) x
WHERE x.r = 1;
""".format(city=CITY, tempname=CITY.lower())

result = engine.execute(text(sql))

## Land use

ref: http://dts.edatatrace.com/dts3/content/doc/whelp/mergedProjects/dts2tt/mergedProjects/dts2ttcs/land_use_la.htm

In [97]:
land_gdf = gpd.read_file('zip://../../data/LA/land_use/Parcels 2014 Tax Roll.zip')
#land_gdf = land_gdf[(~(land_gdf['geometry'].isnull())) & (~(land_gdf['UseCode'].isnull()))]
land_gdf = land_gdf.drop_duplicates(subset=['ain'])

#land_gdf = land_gdf.rename(columns={'SQFTmain': 'sqftmain', 'UseCode': 'usecode', 'YearBuilt': 'yearbuilt', 'Roll_totLa': 'value'})

#land_gdf = land_gdf[['AssessorID', 'sqftmain', 'usecode', 'geometry', 'value']]

land_gdf = land_gdf[['ain', 'geometry']]

land_gdf.head()

Unnamed: 0,ain,geometry
0,7024005002,"POLYGON ((-118.06987 33.86189, -118.06987 33.8..."
1,7024008023,"POLYGON ((-118.06811 33.86152, -118.06830 33.8..."
2,7024001042,"POLYGON ((-118.06854 33.86550, -118.06857 33.8..."
3,7024005004,"POLYGON ((-118.06987 33.86160, -118.07020 33.8..."
4,7024006008,"POLYGON ((-118.07091 33.85923, -118.07125 33.8..."


In [98]:
zip_file = ZipFile('../../data/LA/land_use/parcels_data_2013.csv.zip')
land_2013_df = pd.read_csv(zip_file.open('parcels_data_2013.csv'), dtype={'AIN': str})
land_2013_df = land_2013_df.rename(columns={'SQFTmain': 'sqftmain', 
                                            'AIN': 'ain',
                                            'PropertyUseCode': 'usecode', 
                                            'YearBuilt': 'yearbuilt', 'TotalValue': 'value'})
land_2013_df = land_2013_df[['ain', 'sqftmain', 'usecode', 'value']]
land_2013_df.head()

  interactivity=interactivity, compiler=compiler, result=result)


Unnamed: 0,ain,sqftmain,usecode,value
0,5653015020,1498,101,458864.0
1,5653015021,1532,100,363890.0
2,5653015022,1186,101,570000.0
3,5653015023,2192,101,618177.0
4,5653015024,2108,100,505364.0


In [99]:
land_2013_df = land_2013_df.drop_duplicates(subset=['ain'])

In [100]:
land_gdf = pd.merge(land_gdf, land_2013_df, on='ain')
print(len(land_gdf))

2378363


In [101]:
wrong_pids = ['4211017901', '4211017804', '4218005900', '4221031008',
       '4218020900', '4224013902', '2109001903', '2678027900',
       '2679025900', '2680020901', '2687017900', '2688030900',
       '2707003011', '2708010900', '2726009901', '2726012900',
       '2746010042', '2746013901', '2761001906', '2779016900',
       '2779047900', '2784003801', '2779010900', '2708021001',
       '2111029903', '4211016902', '4211015904', '4211007916',
       '2786002902', '2727021907', '4211014800', '4211017805',
       '4218005902', '2108025900', '2678020900', '2687023012',
       '2687020903', '2688024901', '2688031900', '2786002901',
       '2708010013', '2708020005', '2726010900', '2761030904',
       '2779017900', '2780005900', '2138014904', '2783028902',
       '4211014902', '4211017807', '4224013901', '2108026900',
       '2109001902', '2113006900', '2677016900', '2679016901',
       '2685019900', '2689016901', '2688043900', '2786002813',
       '2726014900', '2761032900', '2770018808', '2780004900',
       '2681011902', '2111029902', '2779005900', '4218005901',
       '2680018902', '2707003005', '2708020001', '2707002004',
       '2761001907', '4211016901', '4211015902', '4211007917',
       '2148032902', '4211007919', '4211014904', '4211017900',
       '4211017803', '4211014901', '2108031900', '2685013032',
       '2685013031', '2686003008', '2685023030', '2685018900',
       '2685013900', '2689017900', '2708020012', '2746005900',
       '2748001803', '2761031902', '2761040901', '2770018904',
       '2770018903', '2779010901', '2779011905', '2779020905',
       '2111029901', '4221022176', '2761001814', '4211007012',
       '4224013900', '2783028801', '2689019900', '2205008901',
       '2231018901', '2225010902', '2226017901', '2231002909',
       '2231017900', '2205007900', '4211017901', '4211017804', '4218005900', '4221031008',
       '4218020900', '4224013902', '5409013910', '5410015826',
       '2109001903', '2678027900', '2679025900', '2680020901',
       '2687017900', '2688030900', '2707003011', '2708010900',
       '2726009901', '2726012900', '2746010042', '2746013901',
       '2761001906', '2779016900', '2779047900', '2784003801',
       '5173021811', '5173020911', '5173023900', '5173020903',
       '2779010900', '2708021001', '5170011803', '2111029903',
       '4211016902', '4211015904', '4211007916', '5172014806',
       '5172014901', '2786002902', '2727021907', '4211014800',
       '4211017805', '4218005902', '5409013905', '5409013906',
       '5409015922', '5409014904', '5409021903', '5409019903',
       '2108025900', '2678020900', '2687023012', '2687020903',
       '2688024901', '2688031900', '2786002901', '2708010013',
       '2708020005', '2726010900', '2761030904', '2779017900',
       '2780005900', '5171024910', '5173020902', '5173020901',
       '5173023901', '2138014904', '5164004804', '5172013010',
       '5172013002', '5164004902', '2783028902', '4211014902',
       '4211017807', '4224013901', '5409020910', '5409020911',
       '5447017902', '2108026900', '2109001902', '2113006900',
       '2677016900', '2679016901', '2685019900', '2689016901',
       '2688043900', '2786002813', '2726014900', '2761032900',
       '2770018808', '2780004900', '5171024010', '2681011902',
       '2111029902', '2779005900', '4218005901', '2680018902',
       '2707003005', '2708020001', '2707002004', '5166001901',
       '5164017906', '2761001907', '5173022902', '4211016901',
       '4211015902', '4211007917', '5172013803', '5172013901',
       '2148032902', '4211007919', '4211014904', '5171015901',
       '4211017900', '4211017803', '4211014901', '5409014905',
       '2108031900', '2685013032', '2685013031', '2686003008',
       '2685023030', '2685018900', '2685013900', '2689017900',
       '2708020012', '2746005900', '2748001803', '2761031902',
       '2761040901', '2770018904', '2770018903', '2779010901',
       '2779011905', '2779020905', '5170010805', '5164004901',
       '5173021902', '5173023902', '5173020810', '5173021810',
       '5173021904', '5173023805', '2111029901', '4221022176',
       '5171014808', '2761001814', '5173022808', '5173022903',
       '5173022901', '4211007012', '4224013900', '2783028801',
       '2689019900', '5173024900', '5166001900', '2205008901',
       '2231018901', '2225010902', '2226017901', '2231002909',
       '2231017900', '2205007900', '5447032900', '2368001030', '2366035901', '2366036905',
       '2367015900', '2367018900', '2368019900', '2368023900',
       '2375018903', '2126038901', '2134016901', '2134024904',
       '2136015905', '2136017904', '2137013900', '2137014900',
       '2137015902', '2137012900', '2138006901', '2138022901',
       '2123022901', '5435038027', '5435038902', '5435039903',
       '5437028903', '5437028906', '5437034908', '5437028907',
       '5437035901', '5437034909', '5437034904', '5442010901',
       '5442010902', '5442002916', '5445011042', '5445005904',
       '5445006901', '5445007900', '5445010903', '5445006905',
       '5168023015', '5169029013', '5168016904', '5170010900',
       '2424042901', '2138014906', '5593012909', '5168023902',
       '5171015900', '5437035902', '5172013900', '5593001270',
       '5442009902', '2126033900', '5593018907', '5410002900',
       '2360002909', '2366033900', '2366033901', '2368007901',
       '2375021903', '2126038005', '2127011904', '2128031901',
       '2138006903', '2138011900', '2138014905', '5435039006',
       '5415002900', '5435036900', '5437028904', '5442002915',
       '5437028900', '5445006903', '5445007901', '5169029272',
       '5169029012', '5169016902', '5581003017', '5581003021',
       '5581004023', '5593001902', '5169028017', '5435038904',
       '2360003913', '5593002916', '5445004001', '5445004900',
       '5447027901', '5415003901', '5415003900', '2360014902',
       '2366020903', '2366027902', '2375004900', '2128003901',
       '2138011902', '2124018906', '5442010020', '5442002903',
       '5445008908', '5445007902', '5169029010', '5169015901',
       '2423030906', '2423031902', '2423035902', '5581003011',
       '5593018900', '5169016011', '5169029902', '5168017900',
       '5435037904', '2131010900', '5442009900', '2127001903',
       '5410006900', '2360012900', '2366026902', '2367018901',
       '2375019903', '2132009900', '2138017900', '2138017901',
       '2138023900', '2138029902', '2123021900', '2124001905',
       '5437029900', '5435039900', '5445011043', '5445012044',
       '5437028902', '5437036902', '5445008907', '5168016002',
       '5168016903', '5581003008', '5581004022', '5593001903',
       '5171014900', '5172014900', '5593002907', '5168017902',
       '5593001900', '5445004902', '5445005903', '5445002902',
       '5593001901', '2127005900', '2368010902', '5173024900',
       '2248029903', '2263020902', '2263016904', '2248001904',
       '2248028906', '2263013902', '2263015902', '2263021902',
       '2248001905', '2263024900', '2263014902', '2271001902', 
             '5173021811', '5173020911', '5173023900', '5171014809',
       '5171015900', '5172013900', '5173023901', '5164004804',
       '5164004902', '5173020910', '5173021903', '5173022902',
       '5171014900', '5173020907', '5173021902', '5173023902',
       '5172014900', '5173020810', '5173021810', '5173021904',
       '5173023805', '5173022808', '5173022903', '5173022901',
       '5173024900']
land_gdf = land_gdf[~land_gdf.ain.isin(wrong_pids)]

In [102]:
land_gdf['landuse'] = 'none'

land_gdf.loc[land_gdf['usecode'].str[:1] == '0', 'landuse'] = 'residential'

land_gdf.loc[(land_gdf['usecode'].str[:1].isin({'1', '2', '3', '4', '5', '7'})) & (~land_gdf['usecode'].isin({'7100', '8840'})), 'landuse'] = 'commercial'
land_gdf.loc[land_gdf['usecode'].str[:2].isin({'82', '83'}), 'landuse'] = 'commercial'
land_gdf.loc[land_gdf['usecode'].isin({'8820', '8000', '8821', '8822', '8823', '8824', '8825', '8826', '8827', '8828', '8829', '8830', '8831', '8832', '8833', '8834', '8835', '8855', '8861', '8862', '8863', '8864', '8865', '8872', '8873', '8874', '8800', '8890', '8900'}), 'landuse'] = 'commercial'

land_gdf.loc[land_gdf['usecode'].str[:1] == '6', 'landuse'] = 'recreational'
land_gdf.loc[land_gdf['usecode'].isin({'7100', '8840', '8840', '8841', '8842', '8843', '8844', '8845', '8847', '8848', '8849', '8851', '8852', '8853'}), 'landuse'] = 'recreational'

# Vacant
land_gdf.loc[land_gdf['usecode'].str[-1] == 'V', 'landuse'] = 'vacant'

#Fixes
land_gdf.loc[land_gdf['usecode'].isin({'8100', '8109', '810X', '8860', '8500'}), 'landuse'] = 'none'

In [103]:
land_gdf.loc[land_gdf.ain.isin(['7467032900', '7469018904', '7469030901', '7469030900',
       '7563001901', '7563001900', '7563002908', '7563002914',
       '6038013900', '5414020901', '5414020900', '2178007900',
       '2184026901', '5666025907', '6049025901', '4432001903',
       '4432005913', '4432005800', '4432006901', '4490011903',
       '4493014900', '4422003900', '4432002918', '4432002924',
       '4432002923', '4434001903', '5037027915', '5046013900',
       '5160001901', '5512004903', '5630030908', '4370012902',
       '4387002900', '5404014900', '5581011900', '5581012900',
       '5581010900', '5581013901', '5583025900', '5593002908',
       '5593002910', '5109022900', '5161004909', '2526004901',
       '2526004900', '2552007902', '2569021900', '5029017905',
       '4355012904', '5029020904', '2701001910', '4432002919',
       '7412014900', '7560028900', '2384024900', '5029017927',
       '5459004930', '7446001901', '7467025900', '7469028900',
       '5414027900', '2177034902', '2177034901', '5666024901',
       '2184005900', '4432001900', '4491006900', '4409001902',
       '4409001900', '4422002900', '4432002920', '4432003904',
       '5028004902', '5029017921', '5029017910', '2470002901',
       '2546013903', '2545022900', '4387002904', '4387017906',
       '4387016900', '5565005900', '5565004900', '5570021902',
       '5415004900', '5415012902', '5577019901', '5581016900',
       '5101002900', '2551012901', '2846003900', '7563002909',
       '5029017926', '5029017919', '5593002906', '2701001912',
       '4493014906', '5581007912', '4379029900', '4379028902',
       '4431009901', '4432003906', '5211021901', '2872001900',
       '4386003900', '4386005900', '2177034900', '7467031900',
       '7469018902', '7563002913', '7563002906', '7412012900',
       '2184028901', '2184026902', '2184028900', '5672021900',
       '6049025900', '6070004900', '4432005915', '4432006902',
       '4432005914', '4432001901', '4490010900', '4490011902',
       '4490024900', '4491009900', '4434001900', '4432002922',
       '4434001901', '4432002925', '4432002917', '5037028905',
       '5037028902', '2470001905', '2470002900', '2545024901',
       '5608001902', '5630030906', '5630030907', '4379027902',
       '4379027903', '4379027900', '4380034902', '4387002905',
       '5415012901', '5581017900', '5581014900', '5581026900',
       '5149031900', '5161005916', '5869016900', '4434001902',
       '5029017900', '5404015900', '5029020905', '2701001917',
       '5415005906', '5593002905', '4357004901', '5577011902',
       '7561025902', '5593002909', '2701002909', '4493015900',
       '2382015900', '4432001902', '7422017900', '7469018903',
       '7469029900', '7563001902', '7412012903', '7562021900',
       '7563006902', '2180024900', '2184005901', '2184027901',
       '2671001903', '4490011900', '4490017900', '4491001900',
       '4409001904', '4434005900', '4432002921', '5037028908',
       '5160001900', '4370012901', '4386008901', '4386015906',
       '4387002902', '5415004902', '5415005905', '5415005902',
       '5415012903', '5582001900', '5593002912', '5113008912',
       '5161005923', '2526003909', '2526003910', '2551003900',
       '2551015902', '2551012900', '2552004900', '2552007906',
       '2552009902', '2552007907', '2553017900', '2569022901',
       '5415005904', '5047014900', '5029017924', '5581008900',
       '5029017911', '5593002907', '4382029900', '4431009900',
       '7412015900', '5593002917', '4432004901', '2552005901',
       '7412010903', '4386004902', '2177034904', '2180026900',
       '2180025900', '5302002900', '5302006900', '2287009903',
       '2287009902', '2287009901', '2292014901', '2292014900',
       '2292013901', '5630030902', '5302001900', '2820019900',
       '5303025901']), 'landuse'] = 'recreational'


In [104]:
land_gdf = land_gdf.reset_index()
land_gdf.head()

Unnamed: 0,index,ain,geometry,sqftmain,usecode,value,landuse
0,0,7024005002,"POLYGON ((-118.06987 33.86189, -118.06987 33.8...",3033,100,248617.0,residential
1,1,7024008023,"POLYGON ((-118.06811 33.86152, -118.06830 33.8...",2036,101,487044.0,residential
2,2,7024001042,"POLYGON ((-118.06854 33.86550, -118.06857 33.8...",950,100,72176.0,residential
3,3,7024005004,"POLYGON ((-118.06987 33.86160, -118.07020 33.8...",2500,100,415616.0,residential
4,4,7024006008,"POLYGON ((-118.07091 33.85923, -118.07125 33.8...",2605,100,504556.0,residential


In [105]:
ins_gdf = process_geometry_SQL_insert(land_gdf)#[land_gdf.landuse != 'none'].copy())
ins_gdf.to_sql('temptable_{}'.format(CITY.lower()), engine, if_exists='replace', index=False, dtype={'geom': Geometry('MultiPolygon', srid=4326)})

In [106]:
sql = """
UPDATE temptable_{tempname} p SET geom=ST_Multi(ST_buffer(p.geom, 0.0)) WHERE NOT ST_Isvalid(p.geom);
""".format(city=CITY, tempname=CITY.lower())

result = engine.execute(text(sql))

In [107]:
sql = """
INSERT INTO land_uses (bid, city, use_type, area) 
SELECT bid, '{city}', landuse, SUM(area) 
FROM (
    SELECT bid, landuse, area, ROW_NUMBER() OVER (PARTITION BY index ORDER BY area DESC) AS r
    from (
        SELECT p.index, p.\"ain\" as pid, p.landuse, d.bid, ST_Area(ST_Intersection(p.geom, d.geom)::geography) as area
        FROM temptable_{tempname} as p
        INNER JOIN blocks_group as d on ST_Intersects(p.geom, d.geom) AND NOT ST_Touches(p.geom, d.geom)
        WHERE d.city = '{city}' AND p.landuse <> 'none' AND ST_Isvalid(p.geom)
        ) as dtable
) x
WHERE x.r = 1
GROUP BY bid, landuse;
""".format(city=CITY, tempname=CITY.lower())

result = engine.execute(text(sql))

## Land values

In [108]:
sql = """
INSERT INTO property_value (bid, area, value, city) 
SELECT bid, sqftmain, value, '{city}'
FROM (
    SELECT bid, sqftmain, value, ROW_NUMBER() OVER (PARTITION BY pid ORDER BY area DESC) AS r
    from (
        SELECT p.\"ain" as pid, p.sqftmain, d.bid, p.value, ST_Area(ST_Intersection(p.geom, d.geom)) as area
        FROM temptable_{tempname} as p
        INNER JOIN blocks_group as d on ST_Intersects(p.geom, d.geom) AND NOT ST_Touches(p.geom, d.geom)
        WHERE d.city = '{city}' AND p.landuse IN ('residential', 'commercial') AND ST_Isvalid(p.geom)
        ) as dtable
) x
WHERE x.r = 1;
""".format(city=CITY, tempname=CITY.lower())

result = engine.execute(text(sql))

## Unused areas

In [109]:
unused_gdf = gpd.read_file('zip://../../data/LA/unused_areas/tl_2012_06_arealm.zip')
unused_gdf = unused_gdf[['geometry', 'MTFCC']]
unused_gdf = unused_gdf.to_crs({'init': 'epsg:4326'}) 

unused_gdf = unused_gdf[unused_gdf['MTFCC'].isin({'K2180', 'K2181', 'K2182', 'K2183', 'K2184', 'K2185',
                                                 'K2186', 'K2187', 'K2188', 'K2189', 'K2190'})].drop('MTFCC', axis=1)

unused_gdf['type'] = 'park'
unused_gdf['city'] = CITY
unused_gdf.head()

  return _prepare_from_string(" ".join(pjargs))


Unnamed: 0,geometry,type,city
6,"POLYGON ((-120.67749 36.38351, -120.67749 36.3...",park,LA1m
7,"POLYGON ((-121.76418 38.00146, -121.76410 38.0...",park,LA1m
8,"POLYGON ((-121.94705 37.73679, -121.94646 37.7...",park,LA1m
9,"POLYGON ((-122.12754 38.01388, -122.12700 38.0...",park,LA1m
12,"POLYGON ((-122.15512 37.85694, -122.15510 37.8...",park,LA1m


In [110]:
ins_gdf = process_geometry_SQL_insert(unused_gdf)
ins_gdf.to_sql('unused_areas', engine, if_exists='append', index=False, dtype={'geom': Geometry('MultiPolygon', srid=4326)})

In [111]:
unused_gdf = gpd.read_file('zip://../../data/LA/unused_areas/tl_2012_06037_areawater.zip')
unused_gdf = unused_gdf[['geometry']]
unused_gdf = unused_gdf.to_crs({'init': 'epsg:4326'}) 
unused_gdf['type'] = 'water'
unused_gdf['city'] = CITY
unused_gdf.head()

  return _prepare_from_string(" ".join(pjargs))


Unnamed: 0,geometry,type,city
0,"POLYGON ((-118.19072 33.87436, -118.19052 33.8...",water,LA1m
1,"POLYGON ((-118.06469 34.06525, -118.06466 34.0...",water,LA1m
2,"POLYGON ((-118.16609 34.78007, -118.16609 34.7...",water,LA1m
3,"POLYGON ((-118.03496 33.86649, -118.03413 33.8...",water,LA1m
4,"POLYGON ((-118.24877 33.91260, -118.24887 33.9...",water,LA1m


In [112]:
ins_gdf = process_geometry_SQL_insert(unused_gdf)
ins_gdf.to_sql('unused_areas', engine, if_exists='append', index=False, dtype={'geom': Geometry('MultiPolygon', srid=4326)})

In [113]:
unused_gdf = gpd.read_file('../../data/LA/unused_areas/parksandrivers.geojson')
unused_gdf = unused_gdf[['geometry']]
unused_gdf = unused_gdf.to_crs({'init': 'epsg:4326'}) 
unused_gdf['type'] = 'parksrivers'
unused_gdf['city'] = CITY
unused_gdf = unused_gdf[~unused_gdf.geometry.geom_type.isin(['LineString', 'Point']) ]
unused_gdf.head()

  return _prepare_from_string(" ".join(pjargs))


Unnamed: 0,geometry,type,city
0,"MULTIPOLYGON (((-118.08760 33.80648, -118.0876...",parksrivers,LA1m
1,"POLYGON ((-118.19197 33.76320, -118.19199 33.7...",parksrivers,LA1m
2,"MULTIPOLYGON (((-118.71113 34.09592, -118.7092...",parksrivers,LA1m
3,"POLYGON ((-118.37852 34.01768, -118.37797 34.0...",parksrivers,LA1m
4,"POLYGON ((-118.35633 34.01135, -118.35623 34.0...",parksrivers,LA1m


In [114]:
ins_gdf = process_geometry_SQL_insert(unused_gdf)
ins_gdf.to_sql('temptable_{tempname}'.format(tempname=CITY.lower()), engine, if_exists='replace', index=False, dtype={'geom': Geometry('MultiPolygon', srid=4326)})

In [115]:
sql = """
update temptable_{tempname} set geom=st_multi(st_buffer(geom, 0.0));
""".format(city=CITY, tempname=CITY.lower())

result = engine.execute(text(sql))

In [116]:
sql = """
DROP TABLE IF EXISTS temptable_unusedhelper_{tempname};
CREATE TEMPORARY TABLE temptable_unusedhelper_{tempname} AS
SELECT ST_Union(geom) as geom FROM unused_areas u 
WHERE city='{city}';

DROP TABLE IF EXISTS temptable_unusedhelper_exp_{tempname};
CREATE TEMPORARY TABLE temptable_unusedhelper_exp_{tempname} AS
SELECT (ST_Dump(geom)).geom FROM temptable_unusedhelper_{tempname} u;
CREATE INDEX ON temptable_unusedhelper_exp_{tempname} USING GIST (geom);
""".format(city=CITY, tempname=CITY.lower())

result = engine.execute(text(sql))

In [117]:
sql = """
update temptable_{tempname} t set geom=ST_Multi(st_buffer(ST_Difference(t.geom, h.geom), 0.0))
FROM temptable_unusedhelper_{tempname} h
WHERE st_intersects(t.geom, h.geom) AND (NOT ST_Touches(t.geom, h.geom)) AND ST_GeometryType(ST_Multi(ST_Difference(t.geom, h.geom))) <> 'ST_GeometryCollection';
""".format(city=CITY, tempname=CITY.lower())

result = engine.execute(text(sql))

In [118]:
sql = """
DELETE FROM temptable_{tempname} t 
USING temptable_unusedhelper_exp_{tempname} h
WHERE ST_Within(t.geom, h.geom) OR (st_intersects(t.geom, h.geom) AND (NOT ST_Touches(t.geom, h.geom)) AND ST_GeometryType(ST_Multi(ST_Difference(t.geom, h.geom))) = 'ST_GeometryCollection');
""".format(city=CITY, tempname=CITY.lower())

result = engine.execute(text(sql))

In [119]:
sql = """
update temptable_{tempname} set geom=st_multi(st_buffer(geom, 0.0));
""".format(city=CITY, tempname=CITY.lower())

result = engine.execute(text(sql))

In [120]:
sql = """
INSERT INTO unused_areas (geom, type, city) 
SELECT p.geom, p.type, p.city
FROM temptable_{tempname} as p
WHERE NOT EXISTS(SELECT * FROM unused_areas u WHERE ST_Intersects(u.geom, p.geom) AND (NOT ST_Touches(u.geom, p.geom)) AND u.city=p.city)
""".format(city=CITY, tempname=CITY.lower())

result = engine.execute(text(sql))

In [121]:
sql = """
update unused_areas set geom=st_multi(st_buffer(geom, 0.0)) WHERE city = '{city}';
""".format(city=CITY, tempname=CITY.lower())

result = engine.execute(text(sql))

## Net area

In [122]:
unique_land_gdf = land_gdf.copy()
unique_land_gdf.loc[:, 'x'] = unique_land_gdf.geometry.centroid.x
unique_land_gdf.loc[:, 'y'] = unique_land_gdf.geometry.centroid.y
unique_land_gdf = unique_land_gdf.drop_duplicates(subset=['x', 'y'])

In [123]:
ins_gdf = process_geometry_SQL_insert(unique_land_gdf)
ins_gdf.to_sql('temptable_{}'.format(CITY.lower()), engine, if_exists='replace', index=False, dtype={'geom': Geometry('MultiPolygon', srid=4326)})

In [124]:
sql = """
UPDATE temptable_{tempname} p SET geom=ST_Multi(ST_buffer(p.geom, 0.0)) 
WHERE NOT ST_Isvalid(p.geom)
""".format(city=CITY, tempname=CITY.lower())

result = engine.execute(text(sql))

In [125]:
## This deletes the blocks that are related to streets
sql = """
DELETE FROM block b
WHERE city='{city}' and NOT EXISTS (
    select 1 
    from temptable_{tempname} t 
    where st_intersects(t.geom, b.geom) and landuse <> 'none'  
    HAVING SUM(ST_Area(st_intersection(t.geom, b.geom)))/ST_Area(b.geom) > 0.2);
""".format(city=CITY, tempname=CITY.lower())

result = engine.execute(text(sql))

In [126]:
sql = """
DELETE 
FROM temptable_{tempname} t
USING unused_areas u 
WHERE u.city = '{city}' AND ST_Intersects(u.geom, t.geom) AND (NOT ST_Touches(u.geom, t.geom)) 
AND (ST_Contains(u.geom, t.geom) OR ST_AREA(ST_Intersection(t.geom, u.geom))/ST_Area(t.geom) > 0.5);
""".format(city=CITY, tempname=CITY.lower())

result = engine.execute(text(sql))

In [127]:
sql = """
INSERT INTO spatial_groups_net_area (sp_id, city, spatial_name, used_area) 
SELECT sp_id, city, spatial_name, SUM(ST_Area(ST_Intersection(s.approx_geom, t.geom)::geography))/1000000.
FROM temptable_{tempname} t
INNER JOIN spatial_groups s ON ST_Intersects(s.approx_geom, t.geom) AND NOT ST_Touches(s.approx_geom, t.geom)
WHERE s.city = '{city}' 
GROUP BY sp_id, city, spatial_name;
""".format(city=CITY, tempname=CITY.lower())

result = engine.execute(text(sql))

## Roads

In [5]:
sql = """
create index on planet_osm_line (highway);
INSERT INTO roads (geom, motorway, city) 
SELECT ST_MULTI(ST_LineSubstring(geom, 0.002*n/length,
  CASE
	WHEN 0.002*(n+1) < length THEN 0.002*(n+1)/length
	ELSE 1
  END)) As geom, (t.highway='motorway' OR t.highway='trunk'), city 
FROM
  (SELECT b.city, ST_LineMerge(p.way) AS geom, p.highway, 
  ST_Length(p.way) As length
  FROM planet_osm_line p
  INNER JOIN boundary b ON ST_Intersects(p.way, b.geom) AND NOT ST_Touches(p.way, b.geom)
  WHERE b.city = '{city}' AND p.highway <> 'service' AND p.highway <> 'path' AND p.highway IS NOT NULL
  ) AS t
CROSS JOIN generate_series(0, 50) AS n
WHERE n*0.002/length < 1;
""".format(city=CITY, tempname=CITY.lower())

result = engine.execute(text(sql))

## POIs

In [129]:
sql = """
REFRESH MATERIALIZED VIEW block_centroids;
"""

result = engine.execute(text(sql))

In [130]:
sql = """
REFRESH MATERIALIZED VIEW pois_requests;
"""

result = engine.execute(text(sql))

In [131]:
from collections import defaultdict
from joblib import Parallel, delayed

In [132]:
def make_trip(lon1, lat1, dest):
    # PERSONALIZE HERE
    r = requests.get(
        'http://localhost:5000/table/v1/foot/{lon1},{lat1};{dest}?annotations=distance&sources=0'.format(lon1=lon1, lat1=lat1, dest=dest))
    distances = r.json()['distances']
    distance = np.array(distances[0][1:], np.float32)
    distance[distance < 0] = 0
    return distance

def walkscore_list(bid, clon, clat, list_dests, ws, straight_distances):
    dists = make_trip(clon, clat, list_dests)
    straight_distances = np.array(straight_distances)
    #good_idxs = np.argwhere(dists < 1600)
    ws = np.array(ws)+ 0.00000001 #[good_idxs] 
    zeros_idxs = np.argwhere(dists == 0)
    dists[zeros_idxs] = 1
    if np.sum(ws) == 0 or len(ws) == 0:
        return bid, -1
    return bid, np.average(straight_distances/dists, weights=ws) #[good_idxs] [good_idxs]

cat_weights = {
    'grocery': [3],
    'Food': [.75,.45,.25,.25,.225,.225,.225,.225,.2,.2],
    'Shops': [.5,.45,.4,.35,.3],
    'Schools': [1],
    'Entertainment': [1],
    'Parks and outside': [1],
    'Coffee': [1.25,.75],
    'Banks': [1],
    'Books': [1]
}


def walkscore(meters):
	max_walk = 1500
	score = np.exp(-5 * (meters / max_walk) ** 2.5)
	score = np.clip(score, 0, 1)
	return score

def walkscore2_list(bid, clon, clat, list_dests, c):
    dists = make_trip(clon, clat, list_dests)
    #good_idxs = np.argwhere(dists < 1600)
    scores = np.sort(walkscore(dists))[::-1]
    n = len(cat_weights[c])
    d = np.zeros(n)
    d[:scores.shape[0]] = scores[:n]
    w = np.sum(np.array(d)*np.array(cat_weights[c]))
    assert w <= np.sum(cat_weights[c]) and w >= 0
    
    return bid, w #[good_idxs] [good_idxs]

In [133]:
sql = """
SELECT bid, lon, lat, dests, parent_cat FROM pois_requests WHERE city = '{city}'
""".format(city=CITY, tempname=CITY.lower())

blocks_df = pd.read_sql_query(sql, con=engine)
blocks_df.head()

Unnamed: 0,bid,lon,lat,dests,parent_cat
0,515785,-118.259246,33.801533,"-118.263873,33.807823;-118.266000,33.791000",Banks
1,515785,-118.260009,33.801522,"-118.263873,33.807823;-118.266000,33.791000",Banks
2,515785,-118.259983,33.801722,"-118.263873,33.807823;-118.266000,33.791000",Banks
3,515785,-118.261698,33.801963,"-118.263873,33.807823;-118.266724,33.791253",Banks
4,515785,-118.263092,33.801466,"-118.263873,33.807823;-118.266724,33.791253",Banks


In [134]:
sql = """
SELECT bid, COUNT(*) as size
FROM block_centroids b WHERE b.city = '{city}' 
GROUP BY bid
ORDER BY bid
""".format(city=CITY, tempname=CITY.lower())

n_blocks_df = pd.read_sql_query(sql, con=engine).set_index('bid')
n_blocks_df.head()

Unnamed: 0_level_0,size
bid,Unnamed: 1_level_1
515785,16
515786,16
515787,13
515788,14
515789,15


In [135]:
list(set(blocks_df.parent_cat.values))

['Books',
 'Banks',
 'Food',
 'Coffee',
 'Shops',
 'Entertainment',
 'grocery',
 'Schools']

In [136]:
block_groups = defaultdict(list)
for index, row in blocks_df.iterrows():
    block_groups[row['bid']].append(row.values[1:])

In [137]:
from tqdm import tqdm

print(len(blocks_df))
results = [(idx, score) for idx, score in Parallel(n_jobs=10)(delayed(walkscore2_list)(bid, req[0], req[1], req[2], req[3]) for bid, reqs in tqdm(block_groups.items()) for req in reqs)]

  0%|          | 0/2507 [00:00<?, ?it/s]

183044


100%|██████████| 2507/2507 [02:15<00:00, 18.46it/s] 


In [138]:
block_vacuum_index = defaultdict(list)
bid2size = {k: v['size'] for k, v in n_blocks_df.iterrows()}

for bid, score in results:
    block_vacuum_index[bid].append(score)
    
sum_cat_weights = np.sum([y for x in cat_weights.values() for y in x])

for bid, score in block_vacuum_index.items():
    if len(score) > 0:
        score = (np.sum(score)/bid2size[bid])/sum_cat_weights
        assert score <= 1.01
        sql = "INSERT INTO walk_index (bid, score, city) VALUES ({}, {}, '{}')".format(bid, score, CITY)
        result = engine.execute(text(sql))

## Crime

In [6]:
df = pd.read_csv('../../data/LA/crime/Crimes_2012-2015.csv.gz')
df.head()

Unnamed: 0,Date.Rptd,DR.NO,DATE.OCC,TIME.OCC,AREA,AREA.NAME,RD,Crm.Cd,CrmCd.Desc,Status,Status.Desc,LOCATION,Cross.Street,Location.1
0,03/20/2013,132007717,03/20/2013,2015,20,Olympic,2004,997,TRAFFIC DR #,UNK,Unknown,OXFORD,OAKWOOD,"(34.0776, -118.308)"
1,03/10/2013,130608787,03/10/2013,445,6,Hollywood,635,997,TRAFFIC DR #,UNK,Unknown,ODIN ST,CAHUENGA BL,"(34.1113, -118.3336)"
2,12/18/2013,131820260,12/18/2013,745,18,Southeast,1839,997,TRAFFIC DR #,UNK,Unknown,105TH ST,CROESUS AV,"(33.9406, -118.2338)"
3,10/18/2013,131817514,10/18/2013,1730,18,Southeast,1827,997,TRAFFIC DR #,UNK,Unknown,101ST ST,JUNIPER ST,"(33.9449, -118.2332)"
4,05/26/2013,130510483,05/25/2013,2000,5,Harbor,507,440,THEFT PLAIN - PETTY (UNDER $400),UNK,Unknown,1300 W SEPULVEDA BL,,"(33.8135, -118.2992)"


In [7]:
df = df[['DATE.OCC', 'TIME.OCC', 'CrmCd.Desc', 'Location.1']]
df.head()

Unnamed: 0,DATE.OCC,TIME.OCC,CrmCd.Desc,Location.1
0,03/20/2013,2015,TRAFFIC DR #,"(34.0776, -118.308)"
1,03/10/2013,445,TRAFFIC DR #,"(34.1113, -118.3336)"
2,12/18/2013,745,TRAFFIC DR #,"(33.9406, -118.2338)"
3,10/18/2013,1730,TRAFFIC DR #,"(33.9449, -118.2332)"
4,05/25/2013,2000,THEFT PLAIN - PETTY (UNDER $400),"(33.8135, -118.2992)"


In [8]:
print(df.count())
df = df.dropna()
print(df.count())
df = df[df['CrmCd.Desc'] != 'TRAFFIC DR #']
df.head()

DATE.OCC      935258
TIME.OCC      935258
CrmCd.Desc    935256
Location.1    930861
dtype: int64
DATE.OCC      930859
TIME.OCC      930859
CrmCd.Desc    930859
Location.1    930859
dtype: int64


Unnamed: 0,DATE.OCC,TIME.OCC,CrmCd.Desc,Location.1
4,05/25/2013,2000,THEFT PLAIN - PETTY (UNDER $400),"(33.8135, -118.2992)"
6,08/23/2014,2240,BURGLARY,"(34.0617, -118.2469)"
7,08/23/2014,1337,VIOLATION OF RESTRAINING ORDER,"(34.069, -118.3066)"
8,08/23/2014,1945,ROBBERY,"(34.0644, -118.2387)"
9,08/22/2014,825,VIOLATION OF RESTRAINING ORDER,"(34.1817, -118.4509)"


In [9]:
df['datetime'] = pd.to_datetime(df['DATE.OCC'] + ' ' + df['TIME.OCC'].map(str).str.zfill(4), format='%m/%d/%Y %H%M')
df.head()

Unnamed: 0,DATE.OCC,TIME.OCC,CrmCd.Desc,Location.1,datetime
4,05/25/2013,2000,THEFT PLAIN - PETTY (UNDER $400),"(33.8135, -118.2992)",2013-05-25 20:00:00
6,08/23/2014,2240,BURGLARY,"(34.0617, -118.2469)",2014-08-23 22:40:00
7,08/23/2014,1337,VIOLATION OF RESTRAINING ORDER,"(34.069, -118.3066)",2014-08-23 13:37:00
8,08/23/2014,1945,ROBBERY,"(34.0644, -118.2387)",2014-08-23 19:45:00
9,08/22/2014,825,VIOLATION OF RESTRAINING ORDER,"(34.1817, -118.4509)",2014-08-22 08:25:00


In [10]:
df['Location.1'] = df['Location.1'].str.replace('(', '')
df['Location.1'] = df['Location.1'].str.replace(')', '')

In [11]:
df['lng'] = df['Location.1'].str.split(', ').str[1]
df['lat'] = df['Location.1'].str.split(', ').str[0]
df['num'] = 1
df.head()

Unnamed: 0,DATE.OCC,TIME.OCC,CrmCd.Desc,Location.1,datetime,lng,lat,num
4,05/25/2013,2000,THEFT PLAIN - PETTY (UNDER $400),"33.8135, -118.2992",2013-05-25 20:00:00,-118.2992,33.8135,1
6,08/23/2014,2240,BURGLARY,"34.0617, -118.2469",2014-08-23 22:40:00,-118.2469,34.0617,1
7,08/23/2014,1337,VIOLATION OF RESTRAINING ORDER,"34.069, -118.3066",2014-08-23 13:37:00,-118.3066,34.069,1
8,08/23/2014,1945,ROBBERY,"34.0644, -118.2387",2014-08-23 19:45:00,-118.2387,34.0644,1
9,08/22/2014,825,VIOLATION OF RESTRAINING ORDER,"34.1817, -118.4509",2014-08-22 08:25:00,-118.4509,34.1817,1


### Crime types

In [12]:
crime_types_df = pd.read_csv('../../data/crime_types/LA_types_categorized.csv')[['CrmCd.Desc', 'UCR part', 'UCR1']]
crime_types_df['UCR part'] = crime_types_df['UCR part'].astype(int)
crime_types_df.head()

Unnamed: 0,CrmCd.Desc,UCR part,UCR1
0,BATTERY - SIMPLE ASSAULT,2,
1,BURGLARY FROM VEHICLE,1,Larceny-theft (except motor vehicle theft)
2,VEHICLE - STOLEN,1,Motor vehicle theft
3,BURGLARY,1,Burglary (breaking or entering)
4,THEFT OF IDENTITY,2,


In [13]:
crime_types_df.loc[crime_types_df['UCR1'] == 'Larceny-theft (except motor vehicle theft)', 'UCR part'] = '12'

In [14]:
crime_types_df[crime_types_df['UCR part']==1]

Unnamed: 0,CrmCd.Desc,UCR part,UCR1
2,VEHICLE - STOLEN,1,Motor vehicle theft
3,BURGLARY,1,Burglary (breaking or entering)
7,"ASSAULT WITH DEADLY WEAPON, AGGRAVATED ASSAULT",1,Aggravated assault
9,ROBBERY,1,Robbery
29,"BURGLARY, ATTEMPTED",1,Burglary (breaking or entering)
30,ATTEMPTED ROBBERY,1,Robbery
34,SPOUSAL (COHAB) ABUSE - AGGRAVATED ASSAULT,1,Aggravated assault
35,"RAPE, FORCIBLE",1,Rape
49,ARSON,1,Arson
50,VEHICLE - ATTEMPT STOLEN,1,Motor vehicle theft


In [15]:
print(df['num'].count())
df = pd.merge(df, crime_types_df, on='CrmCd.Desc')
print(df['num'].count())

765944
765944


In [16]:
df = df[df['UCR part'] == 1]
print(df['num'].count())

194843


#### Subtypes of crimes

In [17]:
ucr_crimes_df = pd.read_csv('../../data/crime_types/UCR_crimes.csv')
ucr_crimes_df.head()

Unnamed: 0,Name,Category
0,Criminal homicide,Violent crime
1,Rape,Violent crime
2,Robbery,Violent crime
3,Aggravated assault,Violent crime
4,Burglary (breaking or entering),Property crime


In [18]:
list(set(df['UCR1'].values))

['Aggravated assault',
 'Arson',
 'Rape',
 'Motor vehicle theft',
 'Criminal homicide',
 'Robbery',
 'Burglary (breaking or entering)']

In [19]:
df.dtypes

DATE.OCC              object
TIME.OCC               int64
CrmCd.Desc            object
Location.1            object
datetime      datetime64[ns]
lng                   object
lat                   object
num                    int64
UCR part              object
UCR1                  object
dtype: object

In [20]:
df_ucr1 = pd.merge(df, ucr_crimes_df.rename(columns={'Name': 'UCR1'}), on='UCR1')

a = set(df_ucr1['UCR1'].drop_duplicates().values)
b = set(df['UCR1'].drop_duplicates().values)
assert(a.intersection(b) == a)

# Categories not present in crime dataset
df_ucr1[~(df_ucr1['UCR1'].isin(b))]

Unnamed: 0,DATE.OCC,TIME.OCC,CrmCd.Desc,Location.1,datetime,lng,lat,num,UCR part,UCR1,Category


In [21]:
df_2013 = df_ucr1[df_ucr1['datetime'].dt.year == 2013][['lng', 'lat', 'CrmCd.Desc', 'num', 'UCR1', 'Category']]
df_2013['lng'] = df_2013['lng'].astype('float32')
df_2013['lat'] = df_2013['lat'].astype('float32')
df_2013 = df_2013.rename(columns={'CrmCd.Desc': 'description'})
df_2013.count()

lng            46426
lat            46426
description    46426
num            46426
UCR1           46426
Category       46426
dtype: int64

In [22]:
from geopandas import GeoDataFrame
from shapely.geometry import Point

geometry = [Point(xy) for xy in zip(df_2013.lng, df_2013.lat)]
df_2013 = df_2013.drop(['lng', 'lng'], axis=1)
crs = {'init': 'epsg:4326'}
gdf = GeoDataFrame(df_2013, crs=crs, geometry=geometry)
gdf.head()

  return _prepare_from_string(" ".join(pjargs))


Unnamed: 0,lat,description,num,UCR1,Category,geometry
15114,34.056801,BURGLARY,1,Burglary (breaking or entering),Property crime,POINT (-118.28210 34.05680)
15115,34.0326,BURGLARY,1,Burglary (breaking or entering),Property crime,POINT (-118.34710 34.03260)
15116,34.0499,BURGLARY,1,Burglary (breaking or entering),Property crime,POINT (-118.36800 34.04990)
15117,34.037102,BURGLARY,1,Burglary (breaking or entering),Property crime,POINT (-118.32480 34.03710)
15118,34.204899,BURGLARY,1,Burglary (breaking or entering),Property crime,POINT (-118.47490 34.20490)


In [23]:
insert_gdf = process_geometry_SQL_insert(gdf)
insert_gdf.to_sql('temptable_{}'.format(CITY.lower()), engine, if_exists='replace', index=True, dtype={'geom': Geometry('Point', srid=4326)})

In [24]:
sql = """
insert into crime (sp_id, num, city, ucr1, ucr_category) 
select bid, SUM(num), '{city}', "UCR1", "Category" from(
SELECT num, bid, "UCR1", "Category", ROW_NUMBER() OVER (PARTITION BY index) AS r
from (
select c.index, c.num, b.bid, "UCR1", "Category"
from temptable_{tempname} as c
inner join blocks_group as b on ST_Intersects(b.geom, st_buffer(c.geom::geography, 30)::geometry)
where b.city='{city}'
    ) as dtable
) x
group by bid, "UCR1", "Category";
""".format(city=CITY, tempname=CITY.lower())

result = engine.execute(text(sql))

## Refresh materialized views

In [None]:
sql = """
REFRESH MATERIALIZED VIEW join_building_ways;
"""

result = engine.execute(text(sql))

In [118]:
sql = """
REFRESH MATERIALIZED VIEW spatial_groups_unused_areas;
"""

result = engine.execute(text(sql))

In [72]:
sql = """
REFRESH MATERIALIZED VIEW block_building;
"""

result = engine.execute(text(sql))

In [73]:
sql = """
REFRESH MATERIALIZED VIEW blocks_group_with_building;
"""

result = engine.execute(text(sql))