In [2]:
from sqlalchemy import create_engine

# SQLAlchemy engine for the local `zoning` Postgres database; the ETL cells
# below pass it to `to_postgis` when writing their tables.
DB_URL = 'postgresql://localhost:5432/zoning'
engine = create_engine(DB_URL)

In [3]:
import pandas as pd
import geopandas as gpd
from shapely import wkt

def gpd_from_csv(csv_file, geometry_col, **kwargs):
    """Load a CSV with a WKT geometry column into a projected GeoDataFrame.

    Parameters
    ----------
    csv_file : path or file-like
        Passed straight to ``pandas.read_csv``.
    geometry_col : str
        Name of the column holding WKT strings. It is parsed into a new
        ``geometry`` column and then dropped, so it must not itself be named
        ``'geometry'``.
    **kwargs
        Extra keyword arguments forwarded to ``pandas.read_csv``.

    Returns
    -------
    geopandas.GeoDataFrame
        Only the rows whose ``geometry_col`` cell was a string, with the WKT
        parsed into ``geometry``, tagged as EPSG:4326 and reprojected to
        EPSG:3857.
    """
    assert geometry_col != 'geometry', \
        "geometry_col must not be named 'geometry'; it is replaced by the parsed column"

    df = pd.read_csv(csv_file, **kwargs)

    # Keep only rows whose geometry cell is text (non-string cells, e.g. NaN
    # for empty fields, cannot be parsed as WKT). Take an explicit copy so the
    # column assignment below writes to an independent frame instead of a
    # slice of the original (avoids pandas' SettingWithCopyWarning).
    has_wkt = df[geometry_col].apply(lambda x: isinstance(x, str))
    df = df[has_wkt].copy()
    df['geometry'] = df[geometry_col].apply(wkt.loads)
    df = df.drop(columns=[geometry_col])

    # I *think* this is what sf data gov uses
    gdf = gpd.GeoDataFrame(df, geometry='geometry', crs='EPSG:4326')

    # Projected to metres for area calculations.
    # NOTE(review): EPSG:3857 (Web Mercator) is not an equal-area projection,
    # so areas computed from it are inflated away from the equator - confirm
    # this is acceptable or switch to an equal-area / local CRS.
    return gdf.to_crs(epsg=3857)

In [21]:
# ETL parcels: read the parcel export (WKT lives in the `shape` column),
# replace the `prc` table in the database, then display the frame.
PARCELS_CSV = '../data/Parcels___Active_and_Retired_20240807.csv'

prc = gpd_from_csv(PARCELS_CSV, 'shape')
prc.to_postgis(name='prc', con=engine, if_exists='replace')

prc

Unnamed: 0,mapblklot,blklot,block_num,lot_num,from_address_num,to_address_num,street_name,street_type,odd_even,in_asr_secured_roll,pw_recorded_map,zoning_code,zoning_district,date_rec_add,date_rec_drop,date_map_add,date_map_drop,date_map_alt,project_id_add,project_id_drop,project_id_alt,active,centroid_latitude,centroid_longitude,supdist,supervisor_district,supdistpad,numbertext,supname,analysis_neighborhood,police_district,police_company,planning_district,planning_district_number,data_as_of,data_loaded_at,geometry
0,6550046,6550048,6550,048,3861.0,3861.0,25TH,ST,O,True,True,RH-2,"RESIDENTIAL- HOUSE, TWO FAMILY",1980-10-03,,1998-07-01,,,ORIG_BASEMAP,,,True,37.749907,-122.426374,SUPERVISORIAL DISTRICT 8,8.0,8.0,EIGHT,Rafael Mandelman,Noe Valley,MISSION,D,Central,7.0,2024/08/06 02:50:00 PM,2024/08/06 10:03:10 PM,"MULTIPOLYGON (((-13628444.767 4544133.666, -13..."
1,6550046,6550049,6550,049,3863.0,3863.0,25TH,ST,O,True,True,RH-2,"RESIDENTIAL- HOUSE, TWO FAMILY",1980-10-03,,1998-07-01,,,ORIG_BASEMAP,,,True,37.749907,-122.426374,SUPERVISORIAL DISTRICT 8,8.0,8.0,EIGHT,Rafael Mandelman,Noe Valley,MISSION,D,Central,7.0,2024/08/06 02:50:00 PM,2024/08/06 10:03:10 PM,"MULTIPOLYGON (((-13628444.767 4544133.666, -13..."
2,6550050,6550050,6550,050,3869.0,3871.0,25TH,ST,O,True,True,RH-2,"RESIDENTIAL- HOUSE, TWO FAMILY",1983-01-28,,1998-07-01,,,ORIG_BASEMAP,,,True,37.749896,-122.426546,SUPERVISORIAL DISTRICT 8,8.0,8.0,EIGHT,Rafael Mandelman,Noe Valley,MISSION,D,Central,7.0,2024/08/06 02:50:00 PM,2024/08/06 10:03:10 PM,"MULTIPOLYGON (((-13628463.963 4544132.2, -1362..."
3,6550051,6550051,6550,051,3865.0,3865.0,25TH,ST,O,True,True,RH-2,"RESIDENTIAL- HOUSE, TWO FAMILY",1983-01-28,,1998-07-01,,,ORIG_BASEMAP,,,True,37.749901,-122.426460,SUPERVISORIAL DISTRICT 8,8.0,8.0,EIGHT,Rafael Mandelman,Noe Valley,MISSION,D,Central,7.0,2024/08/06 02:50:00 PM,2024/08/06 10:03:10 PM,"MULTIPOLYGON (((-13628454.365 4544132.933, -13..."
4,6550053,6550053,6550,053,3891.0,3891.0,25TH,ST,O,True,True,RH-2,"RESIDENTIAL- HOUSE, TWO FAMILY",1991-10-22,,1998-07-01,,,ORIG_BASEMAP,,,True,37.749934,-122.426988,SUPERVISORIAL DISTRICT 8,8.0,8.0,EIGHT,Rafael Mandelman,Noe Valley,MISSION,D,Central,7.0,2024/08/06 02:50:00 PM,2024/08/06 10:03:10 PM,"MULTIPOLYGON (((-13628506.165 4544173.184, -13..."
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
235682,3211019,3211019,3211,019,189.0,189.0,SANTA YNEZ,AVE,O,True,True,RH-1,"RESIDENTIAL- HOUSE, ONE FAMILY",,,1998-07-01,,,ORIG_BASEMAP,,,True,37.725062,-122.441626,SUPERVISORIAL DISTRICT 11,11.0,11.0,ELEVEN,Ahsha Safai,Outer Mission,INGLESIDE,H,South Central,12.0,2024/08/06 02:49:00 PM,2024/08/06 10:03:10 PM,"MULTIPOLYGON (((-13630150.281 4540634.639, -13..."
235683,3211020,3211020,3211,020,179.0,179.0,SANTA YNEZ,AVE,O,True,True,RH-1,"RESIDENTIAL- HOUSE, ONE FAMILY",,,1998-07-01,,,ORIG_BASEMAP,,,True,37.725023,-122.441555,SUPERVISORIAL DISTRICT 11,11.0,11.0,ELEVEN,Ahsha Safai,Outer Mission,INGLESIDE,H,South Central,12.0,2024/08/06 02:49:00 PM,2024/08/06 10:03:10 PM,"MULTIPOLYGON (((-13630142.36 4540629.153, -136..."
235684,3211021,3211021,3211,021,177.0,177.0,SANTA YNEZ,AVE,O,True,True,RH-1,"RESIDENTIAL- HOUSE, ONE FAMILY",,,1998-07-01,,,ORIG_BASEMAP,,,True,37.724984,-122.441483,SUPERVISORIAL DISTRICT 11,11.0,11.0,ELEVEN,Ahsha Safai,Outer Mission,INGLESIDE,H,South Central,12.0,2024/08/06 02:49:00 PM,2024/08/06 10:03:10 PM,"MULTIPOLYGON (((-13630134.439 4540623.666, -13..."
235685,3211022,3211022,3211,022,171.0,171.0,SANTA YNEZ,AVE,O,True,True,RH-1,"RESIDENTIAL- HOUSE, ONE FAMILY",,,1998-07-01,,,ORIG_BASEMAP,,,True,37.724946,-122.441412,SUPERVISORIAL DISTRICT 11,11.0,11.0,ELEVEN,Ahsha Safai,Outer Mission,INGLESIDE,H,South Central,12.0,2024/08/06 02:49:00 PM,2024/08/06 10:03:10 PM,"MULTIPOLYGON (((-13630126.518 4540618.18, -136..."


In [19]:
# only parcels where active is true
# prc = prc[prc.active == True]

# prc['is_public'] = prc.zoning_district.str.contains('PUBLIC|TREASURE ISLAND', regex=True)
# prc = prc[prc.is_public == False]

# find overlapping parcels
# overlaps = gpd.sjoin(prc, prc, how='inner')
# overlaps
# prc[prc.analysis_neighborhood == 'Castro/Upper Market']
# pd.set_option('display.max_columns', None)
# prc[(prc.street_name == 'NOE') & (prc.to_address_num < 500) & (prc.to_address_num > 400)]
# prc[(prc.street_name == 'NOE') & (prc.lot_num == '021')]

Unnamed: 0,mapblklot,blklot,block_num,lot_num,from_address_num,to_address_num,street_name,street_type,odd_even,in_asr_secured_roll,pw_recorded_map,zoning_code,zoning_district,date_rec_add,date_rec_drop,date_map_add,date_map_drop,date_map_alt,project_id_add,project_id_drop,project_id_alt,active,centroid_latitude,centroid_longitude,supdist,supervisor_district,supdistpad,numbertext,supname,analysis_neighborhood,police_district,police_company,planning_district,planning_district_number,data_as_of,data_loaded_at,geometry,is_public
94054,3581029B,3581029B,3581,029B,487.0,489.0,NOE,ST,O,True,True,RH-3,"RESIDENTIAL- HOUSE, THREE FAMILY",,,1998-07-01,,,ORIG_BASEMAP,,,True,37.761295,-122.43257,SUPERVISORIAL DISTRICT 8,8.0,8.0,EIGHT,Rafael Mandelman,Castro/Upper Market,MISSION,D,Central,7.0,2024/08/06 02:49:00 PM,2024/08/06 10:03:10 PM,"MULTIPOLYGON (((-13629142.526 4545753.728, -13...",False
94056,3581031,3581031,3581,031,479.0,479.0,NOE,ST,O,True,True,RH-3,"RESIDENTIAL- HOUSE, THREE FAMILY",,,1998-07-01,,,ORIG_BASEMAP,,,True,37.761437,-122.432496,SUPERVISORIAL DISTRICT 8,8.0,8.0,EIGHT,Rafael Mandelman,Castro/Upper Market,MISSION,D,Central,7.0,2024/08/06 02:49:00 PM,2024/08/06 10:03:10 PM,"MULTIPOLYGON (((-13629111.349 4545775.496, -13...",False
94057,3581032,3581032,3581,032,471.0,471.0,NOE,ST,O,True,True,RH-3,"RESIDENTIAL- HOUSE, THREE FAMILY",,,1998-07-01,,,ORIG_BASEMAP,,,True,37.761505,-122.432503,SUPERVISORIAL DISTRICT 8,8.0,8.0,EIGHT,Rafael Mandelman,Castro/Upper Market,MISSION,D,Central,7.0,2024/08/06 02:49:00 PM,2024/08/06 10:03:10 PM,"MULTIPOLYGON (((-13629144.718 4545782.648, -13...",False
94058,3581033,3581033,3581,033,465.0,469.0,NOE,ST,O,True,True,RH-3,"RESIDENTIAL- HOUSE, THREE FAMILY",,,1998-07-01,,,ORIG_BASEMAP,,,True,37.761574,-122.43251,SUPERVISORIAL DISTRICT 8,8.0,8.0,EIGHT,Rafael Mandelman,Castro/Upper Market,MISSION,D,Central,7.0,2024/08/06 02:49:00 PM,2024/08/06 10:03:10 PM,"MULTIPOLYGON (((-13629145.449 4545792.287, -13...",False
94059,3581034,3581034,3581,034,461.0,461.0,NOE,ST,O,True,True,RH-3,"RESIDENTIAL- HOUSE, THREE FAMILY",,,1998-07-01,,,ORIG_BASEMAP,,,True,37.761642,-122.432516,SUPERVISORIAL DISTRICT 8,8.0,8.0,EIGHT,Rafael Mandelman,Castro/Upper Market,MISSION,D,Central,7.0,2024/08/06 02:49:00 PM,2024/08/06 10:03:10 PM,"MULTIPOLYGON (((-13629146.18 4545801.927, -136...",False
94087,3581064,3581064,3581,064,437.0,439.0,NOE,ST,O,True,True,RH-3,"RESIDENTIAL- HOUSE, THREE FAMILY",,,1998-07-01,,,ORIG_BASEMAP,,,True,37.762039,-122.432555,SUPERVISORIAL DISTRICT 8,8.0,8.0,EIGHT,Rafael Mandelman,Castro/Upper Market,MISSION,D,Central,7.0,2024/08/06 02:49:00 PM,2024/08/06 10:03:10 PM,"MULTIPOLYGON (((-13629150.42 4545857.839, -136...",False
94088,3581065,3581065,3581,065,431.0,433.0,NOE,ST,O,True,True,RH-3,"RESIDENTIAL- HOUSE, THREE FAMILY",,,1998-07-01,,,ORIG_BASEMAP,,,True,37.762108,-122.432561,SUPERVISORIAL DISTRICT 8,8.0,8.0,EIGHT,Rafael Mandelman,Castro/Upper Market,MISSION,D,Central,7.0,2024/08/06 02:49:00 PM,2024/08/06 10:03:10 PM,"MULTIPOLYGON (((-13629151.151 4545867.479, -13...",False
94089,3581067,3581067,3581,067,423.0,425.0,NOE,ST,O,True,True,RH-3,"RESIDENTIAL- HOUSE, THREE FAMILY",,,1998-07-01,,,ORIG_BASEMAP,,,True,37.762245,-122.432575,SUPERVISORIAL DISTRICT 8,8.0,8.0,EIGHT,Rafael Mandelman,Castro/Upper Market,MISSION,D,Central,7.0,2024/08/06 02:49:00 PM,2024/08/06 10:03:10 PM,"MULTIPOLYGON (((-13629110.696 4545894.221, -13...",False
94090,3581068,3581068,3581,068,415.0,415.0,NOE,ST,O,True,True,RH-3,"RESIDENTIAL- HOUSE, THREE FAMILY",,,1998-07-01,,,ORIG_BASEMAP,,,True,37.762409,-122.432684,SUPERVISORIAL DISTRICT 8,8.0,8.0,EIGHT,Rafael Mandelman,Castro/Upper Market,MISSION,D,Central,7.0,2024/08/06 02:49:00 PM,2024/08/06 10:03:10 PM,"MULTIPOLYGON (((-13629153.344 4545896.399, -13...",False
94112,3581092,3581092,3581,092,429.0,429.0,NOE,ST,O,True,True,RH-3,"RESIDENTIAL- HOUSE, THREE FAMILY",1997-07-09,,1999-03-15,,,1998C35814,,,True,37.762176,-122.432568,SUPERVISORIAL DISTRICT 8,8.0,8.0,EIGHT,Rafael Mandelman,Castro/Upper Market,MISSION,D,Central,7.0,2024/08/06 02:49:00 PM,2024/08/06 10:03:10 PM,"MULTIPOLYGON (((-13629151.882 4545877.119, -13...",False


In [22]:
# ETL zoning: read the height/bulk districts (WKT lives in `the_geom`),
# clean `gen_hght`, replace the `zoning_height` table, then display the frame.
ZONING_CSV = '../data/Zoning_Map_-_Height_and_Bulk_Districts_20240807.csv'
MAX_GEN_HGHT = 1000  # anything above this is reset to 0

hgt = gpd_from_csv(ZONING_CSV, 'the_geom')

# Cast gen_hght to int, then zero out every value above the cutoff.
gen_hght = hgt['gen_hght'].astype(int)
hgt['gen_hght'] = gen_hght.mask(gen_hght > MAX_GEN_HGHT, 0)

hgt.to_postgis(name='zoning_height', con=engine, if_exists='replace')

hgt

Unnamed: 0,height,gen_hght,geometry
0,450-S,450,"MULTIPOLYGON (((-13625269.968 4549465.605, -13..."
1,450-S,450,"MULTIPOLYGON (((-13625269.968 4549465.605, -13..."
2,450-S,450,"MULTIPOLYGON (((-13625187.726 4549582.992, -13..."
3,45-X,45,"MULTIPOLYGON (((-13631027.449 4549264.617, -13..."
4,67-X,67,"MULTIPOLYGON (((-13630723.316 4549330.85, -136..."
...,...,...,...
1190,75-TI,75,"MULTIPOLYGON (((-13622539.832 4554355.103, -13..."
1191,75-TI,75,"MULTIPOLYGON (((-13622671.971 4554539.132, -13..."
1192,75-TI/240 Flex Zone-TI,240,"MULTIPOLYGON (((-13622794.027 4554776.684, -13..."
1193,75-TI/240 Flex Zone-TI,240,"MULTIPOLYGON (((-13622573.539 4554348.511, -13..."


In [23]:
# ETL assessor: only the story count and geometry are read from the CSV;
# the cleaned frame replaces the `assessor` table.
STORIES_COL = 'Number of Stories'
# Salesforce tower is 61, anything above that is baloney
MAX_STORIES = 70

assessor = gpd_from_csv(
    '../data/assessor.csv',
    'the_geom',
    usecols=[STORIES_COL, 'the_geom'],
)

# Reset story counts above the cutoff to 0.
stories = assessor[STORIES_COL]
assessor[STORIES_COL] = stories.mask(stories > MAX_STORIES, 0)

assessor.to_postgis(name='assessor', con=engine, if_exists='replace')