In [1]:
import geopandas
import pandas as pd
import numpy as np
from shapely.validation import make_valid
import folium

In [2]:
#read in tax parcel data from summit gis and drop NAs
tax_parcels = geopandas.read_file("./data/Tax_Parcels.dbf")
tax_parcels.dropna(subset = ['geometry'], inplace=True)

#read in jurisdiction data from summit gis
jurisdictions = geopandas.read_file("./data/Jurisdictions.dbf")

#runtime for this on macbook pro is usually 2min 20s

In [3]:
#find akron jurisdiction, save to file, and make the geometry valid
akron_juris = jurisdictions[jurisdictions['NAME'] == 'AKRON']
akron_juris['geometry'] = make_valid(akron_juris.geometry.item())
akron_juris.to_file("./data/akron_juris.shp")

A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  super().__setitem__(key, value)


In [4]:
#read in road centerline data, clip to just the city jurisdiction, and save out as akron roads
roads = geopandas.read_file("./data/RoadCenterline.shp")
akron_roads = geopandas.clip(roads, akron_juris['geometry'].item())
akron_roads.to_file("./data/akron_roads.shp")
#takes ~40s on macbook pro

In [5]:
#find nonvalid parcels in tax_parcels, then make them valid one by one (there's ~11 or so nonvalid parcels that have to be adjusted)
nonvalid_parcels = tax_parcels[['OBJECTID', 'geometry']][~tax_parcels.geometry.is_valid]

for parcelID in nonvalid_parcels['OBJECTID']:
    tax_parcels['geometry'][tax_parcels['OBJECTID'] == parcelID] = make_valid(nonvalid_parcels['geometry'][nonvalid_parcels['OBJECTID'] == parcelID].item())

In [6]:
#clip tax_parcels for entire county to just akron
akron_parcels = geopandas.clip(tax_parcels, akron_juris['geometry'].item())
#takes ~35s

In [7]:
#read in parcel data and land data from summit county fiscal office
pardat = geopandas.read_file("./data/Parcel_short.csv")
land = geopandas.read_file("./data/Land_short.csv")
#takes 29s

In [8]:
#merge pardat and land with parcels to get full data
akron_pardat = akron_parcels.merge(pardat, how = 'left', left_on = 'parcelid', right_on = 'PPN')
akron_pardat_land = akron_pardat.merge(land, how = 'left', left_on = 'parcelid', right_on = 'PPN')

In [9]:
#drop columns we aren't concerned with
akron = akron_pardat_land.drop(columns=['OBJECTID', 'OBJECTID_1', 'lowparceli', 'building', 'unit',
        'lglstartdt', 'cvttxcd', 'cvttxdscrp', 'schltxcd',
       'schldscrp', 'usecd', 'usedscrp', 'NGHBRHDCD', 'classcd', 'classdscrp',
       'siteaddres', 'prprtydscr', 'cnvyname', #'ownernme1', 'ownernme2',
       'pstladdres', 'pstlcity', 'pstlstate', 'pstlzip5', 'pstlzip4',
       'floorcount', 'bldgarea', 'resflrarea', 'resyrblt', 'resstrtyp',
       'strclass', 'classmod', 'lndvalue', 'prvassdval', 'cntassdval',
       'assdvalyrc', 'assdpcntcg', 'prvtxblval', 'cnttxblval', 'txblvalyrc',
       'txblpcntch', 'prvwnttxod', 'prvsmrtxod', 'totprvttxt', 'cntwnttxod',
       'cntsmrtxod', 'totcnttxod', 'txodyrchg', 'txodpcntch', 'waterserv',
       'sewerserv', 'cntmarval', 'altid', 'oldtaxmap',
       'resstrtypv', 'PPN_x',
       'Addr1', 'Addr3', 'Str1', 'Str2', 'geometry_y', 'PPN_y',
        'LandCode', 'Column7', 'Column8', 'Column9',  'DepthTbl', 'DepthFactor', 'Value',
       'geometry', 'statedarea', 'Shape_Leng', 'Shape_Area', 'SF', 'Effect_Fr', 'Acre'])

In [10]:
#turn frontage and depth into float values
akron['Act_Fr'] = akron['Act_Fr'].replace('',np.nan).astype(float)
akron['Depth'] = akron['Depth'].replace('',np.nan).astype(float)

In [11]:
#rename auto-generated geometry_x column back to geometry
akron.rename(columns = {'geometry_x':'geometry'}, inplace = True)

In [12]:
#get only the lots of type F (lots that have frontage measured, applied to residential lots)
akron_F = akron[akron['Type'] == 'F']

In [13]:
#calculate square footage
akron_F['sqft'] = akron_F['Act_Fr']*akron_F['Depth']

A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  akron_F['sqft'] = akron_F['Act_Fr']*akron_F['Depth']


In [14]:
#get where the frontage and square footage don't meet the city's requirements for buildability (lot must be over 5000 sqft and have at least 50ft in frontage)
akron_frontage = akron_F[akron_F['Act_Fr'] < 50]
akron_sqftage = akron_F[akron_F['sqft'] < 5000]

In [15]:
#add lots that don't meet either frontage and sqftage to same list. drop the duplicates of the lots that don't meet either requirement
akron_unbuildable = pd.concat([akron_frontage, akron_sqftage], ignore_index = True).drop_duplicates()

In [16]:
#to keep things reasonable, we're only looking at frontage.
#this is in part because the city of akron website says that an unbuildable parcel is merely a lot having less than a 50-foot frontage.
#technically, a buildable parcel is defined as a lot having a minimum of 50-foot frontage and a lot size of greater than 5,000 sq. ft, which means
#that lots under 5000 sqft aren't buildable, but they aren't "unbuildable" unless the frontage is too little.
#we're also narrowing down the range of unbuildable from [0,50) to [25,45).
#this means that strange tiny lots that have actually been combined with other lots without updating the data aren't included
#before this, lots as small as 5ft in frontage were included, and those aren't what we're looking for
#the upper bound was moved down to 45 because the measurements aren't always accurate down to the foot, and we didn't want to include
#lots that are, for example, 49ft in frontage in the dataset, but in reality are 50ft and thus buildable
akron_frontage_25to45 = akron_F[(akron_F['Act_Fr'] < 45) & (akron_F['Act_Fr'] >= 25)]

In [17]:
#set vacant boolean column based on building value (if building value is 0, the lot is vacant)
#set as geodataframe for proper geometry and save out to file for future use
akron_frontage_25to45['Vacant'] = akron_frontage_25to45['bldgvalue'] == 0
akron_frontage_25to45 = geopandas.GeoDataFrame(akron_frontage_25to45)
akron_frontage_25to45.to_file("./data/akron_frontage_25to45.dbf")

A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  akron_frontage_25to45['Vacant'] = akron_frontage_25to45['bldgvalue'] == 0


In [18]:
#print number of vacant unbuildable lots and non-vacant unbuildable lots to see how many we have
print("Number of Vacant Unbuildable Lots:", akron_frontage_25to45['Vacant'].sum())
print("Number of Unbuildable Lots with Residential Buildings:", akron_frontage_25to45.shape[0] - akron_frontage_25to45['Vacant'].sum())

Number of Vacant Unbuildable Lots: 6285
Number of Unbuildable Lots with Residential Buildings: 21606


In [20]:
print("Number of Vacant Unbuildable Lots Owned by the City:", akron_frontage_25to45[akron_frontage_25to45['LUC'] == '640'].shape[0])

Number of Vacant Unbuildable Lots Owned by the City: 600
