# Create Building Intelligence Feature Space
#### Daniel Fay

In [1]:
import pandas as pd
import geopandas as gpd

In [2]:
# Base layer: every other source below is merged or spatially joined onto `owners`.
owners = gpd.read_file('../Data/Master_Building/CH_Only_Clip.shp')

# Remaining shapefile layers, read with geopandas.
foreclosures = gpd.read_file('../Data/processed/shapefiles/FC_count.shp')
police = gpd.read_file('../Data/processed/shapefiles/PD_count.shp')
complaints = gpd.read_file('../Data/processed/shapefiles/complaint_count.shp')
code_enforcement = gpd.read_file('../Data/processed/shapefiles/viol_agg_final_shp.shp')

# Plain tables (no geometry), read with pandas and joined by attribute later on.
building_permits = pd.read_csv('../Data/processed/bldg_permits_agg.csv')
tree_scores = pd.read_csv('../Data/processed/tree_scores.csv')

In [3]:
# Stringify each parcel address and cut off its last seven characters.
# NOTE(review): presumably the removed tail is a ZIP-code suffix -- confirm
# against the raw `parcel_add` values.
owners['parcel_add'] = [str(raw_address)[:-7] for raw_address in owners['parcel_add']]

### Foreclosures

In [4]:
# Keep only the parcel number and its foreclosure count, and give the count
# the name it will carry in the feature space.
foreclosures = foreclosures[['PPN', 'Count']].rename(columns={'Count': 'FC_count'})

In [5]:
# Both sides of the upcoming merge must share a dtype, so cast the parcel
# identifiers to strings.
foreclosures['PPN'] = foreclosures['PPN'].astype(str)
owners['PARCELPIN'] = owners['PARCELPIN'].astype(str)

In [6]:
# Start the feature space from the owners layer; the left join keeps every
# owners row and leaves FC_count as NaN where no foreclosure record matches.
featureSpace = pd.merge(
    owners,
    foreclosures,
    how='left',
    left_on='PARCELPIN',
    right_on='PPN'
)

In [7]:
# PPN was only the right-hand merge key; PARCELPIN already carries the identifier.
featureSpace = featureSpace.drop(['PPN'], axis=1)

### Police Dispatch

In [8]:
# Upper-case the dispatch addresses and append the city/state suffix.
# The vectorised `.str.upper()` replaces `map(lambda ...)`: under Python 3
# `map` returns a lazy iterator rather than a list, so assigning it to a
# column does not produce the upper-cased strings.
police['address'] = police['address'].str.upper() + ', CLEVELAND HEIGHTS, OH'

In [9]:
# Spatially attach police dispatch records to the parcels they intersect.
# A parcel that intersects several records is repeated once per match; the
# following cells collapse those repeats again.
# NOTE(review): newer geopandas releases rename `op` to `predicate` -- confirm
# against the installed version before upgrading.
featureSpace = gpd.sjoin(
    featureSpace,
    police,
    how='left',
    op='intersects'
)

In [10]:
# Discard the joined address text and the join's bookkeeping index column.
featureSpace = featureSpace.drop(['address', 'index_right'], axis=1)

In [11]:
# Total the dispatch counts for each (parcel_add, PARCELPIN) pair, returning
# the group keys as ordinary columns.
counts = (
    featureSpace[['parcel_add', 'PARCELPIN', 'PD_Count']]
    .groupby(['parcel_add', 'PARCELPIN'])
    .sum()
    .reset_index()
)

In [12]:
# Remove the per-match count column, then collapse the rows the spatial join
# repeated so each (parcel_add, PARCELPIN) pair appears once (first row kept).
featureSpace = (
    featureSpace
    .drop(['PD_Count'], axis=1)
    .drop_duplicates(['parcel_add', 'PARCELPIN'])
)

In [13]:
# Attach the summed dispatch counts by key, not by index label.
# `counts` was re-indexed 0..m-1 in sorted (parcel_add, PARCELPIN) order by
# groupby/reset_index, while `featureSpace` still carries its own row labels,
# so a plain column assignment would align on unrelated labels and hand each
# parcel another parcel's total. Merging on the keys pairs them correctly.
featureSpace = featureSpace.merge(
    counts.rename(columns={'PD_Count': 'PD_count'}),
    how='left',
    on=['parcel_add', 'PARCELPIN']
)

### Complaints

In [14]:
# Normalise complaint addresses: keep the text before the first comma, append
# the city/state suffix, and upper-case the result.
# Vectorised string methods replace both the row loop and the
# `map(lambda ...)` assignment; under Python 3 `map` returns a lazy iterator,
# so assigning it to a column does not produce the upper-cased strings.
complaint_streets = complaints['Address'].str.split(',').str[0]
complaints['Address'] = (complaint_streets + ', Cleveland Heights, OH').str.upper()

In [15]:
# Spatially attach complaint records to the parcels they intersect, then
# discard the joined address text and the join's bookkeeping index column.
# NOTE(review): newer geopandas releases rename `op` to `predicate` -- confirm
# against the installed version before upgrading.
featureSpace = gpd.sjoin(
    featureSpace,
    complaints,
    how='left',
    op='intersects'
)
featureSpace = featureSpace.drop(['Address', 'index_right'], axis=1)

In [16]:
# Total the complaint counts for each (parcel_add, PARCELPIN) pair, returning
# the group keys as ordinary columns.
counts = (
    featureSpace[['parcel_add', 'PARCELPIN', 'Complaint_']]
    .groupby(['parcel_add', 'PARCELPIN'])
    .sum()
    .reset_index()
)

In [17]:
# Remove the per-match count column, then collapse the rows the spatial join
# repeated so each (parcel_add, PARCELPIN) pair appears once (first row kept).
featureSpace = (
    featureSpace
    .drop(['Complaint_'], axis=1)
    .drop_duplicates(['parcel_add', 'PARCELPIN'])
)

In [18]:
# Attach the summed complaint counts by key, not by index label.
# `counts` was re-indexed 0..m-1 in sorted (parcel_add, PARCELPIN) order by
# groupby/reset_index, while `featureSpace` still carries its own row labels,
# so a plain column assignment would align on unrelated labels and hand each
# parcel another parcel's total. Merging on the keys pairs them correctly.
featureSpace = featureSpace.merge(
    counts.rename(columns={'Complaint_': 'Comp_count'}),
    how='left',
    on=['parcel_add', 'PARCELPIN']
)

### Code Enforcement

In [19]:
# Keep only the columns used below; `geometry` is retained for the spatial join.
code_enforcement = code_enforcement[[
    'address',
    'negCodecnt',
    'last_case',
    'own_occup',
    'geometry'
]]

In [20]:
# Spatially attach code-enforcement records to the parcels they intersect,
# then discard the joined address text and the join's bookkeeping index column.
# NOTE(review): newer geopandas releases rename `op` to `predicate` -- confirm
# against the installed version before upgrading.
featureSpace = gpd.sjoin(
    featureSpace,
    code_enforcement,
    how='left',
    op='intersects'
)
featureSpace = featureSpace.drop(['address', 'index_right'], axis=1)

In [21]:
# Total the `negCodecnt` values for each (parcel_add, PARCELPIN) pair,
# returning the group keys as ordinary columns.
counts = (
    featureSpace[['parcel_add', 'PARCELPIN', 'negCodecnt']]
    .groupby(['parcel_add', 'PARCELPIN'])
    .sum()
    .reset_index()
)

In [22]:
# Remove the per-match count column, then collapse the rows the spatial join
# repeated so each (parcel_add, PARCELPIN) pair appears once (first row kept).
featureSpace = (
    featureSpace
    .drop(['negCodecnt'], axis=1)
    .drop_duplicates(['parcel_add', 'PARCELPIN'])
)

In [23]:
# Attach the summed `negCodecnt` totals by key, not by index label.
# `counts` was re-indexed 0..m-1 in sorted (parcel_add, PARCELPIN) order by
# groupby/reset_index, while `featureSpace` still carries its own row labels,
# so a plain column assignment would align on unrelated labels and hand each
# parcel another parcel's total. Merging on the keys pairs them correctly.
featureSpace = featureSpace.merge(
    counts.rename(columns={'negCodecnt': 'NegAct_count'}),
    how='left',
    on=['parcel_add', 'PARCELPIN']
)

### Tree Scores

In [24]:
def fix_address(row):
    """Rebuild a tree-score address with its third token moved to the front.

    The value of ``row['Address']`` is split on single spaces. The third token
    is placed first, followed by the first and second tokens, and the
    ', CLEVELAND HEIGHTS, OH' suffix is appended. Any tokens after the third
    are discarded.

    Raises IndexError when the address has fewer than three space-separated
    tokens.
    """
    tokens = row['Address'].split(' ')
    reordered = ' '.join([tokens[2], tokens[0], tokens[1]])
    return reordered + ', CLEVELAND HEIGHTS, OH'

# Reorder every tree-score address with fix_address, then upper-case it so it
# can be compared with `parcel_add` in the merge that follows.
# `.str.upper()` replaces `map(lambda ...)`: under Python 3 `map` returns a
# lazy iterator, so assigning it to a column does not produce the upper-cased
# strings.
tree_scores['Address'] = tree_scores.apply(fix_address, axis=1)
tree_scores['Address'] = tree_scores['Address'].str.upper()

In [25]:
# Bring in the tree-score columns by matching address text, keeping every
# feature-space row; the right-hand key column is redundant after the merge.
featureSpace = pd.merge(
    featureSpace,
    tree_scores,
    how='left',
    left_on='parcel_add',
    right_on='Address'
)
featureSpace = featureSpace.drop('Address', axis=1)

### Building Permits

In [26]:
# Only the parcel identifier and the `perm_num` column are needed for the merge.
building_permits = building_permits[[
    'parcelID',
    'perm_num'
]]

In [27]:
# Bring in `perm_num` by parcel identifier, keeping every feature-space row;
# the right-hand key column is redundant after the merge.
featureSpace = pd.merge(
    featureSpace,
    building_permits,
    how='left',
    left_on='PARCEL_ID',
    right_on='parcelID'
)
featureSpace = featureSpace.drop('parcelID', axis=1)

In [35]:
# Rows without a match in one of the left joins carry NaN in that source's
# columns; replace those gaps with zero.
zero_fill_cols = ['FC_count', 'PD_count', 'own_occup', 'Comp_count', 'NegAct_count', 'perm_num']
featureSpace[zero_fill_cols] = featureSpace[zero_fill_cols].fillna(value=0)


In [36]:
# Wrap the assembled table as a GeoDataFrame tagged with EPSG:4326.
# NOTE(review): this only labels the CRS, it does not reproject -- confirm the
# source geometries really are in EPSG:4326.
featureSpace_shp = gpd.GeoDataFrame(
    featureSpace,
    crs={'init': 'epsg:4326'}
)

In [37]:
# Write the finished feature space to disk as a shapefile.
# NOTE(review): shapefile field names are limited in length, so longer column
# names such as 'NegAct_count' may be truncated on write -- verify the output.
output_path = '../Data/processed/shapefiles/featureSpace.shp'
featureSpace_shp.to_file(output_path)