In [1]:
import warnings
warnings.filterwarnings('ignore')

In [2]:
import pandas as pd
import glob, os
import numpy as np
import matplotlib.pyplot as plt
import datetime  as dt
import seaborn as sns

In [3]:
data_folder = 'merge_data'

In [4]:
# data folder path
data_directory = os.path.join('..','data','clean_data/{}/crime_beats.csv'.format(data_folder))
data_directory_saves = os.path.join( '..','data','clean_data','merge_data/')

In [5]:
df = pd.read_csv(data_directory)

In [6]:
df.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 85578 entries, 0 to 85577
Data columns (total 8 columns):
date            85578 non-null object
hour            85578 non-null int64
beat            85578 non-null object
offense_type    85578 non-null object
block_range     85578 non-null object
street_name     85578 non-null object
premise         85578 non-null object
num_offenses    85578 non-null int64
dtypes: int64(2), object(6)
memory usage: 5.2+ MB


In [8]:
df.head()

Unnamed: 0,date,hour,beat,offense_type,block_range,street_name,premise,num_offenses
0,1916-05-23,19,10H70,Aggravated Assault,UNK,LIBERTY ROAD,Residence or House,1
1,1917-02-20,16,10H70,Theft,7500-7599,ARDMORE,Other Parking Lot,1
2,1963-02-02,13,10H40,Theft,3800-3899,MAIN,REHABILITATION CENTER,1
3,1966-01-01,0,10H50,Rape,3300-3399,ALABAMA,APARTMENT,1
4,1971-02-03,6,1A10,Theft,1200-1299,TRAVIS,Residence or House,1


## cleanup premise column

In [9]:
len(df.premise.unique())

335

In [10]:
df.premise.value_counts()[:10]

Road, Street, or Sidewalk        7325
ROAD/STREET/SIDEWALK             7063
APARTMENT                        3982
APARTMENT PARKING LOT            3681
RESIDENCE/HOUSE                  3605
UNK                              3598
Residence or House               3229
OTHER PARKING LOT                3101
COMMERCIAL PARKING LOT/GARAGE    2541
Apartment Parking Lot            2492
Name: premise, dtype: int64

## lowercase all

In [11]:
df.premise = df.premise.str.lower()

In [12]:
len(df.premise.unique())

278

In [13]:
df.premise.value_counts()[:10]

road, street, or sidewalk        7325
road/street/sidewalk             7063
apartment parking lot            6173
apartment                        6159
other parking lot                4891
residence/house                  3605
unk                              3598
residence or house               3229
commercial parking lot/garage    2541
hospital                         2162
Name: premise, dtype: int64

In [14]:
def clean_premise(pat,df):
    fil = df.premise.str.contains(pat,regex=True)
    result = df[fil].premise.value_counts()
    return result

In [15]:
def change_premise(pat,df,new_premise):
    df.premise = df.premise.str.replace(pat,new_premise,regex=True)

## road

In [20]:
word1 = 'road'
word2 = 'street'

In [21]:
pat = r"^(?=.*\b{}\b)(?=.*\b{}\b).*$".format(word1,word2)
clean_premise(pat,df)

road, street, or sidewalk                                                                                                                                 7325
road/street/sidewalk                                                                                                                                      7063
road, street, or sidewalk                                                                                                                                  220
Name: premise, dtype: int64

In [22]:
change_premise(pat,df,'road_street_sidewalk')

In [23]:
df.premise.value_counts()[:10]

road_street_sidewalk             14608
apartment parking lot             6173
apartment                         6159
other parking lot                 4891
residence/house                   3605
unk                               3598
residence or house                3229
commercial parking lot/garage     2541
hospital                          2162
other/unknown                     1967
Name: premise, dtype: int64

## apartment parking

In [24]:
word1 = 'apartment'
word2 = 'parking'

In [25]:
pat = r"^(?=.*\b{}\b)(?=.*\b{}\b).*$".format(word1,word2)
clean_premise(pat,df)

apartment parking lot                                                                                                                                     6173
apartment parking lot                                                                                                                                       75
Name: premise, dtype: int64

In [26]:
change_premise(pat,df,'apartment_parking')

In [27]:
df.premise.value_counts()[:10]

road_street_sidewalk             14608
apartment_parking                 6248
apartment                         6159
other parking lot                 4891
residence/house                   3605
unk                               3598
residence or house                3229
commercial parking lot/garage     2541
hospital                          2162
other/unknown                     1967
Name: premise, dtype: int64

## residence

In [28]:
word1 = 'residence'
pat = r"^(?=.*\b{}\b).*$".format(word1)
clean_premise(pat,df)

residence/house                                                                                                                                           3605
residence or house                                                                                                                                        3229
vacant single occupancy residence (houses,townhouses,duplexes, etc.)                                                                                       187
residence or house                                                                                                                                          73
Name: premise, dtype: int64

In [29]:
change_premise(pat,df,'residence')

In [30]:
df.premise.value_counts()[:10]

road_street_sidewalk             14608
residence                         7094
apartment_parking                 6248
apartment                         6159
other parking lot                 4891
unk                               3598
commercial parking lot/garage     2541
hospital                          2162
other/unknown                     1967
driveway                          1857
Name: premise, dtype: int64

## commercial parking

In [31]:
word1 = 'commercial'
word2 = 'parking'

pat = r"^(?=.*\b{}\b)(?=.*\b{}\b).*$".format(word1,word2)
clean_premise(pat,df)

commercial parking lot/garage                                                                                                                             2541
commercial parking lot or garage                                                                                                                          1794
commercial parking lot or garage                                                                                                                            86
Name: premise, dtype: int64

In [32]:
change_premise(pat,df,'commercial_parking')

In [33]:
df.premise.value_counts()[:10]

road_street_sidewalk    14608
residence                7094
apartment_parking        6248
apartment                6159
other parking lot        4891
commercial_parking       4421
unk                      3598
hospital                 2162
other/unknown            1967
driveway                 1857
Name: premise, dtype: int64

## restaurant parking

In [34]:
word1 = 'restaurant'
word2 = 'parking'

pat = r"^(?=.*\b{}\b)(?=.*\b{}\b).*$".format(word1,word2)
clean_premise(pat,df)

restaurant/cafeteria parking lot                                                                                                                          1077
restaurant or cafeteria parking lot                                                                                                                       1060
restaurant or cafeteria parking lot                                                                                                                         21
Name: premise, dtype: int64

In [35]:
change_premise(pat,df,'restaurant_parking')

In [36]:
df.premise.value_counts()[:10]

road_street_sidewalk    14608
residence                7094
apartment_parking        6248
apartment                6159
other parking lot        4891
commercial_parking       4421
unk                      3598
hospital                 2162
restaurant_parking       2158
other/unknown            1967
Name: premise, dtype: int64

In [39]:
word1 = 'club'
word2 = 'parking'

pat = r"^(?=.*\b{}\b)(?=.*\b{}\b).*$".format(word1,word2)
clean_premise(pat,df)

bar/night club parking lot                                                                                                                                690
bar or night club parking lot                                                                                                                             559
bar or night club parking lot                                                                                                                              13
Name: premise, dtype: int64

In [40]:
change_premise(pat,df,'bar_club_parking')

In [41]:
df.premise.value_counts()[:10]

road_street_sidewalk    14608
residence                7094
apartment_parking        6248
apartment                6159
other parking lot        4891
commercial_parking       4421
unk                      3598
hospital                 2162
restaurant_parking       2158
other/unknown            1967
Name: premise, dtype: int64

In [42]:
word1 = 'club'
word2 = 'bar'

pat = r"^(?=.*\b{}\b)(?=.*\b{}\b).*$".format(word1,word2)
clean_premise(pat,df)

bar/night club                                                                                                                                            1036
bar or night club                                                                                                                                          707
bar or night club                                                                                                                                           18
Name: premise, dtype: int64

In [43]:
change_premise(pat,df,'bar_club')

In [44]:
df.premise.value_counts()[:10]

road_street_sidewalk    14608
residence                7094
apartment_parking        6248
apartment                6159
other parking lot        4891
commercial_parking       4421
unk                      3598
hospital                 2162
restaurant_parking       2158
other/unknown            1967
Name: premise, dtype: int64

In [47]:
word1 = 'grocery'
word2 = 'parking'

pat = r"^(?=.*\b{}\b)(?=.*\b{}\b).*$".format(word1,word2)
clean_premise(pat,df)

grocery/supermarket parking lot                                                                                                                           226
grocery store or supermarket parking lot                                                                                                                  208
grocery store or supermarket parking lot                                                                                                                    4
Name: premise, dtype: int64

In [48]:
change_premise(pat,df,'supermarket_parking')

In [49]:
df.premise.value_counts()[:10]

road_street_sidewalk    14608
residence                7094
apartment_parking        6248
apartment                6159
other parking lot        4891
commercial_parking       4421
unk                      3598
hospital                 2162
restaurant_parking       2158
other/unknown            1967
Name: premise, dtype: int64

In [50]:
word1 = 'grocery'
word2 = 'supermarket'

pat = r"^(?=.*\b{}\b)(?=.*\b{}\b).*$".format(word1,word2)
clean_premise(pat,df)

grocery/supermarket                                                                                                                                       954
grocery store or supermarket                                                                                                                              515
vacant grocery store or supermarket                                                                                                                        10
grocery store or supermarket                                                                                                                                6
vacant grocery/supermarket                                                                                                                                  1
Name: premise, dtype: int64

In [51]:
change_premise(pat,df,'supermarket')

In [52]:
df.premise.value_counts()[:10]

road_street_sidewalk    14608
residence                7094
apartment_parking        6248
apartment                6159
other parking lot        4891
commercial_parking       4421
unk                      3598
hospital                 2162
restaurant_parking       2158
other/unknown            1967
Name: premise, dtype: int64

In [53]:
word1 = 'department'
word2 = 'discount'

pat = r"^(?=.*\b{}\b)(?=.*\b{}\b).*$".format(word1,word2)
clean_premise(pat,df)

department/discount store                                                                                                                                 1027
department or discount store                                                                                                                               336
department or discount store                                                                                                                                10
Name: premise, dtype: int64

In [54]:
change_premise(pat,df,'department_store')

In [55]:
df.premise.value_counts()[:10]

road_street_sidewalk    14608
residence                7094
apartment_parking        6248
apartment                6159
other parking lot        4891
commercial_parking       4421
unk                      3598
hospital                 2162
restaurant_parking       2158
other/unknown            1967
Name: premise, dtype: int64

In [56]:
word1 = 'hotel'
word2 = 'parking'

pat = r"^(?=.*\b{}\b)(?=.*\b{}\b).*$".format(word1,word2)
clean_premise(pat,df)

hotel/motel parking lot                                                                                                                                   682
hotel or motel parking lot                                                                                                                                528
hotel or motel parking lot                                                                                                                                 25
Name: premise, dtype: int64

In [57]:
change_premise(pat,df,'hotel_motel_parking')

In [58]:
df.premise.value_counts()[:10]

road_street_sidewalk    14608
residence                7094
apartment_parking        6248
apartment                6159
other parking lot        4891
commercial_parking       4421
unk                      3598
hospital                 2162
restaurant_parking       2158
other/unknown            1967
Name: premise, dtype: int64

In [59]:
word1 = 'hotel'
word2 = 'motel'

pat = r"^(?=.*\b{}\b)(?=.*\b{}\b).*$".format(word1,word2)
clean_premise(pat,df)

hotel/motel/etc.                                                                                                                                          623
hotel, motel, inn, etc.                                                                                                                                   562
hotel, motel, inn, etc.                                                                                                                                    14
vacant hotel/motel/etc.                                                                                                                                     6
vacant hotel, motel, etc.                                                                                                                                   2
Name: premise, dtype: int64

In [60]:
change_premise(pat,df,'hotel_motel')

In [61]:
df.premise.value_counts()[:10]

road_street_sidewalk    14608
residence                7094
apartment_parking        6248
apartment                6159
other parking lot        4891
commercial_parking       4421
unk                      3598
hospital                 2162
restaurant_parking       2158
other/unknown            1967
Name: premise, dtype: int64

In [63]:
word1 = 'restaurant'
word2 = 'cafeteria'

pat = r"^(?=.*\b{}\b)(?=.*\b{}\b).*$".format(word1,word2)
clean_premise(pat,df)

restaurant/cafeteria                                                                                                                                      781
restaurant or cafeteria                                                                                                                                   623
restaurant or cafeteria                                                                                                                                    13
Name: premise, dtype: int64

In [64]:
change_premise(pat,df,'restaurant')

In [65]:
df.premise.value_counts()[:10]

road_street_sidewalk    14608
residence                7094
apartment_parking        6248
apartment                6159
other parking lot        4891
commercial_parking       4421
unk                      3598
hospital                 2162
restaurant_parking       2158
other/unknown            1967
Name: premise, dtype: int64

In [66]:
word1 = 'gas'
word2 = 'station'

pat = r"^(?=.*\b{}\b)(?=.*\b{}\b).*$".format(word1,word2)
clean_premise(pat,df)

service or gas station                                                                                                                                    763
service/gas station                                                                                                                                       556
service or gas station                                                                                                                                     15
Name: premise, dtype: int64

In [67]:
change_premise(pat,df,'gas_station')

In [68]:
df.premise.value_counts()[:10]

road_street_sidewalk    14608
residence                7094
apartment_parking        6248
apartment                6159
other parking lot        4891
commercial_parking       4421
unk                      3598
hospital                 2162
restaurant_parking       2158
other/unknown            1967
Name: premise, dtype: int64

In [69]:
word1 = 'hospital'
word2 = 'parking'

pat = r"^(?=.*\b{}\b)(?=.*\b{}\b).*$".format(word1,word2)
clean_premise(pat,df)

hospital parking lot                                                                                                                                      545
hospital parking lot                                                                                                                                        7
Name: premise, dtype: int64

In [70]:
change_premise(pat,df,'hospital_parking')

In [72]:
df.premise.value_counts()[10:20]

driveway               1857
bar_club               1761
supermarket            1486
restaurant             1417
department_store       1373
gas_station            1334
bar_club_parking       1262
hotel_motel_parking    1235
hotel_motel            1207
convenience store       911
Name: premise, dtype: int64

In [74]:
word1 = 'convenience'
word2 = 'store'
word3 = 'parking'
pat = r"^(?=.*\b{}\b)(?=.*\b{}\b)(?=.*\b{}\b).*$".format(word1,word2,word3)
clean_premise(pat,df)

convenience store parking lot                                                                                                                             568
convenience store parking lot                                                                                                                               3
Name: premise, dtype: int64

In [75]:
change_premise(pat,df,'convenience_store_parking')

In [76]:
word1 = 'convenience'
word2 = 'store'
pat = r"^(?=.*\b{}\b)(?=.*\b{}\b).*$".format(word1,word2)
clean_premise(pat,df)

convenience store                                                                                                                                         911
convenience store                                                                                                                                           7
Name: premise, dtype: int64

In [77]:
change_premise(pat,df,'convenience_store')

In [79]:
df.premise.value_counts()[20:30]

office building                             827
commercial building                         789
other, unknown, or not listed               778
miscellaneous business (non-specific)       680
strip business center parking lot           632
convenience_store_parking                   571
hospital_parking                            552
construction site                           532
parks and recreation, zoo, swimming pool    514
parks & recreation, zoo, swim pool          504
Name: premise, dtype: int64

In [81]:
word1 = 'building'
pat = r"^(?=.*\b{}\b).*$".format(word1)
clean_premise(pat,df)

office building                                                                                                                                           827
commercial building                                                                                                                                       789
government/public building                                                                                                                                124
vacant building (commercial)                                                                                                                              121
government or public building                                                                                                                              96
maintenance/building services                                                                                                                              18
maintenance or building services                    

In [82]:
change_premise(pat,df,'building')

In [83]:
df.premise.value_counts()[20:30]

convenience_store                           918
other, unknown, or not listed               778
miscellaneous business (non-specific)       680
strip business center parking lot           632
convenience_store_parking                   571
hospital_parking                            552
construction site                           532
parks and recreation, zoo, swimming pool    514
parks & recreation, zoo, swim pool          504
condominium                                 478
Name: premise, dtype: int64

In [87]:
word1 = 'construction'
word2 = 'site'
pat = r"^(?=.*\b{}\b)(?=.*\b{}\b).*$".format(word1,word2)
clean_premise(pat,df)

construction site                                                                                                                                         532
construction site                                                                                                                                          12
Name: premise, dtype: int64

In [88]:
change_premise(pat,df,'construction_site')

In [89]:
df.premise.value_counts()[20:30]

convenience_store                           918
other, unknown, or not listed               778
miscellaneous business (non-specific)       680
strip business center parking lot           632
convenience_store_parking                   571
hospital_parking                            552
construction_site                           544
parks and recreation, zoo, swimming pool    514
parks & recreation, zoo, swim pool          504
condominium                                 478
Name: premise, dtype: int64

In [90]:
word1 = 'bus'
pat = r"^(?=.*\b{}\b).*$".format(word1)
clean_premise(pat,df)

bus station                                                                                                                                               427
bus stop                                                                                                                                                  425
bus stop                                                                                                                                                    7
bus station                                                                                                                                                 4
Name: premise, dtype: int64

In [91]:
change_premise(pat,df,'bus_stop_station')

In [92]:
df.premise.value_counts()[20:30]

convenience_store                           918
bus_stop_station                            863
other, unknown, or not listed               778
miscellaneous business (non-specific)       680
strip business center parking lot           632
convenience_store_parking                   571
hospital_parking                            552
construction_site                           544
parks and recreation, zoo, swimming pool    514
parks & recreation, zoo, swim pool          504
Name: premise, dtype: int64

In [93]:
word1 = 'parks'
pat = r"^(?=.*\b{}\b).*$".format(word1)
clean_premise(pat,df)

parks and recreation, zoo, swimming pool                                                                                                                  514
parks & recreation, zoo, swim pool                                                                                                                        504
parks and recreation, zoo, swimming pool                                                                                                                   25
Name: premise, dtype: int64

In [94]:
change_premise(pat,df,'park_rec_pool')

In [95]:
df.premise.value_counts()[20:30]

park_rec_pool                            1043
convenience_store                         918
bus_stop_station                          863
other, unknown, or not listed             778
miscellaneous business (non-specific)     680
strip business center parking lot         632
convenience_store_parking                 571
hospital_parking                          552
construction_site                         544
condominium                               478
Name: premise, dtype: int64

In [98]:
word1 = 'sexually'
pat = r"^(?=.*\b{}\b).*$".format(word1)
clean_premise(pat,df)

sexually oriented club                    28
sexually oriented business parking lot    24
Name: premise, dtype: int64

In [99]:
change_premise(pat,df,'sexually_oriented_business')

In [101]:
word1 = 'business'
word2 = 'parking'
pat = r"^(?=.*\b{}\b)(?=.*\b{}\b).*$".format(word1,word2)
clean_premise(pat,df)

strip business center parking lot                                                                                                                         632
strip business center parking lot                                                                                                                           6
Name: premise, dtype: int64

In [102]:
change_premise(pat,df,'business_center_parking')

In [103]:
word1 = 'business'
pat = r"^(?=.*\b{}\b).*$".format(word1)
clean_premise(pat,df)

miscellaneous business (non-specific)                                                                                                                     680
misc. business (non-specific)                                                                                                                             349
miscellaneous business (non-specific)                                                                                                                      22
Name: premise, dtype: int64

In [104]:
change_premise(pat,df,'business')

In [106]:
df.premise.value_counts()[30:40]

garage or carport                            445
supermarket_parking                          438
multi-plex hme(duplex,triplex etc)           422
specialty store (non-specific)               391
stadium, sports arena, race track            353
stadium/sprts arena/race track               328
garage/carport                               322
drug store/medical supply                    316
clothing store                               287
vehicle/auto sales/lease/auto parts store    255
Name: premise, dtype: int64

In [108]:
word1 = 'stadium'
pat = r"^(?=.*\b{}\b).*$".format(word1)
clean_premise(pat,df)

stadium, sports arena, race track                                                                                                                         353
stadium/sprts arena/race track                                                                                                                            328
stadium, sports arena, race track                                                                                                                          12
Name: premise, dtype: int64

In [109]:
change_premise(pat,df,'stadium_arena_track')

In [110]:
df.premise.value_counts()[30:40]

condominium                                  478
garage or carport                            445
supermarket_parking                          438
multi-plex hme(duplex,triplex etc)           422
specialty store (non-specific)               391
garage/carport                               322
drug store/medical supply                    316
clothing store                               287
vehicle/auto sales/lease/auto parts store    255
libraries, museums                           215
Name: premise, dtype: int64

In [111]:
word1 = 'drug'
pat = r"^(?=.*\b{}\b).*$".format(word1)
clean_premise(pat,df)

drug store/medical supply                                                                                                                                 316
drug store or medical supply                                                                                                                              162
drug store or medical supply                                                                                                                                3
Name: premise, dtype: int64

In [112]:
change_premise(pat,df,'drug_store')

In [113]:
df.premise.value_counts()[30:40]

drug_store                                   481
condominium                                  478
garage or carport                            445
supermarket_parking                          438
multi-plex hme(duplex,triplex etc)           422
specialty store (non-specific)               391
garage/carport                               322
clothing store                               287
vehicle/auto sales/lease/auto parts store    255
libraries, museums                           215
Name: premise, dtype: int64

In [116]:
word1 = 'liquor'
pat = r"^(?=.*\b{}\b).*$".format(word1)
clean_premise(pat,df)

liquor store                                                                                                                                              67
liquor store parking lot                                                                                                                                  24
liquor store                                                                                                                                               3
liquor store parking lot                                                                                                                                   2
Name: premise, dtype: int64

In [117]:
change_premise(pat,df,'liquor_store')

In [118]:
df.premise.value_counts()[30:40]

drug_store                                   481
condominium                                  478
garage or carport                            445
supermarket_parking                          438
multi-plex hme(duplex,triplex etc)           422
specialty store (non-specific)               391
garage/carport                               322
clothing store                               287
vehicle/auto sales/lease/auto parts store    255
libraries, museums                           215
Name: premise, dtype: int64

In [121]:
word1 = 'auto'
word2 = 'repair'
pat = r"^(?=.*\b{}\b)(?=.*\b{}\b).*$".format(word1,word2)
clean_premise(pat,df)

auto repair                                                                                                                                               115
auto repair                                                                                                                                                 1
Name: premise, dtype: int64

In [122]:
change_premise(pat,df,'auto_repair')

In [123]:
word1 = 'auto'
pat = r"^(?=.*\b{}\b).*$".format(word1)
clean_premise(pat,df)

vehicle/auto sales/lease/auto parts store                                                                                                                 255
vehicle/auto sales/lease/auto parts store                                                                                                                   2
Name: premise, dtype: int64

In [124]:
change_premise(pat,df,'auto_sale_parts_store')

In [125]:
df.premise.value_counts()[30:40]

drug_store                            481
condominium                           478
garage or carport                     445
supermarket_parking                   438
multi-plex hme(duplex,triplex etc)    422
specialty store (non-specific)        391
garage/carport                        322
clothing store                        287
auto_sale_parts_store                 257
libraries, museums                    215
Name: premise, dtype: int64

In [126]:
word1 = 'school'
pat = r"^(?=.*\b{}\b).*$".format(word1)
clean_premise(pat,df)

high school                                                                                                                                               141
elementary school                                                                                                                                          50
private school                                                                                                                                             36
middle school                                                                                                                                              23
commercial or training school                                                                                                                              20
vacant school or college/university                                                                                                                        17
vacant school/college                               

In [127]:
change_premise(pat,df,'school')

In [129]:
word1 = 'libraries'
pat = r"^(?=.*\b{}\b).*$".format(word1)
clean_premise(pat,df)

libraries, museums                                                                                                                                        215
libraries, museums                                                                                                                                          3
Name: premise, dtype: int64

In [130]:
change_premise(pat,df,'libraries_museums')

In [132]:
df.premise.value_counts()[40:50]

libraries_museums                             218
church/synagogue/temple                       198
vacant single occ resd(house,townhs,dplex)    185
rental storage facility                       176
warehouse                                     171
gym,recreat,club hse,indr pool,spa            164
university/college                            146
gym, recreat, club house, indoor pool, spa    140
convention center/exhibit halls               136
church/synagogue/temple parking lot           130
Name: premise, dtype: int64

In [134]:
word1 = 'church'
word2 = 'parking'
pat = r"^(?=.*\b{}\b)(?=.*\b{}\b).*$".format(word1,word2)
clean_premise(pat,df)

church/synagogue/temple parking lot         130
church, synagogue, or temple parking lot    106
Name: premise, dtype: int64

In [135]:
change_premise(pat,df,'church_temple_parking')

In [136]:
word1 = 'church'
pat = r"^(?=.*\b{}\b).*$".format(word1)
clean_premise(pat,df)

church/synagogue/temple                                                                                                                                   198
church, synagogue, or temple                                                                                                                               85
vacant church/synagogue/temple                                                                                                                              5
vacant church, synagogue, or temple                                                                                                                         4
church, synagogue, or temple                                                                                                                                2
Name: premise, dtype: int64

In [137]:
change_premise(pat,df,'church_temple')

In [138]:
df.premise.value_counts()[40:50]

auto_sale_parts_store                         257
church_temple_parking                         236
libraries_museums                             218
vacant single occ resd(house,townhs,dplex)    185
rental storage facility                       176
warehouse                                     171
gym,recreat,club hse,indr pool,spa            164
university/college                            146
gym, recreat, club house, indoor pool, spa    140
convention center/exhibit halls               136
Name: premise, dtype: int64

In [139]:
word1 = 'gym'
pat = r"^(?=.*\b{}\b).*$".format(word1)
clean_premise(pat,df)

gym,recreat,club hse,indr pool,spa                                                                                                                        164
gym, recreat, club house, indoor pool, spa                                                                                                                140
gym, recreat, club house, indoor pool                                                                                                                      41
gym, recreat, club house, indoor pool                                                                                                                       6
Name: premise, dtype: int64

In [140]:
change_premise(pat,df,'gym_club_house')

In [141]:
df.premise.value_counts()[40:50]

clothing store                                287
auto_sale_parts_store                         257
church_temple_parking                         236
libraries_museums                             218
vacant single occ resd(house,townhs,dplex)    185
rental storage facility                       176
warehouse                                     171
university/college                            146
convention center/exhibit halls               136
convention center or exhibit halls            128
Name: premise, dtype: int64

In [142]:
word1 = 'storage'
pat = r"^(?=.*\b{}\b).*$".format(word1)
clean_premise(pat,df)

rental storage facility                                                                                                                                   176
vacant storage fac (barn,garage,warehouse)                                                                                                                 12
vacant storage facility (barns,garages,warehouses,etc.)                                                                                                     4
rental storage facility                                                                                                                                     4
vacant storage fac (barn,garage,warehouse)                                                                                                                  1
Name: premise, dtype: int64

In [143]:
change_premise(pat,df,'storage_facility')

In [144]:
df.premise.value_counts()[40:50]

clothing store                                287
auto_sale_parts_store                         257
church_temple_parking                         236
libraries_museums                             218
storage_facility                              197
vacant single occ resd(house,townhs,dplex)    185
warehouse                                     171
university/college                            146
convention center/exhibit halls               136
convention center or exhibit halls            128
Name: premise, dtype: int64

In [145]:
word1 = 'convention'
pat = r"^(?=.*\b{}\b).*$".format(word1)
clean_premise(pat,df)

convention center/exhibit halls                                                                                                                           136
convention center or exhibit halls                                                                                                                        128
convention center or exhibit halls                                                                                                                          8
Name: premise, dtype: int64

In [146]:
change_premise(pat,df,'convention_center')

In [148]:
df.premise.value_counts()[50:60]

auto_repair                              116
multi-plex home (duplex,triplex etc.)    113
highway/freeway                          112
bank                                     112
light rail (metro rail) vehicle          111
physician's office                       107
barber and beauty shops                  100
liquor_store                              96
field, woods, forest, park                87
bank/saving institution parking lot       77
Name: premise, dtype: int64

In [150]:
word1 = 'bank'
word2 = 'parking'
pat = r"^(?=.*\b{}\b)(?=.*\b{}\b).*$".format(word1,word2)
clean_premise(pat,df)

bank/saving institution parking lot                                                                                                                       77
bank or savings institution parking lot                                                                                                                   42
bank or savings institution parking lot                                                                                                                    2
Name: premise, dtype: int64

In [151]:
change_premise(pat,df,'bank_parking')

In [152]:
word1 = 'bank'
pat = r"^(?=.*\b{}\b).*$".format(word1)
clean_premise(pat,df)

bank                                                                                                                                                      112
bank                                                                                                                                                        2
Name: premise, dtype: int64

In [153]:
change_premise(pat,df,'bank')

In [154]:
df.premise.value_counts()[50:60]

bank_parking                             121
auto_repair                              116
bank                                     114
multi-plex home (duplex,triplex etc.)    113
highway/freeway                          112
light rail (metro rail) vehicle          111
physician's office                       107
barber and beauty shops                  100
liquor_store                              96
field, woods, forest, park                87
Name: premise, dtype: int64

In [157]:
word1 = 'rail'
word2 = 'vehicle'
pat = r"^(?=.*\b{}\b)(?=.*\b{}\b).*$".format(word1,word2)
clean_premise(pat,df)

light rail (metro rail) vehicle                                                                                                                           111
light rail (metro rail) vehicle                                                                                                                             6
light rail vehicle                                                                                                                                          6
Name: premise, dtype: int64

In [158]:
change_premise(pat,df,'rail_vehicle')

In [159]:
word1 = 'rail'
pat = r"^(?=.*\b{}\b).*$".format(word1)
clean_premise(pat,df)

light rail platform                                                                                                                                       125
light rail platform                                                                                                                                         4
Name: premise, dtype: int64

In [160]:
change_premise(pat,df,'rail_platform')

In [162]:
df.premise.value_counts()[:60]

road_street_sidewalk                          14608
residence                                      7094
apartment_parking                              6248
apartment                                      6159
other parking lot                              4891
commercial_parking                             4421
unk                                            3598
hospital                                       2162
restaurant_parking                             2158
building                                       2017
other/unknown                                  1967
driveway                                       1857
bar_club                                       1761
supermarket                                    1486
restaurant                                     1417
department_store                               1373
gas_station                                    1334
bar_club_parking                               1262
hotel_motel_parking                            1235
hotel_motel 

In [165]:
word1 = 'unknown'
pat = r"^(?=.*\b{}\b).*$".format(word1)
clean_premise(pat,df)

other/unknown                                                                                                                                             1967
other, unknown, or not listed                                                                                                                              778
other, unknown, or not listed                                                                                                                               22
Name: premise, dtype: int64

In [166]:
change_premise(pat,df,'unknown')

In [167]:
word1 = 'unk'
pat = r"^(?=.*\b{}\b).*$".format(word1)
clean_premise(pat,df)

unk    3598
Name: premise, dtype: int64

In [168]:
change_premise(pat,df,'unknown')

In [169]:
df.premise.value_counts()[:60]

road_street_sidewalk                          14608
residence                                      7094
unknown                                        6365
apartment_parking                              6248
apartment                                      6159
other parking lot                              4891
commercial_parking                             4421
hospital                                       2162
restaurant_parking                             2158
building                                       2017
driveway                                       1857
bar_club                                       1761
supermarket                                    1486
restaurant                                     1417
department_store                               1373
gas_station                                    1334
bar_club_parking                               1262
hotel_motel_parking                            1235
hotel_motel                                    1207
business    

In [170]:
word1 = 'parking'
pat = r"^(?=.*\b{}\b).*$".format(word1)
clean_premise(pat,df)

other parking lot                                                                                                                                         4891
other parking lot                                                                                                                                           59
mall parking lot                                                                                                                                            47
laundry or dry cleaners parking lot                                                                                                                         17
laundry/dry cleaners parking lot                                                                                                                            10
mall parking lot                                                                                                                                             1
Name: premise, dtype: int64

In [171]:
change_premise(pat,df,'other_parking')

In [172]:
df.premise.value_counts()[:60]

road_street_sidewalk                          14608
residence                                      7094
unknown                                        6365
apartment_parking                              6248
apartment                                      6159
other_parking                                  5025
commercial_parking                             4421
hospital                                       2162
restaurant_parking                             2158
building                                       2017
driveway                                       1857
bar_club                                       1761
supermarket                                    1486
restaurant                                     1417
department_store                               1373
gas_station                                    1334
bar_club_parking                               1262
hotel_motel_parking                            1235
hotel_motel                                    1207
business    

In [174]:
word1 = 'garage'
pat = r"^(?=.*\b{}\b).*$".format(word1)
clean_premise(pat,df)

garage or carport                                                                                                                                         445
garage/carport                                                                                                                                            322
garage or carport                                                                                                                                          17
Name: premise, dtype: int64

In [175]:
change_premise(pat,df,'garage_carport')

In [176]:
df.premise.value_counts()[:60]

road_street_sidewalk                          14608
residence                                      7094
unknown                                        6365
apartment_parking                              6248
apartment                                      6159
other_parking                                  5025
commercial_parking                             4421
hospital                                       2162
restaurant_parking                             2158
building                                       2017
driveway                                       1857
bar_club                                       1761
supermarket                                    1486
restaurant                                     1417
department_store                               1373
gas_station                                    1334
bar_club_parking                               1262
hotel_motel_parking                            1235
hotel_motel                                    1207
business    

In [178]:
word1 = 'condominium'
pat = r"^(?=.*\b{}\b).*$".format(word1)
clean_premise(pat,df)

condominium                                                                                                                                               478
condominium                                                                                                                                                 2
Name: premise, dtype: int64

In [179]:
change_premise(pat,df,'apartment')

In [180]:
df.premise.value_counts()[:60]

road_street_sidewalk                          14608
residence                                      7094
apartment                                      6639
unknown                                        6365
apartment_parking                              6248
other_parking                                  5025
commercial_parking                             4421
hospital                                       2162
restaurant_parking                             2158
building                                       2017
driveway                                       1857
bar_club                                       1761
supermarket                                    1486
restaurant                                     1417
department_store                               1373
gas_station                                    1334
bar_club_parking                               1262
hotel_motel_parking                            1235
hotel_motel                                    1207
business    

In [191]:
word1 = 'adult'
word2 = 'store'
pat = r"^(?=.*\b{}\b)(?=.*\b{}\b).*$".format(word1,word2)
clean_premise(pat,df)

adult book store/newsstand       1
adult book store or newsstand    1
Name: premise, dtype: int64

In [192]:
change_premise(pat,df,'sexually_oriented_business')

In [195]:
word1 = 'specialty'
word2 = 'store'
pat = r"^(?=.*\b{}\b)(?=.*\b{}\b).*$".format(word1,word2)
clean_premise(pat,df)

specialty store (non-specific)                                                                                                                            391
specialty store (non-specific)                                                                                                                              2
Name: premise, dtype: int64

In [196]:
change_premise(pat,df,'specialty_store')

In [198]:
word1 = 'clothing'
word2 = 'store'
pat = r"^(?=.*\b{}\b)(?=.*\b{}\b).*$".format(word1,word2)
clean_premise(pat,df)

clothing store                                                                                                                                            287
clothing store                                                                                                                                              3
Name: premise, dtype: int64

In [199]:
change_premise(pat,df,'clothing_store')

In [200]:
word1 = 'electronics'
word2 = 'store'
pat = r"^(?=.*\b{}\b)(?=.*\b{}\b).*$".format(word1,word2)
clean_premise(pat,df)

electronics store, electrical sup.        55
electronics store, electrical supplies    33
Name: premise, dtype: int64

In [201]:
change_premise(pat,df,'electronic_store')

In [202]:
word1 = 'store'
pat = r"^(?=.*\b{}\b).*$".format(word1)
clean_premise(pat,df)

furniture, appliances, radios, tv store    5
Name: premise, dtype: int64

In [203]:
change_premise(pat,df,'electronic_store')

In [204]:
df.premise.value_counts()[:60]

road_street_sidewalk                          14608
residence                                      7094
apartment                                      6639
unknown                                        6365
apartment_parking                              6248
other_parking                                  5025
commercial_parking                             4421
hospital                                       2162
restaurant_parking                             2158
building                                       2017
driveway                                       1857
bar_club                                       1761
supermarket                                    1486
restaurant                                     1417
department_store                               1373
gas_station                                    1334
bar_club_parking                               1262
hotel_motel_parking                            1235
hotel_motel                                    1207
business    

In [205]:
word1 = 'plex'
pat = r"^(?=.*\b{}\b).*$".format(word1)
clean_premise(pat,df)

multi-plex hme(duplex,triplex etc)                                                                                                                        422
multi-plex home (duplex,triplex etc.)                                                                                                                     113
multi-plex home (duplex,triplex etc.)                                                                                                                       1
Name: premise, dtype: int64

In [206]:
change_premise(pat,df,'multiplex_home')

In [207]:
df.premise.value_counts()[:60]

road_street_sidewalk                          14608
residence                                      7094
apartment                                      6639
unknown                                        6365
apartment_parking                              6248
other_parking                                  5025
commercial_parking                             4421
hospital                                       2162
restaurant_parking                             2158
building                                       2017
driveway                                       1857
bar_club                                       1761
supermarket                                    1486
restaurant                                     1417
department_store                               1373
gas_station                                    1334
bar_club_parking                               1262
hotel_motel_parking                            1235
hotel_motel                                    1207
business    

In [208]:
word1 = 'vacant'
pat = r"^(?=.*\b{}\b).*$".format(word1)
clean_premise(pat,df)

vacant single occ resd(house,townhs,dplex)                                                                                                                185
vacant other residential (apartment,dorms)                                                                                                                 46
vacant other residential (apartment,inn,dorms,boarding house)                                                                                              30
vacant other structure (out buildings,monuments,buildings under construction,etc.)                                                                         23
vacant restaurant                                                                                                                                          21
vacant other out build/monument/underconst                                                                                                                 20
vacant hospital                                     

In [209]:
change_premise(pat,df,'vacant_structure')

In [210]:
df.premise.value_counts()[:60]

road_street_sidewalk                   14608
residence                               7094
apartment                               6639
unknown                                 6365
apartment_parking                       6248
other_parking                           5025
commercial_parking                      4421
hospital                                2162
restaurant_parking                      2158
building                                2017
driveway                                1857
bar_club                                1761
supermarket                             1486
restaurant                              1417
department_store                        1373
gas_station                             1334
bar_club_parking                        1262
hotel_motel_parking                     1235
hotel_motel                             1207
business                                1051
park_rec_pool                           1043
convenience_store                        918
bus_stop_s

In [211]:
word1 = 'warehouse'
pat = r"^(?=.*\b{}\b).*$".format(word1)
clean_premise(pat,df)

warehouse                                                                                                                                                 171
warehouse                                                                                                                                                   1
Name: premise, dtype: int64

In [212]:
change_premise(pat,df,'warehouse')

In [213]:
word1 = 'college'
pat = r"^(?=.*\b{}\b).*$".format(word1)
clean_premise(pat,df)

university/college                                                                                                                                        146
university or college                                                                                                                                      75
university or college                                                                                                                                       1
Name: premise, dtype: int64

In [214]:
change_premise(pat,df,'college')

In [215]:
word1 = 'highway'
pat = r"^(?=.*\b{}\b).*$".format(word1)
clean_premise(pat,df)

highway/freeway                                                                                                                                           112
highway or freeway                                                                                                                                         56
highway or freeway                                                                                                                                          3
Name: premise, dtype: int64

In [216]:
change_premise(pat,df,'highway_freeway')

In [218]:
word1 = 'rental'
word2 = 'office'
pat = r"^(?=.*\b{}\b)(?=.*\b{}\b).*$".format(word1,word2)
clean_premise(pat,df)

apartment/rental office                                                                                                                                   64
apartment rental office                                                                                                                                   61
apartment rental office                                                                                                                                    2
Name: premise, dtype: int64

In [219]:
change_premise(pat,df,'rental_office')

In [221]:
word1 = 'doctor'
pat = r"^(?=.*\b{}\b).*$".format(word1)
clean_premise(pat,df)

physician, doctor, dentist's office                                                                                                                       77
physician, doctor, dentist's office                                                                                                                        3
Name: premise, dtype: int64

In [222]:
change_premise(pat,df,'doctor_office')

In [228]:
word1 = 'office'
pat = r"^(?=.*\b{}\b).*$".format(word1)
clean_premise(pat,df)

physician's office    107
Name: premise, dtype: int64

In [226]:
word1 = 'book'
word2 = 'office'
pat = r"^(?=.*\b{}\b)(?=.*\b{}\b).*$".format(word1,word2)
clean_premise(pat,df)

book,record,stationary,office sup.           13
book, record, stationary, office supplies     3
Name: premise, dtype: int64

In [227]:
change_premise(pat,df,'office_supplies')

In [229]:
word1 = 'physician'
word2 = 'office'
pat = r"^(?=.*\b{}\b)(?=.*\b{}\b).*$".format(word1,word2)
clean_premise(pat,df)

physician's office    107
Name: premise, dtype: int64

In [230]:
change_premise(pat,df,'doctor_office')

In [235]:
df.premise.value_counts()[60:]

laundry/dry cleaners/washaterias                                                                                                                          66
alley                                                                                                                                                     65
apartment                                                                                                                                                 64
freeway service road                                                                                                                                      60
amusement park, bowling alley, skate rink                                                                                                                 57
sexually_oriented_business                                                                                                                                54
high rise                                                 

In [232]:
word1 = 'field'
pat = r"^(?=.*\b{}\b).*$".format(word1)
clean_premise(pat,df)

field, woods, forest, park                                                                                                                                87
field/woods                                                                                                                                               77
field, woods, forest, park                                                                                                                                 3
Name: premise, dtype: int64

In [233]:
change_premise(pat,df,'field_woods_park')

In [234]:
df.premise.value_counts()[:60]

road_street_sidewalk                14608
residence                            7094
apartment                            6639
unknown                              6365
apartment_parking                    6248
other_parking                        5025
commercial_parking                   4421
hospital                             2162
restaurant_parking                   2158
building                             2017
driveway                             1857
bar_club                             1761
supermarket                          1486
restaurant                           1417
department_store                     1373
gas_station                          1334
bar_club_parking                     1262
hotel_motel_parking                  1235
hotel_motel                          1207
business                             1051
park_rec_pool                        1043
convenience_store                     918
bus_stop_station                      863
garage_carport                    

In [236]:
word1 = 'cleaners'
pat = r"^(?=.*\b{}\b).*$".format(word1)
clean_premise(pat,df)

laundry/dry cleaners/washaterias                                                                                                                          66
laundry, dry cleaners, washaterias                                                                                                                        37
laundry, dry cleaners, washaterias                                                                                                                         2
Name: premise, dtype: int64

In [237]:
change_premise(pat,df,'dry_cleaners')

In [241]:
word1 = 'park'
word2 = 'ride'
pat = r"^(?=.*\b{}\b)(?=.*\b{}\b).*$".format(word1,word2)
clean_premise(pat,df)

park and ride terminal                                                                                                                                    77
park & ride terminal                                                                                                                                      25
park and ride terminal                                                                                                                                     6
Name: premise, dtype: int64

In [242]:
change_premise(pat,df,'park_ride')

In [244]:
word1 = 'police'
pat = r"^(?=.*\b{}\b).*$".format(word1)
clean_premise(pat,df)

police station    76
Name: premise, dtype: int64

In [245]:
change_premise(pat,df,'police_station')

In [246]:
word1 = 'credit'
pat = r"^(?=.*\b{}\b).*$".format(word1)
clean_premise(pat,df)

credit union    7
Name: premise, dtype: int64

In [247]:
change_premise(pat,df,'bank')

In [248]:
word1 = 'package'
pat = r"^(?=.*\b{}\b).*$".format(word1)
clean_premise(pat,df)

package facility (fedex,ups,dhl)    5
Name: premise, dtype: int64

In [249]:
change_premise(pat,df,'package_facility')

In [250]:
word1 = 'barber'
pat = r"^(?=.*\b{}\b).*$".format(word1)
clean_premise(pat,df)

barber and beauty shops                                                                                                                                   100
barber and beauty shops                                                                                                                                     1
Name: premise, dtype: int64

In [251]:
change_premise(pat,df,'barber_shop')

In [252]:
word1 = 'pool'
pat = r"^(?=.*\b{}\b).*$".format(word1)
clean_premise(pat,df)

pool hall/game room       9
pool hall or game room    3
Name: premise, dtype: int64

In [253]:
change_premise(pat,df,'pool_hall')

In [255]:
word1 = 'video'
pat = r"^(?=.*\b{}\b).*$".format(word1)
clean_premise(pat,df)

video rental & sales    3
Name: premise, dtype: int64

In [256]:
change_premise(pat,df,'video_rental')

In [257]:
word1 = 'care'
pat = r"^(?=.*\b{}\b).*$".format(word1)
clean_premise(pat,df)

daycare/child care/kindergarten                                                                                                                           34
daycare, child care, or kindergarten                                                                                                                      20
daycare, child care, or kindergarten                                                                                                                       2
Name: premise, dtype: int64

In [258]:
change_premise(pat,df,'daycare')

In [None]:
apartment                                                                                                                                                 

In [260]:
df.premise.value_counts()[60:]

social services/public charities                                                                                                                          71
alley                                                                                                                                                     65
apartment                                                                                                                                                 64
freeway service road                                                                                                                                      60
amusement park, bowling alley, skate rink                                                                                                                 57
daycare                                                                                                                                                   56
sexually_oriented_business                                