In [2]:
import ipywidgets as widgets
import matplotlib.pyplot as plt
import numpy as np
import pandas as pd
from tabulate import tabulate
import yaml

%matplotlib inline

In [23]:
# Keyword arguments forwarded to pd.read_csv when loading the input file:
# pipe-separated fields, double-quote quoting, UTF-8 encoded text.
# (A 'compression': 'gzip' entry is intentionally left disabled.)
csv_opts = dict(
    sep='|',
    quotechar='"',
    encoding='utf-8',
)

In [24]:
# Load the dtype specification that is later passed to pd.read_csv as `dtype`
# (presumably a column-name -> dtype mapping; verify against the YAML file).
# The encoding is stated explicitly so the file is decoded the same way on
# every platform instead of depending on the locale's default encoding.
# NOTE(review): yaml.FullLoader accepts more than plain scalars/lists/dicts.
# If this hand-written file only holds plain data, yaml.safe_load would be the
# stricter choice -- confirm before switching.
with open('../hand/arrest_dtypes.yaml', 'r', encoding='utf-8') as yamlfile:
    arrest_dtypes = yaml.load(yamlfile, Loader=yaml.FullLoader)

In [33]:
# Read the arrests CSV into a DataFrame, applying the shared CSV parsing
# options and the dtypes loaded from the YAML file.
arrests = pd.read_csv(
    '../input/arrests.csv',
    dtype=arrest_dtypes,
    **csv_opts,
)

In [37]:
# Parse the apprehension date text (expected as month/day/four-digit year)
# into pandas datetime values and store them back in the same column.
raw_apprehension_dates = arrests['apprehension_date']
arrests['apprehension_date'] = pd.to_datetime(raw_apprehension_dates,
                                              format='%m/%d/%Y')

# Landmark non-specific

In [215]:
# Flag every arrest whose landmark text mentions 'non-specific' (any letter
# case). Rows with a missing landmark count as "no match"; the flag is stored
# as an integer (1 = contains the term, 0 = does not).
landmark_text = arrests['apprehension_landmark']
mentions_non_specific = landmark_text.str.contains(
    'non-specific', case=False, na=False)
arrests['non-specific'] = mentions_non_specific.astype(int)

In [216]:
# Narrow the arrests table down to the 'aor' column, the landmark text and
# the non-specific flag.
landmark_columns = ['aor', 'apprehension_landmark', 'non-specific']
dub = arrests[landmark_columns]

In [217]:
# Keep only the rows flagged as non-specific, then collapse rows that repeat
# the same (aor, landmark, flag) combination.
is_flagged = dub['non-specific'].eq(1)
dub = dub[is_flagged].drop_duplicates()
dub

Unnamed: 0,aor,apprehension_landmark,non-specific
1,CHI,"CHI GENERAL AREA, NON-SPECIFIC",1
12,NOL,"JAK GENERAL AREA, NON-SPECIFIC",1
18,CHI,STL GENERAL NON-SPECIFIC,1
26,SLC,"LVG GENERAL AREA, NON-SPECIFIC",1
28,SFR,"FRE GENERAL AREA, NON-SPECIFIC",1
...,...,...,...
538944,NEW,"MTG GENERAL AREA, NON-SPECIFIC",1
540446,,"CHM GENERAL AREA, NON-SPECIFIC",1
540946,HOU,"LBK GENERAL AREA, NON-SPECIFIC",1
541425,,"AMT GENERAL AREA, NON-SPECIFIC",1


In [218]:
# Break each non-specific landmark into its space-separated words, one word
# per column (columns are numbered 0, 1, 2, ...; shorter landmarks leave the
# trailing columns empty).
non_specific_landmarks = dub['apprehension_landmark']
fub = non_specific_landmarks.str.split(pat=' ', expand=True)
fub

Unnamed: 0,0,1,2,3,4,5
1,CHI,GENERAL,"AREA,",NON-SPECIFIC,,
12,JAK,GENERAL,"AREA,",NON-SPECIFIC,,
18,STL,GENERAL,NON-SPECIFIC,,,
26,LVG,GENERAL,"AREA,",NON-SPECIFIC,,
28,FRE,GENERAL,"AREA,",NON-SPECIFIC,,
...,...,...,...,...,...,...
538944,MTG,GENERAL,"AREA,",NON-SPECIFIC,,
540446,CHM,GENERAL,"AREA,",NON-SPECIFIC,,
540946,LBK,GENERAL,"AREA,",NON-SPECIFIC,,
541425,AMT,GENERAL,"AREA,",NON-SPECIFIC,,


In [219]:
# Build an "<aor> <first landmark word>" label for every non-specific row.
# NOTE: both parts are cast to str first, so a missing aor shows up as the
# literal text 'nan' (e.g. 'nan CHM') rather than as a missing value.
aor_text = dub['aor'].astype(str)
first_landmark_word = fub[0].astype(str)
results = aor_text + ' ' + first_landmark_word
results

1         CHI CHI
12        NOL JAK
18        CHI STL
26        SLC LVG
28        SFR FRE
           ...   
538944    NEW MTG
540446    nan CHM
540946    HOU LBK
541425    nan AMT
542013    ATL HBG
Length: 624, dtype: object