In [None]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import re
%matplotlib inline

In [None]:
from bokeh.io import output_notebook, show
from bokeh.models import (
  GMapPlot, GMapOptions, ColumnDataSource, Circle, DataRange1d, PanTool, WheelZoomTool, BoxSelectTool
)

In [None]:
# Shell escape: list the contents of the local data/ directory.
!ls data/

In [None]:
# Load the Detroit blight-violation CSV and preview the first rows.
blight_csv_path = "data/_97bd1c1e5df9537bb13398c9898deed7_detroit-blight-violations.csv"
incident_data = pd.read_csv(blight_csv_path)
incident_data.head()

In [None]:
# Show the column labels of the loaded frame.
incident_data.columns

In [None]:
# Show the (rows, columns) dimensions of the loaded frame.
incident_data.shape

In [None]:
# Inspect every field of the first record.
incident_data.iloc[0]

In [None]:
# Peek at the first ten raw ViolationAddress strings.
incident_data['ViolationAddress'].head(10)

In [None]:
# Work on a copy so the raw frame loaded from disk stays untouched.
inci_df = incident_data.copy()

# Raw string: '\(' in a normal string literal is an invalid escape sequence.
# The two groups capture the text on either side of ", " inside parentheses.
geo_pat = re.compile(r'\((.+), (.+)\)')

# One vectorised pass extracts both groups (column 0 -> lat, column 1 -> lon)
# instead of running the regex twice per row.
coords = inci_df['ViolationAddress'].str.extract(geo_pat.pattern, expand=True)

# Fail fast with a readable message rather than an AttributeError on None;
# the cells below assume every row has coordinates.
unparsed = coords.isna().any(axis=1)
if unparsed.any():
    raise ValueError(
        "%d ViolationAddress value(s) do not contain a '(lat, lon)' pair"
        % int(unparsed.sum())
    )

inci_df['lat'] = coords[0].astype(float)
inci_df['lon'] = coords[1].astype(float)

In [None]:
# Keep the text found inside the parentheses of each address as a single
# string key. Raw string avoids the invalid '\(' escape; str.extract is the
# vectorised form of re.search(...).group(1) and leaves NaN where a row has
# no parenthesised part instead of raising.
inci_df['geo'] = inci_df['ViolationAddress'].str.extract(r'\((.+)\)', expand=False)

In [None]:
# Summary statistics for the extracted coordinate strings.
inci_df['geo'].describe()

In [None]:
# Per-coordinate-string count of non-null values in every other column.
inci_df.groupby(by='geo').count()

In [None]:
# Summary statistics for the parsed numeric coordinates.
inci_df.loc[:, ['lat', 'lon']].describe()

In [None]:
# First eight parsed latitude values.
inci_df['lat'].head(8)

In [None]:
# Centre the Google map on the mean of the parsed incident coordinates.
center_lat = inci_df['lat'].mean()
center_lon = inci_df['lon'].mean()
map_options = GMapOptions(
    lat=center_lat,
    lng=center_lon,
    map_type="roadmap",
    zoom=11,
)

# NOTE(review): some Bokeh releases require an API key and fixed (non-data)
# ranges for GMapPlot -- confirm against the installed Bokeh version.
plot = GMapPlot(x_range=DataRange1d(), y_range=DataRange1d(), map_options=map_options)

# Only the first 500 incidents are sent to the browser.
preview = inci_df[['lat', 'lon']].head(500)
source = ColumnDataSource(data={"lat": preview['lat'], "lon": preview['lon']})

# One translucent blue dot per incident, positioned by lon (x) and lat (y).
circle = Circle(
    x="lon",
    y="lat",
    size=5,
    fill_color="blue",
    fill_alpha=0.4,
    line_color=None,
)
plot.add_glyph(source, circle)
plot.add_tools(PanTool(), WheelZoomTool(), BoxSelectTool())

# Route Bokeh output to the notebook, then render the map.
output_notebook()
show(plot)

In [None]:
#cluster based on lat and lon
from sklearn.cluster import DBSCAN 
from geopy.distance import distance

In [None]:
# Sanity-check geopy's distance on two sample coordinate pairs.
point_a = (42.363182, -83.091677)
point_b = (42.429391, -83.220394)
distance(point_a, point_b)

In [None]:
model = DBSCAN(min_samples=1)

In [None]:
model.fit(inci_df[['lat', 'lon']])

In [None]:
# Display the estimator and its parameters.
model

In [None]:
# First five rows of the numeric coordinates used for clustering.
inci_df.head()[['lat', 'lon']]