In [67]:
import pandas as pd 
import numpy as np
from sklearn.cluster import DBSCAN
from sklearn.preprocessing import StandardScaler
from sklearn.datasets import make_blobs
import matplotlib.pyplot as plt
import folium
from geojson import Feature, FeatureCollection, Point
import json
from scipy.spatial import ConvexHull, convex_hull_plot_2d

In [12]:
# https://alysivji.github.io/getting-started-with-folium.html
# https://opendata.dc.gov/datasets/294e062cdf2c48d5b9cbc374d9709bc0_2/data

In [13]:
# Load the four input CSV files from data/ into DataFrames.
bikeData = pd.read_csv("data/bikes.csv")
bikePedData = pd.read_csv("data/bikePedData.csv")
bikeVehData = pd.read_csv("data/bikeVehData.csv")
streets = pd.read_csv("data/streets.csv")

In [14]:
# Preview the first rows to check the columns used below
# (LATITUDE, LONGITUDE, NEARESTINTSTREETNAME).
bikeData.head()

Unnamed: 0.1,Unnamed: 0,TOTAL_BICYCLES,TOTAL_VEHICLES,TOTAL_PEDESTRIANS,LATITUDE,LONGITUDE,XCOORD,YCOORD,FATAL_BICYCLIST,MPDLATITUDE,MPDLONGITUDE,FROMDATE,NEARESTINTSTREETNAME,NEARESTINTROUTEID,OFFINTERSECTION
0,15,1,1,0,38.97147,-77.024478,397878.852,144839.354,0,38.972236,-77.023671,2018-09-20 04:00:00+00:00,8TH ST NW,11000802,99.39388
1,60,1,1,0,38.90939,-77.042716,396295.124,137948.498,1,38.909894,-77.044153,2018-09-21 04:00:00+00:00,MASSACHUSETTS AVE NW,11059602,1.379358
2,127,1,1,0,38.899698,-77.009065,399213.652,136871.758,0,38.90033,-77.00914,2019-11-15 05:00:00+00:00,NORTH CAPITOL ST BN & H ST NW,11042442,15.1
3,198,1,1,0,38.907249,-77.043423,396233.703,137710.247,0,38.907249,-77.043423,2017-04-16 04:00:00+00:00,19TH ST NW,11001902,1.968938
4,307,1,1,0,38.916884,-77.026942,397655.202,138779.928,0,38.916884,-77.026942,2017-04-02 04:00:00+00:00,U ST NW,11087232,15.208073


In [161]:
# Palette that getColor cycles through.
colorsList = ['red', 'blue', 'black', 'purple', 'orange', 'pink', 'green']

def getColor(ind):
    """Return the palette entry for ``ind``, wrapping around the palette length.

    The index is reduced modulo ``len(colorsList)``, so any integer (including
    negative ones) maps to a valid colour name.
    """
    return colorsList[ind % len(colorsList)]

def runCluster(dataset, eps, minSamples):
    """Cluster the rows of ``dataset`` by location and label each cluster's streets.

    Parameters
    ----------
    dataset : pandas.DataFrame
        Must provide ``LATITUDE``, ``LONGITUDE`` and ``NEARESTINTSTREETNAME``
        columns. It is modified in place: the columns ``CLUSTER``,
        ``CLUSTERPRIMARYSTREET`` and ``CLUSTERSECONDARYSTREET`` are added or
        overwritten.
    eps : float
        DBSCAN neighbourhood radius. The raw coordinate columns are passed to
        DBSCAN unscaled, so this is expressed in the same units as they are.
    minSamples : int
        DBSCAN ``min_samples`` value.

    Returns
    -------
    pandas.DataFrame
        The same ``dataset`` object, with the three columns described above.
        ``CLUSTER`` holds the DBSCAN label (DBSCAN uses ``-1`` for noise).
        The two street columns hold the most frequent and second most frequent
        ``NEARESTINTSTREETNAME`` among the rows sharing that label, or the
        string ``"None"`` when no such street name exists.
    """
    # Build the (n, 2) coordinate array in one step instead of appending row by row.
    coords = dataset[['LATITUDE', 'LONGITUDE']].to_numpy(dtype=float)

    # fit_predict fits the model and returns the labels, so a separate fit() is redundant.
    dataset['CLUSTER'] = DBSCAN(eps=eps, min_samples=minSamples).fit_predict(coords)

    mainStreetDict = {}
    secondaryStreetDict = {}
    for label, streetNames in dataset.groupby('CLUSTER')['NEARESTINTSTREETNAME']:
        # value_counts() sorts by frequency and drops missing names, so the
        # ranking can be empty when every street name in the cluster is missing.
        ranked = streetNames.value_counts().index
        mainStreetDict[label] = ranked[0] if len(ranked) > 0 else "None"
        secondaryStreetDict[label] = ranked[1] if len(ranked) > 1 else "None"

    # Assign as plain lists so the values are written positionally, row by row.
    dataset['CLUSTERPRIMARYSTREET'] = dataset['CLUSTER'].map(mainStreetDict).tolist()
    dataset['CLUSTERSECONDARYSTREET'] = dataset['CLUSTER'].map(secondaryStreetDict).tolist()
    return dataset

def makeHulls(dataset):
    """Build one convex hull per cluster from the clustered rows.

    Parameters
    ----------
    dataset : pandas.DataFrame
        Must provide ``LATITUDE``, ``LONGITUDE`` and ``CLUSTER`` columns.

    Returns
    -------
    dict
        Maps each cluster label to a ``scipy.spatial.ConvexHull`` built from
        that cluster's ``[longitude, latitude]`` points. Rows labelled ``-1``
        are ignored. Clusters that cannot form a 2-D hull (fewer than three
        points, or points that are all coincident or collinear) are left out
        instead of raising ``QhullError``.
    """
    # QhullError is importable from scipy.spatial in recent SciPy releases;
    # fall back to the older scipy.spatial.qhull location otherwise.
    try:
        from scipy.spatial import QhullError
    except ImportError:
        from scipy.spatial.qhull import QhullError

    pointsByCluster = {}
    for lat, long, cluster in zip(dataset['LATITUDE'], dataset['LONGITUDE'], dataset['CLUSTER']):
        if cluster != -1:
            # Points are stored x-first (longitude, latitude).
            pointsByCluster.setdefault(cluster, []).append([long, lat])

    hulls = {}
    for cluster, points in pointsByCluster.items():
        # Qhull needs at least three points to build a 2-D hull.
        if len(points) < 3:
            continue
        try:
            hulls[cluster] = ConvexHull(points)
        except QhullError:
            # Degenerate geometry (e.g. all points on one line): no polygon to draw.
            continue
    return hulls

def makeGeoJson(hulls):
    """Serialise cluster hulls as a GeoJSON ``FeatureCollection`` string.

    Parameters
    ----------
    hulls : dict
        Maps a cluster label to a hull object exposing ``points`` and
        ``vertices`` (as ``scipy.spatial.ConvexHull`` does).

    Returns
    -------
    str
        JSON text with one ``Polygon`` feature per hull. Each polygon ring
        lists the hull's vertex points in ``vertices`` order and repeats the
        first vertex at the end to close the ring. The feature's ``name``
        property is the label converted with ``str``.
    """
    featureList = []
    for label, hull in hulls.items():
        ring = [list(hull.points[vertex]) for vertex in hull.vertices]
        # Close the ring by returning to the starting vertex.
        ring.append(list(hull.points[hull.vertices[0]]))

        properties = {
            "name": str(label),
            "stroke": "#fc1717",
            "stroke-opacity": 1,
            "stroke-width": 2,
        }
        geometry = {"type": "Polygon", "coordinates": [ring]}
        featureList.append({
            "type": "Feature",
            "properties": properties,
            "geometry": geometry,
        })

    return json.dumps({"type": "FeatureCollection", "features": featureList})

def makeMap(dataset):
    """Draw the clustered points and their hull outlines on a folium map.

    Parameters
    ----------
    dataset : pandas.DataFrame
        Must provide ``LATITUDE``, ``LONGITUDE``, ``CLUSTER``,
        ``CLUSTERPRIMARYSTREET`` and ``CLUSTERSECONDARYSTREET`` columns.

    Returns
    -------
    folium.Map
        Map centred on ``[38.9072, -77.0369]`` with one feature group per
        cluster (named "<primary> and <secondary>" from the first row seen for
        that cluster), a GeoJSON layer built from ``makeHulls`` /
        ``makeGeoJson``, and an expanded layer control in the top-right corner.
        Rows whose ``CLUSTER`` is ``-1`` are not drawn.
    """
    dcMap = folium.Map(location=[38.9072, -77.0369], zoom_start=13)

    # One toggleable layer per cluster, created the first time the label is seen.
    layersByCluster = {}
    rows = zip(
        dataset['LATITUDE'],
        dataset['LONGITUDE'],
        dataset["CLUSTER"],
        dataset["CLUSTERPRIMARYSTREET"],
        dataset["CLUSTERSECONDARYSTREET"],
    )
    for lat, long, cluster, primaryStreet, secondaryStreet in rows:
        if cluster == -1:
            continue
        if cluster not in layersByCluster:
            layerName = primaryStreet + " and " + secondaryStreet
            layersByCluster[cluster] = folium.FeatureGroup(name=layerName)
        marker = folium.CircleMarker(location=[lat, long], radius = .5, color = getColor(cluster))
        marker.add_to(layersByCluster[cluster])

    for layer in layersByCluster.values():
        dcMap.add_child(layer)

    # Outline each cluster with its convex hull.
    geoString = makeGeoJson(makeHulls(dataset))
    folium.GeoJson(geoString).add_to(dcMap)

    folium.map.LayerControl('topright', collapsed=False).add_to(dcMap)

    return dcMap

In [162]:
# bikeData
# worst 3 hubs runCluster(bikeData, 0.0015, 25)
# more hubs runCluster(bikeData, 0.0014, 15)
# tightly packed runCluster(bikeData, 0.0005, 7)

# bikePedData
# runCluster(bikePedData, 0.0030, 3)

# bikeVehData
# runCluster(bikeVehData, 0.0010, 10)

In [163]:
# "Worst 3 hubs" setting for bikeData: eps=0.0015, minSamples=25.
clusteredData = runCluster(bikeData, 0.0015, 25)
dcMap = makeMap(clusteredData)
dcMap

In [164]:
# "More hubs" setting for bikeData: eps=0.0014, minSamples=15.
clusteredData = runCluster(bikeData, 0.0014, 15)
dcMap = makeMap(clusteredData)
dcMap

In [165]:
# "Tightly packed" setting for bikeData: eps=0.0005, minSamples=7.
clusteredData = runCluster(bikeData, 0.0005, 7)
dcMap = makeMap(clusteredData)
dcMap

In [166]:
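# bikePedData (disabled: this run produced the QhullError shown below, presumably
# because minSamples=1 lets DBSCAN form single-point clusters, and a 2-D convex
# hull needs at least 3 points)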

# clusteredData = runCluster(bikePedData, 0.01, 1)
# dcMap = makeMap(clusteredData)
# dcMap

QhullError: QH6214 qhull input error: not enough points(1) to construct initial simplex (need 3)

While executing:  | qhull i Qt
Options selected for Qhull 2015.2.r 2016/01/18:
  run-id 1920392323  incidence  Qtriangulate  _pre-merge  _zero-centrum


In [167]:
# Bike/vehicle dataset: cluster with eps=0.0010, minSamples=10 and map the result.

clusteredData = runCluster(bikeVehData, 0.0010, 10)
dcMap = makeMap(clusteredData)
dcMap