In [33]:
import pandas as pd 
import numpy as np
from sklearn.cluster import DBSCAN
from sklearn.preprocessing import StandardScaler
from sklearn.datasets import make_blobs
import matplotlib.pyplot as plt
import folium
from geojson import Feature, FeatureCollection, Point
import json
from scipy.spatial import ConvexHull, convex_hull_plot_2d

In [8]:
# https://alysivji.github.io/getting-started-with-folium.html
# https://opendata.dc.gov/datasets/294e062cdf2c48d5b9cbc374d9709bc0_2/data

In [9]:
# save them to csvs
bikeData = pd.read_csv("data/bikes.csv")
bikePedData = pd.read_csv("data/bikePedData.csv")
bikeVehData = pd.read_csv("data/bikeVehData.csv")
streets = pd.read_csv("data/streets.csv")

In [111]:
print(list(bikeData))

['Unnamed: 0', 'TOTAL_BICYCLES', 'TOTAL_VEHICLES', 'TOTAL_PEDESTRIANS', 'LATITUDE', 'LONGITUDE', 'XCOORD', 'YCOORD', 'FATAL_BICYCLIST', 'MPDLATITUDE', 'MPDLONGITUDE', 'FROMDATE', 'NEARESTINTSTREETNAME', 'NEARESTINTROUTEID', 'OFFINTERSECTION', 'CLUSTER', 'CLUSTERPRIMARYSTREET', 'CLUSTERSECONDARYSTREET']


In [10]:
streets.FACILITY.value_counts()

Existing Bike Lane      908
Shared Lane             258
Cycle Track              88
Climbing Lane            62
Contraflow Bike Lane     45
                          5
Bus/Bike Lane             4
Name: FACILITY, dtype: int64

In [108]:
colorsList = ['red', 'blue', 'black', 'purple', 'orange', 'pink', 'green']

def getColor(ind):
    colorLen = len(colorsList)
    colorNum = ind%colorLen
    color = colorsList[colorNum]
    return color

def runCluster(dataset, eps, minSamples):
    accPoints = np.empty((0, 2))   
    
    for lat,long in zip(dataset['LATITUDE'], dataset['LONGITUDE']):
        accPoints = np.append(accPoints, np.array([[lat,long]]), axis=0)

    # Compute DBSCAN
    db = DBSCAN(eps=eps, min_samples=minSamples).fit(accPoints)

    clusters = db.fit_predict(accPoints)
    dataset['CLUSTER'] = clusters
    
    
    mainStreetDict = {}
    secondaryStreetDict = {}
    clusters = dataset['CLUSTER'].unique()
    for i in clusters:
        clusterSet = dataset[dataset["CLUSTER"] == i]
        valueCounts = clusterSet['NEARESTINTSTREETNAME'].value_counts()
        mainStreetDict[i] = valueCounts.index[0]
        if (len(valueCounts.index) > 1):
            secondaryStreetDict[i] = valueCounts.index[1]
        else:
            secondaryStreetDict[i] = "None"

    mainStreets = []
    secondaryStreets = []
    for i in dataset["CLUSTER"]:
        mainStreets.append(mainStreetDict[i])
        secondaryStreets.append(secondaryStreetDict[i])
    
    dataset['CLUSTERPRIMARYSTREET'] = mainStreets
    dataset['CLUSTERSECONDARYSTREET'] = secondaryStreets
    return dataset

def makeHulls(dataset):
    features = []
    clusters = {}
    primaryStreets = {}.
    for lat, long, cluster, primaryStreet in zip(dataset['LATITUDE'],dataset['LONGITUDE'],dataset['CLUSTER'],dataset['NEARESTINTROUTEID']):
        if (cluster != -1):
            if (cluster in clusters.keys()):
                thisCluster = clusters[cluster]
                thisCluster.append([long,lat])
                clusters[cluster] = thisCluster
            else:
                clusters[cluster] = [[long,lat]]
                primaryStreets[cluster] = primaryStreet

    hulls = {}
    for cluster in clusters.keys():
        hulls[cluster] = ConvexHull(clusters[cluster])
        
    return hulls, primaryStreets


def makeGeoJson(hulls, primaryStreets):
    shapes = {}

    for x in hulls.keys():
        outline = []
        for p in hulls[x].vertices:
            outline.append(list(hulls[x].points[p]))
        outline.append(list(hulls[x].points[hulls[x].vertices[0]]))
        shapes[x] = outline

    myGeoJson = {"type": "FeatureCollection","features":[]}        
    for shape in shapes.keys():
        primaryStreet = primaryStreets[shape]
        thisFeature = {
            "type": "Feature",
            "properties": {
                "name": str(shape),
                "primaryStreet" : primaryStreet  
             },
            "geometry" : {
                "type" : "Polygon",
                "coordinates" : [shapes[shape]]
            }
        }
        myGeoJson['features'].append(thisFeature)

    geo_str = json.dumps(myGeoJson)
    return geo_str

def style_function(feature):
    print(feature['properties'])
    # styling guide  https://python-visualization.github.io/folium/modules.html
    fillColor = ""
    if (primaryStreet)
    return {
        'fillColor': 'red',
        'lineColor' : 'red'
    }

def makeMap(dataset):
    # plotting all bike accidents
    dcMap = folium.Map(location=[38.9072, -77.0369], zoom_start=13)

    featureGroupDict = {}
        
    counter = 0
    for lat, long, cluster, clusterPrimaryStreet, clusterSecondaryStreet in zip(dataset['LATITUDE'], dataset['LONGITUDE'],dataset["CLUSTER"],dataset["CLUSTERPRIMARYSTREET"],dataset["CLUSTERSECONDARYSTREET"]):
        if (cluster != -1):
            if (cluster not in featureGroupDict.keys()):
                featureGroupDict[cluster] = folium.FeatureGroup(name=(clusterPrimaryStreet+ " and " + clusterSecondaryStreet))
            folium.CircleMarker(location=[lat, long], radius = .5, color = getColor(cluster)).add_to(featureGroupDict[cluster])

        counter = counter + 1 
     
    for key in featureGroupDict.keys():
        dcMap.add_child(featureGroupDict[key])
        
    hulls, primaryStreets = makeHulls(dataset)
    geoString = makeGeoJson(hulls, primaryStreets)
    folium.GeoJson(json.loads(geoString), style_function=style_function).add_to(dcMap);
    
    folium.map.LayerControl('topright', collapsed=False).add_to(dcMap)
        
    return dcMap

In [109]:
# bikeData
# worst 3 hubs runCluster(bikeData, 0.0015, 25)
# more hubs runCluster(bikeData, 0.0014, 15)
# tightly packed runCluster(bikeData, 0.0005, 7)

# bikePedData
# runCluster(bikePedData, 0.0030, 3)

# bikeVehData
# bikeVehData(bikeVehData, 0.0010, 10)

In [110]:
clusteredData = runCluster(bikeData, 0.0015, 25)
dcMap = makeMap(clusteredData)
dcMap

{'name': '1', 'primaryStreet': '18TH ST NW'}
{'name': '1', 'primaryStreet': '18TH ST NW'}
{'name': '0', 'primaryStreet': '14TH ST NW'}
{'name': '2', 'primaryStreet': 'L ST NW'}


In [107]:
clusteredData = runCluster(bikeData, 0.0014, 15)
dcMap = makeMap(clusteredData)
dcMap

In [15]:
clusteredData = runCluster(bikeData, 0.0005, 7)
dcMap = makeMap(clusteredData)
dcMap

In [16]:
# bikePedData

# clusteredData = runCluster(bikePedData, 0.01, 1)
# dcMap = makeMap(clusteredData)
# dcMap

In [17]:
# bike vehicle data

clusteredData = runCluster(bikeVehData, 0.0010, 10)
dcMap = makeMap(clusteredData)
dcMap