In [22]:
import pandas as pd 
import numpy as np
from sklearn.cluster import DBSCAN
from sklearn.preprocessing import StandardScaler
from sklearn.datasets import make_blobs
import matplotlib.pyplot as plt
import folium

In [23]:
# Folium introduction: https://alysivji.github.io/getting-started-with-folium.html
# DC Open Data dataset: https://opendata.dc.gov/datasets/294e062cdf2c48d5b9cbc374d9709bc0_2/data

In [130]:
# Read the three CSV datasets into DataFrames (nothing is written here).
bikeData = pd.read_csv("data/bikes.csv")
bikePedData = pd.read_csv("data/bikePedData.csv")
bikeVehData = pd.read_csv("data/bikeVehData.csv")

In [135]:
# Palette cycled through by getColor; order matters because it is indexed.
colorsList = [
    'red',
    'blue',
    'black',
    'purple',
    'orange',
    'pink',
    'green',
]


def getColor(ind):
    """Return the palette entry for ``ind``, wrapping around ``colorsList``.

    The index is reduced modulo the palette length, so any integer is valid;
    with Python's modulo a negative ``ind`` wraps from the end of the palette
    (``-1`` yields the last colour).
    """
    return colorsList[ind % len(colorsList)]

def runCluster(dataset, eps, minSamples):
    """Cluster rows by location with DBSCAN and name each cluster by street.

    Two columns are written onto ``dataset`` itself; the frame is modified in
    place and the same object is returned:

    * ``CLUSTER`` - the DBSCAN label of each row (DBSCAN labels noise ``-1``).
    * ``CLUSTERSTREET`` - the most frequent ``NEARESTINTSTREETNAME`` among the
      rows sharing that label, or ``None`` when none of those rows has a
      street name.

    Parameters
    ----------
    dataset : pandas.DataFrame
        Must contain ``LATITUDE``, ``LONGITUDE`` and ``NEARESTINTSTREETNAME``.
    eps : float
        Forwarded to ``DBSCAN(eps=...)``. Distances are computed on the raw
        (latitude, longitude) pairs, so ``eps`` is in the units of those
        columns.
    minSamples : int
        Forwarded to ``DBSCAN(min_samples=...)``.

    Returns
    -------
    pandas.DataFrame
        ``dataset``, with the two columns above added or overwritten.
    """
    # Build the (n, 2) coordinate matrix in one step; appending row by row
    # re-allocates the whole array on every iteration.
    accPoints = dataset[['LATITUDE', 'LONGITUDE']].to_numpy(dtype=float)

    # fit_predict fits the model and returns the labels, so a separate
    # fit() beforehand only repeats the same work.
    dataset['CLUSTER'] = DBSCAN(eps=eps, min_samples=minSamples).fit_predict(accPoints)

    # Most common street name per cluster label.
    mainStreetDict = {}
    for label, clusterSet in dataset.groupby('CLUSTER', sort=False):
        valueCounts = clusterSet['NEARESTINTSTREETNAME'].value_counts()
        # value_counts() drops missing values, so a cluster whose street
        # names are all missing yields an empty result; fall back to None
        # rather than failing on index[0].
        mainStreetDict[label] = valueCounts.index[0] if len(valueCounts) else None

    # Positional assignment (a plain list), so it does not depend on the
    # frame's index.
    dataset['CLUSTERSTREET'] = [mainStreetDict[label] for label in dataset['CLUSTER']]

    return dataset

def makeMap(dataset, center=(38.9072, -77.0369), zoomStart=13):
    """Draw the clustered rows of ``dataset`` on a folium map.

    Each cluster gets its own ``folium.FeatureGroup`` so it can be toggled
    from the layer control. The group is named after the ``CLUSTERSTREET``
    value of the first row seen for that cluster, and its markers are
    coloured with ``getColor(cluster)``. Rows whose ``CLUSTER`` is ``-1``
    (the label DBSCAN gives to noise) are not drawn.

    Parameters
    ----------
    dataset : pandas.DataFrame
        Must contain ``LATITUDE``, ``LONGITUDE``, ``CLUSTER`` and
        ``CLUSTERSTREET``.
    center : sequence of two floats, optional
        (latitude, longitude) the map is initially centred on. The default
        is the coordinate this notebook originally hard-coded (presumably
        central Washington, DC - confirm).
    zoomStart : int, optional
        Initial zoom level passed to ``folium.Map``.

    Returns
    -------
    folium.Map
        The map with one layer per cluster and an expanded layer control in
        the top-right corner.
    """
    dcMap = folium.Map(location=list(center), zoom_start=zoomStart)

    featureGroupDict = {}

    rows = zip(dataset['LATITUDE'], dataset['LONGITUDE'],
               dataset["CLUSTER"], dataset["CLUSTERSTREET"])
    for lat, long, cluster, clusterStreet in rows:
        if cluster == -1:
            # Noise points belong to no cluster and are left off the map.
            continue
        if cluster not in featureGroupDict:
            featureGroupDict[cluster] = folium.FeatureGroup(name=clusterStreet)
        folium.CircleMarker(
            location=[lat, long], radius=.5, color=getColor(cluster)
        ).add_to(featureGroupDict[cluster])

    for featureGroup in featureGroupDict.values():
        dcMap.add_child(featureGroup)

    # Added after the feature groups so the control lists every layer.
    folium.map.LayerControl('topright', collapsed=False).add_to(dcMap)

    return dcMap

In [136]:
# bikeData
# worst 3 hubs: runCluster(bikeData, 0.0015, 25)
# more hubs: runCluster(bikeData, 0.0014, 15)
# tightly packed: runCluster(bikeData, 0.0005, 7)

# bikePedData
# runCluster(bikePedData, 0.0030, 3)

# bikeVehData
# runCluster(bikeVehData, 0.0010, 10)

In [149]:
# bikeData, "worst 3 hubs" setting.
clusteredData = runCluster(bikeData, eps=0.0015, minSamples=25)
dcMap = makeMap(clusteredData)
dcMap

In [138]:
# bikeData, "more hubs" setting.
clusteredData = runCluster(bikeData, eps=0.0014, minSamples=15)
dcMap = makeMap(clusteredData)
dcMap

In [139]:
# bikeData, "tightly packed" setting.
clusteredData = runCluster(bikeData, eps=0.0005, minSamples=7)
dcMap = makeMap(clusteredData)
dcMap

In [148]:
# bikePedData
# NOTE(review): these values differ from the (0.0030, 3) setting listed for
# bikePedData in the parameter-notes cell - confirm which one is intended.
clusteredData = runCluster(bikePedData, eps=0.01, minSamples=1)
dcMap = makeMap(clusteredData)
dcMap

In [141]:
# bikeVehData (bike vehicle data)
clusteredData = runCluster(bikeVehData, eps=0.0010, minSamples=10)
dcMap = makeMap(clusteredData)
dcMap