In [448]:
import pandas as pd 
import numpy as np
from sklearn.cluster import DBSCAN
from sklearn.preprocessing import StandardScaler
from sklearn.datasets import make_blobs
import matplotlib.pyplot as plt
import folium
from geojson import Feature, FeatureCollection, Point
import json
from scipy.spatial import ConvexHull, convex_hull_plot_2d
from folium import IFrame

In [188]:
# https://alysivji.github.io/getting-started-with-folium.html
# https://opendata.dc.gov/datasets/294e062cdf2c48d5b9cbc374d9709bc0_2/data

In [189]:
# Load the crash tables and the street reference tables from CSV.
# `streets` and `streetSegs` are read as module-level globals by runCluster
# (facility type and registered street name lookups, keyed by STREETSEGID).
bikeData = pd.read_csv("data/bikes.csv")
bikePedData = pd.read_csv("data/bikePedData.csv")
bikeVehData = pd.read_csv("data/bikeVehData.csv")
streets = pd.read_csv("data/streets.csv")
streetSegs = pd.read_csv("data/streetSegs.csv")

In [190]:
# Column names of the crash table.
print(bikeData.columns.tolist())

['Unnamed: 0', 'TOTAL_BICYCLES', 'TOTAL_VEHICLES', 'TOTAL_PEDESTRIANS', 'LATITUDE', 'LONGITUDE', 'XCOORD', 'YCOORD', 'FATAL_BICYCLIST', 'MPDLATITUDE', 'MPDLONGITUDE', 'FROMDATE', 'STREETSEGID', 'ROUTEID', 'NEARESTINTSTREETNAME', 'NEARESTINTROUTEID', 'OFFINTERSECTION']


In [191]:
# Column names of the bike-facility table.
print(streets.columns.tolist())

['OBJECTID', 'FACILITYID', 'STREETSEGID', 'SOURCEID', 'BIKELANELENGTH', 'FACILITY', 'PROPOSEDCYCLETRACK', 'Shape_Length', 'TRAVELDIRECTION', 'NOTES', 'BIKELANE_YEAR', 'PLANSREADY', 'GAP', 'GAP_NOTES', 'NEED_SYMBOL', 'NEED_SYM_1', 'REPAINT_LINE', 'YEAR_INSTALLED']


In [192]:
# Column names of the street-segment table.
print(streetSegs.columns.tolist())

['STREETSEGID', 'FACILITYID', 'SOURCEID', 'STREETID', 'REGISTEREDNAME', 'STREETTYPE', 'QUADRANT', 'DIRECTIONALITY', 'SEGMENTTYPE', 'FROMNODEID', 'TONODEID', 'FROMADDRESSLEFTTHEO', 'TOADDRESSLEFTTHEO', 'FROMADDRESSRIGHTTHEO', 'TOADDRESSRIGHTTHEO', 'BEGINMEASURE', 'ENDMEASURE', 'UPDATETIMESTAMP', 'OBJECTID_1', 'OBJECTID', 'SHAPELEN']


In [265]:
# Distinct FACILITY labels and how often each occurs; style_function colours
# hulls by these labels.
streets["FACILITY"].value_counts()

Existing Bike Lane      908
Shared Lane             258
Cycle Track              88
Climbing Lane            62
Contraflow Bike Lane     45
                          5
Bus/Bike Lane             4
Name: FACILITY, dtype: int64

In [713]:
# Palette cycled through by getColor.
colorsList = ['red', 'blue', 'black', 'purple', 'orange', 'pink', 'green']

def getColor(ind):
    """Return the palette colour for index `ind`, wrapping around colorsList."""
    return colorsList[ind % len(colorsList)]

def _firstSegmentValue(frame, segId, column):
    """Return `column` from the first row of `frame` whose STREETSEGID equals `segId`.

    Returns the string "None" when no row matches; that string is the sentinel
    used throughout this notebook for "no street / no facility".
    """
    matches = frame.loc[frame["STREETSEGID"] == int(segId), column]
    if len(matches) == 0:
        return "None"
    return matches.iloc[0]


def _describeSegment(segId):
    """Return (registered street name, bike facility type) for one street segment id.

    Reads the module-level `streetSegs` and `streets` frames. A segment whose
    name cannot be resolved is reported as ("None", "None").
    """
    streetName = _firstSegmentValue(streetSegs, segId, "REGISTEREDNAME")
    if streetName == "None":
        return "None", "None"
    return streetName, _firstSegmentValue(streets, segId, "FACILITY")


def runCluster(dataset, eps, minSamples):
    """Cluster crash locations with DBSCAN and label each row with its cluster's streets.

    Parameters
    ----------
    dataset : pandas.DataFrame
        Must contain LATITUDE, LONGITUDE and STREETSEGID columns.
    eps : float
        DBSCAN neighbourhood radius, expressed in the units of the
        LATITUDE/LONGITUDE columns (the coordinates are clustered as-is).
    minSamples : int
        DBSCAN `min_samples`.

    Returns
    -------
    pandas.DataFrame
        The same `dataset` object, modified in place, with these columns
        added or overwritten: CLUSTER (DBSCAN label, -1 for noise),
        CLUSTERPRIMARYSTREET, CLUSTERSECONDARYSTREET,
        CLUSTERMAINFACILITYTYPE and CLUSTERSECONDARYFACILITYTYPE.

    For every cluster label the two most frequent positive STREETSEGID values
    are resolved to a street name and a facility type; anything that cannot be
    resolved is stored as the string "None".
    """
    # Build the (n, 2) coordinate array in one step instead of appending row by row.
    accPoints = np.column_stack((dataset['LATITUDE'], dataset['LONGITUDE'])).astype(float)

    # fit_predict fits the model and returns the labels, so a separate fit() is redundant.
    dataset['CLUSTER'] = DBSCAN(eps=eps, min_samples=minSamples).fit_predict(accPoints)

    # Only rows with a positive STREETSEGID take part in the per-cluster street vote.
    withSegment = dataset[dataset["STREETSEGID"] > 0]

    mainStreetDict = {}
    secondaryStreetDict = {}
    mainFacilityDict = {}
    secondaryFacilityDict = {}
    for clusterId in dataset['CLUSTER'].unique():
        segCounts = withSegment.loc[withSegment["CLUSTER"] == clusterId, 'STREETSEGID'].value_counts()
        # value_counts is sorted by frequency, so the first two entries are the
        # primary and secondary segments of this cluster.
        summaries = [_describeSegment(segId) for segId in segCounts.index[:2]]
        # Pad so clusters with fewer than two distinct segments still fill both slots.
        summaries += [("None", "None")] * (2 - len(summaries))
        mainStreetDict[clusterId], mainFacilityDict[clusterId] = summaries[0]
        secondaryStreetDict[clusterId], secondaryFacilityDict[clusterId] = summaries[1]

    # Broadcast the per-cluster summary back onto every row of that cluster.
    clusterLabels = list(dataset['CLUSTER'])
    dataset['CLUSTERPRIMARYSTREET'] = [mainStreetDict[c] for c in clusterLabels]
    dataset['CLUSTERSECONDARYSTREET'] = [secondaryStreetDict[c] for c in clusterLabels]
    dataset['CLUSTERMAINFACILITYTYPE'] = [mainFacilityDict[c] for c in clusterLabels]
    dataset['CLUSTERSECONDARYFACILITYTYPE'] = [secondaryFacilityDict[c] for c in clusterLabels]
    return dataset

def makeHulls(dataset):
    """Compute one convex hull per non-noise cluster.

    `dataset` must carry the columns written by runCluster. Rows whose CLUSTER
    is -1 are ignored. Points are collected as [longitude, latitude] pairs.

    Returns a 5-tuple of dicts keyed by cluster label:
    (hulls, primaryStreets, secondaryStreets, mainFacilityTypes,
    secondaryFacilityTypes). The four label dicts hold the str() of the values
    found on the first row seen for each cluster.
    """
    columnNames = ['LATITUDE', 'LONGITUDE', 'CLUSTER',
                   'CLUSTERPRIMARYSTREET', 'CLUSTERSECONDARYSTREET',
                   'CLUSTERMAINFACILITYTYPE', 'CLUSTERSECONDARYFACILITYTYPE']

    pointsByCluster = {}
    primaryStreets = {}
    secondaryStreets = {}
    mainFacilityTypes = {}
    secondaryFacilityTypes = {}

    for row in zip(*(dataset[name] for name in columnNames)):
        lat, long, cluster, primary, secondary, mainFacility, secondaryFacility = row
        if cluster == -1:
            continue
        if cluster not in pointsByCluster:
            # First row of this cluster: remember its labels.
            pointsByCluster[cluster] = []
            primaryStreets[cluster] = str(primary)
            secondaryStreets[cluster] = str(secondary)
            mainFacilityTypes[cluster] = str(mainFacility)
            secondaryFacilityTypes[cluster] = str(secondaryFacility)
        pointsByCluster[cluster].append([long, lat])

    hulls = {cluster: ConvexHull(points) for cluster, points in pointsByCluster.items()}

    return hulls, primaryStreets, secondaryStreets, mainFacilityTypes, secondaryFacilityTypes


def makeGeoJson(hulls, primaryStreets, secondaryStreets, mainFacilityTypes, secondaryFaciltyTypes):
    """Serialise cluster hulls to a GeoJSON FeatureCollection string.

    `hulls` maps a cluster label to an object exposing `points` and `vertices`
    (as returned by makeHulls). The other four arguments map the same labels to
    the text placed in each feature's properties.

    Returns the JSON text of a FeatureCollection holding one Polygon feature
    per hull.
    """
    def closedRing(hull):
        # Walk the hull vertices, then repeat the first one to close the ring.
        ring = [list(hull.points[vertex]) for vertex in hull.vertices]
        ring.append(list(hull.points[hull.vertices[0]]))
        return ring

    outlines = {label: closedRing(hull) for label, hull in hulls.items()}

    collection = {"type": "FeatureCollection", "features": []}
    for label, outline in outlines.items():
        collection["features"].append({
            "type": "Feature",
            "properties": {
                "name": str(label),
                "primaryStreet": primaryStreets[label],
                "secondaryStreet": secondaryStreets[label],
                "mainFacilityType": mainFacilityTypes[label],
                "secondaryFaciltyType": secondaryFaciltyTypes[label],
            },
            "geometry": {
                "type": "Polygon",
                "coordinates": [outline],
            },
        })

    return json.dumps(collection)

def style_function(feature):
    """Return the folium style dict for one GeoJSON feature.

    The fill colour is chosen from the feature's `mainFacilityType` property;
    any value not listed below is filled black. Opacity and outline colour are
    the same for every feature.
    """
    # styling guide  https://python-visualization.github.io/folium/modules.html
    facility = feature['properties']['mainFacilityType']

    # (facility label, fill colour), checked in order.
    fillByFacility = (
        ("None", 'red'),
        ("Shared Lane", 'orange'),
        ("Contraflow Bike Lane", 'purple'),
        ("Existing Bike Lane", 'blue'),
        ("Climbing Lane", 'pink'),
        ("Cycle Track", 'green'),
    )
    fill = next((colour for label, colour in fillByFacility if facility == label), 'black')

    return {
        'fillColor': fill,
        'fillOpacity' : .5,
        'color' : 'black'
    }


def makeMap(dataset):
    """Build a folium map of the clustered crashes.

    `dataset` must carry the columns written by runCluster. Each non-noise
    cluster becomes its own feature group of point markers, named
    "<primary street> and <secondary street>". The cluster hulls, coloured by
    style_function, go into a separate "Layers" group. A layer control and a
    fixed HTML legend are added before the map is returned.
    """
    dcMap = folium.Map(location=[38.9072, -77.0369], zoom_start=13)

    # One feature group per cluster, created the first time the cluster is seen.
    featureGroupDict = {}
    rows = zip(dataset['LATITUDE'], dataset['LONGITUDE'], dataset["CLUSTER"],
               dataset["CLUSTERPRIMARYSTREET"], dataset["CLUSTERSECONDARYSTREET"])
    for lat, long, cluster, clusterPrimaryStreet, clusterSecondaryStreet in rows:
        if cluster == -1:
            continue
        if cluster not in featureGroupDict:
            groupName = str(clusterPrimaryStreet) + " and " + str(clusterSecondaryStreet)
            featureGroupDict[cluster] = folium.FeatureGroup(name=groupName)
        marker = folium.CircleMarker(location=[lat, long], radius = .5, color = 'black')
        marker.add_to(featureGroupDict[cluster])

    # Hull polygons with a tooltip listing the cluster's streets and facility types.
    geoString = makeGeoJson(*makeHulls(dataset))
    hullTooltip = folium.features.GeoJsonTooltip(
        fields=['primaryStreet','secondaryStreet','mainFacilityType','secondaryFaciltyType']
    )
    hullGroup = folium.FeatureGroup(name="Layers")
    folium.GeoJson(json.loads(geoString), style_function=style_function,
                   tooltip=hullTooltip).add_to(hullGroup)
    featureGroupDict["Layers"] = hullGroup

    for group in featureGroupDict.values():
        dcMap.add_child(group)

    folium.map.LayerControl('topright', collapsed=False).add_to(dcMap)

    legend_html = """
     <div style="
     position: fixed; 
     bottom: 5px; left: 5px; width: 220px; height: 160px; 
     border:2px solid grey; z-index:9999; 
     background-color:white;
     opacity: .85;
     font-size:14px;
     font-weight: bold;
     ">
     <div style= "text-align:center">Street Bike Facilities</div>
     <br>
     <div><div style="margin-left:20px;">None </div><div style="margin-top:-14px;margin-left:180px;height:10px;width:10px;background-color:red;">  </div></div>
     <div><div style="margin-left:20px;">Shared Lane </div><div style="margin-top:-14px;margin-left:180px;height:10px;width:10px;background-color:orange;">  </div></div>
     <div><div style="margin-left:20px;">Contraflow Bike Lane </div><div style="margin-top:-14px;margin-left:180px;height:10px;width:10px;background-color:purple;">  </div></div>
     <div><div style="margin-left:20px;">Existing Lane </div><div style="margin-top:-14px;margin-left:180px;height:10px;width:10px;background-color:blue;">  </div></div>
     <div><div style="margin-left:20px;">Climbing Lane </div><div style="margin-top:-14px;margin-left:180px;height:10px;width:10px;background-color:pink;">  </div></div>
     <div><div style="margin-left:20px;">Cycle Track </div><div style="margin-top:-14px;margin-left:180px;height:10px;width:10px;background-color:green;">  </div></div>
      
      
      
      
      
      </div> """
    dcMap.get_root().html.add_child(folium.Element( legend_html ))

    return dcMap

In [714]:
# bikeData
# worst 3 hubs runCluster(bikeData, 0.0015, 25)
# more hubs runCluster(bikeData, 0.0014, 15)
# tightly packed runCluster(bikeData, 0.0005, 7)

# bikePedData
# runCluster(bikePedData, 0.0030, 3)

# bikeVehData
# runCluster(bikeVehData, 0.0010, 10)

In [715]:
# "Worst 3 hubs" setting: wide radius, high minimum cluster size.
clusteredData = runCluster(dataset=bikeData, eps=0.0015, minSamples=25)
dcMap = makeMap(dataset=clusteredData)
dcMap

In [716]:
# "More hubs" setting: slightly smaller radius, lower minimum cluster size.
clusteredData = runCluster(dataset=bikeData, eps=0.0014, minSamples=15)
dcMap = makeMap(dataset=clusteredData)
dcMap

In [717]:
# "Tightly packed" setting: small radius, small minimum cluster size.
clusteredData = runCluster(dataset=bikeData, eps=0.0005, minSamples=7)
dcMap = makeMap(dataset=clusteredData)
dcMap

In [718]:
# bikePedData

# clusteredData = runCluster(bikePedData, 0.01, 1)
# dcMap = makeMap(clusteredData)
# dcMap

In [719]:
# bike vehicle data

# Same pipeline on the bikeVehData table.
clusteredData = runCluster(dataset=bikeVehData, eps=0.0010, minSamples=10)
dcMap = makeMap(dataset=clusteredData)
dcMap