In [1]:
# Install the third-party packages that the import cell below pulls in
# (selenium, folium, pykml). No versions are pinned here.
!pip install selenium
!pip install folium
!pip install pykml



In [2]:
#libraries
#handling data
import pandas as pd
#visualization
from matplotlib import cm
from matplotlib import colors 
import matplotlib.path as mplPath
#operating system
import os
#google maps
from selenium import webdriver
#regexs
import re
import folium
from folium.features import (WmsTileLayer, RegularPolygonMarker, Vega, GeoJson,
                       CircleMarker, LatLngPopup,
                       ClickForMarker, TopoJson, PolyLine, 
                       )
#from progress_bar import log_progress 
import numpy as np
#reading json files
import json
#reading/writing csv files
import csv

from pykml import parser

In [3]:
# Directory that holds every input file used by this notebook.
datapath = '.'
# CSV with one row per link (road segment) in NYC.
linkspath = datapath + '/links.csv'
# CSV with one row per intersection (node) in NYC.
nodespath = datapath + '/nodes.csv'
# Text file listing the ids of links that carry enough information for analysis.
full_links_ids_path = datapath + '/full_link_ids.txt'
# KML file with the boundaries of the regions to parse.
# It has to be exported from Google Maps beforehand.
regionspath = datapath + '/Manhattan_13.kml'

# Load the tables as pandas dataframes.
links = pd.read_csv(linkspath)
# Only the node id and its two coordinate columns are kept.
nodes = pd.read_csv(nodespath)[['node_id', 'xcoord', 'ycoord']]


In [4]:
def plotRoad(x1, x2, y1, y2, nyc, color = 'red', width = 3):
    """Draw the segment (x1, y1) -> (x2, y2) on the map object `nyc`.

    Mind the argument order: both first components (x1, x2) come before
    both second components (y1, y2). Each (x, y) pair is handed to the
    PolyLine unchanged as one location.

    color -- line colour, red by default
    width -- line weight, 3 by default
    """
    start = (x1, y1)
    end = (x2, y2)
    segment = PolyLine(locations=[start, end], color=color, weight=width)
    nyc.add_child(segment)

def plotNode(x,y,nyc,sides = 4, radius = 2, color = 'black'):
    """Mark one intersection on the map object `nyc`.

    x, y   -- the two components of the marker location, passed on as [x, y]
    sides  -- number of sides of the polygon marker (default 4)
    radius -- marker radius (default 2)
    color  -- marker colour (default black)
    """
    marker = RegularPolygonMarker(
        location=[x, y],
        number_of_sides=sides,
        color=color,
        radius=radius,
    )
    nyc.add_child(marker)

def plotRoads(nyc, roads, data, color = 'red', width = 3):
    """Plot every link whose id appears in `roads` onto the map `nyc`.

    nyc   -- map object to draw on
    roads -- list of link ids to plot
    data  -- pandas dataframe of links; must provide the columns
             link_id, startX, startY, endX and endY

    Prints the number of roads first, then a running count after every
    100 plotted roads. The first row matching an id is the one drawn.
    """
    print(len(roads))
    for plotted, link_id in enumerate(roads, 1):
        match = data[data['link_id'] == link_id]
        start_y = match['startY'].values[0]
        end_y = match['endY'].values[0]
        start_x = match['startX'].values[0]
        end_x = match['endX'].values[0]
        # plotRoad takes both first components, then both second components.
        plotRoad(start_y, end_y, start_x, end_x, nyc, color, width)
        if plotted % 100 == 0:
            print(plotted)

def initializeMap():
    """Create and return a folium map centred at (40.7566, -73.9815), zoom 15.

    A 'cartodbpositron' tile layer, a layer control and a LatLngPopup are
    attached, in that order. The popup shows the latitude and longitude of
    any clicked position. Prints 'initialized map' before returning.
    """
    centre = (40.7566, -73.9815)
    city_map = folium.Map(location=centre, zoom_start=15)

    tiles = folium.TileLayer('cartodbpositron')
    tiles.add_to(city_map)

    layer_switch = folium.LayerControl()
    layer_switch.add_to(city_map)

    click_popup = LatLngPopup()
    city_map.add_child(click_popup)

    print('initialized map')
    return city_map

def readIntTxtToList(path):
    """Read a text file holding one integer per line into a list of ints.

    path -- path of the text file to read

    Surrounding whitespace on a line is ignored, and blank lines (for
    example a trailing empty line) are skipped instead of raising.
    Raises ValueError if a non-blank line is not a valid integer.
    """
    # The context manager closes the handle even if a line fails to parse.
    with open(path) as f:
        return [int(line.strip()) for line in f if line.strip()]

def writeIntArrayToTxt(path, arr):
    """Write the values of `arr` to `path` as one comma-separated csv row.

    path -- destination file; it is overwritten
    arr  -- sequence of values to write

    Prints len(arr) before writing. Note that the output is a single row,
    whereas readIntTxtToList expects one value per line.
    """
    import sys

    print(len(arr))
    # The csv module wants a binary handle on Python 2 but a text handle
    # opened with newline='' on Python 3; 'wb' on Python 3 raises TypeError.
    if sys.version_info[0] >= 3:
        open_kwargs = {'mode': 'w', 'newline': ''}
    else:
        open_kwargs = {'mode': 'wb'}
    with open(path, **open_kwargs) as csvfile:
        spamwriter = csv.writer(csvfile, delimiter=',', quoting=csv.QUOTE_MINIMAL)
        spamwriter.writerow(arr)


def saveMap(map_name, str_, ave_):
    """Build a fresh map, plot the given links on it and save it.

    map_name -- output path handed to the map's save()
    str_     -- link ids drawn in blue ("streets"), or None to skip
    ave_     -- link ids drawn in red ("avenues"), or None to skip

    Link geometry is looked up in the notebook-level `links` dataframe.
    """
    citymap = initializeMap()
    # Identity test: `!= None` is elementwise (and ambiguous in an `if`)
    # when an array or Series of ids is passed.
    if str_ is not None:
        plotRoads(citymap, str_, links, 'blue', 3)
        print('plotted streets')
    if ave_ is not None:
        plotRoads(citymap, ave_, links, 'red', 3)
        print('plotted avenues')

    citymap.save(map_name)
    print('{0} saved'.format(map_name))

In [5]:
def extractPolyFromKML(kmlpath):
    """Extract the outer boundary points of every Placemark in a KML file.

    kmlpath -- path of the KML file

    Returns a list with one entry per Placemark under Document. Each entry
    is a list of 2-tuples of floats built from the first two
    comma-separated components of every coordinate, in file order.
    Every Placemark is expected to contain
    Polygon/outerBoundaryIs/LinearRing/coordinates.
    """
    # Read bytes and close the handle promptly. lxml refuses unicode
    # strings that carry an XML encoding declaration, bytes always work.
    with open(kmlpath, 'rb') as kml_file:
        root = parser.fromstring(kml_file.read())

    regions = []
    placemark = root.Document.Placemark
    for i in range(len(placemark)):
        ring = placemark[i].Polygon.outerBoundaryIs.LinearRing
        block = []
        # Coordinates are whitespace-separated "a,b[,c]" triples.
        for point in str(ring.coordinates).split():
            loc = point.split(',')[0:2]
            block.append((float(loc[0]), float(loc[1])))
        regions.append(block)
    return regions

def visualizePoly(regions, colors, m):
    """Outline every region on the map `m` and return `m`.

    regions -- list of boundaries, each a list of 2-component points
    colors  -- colours indexed like `regions`

    Consecutive boundary points are joined with width-3 segments. The
    last point is not joined back to the first one here.
    """
    for idx, boundary in enumerate(regions):
        for start, end in zip(boundary, boundary[1:]):
            # Points are stored with the components swapped relative to
            # what plotRoad expects, hence index 1 before index 0.
            plotRoad(start[1], end[1], start[0], end[0], m, color=colors[idx], width=3)
    return m
def visualizeNodes(inter, colors, nodes, m):
    """Plot the nodes of each region on the map `m` and return `m`.

    inter  -- list of node-id lists, one per region
    colors -- colours indexed like `inter`
    nodes  -- dataframe with the columns node_id, xcoord and ycoord
    """
    for idx, node_ids in enumerate(inter):
        shade = colors[idx]
        for node_id in node_ids:
            match = nodes[nodes['node_id'] == node_id]
            node_x = match['xcoord'].values[0]
            node_y = match['ycoord'].values[0]
            # plotNode receives the y component first.
            plotNode(node_y, node_x, m, sides=4, radius=2, color=shade)
    return m

def visualizeLinks(_regionNodes, _regionLinks, colors, links, m):
    """Plot each region's links in that region's colour; return the map `m`.

    _regionNodes -- only its length is used, to decide how many regions to plot
    _regionLinks -- list of link-id lists, one per region
    colors       -- colours indexed like the regions
    links        -- dataframe of links handed on to plotRoads
    """
    for idx, _ in enumerate(_regionNodes):
        plotRoads(m, _regionLinks[idx], links, color=colors[idx], width=3)
    return m

def nodesPerRegion(regions,nodes):
    """Collect, for every region, the ids of the nodes that getNodes finds in it.

    regions -- list of region boundaries
    nodes   -- dataframe of nodes handed on to getNodes

    Prints the index of each region as it is processed and returns one
    list of node ids per region, in the same order as `regions`.
    """
    regionNodes = []
    for idx, boundary in enumerate(regions):
        print(idx)
        regionNodes.append(getNodes(boundary, nodes))
    return regionNodes

def linksPerRegion(regionNodes, links):
    """Group link ids by the region that contains both of their end nodes.

    regionNodes -- list of node-id collections, one per region
    links       -- dataframe with the columns link_id, begin_node_id
                   and end_node_id

    Returns one list of link ids per region, in the same order as
    `regionNodes`. A link is assigned to every region that holds both its
    begin and its end node; links spanning two regions are dropped.
    """
    # Sets make each membership test O(1) instead of scanning a list for
    # every link and region.
    node_sets = [set(region) for region in regionNodes]
    regionLinks = [[] for _ in node_sets]
    for row_idx in range(len(links)):
        # NOTE(review): row access may upcast ids to float when `links`
        # also has float columns; kept as-is so the returned values match
        # the previous behaviour.
        link = links.iloc[row_idx]
        beginNode = link['begin_node_id']
        endNode = link['end_node_id']
        # Separate loop names: the original reused `i` for both loops.
        for bucket, members in zip(regionLinks, node_sets):
            if beginNode in members and endNode in members:
                bucket.append(link['link_id'])
    return regionLinks
   


def getNodes(region, nodes):
    """Return the ids of all nodes that lie inside the polygon `region`.

    region -- non-empty list of (x, y) boundary vertices
    nodes  -- dataframe with the columns node_id, xcoord and ycoord

    Returns a list of ints. Raises IndexError when `region` is empty.
    """
    verts = region
    # First vertex starts the path, last vertex closes the polygon,
    # everything in between is a straight line.
    codes = [mplPath.Path.LINETO] * len(verts)
    codes[0] = mplPath.Path.MOVETO
    codes[-1] = mplPath.Path.CLOSEPOLY
    poly = mplPath.Path(verts, codes)

    # One vectorised containment test over all nodes replaces the per-row
    # iloc + contains_point loop.
    inside = poly.contains_points(nodes[['xcoord', 'ycoord']].values)
    return [int(node_id) for node_id in nodes['node_id'].values[inside]]


def writeToCSV(data, filename):
    """Write `data` to `filename` as csv, one row per element of `data`.

    data     -- iterable of rows, each row an iterable of values
    filename -- destination file; it is overwritten
    """
    import sys

    # The csv module wants a binary handle on Python 2 but a text handle
    # opened with newline='' on Python 3; "wb" on Python 3 raises TypeError.
    if sys.version_info[0] >= 3:
        open_kwargs = {'mode': 'w', 'newline': ''}
    else:
        open_kwargs = {'mode': 'wb'}
    with open(filename, **open_kwargs) as f:
        writer = csv.writer(f)
        writer.writerows(data)

In [6]:
# One colour per region, indexed like the regions extracted below.
# NOTE(review): this name shadows `colors` imported from matplotlib above.
colors = [
    '#330700', '#f2553d', '#73341d', '#e57e39', '#736256',
    '#8c5e00', '#402b00', '#f2deb6', '#f2ce3d', '#595843',
    '#798020', '#ccff00', '#293300',
]
# Boundary points of every region described in the KML file.
urbanRegions = extractPolyFromKML(kmlpath=regionspath)
print('extracted regions from KML file')

extracted regions from KML file


In [7]:
# Node ids grouped by region, in the same order as urbanRegions.
regionNodes = nodesPerRegion(regions=urbanRegions, nodes=nodes)
print('extracted nodes for every region')

0
1
2
3
4
5
6
7
8
9
10
11
12
extracted nodes for every region


In [8]:
print("start")
# Link ids grouped by region: a link belongs to a region when both of its
# end nodes are in that region.
regionLinks = linksPerRegion(regionNodes=regionNodes, links=links)
print('extracted links for every region')

start
extracted links for every region


In [9]:
# Fresh map with the outline of every region drawn in its own colour.
nyc1 = visualizePoly(urbanRegions, colors, initializeMap())
print('plotted borders')
nyc1

initialized map
plotted borders


In [15]:
nyc2 = initializeMap()

# Show the intersections of three sample regions, each in its own colour.
for region_idx in (3, 7, 11):
    nyc2 = visualizeNodes([regionNodes[region_idx]], [colors[region_idx]], nodes, nyc2)
print('plotted intersections')
nyc2

initialized map
plotted intersections


In [56]:
nyc3 = initializeMap()

# Show the links of three sample regions, each in its own colour.
for region_idx in (0, 5, 10):
    nyc3 = visualizeLinks([regionNodes[region_idx]], [regionLinks[region_idx]], [colors[region_idx]], links, nyc3)


print('plotted roads')
nyc3

initialized map
692
100
200
300
400
500
600
621
100
200
300
400
500
600
727
100
200
300
400
500
600
700
plotted roads


In [None]:
# Persist the per-region link ids, one csv row per region.
writeToCSV(data=regionLinks, filename='fiveboroughsLinkIDs.csv')