# Adding New York bus routes to OSMnx map

## Section 1: Extracting bus stops from KML
KML source: 
<a href="https://www.google.com/maps/d/u/0/viewer?mid=1Y-euNeFcsu06Zxfdl6u6-sca3Yp-KYY&ll=40.75502933824687%2C-74.00066380083778&z=18">
MTA Bus Map
</a>

### Opening KML file and extracting placemarks

In [19]:
from fastkml import KML
from fastkml import Placemark, Point, StyleUrl, Style
from fastkml.utils import find, find_all
import os

In [20]:
# Parse the .kml file (path relative to the notebook) into a fastkml KML object
k = KML.parse("../data/doc.kml")

In [21]:
# Collect every Placemark that find_all yields from the parsed KML into a list
placemarks = list(find_all(k, of_type=Placemark))

In [22]:
# Print how many placemarks were extracted
print(len(placemarks))

15042


In [23]:
# Inspect the raw description string of the first placemark
print(placemarks[0].description)

name: HYLAN BLVD/MIDLAND AV<br>routes: S51, S78, S79+, S81<br>direction: SW<br>link: https://bustime.mta.info/m/index?q=200237<br>id: MTA_200237<br>Latitude,Longitude: 40.577699, -74.102611


### Parsing Placemark Description

In [None]:
# Extracting info from a placemark description

def parse_placemark_info(desc_str):
    """
    Parse a placemark description string like:
      "name: HYLAN BLVD/MIDLAND AV<br>"
      "routes: S51, S78, S79+, S81<br>"
      "direction: SW<br>"
      "link: https://…<br>"
      "id: MTA_200237<br>"
      "Latitude,Longitude: 40.577699, -74.102611"
    into a dict with proper types.

    Parameters
    ----------
    desc_str : str or None
        "key: value" segments separated by HTML line breaks. Any of
        <br>, <br/>, <br /> (case-insensitive) is accepted as a separator.

    Returns
    -------
    dict
        Every key/value pair as stripped strings, except:
          * 'routes' becomes a list of route codes (empty entries dropped);
          * 'Latitude,Longitude' is replaced by float 'latitude' and
            'longitude' entries.
        An empty or None description yields an empty dict. Segments that
        contain no colon at all are ignored.

    Raises
    ------
    ValueError
        If the 'Latitude,Longitude' value is not two comma-separated numbers.
    """
    import re  # local import so this cell does not depend on another cell's imports

    # A placemark may carry no description at all
    if not desc_str:
        return {}

    # 1) split into segments on any <br> variant
    parts = re.split(r'<br\s*/?>', desc_str, flags=re.IGNORECASE)

    data = {}
    for part in parts:
        if not part.strip():
            continue
        if ': ' in part:
            # preferred separator: split on the first ": " so that values such
            # as URLs (which contain ':') stay intact
            key, val = part.split(': ', 1)
        elif ':' in part:
            # tolerate "key:value" / "key:" written without the space
            key, _, val = part.partition(':')
        else:
            # not a key/value segment; skip it instead of raising
            continue
        data[key.strip()] = val.strip()

    # 2) post-process some fields:
    #   routes → list of route codes (an empty value gives an empty list)
    if 'routes' in data:
        data['routes'] = [r.strip() for r in data['routes'].split(',') if r.strip()]

    #   Latitude,Longitude → two floats; the combined key is dropped
    if 'Latitude,Longitude' in data:
        lat_str, lon_str = data.pop('Latitude,Longitude').split(',', 1)
        data['latitude'] = float(lat_str)
        data['longitude'] = float(lon_str)

    return data

In [44]:
# Sanity check: parse the first placemark's description and display the resulting dict
test_placemark_info = parse_placemark_info(placemarks[0].description)
test_placemark_info

{'name': 'HYLAN BLVD/MIDLAND AV',
 'routes': ['S51', 'S78', 'S79+', 'S81'],
 'direction': 'SW',
 'link': 'https://bustime.mta.info/m/index?q=200237',
 'id': 'MTA_200237',
 'latitude': 40.577699,
 'longitude': -74.102611}

`placemark.description` seems to contain all the data we might need to annotate the OSM graph.

## Section 2: Getting a Placemark based on OSMnx bus route name

In [28]:
import multiprocessing as mp
import numpy as np
import osmnx as ox
from matplotlib import pyplot as plt

ox.__version__

'2.0.2'

### Converting the New York digraph to a GeoDataFrame

In [61]:
# Build the drivable street-network graph for New York City with OSMnx
place = "New York, New York"
G = ox.graph.graph_from_place(place, network_type="drive")
# Keep a projected copy of the graph alongside the original one
Gp = ox.projection.project_graph(G)

Getting bus stops

Should I use OpenStreetMap's data, or just add my own info to each node by using a k-d tree? <br>
I'll use a k-d tree because this is a DSA project. <br>

In [None]:
# Convert only the nodes (no edges) of the drive MultiDiGraph G into a
# GeoDataFrame with a geometry column; the result is indexed by node id
gdf_nodes = ox.convert.graph_to_gdfs(
    G, nodes=True, edges=False, node_geometry=True,
    fill_edge_geometry=False)

In [None]:
# Display the graph nodes on a map
gdf_nodes.explore()

The drive network loads much faster than the entire OpenStreetMap network.

### Locating the node closest to a placemark

In [57]:
def findNearestNode2Placemark(G, placemark_info):
    '''
    Find the graph node nearest to a placemark.

    G is handed straight to ox.distance.nearest_nodes; placemark_info must
    provide 'longitude' and 'latitude' entries, which are used as the X and Y
    query coordinates respectively.

    Returns the result of nearest_nodes called with return_dist=True. The
    notebook reads element [0] of it as the node id, so it is presumably a
    (node_id, distance) pair rather than a bare id.
    '''
    return ox.distance.nearest_nodes(
        G,
        X=placemark_info['longitude'],
        Y=placemark_info['latitude'],
        return_dist=True,
    )

In [None]:
# Find the node nearest to placemarks[0]; because the helper passes
# return_dist=True, the result carries the distance along with the node id
nearest_node = findNearestNode2Placemark(G, test_placemark_info)

In [60]:
# The first element of the result is the node id; use it to fetch that
# node's row (coordinates, attributes, geometry) from gdf_nodes
nearest_node_id = nearest_node[0]
gdf_nodes.loc[nearest_node_id]

y                                    40.577717
x                                    -74.10254
highway                                    NaN
ref                                        NaN
street_count                                 2
junction                                   NaN
railway                                    NaN
geometry        POINT (-74.1025398 40.5777173)
Name: 5490128948, dtype: object

In [None]:
# Visualize the placemark together with the graph node nearest to it
import geopandas as gpd
from shapely import (
    Point, LineString)  # NOTE: this Point shadows the fastkml Point imported earlier

# Build each geometry once: the placemark location and its nearest graph node
# (both in lon/lat, matching the EPSG:4326 CRS declared below)
placemark_point = Point(test_placemark_info['longitude'], test_placemark_info['latitude'])
nearest_node_row = gdf_nodes.loc[nearest_node_id]
nearest_node_point = Point(nearest_node_row.x, nearest_node_row.y)

# Two features: the placemark itself and a line joining it to the nearest node.
# Labels describe what each geometry actually is.
nearest_node_dict = {'col1': ['Placemark', 'Line to nearest node'],
                     'geometry': [placemark_point,
                                  LineString([nearest_node_point, placemark_point])]}
# convert dictionary to geodataframe
nearest_node_gdf = gpd.GeoDataFrame(nearest_node_dict, crs="EPSG:4326")
# draw the placemark and connecting line in red; this map is the base layer
nearest_node_map = nearest_node_gdf.explore(color="red")
# overlay all graph nodes on the same map
gdf_nodes.explore(m=nearest_node_map)

### Adding placemark info to the gdf nodes