In [124]:
import geopandas as gpd
import pandas as pd
import re
from geopy import distance
import numpy as np

metra_df = gpd.read_file("../data/raw/MetraStations").to_crs(epsg=4326)
metra_routes = gpd.read_file("../data/raw/MetraLines").to_crs(epsg=4326)


In [125]:

from_stations = []
to_stations = []
first_coords = []
last_coords = []

for index, row in metra_routes.explode().iterrows():
    stations = row['DESCRIPTIO'].split(' to ')
    if len(stations) < 2:
        stations = row['DESCRIPTIO'].split(' ot ')
        if len(stations) < 2:
            continue
            

    stations = [re.sub(r'\([^)]*\)', '', s).strip() for s in stations]

    # print(stations)
    route_geom = row['geometry']
    first = route_geom.coords[0]
    last = route_geom.coords[-1]
    
    first_coords.append(first)
    last_coords.append(last)
    from_stations.append(stations[0])
    to_stations.append(stations[1])

    # add bidirectional edges
    first_coords.append(last)
    last_coords.append(first)
    from_stations.append(stations[1])
    to_stations.append(stations[0])

df = pd.DataFrame(
    {
        'src_station': from_stations,
        'src_loc': first_coords,
        'dest_stations': to_stations,
        'dest_loc': last_coords 
    }
)
df.to_csv("../data/processed/MetraRoutes.csv", index=False)


stations = df.drop_duplicates('src_station')[['src_station', 'src_loc']].rename(columns={'src_station': 'station', 'src_loc': 'loc'})
stations.to_csv('../data/processed/MetraStations.csv', index=False)

  for index, row in metra_routes.explode().iterrows():


In [126]:
bus_routes = gpd.read_file("../data/raw/CTA_BusRoutes").to_crs(epsg=4326)
bus_df = gpd.read_file("../data/raw/CTA_BusStops").to_crs(epsg=4326)


In [135]:
import math

def calculate_distance(x1, y1, x2, y2):
    return math.sqrt((x1-x2)**2 + (y1-y2)**2)

from pyproj import Proj, Geod

def utm_distance(lat1, lon1, lat2, lon2):
    p = Proj(proj='utm', zone=33, ellps='WGS84')
    x1, y1 = p(lon1, lat1)
    x2, y2 = p(lon2, lat2)
    distance = math.sqrt((x2 - x1)**2 + (y2 - y1)**2)
    return distance


def find_closest_metra(df, x, y):
    coords = df['geometry'].get_coordinates()

    distances = (np.square(x-coords['x']) + np.square(y-coords['y']))
    # for i in range(len(coords)):
    
    # distances.append(dist)

    idx = np.argmin(distances)
    dist = distance.distance((coords['y'][idx], coords['x'][idx]), (y, x)).mi
    print((coords['y'][idx], coords['x'][idx]), (y, x))
    return coords['x'][idx], coords['y'][idx], dist, df['PUBLIC_NAM'][idx] 


In [136]:
for idx, route in bus_routes.explode().iterrows():
    for bx, by in route['geometry'].coords:
        print(bx, by)
        x, y, dist, name = find_closest_metra(bus_df, bx, by)
        print(name)
        print(dist)

  for idx, route in bus_routes.explode().iterrows():


-87.64790999981973 41.867129999821366
(41.867250677000015, -87.647454251) (41.867129999821366, -87.64790999981973)
Roosevelt & Halsted
0.024942766392312656
-87.6474599997108 41.86713999996754
(41.867250677000015, -87.647454251) (41.86713999996754, -87.6474599997108)
Roosevelt & Halsted
0.007644256079836401
-87.64737999991566 41.867140000251396
(41.867250677000015, -87.647454251) (41.867140000251396, -87.64737999991566)
Roosevelt & Halsted
0.008545106925125983
-87.64688999916365 41.8671499996307
(41.86675062400002, -87.647078249) (41.8671499996307, -87.64688999916365)
Halsted & Roosevelt
0.02922415944870131
-87.64636000015456 41.867159999859545
(41.86707075100003, -87.64632481299998) (41.867159999859545, -87.64636000015456)
Roosevelt & Halsted
0.006421518223183414
-87.6457400001297 41.867169999936074
(41.86707075100003, -87.64632481299998) (41.867169999936074, -87.6457400001297)
Roosevelt & Halsted
0.03093718121241929
-87.6451600004367 41.86718000047094
(41.86707075100003, -87.646324812

In [122]:
print(bus_routes)

    ROUTE ROUTE0                           NAME  WKDAY  SAT  SUN  \
0      12    012                      ROOSEVELT      1    1    1   
1     121    121    UNION/STREETERVILLE EXPRESS      1    0    0   
2       1    001      BRONZEVILLE/UNION STATION      1    0    0   
3     108    108                   HALSTED/95TH      1    0    0   
4      11    011                        LINCOLN      1    1    1   
..    ...    ...                            ...    ...  ...  ...   
123   120    120  OGILVIE/STREETERVILLE EXPRESS      1    0    0   
124   126    126                        JACKSON      1    1    1   
125    20    020                        MADISON      1    1    1   
126   128    128          SOLDIER FIELD EXPRESS      0    0    0   
127   124    124                      NAVY PIER      1    1    1   

        SHAPE_LEN                                           geometry  
0    62586.111408  MULTILINESTRING ((-87.64791 41.86713, -87.6474...  
1    24090.823988  MULTILINESTRING ((-87.

In [123]:
print(bus_df)

       SYSTEMSTOP        STREET     CROSS_ST DIR POS ROUTESSTPG OWLROUTES  \
0          5282.0       JEFFERY  97TH STREET  NB  MT         15      None   
1          1143.0      STOCKTON      WEBSTER  NB  NT    151,156      None   
2         15703.0        HARLEM    SCHNEIDER  NB  NS         90      None   
3          6501.0     WENTWORTH   3200 SOUTH  SB  MB      24,31      None   
4          1593.0      MICHIGAN  14TH STREET  SB  FS      1,3,4        N4   
...           ...           ...          ...  ..  ..        ...       ...   
10755     18379.0        HARLEM      AUGUSTA  NB  FS         90      None   
10756     18084.0   69TH STREET        DAMEN  EB  FS         67      None   
10757     10274.0        CICERO  21ST STREET  NB  NS         54      None   
10758      7111.0  STONY ISLAND  84TH STREET  NB  NS         28      None   
10759     11284.0       KIMBALL      AINSLIE  NB  NS         82      None   

           CITY                  PUBLIC_NAM                    geometry  
0