# Calculation of inter-station paths for the visualization

### Imports

In [1]:
import geopandas as gpd
import numpy as np
import pandas as pd

### Load the geojson data for Amtrak Northeast Corridor Stations with geopandas
* The `geometry` feature gives the (longitude, latitude) coordinate for each station of interest along the route of the Northeast Corridor
* The `STNCODE` feature gives the Amtrak station abbreviation

In [2]:
geo_stations = gpd.read_file('./data/geo/Amtrak_Project_Stations_Expanded.geojson')
geo_stations = geo_stations.set_index('STNCODE')
geo_stations

Unnamed: 0_level_0,OBJECTID,STNNAME,CITY2,STATE,STFIPS,urban,geometry
STNCODE,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1
BOS,5,"Boston (South Station), Massachusetts",Boston,MA,25,YES,POINT (-71.05530 42.35231)
BBY,17,"Boston (Back Bay), Massachusetts",Boston,MA,25,YES,POINT (-71.07583 42.34732)
RTE,24,"Westwood, Route 128 Station, Massachusetts",Route 128,MA,25,YES,POINT (-71.14789 42.21024)
PVD,10,"Providence, Rhode Island",Providence,RI,44,YES,POINT (-71.41348 41.82949)
KIN,60,"West Kingston, Rhode Island",Kingston,RI,44,,POINT (-71.56060 41.48396)
WLY,166,"Westerly, Rhode Island",Westerly,RI,44,YES,POINT (-71.82978 41.38108)
MYS,223,"Mystic, Connecticut",Mystic,CT,9,YES,POINT (-71.96309 41.35093)
NLC,63,"New London, Connecticut",New London,CT,9,YES,POINT (-72.09322 41.35427)
OSB,123,"Old Saybrook, Connecticut",Old Saybrook,CT,9,YES,POINT (-72.37682 41.30039)
NHV,16,"New Haven, Connecticut",New Haven,CT,9,YES,POINT (-72.92667 41.29771)


### Define stations and mile markers for each station going in the Northbound and Southbound directions
* The Northbound and Southbound mile markers were sourced from Amtrak Northeast Regional time tables

In [3]:
amtrak_stations = list(geo_stations.index)

mile_markers = {
    'NB_MILE': {station: None for station in amtrak_stations} , 
    'SB_MILE': {station: None for station in amtrak_stations} 
} 

# Mile markers for each station along route starting in Boston and heading to DC
SB = [0, 1, 11, 43, 70, 87, 96, 105, 123, 156, 173, 195, 212, 231, 241, 244, 255, 289, 322, 347, 386, 416, 427, 448, 457]

# Mile markers for each station along route starting in DC and heading to Boston
NB = [457, 456, 446, 414, 387, 370, 361, 352, 344, 301, 284, 262, 245, 226, 216, 213, 202, 168, 135, 110, 71, 41, 30, 9, 0]

# Add to dictionary and then create data frame
for station, NB_mile, SB_mile in zip(amtrak_stations, NB, SB):
    mile_markers['NB_MILE'][station] = NB_mile
    mile_markers['SB_MILE'][station] = SB_mile

mile_cols = pd.DataFrame.from_dict(mile_markers, orient='columns')

mile_cols

Unnamed: 0,NB_MILE,SB_MILE
BOS,457,0
BBY,456,1
RTE,446,11
PVD,414,43
KIN,387,70
WLY,370,87
MYS,361,96
NLC,352,105
OSB,344,123
NHV,301,156


In [5]:
mileage = mile_markers['SB_MILE']

In [6]:
for i, station in enumerate(amtrak_stations):
    if i < len(amtrak_stations)-2:
        next_station = amtrak_stations[i+1]
        print('{} -> {} : {}'.format(station, next_station, mileage[next_station] - mileage[station]))

BOS -> BBY : 1
BBY -> RTE : 10
RTE -> PVD : 32
PVD -> KIN : 27
KIN -> WLY : 17
WLY -> MYS : 9
MYS -> NLC : 9
NLC -> OSB : 18
OSB -> NHV : 33
NHV -> BRP : 17
BRP -> STM : 22
STM -> NRO : 17
NRO -> NYP : 19
NYP -> NWK : 10
NWK -> EWR : 3
EWR -> MET : 11
MET -> TRE : 34
TRE -> PHL : 33
PHL -> WIL : 25
WIL -> ABE : 39
ABE -> BAL : 30
BAL -> BWI : 11
BWI -> NCR : 21


### Create data frame of longitude/latitude values, indexed by Amtrak station code

In [4]:
lonlat = np.array([geo_stations.geometry.x, geo_stations.geometry.y]).T
lonlat = pd.DataFrame(lonlat, index = geo_stations.index, columns = ['LON', 'LAT'])

### Create weather city names columns indexed by Amtrak station codes
* The three stops in Massachusetts (South Station, Back Bay, Route 128) all resolve to Boston weather stations, which is why Boston appears three times in the list
* Additionally, the two Newark stops are only three miles apart, so they share the same weather data.
* Otherwise, all weather stations are generally around the same locations as the Amtrak stations

In [5]:
weather_location_names =  ['Boston, MA', 'Boston, MA', 'Boston, MA', 
                           'Providence, RI', 'Kingston, RI', 'Westerly, RI',
                           'Mystic, CT', 'New London, CT', 'Old Saybrook, CT',
                           'New Haven, CT', 'Bridgeport, CT', 'Stamford, CT',
                           'New Rochelle, NY', 'Manhattan, NY', 'Newark, NJ', 
                           'Newark, NJ', 'Iselin, NJ', 'Trenton, NJ', 
                           'Philadelphia, PA', 'Wilmington, DE', 'Aberdeen, MD', 
                           'Baltimore, MD','Baltimore BWI Airport, MD',
                           'New Carrollton, MD', 'Washington, DC']
weather_stations = pd.Series(weather_location_names, name = 'WEATHER_LOC', index = geo_stations.index)

### Create new data frame with all relevant station linking information, indexed by Amtrak station codes

In [6]:
# Create empty data frame with Southbound-oriented ordered index
gdf = pd.DataFrame(geo_stations.index, index = geo_stations.index)
# Extract desired columns from geo_stations data frame
gdf[['STNNAME', 'STATE', 'AMTRAK_LOC']] = geo_stations[['STNNAME','STATE', 'CITY2']]
# Combine with longitude and latitude columns and mile markers columns
gdf = pd.concat([gdf, weather_stations, lonlat, mile_cols], axis = 1)
gdf

Unnamed: 0,STNCODE,STNNAME,STATE,AMTRAK_LOC,WEATHER_LOC,LON,LAT,NB_MILE,SB_MILE
BOS,BOS,"Boston (South Station), Massachusetts",MA,Boston,"Boston, MA",-71.055304,42.352311,457,0
BBY,BBY,"Boston (Back Bay), Massachusetts",MA,Boston,"Boston, MA",-71.075828,42.347317,456,1
RTE,RTE,"Westwood, Route 128 Station, Massachusetts",MA,Route 128,"Boston, MA",-71.147894,42.210242,446,11
PVD,PVD,"Providence, Rhode Island",RI,Providence,"Providence, RI",-71.413478,41.82949,414,43
KIN,KIN,"West Kingston, Rhode Island",RI,Kingston,"Kingston, RI",-71.560597,41.483959,387,70
WLY,WLY,"Westerly, Rhode Island",RI,Westerly,"Westerly, RI",-71.82978,41.381081,370,87
MYS,MYS,"Mystic, Connecticut",CT,Mystic,"Mystic, CT",-71.963093,41.350934,361,96
NLC,NLC,"New London, Connecticut",CT,New London,"New London, CT",-72.093225,41.354267,352,105
OSB,OSB,"Old Saybrook, Connecticut",CT,Old Saybrook,"Old Saybrook, CT",-72.376818,41.30039,344,123
NHV,NHV,"New Haven, Connecticut",CT,New Haven,"New Haven, CT",-72.92667,41.297714,301,156


### Update a couple of values from the GeoJSON file 
* BWI official station name is too long for the GeoJSON so we have to fix the name
* The coordinate of Newark (Airport station - EWR) is slightly off the route coordinates and thus needs to be corrected so that the station-pair paths can be calculated
    * They are slightly truncated but it's fine

In [7]:
gdf['STNNAME'].loc['BWI'] = "BWI Rail Station at Thurgood Marshall Airport, Maryland"
gdf['LON'].loc['EWR'] = -74.19067
gdf['LAT'].loc['EWR'] = 40.70443
gdf

A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  self._setitem_single_block(indexer, value, name)


Unnamed: 0,STNCODE,STNNAME,STATE,AMTRAK_LOC,WEATHER_LOC,LON,LAT,NB_MILE,SB_MILE
BOS,BOS,"Boston (South Station), Massachusetts",MA,Boston,"Boston, MA",-71.055304,42.352311,457,0
BBY,BBY,"Boston (Back Bay), Massachusetts",MA,Boston,"Boston, MA",-71.075828,42.347317,456,1
RTE,RTE,"Westwood, Route 128 Station, Massachusetts",MA,Route 128,"Boston, MA",-71.147894,42.210242,446,11
PVD,PVD,"Providence, Rhode Island",RI,Providence,"Providence, RI",-71.413478,41.82949,414,43
KIN,KIN,"West Kingston, Rhode Island",RI,Kingston,"Kingston, RI",-71.560597,41.483959,387,70
WLY,WLY,"Westerly, Rhode Island",RI,Westerly,"Westerly, RI",-71.82978,41.381081,370,87
MYS,MYS,"Mystic, Connecticut",CT,Mystic,"Mystic, CT",-71.963093,41.350934,361,96
NLC,NLC,"New London, Connecticut",CT,New London,"New London, CT",-72.093225,41.354267,352,105
OSB,OSB,"Old Saybrook, Connecticut",CT,Old Saybrook,"Old Saybrook, CT",-72.376818,41.30039,344,123
NHV,NHV,"New Haven, Connecticut",CT,New Haven,"New Haven, CT",-72.92667,41.297714,301,156


In [8]:
print(gdf['STNNAME'].loc['BWI'])

BWI Rail Station at Thurgood Marshall Airport, Maryland


### Create dictionary to store information about each station
* Keys:
    * `x`: longitude of Amtrak station
    * `y`: latitude of Amtrak station
    * `prev`: the immediate previous station on the Southbound route
    * `next`: the immediate next station on the Southbound route
    * `path2prev`: the indices in the full list of longitude and latitude values between the previous and specified station
    * `path2next`: the indices in the full list of longitude and latitude values between the specified station and the next station
    * `index_where`: the indices in the full list of longitude and latitude values where the route is within a small distance of the station

In [9]:
# Extract latitude and longitude columns and stations index and create dictionary for storage
lat = gdf['LAT']
lon = gdf['LON']
stations = gdf.index
geoloc_dict = {station: {'x': None, 'y': None, 
                         'prev': None, 'next': None,
                         'path2prev': None,'path2next': None, 
                         'index_where': None} 
               for station in stations}

# For each station, set the x and y coordinates to the longitude and latitude values
# Also set the previous and next stations along the route (using Southbound-oriented indexing)
prev_stat = None
for i, station in enumerate(stations):
    geoloc_dict[station]['x'] = lon[station] 
    geoloc_dict[station]['y'] = lat[station] 
    geoloc_dict[station]['prev'] = prev_stat
    if station != 'WAS':
        geoloc_dict[station]['next'] = stations[i+1]
    prev_stat = station
    print(station, 'lon:', geoloc_dict[station]['x'], 'lat:', geoloc_dict[station]['y'], 
          'prev:', geoloc_dict[station]['prev'], 'next:', geoloc_dict[station]['next'])

BOS lon: -71.05530399962942 lat: 42.35231100012763 prev: None next: BBY
BBY lon: -71.07582800026684 lat: 42.34731700021211 prev: BOS next: RTE
RTE lon: -71.14789400005567 lat: 42.21024199995967 prev: BBY next: PVD
PVD lon: -71.4134779996708 lat: 41.829490000162025 prev: RTE next: KIN
KIN lon: -71.5605969999761 lat: 41.483959000033835 prev: PVD next: WLY
WLY lon: -71.82978000006408 lat: 41.38108100003547 prev: KIN next: MYS
MYS lon: -71.96309300027858 lat: 41.350933999727665 prev: WLY next: NLC
NLC lon: -72.09322499959666 lat: 41.35426700011465 prev: MYS next: OSB
OSB lon: -72.37681800002302 lat: 41.30039000021741 prev: NLC next: NHV
NHV lon: -72.92666999955892 lat: 41.29771399982176 prev: OSB next: BRP
BRP lon: -73.18753900006422 lat: 41.178005999792305 prev: NHV next: STM
STM lon: -73.54215999966355 lat: 41.047130000267344 prev: BRP next: NRO
NRO lon: -73.78432899996976 lat: 40.911450999694146 prev: STM next: NYP
NYP lon: -73.99445899996624 lat: 40.75032699989306 prev: NRO next: NWK
N

### Creating a list of coordinates for latitude and longitude values
* Will convert to CSV format rather than GeoJSON

In [10]:
geo_route = gpd.read_file('./data/geo/Amtrak_Project_Routes.geojson')
geo_route

Unnamed: 0,OBJECTID,NAME,Shape_Leng,Shape_Le_1,Shape_Length,geometry
0,29,Regional,1041187.0,1041187.0,1358052.0,"MULTILINESTRING ((-76.45187 37.02302, -76.4527..."


### Read in the BOS-BBY data
* Data was not included in the geo_route file for some reason
* I manually collected the coordinates between BBY and BOS from Google Maps

In [11]:
bos_bby = pd.read_csv('./data/facts/BBY-BOS.csv')
bos_bby.head()

Unnamed: 0,Longitude,Latitude
0,-71.055112,42.351438
1,-71.055155,42.351094
2,-71.055263,42.350636
3,-71.055519,42.350146
4,-71.056269,42.348733


In [12]:
bos_bby.tail()

Unnamed: 0,Longitude,Latitude
37,-71.072185,42.347442
38,-71.072975,42.347488
39,-71.07337,42.347411
40,-71.074535,42.347319
41,-71.075117,42.347304


### Add all latitude/longitude coordinates to individual lists

In [13]:
lats = []
lons = []
for mlstring in geo_route.geometry:
    linestrings = mlstring.geoms
    for linestring in linestrings: 
        x, y = linestring.xy
        lats = np.append(lats, y)
        lons = np.append(lons, x)
print(len(lats), len(lons))

13258 13258


### Reverse them to Southbound orientation

In [14]:
lons = lons[::-1]
lats = lats[::-1]

### Add BOS-BBY to lats/lons

In [15]:
lats_bos_bby = np.array(bos_bby['Latitude'])
lons_bos_bby = np.array(bos_bby['Longitude'])

In [16]:
lats_full = np.concatenate((lats_bos_bby, lats))
lons_full = np.concatenate((lons_bos_bby, lons))

### Extract only Boston to Washington
* Full Regional route extends to Virginia, but not all trains actually continue past Washington, DC
* For simplicity and consistency, I omitted Virginia stations from my data collection
* This part stops adding coordinates to the lists after it reaches the Washington, DC station coordinate

In [17]:
lats_bos_to_was = []
lons_bos_to_was = []
for x, y in zip(lons_full, lats_full):
    if x < -77.006422 and y < 38.896993:
        break
    else:
        lats_bos_to_was = np.append(lats_bos_to_was, y)
        lons_bos_to_was = np.append(lons_bos_to_was, x)
print(len(lons_bos_to_was))
print(len(lats_bos_to_was))

9592
9592


### Convert to numpy array and then data frame for later use

In [18]:
lonlat_bos_to_was = np.array([lons_bos_to_was, lats_bos_to_was]).T
print(lonlat_bos_to_was.shape)
lonlat_df = pd.DataFrame(lonlat_bos_to_was, columns = ["Longitude","Latitude"])
lonlat_df.tail()

(9592, 2)


Unnamed: 0,Longitude,Latitude
9587,-77.005867,38.886143
9588,-77.006019,38.885972
9589,-77.00611,38.8859
9590,-77.006242,38.885795
9591,-77.006305,38.885746


In [19]:
lonlat_df.head()

Unnamed: 0,Longitude,Latitude
0,-71.055112,42.351438
1,-71.055155,42.351094
2,-71.055263,42.350636
3,-71.055519,42.350146
4,-71.056269,42.348733


### Calculate the indices where the longitude and latitude are within a small tolerance distance of the station
* Coordinates from the original file do not include Boston-South Station, they only have data to/from Boston-Back Bay
* I manually mapped out the route and added some coordinates
* The tolerance is relatively large but further precision would exclude some stations because they have 0 points within the smaller range. 

In [20]:
for station in amtrak_stations:
    station_x, station_y = geoloc_dict[station]['x'], geoloc_dict[station]['y']
    prev_stat, next_stat = geoloc_dict[station]['prev'], geoloc_dict[station]['next']
    where_near_x = np.argwhere(np.isclose(lons_full, station_x, atol = 0.0007))
    where_near_y = np.argwhere(np.isclose(lats_full, station_y, atol = 0.0007))
    intersect = np.intersect1d(where_near_x, where_near_y)
    geoloc_dict[station]['index_where'] = intersect
    print(station, intersect)

BOS [0]
BBY [40 41 42 43 44 45 46 47 48 49]
RTE [195 196 197 198 199 200 201 202 203]
PVD [695 696 697 698 699 700]
KIN [1067 1068 1069]
WLY [1315 1316 1317 1318 1319 1320 1321 1322 1323 1324 1325]
MYS [1551 1552 1553 1554 1555 1556 1557 1558 1559 1560 1561 1562]
NLC [1853 1854 1855 1856 1857 1858 1859 1860 1861]
OSB [2413 2414 2415]
NHV [3227]
BRP [3614 3615 3616 3617 3618 3619 3620]
STM [4100 4101]
NRO [4495]
NYP [5109 5110 5111 5112 5113]
NWK [5332 5333 5356]
EWR [5410 5411 5412]
MET [5586 5587 5588 5589 5590 5591 5592]
TRE [5989 5990]
PHL [6709 6710 6711 6712 6713 6714 6715 6716 6717 6718 6719 6720 6721 6722]
WIL [7327 7328 7329 7330 7331 7332 7333 7334]
ABE [8005]
BAL [8621 8622 8623]
BWI [8942 8943 8944 8945]
NCR [9284 9285 9286]
WAS [9552]


In [21]:
lonlat_df = lonlat_df[0:9552]
lonlat_df.shape[0]

9552

### Calculate inter-station paths to find "path groups" for the visualization separators

In [22]:
# For each station, add the group indicator to a list for a new column in the dataframe
between_station_paths = []
group_num = 0
groups_path = []
for curr_stat in amtrak_stations:
    prev_stat, next_stat = geoloc_dict[curr_stat]['prev'], geoloc_dict[curr_stat]['next']
    if next_stat is not None:
        curr_stat_index_where = geoloc_dict[curr_stat]['index_where'][0]
        next_stat_index_where = geoloc_dict[next_stat]['index_where'][0]
        path2next = []
        for i in range(curr_stat_index_where, next_stat_index_where):
            first_stat, second_stat = sorted((curr_stat, next_stat))
            between_station_paths.append('{}-{}'.format(first_stat, second_stat))
            path2next.append(group_num)
            groups_path.append(group_num)
        geoloc_dict[curr_stat]['path2next'] = path2next
        group_num += 1
    if prev_stat is not None:
        geoloc_dict[curr_stat]['path2prev'] = geoloc_dict[prev_stat]['path2next']
print(len(groups_path))

9552


In [23]:
print(len(groups_path))
print(lonlat_df.shape[0])

9552
9552


### Create series to add to dataframe

In [24]:
group_num = pd.Series(groups_path, name = 'Group')
group_text = pd.Series(between_station_paths, name = 'Station Pairing')
print(group_text.shape[0])
print(group_num.shape[0])

9552
9552


### Create southbound and northbound path groups
* The groups are to indicate what the path extending the station should be called
* Different depending on the direction:
    * Northbound trains originate in Washington, DC and terminate in Boston/South Station, which means there will be departure delay data for all stations (assuming the train stops there) between Washington and Boston/Back Bay)
    * Southbound trains originate in Boston/South Station and terminate in Washington, which means there will be departure delay data for all stations (assuming the train stops there) between Boston/South Station and New Carrollton, Maryland) 
    * The segment beginning at the previous station and ending at the current station will correspond to the delay amount gained or lost along the segment on the way to the current station, and this amount is represented by the difference between the departure time offset reported upon departing the previous station and the arrival time offset reported (if available) or the departure time offset (otherwise) from the current station. 
* Stations are always listed in Southbound-oriented order

In [25]:
stations_orient_southbound = amtrak_stations[1:]
print(stations_orient_southbound)

['BBY', 'RTE', 'PVD', 'KIN', 'WLY', 'MYS', 'NLC', 'OSB', 'NHV', 'BRP', 'STM', 'NRO', 'NYP', 'NWK', 'EWR', 'MET', 'TRE', 'PHL', 'WIL', 'ABE', 'BAL', 'BWI', 'NCR', 'WAS']


In [26]:
stations_orient_northbound = amtrak_stations[0:-1]
print(stations_orient_northbound)

['BOS', 'BBY', 'RTE', 'PVD', 'KIN', 'WLY', 'MYS', 'NLC', 'OSB', 'NHV', 'BRP', 'STM', 'NRO', 'NYP', 'NWK', 'EWR', 'MET', 'TRE', 'PHL', 'WIL', 'ABE', 'BAL', 'BWI', 'NCR']


### Explanation of how the path groups work
* If a train departing from Washington, then the color of the path route segment between Washington and whatever the next stop is represents the additional trip time gained or lost over that segment. 
* Ideally, if enough data were available indicating arrivals at intermediate stations, we would use the time difference of the actual arrival from the scheduled arrival.
* For most intermediate, non-major stations, there is not arrival data directly available; however, since these stations tend to be smaller stations with fewer passengers getting on/off, the train tends to only spend a minute or two in the station and this time is built into the scheduled timetable.
* Therefore, unless there is an issue along the route ahead of the train, the time difference calculated from the time that the train departs from that next station (after Washington) should be approximately on par with the time difference upon arriving at the station after Washington. 
* This is the best I can do with the data available. I initially was using regex to extract the time difference from the `Comments` column in the source data as there can sometimes be arrival time information there, but there is not *always* data there so it made more sense for consistency to calculate this difference myself.

In [27]:
sb_path_group = pd.Series(index=group_num.index, name = 'SB_Group', dtype='object')
nb_path_group = pd.Series(index=group_num.index,  name = 'NB_Group', dtype='object')
for num in group_num.unique():
    nb_station = stations_orient_northbound[num]
    sb_station = stations_orient_southbound[num]
    print(num, '---', 'NB Station:', nb_station, 'SB Station:', sb_station)
    group_index = group_num.loc[group_num == num].index
    nb_path_group.iloc[group_index] = nb_station
    sb_path_group.iloc[group_index] = sb_station

0 --- NB Station: BOS SB Station: BBY
1 --- NB Station: BBY SB Station: RTE
2 --- NB Station: RTE SB Station: PVD
3 --- NB Station: PVD SB Station: KIN
4 --- NB Station: KIN SB Station: WLY
5 --- NB Station: WLY SB Station: MYS
6 --- NB Station: MYS SB Station: NLC
7 --- NB Station: NLC SB Station: OSB
8 --- NB Station: OSB SB Station: NHV
9 --- NB Station: NHV SB Station: BRP
10 --- NB Station: BRP SB Station: STM
11 --- NB Station: STM SB Station: NRO
12 --- NB Station: NRO SB Station: NYP
13 --- NB Station: NYP SB Station: NWK
14 --- NB Station: NWK SB Station: EWR
15 --- NB Station: EWR SB Station: MET
16 --- NB Station: MET SB Station: TRE
17 --- NB Station: TRE SB Station: PHL
18 --- NB Station: PHL SB Station: WIL
19 --- NB Station: WIL SB Station: ABE
20 --- NB Station: ABE SB Station: BAL
21 --- NB Station: BAL SB Station: BWI
22 --- NB Station: BWI SB Station: NCR
23 --- NB Station: NCR SB Station: WAS


In [28]:
lonlat_with_path_groups = pd.concat([lonlat_df, group_num, group_text, nb_path_group, sb_path_group], axis = 1)
#lonlat_without_path_groups = pd.concat([lonlat_df, group_num, group_text], axis = 1)

In [29]:
print(lonlat_with_path_groups[35:46])
#print(lonlat_without_path_groups[35:46])

    Longitude   Latitude  Group Station Pairing NB_Group SB_Group
35 -71.070625  42.347457      0         BBY-BOS      BOS      BBY
36 -71.071269  42.347457      0         BBY-BOS      BOS      BBY
37 -71.072185  42.347442      0         BBY-BOS      BOS      BBY
38 -71.072975  42.347488      0         BBY-BOS      BOS      BBY
39 -71.073370  42.347411      0         BBY-BOS      BOS      BBY
40 -71.074535  42.347319      1         BBY-RTE      BBY      RTE
41 -71.075117  42.347304      1         BBY-RTE      BBY      RTE
42 -71.075149  42.347551      1         BBY-RTE      BBY      RTE
43 -71.075410  42.347485      1         BBY-RTE      BBY      RTE
44 -71.075579  42.347441      1         BBY-RTE      BBY      RTE
45 -71.075729  42.347391      1         BBY-RTE      BBY      RTE


### Export everything to CSV

In [30]:
lonlat_with_path_groups.to_csv("./data/facts/NE_regional_lonlat.csv", index = False)

In [31]:
gdf

Unnamed: 0,STNCODE,STNNAME,STATE,AMTRAK_LOC,WEATHER_LOC,LON,LAT,NB_MILE,SB_MILE
BOS,BOS,"Boston (South Station), Massachusetts",MA,Boston,"Boston, MA",-71.055304,42.352311,457,0
BBY,BBY,"Boston (Back Bay), Massachusetts",MA,Boston,"Boston, MA",-71.075828,42.347317,456,1
RTE,RTE,"Westwood, Route 128 Station, Massachusetts",MA,Route 128,"Boston, MA",-71.147894,42.210242,446,11
PVD,PVD,"Providence, Rhode Island",RI,Providence,"Providence, RI",-71.413478,41.82949,414,43
KIN,KIN,"West Kingston, Rhode Island",RI,Kingston,"Kingston, RI",-71.560597,41.483959,387,70
WLY,WLY,"Westerly, Rhode Island",RI,Westerly,"Westerly, RI",-71.82978,41.381081,370,87
MYS,MYS,"Mystic, Connecticut",CT,Mystic,"Mystic, CT",-71.963093,41.350934,361,96
NLC,NLC,"New London, Connecticut",CT,New London,"New London, CT",-72.093225,41.354267,352,105
OSB,OSB,"Old Saybrook, Connecticut",CT,Old Saybrook,"Old Saybrook, CT",-72.376818,41.30039,344,123
NHV,NHV,"New Haven, Connecticut",CT,New Haven,"New Haven, CT",-72.92667,41.297714,301,156


In [32]:
gdf.to_csv('./data/facts/geo_stations_info.csv', index = False)