# GeoJSON Data Subsetting

In [1]:
import json
from collections import defaultdict
import geopandas as gpd

## Part 1: Routes
Extract the desired subset of data from the large routes file.

#### 1. Read in the geojson files

In [2]:
path_to_routes = './data/geojson/Amtrak_Routes.geojson'

# GeoJSON is UTF-8 by specification (RFC 7946). Pass the encoding explicitly so
# parsing does not depend on the platform's default locale encoding.
with open(path_to_routes, encoding='utf-8') as f1:
    gj_routes = json.load(f1)

#### 2. Extract the indices of desired routes from the complete list

In [3]:
# Route names of interest; print the position and name of every matching
# feature in the full routes collection.
routes = ['Acela', 'Regional', 'Empire Service']
for idx, feature in enumerate(gj_routes['features']):
    route_name = feature['properties']['NAME']
    if route_name in routes:
        print(idx, route_name)

0 Acela
14 Empire Service
28 Regional


In [4]:
# Inspect the properties attached to the first feature in the collection.
first_route = gj_routes['features'][0]
print(first_route['properties'])

{'OBJECTID': 1, 'NAME': 'Acela', 'Shape_Leng': 741010.475197, 'Shape_Le_1': 741010.475197, 'Shape_Length': 977920.7454226009}


#### 3. Create a new dictionary of the subset in geojson format and save to a new geojson file

In [5]:
# Pick the Regional route feature by its NAME property rather than by a
# hard-coded list position, so the subset stays correct if the feature order
# in the source file ever changes.
target_route_name = 'Regional'
matching_routes = [
    feature for feature in gj_routes['features']
    if feature['properties']['NAME'] == target_route_name
]
if len(matching_routes) != 1:
    raise ValueError(
        'Expected exactly one route named {!r}, found {}'.format(
            target_route_name, len(matching_routes)
        )
    )
ne_regional_route = matching_routes[0]

# GeoJSON FeatureCollection holding only the selected route.
project_routes = {
    'type': 'FeatureCollection',
    'name': 'Amtrak_Project_Routes',
    'crs': {
        'type': 'name',
        'properties': {
            'name': 'urn:ogc:def:crs:OGC:1.3:CRS84'}
    },
    'features': [ne_regional_route]
}

In [6]:
# Write the routes subset to its own GeoJSON file.
path_to_save_routes = './data/geojson/Amtrak_Project_Routes.geojson'
with open(path_to_save_routes, mode='w') as routes_out:
    json.dump(project_routes, routes_out)
    

## Part 2: Stations
Extract the desired subset of data from the large stations file.

#### 1. Read in the geojson files

In [7]:
path_to_stations = './data/geojson/Amtrak_Stations.geojson'

# GeoJSON is UTF-8 by specification (RFC 7946). Pass the encoding explicitly so
# parsing does not depend on the platform's default locale encoding.
with open(path_to_stations, encoding='utf-8') as f2:
    gj_stations = json.load(f2)

#### 2. Stations list obtained from unique stations present in the data collected

In [8]:
# Station codes to keep; a set gives constant-time membership checks.
stations = {
    'WAS', 'NCR', 'BWI', 'BAL', 'WIL', 'PHL',
    'TRE', 'NWK', 'NYP', 'STM', 'NHV', 'NLC',
    'KIN', 'PVD', 'RTE', 'BBY', 'BOS',
}


#### 3. Extract the indices of desired stations from the complete list

In [9]:
# Collect the position of every feature whose station code is in `stations`,
# printing each match as it is found.
station_index = []
for idx, feature in enumerate(gj_stations['features']):
    code = feature['properties']['STNCODE']
    if code in stations:
        station_index.append(idx)
        print(idx, code)

0 NYP
1 WAS
2 PHL
4 BOS
7 BAL
9 PVD
11 WIL
12 BWI
13 NWK
15 NHV
16 BBY
23 RTE
24 TRE
26 STM
55 NCR
59 KIN
62 NLC


#### 4. Build the list of features for the desired stations from their indices

In [10]:
# Pull the selected station features out of the full collection, in index order.
features_list = [gj_stations['features'][idx] for idx in station_index]

# Sanity check: show the station codes that were selected.
print([station['properties']['STNCODE'] for station in features_list])

['NYP', 'WAS', 'PHL', 'BOS', 'BAL', 'PVD', 'WIL', 'BWI', 'NWK', 'NHV', 'BBY', 'RTE', 'TRE', 'STM', 'NCR', 'KIN', 'NLC']


#### 5. Create a new dictionary of the subset in geojson format and save to a new geojson file

In [11]:
# Named CRS entry stored on the output collection.
stations_crs = {
    'type': 'name',
    'properties': {'name': 'urn:ogc:def:crs:OGC:1.3:CRS84'},
}

# GeoJSON FeatureCollection holding only the selected station features.
project_stations = dict(
    type='FeatureCollection',
    name='Amtrak_Project_Stations',
    crs=stations_crs,
    features=features_list,
)

In [12]:
# Write the stations subset to its own GeoJSON file.
path_to_save_stations = './data/geojson/Amtrak_Project_Stations.geojson'
with open(path_to_save_stations, mode='w') as stations_out:
    json.dump(project_stations, stations_out)
    

#### 6. Load the file again (checking that it works with geopandas!) and update station ordering

In [13]:
# Desired row order for the stations, listed from BOS down to WAS.
amtrak_stations = [
    'BOS', 'BBY', 'RTE', 'PVD', 'KIN', 'NLC',
    'NHV', 'STM', 'NYP', 'NWK', 'TRE', 'PHL',
    'WIL', 'BAL', 'BWI', 'NCR', 'WAS',
]

# Round-trip check: read the saved subset back with geopandas.
geo_stations = gpd.read_file('./data/geojson/Amtrak_Project_Stations.geojson')

#### The stations are not stored in a meaningful order, so I reorder them southbound (north to south, Boston to Washington) using the list above

In [14]:
# Preview the first five rows to see the order in which the stations were loaded.
geo_stations.head(n=5)

Unnamed: 0,OBJECTID,STNCODE,STNNAME,CITY2,STATE,STFIPS,urban,geometry
0,1,NYP,"New York (Penn Station), New York",New York,NY,36,YES,POINT (-73.99446 40.75033)
1,2,WAS,"Washington, District of Columbia",Washington,DC,11,YES,POINT (-77.00642 38.89699)
2,3,PHL,"Philadelphia (30th St), Pennsylvania",Philadelphia,PA,42,YES,POINT (-75.18104 39.95562)
3,5,BOS,"Boston (South Station), Massachusetts",Boston,MA,25,YES,POINT (-71.05530 42.35231)
4,8,BAL,"Baltimore (Penn Station), Maryland",Baltimore,MD,24,YES,POINT (-76.61569 39.30730)


In [15]:
# Index the frame by station code; set_index on a column label removes that
# column from the frame by default.
geo_stations = geo_stations.set_index('STNCODE')

In [16]:
# Label-based selection with a list returns the rows in the order of that list,
# which puts the stations in the sequence given by `amtrak_stations`.
geo_stations = geo_stations.loc[amtrak_stations, :]
geo_stations

Unnamed: 0_level_0,OBJECTID,STNNAME,CITY2,STATE,STFIPS,urban,geometry
STNCODE,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1
BOS,5,"Boston (South Station), Massachusetts",Boston,MA,25,YES,POINT (-71.05530 42.35231)
BBY,17,"Boston (Back Bay), Massachusetts",Boston,MA,25,YES,POINT (-71.07583 42.34732)
RTE,24,"Westwood, Route 128 Station, Massachusetts",Route 128,MA,25,YES,POINT (-71.14789 42.21024)
PVD,10,"Providence, Rhode Island",Providence,RI,44,YES,POINT (-71.41348 41.82949)
KIN,60,"West Kingston, Rhode Island",Kingston,RI,44,,POINT (-71.56060 41.48396)
NLC,63,"New London, Connecticut",New London,CT,9,YES,POINT (-72.09322 41.35427)
NHV,16,"New Haven, Connecticut",New Haven,CT,9,YES,POINT (-72.92667 41.29771)
STM,27,"Stamford, Connecticut",Stamford,CT,9,YES,POINT (-73.54216 41.04713)
NYP,1,"New York (Penn Station), New York",New York,NY,36,YES,POINT (-73.99446 40.75033)
NWK,14,"Newark (Penn Station), New Jersey",Newark,NJ,34,YES,POINT (-74.16475 40.73471)


In [17]:
# Overwrite the stations subset file with the reordered frame.
path_to_save_stations = './data/geojson/Amtrak_Project_Stations.geojson'
geo_stations.to_file(filename=path_to_save_stations, driver='GeoJSON')