# Geojson Data Subsetting

In [1]:
import json
from collections import defaultdict

## Part 1: Routes
Extract the desired subset of data from the large routes file.

#### 1. Read in the GeoJSON file

In [2]:
# Relative path to the complete Amtrak routes GeoJSON file.
path_to_routes = './data/geojson/Amtrak_Routes.geojson'

# GeoJSON is specified as UTF-8 (RFC 7946). Passing the encoding explicitly
# keeps the read from depending on the platform's default locale encoding.
with open(path_to_routes, encoding='utf-8') as f1:
    gj_routes = json.load(f1)

#### 2. Extract the indices of desired routes from the complete list

In [3]:
# Route names of interest; print the position and name of every feature
# whose NAME property matches one of them.
routes = ['Acela', 'Regional', 'Empire Service']
for idx, feature in enumerate(gj_routes['features']):
    route_name = feature['properties']['NAME']
    if route_name in routes:
        print(idx, route_name)

0 Acela
14 Empire Service
28 Regional


#### 3. Create a new dictionary of the subset in geojson format and save to a new geojson file

In [4]:
# Select the route to export by its NAME property rather than by a hard-coded
# position, so this cell keeps picking the right feature even if the order of
# features in the source file changes. Per the listing printed in the previous
# step, 'Regional' currently sits at index 28.
# The Acela (index 0) and Empire Service (index 14) features are not exported.
regional_matches = [
    feature for feature in gj_routes['features']
    if feature['properties']['NAME'] == 'Regional'
]
if not regional_matches:
    raise ValueError("No feature named 'Regional' found in the routes file")
ne_regional_route = regional_matches[0]

# Wrap the selected feature in a GeoJSON FeatureCollection.
project_routes = {
    'type': 'FeatureCollection',
    'name': 'Amtrak_Project_Routes',
    'crs': {
        'type': 'name',
        'properties': {
            'name': 'urn:ogc:def:crs:OGC:1.3:CRS84'}
    },
    'features': [ne_regional_route]
}

In [5]:
# Destination file for the routes subset.
path_to_save_routes = './data/geojson/Amtrak_Project_Routes.geojson'

# Serialise the collection to a JSON string, then write it out in one call.
with open(path_to_save_routes, 'w') as savefile:
    savefile.write(json.dumps(project_routes))
    

## Part 2: Stations
Extract the desired subset of data from the large stations file.

#### 1. Read in the GeoJSON file

In [6]:
# Relative path to the complete Amtrak stations GeoJSON file.
path_to_stations = './data/geojson/Amtrak_Stations.geojson'

# GeoJSON is specified as UTF-8 (RFC 7946). Passing the encoding explicitly
# keeps the read from depending on the platform's default locale encoding.
with open(path_to_stations, encoding='utf-8') as f2:
    gj_stations = json.load(f2)

#### 2. Define the list of station codes, taken from the unique stations present in the collected data

In [7]:
# Station codes to keep, taken from the unique stations in the collected data.
# A set gives constant-time membership checks when filtering features.
stations = {
    'WAS', 'NCR', 'BWI', 'BAL', 'WIL', 'PHL', 'TRE', 'NWK', 'NYP',
    'STM', 'NHV', 'NLC', 'KIN', 'PVD', 'RTE', 'BBY', 'BOS',
}


#### 3. Extract the indices of desired stations from the complete list

In [8]:
# Record the position of every feature whose STNCODE is in the wanted set,
# printing each match as it is found.
station_index = []
for idx, feature in enumerate(gj_stations['features']):
    code = feature['properties']['STNCODE']
    if code not in stations:
        continue
    station_index.append(idx)
    print(idx, code)

0 NYP
1 WAS
2 PHL
4 BOS
7 BAL
9 PVD
11 WIL
12 BWI
13 NWK
15 NHV
16 BBY
23 RTE
24 TRE
26 STM
55 NCR
59 KIN
62 NLC


#### 4. Create dictionary from specific indices of the desired stations

In [9]:
# Pull the selected station features out of the full collection, in the same
# order as the recorded indices.
features_list = [gj_stations['features'][idx] for idx in station_index]

# Print the station codes that were picked up as a quick check.
print([station['properties']['STNCODE'] for station in features_list])

['NYP', 'WAS', 'PHL', 'BOS', 'BAL', 'PVD', 'WIL', 'BWI', 'NWK', 'NHV', 'BBY', 'RTE', 'TRE', 'STM', 'NCR', 'KIN', 'NLC']


#### 5. Create a new dictionary of the subset in geojson format and save to a new geojson file

In [10]:
# CRS member attached to the output collection.
stations_crs = {
    'type': 'name',
    'properties': {'name': 'urn:ogc:def:crs:OGC:1.3:CRS84'},
}

# GeoJSON FeatureCollection wrapping the selected station features.
# Keyword order matches the original key order, so the serialised output is
# laid out the same way.
project_stations = dict(
    type='FeatureCollection',
    name='Amtrak_Project_Stations',
    crs=stations_crs,
    features=features_list,
)

In [11]:
# Destination file for the stations subset.
path_to_save_stations = './data/geojson/Amtrak_Project_Stations.geojson'

# Serialise the collection to a JSON string, then write it out in one call.
with open(path_to_save_stations, 'w') as save_f2:
    save_f2.write(json.dumps(project_stations))
    