# Obtaining BART Data and Writing to JSON File


- [BART API testing](#section1)
- [Loading BART Data into JSON File](#section2)

<a id='section1'></a>

## BART API testing

In [2]:
import requests
import xmltodict

In [3]:
# Call BART's `routesched` endpoint, which returns the schedule for a single route.
# Route 6 (the Daly City-Fremont line) is used here as the example.
# The timeout keeps this cell from hanging indefinitely if the API is unreachable, and
# raise_for_status() surfaces an HTTP error here rather than as a confusing XML parse
# failure in a later cell.
response = requests.get(
    'http://api.bart.gov/api/sched.aspx?cmd=routesched&route=6&key=MW9S-E7SL-26DU-VV8V',
    timeout=30,
)
response.raise_for_status()

In [4]:
# Raw body of the HTTP response; it is handed to the XML parser in the next cell.
text = response.content

In [5]:
# Parse the XML response body into a nested dictionary.
bart = xmltodict.parse(text)

In [6]:
# Top-level keys of the parsed response.
bart.keys()

odict_keys(['root'])

In [7]:
# Keys nested under 'root'.
bart['root'].keys()

odict_keys(['uri', 'date', 'sched_num', 'route', 'message'])

In [26]:
# Keys nested under 'route'.
bart['root']['route'].keys()

odict_keys(['train'])

In [77]:
# Walking the dict down to 'train' gives a list with one entry per train on the route.
# Each entry holds the train's '@index' and a 'stop' list with one dict per station stop.
# Show the first two trains.
bart['root']['route']['train'][:2]

[OrderedDict([('@index', '1'),
              ('stop',
               [OrderedDict([('@station', 'DALY'),
                             ('@origTime', '6:13 AM'),
                             ('@bikeflag', '1')]),
                OrderedDict([('@station', 'BALB'),
                             ('@origTime', '6:17 AM'),
                             ('@bikeflag', '1')]),
                OrderedDict([('@station', 'GLEN'),
                             ('@origTime', '6:19 AM'),
                             ('@bikeflag', '1')]),
                OrderedDict([('@station', '24TH'),
                             ('@origTime', '6:22 AM'),
                             ('@bikeflag', '1')]),
                OrderedDict([('@station', '16TH'),
                             ('@origTime', '6:24 AM'),
                             ('@bikeflag', '1')]),
                OrderedDict([('@station', 'CIVC'),
                             ('@origTime', '6:26 AM'),
                             ('@bikeflag', '1')]),
    

In [9]:
# The 'date' field of the response.
bart['root']['date']

'7/11/2016'

In [10]:
# Type of the parsed object.
type(bart)

collections.OrderedDict

In [11]:
# Number of top-level keys.
len(bart)

1

In [12]:
# Number of keys under 'root'.
len(bart['root'])

5

In [13]:
# Number of trains listed for this route.
len(bart['root']['route']['train'])

52

In [28]:
# Container type that holds the trains.
type(bart['root']['route']['train'])

list

In [60]:
# Stops of the first train in the list.
bart['root']['route']['train'][0]['stop']

[OrderedDict([('@station', 'DALY'),
              ('@origTime', '6:13 AM'),
              ('@bikeflag', '1')]),
 OrderedDict([('@station', 'BALB'),
              ('@origTime', '6:17 AM'),
              ('@bikeflag', '1')]),
 OrderedDict([('@station', 'GLEN'),
              ('@origTime', '6:19 AM'),
              ('@bikeflag', '1')]),
 OrderedDict([('@station', '24TH'),
              ('@origTime', '6:22 AM'),
              ('@bikeflag', '1')]),
 OrderedDict([('@station', '16TH'),
              ('@origTime', '6:24 AM'),
              ('@bikeflag', '1')]),
 OrderedDict([('@station', 'CIVC'),
              ('@origTime', '6:26 AM'),
              ('@bikeflag', '1')]),
 OrderedDict([('@station', 'POWL'),
              ('@origTime', '6:27 AM'),
              ('@bikeflag', '1')]),
 OrderedDict([('@station', 'MONT'),
              ('@origTime', '6:29 AM'),
              ('@bikeflag', '1')]),
 OrderedDict([('@station', 'EMBR'),
              ('@origTime', '6:30 AM'),
              ('@bikeflag', 

Now to write this data to a JSON file so that we can load it into PostgreSQL.

In [78]:
import json

In [79]:
# Serialize everything under 'root' to a JSON-formatted string.
bart_as_string = json.dumps(bart['root'])

In [80]:
# Show the serialized JSON string.
print(bart_as_string)

{"uri": "http://api.bart.gov/api/sched.aspx?cmd=routesched&route=6", "date": "7/11/2016", "sched_num": "39", "route": {"train": [{"@index": "1", "stop": [{"@station": "DALY", "@origTime": "6:13 AM", "@bikeflag": "1"}, {"@station": "BALB", "@origTime": "6:17 AM", "@bikeflag": "1"}, {"@station": "GLEN", "@origTime": "6:19 AM", "@bikeflag": "1"}, {"@station": "24TH", "@origTime": "6:22 AM", "@bikeflag": "1"}, {"@station": "16TH", "@origTime": "6:24 AM", "@bikeflag": "1"}, {"@station": "CIVC", "@origTime": "6:26 AM", "@bikeflag": "1"}, {"@station": "POWL", "@origTime": "6:27 AM", "@bikeflag": "1"}, {"@station": "MONT", "@origTime": "6:29 AM", "@bikeflag": "1"}, {"@station": "EMBR", "@origTime": "6:30 AM", "@bikeflag": "1"}, {"@station": "WOAK", "@origTime": "6:37 AM", "@bikeflag": "1"}, {"@station": "LAKE", "@origTime": "6:43 AM", "@bikeflag": "1"}, {"@station": "FTVL", "@origTime": "6:46 AM", "@bikeflag": "1"}, {"@station": "COLS", "@origTime": "6:50 AM", "@bikeflag": "1"}, {"@station": "

In [81]:
# Open in 'w' (truncate) rather than 'a' (append): re-running this cell in append mode
# would concatenate a second JSON object onto the first and leave the file unparseable.
with open('bart_data.json', 'w') as f:
    f.write(bart_as_string)

Reminder: this was only one route (route 6, Daly City-Fremont). We need all the other routes as well.

<a id='section2'></a>

## Loading BART Data into JSON File

BART routes are numbered 1 through 8, 11 through 12, and 19 through 20. With that in mind, let's collect the JSON of the schedule for each route and append it to one file, one route per line.

In [1]:
import requests
import xmltodict
import json

In [2]:
# Preview the API URL for every route we will fetch: routes 1-8, 11-12, and 19-20.
url_template = 'http://api.bart.gov/api/sched.aspx?cmd=routesched&route={0}&key=MW9S-E7SL-26DU-VV8V'
for route_num in list(range(1, 9)) + [11, 12, 19, 20]:
    url = url_template.format(route_num)
    print(url)

http://api.bart.gov/api/sched.aspx?cmd=routesched&route=1&key=MW9S-E7SL-26DU-VV8V
http://api.bart.gov/api/sched.aspx?cmd=routesched&route=2&key=MW9S-E7SL-26DU-VV8V
http://api.bart.gov/api/sched.aspx?cmd=routesched&route=3&key=MW9S-E7SL-26DU-VV8V
http://api.bart.gov/api/sched.aspx?cmd=routesched&route=4&key=MW9S-E7SL-26DU-VV8V
http://api.bart.gov/api/sched.aspx?cmd=routesched&route=5&key=MW9S-E7SL-26DU-VV8V
http://api.bart.gov/api/sched.aspx?cmd=routesched&route=6&key=MW9S-E7SL-26DU-VV8V
http://api.bart.gov/api/sched.aspx?cmd=routesched&route=7&key=MW9S-E7SL-26DU-VV8V
http://api.bart.gov/api/sched.aspx?cmd=routesched&route=8&key=MW9S-E7SL-26DU-VV8V
http://api.bart.gov/api/sched.aspx?cmd=routesched&route=11&key=MW9S-E7SL-26DU-VV8V
http://api.bart.gov/api/sched.aspx?cmd=routesched&route=12&key=MW9S-E7SL-26DU-VV8V
http://api.bart.gov/api/sched.aspx?cmd=routesched&route=19&key=MW9S-E7SL-26DU-VV8V
http://api.bart.gov/api/sched.aspx?cmd=routesched&route=20&key=MW9S-E7SL-26DU-VV8V


In [15]:
# Route numbers to fetch: 1 through 8, plus 11, 12, 19 and 20.
routes = list(range(1, 9)) + [11, 12, 19, 20]
print(routes)

[1, 2, 3, 4, 5, 6, 7, 8, 11, 12, 19, 20]


In [3]:
def get_BART_schedule(route_num, timeout=30):
    """Fetch and parse the BART `routesched` schedule for one route.

    Parameters
    ----------
    route_num : int or str
        Route number, substituted into the ``route`` query parameter of the URL.
    timeout : float, optional
        Seconds to wait for the API before giving up (default 30). Without a
        timeout an unreachable server would hang the notebook indefinitely.

    Returns
    -------
    dict-like
        The result of ``xmltodict.parse`` on the response body. The schedule
        itself sits under the top-level ``'root'`` key.

    Raises
    ------
    requests.exceptions.HTTPError
        If the API answers with a 4xx/5xx status.
    requests.exceptions.Timeout
        If the API does not respond within ``timeout`` seconds.
    """
    # URL to obtain the schedule data for route number [route_num]
    url = 'http://api.bart.gov/api/sched.aspx?cmd=routesched&route={0}&key=MW9S-E7SL-26DU-VV8V'.format(route_num)
    response = requests.get(url, timeout=timeout)  # making API call
    # Fail here on an HTTP error instead of trying to parse an error page as XML.
    response.raise_for_status()
    # Parse the XML body into a nested dictionary.
    return xmltodict.parse(response.content)

In [16]:
# Open the output file once, in 'w' mode, so that re-running this cell rebuilds the file
# from scratch instead of appending a duplicate set of rows to the previous run's output.
with open('bart_schedules_by_route.json', 'w') as f:
    # For each BART route
    for route_num in routes:
        sched = get_BART_schedule(route_num)  # Get parsed schedule for this route
        sched_as_string = json.dumps(sched['root'])  # Converting dict to string containing JSON object
        f.write(sched_as_string + '\n')   # One route per line

Checking the file:

In [17]:
# Read the whole file back into a single string to check what was written.
with open('bart_schedules_by_route.json', 'r') as schedules_file:
    txt = schedules_file.read()

In [18]:
# Number of characters read from the file.
len(txt)

1051060

In [19]:
# First line of the file.
txt.split('\n')[0]

'{"uri": "http://api.bart.gov/api/sched.aspx?cmd=routesched&route=1", "date": "7/13/2016", "sched_num": "39", "route": {"train": [{"@index": "1", "stop": [{"@station": "PITT", "@origTime": "4:02 AM", "@bikeflag": "1"}, {"@station": "NCON", "@origTime": "4:08 AM", "@bikeflag": "1"}, {"@station": "CONC", "@origTime": "4:12 AM", "@bikeflag": "1"}, {"@station": "PHIL", "@origTime": "4:17 AM", "@bikeflag": "1"}, {"@station": "WCRK", "@origTime": "4:20 AM", "@bikeflag": "1"}, {"@station": "LAFY", "@origTime": "4:25 AM", "@bikeflag": "1"}, {"@station": "ORIN", "@origTime": "4:30 AM", "@bikeflag": "1"}, {"@station": "ROCK", "@origTime": "4:35 AM", "@bikeflag": "1"}, {"@station": "MCAR", "@origTime": "4:38 AM", "@bikeflag": "1"}, {"@station": "19TH", "@origTime": "4:42 AM", "@bikeflag": "1"}, {"@station": "12TH", "@origTime": "4:43 AM", "@bikeflag": "1"}, {"@station": "WOAK", "@origTime": "4:48 AM", "@bikeflag": "1"}, {"@station": "EMBR", "@origTime": "4:55 AM", "@bikeflag": "1"}, {"@station": 

In [20]:
# Last element after splitting on newlines.
txt.split('\n')[-1]

''

__Nice. Now `bart_schedules_by_route.json` has one row for each route (the empty last element is just the result of the trailing newline after the final row).__