In [1]:
# relative import
import os,sys,inspect
currentdir = os.path.dirname(os.path.abspath(inspect.getfile(inspect.currentframe())))
parentdir = os.path.dirname(currentdir)
sys.path.insert(0,parentdir) 

from src.buses.reportcard_helpers import *

In [2]:
# test using the 80Mb 119 database from June 2018
source = 'nj'
stop_report = 30189 # webster and congress
route_report = 119


### debugging parse_route_xml

In [3]:
# alex original

import os
import datetime
import xml.etree.ElementTree

# API like: https://github.com/harperreed/transitapi/wiki/Unofficial-Bustracker-API

_sources = {
  'nj': 'http://mybusnow.njtransit.com/bustime/map/'
}

_api = {
  'all_buses': 'getBusesForRouteAll.jsp',
  'routes': 'getRoutePoints.jsp',
  'pattern_points': 'getPatternPoints.jsp',
  'stop_predictions': 'getStopPredictions.jsp',
  'bus_predictions': 'getBusPredictions.jsp',
  'buses_for_route': 'getBusesForRoute.jsp',
  'schedules': 'schedules.jsp',
  'time_and_temp': 'getTimeAndTemp.jsp',
  'route_directions_xml':  'routeDirectionStopAsXML',
}

# parsers done for all_buses, routes, stop_predictions
# ignored: time_and_temp
# not available / not fully documented: schedules (not sure what the right kwargs are, agency=1 & route=87 ?)


def _gen_command(source, func, **kwargs):
    result = _sources[source] + _api[func]
    params = ''
    for k, v in kwargs.items():
        params = params + k + '=' + str(v) + '&'
    if params:
        result += '?' + params[:-1]
    return result

def _cond_get_single(tree, key, default=''):
    res = tree.find(key)
    if res is not None:
        return res.text 
    return default

class KeyValueData:
    def __init__(self, **kwargs):
        self.name = 'KeyValueData'
        for k, v in kwargs.items():
            setattr(self, k, v)

    def add_kv(self, key, value):
        setattr(self, key, value)

    def __repr__(self):
        line = []
        for prop, value in vars(self).iteritems():
            line.append((prop, value))
        line.sort(key=lambda x: x[0])
        out_string = ' '.join([k + '=' + str(v) for k, v in line])
        return self.name + '[%s]' % out_string

class Route(KeyValueData):

    class Path(KeyValueData):
        def __init__(self):
            KeyValueData.__init__(self)
            self.name = 'Path'
            self.points = []
            self.id = ''
            self.d = ''
            self.dd = ''

    class Point:
        def __init__(self):
            self.lat = ''
            self.lon = ''
            self.d = ''

    class Stop:
        def __init__(self):
            self.identity = ''
            self.st = ''
            self.lat = ''
            self.lon = ''
            self.d = ''

    def __init__(self):
        KeyValueData.__init__(self)
        self.name = 'route'
        self.identity = ''
        self.paths = []


def parse_route_xml(data):

    routes = list()

    route = Route()

    e = xml.etree.ElementTree.fromstring(data)
    for child in e.getchildren():

        if len(child.getchildren()) == 0:
            if child.tag == 'id':
                route.identity = child.text
            else:
                route.add_kv(child.tag, child.text)

        if child.tag == 'pas':

            for pa in child.findall('pa'):

                path = Route.Path()

                for path_child in pa.getchildren():
                    if len(path_child.getchildren()) == 0:
                        if path_child.tag == 'id':
                            path.id = path_child.text
                        elif path_child.tag == 'd':
                            path.d = path_child.text
                        elif path_child.tag == 'dd':
                            path.dd = path_child.text
                        else:
                            path.add_kv(path_child.tag, path_child.text)
                    elif path_child.tag == 'pt':
                        pt = path_child
                        stop = False
                        for bs in pt.findall('bs'):
                            stop = True
                            _stop_id = _cond_get_single(bs, 'id')
                            _stop_st = _cond_get_single(bs, 'st')
                            break

                        p = None
                        if not stop:
                            p = Route.Point()
                        else:
                            p = Route.Stop()
                            p.identity = _stop_id
                            p.st = _stop_st

                        p.d = path.d
                        p.lat = _cond_get_single(pt, 'lat')
                        p.lon = _cond_get_single(pt, 'lon')

                        path.points.append(p) # <------ dont append to same list each time

                route.paths.append(path)

                routes.append(route) # is this the change that broke stopwatcher? used to just be routes.append (route)
                # how can i change the waypoint map thing so this doesnt have to change, or change stopwatcher...

            break

    return routes

def get_xml_data(source, function, **kwargs):
    import urllib2
    data = urllib2.urlopen(_gen_command(source, function, **kwargs)).read()
    return data


In [4]:
# what it outputs
routes=parse_route_xml(get_xml_data(source, 'routes', route=route_report))
routes



[route[c=#ff3399 displayDesignator=119 identity=119 name=route nm=119 Bayonne-Jersey City-NY paths=[Path[d=Bayonne dd=Bayonne directionRtpiFeedName=None id=1860 l=72312.0 name=Path points=[<__main__.Stop instance at 0x10cb19440>, <__main__.Point instance at 0x10cb19518>, <__main__.Point instance at 0x10cb195a8>, <__main__.Point instance at 0x10cb195f0>, <__main__.Point instance at 0x10cb19680>, <__main__.Point instance at 0x10cb19710>, <__main__.Point instance at 0x10cb197a0>, <__main__.Point instance at 0x10cb19830>, <__main__.Point instance at 0x10cb198c0>, <__main__.Point instance at 0x10cb19950>, <__main__.Point instance at 0x10cb199e0>, <__main__.Point instance at 0x10cb19a70>, <__main__.Point instance at 0x10cb19b00>, <__main__.Point instance at 0x10cb19b90>, <__main__.Point instance at 0x10cb19c20>, <__main__.Point instance at 0x10cb19cb0>, <__main__.Point instance at 0x10cb19d40>, <__main__.Point instance at 0x10cb19dd0>, <__main__.Point instance at 0x10cb19e60>, <__main__.Poin

In [12]:
routes[0].paths

[Path[d=Bayonne dd=Bayonne directionRtpiFeedName=None id=1860 l=72312.0 name=Path points=[<__main__.Stop instance at 0x10cb19440>, <__main__.Point instance at 0x10cb19518>, <__main__.Point instance at 0x10cb195a8>, <__main__.Point instance at 0x10cb195f0>, <__main__.Point instance at 0x10cb19680>, <__main__.Point instance at 0x10cb19710>, <__main__.Point instance at 0x10cb197a0>, <__main__.Point instance at 0x10cb19830>, <__main__.Point instance at 0x10cb198c0>, <__main__.Point instance at 0x10cb19950>, <__main__.Point instance at 0x10cb199e0>, <__main__.Point instance at 0x10cb19a70>, <__main__.Point instance at 0x10cb19b00>, <__main__.Point instance at 0x10cb19b90>, <__main__.Point instance at 0x10cb19c20>, <__main__.Point instance at 0x10cb19cb0>, <__main__.Point instance at 0x10cb19d40>, <__main__.Point instance at 0x10cb19dd0>, <__main__.Point instance at 0x10cb19e60>, <__main__.Point instance at 0x10cb19ef0>, <__main__.Point instance at 0x10cb19f80>, <__main__.Point instance at 0

In [5]:
print "number of routes", len(routes)
for route in routes:
    print "number of paths", len(route.paths)
    for p in route.paths:
        # print p.d
        print "number of points", len(p.points)
        for p in p.points:
            if isinstance(p, Route.Stop):
                print p.st
            else:
                print '-',


number of routes 2
number of paths 2
number of points 241
PORT AUTHORITY BUS TERMINAL
- - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - WILLOW AVE + 19TH ST
- - - - WILLOW AVE + 15TH ST
CONGRESS ST + PALISADE AVE
CONGRESS ST + WEBSTER AVE
CONGRESS ST + HANCOCK AVE
CONGRESS ST + CENTRAL AVE
- - CENTRAL AVE + SOUTH ST
CENTRAL AVE + THORNE ST
CENTRAL AVE + BOWERS ST
CENTRAL AVE + GRIFFITH ST
CENTRAL AVE + HUTTON ST
CENTRAL AVE + SHERMAN PL
CENTRAL AVE + MANHATTAN AVE
CENTRAL AVE + BOOREAM AVE
CENTRAL AVE + PERSHING PLAZA
CENTRAL AVE + WAVERLY ST
CENTRAL AVE + LAIDLAW AVE
CENTRAL AVE + BEACON AVE
CENTRAL AVE + SAINT PAULS AVE
- CENTRAL AVE + HOBOKEN AVE
- HOBOKEN AVE + SUMMIT AVE
- SUMMIT AVE + PAVONIA AVE
- - - JOURNAL SQUARE LANE D
- - - - - - - - - - - - - - - - SIP AVE + TONNELLE AVE
- JFK BLVD + SIP AVE
- - - - JFK BLVD + STUYVESANT AVE
- JFK BLVD + HIGHLAND AVE
- JFK BLVD + MONTGOMERY ST
- - - - - - JFK BLVD + DUNCAN AVE
- - JFK BLVD 

# my modified

In [None]:
# what uses it (insert into db in original script)

In [6]:
# my modified

import os
import datetime
import xml.etree.ElementTree

# API like: https://github.com/harperreed/transitapi/wiki/Unofficial-Bustracker-API

_sources = {
  'nj': 'http://mybusnow.njtransit.com/bustime/map/'
}

_api = {
  'all_buses': 'getBusesForRouteAll.jsp',
  'routes': 'getRoutePoints.jsp',
  'pattern_points': 'getPatternPoints.jsp',
  'stop_predictions': 'getStopPredictions.jsp',
  'bus_predictions': 'getBusPredictions.jsp',
  'buses_for_route': 'getBusesForRoute.jsp',
  'schedules': 'schedules.jsp',
  'time_and_temp': 'getTimeAndTemp.jsp',
  'route_directions_xml':  'routeDirectionStopAsXML',
}

# parsers done for all_buses, routes, stop_predictions
# ignored: time_and_temp
# not available / not fully documented: schedules (not sure what the right kwargs are, agency=1 & route=87 ?)


def _gen_command(source, func, **kwargs):
    result = _sources[source] + _api[func]
    params = ''
    for k, v in kwargs.items():
        params = params + k + '=' + str(v) + '&'
    if params:
        result += '?' + params[:-1]
    return result

def _cond_get_single(tree, key, default=''):
    res = tree.find(key)
    if res is not None:
        return res.text 
    return default

class KeyValueData:
    def __init__(self, **kwargs):
        self.name = 'KeyValueData'
        for k, v in kwargs.items():
            setattr(self, k, v)

    def add_kv(self, key, value):
        setattr(self, key, value)

    def __repr__(self):
        line = []
        for prop, value in vars(self).iteritems():
            line.append((prop, value))
        line.sort(key=lambda x: x[0])
        out_string = ' '.join([k + '=' + str(v) for k, v in line])
        return self.name + '[%s]' % out_string

class Route(KeyValueData):

    class Path(KeyValueData):
        def __init__(self):
            KeyValueData.__init__(self)
            self.name = 'Path'
            self.points = []
            self.id = ''
            self.d = ''
            self.dd = ''

    class Point:
        def __init__(self):
            self.lat = ''
            self.lon = ''
            self.d = ''

    class Stop:
        def __init__(self):
            self.identity = ''
            self.st = ''
            self.lat = ''
            self.lon = ''
            self.d = ''

    def __init__(self):
        KeyValueData.__init__(self)
        self.name = 'route'
        self.identity = ''
        self.paths = []


def parse_route_xml(data):

    routes = list()

    route = Route()

    e = xml.etree.ElementTree.fromstring(data)
    for child in e.getchildren():

        if len(child.getchildren()) == 0:
            if child.tag == 'id':
                route.identity = child.text
            else:
                route.add_kv(child.tag, child.text)

        if child.tag == 'pas':

            for pa in child.findall('pa'):

                path = Route.Path()

                for path_child in pa.getchildren():
                    if len(path_child.getchildren()) == 0:
                        if path_child.tag == 'id':
                            path.id = path_child.text
                        elif path_child.tag == 'd':
                            path.d = path_child.text
                        elif path_child.tag == 'dd':
                            path.dd = path_child.text
                        else:
                            path.add_kv(path_child.tag, path_child.text)
                    elif path_child.tag == 'pt':
                        pt = path_child
                        stop = False
                        for bs in pt.findall('bs'):
                            stop = True
                            _stop_id = _cond_get_single(bs, 'id')
                            _stop_st = _cond_get_single(bs, 'st')
                            break

                        p = None
                        if not stop:
                            p = Route.Point()
                        else:
                            p = Route.Stop()
                            p.identity = _stop_id
                            p.st = _stop_st

                        p.d = path.d
                        p.lat = _cond_get_single(pt, 'lat')
                        p.lon = _cond_get_single(pt, 'lon')

                        path.points.append(p) # <------ dont append to same list each time

                route.paths.append(path)

                routes.append(route.paths) # is this the change that broke stopwatcher? used to just be routes.append (route)
                # how can i change the waypoint map thing so this doesnt have to change, or change stopwatcher...

            break

    return routes

def get_xml_data(source, function, **kwargs):
    import urllib2
    data = urllib2.urlopen(_gen_command(source, function, **kwargs)).read()
    return data


In [7]:
# what it outputs
paths=parse_route_xml(get_xml_data(source, 'routes', route=route_report))
paths



[[Path[d=Bayonne dd=Bayonne directionRtpiFeedName=None id=1860 l=72312.0 name=Path points=[<__main__.Stop instance at 0x10ccfaa70>, <__main__.Point instance at 0x10ccfab00>, <__main__.Point instance at 0x10ccfab90>, <__main__.Point instance at 0x10ccfac20>, <__main__.Point instance at 0x10ccfacb0>, <__main__.Point instance at 0x10ccfad40>, <__main__.Point instance at 0x10ccfadd0>, <__main__.Point instance at 0x10ccfae60>, <__main__.Point instance at 0x10ccfaef0>, <__main__.Point instance at 0x10ccfaf80>, <__main__.Point instance at 0x10cd02050>, <__main__.Point instance at 0x10cd020e0>, <__main__.Point instance at 0x10cd02170>, <__main__.Point instance at 0x10cd02200>, <__main__.Point instance at 0x10cd02290>, <__main__.Point instance at 0x10cd02320>, <__main__.Point instance at 0x10cd023b0>, <__main__.Point instance at 0x10cd02440>, <__main__.Point instance at 0x10cd024d0>, <__main__.Point instance at 0x10cd02560>, <__main__.Point instance at 0x10cd025f0>, <__main__.Point instance at 

In [8]:
print "number of paths", len(paths)
for p in paths:
    for x in p:
        print "number of points", len(x.points)
        for i in x.points:
            if isinstance(i, Route.Stop):
                print i.st
            else:
                print '-',

number of paths 2
number of points 241
PORT AUTHORITY BUS TERMINAL
- - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - WILLOW AVE + 19TH ST
- - - - WILLOW AVE + 15TH ST
CONGRESS ST + PALISADE AVE
CONGRESS ST + WEBSTER AVE
CONGRESS ST + HANCOCK AVE
CONGRESS ST + CENTRAL AVE
- - CENTRAL AVE + SOUTH ST
CENTRAL AVE + THORNE ST
CENTRAL AVE + BOWERS ST
CENTRAL AVE + GRIFFITH ST
CENTRAL AVE + HUTTON ST
CENTRAL AVE + SHERMAN PL
CENTRAL AVE + MANHATTAN AVE
CENTRAL AVE + BOOREAM AVE
CENTRAL AVE + PERSHING PLAZA
CENTRAL AVE + WAVERLY ST
CENTRAL AVE + LAIDLAW AVE
CENTRAL AVE + BEACON AVE
CENTRAL AVE + SAINT PAULS AVE
- CENTRAL AVE + HOBOKEN AVE
- HOBOKEN AVE + SUMMIT AVE
- SUMMIT AVE + PAVONIA AVE
- - - JOURNAL SQUARE LANE D
- - - - - - - - - - - - - - - - SIP AVE + TONNELLE AVE
- JFK BLVD + SIP AVE
- - - - JFK BLVD + STUYVESANT AVE
- JFK BLVD + HIGHLAND AVE
- JFK BLVD + MONTGOMERY ST
- - - - - - JFK BLVD + DUNCAN AVE
- - JFK BLVD + KENSINGTON AVE
- 

In [9]:
# what uses it (the waypoints map notebook)