In [1]:
from math import sqrt
import os
import re
from datetime import time, timedelta, date, datetime
import pandas as pd
import zipfile
from shapely.geometry import Point, LineString
from geopandas import GeoDataFrame
#########################################################################################
# Matching Parameters
DRIVEBY_TOLERANCE = 400  # meters. Used to define the catchment area of a bus stop
TIME_TOLERANCE = 15 # minutes.  Used to define the +/- value for matching to the GTFS schedule Start End Points

# Filter Paramaters
ROUTE_DEVIATION = 10 # meters.  Used to filter potential trips by their average deviation(RMS) from the route geometry
TIME_DEVIATION = timedelta(minutes=30) # Used to filter potential trips by their average deviation(RMS) from the GTFS schedule
#########################################################################################

# Prep GPS Points

In [3]:
print('Finding GPS files')
gps_files = [f for f in os.listdir(r'\Data') if f.endswith('txt')]
print('Found {0} files'.format(len(gps_files)))
dataframe = pd.DataFrame()

for f in gps_files:
    unit = f[:3]
    print("processing {0}".format(f))
    df = pd.read_csv(r'\Data\{0}'.format(f),header=None,encoding='latin', 
                     names=['Date','Time','Latitude','Longitude','Speed','Direction','Elevation'], infer_datetime_format=True,
                     parse_dates={'Timestamp':[0,1]})
    df['Unit'] = unit
    dataframe = dataframe.append(df)
    
print('Converting Degrees/Minutes/Seconds to Decimal Degrees')
def dms2dd(measure):
    dms_list = re.split('[\s°\'"]+', measure)
    if dms_list[0] == 'W':
        return(-1*(float(dms_list[1])+((float(dms_list[2])+(float(dms_list[3])/60))/60)))
    else:
        return(float(dms_list[1])+((float(dms_list[2])+(float(dms_list[3])/60))/60))

dataframe['Latitude'] = [dms2dd(dms) for dms in dataframe['Latitude']]
dataframe['Longitude'] = [dms2dd(dms) for dms in dataframe['Longitude']]

print('Adding Geometry')
dataframe['geometry'] = [Point(x.Longitude,x.Latitude) for x in dataframe.itertuples()]
dataframe = GeoDataFrame(dataframe)
dataframe.crs = {'init': 'epsg:4326', 'no_defs': True}
dataframe.to_crs(epsg=3005, inplace=True)

shpframe = dataframe.copy()
shpframe.Timestamp = shpframe.Timestamp.astype('str')

print('Exporting to shape')
shpframe.to_file('data.shp')

print('Calculating Service Days')
dataframe['ServiceDay'] = [sd.date() if sd.time() > time(4) else (sd.date()-timedelta(days=1)) for sd in dataframe.Timestamp]

print('Add a Timestamp Index')
dataframe.set_index('Timestamp', drop=False, inplace=True)

print('Done')

Finding GPS files
Found 13 files
processing 201-020817.txt
processing 202-021217.txt
processing 204-021017.txt
processing 205-021517.txt
processing 206-020217.txt
processing 208-021217.txt
processing 209-020617.txt
processing 210-030717.txt
processing 211-021217.txt
processing 213-021217.txt
processing 214-021217.txt
processing 216-020917.txt
processing 217-022317.txt
Converting Degrees/Minutes/Seconds to Decimal Degrees
Adding Geometry
Exporting to shape
Calculating Service Days
Add a Timestamp Index
Done


In [4]:
# Define GTFS object 
class GTFS(object):
    """Create an object to represent the package of GTFS files"""
    def __init__(self, input_file):
        #Read the source GTFS files from zip
        if zipfile.is_zipfile(input_file):
            with zipfile.ZipFile(input_file, 'r') as gtfs_zip:
                with gtfs_zip.open('agency.txt') as agency:
                    self.agency = pd.read_csv(agency)
                with gtfs_zip.open('calendar.txt') as calendar:
                    self.calendar = pd.read_csv(
                            calendar,
                            parse_dates=['start_date','end_date'],
                            infer_datetime_format=True)
                with gtfs_zip.open('calendar_dates.txt') as calendar_dates:
                    self.calendar_dates = pd.read_csv(
                        calendar_dates,
                        parse_dates=['date',],
                        infer_datetime_format=True)
                with gtfs_zip.open('feed_info.txt') as feed_info:
                    self.feed_info = pd.read_csv(feed_info)
                with gtfs_zip.open('routes.txt') as routes:
                    self.routes = pd.read_csv(routes)
                with gtfs_zip.open('shapes.txt') as shapes:
                    self.shapes = pd.read_csv(shapes)
                with gtfs_zip.open('stop_times.txt') as stop_times:
                    self.stop_times = pd.read_csv(
                            stop_times,
                            parse_dates=['arrival_time','departure_time'],
                            infer_datetime_format=True)
                with gtfs_zip.open('stops.txt') as stops:
                    self.stops = pd.read_csv(stops)
                with gtfs_zip.open('trips.txt') as trips:
                    self.trips = pd.read_csv(trips)
        
        # Or read the source GTFS files from folder
        else:
            self.agency = pd.read_csv(input_file + 'agency.txt')
            self.calendar = pd.read_csv(
                    input_file + 'calendar.txt',
                    parse_dates=['start_date','end_date'],
                    infer_datetime_format=True)
            self.calendar_dates = pd.read_csv(
                    input_file + 'calendar_dates.txt',
                    parse_dates=['date',],
                    infer_datetime_format=True)
            self.feed_info = pd.read_csv(input_file + 'feed_info.txt')
            self.routes = pd.read_csv(input_file + 'routes.txt')
            self.shapes = pd.read_csv(input_file + 'shapes.txt')
            self.stop_times = pd.read_csv(
                    input_file + 'stop_times.txt',
                    parse_dates=['arrival_time','departure_time'],
                    infer_datetime_format=True)
            self.stops = pd.read_csv(input_file + 'stops.txt')
            self.trips = pd.read_csv(input_file + 'trips.txt')
        
        # Add Geometry to stops and shapes
        self.stops['geometry'] = self.stops.apply(lambda x: Point(float(x.stop_lon), float(x.stop_lat)), axis=1)
        self.stops = GeoDataFrame(self.stops)
        self.stops.crs = {'init': 'epsg:4326', 'no_defs': True}
        self.stops.to_crs(epsg=3005, inplace=True)
        
        # Could improve timing of these three or four lines
        self.shapes['geometry'] = self.shapes.apply(lambda x: Point(float(x.shape_pt_lon), float(x.shape_pt_lat)), axis=1)
        self.shapes = self.shapes.groupby('shape_id').aggregate(lambda x: tuple(x)).geometry.apply(lambda x: LineString(x))
        self.shapes = GeoDataFrame(self.shapes)
        self.shapes.crs = {'init': 'epsg:4326', 'no_defs': True}
        self.shapes.to_crs(epsg=3005, inplace=True)
     
    def trips_by_date(self,service_date):
        """Return a list of the GTFS trips active for a specific date"""
        flat_gtfs = pd.merge(
                pd.merge(self.calendar,self.calendar_dates,on='service_id', how='outer'),
                self.trips,
                on='service_id',
                how='outer'
        )

        # Base Service
        base_trips = flat_gtfs[
                (flat_gtfs.start_date <= service_date) & 
                (flat_gtfs.end_date >= service_date) & 
                (flat_gtfs[service_date.strftime('%A').lower()] == 1)
        ]

        # trips to add
        add_trips = flat_gtfs[(flat_gtfs.date==service_date)&(flat_gtfs.exception_type==1)]

        # trips to remove
        remove_trips = flat_gtfs[(flat_gtfs.date==service_date)&(flat_gtfs.exception_type==2)]

        trips = pd.concat([base_trips,add_trips])
        trips = pd.merge(trips,remove_trips,on=['trip_id','service_id'],how='left',indicator=True)
        trips = trips[trips._merge == 'left_only']
        trips.drop_duplicates(subset='trip_id')
        
        return(trips)
    
    def stop_times_by_date(self,service_date):
        """Return a list of stopids and times for a specific date"""
        trips = self.trips_by_date(service_date)
        return(pd.merge(trips,self.stop_times,on='trip_id'))

    def start_points(self,by_date=None):
        start_points = None
        
        if by_date:
            stop_times = self.stop_times_by_date(by_date)
        else:
            stop_times = self.stop_times
        
        if len(stop_times) > 0:
            start_points = stop_times[stop_times.stop_sequence==1]
            start_points = pd.merge(start_points,self.stops,on='stop_id')
            start_points = start_points.rename(columns = {'arrival_time':'start_time','stop_id':'start_stop'})
        return (start_points)

    def end_points(self,by_date=None):
        end_points = None
        
        if by_date:
            stop_times = self.stop_times_by_date(by_date)
        else:
            stop_times = self.stop_times
        
        if len(stop_times) > 0:
            end_points = pd.merge(stop_times,stop_times.groupby('trip_id').max().reset_index(),on='trip_id',suffixes=('','_max'))
            end_points = end_points[end_points.stop_sequence == end_points.stop_sequence_max]
            end_points = pd.merge(end_points,Schedule.stops,on='stop_id')
            end_points = end_points.rename(columns = {'arrival_time':'end_time','stop_id':'end_stop'})
        return (end_points)

In [5]:
Schedule = GTFS(
        r'Kamloops_20170125.zip'
)

In [6]:
#TODO skip all the start stop end stop crap and just find the start and finish of the trip
# Merge intermediate frames together to create a table with geometry for the start and and points as well as the trip shape line
AllTrips = pd.merge(Schedule.start_points(by_date=date(2017,2,3)),
                    Schedule.end_points(by_date=date(2017,2,3)), 
                    on='trip_id', suffixes=('_start','_end'))

AllTrips = pd.merge(AllTrips,Schedule.trips[['trip_id','shape_id']],on='trip_id')
Schedule.shapes['shape_id'] = Schedule.shapes.index
AllTrips = pd.merge(AllTrips,Schedule.shapes,on='shape_id')
AllTrips = AllTrips.drop_duplicates(subset='trip_id').copy()

In [7]:
ServiceDay = date(2017,2,3)

data = dataframe[(dataframe.ServiceDay==ServiceDay) & (dataframe.Unit=='201')]

###TODO- TOO SLOW!!!!!

def find_trip_start(row,apc_data):
    """
        Takes a trip starting time and selects window of APC data based on the defined interval to scan for potential matches.
        Returns the timestamp from the last record where the bus was within the specified distance of the starting location OR
        returns None if no potential matches are found.
    """
    possible_start_points = []
   
    hour = int(row.start_time[0:2])
    minute = int(row.start_time[3:5])
    second = int(row.start_time[6:8])
    
    if hour < 24:
        scheduled_start = datetime.combine(ServiceDay,time(hour,minute,second))
    else:
        hour -= 24
        scheduled_start = datetime.combine(ServiceDay,time(hour,minute,second)) + timedelta(1)
    
    lower_bound = scheduled_start-timedelta(minutes=TIME_TOLERANCE)
    upper_bound = scheduled_start+timedelta(minutes=TIME_TOLERANCE)
    
    # create a subset of APC data for the requested time period i.e. Plus or minus the TIME_TOLERANCE from the trip start time
    possible_start_points=apc_data[lower_bound:upper_bound].copy()

    # Assuming we have a subset of data
    if len(possible_start_points) > 0:
        # Check distance.  Adjust DRIVEBY_TOLERANCE so that we have a reasonable chance of catching "Drive By" datapoints 
        matching_start_points = possible_start_points[
                possible_start_points.geometry.distance(row.geometry_start) < DRIVEBY_TOLERANCE
        ]
        if len(matching_start_points) > 0:
            # Return the most recent timestamp i.e. the last point within DRIVEBY_TOLERANCE of the trip start
            ##TODO - This is where the error concerning trip start times is being created
            trip_start = matching_start_points.tail(1)
            return trip_start.Timestamp

def find_trip_end(row,apc_data):
    """
        Takes a trip ending time and selects a window of APC data based on the defined interval to scan for potential matches.
        Returns the timestamp from the most first record where the bus was within the specified distance of the ending location 
        OR returns None if no potential matches are found.
    """
    possible_end_points = []

    hour = int(row.end_time[0:2])
    minute = int(row.end_time[3:5])
    second = int(row.end_time[6:8])
    
    if hour < 24:
        scheduled_end = datetime.combine(ServiceDay,time(hour,minute,second))
    else:
        hour -= 24
        scheduled_end = datetime.combine(ServiceDay,time(hour,minute,second)) + timedelta(1)

    lower_bound = scheduled_end-timedelta(minutes=TIME_TOLERANCE)
    upper_bound = scheduled_end+timedelta(minutes=TIME_TOLERANCE)
    
    # create a subset of APC data for the requested time period i.e. Plus or minus the TIME_TOLERANCE from the trip start time
    possible_end_points=apc_data[lower_bound:upper_bound].copy()
 
    #Assuming we have a subset of data
    if len(possible_end_points) > 0:
        # Check distance.  Adjust DRIVEBY_TOLERANCE so that we have a reasonable chance of catching "Drive By" datapoints 
        matching_end_points = possible_end_points[possible_end_points.geometry.distance(row.geometry_end) < DRIVEBY_TOLERANCE]
    
        if len(matching_end_points) > 0:
            # Return the oldest timestamp. i.e. the first point within DRIVEBY_TOLERANCE of the trip start
            ##TODO - This is where the error concerning trip end times is being created
            trip_end = matching_end_points.head(1)
            return trip_end.Timestamp


# Apply the find actual start and end time functions to each trip
AllTrips['actual_start'] = [find_trip_start(x,data) for x in AllTrips.itertuples()]
AllTrips['actual_end'] = [find_trip_end(x,data) for x in AllTrips.itertuples()]

# Create a subset of the possible trips by removing trips that do not meet the start and end points of the trip within the 
# specified TIME_TOLERANCE
# More useful for non-blocked matching
# Changed this to or to open up matching
FirstFilterTrips = AllTrips.loc[(AllTrips.actual_start.notnull()) & (AllTrips.actual_end.notnull())].copy()

In [8]:
ServiceDay = date(2017,2,3)


def calculate_ShapeRMS(row, apc_data):
    """ 
        Extracts all of the data points comprising each potential trip from the APC data and compares 
        them to the trip geometry.  Returns the RMS value based on how closely the data points match the trip shape.
    """

    # Extract APC data subset based on trip "actual" start and end points representing the trip
    trip_points = apc_data[row.actual_start[0]:row.actual_end[0]].copy()
    if len(trip_points) > 0:
        # Calculate the spatial RMS from the trip shape geometry.
        trip_points['deviation'] = trip_points.apply(lambda x: x.geometry.distance(row.geometry),axis=1)
        trip_points['squared_deviation'] = trip_points.apply(lambda x: x.deviation**2,axis=1)
        variance = trip_points.squared_deviation.mean()
        RMS = sqrt(variance)
        return RMS

FirstFilterTrips['ShapeRMS'] = [calculate_ShapeRMS(x,data) for x in FirstFilterTrips.itertuples()]

In [9]:
SecondFilterTrips = FirstFilterTrips[FirstFilterTrips.ShapeRMS < ROUTE_DEVIATION].copy()

In [16]:
data.Timestamp

Timestamp
2017-02-03 06:45:06   2017-02-03 06:45:06
2017-02-03 06:45:07   2017-02-03 06:45:07
2017-02-03 06:45:08   2017-02-03 06:45:08
2017-02-03 06:45:09   2017-02-03 06:45:09
2017-02-03 06:45:10   2017-02-03 06:45:10
2017-02-03 06:45:11   2017-02-03 06:45:11
2017-02-03 06:45:12   2017-02-03 06:45:12
2017-02-03 06:45:13   2017-02-03 06:45:13
2017-02-03 06:45:14   2017-02-03 06:45:14
2017-02-03 06:45:15   2017-02-03 06:45:15
2017-02-03 06:45:16   2017-02-03 06:45:16
2017-02-03 06:45:17   2017-02-03 06:45:17
2017-02-03 06:45:18   2017-02-03 06:45:18
2017-02-03 06:45:19   2017-02-03 06:45:19
2017-02-03 06:45:20   2017-02-03 06:45:20
2017-02-03 06:45:21   2017-02-03 06:45:21
2017-02-03 06:45:22   2017-02-03 06:45:22
2017-02-03 06:45:23   2017-02-03 06:45:23
2017-02-03 06:45:24   2017-02-03 06:45:24
2017-02-03 06:45:25   2017-02-03 06:45:25
2017-02-03 06:45:26   2017-02-03 06:45:26
2017-02-03 06:45:27   2017-02-03 06:45:27
2017-02-03 06:45:28   2017-02-03 06:45:28
2017-02-03 06:45:29   20

In [20]:
ServiceDay = date(2017,2,3)

# Compares the timing of the intermediate stop points of the trips and calculates an average time variance(RMS) for each
# candidate trip

# Find all stop times for each trip
TripTimingPoints = pd.merge(SecondFilterTrips,Schedule.stop_times_by_date(ServiceDay),on='trip_id')
TripTimingPoints = pd.merge(TripTimingPoints,Schedule.stops, on='stop_id')

# The timing point filter requires that the timing point variable is set otherwise all stops are compared
#if len(TripTimingPoints[TripTimingPoints.timepoint == 1]) > 1:
#    TripTimingPoints = TripTimingPoints[TripTimingPoints.timepoint == 1].copy()
    
# Create a Point geometry object for each stop to fix it in space and time
TripTimingPoints['geometry'] = TripTimingPoints.apply(lambda x: Point(float(x.stop_lon), float(x.stop_lat)), axis=1)
TripTimingPoints = GeoDataFrame(TripTimingPoints)

# Reproject the points to BC Albers:3005(meters)
TripTimingPoints.crs = {'init': 'epsg:4326', 'no_defs': True}
TripTimingPoints.to_crs(epsg=3005, inplace=True)

def find_TP_match(row,apc_data):
    """
        Takes a trip timing point and selects a window of APC data based on the defined interval to scan for potential 
        matches. Returns the timestamp from the closest record where the bus was within the specified distance of the 
        timing point location OR returns None if no potential matches are found.
    """
    hour = int(row.arrival_time[0:2])
    minute = int(row.arrival_time[3:5])
    second = int(row.arrival_time[6:8])
    
    if hour < 24:
        scheduled = datetime.combine(ServiceDay,time(hour,minute,second))
    else:
        hour -= 24
        scheduled = datetime.combine(ServiceDay,time(hour,minute,second)) + timedelta(1)
    
    lower_bound = scheduled-timedelta(minutes=TIME_TOLERANCE)
    upper_bound = scheduled+timedelta(minutes=TIME_TOLERANCE)
    
    
    # create a subset of APC data for the requested time period i.e. Plus or minus the TIME_TOLERANCE 
    # from the scheduled stop time
    possible_timing_points=apc_data[
            lower_bound:upper_bound
    ].copy()
    
    # Check distance.  Adjust DRIVEBY_TOLERANCE so that we have a reasonable chance of catching "Drive By" datapoints 
    matching_timing_points = possible_timing_points[
            possible_timing_points.geometry.distance(row.geometry) < DRIVEBY_TOLERANCE
    ]
    
    # find closest time 
    ##TODO Probably should be fixed to distance?
    try:
        matching_timing_points['deviation'] = matching_timing_points.apply(
                lambda x: abs((scheduled - x.Timestamp).total_seconds()), axis=1
        )
        return matching_timing_points.deviation.min()
    except ValueError:
        return None
    
# Loop through the list of timing points and search for matching records in the data stream in order to calculate RMS
TripTimingPoints['deviation'] = TripTimingPoints.apply(find_TP_match, args=(data,), axis=1)

# Calculate the average scheduled stop time deviation for each candidate trip 
TripTimingPoints['squared_deviation'] = TripTimingPoints.apply(lambda x: x.deviation**2, axis=1)
TripRMS = TripTimingPoints.groupby(['trip_id']).mean()
TripRMS['TimeRMS'] = TripRMS.apply(lambda x: sqrt(x.squared_deviation),axis=1)

A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: http://pandas.pydata.org/pandas-docs/stable/indexing.html#indexing-view-versus-copy


In [22]:
# further refine the list of candidate trips by removing those greater than the allowed TIME_DEVIATION from the trip schedule
# More useful for non-blocked matching
ThirdFilterTrips = pd.merge(SecondFilterTrips,TripRMS.reset_index(),on='trip_id')
ThirdFilterTrips = ThirdFilterTrips[ThirdFilterTrips.TimeRMS < TIME_DEVIATION.total_seconds()].copy()

In [30]:
# Identify overlapping candidate trips
def find_conflicts(row, TripSet):
    return str([trip for trip in list(TripSet.apply(lambda x: x.trip_id if row.start_time < x.start_time < row.end_time or row.start_time < x.end_time < row.end_time else None,axis=1))if trip is not None])
FourthFilterTrips = ThirdFilterTrips
FourthFilterTrips['conflicts_with'] = ThirdFilterTrips.apply(find_conflicts, args=(ThirdFilterTrips,), axis=1)    
FourthFilterTrips = FourthFilterTrips[FourthFilterTrips['conflicts_with'] =='[]'].copy()

In [55]:
MatchedTrips = pd.merge(FourthFilterTrips, AllTrips, on='trip_id')
MatchedTrips['trip_time'] = MatchedTrips.start_time_x
MatchedTrips.sort_values(by='trip_time')#[['block_id','trip_sequence','trip_id','route_id','trip_time','actual_start','actual_end','ShapeRMS_x','TimeRMS']]

Unnamed: 0,service_id_start_x,monday_x_start_x,tuesday_x_start_x,wednesday_x_start_x,thursday_x_start_x,friday_x_start_x,saturday_x_start_x,sunday_x_start_x,start_date_x_start_x,end_date_x_start_x,...,stop_code_end,location_type_end,parent_station_end,stop_short_name_end_y,geometry_end_y,shape_id_y,geometry_y,actual_start_y,actual_end_y,trip_time
6,aaKA1609-KASEP16-Weekday-05,1,1,1,1,1,0,0,2017-01-24,2017-04-28,...,104534,0,,Eastbound Laurier at Sifton,POINT (1399017.339253946 640692.671462765),8-7-48,LINESTRING (1400380.599716809 645165.799602209...,Timestamp 2017-02-03 07:00:20 2017-02-03 07:...,Timestamp 2017-02-03 07:31:58 2017-02-03 07:...,06:50:00
5,aaKA1609-KASEP16-Weekday-05,1,1,1,1,1,0,0,2017-01-24,2017-04-28,...,104534,0,,Eastbound Laurier at Sifton,POINT (1399017.339253946 640692.671462765),8-7-48,LINESTRING (1400380.599716809 645165.799602209...,Timestamp 2017-02-03 07:07:55 2017-02-03 07:...,Timestamp 2017-02-03 07:32:00 2017-02-03 07:...,07:20:00
7,aaKA1609-KASEP16-Weekday-05,1,1,1,1,1,0,0,2017-01-24,2017-04-28,...,104504,0,,Lansdowne Exchange Bay C,POINT (1400341.569959427 645155.0774439623),8-7-44,LINESTRING (1399026.299716809 640697.497602219...,Timestamp 2017-02-03 08:35:36 2017-02-03 08:...,Timestamp 2017-02-03 09:01:44 2017-02-03 09:...,08:23:00
4,aaKA1609-KASEP16-Weekday-05,1,1,1,1,1,0,0,2017-01-24,2017-04-28,...,104479,0,,Lansdowne Exchange Bay D,POINT (1400360.931345826 645156.6326602336),8-6-29,LINESTRING (1400380.599716809 645165.799602209...,Timestamp 2017-02-03 11:02:54 2017-02-03 11:...,Timestamp 2017-02-03 11:15:21 2017-02-03 11:...,11:00:00
2,aaKA1609-KASEP16-Weekday-05,1,1,1,1,1,0,0,2017-01-24,2017-04-28,...,104747,0,,Eastbound 9150 block Dallas,POINT (1418073.518299596 644162.9146484142),8-17-64,LINESTRING (1400258.299716799 645130.499602226...,Timestamp 2017-02-03 16:07:34 2017-02-03 16:...,Timestamp 2017-02-03 16:53:18 2017-02-03 16:...,16:10:00
3,aaKA1609-KASEP16-Weekday-05,1,1,1,1,1,0,0,2017-01-24,2017-04-28,...,104654,0,,Lansdowne Exchange Bay E,POINT (1400352.219065559 645146.8865003213),8-17-68,LINESTRING (1418203.098716853 644153.294602213...,Timestamp 2017-02-03 16:56:40 2017-02-03 16:...,Timestamp 2017-02-03 17:21:16 2017-02-03 17:...,17:00:00
1,aaKA1609-KASEP16-Weekday-05,1,1,1,1,1,0,0,2017-01-24,2017-04-28,...,104585,0,,Southbound Springhill at Gleneagles,POINT (1398014.604520974 642321.7419424163),8-9-38,LINESTRING (1400380.599716809 645165.799602209...,Timestamp 2017-02-03 17:33:22 2017-02-03 17:...,Timestamp 2017-02-03 18:03:28 2017-02-03 18:...,17:37:00
0,aaKA1609-KASEP16-Weekday-05,1,1,1,1,1,0,0,2017-01-24,2017-04-28,...,104577,0,,Lansdowne Exchange Bay B,POINT (1400317.711554821 645153.0494227738),8-9-44,LINESTRING (1398017.602716806 642316.498602209...,Timestamp 2017-02-03 20:10:36 2017-02-03 20:...,Timestamp 2017-02-03 20:39:07 2017-02-03 20:...,19:57:00


In [52]:
MatchedTrips.start_time

AttributeError: 'DataFrame' object has no attribute 'start_time'