In [2]:
from google.transit import gtfs_realtime_pb2
import requests
import pandas as pd
pd.set_option('display.max_rows', None)
pd.set_option('display.max_columns', None)
import numpy as np
import geopy.distance as geo

# Functions

In [2]:
def pull_vehicle_data():
    # Pull feed from RTD-Denver's VehiclePosition URL
    feed = gtfs_realtime_pb2.FeedMessage()
    response = requests.get('https://www.rtd-denver.com/files/gtfs-rt/VehiclePosition.pb')
    feed.ParseFromString(response.content)
    return feed 

In [3]:
def parse_to_df(feed):
    # Store the necessary values in a dictionary to convert to a Pandas Dataframe for later analysis
    rtd_dict = {}

    for entity in feed.entity:
        rtd_dict[entity.id] = {'trip_id': entity.vehicle.trip.trip_id
                            ,'route_id': entity.vehicle.trip.route_id
                            ,'vehicle_lat': float(entity.vehicle.position.latitude)
                            ,'vehicle_lng': float(entity.vehicle.position.longitude)
                            ,'bearing': int(entity.vehicle.position.bearing)
                            ,'status': int(entity.vehicle.current_status)
                            ,'timestamp': int(entity.vehicle.timestamp)
                            ,'stop_id': entity.vehicle.stop_id
                            ,'vehicle_id': entity.vehicle.vehicle.id
                            ,'vehicle_label': entity.vehicle.vehicle.label}

    return pd.DataFrame.from_dict(rtd_dict, orient='index').reset_index()

In [4]:
def clean_up_nulls(rtd_column, new_value):
    rtd_column = rtd_column.replace('', new_value)
        
    if type(new_value) == int: 
        rtd_column = rtd_column.astype('int')

    return rtd_column

In [5]:
def update_col_names(rtd_cols):
    # Update the first column name to be entity_id, a unique value for the pull (combination of 'timestamp + _ + vehicle_id')
    col_list = rtd_cols.tolist()
    col_list[0] = 'entity_id'
    return col_list

In [6]:
def convert_timezone_local(rtd_timestamps): 
    # Convert the timestamp values to Mountain Time from UTC
    return pd.to_datetime(rtd_timestamps, unit='s').dt.tz_localize('UTC').dt.tz_convert('US/Mountain')

In [7]:
def convert_vals(rtd_column, val_dict):
    # Update status columns with their corresponding real_world values
    return rtd_column.replace(to_replace = val_dict)

In [8]:
def put_it_all_together(rtd_df, routes, trips, stops, stop_times):
    rtd_df = pd.merge(rtd_df, routes, how='left', on=['route_id'], suffixes=('', '_r'))
    rtd_df = pd.merge(rtd_df, trips, how='left', on=['trip_id'], suffixes=('', '_t'))
    rtd_df = pd.merge(rtd_df, stops, how='left', on=['stop_id'], suffixes=('', '_s'))
    rtd_df = pd.merge(rtd_df, stop_times, how='left', on=['trip_id', 'stop_id'], suffixes=('', '_st'))
    return rtd_df.loc[:,['entity_id'
                  ,'trip_id'
                  ,'trip_headsign'
                  ,'route_id'
                  ,'route_long_name'
                  ,'route_short_name'
                  ,'route_type'
                  ,'route_desc'
                  ,'vehicle_lat'
                  ,'vehicle_lng'
                  ,'bearing'
                  ,'status'
                  ,'timestamp'
                  ,'stop_id'
                  ,'stop_name'
                  ,'stop_desc'
                  ,'stop_lat'
                  ,'stop_lon'
                  ,'arrival_time'
                  ,'departure_time'
                  ,'vehicle_id'
                  ,'vehicle_label']]

In [9]:
def initialize_csv(filepath, col_length, col_names):
    # Initialize the CSV to store the data (Remove for subsequent runs)
    init_csv = pd.DataFrame([['']*col_length], columns = col_names.tolist()).reset_index(drop=True)
    init_csv.to_csv(filepath, index=False)

In [10]:
def append_csv(filepath, rtd_df, update_datetime):
    # Append the data onto the end of the CSV
    try:
        rtd_df.to_csv(filepath, mode='a', header=False, index=False)
        print(f"Feed Updated at: {update_datetime}")
    except:
        print('Somthing went wrong...')

In [17]:
def calculate_distance(point_1_lat, point_1_lng, point_2_lat, point_2_lng):
    # Calculate the distance between two geo points in meters
    point_1 = list(zip(point_1_lat, point_1_lng))
    point_2 = list(zip(point_2_lat, point_2_lng))
    
    return [round(geo.distance(point_1, point_2).m,2) if (~pd.isnull(point_2[0])) & (point_1[0] > 0) else np.nan for point_1, point_2 in zip(point_1, point_2)]

In [18]:
def calculate_time(time_1, time_2):
    # Calculate the time between a Timestamp and an H:M:S string in minutes
    time_diff = time_1.dt.tz_localize(None) - time_2.apply(lambda x: pd.Timestamp(x))
    return time_diff.apply(lambda x: x.total_seconds()/60)

In [22]:
type(calculate_time(rtd_df.timestamp, rtd_df.scheduled_arrival_time)[0])

numpy.float64

# Pull Scripts

In [14]:
status_dict = {0: 'incoming_at'
              ,1: 'stopped_at'
              ,2: 'in_transit_to'}

route_dict = {0: 'light_rail'
             ,2: 'union_station_rail'
             ,3: 'bus'}

In [15]:
rtd_pull = pull_vehicle_data()
rtd_df = parse_to_df(rtd_pull)

rtd_df.trip_id = clean_up_nulls(rtd_df.trip_id, -1)
rtd_df.stop_id = clean_up_nulls(rtd_df.stop_id, -1)
rtd_df.route_id = clean_up_nulls(rtd_df.route_id, np.nan)
rtd_df.columns = update_col_names(rtd_df.columns)

rtd_df.head()

Unnamed: 0,entity_id,trip_id,route_id,vehicle_lat,vehicle_lng,bearing,status,timestamp,stop_id,vehicle_id,vehicle_label
0,1613088221_1505,-1,,39.616585,-104.897766,156,2,1613088189,-1,1505,1505
1,1613088221_1516,113624099,AB1,39.974968,-105.209198,300,2,1613088188,33855,1516,1516
2,1613088221_1535,113611060,104L,39.884766,-104.942902,91,2,1613088205,23499,1535,1535
3,1613088221_1543,113611074,104L,39.8857,-104.781754,270,2,1613088167,33278,1543,1543
4,1613088221_1549,-1,,39.771233,-104.984314,0,2,1613088215,-1,1549,1549


In [3]:
routes = pd.read_csv('~/Documents/dsi/repos/capstone_1/data/google_transit/routes.txt', delimiter=',')
trips = pd.read_csv('~/Documents/dsi/repos/capstone_1/data/google_transit/trips.txt', delimiter=',')
stops = pd.read_csv('~/Documents/dsi/repos/capstone_1/data/google_transit/stops.txt', delimiter=',')
stop_times = pd.read_csv('~/Documents/dsi/repos/capstone_1/data/google_transit/stop_times.txt', delimiter=',')

In [5]:
stop_times[stop_times.trip_id == '113627040']

  res_values = method(rvalues)


Unnamed: 0,trip_id,arrival_time,departure_time,stop_id,stop_sequence,stop_headsign,pickup_type,drop_off_type,shape_dist_traveled,timepoint


In [17]:
rtd_df = put_it_all_together(rtd_df, routes, trips, stops, stop_times)
rtd_df.timestamp = convert_timezone_local(rtd_df.timestamp)
rtd_df.status = convert_vals(rtd_df.status, status_dict)
rtd_df.route_type = convert_vals(rtd_df.route_type, route_dict)
col_names = rtd_df.columns.tolist()
col_names[col_names.index('arrival_time')] = 'scheduled_arrival_time'
col_names[col_names.index('departure_time')] = 'scheduled_departure_time'
rtd_df.columns = col_names

In [18]:
rtd_df.info()

<class 'pandas.core.frame.DataFrame'>
Int64Index: 415 entries, 0 to 414
Data columns (total 22 columns):
 #   Column                    Non-Null Count  Dtype                      
---  ------                    --------------  -----                      
 0   entity_id                 415 non-null    object                     
 1   trip_id                   415 non-null    int64                      
 2   trip_headsign             324 non-null    object                     
 3   route_id                  324 non-null    object                     
 4   route_long_name           324 non-null    object                     
 5   route_short_name          324 non-null    object                     
 6   route_type                324 non-null    object                     
 7   route_desc                324 non-null    object                     
 8   vehicle_lat               415 non-null    float64                    
 9   vehicle_lng               415 non-null    float64                

In [19]:
rtd_df.head()

Unnamed: 0,entity_id,trip_id,trip_headsign,route_id,route_long_name,route_short_name,route_type,route_desc,vehicle_lat,vehicle_lng,bearing,status,timestamp,stop_id,stop_name,stop_desc,stop_lat,stop_lon,scheduled_arrival_time,scheduled_departure_time,vehicle_id,vehicle_label
0,1613088221_1505,-1,,,,,,,39.616585,-104.897766,156,in_transit_to,2021-02-11 17:03:09-07:00,-1,,,,,,,1505,1505
1,1613088221_1516,113624099,Downtown Boulder via E-470,AB1,Boulder / Denver Airport,AB1,bus,This Route Travels Eastbound & Westbound,39.974968,-105.209198,300,in_transit_to,2021-02-11 17:03:08-07:00,33855,US 36 & Table Mesa Station Gate B,Vehicles Travelling East,39.986706,-105.232717,17:16:00,17:16:00,1516,1516
2,1613088221_1535,113611060,Denver Airport Station via Thornton Crossroads,104L,Wagon Road / Denver Airport Limited,104L,bus,This Route Travels Eastbound & Westbound,39.884766,-104.942902,91,in_transit_to,2021-02-11 17:03:25-07:00,23499,,,,,,,1535,1535
3,1613088221_1543,113611074,Wagon Road PnR via Thornton Crossroads,104L,Wagon Road / Denver Airport Limited,104L,bus,This Route Travels Eastbound & Westbound,39.8857,-104.781754,270,in_transit_to,2021-02-11 17:02:47-07:00,33278,104th Ave & Landmark Dr,Vehicles Travelling Southwest,39.885712,-104.791061,17:02:56,17:02:56,1543,1543
4,1613088221_1549,-1,,,,,,,39.771233,-104.984314,0,in_transit_to,2021-02-11 17:03:35-07:00,-1,,,,,,,1549,1549


# Routes, Trips, Stops, & Stop Times

In [171]:
routes = pd.read_csv('~/Documents/dsi/repos/capstone_1/data/google_transit/routes.txt', delimiter=',')
routes.route_desc.unique()

array(['This Route Travels Eastbound & Westbound',
       'This Route Travels Northbound & Southbound',
       'This Route Travels Civic Ctr & Union Stn'], dtype=object)

In [172]:
routes[routes.route_type == 2]

Unnamed: 0,route_long_name,route_type,route_text_color,route_color,agency_id,route_id,route_url,route_desc,route_short_name
31,Union Station to Westminster Station,2,FFFFFF,4E9D2D,RTD,113B,http://www.rtd-denver.com/Schedules.shtml,This Route Travels Eastbound & Westbound,B
32,Union Station to Wheat Ridge Ward Station,2,FFFFFF,F6B221,RTD,113G,http://www.rtd-denver.com/Schedules.shtml,This Route Travels Eastbound & Westbound,G
91,Union Station to Eastlake & 124th Station,2,FFFFFF,9F26B5,RTD,117N,http://www.rtd-denver.com/Schedules.shtml,This Route Travels Northbound & Southbound,N
92,Union Station to Denver Airport Station,2,FFFFFF,57C1E9,RTD,A,http://www.rtd-denver.com/Schedules.shtml,This Route Travels Eastbound & Westbound,A


In [173]:
trips = pd.read_csv('~/Documents/dsi/repos/capstone_1/data/google_transit/trips.txt', delimiter=',')
trips.head()

Unnamed: 0,block_id,route_id,direction_id,trip_headsign,shape_id,service_id,trip_id
0,a_51 3,51,0,US36 & Sheridan,1169566,WK_merged_113737461,113621307
1,b_76 5,76,1,Lakewood Commons,1178320,SU_merged_113737468,113732618
2,b_FF 5,FF1,1,Downtown Boulder All-Station,1178609,SU_merged_113737468,113735135
3,b_FF 4,FF3,1,US36 & Broomfield,1178617,SU_merged_113737468,113735134
4,b_FF 2,FF1,1,Downtown Boulder All-Station,1178616,SU_merged_113737468,113735137


In [174]:
stops = pd.read_csv('~/Documents/dsi/repos/capstone_1/data/google_transit/stops.txt', delimiter=',')
stops.head()

Unnamed: 0,stop_lat,wheelchair_boarding,stop_code,stop_lon,stop_timezone,stop_url,parent_station,stop_desc,stop_name,location_type,stop_id,zone_id
0,39.781251,1,35236,-105.138657,,,33852.0,Vehicles Travelling South,Ward Rd PnR (Ar),0,35236,35236
1,39.767591,1,35234,-104.973545,,,,Vehicles Travelling East,Larimer St & Downing St,0,35234,35234
2,39.764239,1,35232,-104.97787,,,,Vehicles Travelling East,Larimer St & 32nd St,0,35232,35232
3,39.765885,1,35233,-104.975756,,,,Vehicles Travelling East,Larimer St & 34th St,0,35233,35233
4,39.760294,1,35230,-104.982985,,,,Vehicles Travelling East,Larimer St & 28th St,0,35230,35230


In [175]:
stop_times = pd.read_csv('~/Documents/dsi/repos/capstone_1/data/google_transit/stop_times.txt', delimiter=',')
stop_times.head()

Unnamed: 0,trip_id,arrival_time,departure_time,stop_id,stop_sequence,stop_headsign,pickup_type,drop_off_type,shape_dist_traveled,timepoint
0,113621307,08:03:00,08:03:00,26180,1,,,1.0,,
1,113621307,08:04:43,08:04:43,20171,2,,,,,
2,113621307,08:06:14,08:06:14,35227,3,,,,,
3,113621307,08:07:54,08:07:54,20312,4,,,,,
4,113621307,08:08:55,08:08:55,20314,5,,,,,


# Calculating Meters between 2 points

In [179]:
# point_1 = (float(AB_route[(AB_route.entity_id == '1612967622_1510')].vehicle_lat), float(AB_route[(AB_route.entity_id == '1612967622_1510')].vehicle_lng))
# point_2 = (float(AB_route[(AB_route.entity_id == '1612967743_1510')].vehicle_lat), float(AB_route[(AB_route.entity_id == '1612967743_1510')].vehicle_lng))
geom_df = rtd_df[~(rtd_df.vehicle_lat.isnull()) & ~(rtd_df.stop_lat.isnull())]
geom_df['vehicle_point'] = list(zip(geom_df.loc[:,'vehicle_lat'], geom_df.loc[:,'vehicle_lng']))
geom_df['stop_point'] = list(zip(geom_df.loc[:,'stop_lat'], geom_df.loc[:,'stop_lon']))

geom_df['distance_to_stop_meters'] = geom_df.apply(lambda row: geo.distance(row['vehicle_point'], row['stop_point']).m, axis=1).round(1)
geom_df

A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  after removing the cwd from sys.path.
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  """
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  import sys


Unnamed: 0,entity_id,trip_id,trip_headsign,route_id,route_long_name,route_short_name,route_type,route_desc,vehicle_lat,vehicle_lng,bearing,status,timestamp,stop_id,stop_name,stop_desc,stop_lat,stop_lon,scheduled_arrival_time,scheduled_departure_time,vehicle_id,vehicle_label,vehicle_point,stop_point,distance_to_stop_meters
0,1613015131_1502,113611084,Wagon Road PnR via Thornton Crossroads,104L,Wagon Road / Denver Airport Limited,104L,bus,This Route Travels Eastbound & Westbound,39.848083,-104.672447,0,stopped_at,2021-02-10 20:45:10-07:00,34654,Denver Airport Station Gate 10,Vehicles Travelling East,39.847957,-104.672991,20:50:00,20:50:00,1502,1502,"(39.84808349609375, -104.67244720458984)","(39.847957, -104.67299100000001)",48.6
1,1613015131_1516,113611064,Denver Airport Station via Thornton Crossroads,104L,Wagon Road / Denver Airport Limited,104L,bus,This Route Travels Eastbound & Westbound,39.885216,-104.899986,88,in_transit_to,2021-02-10 20:45:05-07:00,33267,104th Ave & Brighton Rd,Vehicles Travelling East,39.885204,-104.889301,20:47:04,20:47:04,1516,1516,"(39.885215759277344, -104.89998626708984)","(39.885203999999995, -104.889301)",914.0
4,1613015131_1529,113624239,Arapahoe Stn,AT,Arapahoe County / Denver Airport,AT,bus,This Route Travels Northbound & Southbound,39.625648,-104.902901,156,in_transit_to,2021-02-10 20:44:50-07:00,19074,Arapahoe at Village Center Station Gate A,Vehicles Travelling Southeast,39.600876,-104.887187,20:52:00,20:52:00,1529,1529,"(39.625648498535156, -104.90290069580078)","(39.600876, -104.88718700000001)",3063.6
10,1613015131_1653,113630356,Free MallRide Civic Center,MALL,Free Mall Ride,MALLRIDE,bus,This Route Travels Civic Ctr & Union Stn,39.746582,-104.99398,94,in_transit_to,2021-02-10 20:45:01-07:00,22356,16th Street Mall & Stout St,Vehicles Travelling Southeast,39.745893,-104.993246,20:43:30,20:43:30,1653,1653,"(39.74658203125, -104.99398040771484)","(39.745893, -104.993246)",99.1
14,1613015131_1679,113630437,Free MallRide Union Station,MALL,Free Mall Ride,MALLRIDE,bus,This Route Travels Civic Ctr & Union Stn,39.751183,-105.0,315,in_transit_to,2021-02-10 20:45:11-07:00,25425,16th Street Mall & Wynkoop St,Vehicles Travelling Northwest,39.751815,-105.000764,20:45:54,20:45:54,1679,1679,"(39.751182556152344, -105.0)","(39.751815, -105.00076399999999)",96.0
15,1613015131_1681,113630357,Free MallRide Civic Center,MALL,Free Mall Ride,MALLRIDE,bus,This Route Travels Civic Ctr & Union Stn,39.754768,-105.0037,249,in_transit_to,2021-02-10 20:44:56-07:00,35194,16th St Mall & Wewatta St,Vehicles Travelling Southeast,39.752522,-105.001819,20:44:24,20:44:24,1681,1681,"(39.75476837158203, -105.00370025634766)","(39.752522, -105.001819)",297.0
18,1613015131_3605,113632634,Dtwn Boulder,BOLT,Boulder / Longmont,BOLT,bus,This Route Travels Northbound & Southbound,40.101215,-105.178413,214,in_transit_to,2021-02-10 20:44:57-07:00,24826,Hwy 119 & Monarch Rd,Vehicles Travelling Southwest,40.094024,-105.185684,20:39:02,20:39:02,3605,3605,"(40.10121536254883, -105.17841339111328)","(40.094024, -105.18568400000001)",1010.9
19,1613015131_3609,113624983,Union Station All-Station,FF1,Flatiron Flyer,FF1,bus,This Route Travels Eastbound & Westbound,39.769684,-104.993164,220,in_transit_to,2021-02-10 20:45:09-07:00,23955,Park Ave West & Wewatta St,Vehicles Travelling Southeast,39.759506,-104.994238,20:49:20,20:49:20,3609,3609,"(39.769683837890625, -104.9931640625)","(39.759506, -104.994238)",1133.8
22,1613015131_3615,113624981,Union Station All-Station,FF1,Flatiron Flyer,FF1,bus,This Route Travels Eastbound & Westbound,40.017384,-105.276436,186,stopped_at,2021-02-10 20:44:54-07:00,18977,Downtown Boulder Station Gate 1,Vehicles Travelling Southeast,40.017238,-105.276595,21:01:00,21:01:00,3615,3615,"(40.01738357543945, -105.27643585205078)","(40.017238, -105.276595)",21.1
23,1613015131_3618,113624085,Denver Airport via E-470,AB1,Boulder / Denver Airport,AB1,bus,This Route Travels Eastbound & Westbound,39.985767,-105.241852,90,in_transit_to,2021-02-10 20:45:06-07:00,21103,Table Mesa Dr & Tantra Dr,Vehicles Travelling East,39.985736,-105.24081,20:46:18,20:46:18,3618,3618,"(39.98576736450195, -105.24185180664062)","(39.985735999999996, -105.24081000000001)",89.1


# Calculating time between two times

In [199]:
rtd_df.timestamp[0].tz_localize(None)

Timestamp('2021-02-10 20:45:10')

In [198]:
pd.Timestamp(rtd_df.scheduled_arrival_time[0])

Timestamp('2021-02-10 20:50:00')

In [213]:
(rtd_df.timestamp[0].tz_localize(None) - pd.Timestamp(rtd_df.scheduled_arrival_time[0])).total_seconds()//60

-5.0

In [242]:
time_df = rtd_df[~(rtd_df.timestamp.isnull()) & ~(rtd_df.scheduled_arrival_time.isnull())]

In [243]:
time_diff = time_df['timestamp'].dt.tz_localize(None) - time_df['scheduled_arrival_time'].apply(lambda x: pd.Timestamp(x))
time_diff.apply(lambda x: x.total_seconds()/60)

0      -4.833333
1      -1.983333
4      -7.166667
10      1.516667
14     -0.716667
15      0.533333
18      5.916667
19     -4.183333
22    -16.100000
23     -1.200000
24     -3.916667
34     -1.950000
35     -6.083333
36      2.183333
38     -1.150000
43      4.583333
49      1.100000
50      3.550000
53      1.266667
54      1.150000
55     -0.883333
56     -1.683333
57     -0.116667
58     -1.250000
61     -0.183333
64      4.000000
65      0.350000
71      2.900000
72     -0.133333
73      2.283333
74      2.283333
75      1.166667
79      3.350000
80      6.066667
81      1.250000
82      0.883333
83      1.316667
85      1.600000
86      4.066667
87      3.000000
89      0.283333
91      1.616667
93     -1.283333
94      0.566667
95      3.000000
97      0.766667
98      2.600000
100     2.466667
101     1.616667
103    -1.516667
105     0.933333
106     0.133333
108    -6.033333
112    -0.283333
115     1.800000
117     3.066667
124    -0.016667
126    -5.816667
127     3.1333

# Calculating null values by vehicle ID

In [244]:
rtd_csv.stop_name.isnull().groupby(rtd_csv.vehicle_id).sum().astype(int).reset_index(name='count_nulls').sort_values('count_nulls', ascending=False)

Unnamed: 0,vehicle_id,count_nulls


In [20]:
rtd_df.head()

Unnamed: 0,entity_id,trip_id,trip_headsign,route_id,route_long_name,route_short_name,route_type,route_desc,vehicle_lat,vehicle_lng,bearing,status,timestamp,stop_id,stop_name,stop_desc,stop_lat,stop_lon,scheduled_arrival_time,scheduled_departure_time,vehicle_id,vehicle_label
0,1613069281_1504,113624249,Arapahoe Stn,AT,Arapahoe County / Denver Airport,AT,bus,This Route Travels Northbound & Southbound,39.657768,-104.846603,202,in_transit_to,2021-02-11 11:47:37-07:00,19074,Arapahoe at Village Center Station Gate A,Vehicles Travelling Southeast,39.600876,-104.887187,11:52:00,11:52:00,1504,1504
1,1613069281_1505,113624215,Denver Airport,AT,Arapahoe County / Denver Airport,AT,bus,This Route Travels Northbound & Southbound,39.828133,-104.782166,38,in_transit_to,2021-02-11 11:47:54-07:00,34651,Denver Airport Station Gate 7,Vehicles Travelling East,39.847919,-104.673639,11:45:00,11:45:00,1505,1505
2,1613069281_1509,113624080,Denver Airport via E-470,AB1,Boulder / Denver Airport,AB1,bus,This Route Travels Eastbound & Westbound,39.986767,-105.232735,121,stopped_at,2021-02-11 11:44:56-07:00,33855,US 36 & Table Mesa Station Gate B,Vehicles Travelling East,39.986706,-105.232717,11:44:00,11:52:00,1509,1509
3,1613069281_1533,113611087,Wagon Road PnR via Thornton Crossroads,104L,Wagon Road / Denver Airport Limited,104L,bus,This Route Travels Eastbound & Westbound,39.8853,-104.890984,270,in_transit_to,2021-02-11 11:47:09-07:00,13960,Thornton Crossroads & 104th Station Gate F,Vehicles Travelling South,39.882055,-104.942018,11:55:00,11:55:00,1533,1533
4,1613069281_1538,-1,,,,,,,40.016582,-105.249031,0,in_transit_to,2021-02-11 11:39:00-07:00,-1,,,,,,,1538,1538


In [47]:
calculate_distance(rtd_df.vehicle_lat, rtd_df.vehicle_lng, rtd_df.stop_lat, rtd_df.stop_lon)

([(39.65776824951172, -104.84660339355469),
  (39.82813262939453, -104.78216552734375),
  (39.98676681518555, -105.23273468017578),
  (39.88529968261719, -104.89098358154297),
  (40.01658248901367, -105.24903106689453),
  (39.885101318359375, -104.86481475830078),
  (39.77126693725586, -104.98419952392578),
  (39.77095031738281, -104.98432922363281),
  (39.75603485107422, -105.00364685058594),
  (39.74128341674805, -104.98693084716797),
  (39.74391555786133, -104.99057006835938),
  (39.76878356933594, -104.98560333251953),
  (39.748016357421875, -104.99581909179688),
  (39.75163269042969, -105.00080108642578),
  (39.745731353759766, -104.99283599853516),
  (39.75189971923828, -105.00111389160156),
  (39.74678421020508, -104.99473571777344),
  (40.035316467285156, -105.25830078125),
  (39.754615783691406, -105.00174713134766),
  (39.82848358154297, -104.9977798461914),
  (39.90409851074219, -105.08309936523438),
  (40.01738357543945, -105.27645111083984),
  (39.887168884277344, -105.072

In [14]:
rtd_df = pd.read_csv('/Users/joesanfilippo/Documents/dsi/repos/capstone_1/data/rtd_data_capture.csv')
rtd_df.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 98078 entries, 0 to 98077
Data columns (total 22 columns):
 #   Column                    Non-Null Count  Dtype  
---  ------                    --------------  -----  
 0   entity_id                 98077 non-null  object 
 1   trip_id                   98077 non-null  float64
 2   trip_headsign             77471 non-null  object 
 3   route_id                  77471 non-null  object 
 4   route_long_name           77471 non-null  object 
 5   route_short_name          77471 non-null  object 
 6   route_type                77471 non-null  object 
 7   route_desc                77471 non-null  object 
 8   vehicle_lat               98077 non-null  float64
 9   vehicle_lng               98077 non-null  float64
 10  bearing                   98077 non-null  float64
 11  status                    98077 non-null  object 
 12  timestamp                 98077 non-null  object 
 13  stop_id                   98077 non-null  float64
 14  stop_n

In [15]:
rtd_df['distance_to_stop'] = calculate_distance(rtd_df.vehicle_lat, rtd_df.vehicle_lng, rtd_df.stop_lat, rtd_df.stop_lon)

In [54]:
# rtd_df.scheduled_departure_time.apply(lambda x: print(type(x)))
set([int(x[0:2]) if type(x)==str else x for x in rtd_df.scheduled_departure_time])

{nan, 4, 5, 6, 7, 8, 9, 10, 11, 24}

In [50]:
for __ in range(len(rtd_df.scheduled_departure_time)):
    print(f'{rtd_df.scheduled_departure_time[__]}: ')
    print(f'{pd.Timestamp(rtd_df.scheduled_departure_time[__])}')

nan: 
NaT
nan: 
NaT
nan: 
NaT
nan: 
NaT
nan: 
NaT
nan: 
NaT
nan: 
NaT
nan: 
NaT
nan: 
NaT
nan: 
NaT
nan: 
NaT
nan: 
NaT
nan: 
NaT
nan: 
NaT
nan: 
NaT
nan: 
NaT
nan: 
NaT
nan: 
NaT
nan: 
NaT
nan: 
NaT
nan: 
NaT
nan: 
NaT
nan: 
NaT
nan: 
NaT
nan: 
NaT
nan: 
NaT
nan: 
NaT
24:01:40: 


ValueError: could not convert string to Timestamp

In [None]:
rtd_df['time_to_departure'] = calculate_time(pd.to_datetime(rtd_df.timestamp), rtd_df.scheduled_departure_time)