# 06: Get realtime timestamp

Later we need to determine the most recent status of each public transport (PT) ride in order to get the most accurate arrival and departure information. To do so, we extract the timestamp of each realtime snapshot from its filename.

Note: This notebook was run on a workstation (the loaded data occupies more than 4.5 GB of memory).

In [1]:
# import libraries
import pandas as pd
import numpy as np
import sklearn
from datetime import datetime
import os
import geopandas as gpd
import matplotlib.pyplot as plt
import folium
import shapely
import hashlib

In [2]:
# two-digit month string
# NOTE(review): not referenced by any cell visible in this notebook -- presumably the
# month of the processed data ('07' = July 2023); confirm it is still needed.
data_month_double = '07'

In [3]:
# Read the realtime table written by the previous notebook (05) into memory.
realtime = pd.read_csv(filepath_or_buffer='../Data/05_vrs_all_realtime.csv')

In [4]:
# show row count, column dtypes and memory footprint of the loaded frame
realtime.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 75056352 entries, 0 to 75056351
Data columns (total 8 columns):
 #   Column                 Dtype 
---  ------                 ----- 
 0   trip_id                object
 1   start_date             int64 
 2   schedule_relationship  int64 
 3   route_id               int64 
 4   stop_id                int64 
 5   stop_arrival_time      object
 6   stop_departure_time    object
 7   filename               object
dtypes: int64(4), object(4)
memory usage: 4.5+ GB


In [5]:
# Cut the filename: drop everything up to and including the first '-' and keep the rest.
# NOTE: this cell is not idempotent -- running it a second time strips one more
# '-'-separated segment from the already shortened value.
# NOTE(review): the discarded prefix is presumably the date part of the filename;
# confirm that the time of day alone is enough to order snapshots.
realtime['filename'] = (
    realtime['filename']
    .str.split('-', n=1)
    .str.get(1)
)

In [6]:
# preview the first rows to check what is left of the filename after the cut
realtime.head()

Unnamed: 0,trip_id,start_date,schedule_relationship,route_id,stop_id,stop_arrival_time,stop_departure_time,filename
0,6870009-687-006-687.2.24:253500-42-1_FA880471-...,20230630,0,687,687,0,2023-07-01 01:35:00,000308-UTC_gtfs-realtime-scraper_vrs.csv
1,6870009-687-006-687.2.24:253500-42-1_FA880471-...,20230630,0,687,692,2023-07-01 01:40:30,2023-07-01 01:40:45,000308-UTC_gtfs-realtime-scraper_vrs.csv
2,6870009-687-006-687.2.24:253500-42-1_FA880471-...,20230630,0,687,691,2023-07-01 01:41:45,2023-07-01 01:42:00,000308-UTC_gtfs-realtime-scraper_vrs.csv
3,6870009-687-006-687.2.24:253500-42-1_FA880471-...,20230630,0,687,690,2023-07-01 01:42:45,2023-07-01 01:43:00,000308-UTC_gtfs-realtime-scraper_vrs.csv
4,6870009-687-006-687.2.24:253500-42-1_FA880471-...,20230630,0,687,698,2023-07-01 01:44:00,2023-07-01 01:44:15,000308-UTC_gtfs-realtime-scraper_vrs.csv


In [7]:
# Remove the last part of the filename so that only the time stamp remains.
# regex=False makes the match literal on every pandas version: before pandas 2.0
# the default was regex=True, where the '.' in '.csv' matches any character.
realtime['filename'] = realtime['filename'].str.replace(
    '-UTC_gtfs-realtime-scraper_vrs.csv', '', regex=False
)

In [8]:
# preview the first rows to check the result of the suffix removal
# NOTE(review): the saved output of this cell shows 308 while the last cell of the
# notebook shows 000308 for the same rows -- presumably a display/conversion artifact;
# confirm the values are still zero-padded strings at this point.
realtime.head()

Unnamed: 0,trip_id,start_date,schedule_relationship,route_id,stop_id,stop_arrival_time,stop_departure_time,filename
0,6870009-687-006-687.2.24:253500-42-1_FA880471-...,20230630,0,687,687,0,2023-07-01 01:35:00,308
1,6870009-687-006-687.2.24:253500-42-1_FA880471-...,20230630,0,687,692,2023-07-01 01:40:30,2023-07-01 01:40:45,308
2,6870009-687-006-687.2.24:253500-42-1_FA880471-...,20230630,0,687,691,2023-07-01 01:41:45,2023-07-01 01:42:00,308
3,6870009-687-006-687.2.24:253500-42-1_FA880471-...,20230630,0,687,690,2023-07-01 01:42:45,2023-07-01 01:43:00,308
4,6870009-687-006-687.2.24:253500-42-1_FA880471-...,20230630,0,687,698,2023-07-01 01:44:00,2023-07-01 01:44:15,308


In [9]:
# Rename the column to reflect that it now holds the snapshot timestamp.
# Done in place on the existing frame rather than building a renamed copy.
realtime.rename({'filename': 'vrs_timestamp'}, axis='columns', inplace=True)

In [10]:
# Export the frame (without the index) for the next notebook.
# NOTE: vrs_timestamp holds zero-padded digit strings such as 000308. CSV stores no
# dtypes, so readers should load it with dtype={'vrs_timestamp': str}; otherwise
# pandas infers an integer column and the leading zeros are lost.
realtime.to_csv(path_or_buf='../Data/06_VRS_with_timestamp.csv', index=False)

In [11]:
# final look at the exported frame; pandas abbreviates the display to the first and last rows
realtime

Unnamed: 0,trip_id,start_date,schedule_relationship,route_id,stop_id,stop_arrival_time,stop_departure_time,vrs_timestamp
0,6870009-687-006-687.2.24:253500-42-1_FA880471-...,20230630,0,687,687,0,2023-07-01 01:35:00,000308
1,6870009-687-006-687.2.24:253500-42-1_FA880471-...,20230630,0,687,692,2023-07-01 01:40:30,2023-07-01 01:40:45,000308
2,6870009-687-006-687.2.24:253500-42-1_FA880471-...,20230630,0,687,691,2023-07-01 01:41:45,2023-07-01 01:42:00,000308
3,6870009-687-006-687.2.24:253500-42-1_FA880471-...,20230630,0,687,690,2023-07-01 01:42:45,2023-07-01 01:43:00,000308
4,6870009-687-006-687.2.24:253500-42-1_FA880471-...,20230630,0,687,698,2023-07-01 01:44:00,2023-07-01 01:44:15,000308
...,...,...,...,...,...,...,...,...
75056347,1918-16-001-343.1.12:243300-37-260_608FDFB6-AD...,20230731,0,16,694,2023-08-01 01:31:00,2023-08-01 01:31:00,234305
75056348,1918-16-001-343.1.12:243300-37-260_608FDFB6-AD...,20230731,0,16,693,2023-08-01 01:33:00,2023-08-01 01:33:00,234305
75056349,1918-16-001-343.1.12:243300-37-260_608FDFB6-AD...,20230731,0,16,43,2023-08-01 01:35:00,2023-08-01 01:35:00,234305
75056350,1918-16-001-343.1.12:243300-37-260_608FDFB6-AD...,20230731,0,16,688,2023-08-01 01:36:00,2023-08-01 01:36:00,234305


: 