# API connection

Reference article (GTFS Realtime vehicle positions using Python and Databricks, TfNSW): https://medium.com/@bhaveshpatelaus/gtfs-realtime-vehicle-positions-using-python-and-databricks-tfnsw-a33b98f22e97

In [1]:
import os
from dotenv import load_dotenv
import requests
from pathlib import Path
import zipfile
import pandas as pd

from google.transit import gtfs_realtime_pb2
from google.transit import gtfs_realtime_pb2
from google.protobuf.json_format import MessageToDict
from google.protobuf.json_format import MessageToJson

from protobuf_to_dict import protobuf_to_dict

from data import data

# Load key/value pairs from a local .env file into the process environment so
# the os.getenv(...) lookups in later cells (APP_NAME, API_KEY, CERT) can find them.
load_dotenv()

True

In [3]:
# File name under which the downloaded schedule archive is stored (joined onto data.path below).
FILENAME_SCHEDULE = 'gtfs.zip'

In [4]:
# Credentials come from the environment; either value is None when the
# corresponding variable is not set.
# NOTE(review): app_name is not referenced by any other cell visible in this notebook.
app_name = os.environ.get("APP_NAME")
api_key = os.environ.get("API_KEY")

In [5]:
# Endpoints used in this notebook, all built on the same API host.
BASE_URL = "https://api.transport.nsw.gov.au"
BUS_POSITION_URI = BASE_URL + "/v1/gtfs/vehiclepos/buses"
BUS_SCHEDULE_URI = BASE_URL + "/v1/gtfs/schedule/buses"
# NOTE(review): the name says "ferry position" but the path is the historical
# endpoint, and nothing in the visible cells uses it - confirm the intended URL.
FERRY_POSITION = BASE_URL + "/v1/gtfs/historical"

In [6]:
# Fail fast on a missing key: without this check the header below would be
# built as the literal string "apikey None" and the problem would only show up
# later as a confusing failure in the download cells.
if not api_key:
    raise RuntimeError(
        "API_KEY is not set; add it to your .env file before running the request cells"
    )

# Every request authenticates through the Authorization header.
headers = {
    "Authorization": f"apikey {api_key}"
}
# Keyword arguments passed to requests.get(...) in the download cell.
# stream=True defers downloading the response body until it is read.
request_details = dict(
    headers=headers,
    stream=True
)
# On our network, we need to add a certificate or the request will fail
# Look at the readme for instructions on how to set this up
if cert := os.getenv("CERT"):
    request_details['verify'] = cert

## Extract schedules

In [11]:
# Download the bus schedule archive.
# - timeout: requests waits indefinitely by default, so a stalled connection
#   would hang the notebook; 60 s bounds the connect and each socket read.
# - raise_for_status(): stop here on a 4xx/5xx answer instead of letting a
#   later cell write an error body to disk as if it were the zip archive.
response = requests.get(BUS_SCHEDULE_URI, timeout=60, **request_details)
response.raise_for_status()
response

<Response [200]>

In [8]:
# Location of the schedule archive on disk: FILENAME_SCHEDULE inside data.path.
# NOTE(review): the `/` join presumes data.path is already a pathlib.Path, which
# would make the outer Path(...) redundant - confirm against the data module.
zip_path = Path(data.path / FILENAME_SCHEDULE)

In [13]:
# Persist the downloaded schedule archive to zip_path (binary write, overwriting any existing file).
payload = response.content
with zip_path.open("wb") as archive_file:
    archive_file.write(payload)

In [7]:
# List the files contained in the downloaded schedule archive.
# The handle is deliberately not named `zip`: a cell-level variable with that
# name would shadow the built-in zip() for every later cell in the kernel.
with zipfile.ZipFile(zip_path) as schedule_zip:
    print(schedule_zip.namelist())

['agency.txt', 'calendar.txt', 'calendar_dates.txt', 'notes.txt', 'routes.txt', 'shapes.txt', 'stops.txt', 'stop_times.txt', 'trips.txt']


In [9]:
# Print the column names of every file in the schedule archive.
# The handle is deliberately not named `zip`, which would shadow the built-in
# zip() for the rest of the kernel session.
# NOTE: each file is read in full just to show its header, and `df` is left
# bound to the last file in the archive once the loop finishes.
with zipfile.ZipFile(zip_path) as schedule_zip:
    for name in schedule_zip.namelist():
        with schedule_zip.open(name) as f:
            df = pd.read_csv(f)
            print(name)
            print(df.columns)

agency.txt
Index(['agency_id', 'agency_name', 'agency_url', 'agency_timezone',
       'agency_lang', 'agency_phone'],
      dtype='object')
calendar.txt
Index(['service_id', 'monday', 'tuesday', 'wednesday', 'thursday', 'friday',
       'saturday', 'sunday', 'start_date', 'end_date'],
      dtype='object')
calendar_dates.txt
Index(['service_id', 'date', 'exception_type'], dtype='object')
notes.txt
Index(['note_id', 'note_text'], dtype='object')
routes.txt
Index(['route_id', 'agency_id', 'route_short_name', 'route_long_name',
       'route_desc', 'route_type', 'route_color', 'route_text_color'],
      dtype='object')
shapes.txt
Index(['shape_id', 'shape_pt_lat', 'shape_pt_lon', 'shape_pt_sequence',
       'shape_dist_traveled'],
      dtype='object')
stops.txt
Index(['stop_id', 'stop_name', 'stop_lat', 'stop_lon', 'location_type',
       'parent_station', 'wheelchair_boarding', 'platform_code'],
      dtype='object')
stop_times.txt
Index(['trip_id', 'arrival_time', 'departure_time', 'st

In [23]:
# Load calendar.txt from the schedule archive into `calendar` and preview it.
with zipfile.ZipFile(zip_path) as archive, archive.open("calendar.txt") as calendar_file:
    calendar = pd.read_csv(calendar_file)

print(calendar.head())    # print the first 5 rows

   service_id  monday  tuesday  wednesday  thursday  friday  saturday  sunday  \
0           1       0        0          0         0       0         1       0   
1           2       1        1          1         1       1         0       1   
2           3       0        1          0         0       0         0       0   
3           4       0        0          0         0       1         0       0   
4           5       1        1          1         1       1         0       0   

   start_date  end_date  
0    20230909  20240727  
1    20230910  20231231  
2    20230912  20230919  
3    20230908  20231229  
4    20230911  20231229  


In [26]:
# Load trips.txt from the schedule archive into `trips` and preview it.
with zipfile.ZipFile(zip_path) as archive, archive.open("trips.txt") as trips_file:
    trips = pd.read_csv(trips_file)

print(trips.head())    # print the first 5 rows

    route_id  service_id  trip_id  shape_id     trip_headsign  direction_id  \
0  2447_S886         294  1000095     79361     Mount View HS             1   
1  2447_S871         294  1000097     79367     Mount View HS             1   
2  2447_S879         294  1000103     79353       Cessnock PS             1   
3  2454_8615         294  1000377     44723  Blaxland Station             0   
4  2454_8311         294  1000381     44720  Warrimoo Primary             1   

   block_id  wheelchair_accessible  trip_note  \
0       NaN                      2        NaN   
1       NaN                      2        NaN   
2       NaN                      2        NaN   
3       NaN                      2        NaN   
4       NaN                      2        NaN   

                                     route_direction  
0               Pelton to Mount View HS via Ellalong  
1  Middle Rd after Dunlop Dr to Mount View HS via...  
2  Millfield Rd opp Irwin Cr to Cessnock PS via A...  
3         

In [27]:
# Check for uniqueness: show every row whose (route_id, trip_id) pair occurs
# more than once; an empty result means the pair identifies a trip uniquely.
key_columns = ['route_id', 'trip_id']
repeated_keys = trips.duplicated(subset=key_columns, keep=False)
trips[repeated_keys]

Unnamed: 0,route_id,service_id,trip_id,shape_id,trip_headsign,direction_id,block_id,wheelchair_accessible,trip_note,route_direction


In [21]:
# This trip hits the same stop twice within the same minute
# Would be an interesting case to map using the model
#
# Load stop_times.txt explicitly rather than relying on whatever `df` happens
# to hold: the column-listing loop leaves `df` bound to the last file in the
# archive, so this cell only showed stop times through leftover kernel state.
with zipfile.ZipFile(zip_path) as schedule_zip:
    with schedule_zip.open("stop_times.txt") as stop_times_file:
        stop_times = pd.read_csv(stop_times_file)

stop_times[stop_times.trip_id == 1948209].head(20)

Unnamed: 0,trip_id,arrival_time,departure_time,stop_id,stop_sequence,stop_headsign,pickup_type,drop_off_type,shape_dist_traveled,timepoint,stop_note
2261020,1948209,15:10:00,15:10:00,2165100,1,,0,0,0,1,
2261021,1948209,15:11:00,15:11:00,2165101,2,,0,0,219,0,
2261022,1948209,15:15:00,15:15:00,2165138,3,,0,0,854,1,
2261023,1948209,15:17:00,15:17:00,2165139,4,,0,0,1168,0,
2261024,1948209,15:18:00,15:18:00,2165140,5,,0,0,1345,0,
2261025,1948209,15:18:00,15:18:00,2165207,6,,0,0,1448,1,
2261026,1948209,15:18:00,15:18:00,2165140,7,,0,0,1567,0,
2261027,1948209,15:19:00,15:19:00,2165141,8,,0,0,1704,0,
2261028,1948209,15:19:00,15:19:00,2165142,9,,0,0,1961,0,
2261029,1948209,15:20:00,15:20:00,216563,10,,0,0,2108,0,


## Realtime locations

In [7]:
from data import realtime

In [8]:
# Fetch the latest vehicle positions through the project's realtime helper.
# NOTE(review): the return type and any network access are defined in
# data.realtime, not in this notebook - confirm there.
positions = realtime.get_latest_positions()

In [9]:
# Convert the fetched positions into a DataFrame and preview the first rows.
# NOTE: this rebinds `df`, which the schedule cells above used for GTFS files.
df = realtime.get_positions_dataframe(positions)
df.head()

Unnamed: 0,id,trip_id,route_id,schedule_relationship,lat,lon,bearing,speed,timestamp,congestion_level,stop_id,vehicle_id,label,request_timestamp
0,34456_337859_2456_55C_1,1865390,2456_55C,0,-34.424908,150.892899,299.0,5.6,1694648127,4,,34456_337859_2456_55C_1,,1694648131
1,43264_206648993_2508_161_1,1950206,2508_161,0,-33.800179,151.285797,317.0,0.0,1694648041,0,,43264_206648993_2508_161_1,,1694648131
2,33553_26249597_2436_661_1,1039121,2436_661,0,-33.816727,151.002487,104.0,0.0,1694648118,1,,33553_26249597_2436_661_1,,1694648131
3,43280_230178013_2509_373X_1,1833783,2509_373X,0,-33.886799,151.219482,313.0,0.0,1694648122,1,,43280_230178013_2509_373X_1,,1694648131
4,33553_26251340_2436_611_1,1938691,2436_611,0,-33.771156,150.938629,57.0,3.2,1694648126,1,,33553_26251340_2436_611_1,,1694648131


In [10]:
from visualisations import maps
# Plot the vehicle positions on a map.
# NOTE(review): the preview of `df` already shows `lat`/`lon` columns, so this
# rename looks like a no-op kept for frames that use the long names - confirm.
column_aliases = {'latitude': 'lat', 'longitude': 'lon'}
maps.position_map(df.rename(columns=column_aliases))

In [11]:
# Hand the positions DataFrame to the project's upload helper.
# NOTE(review): the destination and side effects are defined in data.realtime,
# not in this notebook - confirm before re-running this cell.
realtime.upload_realtime(df)