# Prototype of API connection

Reference article: [GTFS Realtime vehicle positions using Python and Databricks (TfNSW)](https://medium.com/@bhaveshpatelaus/gtfs-realtime-vehicle-positions-using-python-and-databricks-tfnsw-a33b98f22e97)

In [2]:
import os
from dotenv import load_dotenv
import requests
from pathlib import Path
import zipfile
import pandas as pd

from google.transit import gtfs_realtime_pb2
from google.transit import gtfs_realtime_pb2
from google.protobuf.json_format import MessageToDict
from google.protobuf.json_format import MessageToJson

from collections import OrderedDict
from protobuf_to_dict import protobuf_to_dict

from data import data

# Load variables from a .env file into the process environment so that the
# os.getenv lookups in this notebook can see them.
load_dotenv()

True

In [3]:
# File name under which the downloaded GTFS schedule archive is stored.
FILENAME_SCHEDULE = 'gtfs.zip'

In [4]:
# Credentials are read from the environment; a value is None when its
# variable is not set.
app_name = os.environ.get("APP_NAME")
api_key = os.environ.get("API_KEY")

In [5]:
# Root of the API; every endpoint below is this root plus a fixed path.
BASE_URL = "https://api.transport.nsw.gov.au"
BUS_POSITION_URI = BASE_URL + "/v1/gtfs/vehiclepos/buses"
BUS_SCHEDULE_URI = BASE_URL + "/v1/gtfs/schedule/buses"
# NOTE(review): the name suggests ferry positions, but the path is
# /v1/gtfs/historical -- confirm which endpoint is intended.
FERRY_POSITION = BASE_URL + "/v1/gtfs/historical"

In [6]:
# Keyword arguments shared by every request made to the API.
headers = {
    "Authorization": f"apikey {api_key}"
}
request_details = dict(
    headers=headers,
    stream=True
)
# Optional CA bundle / certificate path taken from the CERT environment variable.
# The walrus target must receive the variable's value itself: `:=` binds more
# loosely than `is not None`, so an unparenthesised comparison would store a
# bool in `cert` and the configured path would never reach `verify`.
# An empty CERT is skipped so that an empty string is never passed as `verify`.
if cert := os.getenv("CERT"):
    request_details['verify'] = cert

## Extract schedules

In [53]:
# Download the bus schedule archive.
# A default timeout keeps the cell from hanging forever on a stalled connection;
# request_details is unpacked last so an explicit timeout there takes precedence.
response = requests.get(BUS_SCHEDULE_URI, **{"timeout": 60, **request_details})
# Fail here on an HTTP error instead of writing an error body to disk as the archive.
response.raise_for_status()
response

In [66]:
# Location the schedule archive is written to: `data.path` joined with the
# archive file name.
# NOTE(review): `data.path` comes from the local `data` module; presumably it is
# already a Path-like object (it is combined with `/`) -- confirm.
zip_path = Path(data.path / FILENAME_SCHEDULE)

In [62]:
# Persist the downloaded bytes at zip_path; the trailing semicolon keeps the
# cell from echoing the byte count returned by write_bytes.
zip_path.write_bytes(response.content);

In [68]:
# List the member files of the downloaded archive.
# The handle is named `archive` rather than `zip`: at notebook top level the
# name would stay bound after the cell and shadow the built-in zip().
with zipfile.ZipFile(zip_path) as archive:
    print(archive.namelist())

['agency.txt', 'calendar.txt', 'calendar_dates.txt', 'notes.txt', 'routes.txt', 'shapes.txt', 'stops.txt', 'stop_times.txt', 'trips.txt']


In [73]:
# Read shapes.txt directly from the archive member into a DataFrame.
# NOTE(review): despite its name, `train` holds the contents of shapes.txt.
with zipfile.ZipFile(zip_path) as z, z.open("shapes.txt") as f:
    train = pd.read_csv(f)
print(train.head())    # first five rows

   shape_id  shape_pt_lat  shape_pt_lon  shape_pt_sequence  \
0       177    -33.418715    151.341307                  1   
1       177    -33.418708    151.341302                  2   
2       177    -33.418686    151.341128                  3   
3       177    -33.418567    151.340324                  4   
4       177    -33.418519    151.339836                  5   

   shape_dist_traveled  
0                    0  
1                    9  
2                   25  
3                  101  
4                  147  


## Realtime locations

In [76]:
# Fetch the current bus vehicle-position feed.
# A default timeout keeps the cell from hanging forever on a stalled connection;
# request_details is unpacked last so an explicit timeout there takes precedence.
response = requests.get(BUS_POSITION_URI, **{"timeout": 60, **request_details})
# Fail here on an HTTP error instead of handing an error body to the protobuf parser.
response.raise_for_status()
response

<Response [200]>

In [77]:
# Decode the binary response body into a GTFS-realtime FeedMessage.
feed = gtfs_realtime_pb2.FeedMessage()
# The integer echoed by the cell is this call's return value
# (presumably the number of bytes parsed -- confirm).
feed.ParseFromString(response.content)

94272

In [80]:
# Print how many entities the feed contains, then display the first one as a sample.
print(len(feed.entity))
feed.entity[0]

525


id: "43280_232209662_2509_379_1"
vehicle {
  trip {
    trip_id: "1778929"
    route_id: "2509_379"
    start_time: "22:46:00"
    start_date: "20230829"
    schedule_relationship: SCHEDULED
  }
  vehicle {
    id: "43280_232209662_2509_379_1"
  }
  position {
    latitude: -33.9046211
    longitude: 151.267029
    bearing: 117
    speed: 0
  }
  timestamp: 1693312898
  congestion_level: UNKNOWN_CONGESTION_LEVEL
  occupancy_status: MANY_SEATS_AVAILABLE
}

In [86]:
# NOTE(review): this parses the same response body that an earlier cell already
# parsed into `feed`; it looks redundant when the cells run in order -- confirm.
feed.ParseFromString(response.content)
# Earlier exploration, left disabled: print the vehicle message of each entity.
#for entity in feed.entity:
#    if entity.HasField('vehicle'):
#        print(entity.vehicle)

# Convert the feed into nested plain dicts and show the first entity.
# In the displayed sample, enum fields such as schedule_relationship appear as integers.
dict_obj = protobuf_to_dict(feed)
dict_obj['entity'][0]

{'id': '43280_232209662_2509_379_1',
 'vehicle': {'trip': {'trip_id': '1778929',
   'start_time': '22:46:00',
   'start_date': '20230829',
   'schedule_relationship': 0,
   'route_id': '2509_379'},
  'position': {'latitude': -33.90462112426758,
   'longitude': 151.26702880859375,
   'bearing': 117.0,
   'speed': 0.0},
  'timestamp': 1693312898,
  'congestion_level': 0,
  'vehicle': {'id': '43280_232209662_2509_379_1'},
  'occupancy_status': 1}}

In [89]:
# Column order of the flattened vehicle-position table.
VEHICLE_COLUMNS = [
    "id", "trip_id", "route_id", "schedule_relationship",
    "latitude", "longitude", "timestamp", "congestion_level",
    "stop_id", "vehicle_id", "label",
]


def _entity_to_row(block):
    """Flatten one feed-entity dict into a single flat row.

    Args:
        block: One element of ``dict_obj["entity"]``: a dict with an ``"id"``
            key and, normally, a nested ``"vehicle"`` dict.

    Returns:
        A dict keyed by ``VEHICLE_COLUMNS``. Fields absent from the entity are
        the empty string; latitude/longitude are ``None`` when the entity has
        no ``"position"`` at all.

    Raises:
        KeyError: If the entity has no ``"id"``.
    """
    # Sub-dicts may be absent, so fall back to empty dicts instead of raising
    # KeyError on one sparse entity.
    vehicle = block.get("vehicle", {})
    trip = vehicle.get("trip", {})
    descriptor = vehicle.get("vehicle", {})

    if "position" in vehicle:
        latitude = vehicle["position"].get("latitude", "")
        longitude = vehicle["position"].get("longitude", "")
    else:
        latitude = longitude = None

    return {
        "id": block["id"],
        "trip_id": trip.get("trip_id", ""),
        "route_id": trip.get("route_id", ""),
        "schedule_relationship": trip.get("schedule_relationship", ""),
        "latitude": latitude,
        "longitude": longitude,
        "timestamp": vehicle.get("timestamp", ""),
        "congestion_level": vehicle.get("congestion_level", ""),
        "stop_id": vehicle.get("stop_id", ""),
        "vehicle_id": descriptor.get("id", ""),
        "label": descriptor.get("label", ""),
    }


# One row per feed entity; a feed without entities yields an empty list.
collector = [_entity_to_row(block) for block in dict_obj.get("entity", [])]
# Number of processed entities (name kept in case a later cell reads it).
counter = len(collector)
# Build the frame once, after all rows are collected; the explicit column list
# keeps the schema stable even when the feed is empty.
df = pd.DataFrame(collector, columns=VEHICLE_COLUMNS)

In [90]:
# Display the flattened vehicle-position table (rendered as a truncated preview).
df

Unnamed: 0,id,trip_id,route_id,schedule_relationship,latitude,longitude,timestamp,congestion_level,stop_id,vehicle_id,label
0,43280_232209662_2509_379_1,1778929,2509_379,0,-33.904621,151.267029,1693312898,0,,43280_232209662_2509_379_1,
1,3936_249346_2453_28_1,1434298,2453_28,0,-33.432343,151.341812,1693313083,1,,3936_249346_2453_28_1,
2,43280_230177416_2509_304_1,1759340,2509_304,0,-33.878521,151.210327,1693313087,1,,43280_230177416_2509_304_1,
3,33553_26250035_2436_665_1,1822328,2436_665,0,-33.713531,150.934647,1693313090,0,,33553_26250035_2436_665_1,
4,43280_231821081_2509_313_1,1597194,2509_313,0,-33.911320,151.250305,1693313082,1,,43280_231821081_2509_313_1,
...,...,...,...,...,...,...,...,...,...,...,...
520,12534_135684910062_2452_64_1,1910420,2452_64,0,-33.506317,151.340454,1693313090,0,,12534_135684910062_2452_64_1,
521,12534_135684931262_2452_50/3_1,1910672,2452_50/3,0,-33.421642,151.328323,1693313091,0,,12534_135684931262_2452_50/3_1,
522,12534_135683020062_2452_37_1,998980,2452_37,0,-33.418213,151.361664,1693313084,0,,12534_135683020062_2452_37_1,
523,12534_135684920062_2452_50/3_1,1978968,2452_50/3,0,-33.517666,151.317383,1693313090,0,,12534_135684920062_2452_50/3_1,
