Data extraction phase
Location:  './siri_csv_data_new_4'

In [1]:
import xml.etree.ElementTree as ET
import csv
import os
import time
from datetime import datetime, timedelta
from bods_client.client import BODSClient
from bods_client.models import BoundingBox, SIRIVMParams

class Siri:
    """Flatten a SIRI-VM XML response into one dictionary of named fields.

    The instance keeps the raw payload in ``self.xml`` and the extracted
    fields in ``self.dict``. Only the FIRST ``VehicleActivity`` element of the
    response is extracted; any further vehicles in the same response are
    ignored. All vehicle-level fields are read from that single element so a
    row never mixes values from different vehicles.

    Missing or empty elements are reported as the empty string.
    """

    # Prefix map used by every ElementTree path below.
    NAMESPACES = {'n': 'http://www.siri.org.uk/siri'}

    # Fields looked up from the document root (response / delivery level).
    _ROOT_FIELDS = {
        "Version": ".//n:Version",
        "ResponseTimestamp (ServiceDelivery)": ".//n:ServiceDelivery/n:ResponseTimestamp",
        "ProducerRef": ".//n:ServiceDelivery/n:ProducerRef",
        "ResponseTimestamp (VehicleMonitoringDelivery)": ".//n:VehicleMonitoringDelivery/n:ResponseTimestamp",
        "RequestMessageRef": ".//n:VehicleMonitoringDelivery/n:RequestMessageRef",
        "ValidUntil": ".//n:VehicleMonitoringDelivery/n:ValidUntil",
        "ShortestPossibleCycle": ".//n:VehicleMonitoringDelivery/n:ShortestPossibleCycle",
    }

    # Location of the vehicle record the remaining fields are read from.
    _ACTIVITY_PATH = ".//n:VehicleMonitoringDelivery/n:VehicleActivity"
    _MVJ = "n:MonitoredVehicleJourney"

    # Fields looked up relative to the first VehicleActivity element.
    _ACTIVITY_FIELDS = {
        "RecordedAtTime": "n:RecordedAtTime",
        "ItemIdentifier": "n:ItemIdentifier",
        "ValidUntilTime": "n:ValidUntilTime",
        "LineRef": _MVJ + "/n:LineRef",
        "DirectionRef": _MVJ + "/n:DirectionRef",
        # Descendant search: matches DataFrameRef both as a direct child and
        # nested inside FramedVehicleJourneyRef (where its sibling
        # DatedVehicleJourneyRef is read from).
        "DataFrameRef": _MVJ + "//n:DataFrameRef",
        "DatedVehicleJourneyRef": _MVJ + "/n:FramedVehicleJourneyRef/n:DatedVehicleJourneyRef",
        "PublishedLineName": _MVJ + "/n:PublishedLineName",
        "OperatorRef": _MVJ + "/n:OperatorRef",
        "OriginRef": _MVJ + "/n:OriginRef",
        "OriginName": _MVJ + "/n:OriginName",
        "DestinationRef": _MVJ + "/n:DestinationRef",
        "DestinationName": _MVJ + "/n:DestinationName",
        "OriginAimedDepartureTime": _MVJ + "/n:OriginAimedDepartureTime",
        "Longitude": _MVJ + "/n:VehicleLocation/n:Longitude",
        "Latitude": _MVJ + "/n:VehicleLocation/n:Latitude",
        "Bearing": _MVJ + "/n:Bearing",
        "VehicleRef": _MVJ + "/n:VehicleRef",
        "BlockRef": _MVJ + "/n:BlockRef",
        "DriverRef": _MVJ + "/n:DriverRef",
    }

    # CSV column order; derived from the field tables so the two cannot drift.
    HEADER = tuple(_ROOT_FIELDS) + tuple(_ACTIVITY_FIELDS)

    def __init__(self, xml):
        """Store the raw XML payload and parse it immediately.

        Args:
            xml: SIRI-VM response document as ``str`` or ``bytes``.
        """
        self.xml = xml
        self.dict = self.parse(self.xml)

    def parse(self, raw_xml):
        """Parse ``raw_xml`` and return the extracted field dictionary.

        A malformed document is reported on stdout and yields an empty
        dictionary instead of raising.
        """
        try:
            root = ET.fromstring(raw_xml)
        except ET.ParseError as e:
            print(f"ParseError: {e}")
            return {}
        return self.parse_element(root)

    @staticmethod
    def _text(element):
        """Return the element's text, or "" when the element is absent/empty."""
        if element is None:
            return ""
        return element.text or ""

    def parse_element(self, root):
        """Extract every field in ``HEADER`` from the parsed document ``root``.

        Returns:
            A dict keyed by the names in ``HEADER`` (in that order) whose
            values are strings; fields that are not present map to "".
        """
        ns = self.NAMESPACES
        data = {}

        for key, path in self._ROOT_FIELDS.items():
            data[key] = self._text(root.find(path, ns))

        # NOTE(review): SIRI documents normally carry the version as a
        # ``version`` attribute on the root element rather than as a child
        # element - fall back to it when no <Version> element exists.
        if not data["Version"]:
            data["Version"] = root.get("version", "")

        # Anchor all vehicle-level lookups on one VehicleActivity so optional
        # elements missing from it are not filled in from another vehicle.
        activity = root.find(self._ACTIVITY_PATH, ns)
        for key, path in self._ACTIVITY_FIELDS.items():
            element = activity.find(path, ns) if activity is not None else None
            data[key] = self._text(element)

        return data

    def to_csv(self, file_path, data_list):
        """Write ``data_list`` to ``file_path`` as CSV with a header row.

        Args:
            file_path: Destination path; an existing file is overwritten.
            data_list: Iterable of dicts as produced by ``parse``. Keys missing
                from a dict are written as empty cells. When the list is empty
                no file is created.
        """
        if not data_list:
            return

        # Explicit UTF-8 so non-ASCII operator/stop names do not depend on the
        # platform's default encoding.
        with open(file_path, 'w', newline='', encoding='utf-8') as csvfile:
            csvwriter = csv.writer(csvfile)
            csvwriter.writerow(self.HEADER)
            csvwriter.writerows(
                [data.get(key, '') for key in self.HEADER] for data in data_list
            )

# BODS API key. Prefer supplying it through the BODS_API_KEY environment
# variable; the literal below is only a fallback kept for compatibility.
# SECURITY: a key stored in plain text in a notebook should be treated as
# exposed - rotate it and remove the literal once the variable is in use.
API_KEY = os.environ.get('BODS_API_KEY', '27ae05d04cc3d92c53e5f516a5be5dcbf9ba3bad')

bods = BODSClient(api_key=API_KEY)

# Area of interest ("briar rd to victoria", also "queen street to harviston").
# Corner points as (latitude, longitude):
#   (52.94389885658099, -1.2790324517682254)
#   (53.21242553866858, -1.2701060605614256)
#   (53.20831336525762, -0.9597423047557652)
#   (52.93323311824804, -0.9437763876450186)
# The bounding box below is the min/max envelope of those four corners.
box = BoundingBox(min_longitude=-1.2790324517682254, min_latitude=52.93323311824804, max_longitude=-0.9437763876450186, max_latitude=53.21242553866858)
siri_params = SIRIVMParams(bounding_box=box)

# Stop collecting two weeks after the cell is started
end_time = datetime.now() + timedelta(weeks=2)

# Directory to save CSV files
csv_dir = 'siri_csv_data_new_5'
os.makedirs(csv_dir, exist_ok=True)

# Seconds between successive polls of the feed
interval = 5

# Polls per batch file: 1800 s / interval, i.e. one CSV per 30 minutes
batch_size = 1800 // interval
batch_counter = 0

while datetime.now() < end_time:
    batch_counter += 1
    batch_file_path = os.path.join(csv_dir, f'batch_5_{batch_counter}.csv')
    collected_data = []

    start_time = time.time()

    try:
        for _ in range(batch_size):
            # Re-check the deadline per poll so the run does not overshoot it
            # by up to a whole batch.
            if datetime.now() >= end_time:
                break

            try:
                data = bods.get_siri_vm_data_feed(params=siri_params)
                if data:
                    siri = Siri(data)
                    collected_data.append(siri.dict)
                    print("Data Appended")
                else:
                    print("No data received")
            except Exception as e:
                # Best effort: a failed poll is logged and skipped so one bad
                # response does not end a two-week collection run.
                print(f"Error fetching data: {e}")

            # Sleep to the next multiple of `interval` since the batch started,
            # keeping polls aligned regardless of how long the request took.
            time.sleep(interval - ((time.time() - start_time) % interval))
    finally:
        # Runs on normal completion and on interruption (e.g. stopping the
        # cell), so a partially collected batch is still written to disk.
        if collected_data:
            siri.to_csv(batch_file_path, collected_data)
            print(f'Batch {batch_counter} saved to {batch_file_path}')
        else:
            print(f'Batch {batch_counter} collected no data; nothing written')


Data Appended
Data Appended
Data Appended
Data Appended
Data Appended
Data Appended
Data Appended
Data Appended
Data Appended
Data Appended
Data Appended
Data Appended
Data Appended
Data Appended
Data Appended
Data Appended
Data Appended
Data Appended
Data Appended
Data Appended
Data Appended
Data Appended
Data Appended
Data Appended
Data Appended
Data Appended
Data Appended
Data Appended
Data Appended
Data Appended
Data Appended
Data Appended
Data Appended
Data Appended
Data Appended
Data Appended
Data Appended
Data Appended
Data Appended
Data Appended
Data Appended
Data Appended
Data Appended
Data Appended
Data Appended
Data Appended
Data Appended
Data Appended
Data Appended
Data Appended
Data Appended
Data Appended
Data Appended
Data Appended
Data Appended
Data Appended
Data Appended
Data Appended
Data Appended
Data Appended
Data Appended
Data Appended
Data Appended
Data Appended
Data Appended
Data Appended
Data Appended
Data Appended
Data Appended
Data Appended
Data Appended
Data A