In [None]:
!pip install BODSDataExtractor

In [None]:
import os
import pandas as pd
from BODSDataExtractor.extractor import TimetableExtractor

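## Provide your API key from https://data.bus-data.dft.gov.uk/

The cell below reads the key from the `bods_api_key` environment variable, so set that variable before running the notebook.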


In [None]:
# Read the BODS API key from the environment rather than pasting it into
# the notebook; this evaluates to None when the variable is not set.
api = os.getenv('bods_api_key')


## Use TimetableExtractor to get all info on bus service lines

The data is available at three levels of granularity:
 - Metadata (dataset info)
 - Service Level (bus service info)
 - Stop Level (timetable)
 

In [None]:
# Build an extractor with service line data enabled and stop level data
# disabled, limited to 5.
bods_data = TimetableExtractor(
    api_key=api,
    service_line_level=True,
    stop_level=False,
    limit=5,
)

#### View the metadata for all service lines

This is the same as the data that can be seen on dataset info pages in the BODS UI. E.g. https://data.bus-data.dft.gov.uk/timetable/dataset/322/

In [None]:
# Metadata exposed by the extractor.
metadata = bods_data.metadata
# Leave the preview as the cell's last expression so the notebook renders
# it with its rich repr instead of plain printed text.
metadata.head()

#### View the service line level info using "bods_data.service_line_extract"

In [None]:
# Service line level extract exposed by the extractor.
service_line_info = bods_data.service_line_extract
# Leave the preview as the cell's last expression so the notebook renders
# it with its rich repr instead of plain printed text.
service_line_info.head()

#### Count the number of unique operators who have published data to BODS using the count_operators() method.

In [None]:
# Keep the result in a named variable, then show it as the cell output.
operator_count = bods_data.count_operators()
operator_count

## Extract a Timetable

The code cell below extracts and parses timetable data for a single dataset. It takes the TransXChange data from each file, and produces a pandas dataframe for each inbound and outbound journey associated with a file.

In [None]:
# Build an extractor for a single published dataset with both service line
# and stop level data enabled.
timetable_ = TimetableExtractor(
    api_key=api,              # your API key
    limit=1,                  # how many datasets to view
    status='published',       # only view published datasets
    service_line_level=True,  # True if you require service line data
    stop_level=True,          # True if you require stop level data
)

Use the code below to view the first row of the 'stop level extract' dataframe. This will contain the service level data as mentioned above, but will also hold a dataframe for each inbound and outbound timetable for each file.

Take the first row of the stop level extract; this represents the first file in the dataset:

In [None]:
# Row 0 of the stop level extract corresponds to the first file in the dataset.
first_file = timetable_.stop_level_extract.iloc[0]
# Display via the cell's last expression rather than print().
first_file

As you can see, this variable now contains some useful information about the file, along with its inbound and outbound timetables. Let's look at the outbound timetable:

In [None]:
# Pull the outbound timetable out of the first file's row.
outbound_timetable = first_file['collated_timetable_outbound']
# Leave it as the cell's last expression so the notebook renders it with
# its rich repr instead of plain printed text.
outbound_timetable

You can save all of the timetables for the data you have queried with the function below. This will save to the current working directory.

In [None]:
# Save the timetables for the queried data; per the note above, output goes
# to the current working directory.
timetable_.save_timetables()