<h2> Steps for extracting bus routes </h2>
<ul>
<li> Get the choices(bus numbers) from the home page
<li> Get the routes from the responses
<li> Iteratively make requests to get the bus route
<li> Store the results in CSV
</ul>

In [176]:
import requests
import bs4
import pandas as pd
from tqdm import tqdm

<h3>Get the choices(bus numbers) from the home page</h3>

In [171]:
def get_all_bus_numbers():
    base_url = ('http://www.transitlink.com.sg'\
                '/eservice/eguide/service_idx.php')
    res = requests.get(base_url)
    soup = bs4.BeautifulSoup(res.content)
    services = soup.find_all("dl",{'class':'eguide'})
    service_buses = {}
    for s in services:
        service_name = s.find_all("dt")[0].text
        options = s.find_all("option")
        bus_list = []
        [bus_list.append(bus.text) 
        for bus in options]
        bus_list.pop(0)
        service_buses[service_name]=bus_list
    return service_buses

<h3>Get the routes from the responses
</h3>

In [184]:
def get_bus_routes(bus_number,operator):
    request_url = ('http://www.transitlink.com.sg'\
                   '/eservice/eguide/service_route.php')
    data = {"service":bus_number,"B1":"submit"}
    res = requests.post(request_url,data=data)
    route = bs4.BeautifulSoup(res.content)
    table = route.find_all("section",{"class":"eguide-table"})[1]
    table_rows=table.find_all("tr")
    table_data=table.find_all("td")
    route = []
    road = ""
    bus_stop_name = ""
    bus_stop_no = ""
    bus_stop_value = ""
    distance = ""
    data_flag=0
    for data in table_data[4:]:
        if ("class" in data.attrs.keys() and 
            ("subhead2") in data.attrs["class"][0] and 
            ("route") not in data.attrs["class"]):
            continue
        elif ("class" in data.attrs.keys() and 
              ("subhead2") in data.attrs["class"] and 
              ("route") in data.attrs["class"]):
            road = data.text.replace("•","").strip()
        elif ("class" in data.attrs.keys() and 
              ("route") in data.attrs["class"]):
            bus_stop_name=data.text.replace("•","").strip()
            if bus_stop_name!="":
                #The last row of the table also contains the same tags
                #so checking if there is indeed a bus stop name
                data_flag=1
        elif data.find("input") is not None:
            bus_stop_value = data.find("input").attrs["value"]
            bus_stop_no = data.text.strip()
        else:
            distance = data.text
        if data_flag==1:
            route.append({
                "road":road,
                "bus_stop_name":bus_stop_name,
                "bus_stop_value":bus_stop_value,
                "bus_stop_no":bus_stop_no,
                "distance":distance,
                "bus_number":bus_number,
                "operator":operator
                })
            data_flag=0
    return route

<h3>Iteratively make requests to get the bus route</h3>

In [185]:
all_routes = []
bus_numbers = get_all_bus_numbers()
for k,v in tqdm(bus_numbers.items()):
    [ all_routes.extend(get_bus_routes(bus,k))
     for bus in v]



 BeautifulSoup(YOUR_MARKUP})

to this:

 BeautifulSoup(YOUR_MARKUP, "lxml")

  markup_type=markup_type))
100%|██████████| 4/4 [01:49<00:00, 30.58s/it]


<h3>Store the results in CSV</h3>

In [188]:
bus_routes = pd.DataFrame(all_routes)

In [189]:
bus_routes.head(3)

Unnamed: 0,bus_number,bus_stop_name,bus_stop_no,bus_stop_value,distance,operator,road
0,4,Tampines Int,75009,6822,0.0,SBS Transit,Tampines Ctrl 1
1,4,Opp Tampines Reg Lib,76191,4546,0.5,SBS Transit,Tampines Ave 7
2,4,Blk 423,76201,4548,0.8,SBS Transit,Tampines Ave 7


In [190]:
bus_routes.to_csv("sg_bus_routes.csv",index=False)