In [None]:
# Install the required packages
%pip install geopandas pandas folium requests tqdm matplotlib xlrd

In [3]:
# Importing libraries
import geopandas
import re
import pandas as pd
import folium
import requests
import time
from tqdm import tqdm
import matplotlib.pyplot as plt
import os
import xlrd
# Turn off pandas' chained-assignment (SettingWithCopy) warning for the whole notebook.
# NOTE(review): this hides the warning rather than fixing its cause — prefer .loc-based assignment where possible.
pd.options.mode.chained_assignment = None

### The code below gathers the bus/MRT geospatial data needed for our project by making API calls to the LTA DataMall

In [6]:
# Set an environment variable named LTA_Key to store your API key for the LTA DataMall
# Query the LTA DataMall API for the bus routes data and save it to data/bus_routes.csv
all_data = []

skip = 0

resource_url = "https://datamall2.mytransport.sg/ltaodataservice/BusRoutes"

# Fail early with a clear message instead of sending an unauthenticated request
api_key = os.getenv("LTA_Key")
if not api_key:
    raise RuntimeError("Environment variable LTA_Key is not set; it must hold your LTA DataMall API key")

headers = {
    'AccountKey': api_key,
    'accept': "application/json"
}

# API only allows 500 records to be retrieved at a time, hence a while loop is used to retrieve all records
while True:
    url = f"{resource_url}?$skip={skip}"
    # The timeout keeps the cell from hanging forever on a stalled connection
    response = requests.get(url, headers=headers, timeout=30)
    if response.status_code != 200:
        # Stopping silently here would save a truncated (or empty) CSV as if it were complete
        raise RuntimeError(
            f"BusRoutes request failed with HTTP {response.status_code} at $skip={skip}"
        )

    res_list = response.json()
    data = res_list.get('value', [])

    # An empty page means every record has been retrieved
    if not data:
        break

    all_data.extend(data)
    skip += 500

# Convert the collected data to a DataFrame
df = pd.json_normalize(all_data)

# Save the DataFrame to a CSV file (create the output folder on a fresh checkout)
os.makedirs('data', exist_ok=True)
df.to_csv('data/bus_routes.csv', index=False)

In [7]:
# Query the LTA DataMall API for the bus stops data (Can be merged with bus_routes data)
# and save it to data/bus_stops.csv
all_data1 = []

skip1 = 0

resource_url1 = "https://datamall2.mytransport.sg/ltaodataservice/BusStops"

# Fail early with a clear message instead of sending an unauthenticated request
api_key = os.getenv("LTA_Key")
if not api_key:
    raise RuntimeError("Environment variable LTA_Key is not set; it must hold your LTA DataMall API key")

headers = {
    'AccountKey': api_key,
    'accept': "application/json"
}

# API only allows 500 records to be retrieved at a time, hence a while loop is used to retrieve all records
while True:
    url = f"{resource_url1}?$skip={skip1}"
    # The timeout keeps the cell from hanging forever on a stalled connection
    response = requests.get(url, headers=headers, timeout=30)
    if response.status_code != 200:
        # Stopping silently here would save a truncated (or empty) CSV as if it were complete
        raise RuntimeError(
            f"BusStops request failed with HTTP {response.status_code} at $skip={skip1}"
        )

    res_list = response.json()
    data = res_list.get('value', [])

    # An empty page means every record has been retrieved
    if not data:
        break

    all_data1.extend(data)
    skip1 += 500

# Convert the collected data to a DataFrame
df1 = pd.json_normalize(all_data1)

# Save the DataFrame to a CSV file (create the output folder on a fresh checkout)
os.makedirs('data', exist_ok=True)
df1.to_csv('data/bus_stops.csv', index=False)

In [8]:
# Query the LTA DataMall API for the passenger volume by bus stop
# and save the response to data/passenger_vol_bus_stops.csv
all_data2 = []

skip2 = 0
page_size2 = 500  # The API returns at most 500 records per call

resource_url2 = "https://datamall2.mytransport.sg/ltaodataservice/PV/Bus"

# Fail early with a clear message instead of sending an unauthenticated request
api_key = os.getenv("LTA_Key")
if not api_key:
    raise RuntimeError("Environment variable LTA_Key is not set; it must hold your LTA DataMall API key")

headers = {
    'AccountKey': api_key,
    'accept': "application/json"
}

# Page through the endpoint with $skip until a short or empty page marks the end of the data
while True:
    url = f"{resource_url2}?$skip={skip2}"
    # The timeout keeps the cell from hanging forever on a stalled connection
    response = requests.get(url, headers=headers, timeout=30)
    if response.status_code != 200:
        # Stopping silently here would save a truncated (or empty) CSV as if it were complete
        raise RuntimeError(
            f"PV/Bus request failed with HTTP {response.status_code} at $skip={skip2}"
        )

    res_list = response.json()
    data = res_list.get('value', [])

    if not data:
        break

    all_data2.extend(data)

    # This call returns a download link rather than paginated rows, so waiting for an
    # empty page could keep re-requesting the same link; a page shorter than the page
    # size already tells us there is nothing more to fetch.
    if len(data) < page_size2:
        break

    skip2 += page_size2

# Convert the collected data to a DataFrame
df2 = pd.json_normalize(all_data2)

# Save the DataFrame to a CSV file (create the output folder on a fresh checkout)
os.makedirs('data', exist_ok=True)
df2.to_csv('data/passenger_vol_bus_stops.csv', index=False)

# Note that for this API call, the output is a hyperlink from which you will need to download the file manually (File Name: transport_node_bus_202408.csv)


In [9]:
# Query the LTA DataMall API for the passenger volume by origin-destination bus stop
# and save the response to data/passenger_vol_OD_bus_stops.csv
all_data3 = []

skip3 = 0
page_size3 = 500  # The API returns at most 500 records per call

resource_url3 = "https://datamall2.mytransport.sg/ltaodataservice/PV/ODBus"

# Fail early with a clear message instead of sending an unauthenticated request
api_key = os.getenv("LTA_Key")
if not api_key:
    raise RuntimeError("Environment variable LTA_Key is not set; it must hold your LTA DataMall API key")

headers = {
    'AccountKey': api_key,
    'accept': "application/json"
}

# Page through the endpoint with $skip until a short or empty page marks the end of the data
while True:
    url = f"{resource_url3}?$skip={skip3}"
    # The timeout keeps the cell from hanging forever on a stalled connection
    response = requests.get(url, headers=headers, timeout=30)
    if response.status_code != 200:
        # Stopping silently here would save a truncated (or empty) CSV as if it were complete
        raise RuntimeError(
            f"PV/ODBus request failed with HTTP {response.status_code} at $skip={skip3}"
        )

    res_list = response.json()
    data = res_list.get('value', [])

    if not data:
        break

    all_data3.extend(data)

    # This call returns a download link rather than paginated rows, so waiting for an
    # empty page could keep re-requesting the same link; a page shorter than the page
    # size already tells us there is nothing more to fetch.
    if len(data) < page_size3:
        break

    skip3 += page_size3

# Convert the collected data to a DataFrame
df3 = pd.json_normalize(all_data3)

# Save the DataFrame to a CSV file (create the output folder on a fresh checkout)
os.makedirs('data', exist_ok=True)
df3.to_csv('data/passenger_vol_OD_bus_stops.csv', index=False)

# Note that for this API call, the output is a hyperlink from which you will need to download the file manually (File Name: origin_destination_bus_202408)

In [10]:
# Query the LTA DataMall API for the passenger volume by train station
# and save the response to data/passenger_vol_train.csv
all_data4 = []

skip4 = 0
page_size4 = 500  # The API returns at most 500 records per call

resource_url4 = "https://datamall2.mytransport.sg/ltaodataservice/PV/Train"

# Fail early with a clear message instead of sending an unauthenticated request
api_key = os.getenv("LTA_Key")
if not api_key:
    raise RuntimeError("Environment variable LTA_Key is not set; it must hold your LTA DataMall API key")

headers = {
    'AccountKey': api_key,
    'accept': "application/json"
}

# Page through the endpoint with $skip until a short or empty page marks the end of the data
while True:
    url = f"{resource_url4}?$skip={skip4}"
    # The timeout keeps the cell from hanging forever on a stalled connection
    response = requests.get(url, headers=headers, timeout=30)
    if response.status_code != 200:
        # Stopping silently here would save a truncated (or empty) CSV as if it were complete
        raise RuntimeError(
            f"PV/Train request failed with HTTP {response.status_code} at $skip={skip4}"
        )

    res_list = response.json()
    data = res_list.get('value', [])

    if not data:
        break

    all_data4.extend(data)

    # This call returns a download link rather than paginated rows, so waiting for an
    # empty page could keep re-requesting the same link; a page shorter than the page
    # size already tells us there is nothing more to fetch.
    if len(data) < page_size4:
        break

    skip4 += page_size4

# Convert the collected data to a DataFrame
df4 = pd.json_normalize(all_data4)

# Save the DataFrame to a CSV file (create the output folder on a fresh checkout)
os.makedirs('data', exist_ok=True)
df4.to_csv('data/passenger_vol_train.csv', index=False)

# Note that for this API call, the output is a hyperlink from which you will need to download the file manually (File Name: transport_node_train_202408.csv)

In [11]:
# Query the LTA DataMall API for the passenger volume by origin-destination MRT Station
# and save the response to data/passenger_vol_OD_MRT_stations.csv
all_data5 = []

skip5 = 0
page_size5 = 500  # The API returns at most 500 records per call

resource_url5 = "https://datamall2.mytransport.sg/ltaodataservice/PV/ODTrain"

# Fail early with a clear message instead of sending an unauthenticated request
api_key = os.getenv("LTA_Key")
if not api_key:
    raise RuntimeError("Environment variable LTA_Key is not set; it must hold your LTA DataMall API key")

headers = {
    'AccountKey': api_key,
    'accept': "application/json"
}

# Page through the endpoint with $skip until a short or empty page marks the end of the data
while True:
    url = f"{resource_url5}?$skip={skip5}"
    # The timeout keeps the cell from hanging forever on a stalled connection
    response = requests.get(url, headers=headers, timeout=30)
    if response.status_code != 200:
        # Stopping silently here would save a truncated (or empty) CSV as if it were complete
        raise RuntimeError(
            f"PV/ODTrain request failed with HTTP {response.status_code} at $skip={skip5}"
        )

    res_list = response.json()
    data = res_list.get('value', [])

    if not data:
        break

    all_data5.extend(data)

    # This call returns a download link rather than paginated rows, so waiting for an
    # empty page could keep re-requesting the same link; a page shorter than the page
    # size already tells us there is nothing more to fetch.
    if len(data) < page_size5:
        break

    skip5 += page_size5

# Convert the collected data to a DataFrame
df5 = pd.json_normalize(all_data5)

# Save the DataFrame to a CSV file (create the output folder on a fresh checkout)
os.makedirs('data', exist_ok=True)
df5.to_csv('data/passenger_vol_OD_MRT_stations.csv', index=False)

# Note that for this API call, the output is a hyperlink from which you will need to download the file manually (File Name: origin_destination_train_202408.csv)

### Below is our code to read in the datasets obtained from above

In [14]:
# Bus network reference data:
#   bus_stops  - all bus stops in Singapore with their longitude/latitude
#   bus_routes - bus routes for each bus service (can be merged with bus_stops)
# NOTE(review): read_csv infers column dtypes; if stop codes carry leading zeros they
# would be parsed as integers here — confirm how downstream cells handle that.
bus_stops, bus_routes = (
    pd.read_csv(csv_path)
    for csv_path in ('data/bus_stops.csv', 'data/bus_routes.csv')
)

# Bus passenger volume (manually downloaded files):
#   passenger_vol_bus_stops    - passenger volume for each bus stop
#   passenger_vol_OD_bus_stops - passenger volume for each origin-destination bus stop pair
passenger_vol_bus_stops, passenger_vol_OD_bus_stops = (
    pd.read_csv(csv_path)
    for csv_path in (
        'data/transport_node_bus_202408.csv',
        'data/origin_destination_bus_202408.csv',
    )
)

# Train passenger volume (manually downloaded files):
#   passenger_vol_train           - passenger volume for each train station
#   passenger_vol_OD_MRT_stations - passenger volume for each origin-destination train station pair
passenger_vol_train, passenger_vol_OD_MRT_stations = (
    pd.read_csv(csv_path)
    for csv_path in (
        'data/transport_node_train_202408.csv',
        'data/origin_destination_train_202408.csv',
    )
)

# Lookup of the station code for each train station
train_station_code = pd.read_excel('data/Train Station Codes and Chinese Names.xls')