# Retrieve Data From Datamall

Data Retrieved from LTA Datamall:
- Bus Stop
- Bus Services
- Bus Routes
- Passenger Volume (Bus Stops)
- Passenger Volume (Origin Destination Bus Stops)
- Passenger Volume (Train Stations)
- Passenger Volume (Origin Destination Train Stations)

Some APIs, such as GeospatialWholeIsland (used to retrieve cycling paths), do not work; that data has to be downloaded manually from LTA's website.

### Loading Libraries and API Configurations

In [41]:
# General Data  Tools
import pandas as pd
import numpy as np
import os

# Utilities
from utility import retrieve_from_datamall

# Load API Token
# The DataMall token is read from the environment (load_dotenv() also picks up
# a local .env file) so it never has to be hard-coded in the notebook.
from dotenv import load_dotenv
load_dotenv()
datamall_token = os.getenv('datamall_token')
if not datamall_token:
    # os.getenv returns None for a missing variable; stop here with a clear
    # message instead of passing None to every retrieve_from_datamall call below.
    raise RuntimeError(
        "Environment variable 'datamall_token' is not set; "
        "add it to your .env file or export it before running this notebook."
    )


### Bus Stop Data

In [2]:
# Fetch the 'BusStops' dataset through the project helper. Later cells treat the
# result as a pandas DataFrame (.duplicated(), .dtypes, .to_csv()).
# NOTE(review): the preview below ends at index label 66 although the frame has
# 5068 rows, so the index is apparently not unique — presumably the helper
# concatenates result pages without resetting it; confirm in utility.py.
df_bus_stops = retrieve_from_datamall(datamall_token, 'BusStops')

In [5]:
# Preview the frame; the rendered output is abbreviated to the leading and trailing rows.
df_bus_stops

Unnamed: 0,BusStopCode,RoadName,Description,Latitude,Longitude
0,01012,Victoria St,Hotel Grand Pacific,1.296848,103.852536
1,01013,Victoria St,St. Joseph's Ch,1.297710,103.853225
2,01019,Victoria St,Bras Basah Cplx,1.296990,103.853022
3,01029,Nth Bridge Rd,Opp Natl Lib,1.296673,103.854414
4,01039,Nth Bridge Rd,Bugis Cube,1.298208,103.855491
...,...,...,...,...,...
63,99139,Changi Village Rd,Blk 5,1.388195,103.987234
64,99161,Nicoll Dr,Bef Changi Beach CP 3,1.390262,103.992957
65,99171,Nicoll Dr,Changi Beach CP 2,1.391128,103.991021
66,99181,Telok Paku Rd,Bef S'pore Aviation Ac,1.387754,103.988503


In [3]:
# Select rows that exactly repeat an earlier row; an empty result means no duplicates.
df_bus_stops.loc[df_bus_stops.duplicated()]

Unnamed: 0,BusStopCode,RoadName,Description,Latitude,Longitude


In [4]:
# Check column types: BusStopCode should stay a string (object) so that codes
# with leading zeros, such as 01012, are not turned into integers.
df_bus_stops.dtypes

BusStopCode     object
RoadName        object
Description     object
Latitude       float64
Longitude      float64
dtype: object

In [6]:
# Report the frame's (rows, columns) followed by the per-column count of missing values.
print(df_bus_stops.shape, df_bus_stops.isnull().sum(), sep="\n")

(5068, 5)
BusStopCode    0
RoadName       0
Description    0
Latitude       0
Longitude      0
dtype: int64


In [7]:
# Persist without the index column. BusStopCode holds leading zeros (e.g. 01012),
# so read this file back with dtype={"BusStopCode": str} to keep them intact.
df_bus_stops.to_csv("../data/bus_stops.csv", encoding="utf-8", index=False)

### Bus Services Data

In [44]:
# Fetch the 'BusServices' dataset through the project helper. Later cells treat
# the result as a pandas DataFrame (.duplicated(), .dtypes, .to_csv()).
# NOTE(review): the preview below ends at index label 198 although the frame has
# 700 rows, so the index is apparently not unique — presumably the helper
# concatenates result pages without resetting it; confirm in utility.py.
df_bus_services = retrieve_from_datamall(datamall_token, 'BusServices')

In [20]:
# Select rows that exactly repeat an earlier row; an empty result means no duplicates.
df_bus_services.loc[df_bus_services.duplicated()]

Unnamed: 0,ServiceNo,Operator,Direction,Category,OriginCode,DestinationCode,AM_Peak_Freq,AM_Offpeak_Freq,PM_Peak_Freq,PM_Offpeak_Freq,LoopDesc


In [45]:
# Preview the frame; the rendered output is abbreviated to the leading and trailing rows.
df_bus_services

Unnamed: 0,ServiceNo,Operator,Direction,Category,OriginCode,DestinationCode,AM_Peak_Freq,AM_Offpeak_Freq,PM_Peak_Freq,PM_Offpeak_Freq,LoopDesc
0,118,GAS,1,TRUNK,65009,97009,5-08,8-12,8-10,09-14,
1,118,GAS,2,TRUNK,97009,65009,10-10,8-11,4-08,9-12,
2,118A,GAS,1,TRUNK,65199,96119,14-16,-,-,-,
3,118B,GAS,1,TRUNK,96111,65191,-,-,24-57,-,
4,119,GAS,1,TRUNK,65009,65009,09-13,12-18,12-15,15-17,Hougang St 21
...,...,...,...,...,...,...,...,...,...,...,...
195,981,TTS,2,TRUNK,47121,58009,-,-,10-10,10-10,
196,98A,TTS,1,TRUNK,28091,21179,-,-,08-16,07-19,
197,98B,TTS,1,TRUNK,28501,21099,06-51,10-10,-,-,
198,98M,TTS,1,TRUNK,28009,28009,-,17-18,-,12-17,Corporation Rd


In [22]:
# Check column types: ServiceNo and the stop codes are strings (object). The four
# *_Freq columns are strings as well because they hold ranges such as 5-08 or a
# bare "-" placeholder, so they need parsing before any numeric use.
df_bus_services.dtypes

ServiceNo          object
Operator           object
Direction           int64
Category           object
OriginCode         object
DestinationCode    object
AM_Peak_Freq       object
AM_Offpeak_Freq    object
PM_Peak_Freq       object
PM_Offpeak_Freq    object
LoopDesc           object
dtype: object

In [23]:
# Report the frame's (rows, columns) followed by the per-column count of missing values.
print(df_bus_services.shape, df_bus_services.isnull().sum(), sep="\n")

(700, 11)
ServiceNo          0
Operator           0
Direction          0
Category           0
OriginCode         0
DestinationCode    0
AM_Peak_Freq       0
AM_Offpeak_Freq    0
PM_Peak_Freq       0
PM_Offpeak_Freq    0
LoopDesc           0
dtype: int64


In [24]:
# Persist without the index column. OriginCode/DestinationCode are string codes,
# so read them back with dtype=str.
# NOTE(review): LoopDesc looks blank in the preview yet the null count is 0, so
# the blanks are presumably empty strings — these come back as NaN with the
# default pd.read_csv settings; confirm and handle on the reading side.
df_bus_services.to_csv("../data/bus_services.csv", encoding="utf-8", index=False)

### Bus Routes Data

In [13]:
# Fetch the 'BusRoutes' dataset through the project helper. Later cells treat the
# result as a pandas DataFrame (.duplicated(), .dtypes, .to_csv()).
# NOTE(review): the preview below ends at index label 39 although the frame has
# 25041 rows, so the index is apparently not unique — presumably the helper
# concatenates result pages without resetting it; confirm in utility.py.
df_bus_routes = retrieve_from_datamall(datamall_token, 'BusRoutes')

In [14]:
# Select rows that exactly repeat an earlier row; an empty result means no duplicates.
df_bus_routes.loc[df_bus_routes.duplicated()]

Unnamed: 0,ServiceNo,Operator,Direction,StopSequence,BusStopCode,Distance,WD_FirstBus,WD_LastBus,SAT_FirstBus,SAT_LastBus,SUN_FirstBus,SUN_LastBus


In [15]:
# Preview the frame; the rendered output is abbreviated to the leading and trailing rows.
df_bus_routes

Unnamed: 0,ServiceNo,Operator,Direction,StopSequence,BusStopCode,Distance,WD_FirstBus,WD_LastBus,SAT_FirstBus,SAT_LastBus,SUN_FirstBus,SUN_LastBus
0,10,SBST,1,1,75009,0.0,0500,2300,0500,2300,0500,2300
1,10,SBST,1,2,76059,0.6,0502,2302,0502,2302,0502,2302
2,10,SBST,1,3,76069,1.1,0504,2304,0504,2304,0503,2304
3,10,SBST,1,4,96289,2.3,0508,2308,0508,2309,0507,2308
4,10,SBST,1,5,96109,2.7,0509,2310,0509,2311,0508,2309
...,...,...,...,...,...,...,...,...,...,...,...,...
36,9A,SBST,1,17,76101,6.9,1851,2026,-,-,-,-
37,9A,SBST,1,18,76031,7.1,1852,2027,-,-,-,-
38,9A,SBST,1,19,76231,7.5,1854,2029,-,-,-,-
39,9A,SBST,1,20,76241,7.8,1856,2030,-,-,-,-


In [16]:
# Check column types: the first/last bus columns are strings (object) holding
# values such as 0500 or a bare "-" placeholder (presumably HHMM clock times —
# TODO confirm), so they must not be coerced to integers.
df_bus_routes.dtypes

ServiceNo        object
Operator         object
Direction         int64
StopSequence      int64
BusStopCode      object
Distance        float64
WD_FirstBus      object
WD_LastBus       object
SAT_FirstBus     object
SAT_LastBus      object
SUN_FirstBus     object
SUN_LastBus      object
dtype: object

In [17]:
# Report the frame's (rows, columns) followed by the per-column count of missing values.
print(df_bus_routes.shape, df_bus_routes.isnull().sum(), sep="\n")

(25041, 12)
ServiceNo       0
Operator        0
Direction       0
StopSequence    0
BusStopCode     0
Distance        0
WD_FirstBus     0
WD_LastBus      0
SAT_FirstBus    0
SAT_LastBus     0
SUN_FirstBus    0
SUN_LastBus     0
dtype: int64


In [18]:
# Persist without the index column. BusStopCode and the *_FirstBus/*_LastBus
# columns are strings with leading zeros (e.g. 0500), so read this file back
# with dtype=str for those columns to keep them intact.
df_bus_routes.to_csv("../data/bus_routes.csv", encoding="utf-8", index=False)

### Passenger Volume - Bus Stops

In [25]:
# Request the 'PV/Bus' passenger-volume dataset. "202209" is presumably a YYYYMM
# month (September 2022) — TODO confirm against the DataMall documentation.
df_pv_bus_stops = retrieve_from_datamall(datamall_token, 'PV/Bus', payload={"Date": "202209"})

# URL Valid for 3 min only
# (presumably the values shown here contain a time-limited download link rather
# than the data itself, so use it right after running this cell)
df_pv_bus_stops.values

### Passenger Volume - Origin Destination Bus Stops

In [38]:
# Request the 'PV/ODBus' origin-destination passenger-volume dataset. "202209" is
# presumably a YYYYMM month (September 2022) — TODO confirm against the DataMall
# documentation.
df_pv_od_bus_stops = retrieve_from_datamall(datamall_token, 'PV/ODBus', payload={"Date": "202209"})

# URL Valid for 3 min only
# (presumably the values shown here contain a time-limited download link rather
# than the data itself, so use it right after running this cell)
df_pv_od_bus_stops.values

### Passenger Volume - Train

In [30]:
# Request the 'PV/Train' passenger-volume dataset. "202209" is presumably a
# YYYYMM month (September 2022) — TODO confirm against the DataMall documentation.
df_pv_train = retrieve_from_datamall(datamall_token, 'PV/Train', payload={"Date": "202209"})

# URL Valid for 3 min only
# (presumably the values shown here contain a time-limited download link rather
# than the data itself, so use it right after running this cell)
df_pv_train.values

### Passenger Volume - Origin Destination Train

In [33]:
# Request the 'PV/ODTrain' origin-destination passenger-volume dataset. "202209"
# is presumably a YYYYMM month (September 2022) — TODO confirm against the
# DataMall documentation.
df_pv_od_train = retrieve_from_datamall(datamall_token, 'PV/ODTrain', payload={"Date": "202209"})


# URL Valid for 3 min only
# (presumably the values shown here contain a time-limited download link rather
# than the data itself, so use it right after running this cell)
df_pv_od_train.values