# Dependencies

In [6]:
# pip install gtfs-realtime-bindings pandas requests
import os
import time
from datetime import datetime
from urllib.parse import quote_plus

import pandas as pd
import psycopg2
from google.protobuf.json_format import MessageToDict
from google.transit import gtfs_realtime_pb2
from requests import get
from sqlalchemy import create_engine

In [5]:
def request_api_rapidkl(category, watermark, timeout=10):
    """Fetch one vehicle-position snapshot and flatten it into a DataFrame.

    Parameters
    ----------
    category : str
        Value sent as the ``category`` query parameter (e.g. 'rapid-bus-kl').
    watermark : Any
        Label embedded in the status line printed for this request.
    timeout : float, optional
        Seconds to wait for the server before giving up. Without a timeout a
        stalled connection would block the notebook indefinitely.

    Returns
    -------
    pandas.DataFrame
        One row per feed entity that carries a vehicle payload, with nested
        fields flattened into dotted column names. Empty when the feed has
        no such entities.

    Raises
    ------
    requests.HTTPError
        If the API answers with an error status; the body of an error
        response is not a protobuf feed and must not be parsed as one.
    """
    URL = 'https://api.data.gov.my/gtfs-realtime/vehicle-position/prasarana'

    # Let requests build (and escape) the query string.
    response = get(URL, params={'category': category}, timeout=timeout)
    response.raise_for_status()

    # Parse the GTFS Realtime feed
    feed = gtfs_realtime_pb2.FeedMessage()
    feed.ParseFromString(response.content)

    # Extract vehicle position information, skipping entities that do not
    # carry a vehicle payload so they do not become blank rows.
    vehicle_positions = [
        MessageToDict(entity.vehicle)
        for entity in feed.entity
        if entity.HasField('vehicle')
    ]
    df = pd.json_normalize(vehicle_positions)

    if df.empty:
        print(f'ERROR: Dataframe is empty - {watermark}')
    else:
        print(f'STATUS: Dataframe created - {watermark}')

    return df


def generate_rapidkl_data(category, requests_amt, interval=30):
    """Poll the vehicle-position API repeatedly and stack the snapshots.

    Parameters
    ----------
    category : str
        Feed category forwarded to ``request_api_rapidkl``.
    requests_amt : int
        Number of snapshots to request.
    interval : float, optional
        Seconds to wait between two consecutive requests (default 30).

    Returns
    -------
    pandas.DataFrame or None
        The concatenation of all snapshots, each keeping its own row index.
        ``None`` when every snapshot was empty (including ``requests_amt``
        of 0), after an error message has been printed.
    """
    dfs = []
    for attempt in range(requests_amt):
        # Wait only *between* requests; sleeping after the last one would
        # just delay the result.
        if attempt:
            time.sleep(interval)
        dfs.append(request_api_rapidkl(category, datetime.now()))

    if all(snapshot.empty for snapshot in dfs):
        print('ERROR: All dataframe(s) is empty. Failed to generate dataset')
        return None

    return pd.concat(dfs)
    
# Collect five consecutive snapshots of the 'rapid-bus-kl' feed.
df_fetch = generate_rapidkl_data(category='rapid-bus-kl', requests_amt=5)

STATUS: Dataframe created - 2025-01-20 08:55:24.893962
STATUS: Dataframe created - 2025-01-20 08:55:55.241082
STATUS: Dataframe created - 2025-01-20 08:56:25.512366
STATUS: Dataframe created - 2025-01-20 08:56:55.797025
STATUS: Dataframe created - 2025-01-20 08:57:26.144785


In [22]:
# Inspect the column names of the fetched frame before renaming them.
df_fetch.columns

Index(['timestamp', 'trip.tripId', 'trip.startTime', 'trip.startDate',
       'trip.routeId', 'position.latitude', 'position.longitude',
       'position.bearing', 'position.speed', 'vehicle.id',
       'vehicle.licensePlate'],
      dtype='object')

In [26]:
def rename_col(df):
    """Return a copy of ``df`` whose dotted feed column names are snake_case.

    Columns that are not listed in the mapping (for example ``timestamp``)
    keep their name; the input frame itself is left untouched.
    """
    column_map = {
        'trip.tripId': 'trip_id',
        'trip.startTime': 'start_time',
        'trip.startDate': 'start_date',
        'trip.routeId': 'route_id',
        'position.latitude': 'latitude',
        'position.longitude': 'longitude',
        'position.bearing': 'bearing',
        'position.speed': 'speed',
        'vehicle.id': 'vehicle_id',
        'vehicle.licensePlate': 'license_plate',
    }
    return df.rename(columns=column_map)

def convert_unixtime_to_standard(unixtime, tz=None):
    """Convert a Unix timestamp (seconds since the epoch) to a ``datetime``.

    Parameters
    ----------
    unixtime : int or str
        Epoch seconds; anything ``int()`` accepts.
    tz : datetime.tzinfo, optional
        Timezone of the returned value. With the default ``None`` the result
        is a naive datetime in the machine's local timezone, so the same
        input yields different wall-clock values on differently configured
        machines. Pass e.g. ``timezone.utc`` for a reproducible, aware value.

    Returns
    -------
    datetime.datetime
    """
    return datetime.fromtimestamp(int(unixtime), tz=tz)

# Rename the feed columns and convert the epoch-second timestamps in one
# chained step. Series.map converts the single column directly, instead of
# materialising every row as a Series the way DataFrame.apply(axis=1) does.
df_f = rename_col(df_fetch).assign(
    timestamp=lambda frame: frame['timestamp'].map(convert_unixtime_to_standard)
)

# Persist the cleaned snapshot set; the row index is not written.
df_f.to_csv('rapid-kl-bus.csv', index=False)

# Connecting to the database

In [43]:
def connect_db():
    """Open a new psycopg2 connection to the PostgreSQL instance.

    Connection settings are read from the standard ``PGHOST``,
    ``PGDATABASE``, ``PGUSER``, ``PGPORT`` and ``PGPASSWORD`` environment
    variables so that real credentials never have to be written into the
    notebook. When a variable is unset, the value previously hard-coded
    here is used as the fallback.

    Returns
    -------
    psycopg2 connection
        The caller is responsible for closing it.
    """
    return psycopg2.connect(
        host=os.environ.get('PGHOST', 'localhost'),
        database=os.environ.get('PGDATABASE', 'postgres'),
        user=os.environ.get('PGUSER', 'postgres'),
        port=os.environ.get('PGPORT', '54320'),
        password=os.environ.get('PGPASSWORD', 'postgres'),
    )

def query_db(query:str):
    """
    Run a statement that modifies the database (CREATE, INSERT, ...).

    A fresh connection is opened for the call. The statement is committed
    when it succeeds; when it fails the transaction is rolled back and the
    error is printed rather than raised. The cursor and the connection are
    closed in both cases.

    Parameters
    ----------
    query : str
        SQL text handed to the cursor unchanged.
    """
    conn = connect_db()
    try:
        cur = conn.cursor()
        try:
            cur.execute(query)
            print('syntax executed')
            conn.commit()
        except Exception as exc:
            # Undo the partial work and report why; a silent rollback makes
            # a failed statement look like a successful one.
            conn.rollback()
            print(f'ERROR: statement failed and was rolled back - {exc}')
        finally:
            cur.close()
    finally:
        # Closing the cursor does not release the connection itself.
        conn.close()
        print('connection close')


def connect_db_v2():
    """Create a SQLAlchemy engine (psycopg2 driver) for the PostgreSQL instance.

    Settings come from the standard ``PGHOST``, ``PGDATABASE``, ``PGUSER``,
    ``PGPORT`` and ``PGPASSWORD`` environment variables, falling back to the
    values previously hard-coded in the URL when a variable is unset.

    Returns
    -------
    sqlalchemy Engine
        A new engine on every call; the caller should dispose of it.
    """
    host = os.environ.get('PGHOST', 'localhost')
    port = os.environ.get('PGPORT', '54320')
    database = os.environ.get('PGDATABASE', 'postgres')
    # User and password are URL-escaped so characters such as '@' or '/'
    # cannot be mistaken for URL delimiters.
    user = quote_plus(os.environ.get('PGUSER', 'postgres'))
    password = quote_plus(os.environ.get('PGPASSWORD', 'postgres'))

    return create_engine(
        f'postgresql+psycopg2://{user}:{password}@{host}:{port}/{database}'
    )

def fetch_db(query:str):
    """
    Run a SELECT statement and return its result set.

    Parameters
    ----------
    query : str
        SQL text passed to ``pandas.read_sql_query`` unchanged.

    Returns
    -------
    pandas.DataFrame
        The rows produced by the query.
    """
    engine = connect_db_v2()
    try:
        return pd.read_sql_query(query, con=engine)
    finally:
        # A new engine is built for every call, so release its connection
        # pool here; otherwise each call leaves connections open.
        engine.dispose()

In [8]:
# Read back every row and column of the dim_drivers table.
fetch_db('SELECT * FROM dim_drivers')

Unnamed: 0,driver_id,plate_num
0,driver_0000,WVE5137
1,driver_0001,WVL602
2,driver_0002,WUW1483
3,driver_0003,VGG9462
4,driver_0004,WVB6987
...,...,...
205,driver_0205,SF2113398
206,driver_0206,VEN1373
207,driver_0207,WVE5068
208,driver_0208,WPC3856


In [44]:
# TESTING: smoke test for query_db / fetch_db (uncomment to run; creates the table ammar_test)
# query_db("""
#             CREATE TABLE ammar_test (
#             name TEXT, 
#             PRIMARY KEY (name)
#         );
#         """)
# query_db("""
#         INSERT INTO ammar_test 
#             (name) 
#             VALUES ('jack')
#         ;
#         """)
# fetch_db("SELECT * FROM ammar_test")

syntax executed
connection close
syntax executed
connection close


Unnamed: 0,name
0,jack


# dim_busses

In [27]:
# Display the renamed frame with its converted timestamps.
df_f

Unnamed: 0,timestamp,trip_id,start_time,start_date,route_id,latitude,longitude,bearing,speed,vehicle_id,license_plate
0,2025-01-19 22:43:05,weekend_S4030_S403002_9,22:43:05,20250119,S4030,3.054564,101.526080,164.0,15.74,WVA4806,WVA4806
1,2025-01-19 22:43:22,weekend_T7800_T780002_5,22:33:38,20250119,T7800,3.132654,101.600900,185.8,6.30,WUW4733,WUW4733
2,2025-01-19 22:43:38,weekend_U7830_U783002_8,22:31:30,20250119,U7830,3.074836,101.613510,343.1,17.22,WA1675G,WA1675G
3,2025-01-19 22:43:19,weekend_U1700_U170002_5,22:16:19,20250119,U1700,3.180183,101.692500,172.0,11.11,WVD4971,WVD4971
4,2025-01-19 22:43:37,weekend_U2200_U220001_15,22:17:08,20250119,U2200,3.166267,101.696160,3.0,20.37,WUY9577,WUY9577
...,...,...,...,...,...,...,...,...,...,...,...
61,2025-01-19 22:44:20,weekend_U7800_U780001_8,22:33:19,20250119,U7800,3.117043,101.672520,351.3,3.33,WQA647,WQA647
62,2025-01-19 22:44:06,weekend_T7810_T781002_5,22:31:07,20250119,T7810,3.103906,101.596890,346.5,45.56,WVA3045,WVA3045
63,2025-01-19 22:44:05,weekend_U2510_U251002_11,22:41:02,20250119,U2510,3.198363,101.754050,298.2,8.89,WNV5654,WNV5654
64,2025-01-19 22:44:22,weekend_U2500_U250002_16,22:40:42,20250119,U2500,3.213721,101.729866,24.0,4.26,W9423Q,W9423Q


In [28]:
# Distinct vehicle identifiers present across the collected snapshots.
df_f['vehicle_id'].unique()

array(['WVA4806', 'WUW4733', 'WA1675G', 'WVD4971', 'WUY9577', 'WUW4133',
       'WB2225C', 'WVN5644', 'SF2113398', 'WVB2592', 'WVG2342', 'WVJ8227',
       'WUW9721', 'WB6530S', 'WPV6941', 'W4415R', 'WVA4143', 'VGG9462',
       'WVE5041', 'W5174V', 'VGG9481', 'WVD4738', 'WVL659', 'WUW8429',
       'WC7313H', 'WUX5708', 'WUW8425', 'WUW8434', 'VFK4581', 'WVN5548',
       'CDH7526', 'VGJ1946', 'WQA7091', 'VEL9768', 'WVP2448', 'WVA4269',
       'VGK1677', 'W2758T', 'WUY4091', 'WVH9827', 'WQE2159', 'WVH9594',
       'WVG5306', 'WUT8493', 'W3607T', 'WPY7751', 'WB824L', 'WVA3151',
       'WB8743F', 'WVE3614', 'W7400S', 'WVD4166', 'WVE4812', 'WVA4813',
       'WPC7940', 'WVA3045', 'WNV5654', 'W9423Q', 'WPL3858', 'WVJ1049',
       'VGH4658', 'WUT8497', 'WB807U', 'CDH8332', 'WVB3950', 'WQA647',
       'WVC6647'], dtype=object)