# Dependencies

In [7]:
# pip install gtfs-realtime-bindings pandas requests
from google.transit import gtfs_realtime_pb2
from google.protobuf.json_format import MessageToDict
import pandas as pd
from requests import get
import time
import datetime
from datetime import datetime
import psycopg2
from sqlalchemy import create_engine
import os
from dotenv import load_dotenv
load_dotenv()
import warnings
warnings.filterwarnings('ignore')

In [8]:
pd.__version__

'2.1.4'

In [9]:
import sqlalchemy

sqlalchemy.__version__

'2.0.37'

# Request API

In [50]:
def request_api_rapidkl(category, watermark):

    URL = f'https://api.data.gov.my/gtfs-realtime/vehicle-position/prasarana?category={category}'

    # Parse the GTFS Realtime feed
    feed = gtfs_realtime_pb2.FeedMessage()
    response = get(URL)
    feed.ParseFromString(response.content)

    # Extract and print vehicle position information
    vehicle_positions = [MessageToDict(entity.vehicle) for entity in feed.entity]
    df = pd.json_normalize(vehicle_positions)
    df['watermark'] = watermark

    if df.empty:
        print(f'ERROR: Dataframe is empty - {watermark}')
    else:
        print(f'STATUS: Dataframe created - {watermark}')

    return df


def generate_rapidkl_data(category, requests_amt):
    from datetime import datetime
    dfs = []
    for _ in range(requests_amt):
        df_output = request_api_rapidkl(category, datetime.now())
        dfs.append(df_output)
        time.sleep(30)

    if all([x.empty for x in dfs]):
        print('ERROR: All dataframe(s) is empty. Failed to generate dataset')
    else:   
        df_concat = pd.concat(dfs)
        return df_concat
    
df_fetch = generate_rapidkl_data('rapid-bus-kl', 5)

STATUS: Dataframe created - 2025-01-22 07:33:10.971586
STATUS: Dataframe created - 2025-01-22 07:33:41.447035
STATUS: Dataframe created - 2025-01-22 07:34:13.794443
STATUS: Dataframe created - 2025-01-22 07:34:44.064461
STATUS: Dataframe created - 2025-01-22 07:35:14.339291


In [63]:
def rename_col(df):
    return df.rename({
        'trip.tripId': 'trip_id',
        'trip.startTime': 'start_time',
        'trip.startDate': 'start_date',
        'trip.routeId': 'route_id',
        'position.latitude': 'latitude',
        'position.longitude': 'longitude',
        'position.bearing': 'bearing',
        'position.speed': 'speed',
        'vehicle.id': 'vehicle_id',
        'vehicle.licensePlate': 'license_plate'
        }, axis=1)

def convert_unixtime_to_standard(unixtime):
    return datetime.fromtimestamp(int(unixtime))

In [67]:
df_rapid = rename_col(df_fetch)
df_rapid['timestamp'] = df_fetch.apply(lambda x: 
                        convert_unixtime_to_standard(x['timestamp']), 
                        axis=1)
# df_rapid.to_csv('rapid-kl-bus_3.csv', index=False)

In [4]:
df_1 = pd.read_csv('rapid-kl-bus.csv')
df_2 = pd.read_csv('rapid-kl-bus_2.csv')
df_3 = pd.read_csv('rapid-kl-bus_3.csv')
df_merge = pd.concat([df_1, df_2, df_3])
df_merge.shape

(3850, 12)

In [58]:
# query_db('ALTER TABLE rapidkl.fact_daily_trip DROP COLUMN index')

# Connecting DB

In [5]:
load_dotenv(".env", override=True)
DB_HOST = os.getenv('DB_HOST')
DB_NAME = os.getenv('DB_NAME')
DB_PORT = os.getenv('DB_PORT')
DB_USERNAME = os.getenv('DB_USERNAME')
DB_PASSWORD = os.getenv('DB_PASSWORD')

print(DB_HOST, DB_NAME, DB_PORT, DB_USERNAME, DB_PASSWORD)

localhost rapidkl 54320 postgres postgres


In [19]:
def connect_db():
    try:
        conn = psycopg2.connect(
            host=DB_HOST,
            database=DB_NAME,
            port=DB_PORT,
            user=DB_USERNAME,
            password=DB_PASSWORD
        )
        print('STATUS: DB connection(1) succeed')
        return conn
    except Exception as e1:
        print("ERROR (1): ", e1)
        print('ERROR: DB connection(1) failed')

def query_db(query:str):
    """ 
    Use for CREATE, INSERT syntax
    """
    try: 
        conn = connect_db()
        cur = conn.cursor()
        cur.execute(query)
        cur.close()
        conn.commit()
        print('STATUS: Query succeed')

    except Exception as e2:
        conn.rollback()
        print("ERROR (2):", e2)
        print('ERROR: Query failed!')


def connect_db_v2():
    try:
        database = f'postgresql+psycopg2://{DB_USERNAME}:{DB_PASSWORD}@{DB_HOST}:{DB_PORT}/{DB_NAME}'
        print(database)
        engine = create_engine(database)
        print('STATUS: DB connection(2) succeed')
        return engine
    except:
        print('ERROR: DB connection(2) failed')

def fetch_db(query:str):
    """
    Use for SELECT syntax
    """
    try:
        engine = connect_db()
        return pd.read_sql_query(query, con=engine)
    except Exception as e3:
        print("ERROR (3): ", e3)
connect_db()
connect_db_v2()

STATUS: DB connection(1) succeed
postgresql+psycopg2://postgres:postgres@localhost:54320/rapidkl
STATUS: DB connection(2) succeed


Engine(postgresql+psycopg2://postgres:***@localhost:54320/rapidkl)

In [24]:
query_db("""
    CREATE TABLE IF NOT EXISTS test_table (
        id TEXT
    )
    """)
query_db("""
    INSERT INTO test_table
    with source_data as (
    select 1 as id
    union all
    select null as id
)
select *
from source_data     
""")

STATUS: DB connection(1) succeed
STATUS: Query succeed
STATUS: DB connection(1) succeed
STATUS: Query succeed


In [21]:
fetch_db('SELECT * FROM dev.dim_busses')

STATUS: DB connection(1) succeed


Unnamed: 0,bus_id,bus_plates
0,1,BJR8233
1,2,BNG4014
2,3,CDH7526
3,4,CDH8296
4,5,CDH8332
...,...,...
485,486,WWC4624
486,487,WWC4681
487,488,WWC6423
488,489,WWD4612


In [4]:
# TESTING
# query_db("""
#             CREATE TABLE ammar_test (
#             name TEXT, 
#             PRIMARY KEY (name)
#         );
#         """)
# query_db("""
#         INSERT INTO ammar_test 
#             (name) 
#             VALUES ('jack')
#         ;
#         """)
# fetch_db("SELECT * FROM ammar_test")

# Database Schema

In [59]:
# query_db('CREATE SCHEMA rapidkl;')

In [11]:
df_merge.shape

(3850, 12)

# Tables

## fact_daily_trip

In [69]:
engine = connect_db_v2()
dir(engine)

postgresql+psycopg2://postgres:postgres@localhost:54320/rapidkl
STATUS: DB connection(2) succeed


['__class__',
 '__delattr__',
 '__dict__',
 '__dir__',
 '__doc__',
 '__eq__',
 '__format__',
 '__ge__',
 '__getattribute__',
 '__gt__',
 '__hash__',
 '__init__',
 '__init_subclass__',
 '__le__',
 '__lt__',
 '__module__',
 '__ne__',
 '__new__',
 '__reduce__',
 '__reduce_ex__',
 '__repr__',
 '__setattr__',
 '__sizeof__',
 '__str__',
 '__subclasshook__',
 '__weakref__',
 '_compiled_cache',
 '_connection_cls',
 '_echo',
 '_execute_clauseelement',
 '_execute_compiled',
 '_execute_default',
 '_execution_options',
 '_has_events',
 '_is_future',
 '_lru_size_alert',
 '_option_cls',
 '_optional_conn_ctx_manager',
 '_run_ddl_visitor',
 '_run_visitor',
 '_schema_translate_map',
 '_should_log_debug',
 '_should_log_info',
 '_sqla_logger_namespace',
 '_trans_ctx',
 '_wrap_pool_connect',
 'begin',
 'clear_compiled_cache',
 'connect',
 'dialect',
 'dispatch',
 'dispose',
 'driver',
 'echo',
 'engine',
 'execute',
 'execution_options',
 'get_execution_options',
 'has_table',
 'hide_parameters',
 'logger

In [6]:
engine = connect_db_v2()
df_merge.to_sql('fact_daily_trip', 
                con=engine, 
                schema='dev', 
                if_exists='replace', 
                index=False)

postgresql+psycopg2://postgres:postgres@localhost:54320/rapidkl
STATUS: DB connection(2) succeed


850

In [62]:
fetch_db('SELECT * FROM dev.fact_daily_trip')

STATUS: DB connection(1) succeed
ERROR (3):  Execution failed on sql 'SELECT * FROM dev.fact_daily_trip': relation "dev.fact_daily_trip" does not exist
LINE 1: SELECT * FROM dev.fact_daily_trip
                      ^



## dim_drivers

In [7]:
df_trip = fetch_db('SELECT * FROM dev.fact_daily_trip')
bus_plates = df_trip['license_plate'].unique()
driver_names = [f'driver_{str(x+1).zfill(5)}' for x in range(len(bus_plates))]
df_drivers = pd.DataFrame({'driver_id':[x+1 for x in range(len(bus_plates))], 'driver_name':driver_names})
df_drivers.to_sql('dim_drivers', con=engine, schema='dev', if_exists='replace', index=False)

STATUS: DB connection(1) succeed


490

In [97]:
fetch_db("SELECT * FROM rapidkl.dim_drivers")

STATUS: DB connection(2) succeed


Unnamed: 0,driver_id,driver_name
0,1,driver_00001
1,2,driver_00002
2,3,driver_00003
3,4,driver_00004
4,5,driver_00005
...,...,...
485,486,driver_00486
486,487,driver_00487
487,488,driver_00488
488,489,driver_00489


## dim_busses

In [8]:
df_trip = fetch_db('SELECT * FROM dev.fact_daily_trip')
bus_plates = sorted(df_trip['license_plate'].unique())
bus_id = [x+1 for x in range(len(bus_plates))]
df_bus = pd.DataFrame({'bus_id':bus_id, 'bus_plates': bus_plates})
df_bus.to_sql('dim_busses', con=engine, schema='dev', if_exists='replace', index=False)

STATUS: DB connection(1) succeed


490

In [102]:
fetch_db("SELECT * FROM rapidkl.dim_busses")

STATUS: DB connection(2) succeed


Unnamed: 0,bus_id,bus_plates
0,1,BJR8233
1,2,BNG4014
2,3,CDH7526
3,4,CDH8296
4,5,CDH8332
...,...,...
485,486,WWC4624
486,487,WWC4681
487,488,WWC6423
488,489,WWD4612


## fact_trips

In [13]:
query_1 = """
CREATE TABLE IF NOT EXISTS rapidkl.fact_trips (
    timestamp TIMESTAMP, 
    trip_id TEXT, 
    start_time TEXT, 
    driver_name TEXT,
    bus_plates TEXT, 
    route_id TEXT, 
    speed REAL,
    PRIMARY KEY (trip_id, timestamp, bus_plates, speed, route_id)
    )
"""
query_2 = """
    INSERT INTO rapidkl.fact_trips
    SELECT 
        CAST(timestamp AS TIMESTAMP), 
        trip_id, 
        start_time, 
        driver_name, 
        bus_plates,
        route_id, 
        speed
    FROM (
        SELECT *
        FROM rapidkl.fact_daily_trip fdt
        JOIN rapidkl.dim_busses vv 
        ON fdt.vehicle_id = vv.bus_plates
        JOIN rapidkl.dim_drivers dd
        ON vv.bus_id = dd.driver_id
    ) jjj;
"""

query_db(query_1)
query_db(query_2)


STATUS: DB connection(1) succeed
STATUS: Query succeed
STATUS: DB connection(1) succeed
STATUS: Query succeed


In [42]:
query_db('DROP TABLE rapidkl.fact_trips')

STATUS: DB connection(1) succeed
STATUS: Query succeed


In [28]:
fetch_db('SELECT * FROM rapidkl.fact_daily_trip')

STATUS: DB connection(2) succeed


Unnamed: 0.1,timestamp,trip_id,start_time,start_date,route_id,latitude,longitude,bearing,speed,vehicle_id,license_plate,Unnamed: 0
0,2025-01-20 08:54:31,weekday_U1700_U170002_3,08:46:31,20250120,U1700,3.254817,101.693990,0.0,0.00,WVD4971,WVD4971,
1,2025-01-20 08:54:28,weekday_P0010_P001002_3,08:47:58,20250120,P0010,3.084800,101.627830,92.0,59.26,WA3714M,WA3714M,
2,2025-01-20 08:54:37,weekday_U6400_U640001_3,08:20:06,20250120,U6400,3.081413,101.666470,279.5,27.22,WPA5621,WPA5621,
3,2025-01-20 08:54:49,weekday_U3030_U303002_1,08:31:28,20250120,U3030,3.159460,101.744630,278.6,5.00,WVJ8197,WVJ8197,
4,2025-01-20 08:53:57,weekday_U6520_U652002_0,07:40:52,20250120,U6520,3.057708,101.688690,268.6,5.74,WPC8505,WPC8505,
...,...,...,...,...,...,...,...,...,...,...,...,...
3845,2025-01-22 07:34:46,weekday_U8210_U821002_1,07:00:46,20250122,U8210,3.133783,101.688995,143.0,14.82,WUY9518,WUY9518,
3846,2025-01-22 07:34:31,weekday_U1900_U190002_1,06:47:02,20250122,U1900,3.147900,101.694176,177.0,7.41,WVA4347,WVA4347,
3847,2025-01-22 07:34:42,weekday_T7870_T787002_0,07:31:56,20250122,T7870,3.110708,101.636230,315.1,18.52,WVA2965,WVA2965,
3848,2025-01-22 07:34:17,weekday_U4210_U421002_1,07:09:12,20250122,U4210,3.150506,101.708370,286.5,40.37,WVP2522,WVP2522,


In [13]:
fetch_db('SELECT * FROM rapidkl.fact_daily_trip')

STATUS: DB connection(2) succeed


Unnamed: 0.1,timestamp,trip_id,start_time,start_date,route_id,latitude,longitude,bearing,speed,vehicle_id,license_plate,Unnamed: 0
0,2025-01-20 08:54:31,weekday_U1700_U170002_3,08:46:31,20250120,U1700,3.254817,101.693990,0.0,0.00,WVD4971,WVD4971,
1,2025-01-20 08:54:28,weekday_P0010_P001002_3,08:47:58,20250120,P0010,3.084800,101.627830,92.0,59.26,WA3714M,WA3714M,
2,2025-01-20 08:54:37,weekday_U6400_U640001_3,08:20:06,20250120,U6400,3.081413,101.666470,279.5,27.22,WPA5621,WPA5621,
3,2025-01-20 08:54:49,weekday_U3030_U303002_1,08:31:28,20250120,U3030,3.159460,101.744630,278.6,5.00,WVJ8197,WVJ8197,
4,2025-01-20 08:53:57,weekday_U6520_U652002_0,07:40:52,20250120,U6520,3.057708,101.688690,268.6,5.74,WPC8505,WPC8505,
...,...,...,...,...,...,...,...,...,...,...,...,...
3845,2025-01-22 07:34:46,weekday_U8210_U821002_1,07:00:46,20250122,U8210,3.133783,101.688995,143.0,14.82,WUY9518,WUY9518,
3846,2025-01-22 07:34:31,weekday_U1900_U190002_1,06:47:02,20250122,U1900,3.147900,101.694176,177.0,7.41,WVA4347,WVA4347,
3847,2025-01-22 07:34:42,weekday_T7870_T787002_0,07:31:56,20250122,T7870,3.110708,101.636230,315.1,18.52,WVA2965,WVA2965,
3848,2025-01-22 07:34:17,weekday_U4210_U421002_1,07:09:12,20250122,U4210,3.150506,101.708370,286.5,40.37,WVP2522,WVP2522,


## fact_driving_behavior

In [25]:
speed_status_df = fetch_db(
    """
    WITH speed_count AS (
        SELECT 
            DATE(timestamp) as date, 
            bus_plates, 
            driver_name, 
            CASE 
                WHEN speed > 60 THEN 1
                ELSE 0
            END count_breach_speed
        FROM rapidkl.fact_trips
        WHERE DATE(timestamp) = CURRENT_DATE
    )
    SELECT 
        date, 
        bus_plates, 
        driver_name, 
        CASE
            WHEN sum_breach_daily = 0 THEN 'Safe'
            WHEN sum_breach_daily = 1 THEN 'Cautious'
            WHEN sum_breach_daily = 2 THEN 'Cautious'
            WHEN sum_breach_daily > 2 THEN 'Danger'
        END behavior
    FROM (
        SELECT 
            date, 
            bus_plates, 
            driver_name, 
            SUM(count_breach_speed) sum_breach_daily
        FROM speed_count
        GROUP BY date, bus_plates, driver_name
        ) hhh
    """
)
speed_status_df.to_sql('fact_driving_behavior', 
            con=engine, schema='rapidkl', 
            if_exists='replace', 
            index=False)

STATUS: DB connection(2) succeed


230

In [27]:
df_bh = fetch_db('SELECT * FROM rapidkl.fact_driving_behavior')
df_bh[df_bh['behavior']!='Safe']

STATUS: DB connection(2) succeed


Unnamed: 0,date,bus_plates,driver_name,behavior
13,2025-01-21,VFA2791,driver_00023,Danger
19,2025-01-21,VFK4581,driver_00032,Cautious
23,2025-01-21,VGF8476,driver_00036,Cautious
24,2025-01-21,VGG9462,driver_00037,Cautious
33,2025-01-21,VGK8927,driver_00051,Cautious
46,2025-01-21,W9762Q,driver_00073,Cautious
92,2025-01-21,WB8743F,driver_00144,Cautious
109,2025-01-21,WPB3247,driver_00184,Cautious
142,2025-01-21,WUW2542,driver_00242,Cautious
152,2025-01-21,WUY4963,driver_00260,Cautious


## fact_bus_maintenance
- running at the eod (11:59 pm)

In [45]:
import datetime
today = datetime.date.today()
yesterday = today - datetime.timedelta(days=1)

In [8]:
import datetime
today = datetime.date.today()
yesterday = today - datetime.timedelta(days=1)
query_1 = f"""
    with geo_table AS (
    SELECT timestamp, 
            license_plate, 
            longitude, 
            latitude, 
            ROW_NUMBER () OVER (PARTITION BY license_plate ORDER BY timestamp ASC) as row_number, 
            LAG (longitude) OVER (PARTITION BY license_plate ORDER BY timestamp ASC) as prev_longitude, 
            LAG (latitude) OVER (PARTITION BY license_plate ORDER BY timestamp ASC) as prev_latitude
    FROM rapidkl.fact_daily_trip 
    ), 
    geo_today AS (
    SELECT 
            license_plate , 
            DATE(timestamp),
            SUM(distance_km) as total_distance_km
    FROM (
        SELECT timestamp, license_plate, 
            2 * 6371 * 
            ASIN(SQRT(
                POWER(SIN(RADIANS(latitude - prev_latitude) / 2), 2) +
                COS(RADIANS(prev_latitude)) * COS(RADIANS(latitude)) *
                POWER(SIN(RADIANS(longitude - prev_longitude) / 2), 2)
            )) AS distance_km
        FROM geo_table
    ) jjj
    WHERE DATE(timestamp) = DATE('{str(today)}') -- today
    GROUP BY license_plate, distance_km, timestamp
    ORDER BY distance_km DESC
    ), 
    geo_yesterday AS (
    SELECT 
            license_plate , 
            DATE(timestamp),
            SUM(distance_km) as total_distance_km
    FROM (
        SELECT timestamp, license_plate, 
            2 * 6371 * 
            ASIN(SQRT(
                POWER(SIN(RADIANS(latitude - prev_latitude) / 2), 2) +
                COS(RADIANS(prev_latitude)) * COS(RADIANS(latitude)) *
                POWER(SIN(RADIANS(longitude - prev_longitude) / 2), 2)
            )) AS distance_km
        FROM geo_table
    ) jjj
    WHERE DATE(timestamp) = DATE('{str(yesterday)}') -- yesterday
    GROUP BY license_plate, distance_km, timestamp
    ORDER BY distance_km DESC
    ), 
    total_distance_today AS (
    SELECT license_plate, SUM(total_distance_km) AS total_distance_km_today
    FROM (
        SELECT *
        FROM geo_today
        UNION ALL
        SELECT * 
        FROM geo_yesterday
    ) kkk
    GROUP BY license_plate
    )
    SELECT CURRENT_DATE as date, *, 
        CASE
            WHEN total_distance_km_today > 10000 THEN 'Need service'
            ELSE 'Good'
        END AS bus_condition
    FROM total_distance_today 
    """
df_maintenance = fetch_db(query_1)
df_maintenance

STATUS: DB connection(2) succeed


Unnamed: 0,date,license_plate,total_distance_km_today,bus_condition
0,2025-01-22,BJR8233,0.153383,Good
1,2025-01-22,BNG4014,0.418658,Good
2,2025-01-22,CDH7526,0.691122,Good
3,2025-01-22,CDH8296,0.286427,Good
4,2025-01-22,CDH8332,2.428730,Good
...,...,...,...,...
291,2025-01-22,WWC4559,5.174168,Good
292,2025-01-22,WWC4592,0.603555,Good
293,2025-01-22,WWC4681,10.880518,Good
294,2025-01-22,WWC6423,2.631723,Good


In [9]:
query_2 = f"""
        CREATE TABLE rapidkl.fact_bus_maintenance (
        date DATE,
        license_plate TEXT, 
        total_distance_km_today REAL, 
        bus_condition TEXT    
    )
"""
query_3 = f"""
    INSERT INTO rapidkl.fact_bus_maintenance {query_1}
"""
query_db(query_2)
query_db(query_3)

STATUS: DB connection(1) succeed
STATUS: Query succeed
STATUS: DB connection(1) succeed
STATUS: Query succeed


In [6]:
query_db('DROP TABLE rapidkl.fact_history_warning')

STATUS: DB connection(1) succeed
STATUS: Query succeed


In [45]:
fetch_db('SELECT * FROM rapidkl.fact_history_warning')

STATUS: DB connection(2) succeed


Unnamed: 0,date,license_plate,total_distance_km_today,bus_condition
0,2025-01-21,BNG4014,11.181509,Good
1,2025-01-21,CDH8296,,Good
2,2025-01-21,CDH8332,2.028472,Good
3,2025-01-21,PJK1473,3.048687,Good
4,2025-01-21,PKY1292,0.000000,Good
...,...,...,...,...
398,2025-01-21,WWC4624,2.290819,Good
399,2025-01-21,WWC4681,9.731171,Good
400,2025-01-21,WWC6423,5.284207,Good
401,2025-01-21,WWD4612,0.647726,Good


## fact_history_warning

In [85]:
query_c = """
CREATE TABLE IF NOT EXISTS rapidkl.warning_cumulated (
	driver_name TEXT,  
	dates_warning DATE[], 
	date DATE, 
	PRIMARY KEY (driver_name, date)
)
"""
query_i = """
{};
INSERT INTO rapidkl.warning_cumulated 
WITH 
driver_behavior AS (
	SELECT * FROM (
	SELECT
	    DATE(timestamp) as date, 
	    bus_plates, 
	    driver_name, 
	    CASE 
	        WHEN speed > 60 THEN 1
	        ELSE 0
	    END count_breach_speed
	FROM rapidkl.fact_trips
	) hhh
	WHERE count_breach_speed > 0
),
yesterday AS (
	SELECT *
	FROM rapidkl.warning_cumulated
	WHERE date = DATE('{}')
), 
today AS  (
	SELECT 
		*
	FROM driver_behavior
	WHERE date = DATE('{}')
	GROUP BY date, bus_plates, driver_name, count_breach_speed 
)
SELECT 
	COALESCE (t.driver_name, y.driver_name) as driver_name, 
	CASE 
		WHEN y.dates_warning IS NULL THEN ARRAY[t.date]
		WHEN t.date IS NULL THEN y.dates_warning
		ELSE ARRAY [t.date] || y.dates_warning
	END as date_warning,
	COALESCE (t.date, y.date + INTERVAL '1 day') as date
FROM today t
FULL OUTER JOIN yesterday y
ON t.driver_name = y.driver_name;

"""

run_q = query_i.format(query_c, '2025-01-21', '2025-01-20')
print(run_q)



	driver_name TEXT,  
	date DATE, 
	PRIMARY KEY (driver_name, date)
)
;
WITH 
driver_behavior AS (
	SELECT * FROM (
	SELECT
	    DATE(timestamp) as date, 
	    bus_plates, 
	    driver_name, 
	    CASE 
	        WHEN speed > 60 THEN 1
	        ELSE 0
	    END count_breach_speed
	FROM rapidkl.fact_trips
	) hhh
	WHERE count_breach_speed > 0
),
yesterday AS (
	SELECT *
	WHERE date = DATE('2025-01-21')
), 
today AS  (
	SELECT 
		*
	FROM driver_behavior
	WHERE date = DATE('2025-01-20')
	GROUP BY date, bus_plates, driver_name, count_breach_speed 
)
SELECT 
	COALESCE (t.driver_name, y.driver_name) as driver_name, 
	CASE 
	COALESCE (t.date, y.date + INTERVAL '1 day') as date
FROM today t
FULL OUTER JOIN yesterday y
ON t.driver_name = y.driver_name;




In [86]:
import datetime
today_date = datetime.datetime.today().date()
yesterday_date = today_date - datetime.timedelta(days=1)
# str(yesterday_date.date())
query_db(query_i.format(query_c, str(today_date), str(yesterday_date)))

STATUS: DB connection(1) succeed
STATUS: Query succeed


In [70]:
str(today_date), str(yesterday_date)

('2025-01-24', '2025-01-23')

In [7]:
# print(query_i.format(query_c, str(today_date), str(yesterday_date)))

In [None]:
fetch_db('SELECT * FROM rapidkl.warning_cumulated')

In [None]:
# list of current warning on current date
fetch_db("""SELECT driver_name, dates_warning, MAX(date) as date
FROM rapidkl.warning_cumulated
GROUP BY driver_name , dates_warning;""")

STATUS: DB connection(2) succeed


Unnamed: 0,driver_name,dates_warning,date
0,driver_00293,[2025-01-21],2025-01-23
1,driver_00066,[2025-01-21],2025-01-23
2,driver_00095,[2025-01-21],2025-01-23
3,driver_00072,[2025-01-22],2025-01-23
4,driver_00473,[2025-01-22],2025-01-23
5,driver_00173,[2025-01-22],2025-01-23
6,driver_00424,[2025-01-21],2025-01-23
7,driver_00044,[2025-01-21],2025-01-23
8,driver_00469,[2025-01-22],2025-01-23
9,driver_00313,"[2025-01-22, 2025-01-21]",2025-01-23
