# This cell installs all necessary Python packages.
### Pathway is used for declarative data pipelines.
### Bokeh helps create interactive visualizations.

In [1]:
!pip install --quiet pathway bokeh pandas numpy


[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m60.4/60.4 kB[0m [31m3.7 MB/s[0m eta [36m0:00:00[0m
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m149.4/149.4 kB[0m [31m7.8 MB/s[0m eta [36m0:00:00[0m
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m69.7/69.7 MB[0m [31m6.6 MB/s[0m eta [36m0:00:00[0m
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m77.6/77.6 kB[0m [31m5.4 MB/s[0m eta [36m0:00:00[0m
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m777.6/777.6 kB[0m [31m40.0 MB/s[0m eta [36m0:00:00[0m
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m139.2/139.2 kB[0m [31m11.1 MB/s[0m eta [36m0:00:00[0m
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m26.5/26.5 MB[0m [31m43.9 MB/s[0m eta [36m0:00:00[0m
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m45.5/45.5 kB[0m [31m3.2 MB/s[0m eta [36m0:00:00[0m
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━

# We import all the libraries needed in this project.
### output_notebook() lets Bokeh plots show up directly in Colab.

In [33]:
import numpy as np
import pandas as pd
import pathway as pw
from datetime import datetime
from bokeh.plotting import figure, show, output_notebook
from bokeh.models import ColumnDataSource
output_notebook()


# We load the dataset with Pandas.
### Convert timestamps to datetime, sort by time

In [34]:
# Read the raw records, then build a single Timestamp column from the
# separate date and time text columns and order the rows by it.
df = pd.read_csv('dataset.csv')
df = df.assign(
    Timestamp=lambda frame: pd.to_datetime(
        frame['LastUpdatedDate'] + ' ' + frame['LastUpdatedTime'],
        format='%d-%m-%Y %H:%M:%S',
    )
).sort_values('Timestamp')


### Map traffic levels to numeric

In [35]:
# Ordinal encoding of the textual traffic level.
traffic_mapping = dict(low=1, average=2, high=3)

# Series.map leaves NaN for any label that is not a key of the mapping.
df['TrafficConditionNearby'] = df['TrafficConditionNearby'].map(traffic_mapping)

# Persist the cleaned frame; this file is what the Pathway connector reads.
df.to_csv("dataset_clean.csv", index=False)

### Safe float conversion + normalization between 0 and 1.

In [36]:
def safe_float(x):
    """Convert ``x`` to ``float``, falling back to ``0.0`` when it cannot be converted.

    Args:
        x: Any value; typically a number, a numeric string or ``None``.

    Returns:
        ``float(x)`` when the conversion succeeds, otherwise ``0.0``.
    """
    try:
        return float(x)
    # Only conversion failures are treated as "no value". A bare ``except``
    # would also swallow KeyboardInterrupt / SystemExit.
    except (TypeError, ValueError, OverflowError):
        return 0.0

def normalize_series(series):
    """Min-max scale ``series`` so its smallest value maps to 0.

    A small epsilon (1e-6) is added to the range so a constant series does
    not divide by zero; as a consequence the largest value maps to slightly
    less than 1.
    """
    lowest = series.min()
    highest = series.max()
    shifted = series - lowest
    return shifted / (highest - lowest + 1e-6)


### Pathway Streaming Ingestion

In [37]:
# Derive the table schema from the cleaned CSV written earlier in the notebook.
schema = pw.schema_from_csv("dataset_clean.csv")

# Load the same file through Pathway's CSV connector.
# mode="static" asks for a one-shot read of the file rather than a
# continuously watched stream.
# NOTE(review): autocommit_duration_ms presumably only matters in streaming
# mode -- confirm against the Pathway connector docs.
input_table = pw.io.csv.read(
    "dataset_clean.csv",
    schema=schema,
    mode="static",
    autocommit_duration_ms=1000
)

### Compute vehicle weight (the occupancy ratio is computed later, inside the pricing functions)

In [38]:
# Per-row vehicle weight: Car -> 1.0, Bike -> 0.5, Truck -> 1.5, anything
# else -> 1.0. The chain is built inside-out so each step reads as
# "this type, otherwise the rest".
# NOTE(review): the comparisons are case-sensitive and use capitalised labels,
# while traffic_mapping uses lowercase labels -- confirm the casing of
# VehicleType in the dataset, otherwise every row falls through to 1.0.
vehicle_type = input_table.VehicleType
truck_or_default = pw.if_else(vehicle_type == "Truck", 1.5, 1.0)
bike_or_rest = pw.if_else(vehicle_type == "Bike", 0.5, truck_or_default)
table = input_table.with_columns(
    VehicleWeight=pw.if_else(vehicle_type == "Car", 1.0, bike_or_rest),
)


 # Model 1 - Baseline Pricing

In [39]:
def compute_baseline_price(timestamp, occupancy, capacity):
    """Model 1: base price of 10 plus a time-dependent share of the occupancy ratio.

    Args:
        timestamp: Anything ``pd.to_datetime`` can parse; only the hour is used.
        occupancy: Current number of occupied spaces (coerced with ``safe_float``).
        capacity: Total number of spaces (coerced with ``safe_float``).

    Returns:
        ``10 + alpha * occupancy / capacity`` where ``alpha`` is 0.3 for hours
        8-10, 0.2 for hours 17-19 and 0.1 otherwise.
    """
    hour_of_day = pd.to_datetime(timestamp).hour
    # The epsilon keeps a zero (or unparsable) capacity from dividing by zero.
    fill_ratio = safe_float(occupancy) / (safe_float(capacity) + 1e-6)

    if 8 <= hour_of_day <= 10:
        alpha = 0.3
    elif 17 <= hour_of_day <= 19:
        alpha = 0.2
    else:
        alpha = 0.1

    return 10 + alpha * fill_ratio

# Evaluate the baseline model row by row and attach it as Price_Model1.
model1_price = pw.apply(
    compute_baseline_price, table.Timestamp, table.Occupancy, table.Capacity
)
table = table.with_columns(Price_Model1=model1_price)


# Model 2 Demand Calculation


In [40]:
def compute_demand(occupancy, capacity, queue, traffic, special, vehicle_weight):
    """Model 2 demand score: a weighted linear mix of the row's features.

    Every input is coerced with ``safe_float``. Occupancy ratio, queue length,
    special-day flag and vehicle weight raise the score; the traffic level
    lowers it.

    Returns:
        ``1.0*occ_ratio + 0.5*queue - 0.3*traffic + 1.0*special + 0.7*vehicle_weight``
    """
    # The epsilon keeps a zero (or unparsable) capacity from dividing by zero.
    fill_ratio = safe_float(occupancy) / (safe_float(capacity) + 1e-6)
    w_occupancy, w_queue, w_traffic, w_special, w_vehicle = 1.0, 0.5, 0.3, 1.0, 0.7

    # Accumulate term by term, in the same left-to-right order as the formula.
    demand = w_occupancy * fill_ratio
    demand = demand + w_queue * safe_float(queue)
    demand = demand - w_traffic * safe_float(traffic)
    demand = demand + w_special * safe_float(special)
    demand = demand + w_vehicle * safe_float(vehicle_weight)
    return demand


# Evaluate the demand score row by row and attach it as the Demand column.
demand_score = pw.apply(
    compute_demand,
    table.Occupancy,
    table.Capacity,
    table.QueueLength,
    table.TrafficConditionNearby,
    table.IsSpecialDay,
    table.VehicleWeight,
)
table = table.with_columns(Demand=demand_score)


### Normalize Demand

In [41]:
# normalize_series needs the min and max of the whole Demand column, so the
# Pathway table is pulled into pandas, scaled there, and then turned back into
# a Pathway table for the remaining pricing steps.
demand_df = pw.debug.table_to_pandas(table)
demand_df["NormDemand"] = normalize_series(demand_df["Demand"])
table = pw.debug.table_from_pandas(demand_df)






# Model 2 Pricing

In [45]:
def compute_price_model2(timestamp, norm_demand):
    """Model 2: scale the base price of 10 by normalised demand, clipped to [5, 20].

    Args:
        timestamp: Anything ``pd.to_datetime`` can parse; only the hour is used.
        norm_demand: Demand score after normalisation.

    Returns:
        ``10 * (1 + lam * norm_demand)`` limited to the range 5-20, where
        ``lam`` is 1.0 for hours 8-10, 0.8 for hours 17-19 and 0.5 otherwise.
    """
    hour_of_day = pd.to_datetime(timestamp).hour
    # Demand sensitivity for the time of day.
    sensitivity = (
        1.0 if 8 <= hour_of_day <= 10
        else 0.8 if 17 <= hour_of_day <= 19
        else 0.5
    )
    unclipped = 10 * (1 + sensitivity * norm_demand)
    return np.clip(unclipped, 5, 20)


# Evaluate the demand-based model row by row and attach it as Price_Model2.
model2_price = pw.apply(compute_price_model2, table.Timestamp, table.NormDemand)
table = table.with_columns(Price_Model2=model2_price)


# Model 3 Competition-Aware Pricing

In [46]:
def compute_competition_price(timestamp, occupancy, capacity, competitor_price):
    """Model 3: time-adjusted price that reacts to a competitor's price.

    The own price is 10 times a multiplier of 1.2 (hours 8-10), 1.1 (hours
    17-19) or 1.0 (otherwise).

    Returns:
        * When the lot is full and the competitor is cheaper than the own
          price: the competitor's price minus 2, but never below 5.
        * Otherwise: the larger of the own price and the competitor's price
          minus 1, limited to the range 5-20.
    """
    hour_of_day = pd.to_datetime(timestamp).hour
    if 8 <= hour_of_day <= 10:
        surge = 1.2
    elif 17 <= hour_of_day <= 19:
        surge = 1.1
    else:
        surge = 1.0
    own_price = 10 * surge

    lot_is_full = safe_float(occupancy) >= safe_float(capacity)
    if lot_is_full and competitor_price < own_price:
        return max(5, competitor_price - 2)

    candidate = max(own_price, competitor_price - 1)
    return max(5, min(20, candidate))

def reroute_needed(occupancy, capacity, competitor_price):
    """Return whether the lot is full while the competitor charges less than 10."""
    lot_is_full = safe_float(occupancy) >= safe_float(capacity)
    return lot_is_full and competitor_price < 10

# Synthetic competitor price derived from the hour of day:
# 12 + (hour % 4), i.e. a value between 12 and 15.
table = table.with_columns(
    CompetitorPrice=pw.apply(
        lambda ts: 12 + (pd.to_datetime(ts).hour % 4),
        table.Timestamp
    )
)

# Done as a second step so that table.CompetitorPrice refers to the column
# added above. CompetitorPrice itself is not recomputed here: it already
# exists and the expression would be identical.
table = table.with_columns(
    Price_Model3=pw.apply(
        compute_competition_price,
        table.Timestamp,
        table.Occupancy,
        table.Capacity,
        table.CompetitorPrice
    ),
    RerouteFlag=pw.apply(
        reroute_needed,
        table.Occupancy,
        table.Capacity,
        table.CompetitorPrice
    )
)


In [47]:
# Pull the final Pathway table (all price columns included) into pandas for plotting.
plot_df = pw.debug.table_to_pandas(table)



In [56]:
# Parse Timestamp into datetimes for the datetime x-axis; errors="coerce"
# turns any value that cannot be parsed into NaT instead of raising.
plot_df["Timestamp"] = pd.to_datetime(plot_df["Timestamp"], errors="coerce")


# Plot Model 1 vs Model 2 Prices

In [57]:
# One figure per parking lot comparing the Model 1 and Model 2 prices over time.
lots = plot_df["SystemCodeNumber"].unique()
for lot in lots:
    # Sort explicitly: a line glyph connects points in row order, and nothing
    # after the Pathway/pandas round trips puts plot_df back in time order.
    lot_data = plot_df[plot_df["SystemCodeNumber"] == lot].sort_values("Timestamp")
    source = ColumnDataSource(data={
        "x": lot_data["Timestamp"],
        "Model1": lot_data["Price_Model1"],
        "Model2": lot_data["Price_Model2"]
    })

    p = figure(
        title=f"Model 1 vs Model 2 - Lot {lot}",
        x_axis_type="datetime",
        width=800,
        height=300
    )
    p.line(x="x", y="Model1", source=source, color="blue", legend_label="Model 1")
    p.line(x="x", y="Model2", source=source, color="green", legend_label="Model 2")
    p.legend.location = "top_left"
    show(p)


In [58]:
# One figure per parking lot comparing the Model 3 price with the competitor price.
for lot in lots:
    # Sort explicitly: a line glyph connects points in row order, and nothing
    # after the Pathway/pandas round trips puts plot_df back in time order.
    lot_data = plot_df[plot_df["SystemCodeNumber"] == lot].sort_values("Timestamp")
    source = ColumnDataSource(data={
        "x": lot_data["Timestamp"],
        "Model3": lot_data["Price_Model3"],
        "Competitor": lot_data["CompetitorPrice"]
    })

    p = figure(
        title=f"Model 3 vs Competitor - Lot {lot}",
        x_axis_type="datetime",
        width=800,
        height=300
    )
    p.line(x="x", y="Model3", source=source, color="orange", legend_label="Model 3")
    p.line(x="x", y="Competitor", source=source, color="red", line_dash="dashed", legend_label="Competitor")
    p.legend.location = "top_left"
    show(p)
