In [42]:
!pip install pathway bokeh jupyter-bokeh --quiet

In [43]:
!pip install pathway bokeh --quiet

In [44]:
import pathway as pw
from pathway.internals import dtype
import numpy as np
import pandas as pd

from bokeh.plotting import figure, show, output_notebook
from bokeh.models import ColumnDataSource
output_notebook()

import pathway as pw
import math
import datetime


In [45]:
def load_and_preprocess(path='/content/dataset.csv'):
    df = pd.read_csv(path)

    df.columns = [col.strip().replace(' ', '_') for col in df.columns]
    df['timestamp'] = pd.to_datetime(df['LastUpdatedDate'] + ' ' + df['LastUpdatedTime'], dayfirst=True)
    df['occupancy_rate'] = df['Occupancy'] / df['Capacity']
    vehicle_weight_map = {'car': 1.0, 'bike': 0.7, 'truck': 1.5}
    df['vehicle_weight'] = df['VehicleType'].map(vehicle_weight_map).fillna(1.0)
    traffic_map = {'low': 0, 'medium': 1, 'high': 2}
    df['traffic_score'] = df['TrafficConditionNearby'].map(traffic_map).fillna(0)
    df['QueueLength'] = df['QueueLength'].fillna(0)
    df['IsSpecialDay'] = df['IsSpecialDay'].fillna(0)

    return df

df = load_and_preprocess('/content/dataset.csv')
df[['timestamp', 'occupancy_rate', 'vehicle_weight', 'traffic_score']].head()



Unnamed: 0,timestamp,occupancy_rate,vehicle_weight,traffic_score
0,2016-10-04 07:59:00,0.105719,1.0,0.0
1,2016-10-04 08:25:00,0.110919,1.0,0.0
2,2016-10-04 08:59:00,0.138648,1.0,0.0
3,2016-10-04 09:32:00,0.185442,1.0,0.0
4,2016-10-04 09:59:00,0.259965,0.7,0.0


In [46]:
def baseline_pricing(df, alpha=0.1, base_price=10.0):
    """
    Implements Model 1: Linear pricing based on occupancy rate.
    For each row in the dataframe, updates the price based on previous price.
    """
    df_sorted = df.sort_values(by='timestamp').copy()
    df_sorted['price'] = base_price

    for lot in df_sorted['SystemCodeNumber'].unique():
        lot_df = df_sorted[df_sorted['SystemCodeNumber'] == lot].copy()
        prev_price = base_price
        prices = []

        for _, row in lot_df.iterrows():
            price = prev_price + alpha * row['occupancy_rate'] * prev_price
            prices.append(price)
            prev_price = price

        df_sorted.loc[df_sorted['SystemCodeNumber'] == lot, 'price'] = prices

    return df_sorted

baseline_df = baseline_pricing(df)
baseline_df[['timestamp', 'SystemCodeNumber', 'occupancy_rate', 'price']].head()


Unnamed: 0,timestamp,SystemCodeNumber,occupancy_rate,price
0,2016-10-04 07:59:00,BHMBCCMKT01,0.105719,10.105719
5248,2016-10-04 07:59:00,BHMNCPHST01,0.1975,10.1975
3936,2016-10-04 07:59:00,BHMMBMMBX01,0.384279,10.384279
6560,2016-10-04 07:59:00,BHMNCPNST01,0.513402,10.513402
17056,2016-10-04 07:59:00,Shopping,0.319792,10.319792


In [47]:
from bokeh.plotting import figure, show, output_notebook
from bokeh.models import ColumnDataSource
output_notebook()

def plot_parking_price(df, lot_id):
    df_lot = df[df['SystemCodeNumber'] == lot_id].sort_values(by='timestamp')
    source = ColumnDataSource(df_lot)

    p = figure(title=f"Baseline Price Trend for Lot {lot_id}",
               x_axis_type='datetime', width=800, height=400)
    p.line(x='timestamp', y='price', source=source, line_width=2)
    p.xaxis.axis_label = 'Time'
    p.yaxis.axis_label = 'Price ($)'
    show(p)


plot_parking_price(baseline_df, lot_id='BHMBCCMKT01')


In [48]:
def demand_based_pricing(df, base_price=10.0, λ=0.5,
                         alpha=1.0, beta=0.8, gamma=0.5, delta=1.2, epsilon=0.6):
    """
    Implements Model 2: Dynamic pricing based on demand formula.
    """
    df_sorted = df.sort_values(by='timestamp').copy()


    df_sorted['raw_demand'] = (
        alpha * df_sorted['occupancy_rate'] +
        beta * df_sorted['QueueLength'] -
        gamma * df_sorted['traffic_score'] +
        delta * df_sorted['IsSpecialDay'] +
        epsilon * df_sorted['vehicle_weight']
    )


    def normalize(group):
        min_d = group['raw_demand'].min()
        max_d = group['raw_demand'].max()
        group['normalized_demand'] = (group['raw_demand'] - min_d) / (max_d - min_d + 1e-6)
        group['normalized_demand'] = group['normalized_demand'] * 2 - 1  # scale to [-1, 1]
        return group

    df_sorted = df_sorted.groupby('SystemCodeNumber').apply(normalize)

    df_sorted['price'] = base_price * (1 + λ * df_sorted['normalized_demand'])
    df_sorted['price'] = df_sorted['price'].clip(lower=0.5 * base_price, upper=2 * base_price)

    return df_sorted

plot_parking_price(demand_df, lot_id='BHMBCCMKT01')


NameError: name 'demand_df' is not defined

In [None]:
from math import radians, cos, sin, sqrt, atan2

def haversine(lat1, lon1, lat2, lon2):
    R = 6371  # Earth radius in km
    dlat = radians(lat2 - lat1)
    dlon = radians(lon2 - lon1)
    a = sin(dlat/2)**2 + cos(radians(lat1)) * cos(radians(lat2)) * sin(dlon/2)**2
    c = 2 * atan2(sqrt(a), sqrt(1 - a))
    return R * c  # Distance in km

def competitive_pricing(df, base_price=10.0, λ=0.5):
    df = demand_based_pricing(df, base_price=base_price, λ=λ).copy()
    df['reroute_flag'] = False

    all_lots = df[['SystemCodeNumber', 'Latitude', 'Longitude']].drop_duplicates()

    for idx, row in all_lots.iterrows():
        lot_id = row['SystemCodeNumber']
        lat1, lon1 = row['Latitude'], row['Longitude']
        nearby = all_lots[
            (all_lots['SystemCodeNumber'] != lot_id) &
            (all_lots.apply(lambda x: haversine(lat1, lon1, x['Latitude'], x['Longitude']) < 1.0, axis=1))
        ]

        if not nearby.empty:
            for i, time_row in df[df['SystemCodeNumber'] == lot_id].iterrows():
                time = time_row['timestamp']
                occ = time_row['occupancy_rate']
                price = time_row['price']

                competitor_prices = []
                for nb in nearby['SystemCodeNumber']:
                    match = df[(df['SystemCodeNumber'] == nb) & (df['timestamp'] == time)]
                    if not match.empty:
                        competitor_prices.append(match.iloc[0]['price'])

                if competitor_prices:
                    avg_competitor = np.mean(competitor_prices)

                    if price > avg_competitor:
                        df.at[i, 'price'] = max(base_price, price - 0.5)

                    if occ > 0.95:
                        for nb in nearby['SystemCodeNumber']:
                            match = df[(df['SystemCodeNumber'] == nb) & (df['timestamp'] == time)]
                            if not match.empty and match.iloc[0]['occupancy_rate'] < 0.7:
                                df.at[i, 'reroute_flag'] = True
                                break
    return df


comp_df = competitive_pricing(df)
comp_df[['timestamp', 'SystemCodeNumber', 'price', 'reroute_flag']].head()


In [None]:
from bokeh.layouts import column
from bokeh.models import HoverTool, Circle
from bokeh.plotting import figure, show, output_notebook
output_notebook()

def simulate_plot(df, lot_ids):
    plots = []

    for lot_id in lot_ids:
        lot_df = df[df['SystemCodeNumber'] == lot_id].sort_values(by='timestamp')
        source = ColumnDataSource(lot_df)

        p = figure(title=f"Price & Rerouting for {lot_id}",
                   x_axis_type='datetime', width=800, height=350)

        p.line(x='timestamp', y='price', source=source, line_width=2, color="navy", legend_label="Price")

        reroute_source = ColumnDataSource(lot_df[lot_df['reroute_flag'] == True])
        p.circle(x='timestamp', y='price', source=reroute_source, size=8, color="red", legend_label="Reroute Flag")

        p.add_tools(HoverTool(tooltips=[("Time", "@timestamp{%F %T}"), ("Price", "@price")],
                              formatters={"@timestamp": "datetime"}))
        p.legend.location = "top_left"
        p.xaxis.axis_label = "Timestamp"
        p.yaxis.axis_label = "Price ($)"

        plots.append(p)

    show(column(*plots))


simulate_plot(comp_df, lot_ids=['BHMBCCMKT01', 'BHMBCCMKT02', 'BHMBCCMKT05'])


In [None]:
import pandas as pd

df = pd.read_csv("/content/dataset.csv")
df["timestamp"] = pd.to_datetime(df["LastUpdatedDate"] + " " + df["LastUpdatedTime"], dayfirst=True)
df["timestamp"] = df["timestamp"].dt.strftime("%Y-%m-%dT%H:%M:%S.000+0000")
df.to_csv("/content/cleaned.csv", index=False)


In [None]:
class ParkingSchema(pw.Schema):
    timestamp: dtype.DATE_TIME_UTC  # expects ISO 8601 + timezone
    SystemCodeNumber: str
    Capacity: float
    Latitude: float
    Longitude: float
    Occupancy: float
    VehicleType: str
    TrafficConditionNearby: str
    QueueLength: float
    IsSpecialDay: float

table = pw.io.csv.read(
    "/content/cleaned.csv",
    schema=ParkingSchema,
    mode="static"
)

In [None]:
# 3. Feature Engineering
@pw.udf
def get_occupancy_rate(capacity, occupancy):
    return occupancy / capacity if capacity else 0

@pw.udf
def get_traffic_score(traffic):
    return {"low": 0, "medium": 1, "high": 2}.get(traffic.lower(), 0)

@pw.udf
def get_vehicle_weight(vtype):
    return {"car": 1.0, "bike": 0.7, "truck": 1.5}.get(vtype.lower(), 1.0)

augmented = table.select(
    table.timestamp,
    table.SystemCodeNumber,
    occupancy_rate = get_occupancy_rate(table.Capacity, table.Occupancy),
    traffic_score = get_traffic_score(table.TrafficConditionNearby),
    vehicle_weight = get_vehicle_weight(table.VehicleType),
    QueueLength = table.QueueLength,
    IsSpecialDay = table.IsSpecialDay
)

In [None]:

# 4. Pricing Logic
@pw.udf
def get_price(occupancy_rate, queue_len, traffic_score, is_special_day, vehicle_weight):
    base_price = 10
    α, β, γ, δ, ε, λ = 1.0, 0.8, 0.5, 1.2, 0.6, 0.5
    demand = (
        α * occupancy_rate +
        β * queue_len -
        γ * traffic_score +
        δ * is_special_day +
        ε * vehicle_weight
    )
    demand = max(min(demand, 1), -1)
    price = base_price * (1 + λ * demand)
    return max(5, min(20, price))


In [None]:
with_price = augmented.select(
    augmented.timestamp,
    augmented.SystemCodeNumber,
    price = get_price(
        augmented.occupancy_rate,
        augmented.QueueLength,
        augmented.traffic_score,
        augmented.IsSpecialDay,
        augmented.vehicle_weight
    )
)

In [None]:
# 6. Output to JSONL
pw.io.jsonlines.write(with_price, "/content/streamed_output.jsonl")
pw.run()