<a href="https://colab.research.google.com/github/gottcha63/capstone_project/blob/main/capstone_project__model_2.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

**INSTALLING LIBRARIES**

In [None]:
!pip install pathway bokeh --quiet

# Step 1: Importing and Preprocessing the Data

In [None]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import datetime
from datetime import datetime
import pathway as pw
import bokeh.plotting
import panel as pn

In [None]:
# Read the raw parking dataset from the Colab working directory
data = pd.read_csv(filepath_or_buffer='/content/dataset_capstone project.csv')

# Show the first ten rows as a quick sanity check
data.head(n=10)

In [None]:
# Count the missing values in each column
data.isna().sum()


In [None]:
# Build one 'Timestamp' column from the separate date and time strings,
# then order the rows chronologically and renumber the index from zero.
timestamp_text = data['LastUpdatedDate'] + ' ' + data['LastUpdatedTime']
data = (
    data
    .assign(Timestamp=pd.to_datetime(timestamp_text, format='%d-%m-%Y %H:%M:%S'))
    .sort_values('Timestamp')
    .reset_index(drop=True)
)

CREATING STREAMING DATA

In [None]:
# Columns exported for the simulated stream; the list order is the CSV column order
stream_columns = [
    "ID",
    "Timestamp",
    "Occupancy",
    "Capacity",
    "QueueLength",
    "VehicleType",
    "TrafficConditionNearby",
    "IsSpecialDay",
]

# Write the selected columns (without the DataFrame index) for streaming or downstream processing
data[stream_columns].to_csv("parking_stream_2.csv", index=False)


In [None]:
# Pathway schema for the rows of "parking_stream_2.csv".
# Field names match the column headers written to that CSV.
class ParkingSchema(pw.Schema):
  ID : str         # Unique identifier for the parking location
  Timestamp: str   # Observation time kept as text; parsed later with "%Y-%m-%d %H:%M:%S"
  Occupancy: int   # Number of occupied parking spots
  Capacity: int    # Total parking capacity at the location
  QueueLength: int # Length of the parking queue
  VehicleType: str # Type of vehicle; the pricing function weights "car", "bike" and "truck"
  TrafficConditionNearby: str # Nearby traffic condition; the pricing function weights "high", "average" and "low"
  IsSpecialDay: int # Special-day flag (presumably 0/1 -- confirm against the dataset)

In [None]:
# Replay the exported CSV as a simulated live stream with Pathway's replay_csv.
# input_rate controls the replay speed: 1000 means roughly 1000 rows per second
# are ingested into the stream.
data_stream = pw.demo.replay_csv(
    "parking_stream_2.csv",
    schema=ParkingSchema,
    input_rate=1000,
)

In [None]:
# strptime pattern used to parse the text in the 'Timestamp' column
fmt = "%Y-%m-%d %H:%M:%S"

# Parse once and reuse the same expression for both derived columns
parsed_timestamp = data_stream.Timestamp.dt.strptime(fmt)

# Derived columns added to the stream:
# - 'time': the full parsed datetime
# - 'day' : the date rendered as text with the time part fixed at midnight
#           (useful for day-level aggregations)
data_with_time = data_stream.with_columns(
    time=parsed_timestamp,
    day=parsed_timestamp.dt.strftime("%Y-%m-%dT00:00:00"),
)

# Step 2: Creating a Pricing Function

In [None]:
BASE_PRICE = 10.0


def model2_demand_based_model(occ, cap, queue, traf, special, vt):
    """Return a demand-based parking price for a single observation.

    A linear demand score is built from the occupancy rate, queue length,
    nearby traffic, special-day flag and vehicle type. The score is scaled
    by 1/20 and clipped to [0, 1], then mapped onto a price between
    BASE_PRICE and 1.5 * BASE_PRICE.

    Args:
        occ: Number of occupied spots.
        cap: Total capacity. A non-positive value is treated as an
            occupancy rate of 0 instead of raising ZeroDivisionError.
        queue: Queue length.
        traf: Traffic condition; "high", "average" and "low" are weighted
            1.0, 0.5 and 0.0, and any other value falls back to 1.0.
        special: Special-day flag, multiplied by 5.0 in the demand score.
        vt: Vehicle type; "car", "bike" and "truck" are weighted 1.0, 0.5
            and 1.5, and any other value falls back to 1.0.

    Returns:
        The price as a float, bounded to [0.5 * BASE_PRICE, 2.0 * BASE_PRICE].
    """
    # Feature engineering
    # Guard the division: a row with zero capacity must not crash the whole stream.
    occ_rate = occ / cap if cap > 0 else 0.0
    vehicle_weight = {"car": 1.0, "bike": 0.5, "truck": 1.5}.get(vt, 1.0)
    traf_weight = {"high": 1.0, "average": 0.5, "low": 0.0}.get(traf, 1.0)

    # Demand-based formula
    demand = 2.0 * occ_rate + 1.5 * queue - 1.2 * traf_weight + 5.0 * special + vehicle_weight

    # Normalized demand, clipped to [0, 1]
    norm = min(max(demand / 20, 0.0), 1.0)
    price = BASE_PRICE * (1.0 + 0.5 * norm)

    # Safety bound; with norm in [0, 1] the price already lies inside this range.
    norm_price = float(max(0.5 * BASE_PRICE, min(price, 2.0 * BASE_PRICE)))
    return norm_price


# Wrap the pricing function as a Pathway UDF and append its result as the
# 'Price' column; prices_stream is data_with_time plus the calculated price.
price_udf = pw.udf(model2_demand_based_model)
prices_stream = data_with_time.with_columns(
    Price=price_udf(
        data_with_time.Occupancy,
        data_with_time.Capacity,
        data_with_time.QueueLength,
        data_with_time.TrafficConditionNearby,
        data_with_time.IsSpecialDay,
        data_with_time.VehicleType,
    ),
)

# Step 3: Visualizing Daily Price Fluctuations with a Bokeh Plot


In [None]:
# Activate the Panel extension to enable interactive visualizations
pn.extension()

# Define a custom Bokeh plotting function that takes a data source (from Pathway) and returns a figure
def price_plotter(source):
    """Build the Bokeh figure that shows the price over time.

    Args:
        source: Data source exposing "time" and "Price" columns; it is passed
            straight through to the Bokeh glyph calls.

    Returns:
        The configured Bokeh figure with a line and a point marker per row.
    """
    # Create a Bokeh figure with a datetime x-axis
    fig = bokeh.plotting.figure(
        height=500,
        width=1000,
        title="Pathway: Daily Parking Price",
        x_axis_type="datetime",  # Ensure time-based data is properly formatted on the x-axis
    )

    # Plot a line graph showing how the price evolves over time
    fig.line("time", "Price", source=source, line_width=2, color="navy")

    # Overlay red circles at each data point for better visibility.
    # scatter(marker="circle") is used because figure.circle(size=...) is
    # deprecated in recent Bokeh releases.
    fig.scatter("time", "Price", source=source, size=6, color="red", marker="circle")

    return fig

# Explicitly select the two columns the plot needs
prices_for_plot = prices_stream.select(prices_stream.time, prices_stream.Price)

# Use Pathway's built-in .plot() method to bind the data stream to the Bokeh plot
# - 'price_plotter' is the rendering function
# - sorting_col="time" orders the rows chronologically before plotting; price_plotter
#   does not sort its input, so without it the line would connect points in
#   arbitrary row order
viz = prices_for_plot.plot(price_plotter, sorting_col="time")

# Create a Panel layout and make it servable as a web app
# This line enables the interactive plot to be displayed when the app is served
pn.Column(viz).servable()

In [None]:
# Start the Pathway pipeline execution in the background
# - This triggers the real-time data stream processing defined above
# - %%capture --no-display suppresses output in the notebook interface

%%capture --no-display
pw.run()