<a href="https://colab.research.google.com/github/divanshu-06/Summer-Analytics_2025/blob/main/Final_Capstone_Project/Module_1.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

# Module 1



In [24]:
!pip install pathway bokeh --quiet # This cell may take a few seconds to execute.


# Step 1: Importing and Preprocessing the Data

In [25]:
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import datetime
from datetime import datetime
import pathway as pw
import bokeh.plotting
import panel as pn
from bokeh.plotting import figure, show, output_notebook
from bokeh.layouts import gridplot
from bokeh.models import DatetimeTickFormatter, HoverTool
from bokeh.models import ColumnDataSource

In [26]:

# The sample dataset can be downloaded from:
# https://drive.google.com/file/d/1D479FLjp9aO3Mg8g6Lpj9oRViWacurA6/view?usp=sharing
DATASET_PATH = '/content/dataset.csv'

# Read the raw parking records and show the frame as the cell output.
df = pd.read_csv(DATASET_PATH)
df

Unnamed: 0,ID,SystemCodeNumber,Capacity,Latitude,Longitude,Occupancy,VehicleType,TrafficConditionNearby,QueueLength,IsSpecialDay,LastUpdatedDate,LastUpdatedTime
0,0,BHMBCCMKT01,577,26.144536,91.736172,61,car,low,1,0,04-10-2016,07:59:00
1,1,BHMBCCMKT01,577,26.144536,91.736172,64,car,low,1,0,04-10-2016,08:25:00
2,2,BHMBCCMKT01,577,26.144536,91.736172,80,car,low,2,0,04-10-2016,08:59:00
3,3,BHMBCCMKT01,577,26.144536,91.736172,107,car,low,2,0,04-10-2016,09:32:00
4,4,BHMBCCMKT01,577,26.144536,91.736172,150,bike,low,2,0,04-10-2016,09:59:00
...,...,...,...,...,...,...,...,...,...,...,...,...
18363,18363,Shopping,1920,26.150504,91.733531,1517,truck,average,6,0,19-12-2016,14:30:00
18364,18364,Shopping,1920,26.150504,91.733531,1487,car,low,3,0,19-12-2016,15:03:00
18365,18365,Shopping,1920,26.150504,91.733531,1432,cycle,low,3,0,19-12-2016,15:29:00
18366,18366,Shopping,1920,26.150504,91.733531,1321,car,low,2,0,19-12-2016,16:03:00


In [27]:
# Join the date and time strings with a space so they can be parsed in one pass.
last_updated_text = df['LastUpdatedDate'] + ' ' + df['LastUpdatedTime']
df['Timestamp'] = pd.to_datetime(last_updated_text, format='%d-%m-%Y %H:%M:%S')

# Order the rows chronologically and renumber the index from 0.
df_by_time = df.sort_values(by='Timestamp')
df = df_by_time.reset_index(drop=True)

# Step 2: Making a simple pricing function

In [28]:
# Pricing-model parameters.
p=10       # base price; every lot restarts from it on each new date
alpha=0.4  # weight applied to the occupancy ratio at every observation
mn=5       # lower bound the price is clamped to
mx=20      # upper bound the price is clamped to


def _recursive_prices(dates, occupancies, capacities, base, weight, low, high):
    """Return the running prices for one lot's time-ordered observations.

    Each price is the previous price plus ``weight * occupancy / capacity``,
    clamped to the range ``[low, high]``. The running price restarts from
    ``base`` whenever an observation's date differs from the previous one's.

    Args:
        dates: Date of each observation, in chronological order.
        occupancies: Occupancy of each observation.
        capacities: Capacity of each observation.
        base: Price the recursion starts from on every new date.
        weight: Multiplier applied to the occupancy ratio.
        low: Minimum allowed price.
        high: Maximum allowed price.

    Returns:
        A list with one price per observation, in input order.
    """
    prices=[]
    prev_price=base
    prev_date=None

    for day,occupancy,capacity in zip(dates,occupancies,capacities):
        if day!=prev_date:
            prev_price=base  # Resetting price at start of new day

        price=prev_price+weight*(occupancy/capacity)
        price=min(max(price,low),high)

        prices.append(price)
        prev_price=price
        prev_date=day

    return prices


# Start from a float column: filling with None would leave an object-dtype
# column even after the numeric prices are written into it.
df["Price"]=float("nan")

# Replace the per-row ID with one integer per parking lot, numbered in order
# of first appearance.
slot_mapping={name: idx for idx, name in enumerate(df["SystemCodeNumber"].unique())}
df["ID"]=df["SystemCodeNumber"].map(slot_mapping)

df["Date"]=df["Timestamp"].dt.date

# Recursive price calculation, one parking lot at a time
for slot_id in df["ID"].unique():
    slot_df=df[df["ID"]==slot_id].sort_values("Timestamp")

    # .tolist() yields plain Python values, matching what row-wise iteration
    # over this mixed-dtype frame produced.
    df.loc[slot_df.index,"Price"]=_recursive_prices(
        slot_df["Date"].tolist(),
        slot_df["Occupancy"].tolist(),
        slot_df["Capacity"].tolist(),
        p,alpha,mn,mx,
    )


In [29]:
# Write the columns used for streaming / downstream processing to a CSV file,
# without the index column.
# Note: only three features are exported here for simplicity; participants are
# expected to incorporate additional relevant features in their models.
STREAM_COLUMNS = ["Timestamp", "Occupancy", "Capacity"]
df.to_csv("parking_stream.csv", columns=STREAM_COLUMNS, index=False)

In [30]:
# Define the schema for the Pathway table built from the priced DataFrame.
# It specifies the expected name and type of every column in each row.

class ParkingSchema(pw.Schema):
    ID:int         # integer identifier of the parking lot
    Timestamp:str  # observation time as text, formatted "%Y-%m-%d %H:%M:%S"
    Price:float    # computed price for the observation

# Make the frame match the declared schema before handing it to Pathway:
# df["Timestamp"] is a datetime column, so it is rendered as text, and Price is
# cast so it is a float column regardless of how it was populated.
# NOTE(review): assumes downstream code expects this timestamp format — confirm.
table_input=df[["ID", "Timestamp", "Price"]].assign(
    Timestamp=df["Timestamp"].dt.strftime("%Y-%m-%d %H:%M:%S"),
    Price=df["Price"].astype(float),
)

# converting pandas to pw
table=pw.debug.table_from_pandas(table_input,schema=ParkingSchema)

# Step 3: Visualizing Daily Price Fluctuations with a Bokeh Plot

In [31]:
output_notebook()


def _price_figure(slot_name, slot_df):
    """Build the pricing-curve figure for a single parking lot.

    Args:
        slot_name: Value of ``SystemCodeNumber`` for the lot; used in the title.
        slot_df: Rows of that lot, read through the ``Timestamp`` and ``Price``
            columns.

    Returns:
        A Bokeh figure with a line and red circle markers of price over time,
        plus a hover tool showing the time and price.
    """
    src=ColumnDataSource(data={"x":slot_df["Timestamp"],"y":slot_df["Price"]})

    # The figure is named `fig`, not `p`, so that it does not rebind the
    # module-level base price `p`.
    fig=figure(x_axis_type="datetime",
        title=f"Slot {slot_name} Pricing Curve",
        x_axis_label="Date and Time",y_axis_label="Price of Ticket",
        width=1000,height=350,
        tools="pan,wheel_zoom,box_zoom,reset,save")

    fig.line(x="x",y="y",source=src,line_width=2)
    fig.scatter(x="x",y="y",source=src, size=5, color="red",alpha=0.8,marker="circle")

    fig.xaxis.formatter=DatetimeTickFormatter(hours="%H:%M",days="%d-%m %H:%M",months="%d-%m",years="%Y")

    hover=HoverTool(tooltips=[("Time","@x{%F %H:%M}"),("Price", "@y{0.00}")],
        formatters={"@x":"datetime"},mode="vline")
    fig.add_tools(hover)

    return fig


# One figure per parking lot, in order of first appearance in the data.
plots=[]

for slot_name in df["SystemCodeNumber"].unique():
    slot_df=df[df["SystemCodeNumber"]==slot_name].sort_values("Timestamp")
    plots.append(_price_figure(slot_name,slot_df))


# Running the Visualization

In [32]:

# Stack the per-lot figures vertically (one figure per row) and render them.
grid=gridplot(plots, ncols=1)
show(grid)
