# MODEL 1 :  Baseline Linear Model

## Preprocessing Of Dataset

### Importing the Libraries

In [15]:
!pip install pathway bokeh  --quiet

In [None]:
import pandas as pd
import pathway as pw
import numpy as np
import bokeh.plotting
import bokeh.model
import panel as pn
import datetime
from datetime import datetime

In [17]:
# Load the raw parking dataset into a DataFrame.
df = pd.read_csv('/content/dataset.csv')

# Drop the columns that are not needed for the pricing model.
df = df.drop(columns=['ID', 'Latitude', 'Longitude'])

# Some rows report an Occupancy greater than the Capacity, which is impossible.
# Remove those rows so they cannot distort the price.
# `.copy()` detaches the filtered frame from the one it was sliced from, so the
# column assignments made in later cells do not raise SettingWithCopyWarning.
df = df[~(df['Capacity'] < df['Occupancy'])].copy()


* Major Assumptions:
1. The dataset does not make clear whether the Occupancy counts only vehicles of the listed Vehicle Type, so I assumed that the Vehicle Type applies to the entire Occupancy of each row.
2. A dynamic price has to apply to a specific time interval, which is what makes it useful as a profit strategy, so I implemented time-interval-based dynamic pricing instead of a per-day price.
3. Since Longitude, Latitude and SystemCodeNumber are unique to each lot, I did not consider location as a factor.


### Feature Engineering


In [None]:
# Join the separate date and time strings and parse them into one datetime column.
date_time_text = df['LastUpdatedDate'] + ' ' + df['LastUpdatedTime']
df['Timestamp'] = pd.to_datetime(date_time_text, format='%d-%m-%Y %H:%M:%S')

# The individual date and time columns are redundant now, so remove them.
df = df.drop(columns=['LastUpdatedDate', 'LastUpdatedTime'])

# Count the missing values in every column.
df.isna().sum()

In [None]:
# Show the distinct values found in the VehicleType column.
df['VehicleType'].unique()

* Creating a new column that holds the marker color used to highlight special-day rows in the plot

In [20]:
def specialDay(day):
  """Return the marker color for a row: 'orange' when `day` equals 1, otherwise 'yellow'."""
  return 'orange' if day == 1 else 'yellow'
# Store the marker color of every row, derived from its IsSpecialDay value.
df['color'] = df['IsSpecialDay'].apply(specialDay)

* Splitting the dataset into 14 separate files, one per parking lot (for easy distribution)

In [21]:
# Write the rows of every parking lot to its own CSV file, numbering the
# files from 1 in the order in which groupby yields the lots.
lot_groups = df.groupby('SystemCodeNumber')
for lot_number, (_, lot_rows) in enumerate(lot_groups, start=1):
    lot_rows.to_csv(f"parking_lot_{lot_number}.csv", index=False)

## Making of Pipeline

### Making the Schema to parse the Files

In [22]:
class InputSchema(pw.Schema):
  # Columns of the per-lot CSV files, in the order they are declared here.
  SystemCodeNumber: str
  Capacity: int
  Occupancy: int
  VehicleType: str
  TrafficConditionNearby: str
  QueueLength: int
  IsSpecialDay: int
  # Read as text; it is parsed into a datetime in a later cell.
  Timestamp: str
  # Marker color assigned from IsSpecialDay during preprocessing.
  color: str

In [23]:
# Replay every per-lot CSV file as streaming data, stored under the keys
# "t1" ... "t14".
tables = {}
for i in range(1, 15):
    # The per-lot files were written with a relative filename, so read them
    # from the working directory as well instead of a hard-coded "/content/"
    # prefix. On Colab the working directory is /content, so the same files
    # are read there; elsewhere the notebook no longer breaks.
    filename = f"parking_lot_{i}.csv"
    table_name = f"t{i}"

    tables[table_name] = pw.demo.replay_csv(
        filename,
        schema=InputSchema,
        input_rate=1000,
    )

* Parsing the Timestamp column into a datetime object

In [24]:
fmt = "%Y-%m-%d %H:%M:%S"  # layout of the Timestamp strings in the CSV files
for lot in range(1, 15):
  key = f't{lot}'
  parsed_time = tables[key].Timestamp.dt.strptime(fmt)
  tables[key] = tables[key].with_columns(
    # Datetime version of the Timestamp string
    TimeStamp=parsed_time,
    # Calendar day of the timestamp, formatted back into a string
    day=parsed_time.dt.strftime("%Y-%m-%dT00:00:00"),
  )

## Model Creation






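* **Making the linear formula**:

  $$\text{price} = \text{min price} + \alpha \cdot \text{utilisation}$$

  such that

  1. **Minimum Price**: $10 \times 0.5 = 5$

  2. **Utilisation**: Current Occupancy / Maximum Capacity of the lot

  3. **Alpha**: Maximum Price − Base Price

  Because the utilisation lies between 0 and 1, the price stays between the minimum and maximum price allowed (with a base price of 10 and a maximum price of 20, $\alpha = 10$, so the price ranges from 5 to 15).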


### Windowing the Table

* Windowing each dataset into 30-minute intervals and aggregating every window with the windowby().reduce() method


In [None]:
from datetime import timedelta

# Price bounds derived from the given base price.
base_price = 10  # given
min_price = 5    # 10 * 0.5
max_price = 20   # 10 * 2.0

windowed_table = {}
for lot in range(1, 15):
  key = f't{lot}'

  # Split the lot's rows into fixed 30-minute tumbling windows on the parsed
  # event time, partitioned per calendar day.
  half_hour_windows = tables[key].windowby(
      pw.this.TimeStamp,
      instance=pw.this.day,
      window=pw.temporal.tumbling(duration=timedelta(minutes=30)),
      behavior=pw.temporal.exactly_once_behavior(),  # one output per window
  )

  # Collapse every window into a single summary row.
  window_summary = half_hour_windows.reduce(
      time_start=pw.this._pw_window_start,
      time_end=pw.this._pw_window_end,
      occ_max=pw.reducers.max(pw.this.Occupancy),
      cap=pw.reducers.max(pw.this.Capacity),
      color=pw.reducers.earliest(pw.this.color),
  )

  # Linear price: min_price plus the utilisation (occ_max / cap) scaled by
  # (max_price - base_price). With the constants above this is 5 for an
  # empty lot and 15 for a full one.
  windowed_table[key] = window_summary.with_columns(
      price=min_price + (pw.this.occ_max / pw.this.cap) * (max_price - base_price)
  )


## Creating the Interactive Dashboard

* Building a dashboard with one tab per parking lot, using the Bokeh and Panel libraries

In [None]:
import panel as pn
# Initialise Panel's notebook extension before any Panel object is created
pn.extension()

# Making the custom Price Plotting Function
def price_plotter(source):
    """Build the price chart for one parking lot.

    Args:
        source: Data source passed to the Bokeh glyphs. It must provide the
            "time_end", "price" and "color" columns.

    Returns:
        The Bokeh figure, containing a line through the prices and one
        marker per point whose color is read from the "color" column.
    """
    fig = bokeh.plotting.figure(
        height=400,
        width=800,
        title=" Dynamic Parking Lot Price",
        tools='hover,pan,zoom_in,zoom_out,wheel_zoom,save,reset',  # manually selected toolbar
        x_axis_type="datetime",
    )
    # Line connecting the price of consecutive windows
    fig.line("time_end", "price", source=source, line_width=2, color="navy")
    # One marker per window. `scatter` draws screen-sized circle markers by
    # default; `circle(size=...)` is deprecated in current Bokeh releases.
    fig.scatter("time_end", "price", source=source, size=6, color="color")
    return fig


#creating the figures of different parking lot
# One price plot per parking lot, with the points ordered by window end time
figures = {
    f'fig{lot}': windowed_table[f't{lot}'].plot(price_plotter, sorting_col="time_end")
    for lot in range(1, 15)
}

In [None]:
# Assemble the dashboard: one tab per parking lot, each showing that lot's figure
lot_tabs = [(f"Lot {n}", figures[f"fig{n}"]) for n in range(1, 15)]
tabs = pn.Tabs(*lot_tabs)

tabs.servable()

## Creating the DataFlow using Pathway

In [None]:
# Run the Pathway pipeline built in the cells above
pw.run()