<a href="https://colab.research.google.com/github/harshitasharma111/Amazon_Clone/blob/main/CapstoneSummerAnalytics.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [None]:
!pip install pathway





In [None]:
import numpy as np
import pandas as pd
from bokeh.plotting import figure, show, output_notebook
from bokeh.io import push_notebook
import pathway as pw

In [None]:
# Load the raw records from the CSV uploaded to the Colab session.
# NOTE(review): the path is session-specific — confirm it before re-running elsewhere.
df = pd.read_csv(filepath_or_buffer='/content/dataset (1).csv')


In [None]:
def preprocess(df, default_vehicle_weight=1.0):
    """Derive the numeric feature columns used by the pricing models.

    The input frame is copied first, so the caller's frame is not modified.

    Parameters
    ----------
    df : pandas.DataFrame
        Raw records. Must contain the columns ``Occupancy``, ``Capacity``,
        ``VehicleType``, ``TrafficConditionNearby``, ``QueueLength``,
        ``IsSpecialDay``, ``LastUpdatedDate`` and ``LastUpdatedTime``.
    default_vehicle_weight : float, optional
        Weight assigned to vehicle types that are missing from the lookup
        table. Without a default such rows would carry NaN into the demand
        score and every price derived from it.

    Returns
    -------
    pandas.DataFrame
        A copy of ``df`` with ``LastUpdatedDate`` / ``LastUpdatedTime`` cast
        to ``str`` and these columns added: ``occupancy_rate``,
        ``vehicle_type_weight``, ``traffic``, ``queue_length``,
        ``is_special_day`` and ``full_datetime``.
    """
    df = df.copy()

    # Normalize occupancy
    df['occupancy_rate'] = df['Occupancy'] / df['Capacity']

    # Encode vehicle type. Labels are lower-cased and stripped so that
    # 'Car' or ' car' hit the same entry; anything still unmapped falls back
    # to the default weight instead of NaN.
    vehicle_map = {'car': 1.0, 'bike': 0.5, 'truck': 1.5}
    vehicle_labels = df['VehicleType'].astype(str).str.strip().str.lower()
    df['vehicle_type_weight'] = (
        vehicle_labels.map(vehicle_map).fillna(default_vehicle_weight).astype(float)
    )

    # Map traffic string values to numeric; unknown labels count as 0.
    traffic_map = {'low': 1.0, 'average': 2.0, 'high': 3.0}
    traffic_labels = df['TrafficConditionNearby'].astype(str).str.strip().str.lower()
    df['traffic'] = traffic_labels.map(traffic_map).fillna(0).astype(float)

    # Missing queue lengths are treated as an empty queue.
    df['queue_length'] = df['QueueLength'].fillna(0).astype(float)
    df['is_special_day'] = df['IsSpecialDay'].astype(int)

    # Date and time arrive as separate columns; join them and parse
    # day-first into a single timestamp.
    df['LastUpdatedDate'] = df['LastUpdatedDate'].astype(str)
    df['LastUpdatedTime'] = df['LastUpdatedTime'].astype(str)
    df['full_datetime'] = pd.to_datetime(
        df['LastUpdatedDate'] + ' ' + df['LastUpdatedTime'],
        dayfirst=True
    )

    return df


In [None]:
# Add the derived feature columns to the loaded records.
df = df.pipe(preprocess)


In [None]:
def baseline_linear_price(prev_price, Occupancy, Capacity, alpha=2.0):
    """Return the previous price raised in proportion to how full the lot is.

    The increment is ``alpha`` times the occupied fraction
    ``Occupancy / Capacity``.
    """
    occupancy_fraction = Occupancy / Capacity
    price_increment = alpha * occupancy_fraction
    return prev_price + price_increment


In [None]:
def demand_function(row, alpha=1.0, beta=0.5, gamma=0.3, delta=1.0, epsilon=0.8):
    """Linear demand score built from a record's preprocessed features.

    ``row`` is any object indexable by the keys ``occupancy_rate``,
    ``queue_length``, ``traffic``, ``is_special_day`` and
    ``vehicle_type_weight`` (for example a DataFrame row). Every term adds
    to the score except traffic, which is subtracted.
    """
    occupancy_term = alpha * row['occupancy_rate']
    queue_term = beta * row['queue_length']
    traffic_term = gamma * row['traffic']
    special_day_term = delta * row['is_special_day']
    vehicle_term = epsilon * row['vehicle_type_weight']
    return occupancy_term + queue_term - traffic_term + special_day_term + vehicle_term

In [None]:
def demand_based_price(base_price, demand, lambda_=0.3):
    """Scale the base price by min-max normalised demand.

    ``demand`` must expose ``.min()`` and ``.max()`` (a pandas Series or a
    NumPy array). The result is ``base_price * (1 + lambda_ * norm)``,
    clipped to the range 0.5x to 2x ``base_price``.
    """
    lowest = demand.min()
    # The small constant keeps the division defined when all demands are equal.
    spread = demand.max() - lowest + 1e-6
    scaled_demand = (demand - lowest) / spread
    unclipped = base_price * (1 + lambda_ * scaled_demand)
    return np.clip(unclipped, 0.5 * base_price, 2.0 * base_price)

In [None]:
def haversine(lat1, lon1, lat2, lon2):
    """Great-circle distance in kilometres between points given in degrees.

    Arguments are passed through NumPy functions, so scalars and arrays
    (or pandas Series) can be mixed.
    """
    earth_radius_km = 6371
    lat1_rad = np.radians(lat1)
    lat2_rad = np.radians(lat2)
    half_dlat = np.radians(lat2 - lat1) / 2
    half_dlon = np.radians(lon2 - lon1) / 2
    lat_term = np.sin(half_dlat)**2
    lon_term = np.cos(lat1_rad)*np.cos(lat2_rad)*np.sin(half_dlon)**2
    haversine_value = lat_term + lon_term
    return 2*earth_radius_km*np.arcsin(np.sqrt(haversine_value))

In [None]:
def competitive_price(row, lots_df, base_price, demand_price, radius_km=0.5):
    """Adjust a demand-based price against the cheapest nearby competitor.

    Parameters
    ----------
    row : mapping-like
        Record being priced; read for ``Latitude``, ``Longitude``, ``ID``,
        ``Occupancy`` and ``Capacity``.
    lots_df : pandas.DataFrame
        Candidate competitors with ``Latitude``, ``Longitude``, ``ID`` and
        ``current_price`` columns. Rows sharing ``row``'s ``ID`` are ignored.
        No time filter is applied: every row within the radius counts.
    base_price : float
        Reference price; results of an adjustment stay within 0.5x to 2x of it.
    demand_price : float
        Price suggested by the demand model for ``row``.
    radius_km : float, optional
        Maximum distance for a row to count as a competitor.

    Returns
    -------
    float
        ``demand_price`` when there is no competitor or no rule applies;
        otherwise the adjusted price.
    """
    price_floor = 0.5 * base_price
    price_ceiling = 2.0 * base_price

    # Find nearby lots
    dists = haversine(row['Latitude'], row['Longitude'],
                      lots_df['Latitude'], lots_df['Longitude'])
    nearby = lots_df[(dists < radius_km) & (lots_df['ID'] != row['ID'])]
    if len(nearby) == 0:
        return demand_price

    min_competitor_price = nearby['current_price'].min()

    # Lot is full and a competitor is cheaper: undercut it, but never drop
    # below the price floor (an unbounded undercut could even go negative).
    if row['Occupancy'] >= row['Capacity'] and demand_price > min_competitor_price:
        return max(min_competitor_price - 0.5, price_floor)

    # Competitors are more expensive: move up to just under the cheapest one,
    # capped at the ceiling. The outer max keeps this branch from lowering the
    # price when the competitor is less than 0.1 above demand_price.
    if demand_price < min_competitor_price:
        return max(demand_price, min(min_competitor_price - 0.1, price_ceiling))

    return demand_price

In [None]:
# Model parameters: reference price, demand-function weights and the
# sensitivity of price to normalised demand.
base_price = 10.0
alpha, beta, gamma, delta, epsilon = 1.0, 0.5, 0.3, 1.0, 0.8
lambda_ = 0.3

df['prev_price'] = base_price
# demand_function only indexes columns by name and combines them with
# arithmetic, so passing the whole frame scores every row in one vectorised
# call instead of a row-by-row apply.
df['demand'] = demand_function(df, alpha, beta, gamma, delta, epsilon)
df['demand_price'] = demand_based_price(base_price, df['demand'], lambda_)


In [None]:
# For competitive model, simulate current_price for all lots
df['current_price'] = df['demand_price']
# Collect every adjusted price first and assign the column once, so the frame
# is not modified while it is being iterated.
df['competitive_price'] = [
    competitive_price(row, df, base_price, row['demand_price'])
    for _, row in df.iterrows()
]

In [None]:
# Example: pick the first ID that appears in the data and order its records by time.
# NOTE(review): assumes 'ID' identifies a parking lot rather than a single record — confirm.
lot_id = pd.unique(df['ID'])[0]
lot_df = df.loc[df['ID'].eq(lot_id)].sort_values(by='full_datetime')



In [None]:
from bokeh.plotting import figure, show
from bokeh.models import ColumnDataSource


# Columns handed to Bokeh for the selected lot.
# NOTE(review): "baseline_price" is fed from 'prev_price'; baseline_linear_price
# is not applied here — confirm this is the intended baseline series.
source = ColumnDataSource(data=dict(
    full_datetime=lot_df['full_datetime'],
    baseline_price=lot_df['prev_price'],
    demand_price=lot_df['demand_price'],
    competitive_price=lot_df['competitive_price'],
))



def price_plotter(source):
    """Build a Bokeh figure with the three price series plotted against time.

    ``source`` must provide the columns ``full_datetime``,
    ``baseline_price``, ``demand_price`` and ``competitive_price``. The
    title reads the module-level ``lot_id``.
    """
    chart = figure(
        title=f"Daily Parking Price Evolution for Lot {lot_id}",
        x_axis_type="datetime",
        width=800,
        height=400,
    )

    # (column, colour, legend label) for each line, in drawing order.
    series_styles = (
        ("baseline_price", "blue", "Baseline"),
        ("demand_price", "green", "Demand-Based"),
        ("competitive_price", "red", "Competitive"),
    )
    for column, colour, label in series_styles:
        chart.line("full_datetime", column, source=source,
                   line_width=2, color=colour, legend_label=label)

    # Mark the individual demand-price observations on top of the lines.
    chart.scatter("full_datetime", "demand_price", source=source,
                  size=6, color="black", legend_label="Demand Points")

    chart.xaxis.axis_label = "Time"
    chart.yaxis.axis_label = "Price"
    chart.legend.location = "top_left"
    return chart

# Direct Bokeh output to the notebook so show() renders the figure inline
# in the cell output.
output_notebook()
p = price_plotter(source)
show(p)

In [None]:
def suggest_reroute(row, lots_df, radius_km=0.5):
    """Return the ID of the cheapest nearby lot that still has free space.

    Parameters
    ----------
    row : mapping-like
        Record to reroute from; read for ``Latitude``, ``Longitude`` and ``ID``.
    lots_df : pandas.DataFrame
        Candidate lots with ``Latitude``, ``Longitude``, ``ID``,
        ``Occupancy``, ``Capacity`` and ``current_price`` columns.
    radius_km : float, optional
        Maximum distance for a lot to be considered.

    Returns
    -------
    The ``ID`` value of the chosen lot, or ``None`` when no other lot within
    the radius has free space and a known price.
    """
    dists = haversine(row['Latitude'], row['Longitude'],
                      lots_df['Latitude'], lots_df['Longitude'])
    has_space = lots_df['Occupancy'] < lots_df['Capacity']
    # Exclude the querying lot itself, as competitive_price does; otherwise a
    # lot with free space could be "rerouted" to itself.
    is_other_lot = lots_df['ID'] != row['ID']
    nearby = lots_df[(dists < radius_km) & has_space & is_other_lot]

    # Rows without a price cannot be ranked.
    candidates = nearby.dropna(subset=['current_price'])
    if candidates.empty:
        return None

    # Positional lookup on the ID column keeps the ID's own dtype and does not
    # depend on index labels being unique.
    cheapest_pos = candidates['current_price'].to_numpy().argmin()
    return candidates['ID'].iloc[cheapest_pos]

# For every record, store the ID suggested by suggest_reroute (None when there is none).
df['reroute_to'] = df.apply(suggest_reroute, axis=1, args=(df,))