# 02 — Analysis & Visualizations
Descriptive stats, cancellations, behaviors, drivers, and ops metrics.

In [None]:
import os

import matplotlib.pyplot as plt
import pandas as pd

from scripts.analysis import (
    popular_vehicle_types, avg_distance_and_value, ratings_distribution,
    common_cancellation_reasons, frequent_cancellers, cancellations_by_time,
    correlation_metrics, vtat_ctat_by_vehicle, peak_demand, booking_status_over_time
)

# Load the cleaned dataset; the relative path resolves against the current
# working directory, so run the notebook from the project root.
csv_path = "data/cleaned_Dataset.csv"
df = pd.read_csv(csv_path)

# Trailing expression: the notebook renders the first five rows as a preview.
df.head(n=5)

In [None]:
# Popular vehicle types: compute once, then echo as the cell output.
vehicle_popularity = popular_vehicle_types(df)
vehicle_popularity

In [None]:
# Average ride distance and booking value, shown as the cell output.
distance_value_summary = avg_distance_and_value(df)
distance_value_summary

In [None]:
# Distribution of ratings, shown as the cell output.
ratings_summary = ratings_distribution(df)
ratings_summary

In [None]:
# Cancellation reasons restricted to the customer side (who="customer");
# only the first 10 rows are displayed.
customer_reasons = common_cancellation_reasons(df, who="customer")
customer_reasons.head(10)

In [None]:
# Frequent cancellers; only the first 10 rows are displayed.
canceller_table = frequent_cancellers(df)
canceller_table.head(10)

In [None]:
# Cancellations by hour (plot)
# NOTE(review): pandas >= 2.2 deprecates the "H" offset alias in favour of "h".
# If cancellations_by_time forwards `freq` to pandas this may emit a
# FutureWarning; confirm the minimum supported pandas version before changing.
series = cancellations_by_time(df, freq="H")

series.plot(title="Cancellations by Hour")
plt.xlabel("Time")
plt.ylabel("Count")
plt.tight_layout()

# savefig does not create missing parent directories; without this the cell
# raises FileNotFoundError on a fresh checkout that has no reports/ folder.
os.makedirs("reports", exist_ok=True)
# Save before show(): some backends release the figure once it is shown.
plt.savefig("reports/cancellations_by_hour.png", dpi=150)
plt.show()

In [None]:
# Correlation matrix across the dataset's metrics, shown as the cell output.
correlation_table = correlation_metrics(df)
correlation_table

In [None]:
# VTAT and CTAT broken down by vehicle type, shown as the cell output.
turnaround_by_vehicle = vtat_ctat_by_vehicle(df)
turnaround_by_vehicle

In [None]:
# Peak demand by hour (plot)
peak = peak_demand(df)

# Bar chart: one bar per entry in `peak`.
peak.plot(kind="bar", title="Peak Demand by Hour")
plt.xlabel("Hour of Day")
plt.ylabel("Bookings")
plt.tight_layout()

# savefig does not create missing parent directories; without this the cell
# raises FileNotFoundError on a fresh checkout that has no reports/ folder.
os.makedirs("reports", exist_ok=True)
# Save before show(): some backends release the figure once it is shown.
plt.savefig("reports/peak_demand_by_hour.png", dpi=150)
plt.show()

In [None]:
# Booking status over time, bucketed with the "D" (daily) frequency.
status_freq = "D"
daily = booking_status_over_time(df, freq=status_freq)

# Preview only the leading rows as the cell output.
daily.head()