In [0]:
-- Bronze layer: streaming ingest of the raw 2019 yellow-taxi CSV files.
-- Every source column is passed through untouched; two audit columns are
-- appended so each row can be traced back to its load and its input file.
CREATE OR REFRESH STREAMING LIVE TABLE dev_1899989130012056.nyctaxi_dlt_sql.taxi_trips_bronze
COMMENT "Raw NYC taxi trip data"
AS
SELECT
    *,
    CURRENT_TIMESTAMP() AS ingestion_time,  -- timestamp evaluated when the row is processed
    _metadata.file_path AS source_file      -- path of the file the row was read from
FROM cloud_files(
    -- Glob over the 2019 yellow-cab monthly extracts.
    "/databricks-datasets/nyctaxi/tripdata/yellow/yellow_tripdata_2019-*.csv.gz",
    "csv",
    -- Reader options: infer column types and treat the first line as a header.
    map(
        "cloudFiles.inferColumnTypes", "true",
        "header", "true"
    )
);

In [0]:
-- Silver layer: cleaned, renamed and lightly enriched taxi trips.
--
-- Reads the bronze stream incrementally, keeps only trips whose pickup falls
-- in calendar year 2019, projects the columns used downstream, and derives:
--   * payment_method : text label for the numeric payment_type code
--   * trip_date      : calendar date of the pickup timestamp
--
-- Data-quality expectations (rows failing either rule are dropped):
--   * valid_fare          : fare_amount must not be negative
--   * valid_trip_distance : trip_distance must be strictly positive
CREATE OR REFRESH STREAMING LIVE TABLE dev_1899989130012056.nyctaxi_dlt_sql.taxi_trips_silver
(
  CONSTRAINT valid_fare EXPECT (fare_amount >= 0) ON VIOLATION DROP ROW,
  CONSTRAINT valid_trip_distance EXPECT (trip_distance > 0) ON VIOLATION DROP ROW
)
COMMENT "Cleaned taxi trips with data quality rules"
AS SELECT
  tpep_pickup_datetime AS pickup_time,
  tpep_dropoff_datetime AS dropoff_time,
  passenger_count,
  trip_distance,
  fare_amount,
  tip_amount,
  total_amount,
  payment_type,
  PULocationID,
  DOLocationID,
  -- Codes outside 1-4 (including NULL) fall through to 'Unknown'.
  CASE
    WHEN payment_type = 1 THEN 'Credit card'
    WHEN payment_type = 2 THEN 'Cash'
    WHEN payment_type = 3 THEN 'No charge'
    WHEN payment_type = 4 THEN 'Dispute'
    ELSE 'Unknown'
  END AS payment_method,
  DATE(tpep_pickup_datetime) AS trip_date
FROM STREAM(LIVE.taxi_trips_bronze)
-- Half-open range instead of year(col) = 2019: selects the same rows, but the
-- predicate is on the bare column so it can be compared without evaluating a
-- function per row. Rows with a NULL pickup time are excluded, as before.
WHERE tpep_pickup_datetime >= TIMESTAMP '2019-01-01 00:00:00'
  AND tpep_pickup_datetime <  TIMESTAMP '2020-01-01 00:00:00';

In [0]:
-- Gold layer: one row per trip_date with volume, revenue and tipping metrics
-- computed over the cleaned silver trips.
CREATE OR REFRESH LIVE TABLE dev_1899989130012056.nyctaxi_dlt_sql.daily_taxi_summary
COMMENT "Daily aggregated metrics for taxi trips"
AS
SELECT
    trip_date,
    COUNT(*)                       AS total_trips,           -- trips on that day
    SUM(fare_amount)               AS total_fares,           -- sum of fare_amount only
    AVG(trip_distance)             AS avg_distance,
    AVG(tip_amount)                AS avg_tip,
    MAX(tip_amount)                AS max_tip,
    COUNT(DISTINCT payment_method) AS payment_methods_used   -- distinct payment labels seen
FROM
    LIVE.taxi_trips_silver
GROUP BY
    trip_date;

-- Gold layer: the 100 busiest pickup -> dropoff location pairs.
--
-- Only pairs with more than 100 trips are considered. Results are ranked by
-- trip_count, highest first. The two location IDs act as tie-breakers so the
-- row set that survives LIMIT 100 is the same on every refresh; without them,
-- pairs with equal trip_count at the cut-off could be swapped arbitrarily.
--
-- Note: avg_fare is the mean of total_amount, not of fare_amount.
CREATE OR REFRESH LIVE TABLE dev_1899989130012056.nyctaxi_dlt_sql.popular_routes
COMMENT "Most popular pickup and dropoff locations"
AS SELECT
  PULocationID AS pickup_location,
  DOLocationID AS dropoff_location,
  COUNT(*) AS trip_count,
  AVG(total_amount) AS avg_fare,
  AVG(trip_distance) AS avg_distance
FROM LIVE.taxi_trips_silver
GROUP BY PULocationID, DOLocationID
HAVING COUNT(*) > 100
ORDER BY
  trip_count DESC,
  pickup_location,
  dropoff_location
LIMIT 100;