In [6]:
from pyspark.sql.functions import max, min, avg, count, round
from pyspark.sql import SparkSession

In [7]:
class MockDBUtils:
    """Minimal local stand-in that exposes a ``widgets`` attribute with ``text`` and ``get``."""

    class Widgets:
        """In-memory widget store keyed by widget name."""

        def __init__(self):
            # Maps widget name -> the value registered through ``text``.
            self.values = dict()

        def text(self, name, default_value=""):
            """Register ``name`` with ``default_value``, replacing any earlier value."""
            self.values.update({name: default_value})

        def get(self, name):
            """Return the value stored for ``name``, or "" if it was never registered."""
            if name in self.values:
                return self.values[name]
            return ""

    def __init__(self):
        # Each MockDBUtils instance gets its own, initially empty, widget store.
        self.widgets = self.Widgets()


In [8]:
# Prefer a `dbutils` object already supplied by the runtime (e.g. on Databricks),
# so a "catalog" widget value passed in from outside is honoured. Only fall back
# to the local mock when no `dbutils` name exists, i.e. when running locally.
try:
    dbutils
except NameError:
    dbutils = MockDBUtils()

# Single source of truth for the fallback catalog name.
DEFAULT_CATALOG = "citibike_dev"

dbutils.widgets.text("catalog", DEFAULT_CATALOG)
# An empty widget value is treated the same as "not set".
catalog = dbutils.widgets.get("catalog") or DEFAULT_CATALOG

In [11]:
from pyspark.sql import SparkSession

# Configure the application name, then obtain the session via getOrCreate().
session_builder = SparkSession.builder.appName("Citibike Data")
spark = session_builder.getOrCreate()

In [12]:
# Source table: jc_citibike in the `02_silver` schema of the selected catalog.
silver_table = f"{catalog}.`02_silver`.jc_citibike"
df = spark.read.table(silver_table)

In [13]:

# NOTE: `max`, `min` and `round` here are the pyspark.sql.functions versions
# imported at the top of the notebook, not the Python builtins.
trips_by_day = df.groupBy("trip_start_date")

# Duration statistics per day, each rounded to two decimal places.
duration_stats = [
    round(max("trip_duration_mins"), 2).alias("max_trip_duration_mins"),
    round(min("trip_duration_mins"), 2).alias("min_trip_duration_mins"),
    round(avg("trip_duration_mins"), 2).alias("avg_trip_duration_mins"),
]

# `df` is rebound to the aggregated frame, so re-running this cell requires
# re-running the cell that loads the table first.
df = trips_by_day.agg(
    *duration_stats,
    count("ride_id").alias("total_trips"),
)

In [13]:
# Replace the gold summary table with the current contents of `df`; the
# "overwriteSchema" option is set so the stored schema may be replaced as well.
(
    df.write
    .mode("overwrite")
    .option("overwriteSchema", "true")
    .saveAsTable(f"{catalog}.`03_gold`.daily_ride_summary")
)