# GOLD PIPELINE

## IMPORT GOLD UTILS
---

In [0]:
%run "./gold_utils"

## WIDGETS
---

In [0]:
##### YEARS #####

### SILVER PATH ###
ADLS_SILVER = "abfss://citibike@databricksjm.dfs.core.windows.net/silver"

### AVAILABLE YEARS ###
# Build the list of selectable years from the directory names directly under
# the silver path. For a name containing "=", the four characters following
# the first "=" are taken as the year.
years = []
for silver_dir in dbutils.fs.ls(ADLS_SILVER):
    dirname = silver_dir.name
    # Entries whose name starts with "_" are skipped
    # (presumably table metadata such as a transaction log -- confirm).
    if dirname.startswith("_"):
        continue
    _, separator, partition_value = dirname.partition("=")
    # Skip entries that are not "key=value" directories instead of failing
    # with an opaque "substring not found" error.
    if not separator:
        continue
    years.append(partition_value[:4])
years.sort()

# The widget default is the latest year, so at least one year is required.
# Fail with a clear message rather than an IndexError on years[-1].
if not years:
    raise ValueError(f"No year partitions found under {ADLS_SILVER}")

### WIDGET ###
dbutils.widgets.multiselect(
    name="years",
    defaultValue=years[-1],
    choices=years,
    label="Select years"
)
YEARS_RAW = dbutils.widgets.get("years")
# The raw widget value is split on commas; blank fragments are dropped.
YEARS_STR = [y.strip() for y in YEARS_RAW.split(",") if y.strip()]

if not YEARS_STR:
    raise ValueError("No years selected")

In [0]:

##### FILTER & AGGREGATE #####

# Duration bounds are declared as free-text widgets; the values are converted
# to int in the cell that reads them. That cell also replaces a duration_max
# of -1 with a very large upper bound.
dbutils.widgets.text(name="duration_min", defaultValue="59")
dbutils.widgets.text(name="duration_max", defaultValue="89736")

# Aggregation to apply; "top_stations" is the only choice offered.
dbutils.widgets.dropdown(
    name="product",
    defaultValue="top_stations",
    choices=["top_stations"],
)

# Read later as an int and passed to top_stations.
dbutils.widgets.text(name="top_n", defaultValue="10")

In [0]:
##### GOLD NAME #####

# Name of the gold output; it becomes the last path segment of the location
# the result is written to in the save cell.
dbutils.widgets.text("goldname", "")
# Strip surrounding whitespace so that a blank-looking entry (e.g. " ") is
# rejected instead of being used as an output path segment.
GOLDNAME = dbutils.widgets.get("goldname").strip()
if not GOLDNAME:
    raise ValueError("No goldname entered")

## CREATE DATAFRAME BY SELECTED YEARS
---

In [0]:
### CREATE DF ###
# Read the silver Delta data and keep only the rows whose "year" value is one
# of the years selected in the widget (widget strings are converted to int).
df = spark.read.format("delta").load(ADLS_SILVER).where(
    col("year").isin(list(map(int, YEARS_STR)))
)

## FILTER & AGGREGATE
---

In [0]:
### WIDGET VALUES ###
# Numeric widgets come back as text, so convert them before use.
duration_min = int(dbutils.widgets.get("duration_min"))
duration_max = int(dbutils.widgets.get("duration_max"))
product = dbutils.widgets.get("product")
top_n = int(dbutils.widgets.get("top_n"))

### DURATION FILTER ###
# A duration_max of -1 is replaced by a very large upper bound.
duration_max = 99999999999 if duration_max == -1 else duration_max
df = duration_filter(df, duration_min, duration_max)

### PRODUCT ###
# Any other product value leaves the filtered dataframe unchanged.
if product == "top_stations":
    df = top_stations(df, top_n)



## SAVE
---

In [0]:
### GOLD PATH ###
ADLS_GOLD = "abfss://citibike@databricksjm.dfs.core.windows.net/gold"

### SAVE TO GOLD ADLS ###
# Write the result in Delta format under <gold path>/<GOLDNAME>, replacing
# whatever is already stored at that location.
df.write.save(
    f"{ADLS_GOLD}/{GOLDNAME}",
    format="delta",
    mode="overwrite",
)