In [0]:

import mlflow
import mlflow.sklearn
from pyspark.sql.window import Window
from pyspark.sql.functions import avg, lag, col, when, lead, max as spark_max

# Fetch the already-registered model from the "databricks-uc" registry rather
# than retraining it in this notebook.
mlflow.set_registry_uri("databricks-uc")
model_uri = "models:/layers.gold.buy_wait_model/1"
loaded_model = mlflow.sklearn.load_model(model_uri)
print("âœ“ Model loaded")

# Names of the model inputs (must match training).
feature_columns = [
    "price",
    "avg_7d_price",
    "lag_1_price",
    "lag_7_price",
    "pct_change_1d",
    "pct_change_7d",
    "pct_vs_avg",
]

# Rebuild the features from the current contents of the silver table.
base_df = spark.table("layers.silver.book_master")

# One ordered series per (source, book_name). The averaging window is that
# same series restricted to the 7 rows immediately before the current row,
# so the current price never contributes to its own average.
# NOTE(review): both windows count rows, not calendar days -- this presumably
# assumes one scrape per day per book; confirm against the training pipeline.
w_lag = Window.partitionBy("source", "book_name").orderBy("scrape_date")
w_avg = w_lag.rowsBetween(-7, -1)

feature_df = (
    base_df
    # Trailing average and lagged prices.
    .withColumn("avg_7d_price", avg("price").over(w_avg))
    .withColumn("lag_1_price", lag("price", 1).over(w_lag))
    .withColumn("lag_7_price", lag("price", 7).over(w_lag))
    # Percentage differences of the current price against each reference.
    .withColumn(
        "pct_change_1d",
        (col("price") - col("lag_1_price")) / col("lag_1_price") * 100,
    )
    .withColumn(
        "pct_change_7d",
        (col("price") - col("lag_7_price")) / col("lag_7_price") * 100,
    )
    .withColumn(
        "pct_vs_avg",
        (col("price") - col("avg_7d_price")) / col("avg_7d_price") * 100,
    )
)

print("âœ“ Features created")


# GET TODAY'S RECOMMENDATIONS

def get_recommendations(target_date=None):
    """Score the books scraped on one date and print a BUY/WAIT summary.

    Args:
        target_date: Value compared for equality against ``scrape_date``.
            When ``None``, the maximum ``scrape_date`` found in
            ``feature_df`` is used.

    Returns:
        A pandas DataFrame with one row per scored book: ``book_name``,
        ``source``, ``url``, ``current_price``, the remaining feature
        columns, plus ``recommendation`` and ``confidence``. Returns
        ``None`` (after printing a notice) when no row for the date has
        non-null ``avg_7d_price`` and ``lag_7_price``.
    """
    if target_date is None:
        target_date = feature_df.select(spark_max("scrape_date")).collect()[0][0]

    print(f"\nðŸ“… Recommendations for: {target_date}\n")

    # Derive the selected feature columns from feature_columns so there is a
    # single source of truth; "price" is exposed to the caller under the
    # friendlier name "current_price".
    history_features = [name for name in feature_columns if name != "price"]

    today_features = (
        feature_df
        .filter(col("scrape_date") == target_date)
        # Rows without enough preceding history have null look-back features
        # and cannot be scored.
        .filter(col("avg_7d_price").isNotNull())
        .filter(col("lag_7_price").isNotNull())
        .select(
            col("book_name"), col("source"), col("url"),
            col("price").alias("current_price"),
            *[col(name) for name in history_features],
        )
        .toPandas()
    )

    if today_features.empty:
        print("No data for this date")
        return None

    # Build the model input by column NAME rather than by position: restore
    # the training-time name for the price column, then pick the columns in
    # the order feature_columns dictates. A positional relabel would silently
    # feed the wrong values if the list were ever reordered.
    X_predict = today_features.rename(columns={"current_price": "price"})[feature_columns]

    predictions = loaded_model.predict(X_predict)
    probabilities = loaded_model.predict_proba(X_predict)

    today_features["recommendation"] = ["ðŸŸ¢ BUY" if p == 1 else "ðŸ”´ WAIT" for p in predictions]
    # Confidence is the highest class probability for each row.
    today_features["confidence"] = [f"{max(prob)*100:.1f}%" for prob in probabilities]

    print("ðŸ“Š Summary:\n")
    for _, row in today_features.sort_values("recommendation", ascending=False).iterrows():
        status = "below avg âœ“" if row["pct_vs_avg"] < 0 else "above avg"
        print(f"{row['recommendation']} {row['book_name']} ({row['source']}): â‚¹{row['current_price']:.0f} ({status}) - {row['confidence']}")
        print(f"   {row['url']}\n")

    return today_features

# Run it! No target_date is passed, so the function uses the latest
# scrape_date found in feature_df. The returned DataFrame is not assigned
# to a variable here.
get_recommendations()