In [0]:
import mlflow
import mlflow.spark
from mlflow.exceptions import RestException
from pyspark.ml.feature import StringIndexer
from pyspark.ml.recommendation import ALS
from pyspark.ml.evaluation import RegressionEvaluator

In [0]:
# Load the source table that holds the interaction rows.
interactions = spark.table("MLOps.data.als_interactions_30d")

# Map the string `product_id` column to a numeric `product_id_idx` column.
# Rows the fitted indexer cannot handle are skipped (handleInvalid="skip").
product_indexer = StringIndexer(
    inputCol="product_id",
    outputCol="product_id_idx",
    handleInvalid="skip",
)
product_indexer_model = product_indexer.fit(interactions)
interactions_indexed = product_indexer_model.transform(interactions)

# 80/20 split with a fixed seed.
train, test = interactions_indexed.randomSplit(weights=[0.8, 0.2], seed=42)

# Persist both splits, replacing any previous contents of the target tables.
for split_df, table_name in (
    (train, "MLOps.data.train_table"),
    (test, "MLOps.data.test_table"),
):
    split_df.write.mode("overwrite").saveAsTable(table_name)

# Locate the MLflow experiment that holds the training runs.
experiment_path = "/Workspace/Users/jung@ap-com.co.jp/mlops_demo_model/als_recommendation"
experiment = mlflow.get_experiment_by_name(experiment_path)
if experiment is None:
    # get_experiment_by_name returns None (it does not raise) when the
    # experiment does not exist; fail here with a message that names the path
    # instead of an AttributeError on `experiment.experiment_id` below.
    raise LookupError(f"MLflow experiment not found: {experiment_path}")

# Fetch the experiment's runs as a pandas DataFrame.
runs_df = mlflow.search_runs(
    experiment_ids=[experiment.experiment_id]
)

# Keep only the runs named "training". The run-name tag column is absent when
# the experiment has no runs (or no run carries that tag), so guard the lookup
# rather than letting it raise KeyError.
run_name_column = "tags.mlflow.runName"
if run_name_column in runs_df.columns:
    training_runs = runs_df[runs_df[run_name_column] == "training"]
else:
    training_runs = runs_df.iloc[0:0]

if training_runs.empty:
    # Without this check `.iloc[0]` below would raise a bare IndexError.
    raise LookupError(
        f"No run named 'training' found in experiment: {experiment_path}"
    )

# Most recently started training run first.
latest_training_run = training_runs.sort_values(
    by="start_time", ascending=False
).iloc[0]

# Load the Spark model logged under the "als_model" artifact path of that run.
model_uri = f"runs:/{latest_training_run.run_id}/als_model"
model = mlflow.spark.load_model(model_uri)
print(model_uri)

# Score the held-out split with the loaded model.
predictions = model.transform(test)

# Spark ALS defaults to coldStartStrategy="nan": rows whose user or item was
# not seen during training get a NaN prediction, and a single NaN makes the
# RMSE itself NaN. Drop null/NaN predictions so the metric is computed over
# the rows the model could actually score. This is a no-op if the model was
# trained with coldStartStrategy="drop".
scored_predictions = predictions.na.drop(subset=["prediction"])

evaluator = RegressionEvaluator(
    metricName="rmse",
    labelCol="interaction_weight",
    predictionCol="prediction"
)
rmse = evaluator.evaluate(scored_predictions)

# Record the metric together with the URI of the model it was computed for.
with mlflow.start_run(run_name="evaluation") as run:
    mlflow.log_metric("rmse", rmse)
    mlflow.log_param("evaluated_model_uri", model_uri)
    print(f"Evaluation finished. RMSE: {rmse}")