In [0]:
!pip install mlflow==2.17.0 \
cffi==1.17.1 \
cloudpickle==3.1.0 \
matplotlib==3.9.2 \
numpy==1.26.4 \
pandas==2.2.3 \
psutil==6.0.0 \
pyarrow==14.0.1 \
scikit-learn==1.5.2 \
lightgbm==4.5.0 \
scipy==1.14.1 \
databricks-feature-engineering==0.6 \
databricks-feature-lookup==1.2.0 \
databricks-sdk==0.32.0 \
pydantic==2.9.2 \
loguru==0.7.3 \
pytest==7.4.4 \
pydantic_settings==2.9.1 \
hyperopt==0.2.7 \
setuptools>=80.7.1

In [0]:
%restart_python

In [0]:
import os
import sys

from loguru import logger
from pyspark.sql import SparkSession

sys.path.append(os.path.abspath(os.path.join(os.getcwd(), "../src")))

from hotel_reservations.config import ProjectConfig, Tags
from hotel_reservations.models.feature_lookup_model import FeatureLookUpModel

In [0]:
# Obtain the Spark session (an existing one is reused when present).
spark = SparkSession.builder.getOrCreate()

# Run metadata attached to the model; the values are hard-coded in this notebook.
tags_dict = dict(git_sha="abcd12345", branch="week2", job_run_id="1234")
tags = Tags(**tags_dict)

# Project settings are read from the YAML file one directory above the notebook.
config = ProjectConfig.from_yaml(config_path="../project_config.yml")

In [0]:
# Build the feature-lookup model wrapper from the config, tags and Spark session above.
fe_model = FeatureLookUpModel(config=config, tags=tags, spark=spark)

In [0]:
# Presumably creates (or refreshes) the feature table the lookups read from —
# TODO confirm against FeatureLookUpModel.create_feature_table.
fe_model.create_feature_table()

In [0]:
# Presumably registers the feature function used during feature engineering —
# TODO confirm against FeatureLookUpModel.define_feature_function.
fe_model.define_feature_function()

In [0]:
# Load the data onto the model object; the sources are decided inside
# FeatureLookUpModel.load_data and are not visible from this notebook.
fe_model.load_data()

In [0]:
# Run the model's feature-engineering step.
# NOTE(review): likely depends on the feature table/function cells above — confirm.
fe_model.feature_engineering()

In [0]:
# Train the model (details live in FeatureLookUpModel.train).
fe_model.train()

In [0]:
# Register the trained model.
# NOTE(review): the registry target is set inside FeatureLookUpModel.register_model — confirm.
fe_model.register_model()

In [0]:
from pyspark.sql.functions import col

spark = SparkSession.builder.getOrCreate()

# Columns removed from the sample before it is handed to the model.
columns_to_drop = [
    "repeated_guest",
    "no_of_previous_cancellations",
    "no_of_previous_bookings_not_canceled",
    "avg_price_per_room",
    "no_of_special_requests",
    "booking_status",
]

# Take a 10-row sample of the test table.
test_set = spark.table("mlops_dev.olalubic.test_set").limit(10)

# Drop the columns listed above and cast Client_ID to string in one chain.
X_test = test_set.drop(*columns_to_drop).withColumn(
    "Client_ID", col("Client_ID").cast("string")
)

# A fresh wrapper is created here, so this cell does not reuse the earlier fe_model object.
fe_model = FeatureLookUpModel(config=config, tags=tags, spark=spark)
predictions = fe_model.load_latest_model_and_predict(X_test)
logger.info(predictions)

In [0]:
# Convert the predictions to pandas and display the first rows as the cell output.
# Assumes `predictions` is small (the visible cell that produces it limits input to 10 rows).
predictions.toPandas().head()