In [10]:
#!/usr/bin/env python
import argparse
import itertools
import logging
import json
import pandas as pd
import wandb
import mlflow.sklearn
import matplotlib.pyplot as plt
from sklearn.metrics import mean_absolute_error

# Configure the root logger once for the whole notebook: INFO level, with a
# timestamp in front of every message.
logging.basicConfig(
    format="%(asctime)-15s %(message)s",
    level=logging.INFO,
)
# No name is passed, so this is the root logger; the cells below log through it.
logger = logging.getLogger()

# Open a W&B run labelled as a "test" job. Later cells use this handle to
# fetch artifacts and to record summary metrics.
run = wandb.init(job_type="test")



[34m[1mwandb[0m: Currently logged in as: [33mahmedaladdin[0m. Use [1m`wandb login --relogin`[0m to force relogin


In [20]:
logger.info("Downloading and reading the exported model")
# Resolve the model artifact through the active run and download it locally;
# `download()` returns the local path that is handed to MLflow below.
model_artifact = run.use_artifact('nyc_airbnb/random_forest_model:prod')
model_export_path = model_artifact.download()

# Load the exported scikit-learn pipeline from the downloaded directory.
sk_pipe = mlflow.sklearn.load_model(model_export_path)

# Each entry of the preprocessor's `transformers` list carries its column
# selection at index 2; flatten those selections into one list of feature names.
processed_features = [
    column
    for transformer_spec in sk_pipe['Preprocessor'].transformers
    for column in transformer_spec[2]
]

2023-01-15 12:02:56,441 Downloading and reading the exported model
[34m[1mwandb[0m: Downloading large artifact random_forest_model:prod, 75.30MB. 6 files... Done. 0:0:0.0


In [None]:
# Fetch the held-out test split as a single CSV file.
# The previous lookup pointed at the `random_forest_model` artifact, i.e. the
# exported model itself (its download log reports 6 files), so `.file()` could
# not hand back one CSV for `pd.read_csv`.
# NOTE(review): the test-data artifact name below is assumed — confirm it
# against the artifacts actually logged in the W&B project.
TEST_DATA_ARTIFACT = "ahmedaladdin/nyc_airbnb/test_data.csv:latest"
test_data_path = run.use_artifact(TEST_DATA_ARTIFACT).file()
df = pd.read_csv(test_data_path, low_memory=False)

# Extract the target from the features.
# Work on a copy so `df` keeps the "price" column after `pop` removes it.
logger.info("Extracting target from dataframe")
X_test = df.copy()
y_test = X_test.pop("price")


# Compute r2 and MAE on the columns the preprocessor was configured with.
logger.info("Scoring")
r_squared = sk_pipe.score(X_test[processed_features], y_test)

y_pred = sk_pipe.predict(X_test[processed_features])
mae = mean_absolute_error(y_test, y_pred)

logger.info(f"Score: {r_squared}")
logger.info(f"MAE: {mae}")

######################################
# Here we save r_squared under the "r2" key
run.summary['r2'] = r_squared
# Now log the variable "mae" under the key "mae".
run.summary["mae"] = mae

In [5]:
import sklearn
