In [35]:
import os

import numpy as np
import pandas as pd

In [36]:
AWS_ACCESS_KEY = os.getenv("AWS_ACCESS_KEY", None); 
AWS_SECRET_KEY = os.getenv("AWS_SECRET_KEY", None); 

MINIO_ACCESS_KEY = os.getenv("MINIO_ACCESS_KEY", None); 
MINIO_SECRET_KEY = os.getenv("MINIO_SECRET_KEY", None); 
MLFLOW_TRACKING_URI = os.getenv("MLFLOW_TRACKING_URI", None); 
MLFLOW_S3_ENDPOINT_URL = os.getenv("MLFLOW_S3_ENDPOINT_URL", None); 
MLFLOW_S3_IGNORE_TLS = os.getenv("MLFLOW_S3_IGNORE_TLS", None); 
MLFLOW_BUCKET_NAME = os.getenv("MLFLOW_BUCKET_NAME", None); 
MLFLOW_SERVER = os.getenv("MLFLOW_SERVER", None);
MLFLOW_EXPERIMENT_NAME = os.getenv("MLFLOW_EXPERIMENT_NAME", "mlzoomcamp");


print("MINIO_ACCESS_KEY", MINIO_ACCESS_KEY)
print("MINIO_SECRET_KEY", MINIO_SECRET_KEY)
print("MLFLOW_TRACKING_URI", MLFLOW_TRACKING_URI)
print("MLFLOW_S3_ENDPOINT_URL", MLFLOW_S3_ENDPOINT_URL)
print("MLFLOW_S3_IGNORE_TLS", MLFLOW_S3_IGNORE_TLS)
print("MLFLOW_BUCKET_NAME", MLFLOW_BUCKET_NAME)
print("MLFLOW_SERVER", MLFLOW_SERVER)

MINIO_ACCESS_KEY test_menio_access_key
MINIO_SECRET_KEY test_minio_secret_key
MLFLOW_TRACKING_URI http://mlflow:5000
MLFLOW_S3_ENDPOINT_URL http://minio:9000
MLFLOW_S3_IGNORE_TLS true
MLFLOW_BUCKET_NAME mlflow-artifacts
MLFLOW_SERVER http://mlflow:5000


In [37]:
import uuid
from datetime import datetime

import mlflow
import mlflow.sklearn
from mlflow.models import infer_signature


In [38]:
# Load the MLflow experiment
mlflow.set_tracking_uri(MLFLOW_SERVER)

experiment = mlflow.search_experiments(filter_string=f"name='{MLFLOW_EXPERIMENT_NAME}'")[0]
mlflow.set_experiment(MLFLOW_EXPERIMENT_NAME)

<Experiment: artifact_location='s3://mlflow-artifacts/experiments/', creation_time=1737476847625, experiment_id='141662139641609146', last_update_time=1737476847625, lifecycle_stage='active', name='mlzoomcamp', tags={}>

In [39]:
# best_run = mlflow.search_runs(experiment.experiment_id, filter_string="metrics.rmse=min(metrics.rmse)")

runs_df = mlflow.search_runs(experiment_ids=[experiment.experiment_id])
best_run = runs_df.sort_values(by="metrics.rmse", ascending=True).iloc[0]

In [40]:
best_run

run_id                                                                d3eac55f4ee341988b878370d21f735a
experiment_id                                                                       141662139641609146
status                                                                                        FINISHED
artifact_uri                                         s3://mlflow-artifacts/experiments/d3eac55f4ee3...
start_time                                                            2025-01-21 16:39:18.829000+00:00
end_time                                                              2025-01-21 16:39:25.433000+00:00
metrics.neg_root_mean_squared_error                                                          -0.271748
metrics.rmse                                                                                  0.277724
params.mlp_regressor__power_t                                                                     None
params.mlp_regressor__activation                                         

In [41]:
# Load the model from the best run
model_path = best_run.artifact_uri
print(model_path)

s3://mlflow-artifacts/experiments/d3eac55f4ee341988b878370d21f735a/artifacts


In [57]:
from mlflow.tracking.artifact_utils import _download_artifact_from_uri
from mlflow.utils.model_utils import (
    _add_code_from_conf_to_system_path,
    _get_flavor_configuration,
)

import pickle

if not os.path.isdir('model'):
    os.mkdir('model')



"""
local_model_path = _download_artifact_from_uri(artifact_uri=model_uri, output_path=dst_path)
    flavor_conf = _get_flavor_configuration(model_path=local_model_path, flavor_name=FLAVOR_NAME)
    _add_code_from_conf_to_system_path(local_model_path, flavor_conf)
    sklearn_model_artifacts_path = os.path.join(local_model_path, flavor_conf["pickled_model"])
    serialization_format = flavor_conf.get("serialization_format", SERIALIZATION_FORMAT_PICKLE)
    return _load_model_from_local_file(
        path=sklearn_model_artifacts_path, serialization_format=serialization_format
    )
"""

def load_sklearn_model(model_uri, dst_path):
    local_model_path = _download_artifact_from_uri(artifact_uri=model_uri, output_path=dst_path)
    # print(local_model_path)
    # shutil.copytree(f'{local_model_path}/model', f'{local_model_path}/MLmodel')
    local_model_path = os.path.join(local_model_path, 'MLmodel')
    flavor_conf = _get_flavor_configuration(
        model_path=local_model_path,
        flavor_name="sklearn"
    )
    _add_code_from_conf_to_system_path(local_model_path, flavor_conf)
    sklearn_model_artifacts_path = os.path.join(local_model_path, flavor_conf["pickled_model"])
    serialization_format = flavor_conf.get("serialization_format", "pickle")
    with open(sklearn_model_artifacts_path, 'rb') as f:
        loaded_model = pickle.load(f)
    return loaded_model
    
# loaded_model = mlflow.sklearn.load_model(model_uri=model_path, dst_path='model')
loaded_model = load_sklearn_model(model_uri=model_path, dst_path='model')

In [59]:
import re

# Function to convert camelCase or PascalCase to snake_case
def to_snake_case(name):
    s1 = re.sub('(.)([A-Z][a-z]+)', r'\1_\2', name)
    return re.sub('([a-z0-9])([A-Z])', r'\1_\2', s1).lower()

In [61]:
df_full = pd.read_csv("train.csv")

df_full.columns = [to_snake_case(col) for col in df_full.columns]
df_full.drop(
    columns=["id", "alley", "pool_qc", "fence", "misc_feature", "mas_vnr_type", "fireplace_qu", "lot_frontage"],
    inplace=True
    )
df_full.dropna(inplace=True)

TARGET_COLUMN = "sale_price"
del df_full[TARGET_COLUMN]

df_full_dict = df_full.to_dict(orient='records')

In [62]:
import random

N = 10
sample_x = random.sample(df_full_dict, k=N)

In [64]:
y = loaded_model.predict(sample_x)

In [65]:
y

array([10.94, 11.95, 11.99, 12.  , 12.  , 11.09, 11.03, 11.91, 11.09,
       12.  ])

In [66]:
y_prices = np.exp(y) - 1

In [67]:
y_prices

array([ 56386.34314617, 154816.14657623, 161134.35418627, 162753.791419  ,
       162753.791419  ,  65511.74612369,  61696.580797  , 148745.67943014,
        65511.74612369, 162753.791419  ])