In [1]:
import os

import boto3
import joblib
import mlflow
import numpy as np
import pandas as pd
from hyperopt import fmin, tpe, hp, STATUS_OK, Trials
from hyperopt.pyll import scope
from mlflow.entities import ViewType
from mlflow.tracking import MlflowClient
from sklearn.compose import ColumnTransformer
from sklearn.ensemble import RandomForestRegressor
from sklearn.metrics import mean_squared_error, r2_score
from sklearn.model_selection import train_test_split
from sklearn.pipeline import Pipeline
from sklearn.preprocessing import StandardScaler, OneHotEncoder, MinMaxScaler

In [2]:
# Select the named AWS profile for this session. AWS_PROFILE is the standard
# environment variable the AWS SDK reads when resolving credentials.
os.environ["AWS_PROFILE"] = "park_pulse"

# Host of the MLflow tracking server. The default is the original hard-coded
# EC2 public DNS name; setting TRACKING_SERVER_HOST in the environment lets the
# notebook follow the server to a new address without editing this cell.
TRACKING_SERVER_HOST = os.environ.get(
    "TRACKING_SERVER_HOST",
    "ec2-3-217-110-64.compute-1.amazonaws.com",
)

# Point the module-level MLflow API at that server (plain HTTP, port 5000).
mlflow.set_tracking_uri(f"http://{TRACKING_SERVER_HOST}:5000")

In [3]:
# Low-level client for run search and artifact download. It reuses the URI
# registered with mlflow.set_tracking_uri(...) in the configuration cell rather
# than rebuilding the same "http://<host>:5000" string a second time, so the
# fluent API and this client cannot drift apart.
client = MlflowClient(tracking_uri=mlflow.get_tracking_uri())


In [4]:
# Load the rows to run inference on into `X_test`.
# The path is relative, so it resolves against the notebook's working directory.
file_path = 'data/test_inference_data.csv'
X_test = pd.read_csv(file_path)

In [5]:
# Load the ground-truth values into `y_test`; they are compared against the
# model's predictions in the metrics cell.
# Note: this rebinds `file_path`, which from here on points at the ground-truth file.
# NOTE(review): assumes the rows are in the same order as the inference CSV and
# that the file contains only the target column(s) — confirm against the export step.
file_path = 'data/groundtruth_inference_data.csv'
y_test = pd.read_csv(file_path)

In [6]:
# Shortlist candidate runs: active (non-deleted) runs of experiment "1" whose
# logged `r2` metric exceeds 0.99, best first, at most five results.
# `experiment_ids` is passed as a list because the search API documents a list
# of experiment IDs; a bare string only works through implicit coercion.
# `ViewType` comes from the notebook's import cell.
runs = client.search_runs(
    experiment_ids=["1"],
    filter_string="metrics.r2 > 0.99",
    run_view_type=ViewType.ACTIVE_ONLY,
    max_results=5,
    order_by=["metrics.r2 DESC"],
)

In [7]:
# ID of the run evaluated in this notebook. It is pinned as a literal here,
# not read from a search result.
best_run_id = "d6f41a3b0a394694af2dca3e616a6e5b"

# Download the preprocessor pickle logged with that run into the current
# directory. The call is left as the cell's final expression so the returned
# local path is shown as the cell output.
client.download_artifacts(
    run_id=best_run_id,
    path='scales_pickle/preprocessor.pkl',
    dst_path='.',
)

Downloading artifacts:   0%|          | 0/1 [00:00<?, ?it/s]

'/Users/johnomole/Desktop/parkpulse/mlflow/scales_pickle/preprocessor.pkl'

In [9]:
# Build the model URI from `best_run_id` (set in the artifact-download cell)
# so the model and the downloaded preprocessor always come from the same run,
# instead of keeping a second copy of the run-ID literal.
# `mlflow` is already imported in the notebook's import cell.
logged_model = f'runs:/{best_run_id}/models_pickle'

# Load model as a PyFuncModel.
loaded_model = mlflow.pyfunc.load_model(logged_model)

Downloading artifacts:   0%|          | 0/9 [00:00<?, ?it/s]

In [15]:
# Run the loaded pyfunc model over the inference rows.
# NOTE(review): X_test is passed as read from CSV, with no preprocessing applied
# in this notebook — presumably the logged model handles that itself; confirm.
y_pred = loaded_model.predict(X_test)

In [16]:
# Score the predictions against the ground truth: mean squared error first,
# then the R² coefficient of determination, and report both on one line.
mse, r2 = mean_squared_error(y_test, y_pred), r2_score(y_test, y_pred)
print(f'The mse is {mse} and the r2 score is: {r2}')

The mse is 0.02208210374351211 and the r2 score is: 0.9956144128460448
