In [None]:
%%html
<style>
/* Float rendered tables to the left and leave a gap underneath them. */
table {
  float: left;
  margin-bottom: 1em;
}
/* Clear the float on the element that directly follows a table. */
table + * {
  content: "";
  clear: both;
  display: table;
}
</style>

In [None]:
# Uncomment on a fresh environment to install PyCaret with the "analysis" and
# "parallel" extras.
# NOTE(review): `%pip` installs into the running kernel's environment; consider
# it instead of `!pip`, and pin a version for reproducibility.
#!pip install pycaret[analysis,parallel]

In [None]:
from pycaret import regression

In [None]:
from pycaret.datasets import get_data
# Load the "diamond" dataset through pycaret.datasets.get_data.
dataset = get_data(dataset="diamond")

In [None]:
# Load the same dataset again, this time with profile=True, and rebind `dataset`.
dataset = get_data(dataset="diamond", profile=True)

In [None]:
# Draw 90% of the rows for modeling; the fixed random_state keeps the draw repeatable.
data = dataset.sample(
    frac=0.9,
    random_state=786,
)

In [None]:
# Everything not sampled into `data` becomes the hold-out set.
# This relies on `data` still carrying the original index labels of `dataset`,
# so it must run before the indexes are reset.
data_unseen = dataset.drop(index=data.index)

In [None]:
# Renumber both splits from 0, discarding the old index labels.
data = data.reset_index(drop=True)
data_unseen = data_unseen.reset_index(drop=True)

In [None]:
# Report the (rows, columns) shape of the modeling split.
print(f"Data for Modeling {data.shape}")

In [None]:
# Report the (rows, columns) shape of the hold-out split.
print(f"Unseen Data for Prediction {data_unseen.shape}")

### MLflow

In [None]:
import os

import mlflow

# Point MLflow at the tracking server. If the MLFLOW_TRACKING_URI environment
# variable is set it is used, so the notebook can target another server without
# editing this cell; otherwise fall back to the local server on port 5000.
mlflow.set_tracking_uri(
    os.environ.get("MLFLOW_TRACKING_URI", "http://localhost:5000")
)

### Preprocessing

In [None]:
# Configure the PyCaret regression experiment on the modeling split,
# predicting the "Price" column.
experiment = regression.setup(
    data=data,
    target="Price",
    session_id=123,
    # Preprocessing switches
    normalize=True,
    transformation=True,
    transform_target=True,
    remove_multicollinearity=True,
    multicollinearity_threshold=0.95,
    bin_numeric_features=["Carat Weight"],
    # Runtime options
    verbose=True,
    use_gpu=True,
    # Experiment logging
    log_experiment=True,
    log_plots=True,
    log_data=True,
    experiment_name="diamond1",
)
                 

In [None]:
# Compare the available estimators (the exclusion list is empty) and keep the result.
best_model = regression.compare_models(exclude=[])

In [None]:
# Tune the model returned by compare_models, using tune_model's default settings.
tuned_best = regression.tune_model(best_model)

In [None]:
# Create the estimator registered under the id "dt".
# Note that the name `model` is rebound later in the notebook to loaded artifacts.
model = regression.create_model("dt")

In [None]:
import numpy as np

# Optional custom search space for tuning the "dt" model created above.
# NOTE(review): the commented-out keys look like k-nearest-neighbours
# hyperparameters and would not apply to the "dt" estimator; they are kept
# only as a template.
params = {
    # "n_neighbors": np.arange(2, 11, 2),
    # "leaf_size": np.arange(10,40, 5)
}

# Pass the grid to tune_model so that filling in `params` actually takes effect.
# While the dict is empty, None is passed, which leaves tune_model on its
# default search space.
tuned_model = regression.tune_model(model, custom_grid=params or None)

### Evaluate model

In [None]:
# Render the "pipeline" plot for the tuned model.
regression.plot_model(tuned_model, plot="pipeline")

In [None]:
# Render the "residuals_interactive" plot for the tuned model.
regression.plot_model(tuned_model, plot="residuals_interactive")

In [None]:
# Render the "learning" plot for the tuned model.
regression.plot_model(tuned_model, plot="learning")


In [None]:
# Render the "feature" plot for the tuned model.
regression.plot_model(tuned_model, plot="feature")

In [None]:
# Render the "feature_all" plot for the tuned model.
regression.plot_model(tuned_model, plot="feature_all")

In [None]:
# Render the "parameter" plot for the tuned model.
regression.plot_model(tuned_model, plot="parameter")

In [None]:
# Disabled: "tree" plot of the tuned model. Uncomment to render it.
#regression.plot_model(tuned_model, plot = "tree")

In [None]:
# Run predict_model on `tuned_best` without passing a `data` argument.
# NOTE(review): the plots in this section inspect `tuned_model`, while this cell
# uses `tuned_best` — confirm which model is meant to be evaluated here.
regression.predict_model(tuned_best)

### Finish and Deploy

In [None]:
# Finalize the tuned best model; the result is what gets saved below.
final_best = regression.finalize_model(tuned_best)

In [None]:
import os

# Destination for the saved pipeline, relative to the notebook's working
# directory. It is kept as a plain string because the same value is passed to
# load_model later in the notebook.
model_output = ".././models/diamond-pipeline"

# Create the target folder first so the save does not fail on a fresh checkout
# where the folder is missing.
os.makedirs(os.path.dirname(model_output), exist_ok=True)
regression.save_model(final_best, model_output)

### Consume Model (PyCaret)

In [None]:
# Load the pipeline back from the path it was saved to.
# This rebinds `model`, which previously held the estimator from create_model("dt").
model = regression.load_model(model_output)

In [None]:
# Show the text representation of the loaded pipeline.
print(model)

In [None]:
# Score the hold-out rows with the loaded pipeline.
predictions = regression.predict_model(model, data=data_unseen)

In [None]:
# Preview the first rows of the prediction output.
predictions.head()

In [None]:
from pycaret.utils.generic import check_metric

In [None]:
# Compute R2 between the true "Price" column and the "prediction_label" column.
check_metric(
    predictions["Price"],
    predictions["prediction_label"],
    "R2",
)

In [None]:
# Load the model straight from an absolute path under an `mlartifacts` directory.
# NOTE(review): this path is specific to one machine and one run (the last hex
# folder matches the run id used in the `runs:/` URI further down); it will not
# resolve elsewhere — consider deriving it from a configurable base directory.
pipeline = regression.load_model("/home/ac/projects/mlops_bootcamp/mlartifacts/143810263215121988/eefdfe7972c640a3b193fe3f49f9bc18/artifacts/model/model")

In [None]:
# Show the text representation of the pipeline loaded from the artifact path.
print(pipeline)

### Consume Model (MLflow)

In [None]:
# Feature-only copy of the hold-out set: the "Price" target column is removed.
new_data = data_unseen.drop(columns=["Price"])

In [None]:
import mlflow

# URI of the logged model, addressed by a hard-coded MLflow run id.
logged_model = "runs:/eefdfe7972c640a3b193fe3f49f9bc18/model"

# Load it through the generic pyfunc interface; this rebinds `model`.
model = mlflow.pyfunc.load_model(logged_model)

In [None]:
# Show the text representation of the model loaded via mlflow.pyfunc.
print(model)

In [None]:
# Predict on a Pandas DataFrame built from the feature-only hold-out data.
import pandas as pd
# This rebinds `predictions`, replacing the earlier predict_model output.
predictions = model.predict(pd.DataFrame(new_data))
# Display the result as the cell output.
predictions