## **Example run with mlflow.prophet**

This example uses MLflow's built-in *mlflow.prophet.log_model* to log the model.

The example is copied from the official MLflow GitHub repository: https://github.com/mlflow/mlflow/blob/master/examples/prophet/train.py

In [29]:
import json

import numpy as np
import pandas as pd
from prophet import Prophet, serialize
from prophet.diagnostics import cross_validation, performance_metrics

import mlflow
from mlflow.models import infer_signature

# URL of the example retail-sales CSV hosted in the facebook/prophet repository.
SOURCE_DATA = (
    "https://raw.githubusercontent.com/facebook/prophet/master/examples/example_retail_sales.csv"
)
# Artifact path under which the model is logged in the MLflow run.
ARTIFACT_PATH = "model"
# Fix NumPy's global random seed so repeated runs use the same random state.
np.random.seed(12345)

In [30]:
def extract_params(pr_model):
    """Collect the simple attributes of a Prophet model into a dictionary.

    Every attribute name listed in ``serialize.SIMPLE_ATTRIBUTES`` is looked
    up on ``pr_model``; the result maps each name to its current value.
    """
    collected = {}
    for attr_name in serialize.SIMPLE_ATTRIBUTES:
        collected[attr_name] = getattr(pr_model, attr_name)
    return collected


sales_data = pd.read_csv(SOURCE_DATA)

In [31]:
sales_data.head()

Unnamed: 0,ds,y
0,1992-01-01,146376
1,1992-02-01,147079
2,1992-03-01,159336
3,1992-04-01,163669
4,1992-05-01,170068


In [32]:
%env MLFLOW_TRACKING_URI=sqlite:///mlrun_prophet.db

env: MLFLOW_TRACKING_URI=sqlite:///mlrun_prophet.db


In [36]:
# NOTE(review): the previous cell sets MLFLOW_TRACKING_URI to a local SQLite file via
# %env, while this call points the client at an HTTP tracking server. Presumably this
# explicit call takes precedence — confirm which tracking store is intended.
mlflow.set_tracking_uri("http://127.0.0.1:8080")

In [None]:
# Train a Prophet model, evaluate it with cross-validation and log the model,
# its parameters and its metrics to a single MLflow run.
with mlflow.start_run():
    # Fit a Prophet model with default settings on the sales data.
    model = Prophet().fit(sales_data)

    # Snapshot of the model's simple attributes, logged as run parameters below.
    params = extract_params(model)

    # Metric columns to read from the performance_metrics output.
    metric_keys = ["mse", "rmse", "mae", "mape", "mdape", "smape", "coverage"]
    metrics_raw = cross_validation(
        model=model,
        horizon="365 days",
        period="180 days",
        initial="710 days",
        parallel="threads",
        disable_tqdm=True,
    )
    cv_metrics = performance_metrics(metrics_raw)
    # Reduce each metric column to its mean so one value per metric is logged.
    metrics = {k: cv_metrics[k].mean() for k in metric_keys}

    print(f"Logged Metrics: \n{json.dumps(metrics, indent=2)}")
    print(f"Logged Params: \n{json.dumps(params, indent=2)}")

    # NOTE(review): the signature input is inferred from model.history rather than
    # from sales_data. The serving request later in this notebook sends the columns
    # ds, y, floor, t and y_scaled — presumably because they are part of this signature.
    train = model.history
    predictions = model.predict(model.make_future_dataframe(30))
    signature = infer_signature(train, predictions)

    mlflow.prophet.log_model(model, artifact_path=ARTIFACT_PATH, signature=signature)
    mlflow.log_params(params)
    mlflow.log_metrics(metrics)
    model_uri = mlflow.get_artifact_uri(ARTIFACT_PATH)
    print(f"Model artifact logged to: {model_uri}")


# Optional check (left disabled): reload the logged model and forecast with it.
# loaded_model = mlflow.prophet.load_model(model_uri)

# forecast = loaded_model.predict(loaded_model.make_future_dataframe(60))

# print(f"forecast:\n{forecast.head(30)}")

After building the Docker image and starting the container (see notebook 4, "Docker Quick Guide", for details), we call the model's REST API.

In short, you want to

**Connect to mlflow tracking uri**

`export MLFLOW_TRACKING_URI="http://127.0.0.1:8080"` (bash/zsh)

`$env:MLFLOW_TRACKING_URI="http://127.0.0.1:8080"` (PowerShell)

**Build image**

`mlflow models build-docker --model-uri "models:/<model name>/<version>" --name "<image name>"`

**Run container**

`docker run -p 5002:8080 <image name>`

In [62]:
import requests

# Scoring endpoint of the running model container. The host port (5002 here)
# must match the one given to `docker run -p <host port>:8080` in the terminal.
endpoint = "http://localhost:5002/invocations"
data = {
    "dataframe_split": {
        "columns": ["ds", "y", "floor", "t", "y_scaled"],
        "data": [["2016-06-01", 0, 0, 0, 0], ["2016-06-01", 10, 10, 10, 120]],
    }
}
# POST the payload as JSON. The timeout (seconds) keeps the cell from hanging
# forever when the container is not reachable or never answers.
response = requests.post(endpoint, json=data, timeout=30)
print(response.json())

{'predictions': [{'ds': '2016-06-01T00:00:00', 'trend': 464314.1915900584, 'yhat_lower': 456857.610812237, 'yhat_upper': 477214.8479718002, 'trend_lower': 464314.1915900584, 'trend_upper': 464314.1915900584, 'additive_terms': 2862.888577091087, 'additive_terms_lower': 2862.888577091087, 'additive_terms_upper': 2862.888577091087, 'yearly': 2862.888577091087, 'yearly_lower': 2862.888577091087, 'yearly_upper': 2862.888577091087, 'multiplicative_terms': 0.0, 'multiplicative_terms_lower': 0.0, 'multiplicative_terms_upper': 0.0, 'yhat': 467177.0801671495}, {'ds': '2016-06-01T00:00:00', 'trend': 464314.1915900584, 'yhat_lower': 457352.15333530406, 'yhat_upper': 477562.4071601084, 'trend_lower': 464314.1915900584, 'trend_upper': 464314.1915900584, 'additive_terms': 2862.888577091087, 'additive_terms_lower': 2862.888577091087, 'additive_terms_upper': 2862.888577091087, 'yearly': 2862.888577091087, 'yearly_lower': 2862.888577091087, 'yearly_upper': 2862.888577091087, 'multiplicative_terms': 0.0,

**Note that, due to the nature of Prophet, you need to declare the extra columns as demonstrated above and pass placeholder values for them (the first row in this demonstration uses all zeros).**

## **Logging with pyfunc**

In [38]:
import json

import numpy as np
import pandas as pd
from prophet import Prophet, serialize
from prophet.diagnostics import cross_validation, performance_metrics

import mlflow
from mlflow.models import infer_signature

# URL of the example retail-sales CSV hosted in the facebook/prophet repository.
SOURCE_DATA = (
    "https://raw.githubusercontent.com/facebook/prophet/master/examples/example_retail_sales.csv"
)
# NOTE(review): the pyfunc cells below log under the literal "prophetwrapper"
# artifact path, so this constant looks unused in this section — confirm.
ARTIFACT_PATH = "model"
# Fix NumPy's global random seed so repeated runs use the same random state.
np.random.seed(12345)

# Load the sales data into a DataFrame; ProphetWrapper.fit() reads this global.
sales_data = pd.read_csv(SOURCE_DATA)

In [5]:
%env MLFLOW_TRACKING_URI=sqlite:///prophet_mlrun.db

env: MLFLOW_TRACKING_URI=sqlite:///prophet_mlrun.db


In [6]:
# NOTE(review): the previous cell sets MLFLOW_TRACKING_URI to a local SQLite file via
# %env, while this call points the client at an HTTP tracking server. Presumably this
# explicit call takes precedence — confirm which tracking store is intended.
mlflow.set_tracking_uri("http://127.0.0.1:8080")

In [39]:
class ProphetWrapper(mlflow.pyfunc.PythonModel):
    """Custom pyfunc model that forecasts a number of future periods with Prophet.

    The model input is the number of periods to forecast beyond the training
    history. ``predict`` returns ``{'prediction': forecast}`` where ``forecast``
    is the DataFrame produced by ``Prophet.predict``.
    """

    def __init__(self):
        self.model = Prophet()
        # Set by fit(); stays None until the model has been trained.
        self.signature = None

    def fit(self, data=None):
        """Fit the wrapped Prophet model and infer a model signature.

        Args:
            data: Training DataFrame passed to ``Prophet.fit``. When omitted,
                the notebook-level ``sales_data`` DataFrame is used, which is
                what the original zero-argument call relied on.

        The inferred signature is kept on ``self.signature`` so callers can
        pass it to ``log_model`` if they want schema enforcement.
        """
        if data is None:
            data = sales_data
        self.model.fit(data)
        train = self.model.history
        # Use the wrapper's own model here; relying on a notebook-global
        # ``model`` breaks as soon as the earlier training cell was not run.
        predictions = self.model.predict(self.model.make_future_dataframe(30))
        self.signature = infer_signature(pd.DataFrame({"ds": [len(train)]}), predictions)

    @staticmethod
    def _to_periods(model_input):
        """Reduce the model input to a single integer number of periods.

        Accepts a plain number as well as containers holding it in their first
        cell (DataFrame, Series, list/tuple, or an array-like exposing
        ``ravel``). A ``dataframe_split`` request presumably arrives as a
        DataFrame, which cannot be passed to ``periods=`` directly.

        Raises:
            ValueError: If a container input is empty.
        """
        empty_message = "model_input must contain the number of periods to forecast"
        if isinstance(model_input, pd.DataFrame):
            if model_input.empty:
                raise ValueError(empty_message)
            value = model_input.iloc[0, 0]
        elif isinstance(model_input, pd.Series):
            if model_input.empty:
                raise ValueError(empty_message)
            value = model_input.iloc[0]
        elif isinstance(model_input, (list, tuple)):
            if not model_input:
                raise ValueError(empty_message)
            value = model_input[0]
        elif hasattr(model_input, "ravel"):
            # NumPy-style arrays and scalars: flatten and take the first element.
            flat = model_input.ravel()
            if len(flat) == 0:
                raise ValueError(empty_message)
            value = flat[0]
        else:
            value = model_input
        return int(value)

    def predict(self, context, model_input):
        """Forecast ``model_input`` periods past the end of the training data.

        Args:
            context: pyfunc context supplied by MLflow; not used here.
            model_input: Number of periods to forecast, either as a number or
                wrapped in a DataFrame/Series/list/array (first cell is used).

        Returns:
            A dict with the single key ``'prediction'`` holding the forecast
            DataFrame (training history plus the requested future periods).
        """
        periods = self._to_periods(model_input)
        future = self.model.make_future_dataframe(periods=periods)
        forecast = self.model.predict(future)
        return {'prediction': forecast}

In [57]:
import cloudpickle
# Conda environment definition handed to mlflow.pyfunc.log_model. Every entry
# in the nested "pip" list must be an installable distribution name.
# "cmdstanpy.compilation" was removed: it is a module inside the cmdstanpy
# package, not a distribution of its own, so listing it makes environment
# creation fail; "cmdstanpy" already covers it.
conda_env = {
    "channels": ["defaults"],
    "dependencies": [
        "python=3.11.5",
        "pip",
        {
            "pip": [
                "mlflow",
                "pillow",
                # Pin cloudpickle to the version installed in this notebook's kernel.
                f"cloudpickle=={cloudpickle.__version__}",
                "cmdstanpy",
                "cffi",
                "defusedxml",
                "holidays",
                "importlib-resources",
                "ipython",
                "matplotlib",
                "numpy",
                "packaging",
                "pandas",
                "prophet",
                "stanio",
                "tqdm",
            ],
        },
    ],
    "name": "prophet_env",
}

In [58]:
# Fit before logging so the logged python_model carries the trained Prophet instance.
custom_model = ProphetWrapper()
custom_model.fit()

with mlflow.start_run():
    # Log the wrapper as a generic pyfunc model; registered_model_name also
    # registers it in the model registry under "prophetwrapper_model".
    mlflow.pyfunc.log_model(
        artifact_path="prophetwrapper",
        python_model=custom_model,
        registered_model_name="prophetwrapper_model",
        conda_env=conda_env,
    )

17:50:59 - cmdstanpy - INFO - Chain [1] start processing
17:50:59 - cmdstanpy - INFO - Chain [1] done processing
  inputs = _infer_schema(model_input)
Registered model 'prophetwrapper_model' already exists. Creating a new version of this model...
2024/01/12 17:51:00 INFO mlflow.tracking._model_registry.client: Waiting up to 300 seconds for model version to finish creation. Model name: prophetwrapper_model, version 5
Created version '5' of model 'prophetwrapper_model'.


In [59]:
loaded_model1 = mlflow.pyfunc.load_model("models:/prophetwrapper_model/5")

Downloading artifacts:   0%|          | 0/5 [00:00<?, ?it/s]

In [60]:
loaded_model1.predict(30)['prediction'].tail()

Unnamed: 0,ds,trend,yhat_lower,yhat_upper,trend_lower,trend_upper,additive_terms,additive_terms_lower,additive_terms_upper,yearly,yearly_lower,yearly_upper,multiplicative_terms,multiplicative_terms_lower,multiplicative_terms_upper,yhat
318,2016-05-27,464118.007526,468510.505073,488386.588114,464118.007526,464118.007526,14683.607789,14683.607789,14683.607789,14683.607789,14683.607789,14683.607789,0.0,0.0,0.0,478801.615315
319,2016-05-28,464164.339417,465386.892923,487087.198938,464164.339417,464164.339417,12102.932457,12102.932457,12102.932457,12102.932457,12102.932457,12102.932457,0.0,0.0,0.0,476267.271874
320,2016-05-29,464210.671308,463630.675942,483961.969856,464210.671308,464210.671308,9568.611892,9568.611892,9568.611892,9568.611892,9568.611892,9568.611892,0.0,0.0,0.0,473779.2832
321,2016-05-30,464257.003199,460810.688351,481699.514173,464257.003199,464257.003199,7142.929363,7142.929363,7142.929363,7142.929363,7142.929363,7142.929363,0.0,0.0,0.0,471399.932562
322,2016-05-31,464303.33509,459530.554542,479879.276449,464303.33509,464303.33509,4885.008122,4885.008122,4885.008122,4885.008122,4885.008122,4885.008122,0.0,0.0,0.0,469188.343212


In [50]:
import requests

# Scoring endpoint of the running model container. The host port (5002 here)
# must match the one given to `docker run -p <host port>:8080` in the terminal.
endpoint = "http://localhost:5002/invocations"
# Single-cell payload: the number of periods to forecast, in a column named "ds".
data = {
    "dataframe_split": {
        "columns": ["ds"],
        "data": [[30]],
    }
}
# POST the payload as JSON. The timeout (seconds) keeps the cell from hanging
# forever when the container is not reachable or never answers.
response = requests.post(endpoint, json=data, timeout=30)
print(response.json())

ConnectionError: HTTPConnectionPool(host='localhost', port=5002): Max retries exceeded with url: /invocations (Caused by NewConnectionError('<urllib3.connection.HTTPConnection object at 0x0000015C8A5CFED0>: Failed to establish a new connection: [WinError 10061] No connection could be made because the target machine actively refused it'))