In [13]:
import sys
import os
from pathlib import Path
import pandas as pd
import datetime as dt
from mlrun.platforms import auto_mount


# Notebook-level configuration constants.
DATA_DIR = "data"  # relative data directory — presumably where the dataset files live; TODO confirm usage
MLRUN_ENV_FILE = "mlrun.env"  # env file passed to mlrun.set_env_from_file below

import mlrun

# Apply the MLRun settings from the env file before any project/API call is made.
mlrun.set_env_from_file(MLRUN_ENV_FILE)

# Load the "predictive-maintenance" project, creating it when it does not exist yet.
# `parameters` carries the project source archive and the default container image;
# how they are consumed is not visible in this notebook (presumably by the project's
# setup script — verify).
project = mlrun.get_or_create_project(
    "predictive-maintenance",
    parameters={
        "source": "s3://mlrun/predictive-maintenance.zip",
        "default_image": "felipenv/mlrun-predictive:1.9",
    },
)

# NOTE(review): the disabled block below embeds database credentials as literals.
# Before re-enabling it, read the values from the environment or a secrets store
# instead of committing them to the notebook.
# project.set_secrets(
#     secrets={
#         "PG_HOST": "optimus-postgres.mlrun.svc.cluster.local",
#         "PG_USER": "optimus",
#         "PG_PASSWORD": "optimus",
#         "PG_DB": "optimus",
#     }
# )

> 2025-09-02 11:21:23,577 [info] Created and saved project: {"context":"./","from_template":null,"name":"predictive-maintenance","overwrite":false,"save":true}
Project Source: s3://mlrun/predictive-maintenance.zip
Exporting project as zip archive to s3://mlrun/predictive-maintenance.zip...
> 2025-09-02 11:21:26,602 [info] Project created successfully: {"project_name":"predictive-maintenance","stored_in_db":true}


## 0. Log Datasets + Configs

In [14]:
from datasetsforecast.phm2008 import PHM2008

In [15]:
# Load the PHM2008 FD001 train/test frames from the notebook's data directory.
# DATA_DIR (defined in the setup cell) replaces the previously duplicated './data'
# literal so the location is configured in exactly one place.
# clip_rul=False: per the datasetsforecast docs this leaves the remaining-useful-life
# target unclipped.
Y_train_df, Y_test_df = PHM2008.load(directory=DATA_DIR, group="FD001", clip_rul=False)


In [16]:
# Register the train and test frames on the project as CSV dataset artifacts.
# NOTE(review): each name is rebound from the loaded DataFrame to the value returned
# by log_dataset (later cells read `.uri` from it). Re-running this cell without
# re-running the load cell therefore passes that returned object back in as `df`.
Y_train_df = project.log_dataset(key="Y_train_df", df=Y_train_df, format="csv")
Y_test_df = project.log_dataset(key="Y_test_df", df=Y_test_df, format="csv")

## 1. Preprocessing Function

In [17]:
# Look up the function stored in the project under the key "preprocessing"
# (its definition lives outside this notebook).
preprocessing_fn = project.get_function("preprocessing")

# Disabled override: uncomment to set the function's image pull policy to "Always".
# preprocessing_fn.spec.image_pull_policy = "Always"

In [18]:
# URIs of the two dataset artifacts logged earlier, keyed by the input names
# handed to the preprocessing function.
preprocessing_inputs = {
    "Y_train_df": Y_train_df.uri,
    "Y_test_df": Y_test_df.uri,
}

# NOTE(review): with `returns=` disabled, the run summary shows the handler's raw
# return tuple stringified under the `return` result. The disabled keys below
# (post_enforce_schema, resampled_data) do not match the artifacts the run actually
# logs (full_data_normalized, Y_test_normalized, Y_train_normalized) — update them
# before re-enabling.
preprocessing_run = project.run_function(
    function=preprocessing_fn,
    inputs=preprocessing_inputs,
    # returns=[
    #     {"key": "post_enforce_schema", "file_format": "csv"},
    #     {"key": "resampled_data", "file_format": "csv"},
    #     ],
    local=False,  # run as a job in its own pod rather than inside this kernel
)

> 2025-09-02 11:21:34,115 [info] Storing function: {"db":"http://mlrun-api:8080","name":"preprocessing-input-data","uid":"fa4eb8808b8c4a779a1575b46ec1a191"}
> 2025-09-02 11:21:34,420 [info] Job is running in the background, pod: preprocessing-input-data-ddt58
> 2025-09-02 11:21:42,281 [info] extracting source from s3://mlrun/predictive-maintenance.zip to /app/code
> 2025-09-02 11:21:44,039 [info] Found 14 sensor columns: ['s_2', 's_3', 's_4', 's_7', 's_8', 's_9', 's_11', 's_12', 's_13', 's_14', 's_15', 's_17', 's_20', 's_21']
> 2025-09-02 11:21:44,658 [info] Globally normalized 14 sensor columns
> 2025-09-02 11:21:45,040 [info] To track results use the CLI: {"info_cmd":"mlrun get run fa4eb8808b8c4a779a1575b46ec1a191 -p predictive-maintenance","logs_cmd":"mlrun logs fa4eb8808b8c4a779a1575b46ec1a191 -p predictive-maintenance"}
> 2025-09-02 11:21:45,041 [info] Run execution finished: {"name":"preprocessing-input-data","status":"completed"}


project,uid,iter,start,end,state,kind,name,labels,inputs,parameters,results,artifacts
predictive-maintenance,...6ec1a191,0,Sep 02 11:21:42,2025-09-02 11:21:45.029270+00:00,completed,run,preprocessing-input-data,kind=jobowner=jovyanmlrun/client_version=1.9.2mlrun/client_python_version=3.11.13host=preprocessing-input-data-ddt58,Y_train_dfY_test_df,,"return=( Unnamed: 0 unique_id ds s_2 ... s_17 s_20 s_21 y\n0 0 1 1 -1.721725 ... -0.781710 1.348493 1.194427 191\n1 1 1 2 -1.061780 ... -0.781710 1.016528 1.236922 190\n2 2 1 3 -0.661813 ... -2.073094 0.739891 0.503423 189\n3 3 1 4 -0.661813 ... -0.781710 0.352598 0.777792 188\n4 4 1 5 -0.621816 ... -0.136018 0.463253 1.059552 187\n... ... ... ... ... ... ... ... ... ...\n20626 20626 100 196 1.618000 ... 2.446751 -1.805173 -2.921113 4\n20627 20627 100 197 1.717992 ... 1.155367 -2.856395 -1.203764 3\n20628 20628 100 198 1.478011 ... 3.092444 -2.081810 -3.292481 2\n20629 20629 100 199 1.098043 ... 1.155367 -2.911722 -2.085072 1\n20630 20630 100 200 2.337940 ... 1.801059 -2.469103 -2.194080 0\n\n[20631 rows x 18 columns], unique_id ds s_2 s_3 ... s_17 s_20 s_21 y\n0 1 193 0.678077 -0.853550 ... -0.781710 0.241943 0.774097 142\n1 1 194 -1.941707 -0.338137 ... -0.136018 1.127183 0.941305 141\n2 1 195 -0.441831 -0.584426 ... -0.136018 1.459148 1.172256 140\n3 1 196 -0.481827 -1.044384 ... -1.427402 1.016528 0.775945 139\n4 1 197 -0.341839 -0.543650 ... -2.073094 0.961200 1.138999 138\n... ... ... ... ... ... ... ... ... ...\n13091 100 394 1.118041 1.456023 ... 0.509675 -0.919933 -0.852719 24\n13092 100 395 1.078044 0.842747 ... 1.155367 -1.362553 -0.116449 23\n13093 100 396 1.518008 0.428459 ... 1.155367 -1.085916 -0.781586 22\n13094 100 397 1.158038 0.728573 ... 1.155367 -0.864606 -0.182962 21\n13095 100 398 0.538089 1.809962 ... 1.801059 -0.643296 -0.962652 20\n\n[13096 rows x 17 columns])",full_data_normalizedY_test_normalizedY_train_normalized





> 2025-09-02 11:21:50,159 [info] Run execution finished: {"name":"preprocessing-input-data","status":"completed"}


## 2. Feature Engineering

In [19]:
# Look up the function stored in the project under the key "feature"
# (its definition lives outside this notebook).
feature_fn = project.get_function("feature")

In [20]:
# The feature step consumes the "full_data_normalized" output of the preprocessing run.
feature_inputs = {
    "Y_full_data_normalized": preprocessing_run.outputs["full_data_normalized"],
}

feature_run = project.run_function(
    function=feature_fn,
    inputs=feature_inputs,
    local=False,  # run as a job in its own pod rather than inside this kernel
)

> 2025-09-02 11:21:50,536 [info] Storing function: {"db":"http://mlrun-api:8080","name":"feature-feat-creation","uid":"a9cfa611bd794b279c547c0220a16dce"}
> 2025-09-02 11:21:50,828 [info] Job is running in the background, pod: feature-feat-creation-h75j5
> 2025-09-02 11:21:57,963 [info] extracting source from s3://mlrun/predictive-maintenance.zip to /app/code
> 2025-09-02 11:21:59,551 [info] Creating smoothed feature for column: s_21
> 2025-09-02 11:21:59,551 [info] Grouping by: unique_id
> 2025-09-02 11:21:59,551 [info] Using window_length: 15, polyorder: 3
> 2025-09-02 11:21:59,593 [info] Created new feature: s_21_smoothed
> 2025-09-02 11:22:00,020 [info] To track results use the CLI: {"info_cmd":"mlrun get run a9cfa611bd794b279c547c0220a16dce -p predictive-maintenance","logs_cmd":"mlrun logs a9cfa611bd794b279c547c0220a16dce -p predictive-maintenance"}
> 2025-09-02 11:22:00,020 [info] Run execution finished: {"name":"feature-feat-creation","status":"completed"}


project,uid,iter,start,end,state,kind,name,labels,inputs,parameters,results,artifacts
predictive-maintenance,...20a16dce,0,Sep 02 11:21:57,2025-09-02 11:22:00.002994+00:00,completed,run,feature-feat-creation,kind=jobowner=jovyanmlrun/client_version=1.9.2mlrun/client_python_version=3.11.13host=feature-feat-creation-h75j5,Y_full_data_normalized,,,Y_full_data_features





> 2025-09-02 11:22:05,222 [info] Run execution finished: {"name":"feature-feat-creation","status":"completed"}


## 3. Train Model

In [21]:
# Look up the function stored in the project under the key "train"
# (its definition lives outside this notebook).
train_fn = project.get_function("train")

In [22]:
# Training consumes the engineered feature table produced by the feature run.
# NOTE(review): the training log reports 33727 samples, which equals the 20631 train
# rows plus the 13096 test rows shown in the preprocessing output — confirm that
# fitting on the combined set is intended before trusting the test-set metrics.
train_inputs = {
    "Y_full_data_features": feature_run.outputs["Y_full_data_features"],
}

train_run = project.run_function(
    function=train_fn,
    inputs=train_inputs,
    local=False,  # run as a job in its own pod rather than inside this kernel
)

> 2025-09-02 11:22:05,400 [info] Storing function: {"db":"http://mlrun-api:8080","name":"train-train-model","uid":"a7ccccae1c8142109b5f92a00a5af3f6"}
> 2025-09-02 11:22:05,674 [info] Job is running in the background, pod: train-train-model-9vpkz
> 2025-09-02 11:22:12,363 [info] extracting source from s3://mlrun/predictive-maintenance.zip to /app/code
pkg_resources is deprecated as an API. See https://setuptools.pypa.io/en/latest/pkg_resources.html. The pkg_resources package is slated for removal as early as 2025-11-30. Refrain from using this package or pin to Setuptools<81.
Global seed set to 1
> 2025-09-02 11:22:19,145 [info] Starting model training with 33727 samples
> 2025-09-02 11:22:19,145 [info] Target column: y
> 2025-09-02 11:22:19,145 [info] Unique ID column: unique_id
> 2025-09-02 11:22:19,145 [info] Time series column: ds
> 2025-09-02 11:22:19,145 [info] Using 15 exogenous variables: ['s_2', 's_3', 's_4', 's_7', 's_8', 's_9', 's_11', 's_12', 's_13', 's_14', 's_15', 's_17', 

project,uid,iter,start,end,state,kind,name,labels,inputs,parameters,results,artifacts
predictive-maintenance,...0a5af3f6,0,Sep 02 11:22:12,2025-09-02 11:22:23.452061+00:00,completed,run,train-train-model,kind=jobowner=jovyanmlrun/client_version=1.9.2mlrun/client_python_version=3.11.13host=train-train-model-9vpkz,Y_full_data_features,,return=trained_nbeatsx_model,model_summarymodel





> 2025-09-02 11:22:27,584 [info] Run execution finished: {"name":"train-train-model","status":"completed"}


## 4. Predict

In [23]:
# Look up the function stored in the project under the key "prediction"
# (its definition lives outside this notebook).
prediction_fn = project.get_function("prediction")

In [24]:
# Prediction combines outputs of all three upstream runs: the engineered feature
# table, the normalized test split, and the trained model artifact.
prediction_inputs = {
    "Y_full": feature_run.outputs["Y_full_data_features"],
    "Y_test_normalized": preprocessing_run.outputs["Y_test_normalized"],
    "model": train_run.outputs["model"],
}

prediction_run = project.run_function(
    function=prediction_fn,
    inputs=prediction_inputs,
    local=False,  # run as a job in its own pod rather than inside this kernel
)

> 2025-09-02 11:22:27,829 [info] Storing function: {"db":"http://mlrun-api:8080","name":"prediction-predict","uid":"d8aaddc6dfa84f49bcf181e55cd2cd70"}
> 2025-09-02 11:22:28,148 [info] Job is running in the background, pod: prediction-predict-8gxjt
> 2025-09-02 11:22:34,545 [info] extracting source from s3://mlrun/predictive-maintenance.zip to /app/code
pkg_resources is deprecated as an API. See https://setuptools.pypa.io/en/latest/pkg_resources.html. The pkg_resources package is slated for removal as early as 2025-11-30. Refrain from using this package or pin to Setuptools<81.
> 2025-09-02 11:22:40,763 [info] Starting prediction process...
> 2025-09-02 11:22:40,764 [info] Full data shape: (33727, 18)
> 2025-09-02 11:22:40,764 [info] Test data shape: (13096, 17)
> 2025-09-02 11:22:40,764 [info] Loading trained model...
> 2025-09-02 11:22:40,818 [info] Starting cross-validation with 10 windows...
GPU available: False, used: False
TPU available: False, using: 0 TPU cores
IPU available: Fa

project,uid,iter,start,end,state,kind,name,labels,inputs,parameters,results,artifacts
predictive-maintenance,...5cd2cd70,0,Sep 02 11:22:34,2025-09-02 11:22:43.644160+00:00,completed,run,prediction-predict,kind=jobowner=jovyanmlrun/client_version=1.9.2mlrun/client_python_version=3.11.13host=prediction-predict-8gxjt,Y_fullY_test_normalizedmodel,,,predictionsprediction_metrics





> 2025-09-02 11:22:49,732 [info] Run execution finished: {"name":"prediction-predict","status":"completed"}
