In [1]:
import os

In [2]:
%pwd

'/home/adhitizki/playground/pacmann/mlops_credit_card/mlops-credit-card/notebooks'

In [3]:
# Move from the notebooks folder up to the project's main directory so that
# the relative paths used in later cells resolve from there.
# NOTE: the target is relative to the current working directory, so running
# this cell again moves up one more level each time.
os.chdir("../")

In [4]:
# with open('.env') as f:
#     os.environ.update(
#         line.strip().split('=') for line in f
# )

In [5]:
%pwd

'/home/adhitizki/playground/pacmann/mlops_credit_card/mlops-credit-card'

### Predict Config

This code will be applied in `src/MLProject/entity/config_entity.py`.

In [6]:
from dataclasses import dataclass
from pathlib import Path

@dataclass(frozen=True)
class PredictionConfig:
    """Immutable settings needed to predict with the model deployed in MLflow."""

    # local directory the scaler artifact is downloaded into
    root_dir: Path
    # URI of the MLflow tracking server
    mlflow_tracking_uri: str
    # name of the registered model in MLflow
    mlflow_model_name: str
    # registry alias that points at the deployed model version (a plain
    # string read from the environment, not a filesystem path)
    mlflow_deploy_model_alias: str
    # artifact path of the scaler inside the MLflow run
    mlflow_scaler_model_path: Path
    
    # for development (debug)
    # joblib dumps of the validation inputs and outputs
    input_valid_path: Path
    output_valid_path: Path

### Predict Config Manager

This code will be applied in `src/MLProject/config/configurations.py`.

In [7]:
from MLProject.constants import CONFIG_FILE_PATH, PARAMS_FILE_PATH
from MLProject.utils.common import read_yaml, create_directories

In [8]:
class ConfigurationManager:
    """Load the project YAML files and build config entities from them."""

    def __init__(self, 
                 config_filepath = CONFIG_FILE_PATH,
                 params_filepath = PARAMS_FILE_PATH):
        """read the config and params files and create the artifacts root

        Args:
            config_filepath: path of the main configuration file
            params_filepath: path of the parameters file
        """
        self.config = read_yaml(config_filepath)
        self.params = read_yaml(params_filepath)

        artifacts_root = self.config.artifacts_root
        create_directories([artifacts_root])

    def get_prediction_config(self) -> PredictionConfig:
        """read the prediction section of the config file, make sure its
        root directory exists and store the values as a config entity

        The MLflow tracking URI and the deploy alias are taken from the
        environment variables, the rest from the loaded config file.

        Returns:
            config: PredictionConfig type
        """
        predict_section = self.config.predict
        # for development (debug)
        dump_section = self.config.dump_data

        create_directories([predict_section.root_dir])

        settings = dict(
            root_dir=predict_section.root_dir,
            mlflow_tracking_uri=os.environ["MLFLOW_TRACKING_URI"],
            mlflow_model_name=predict_section.mlflow_model_name,
            mlflow_deploy_model_alias=os.environ["MLFLOW_DEPLOY_MODEL_ALIAS"],
            mlflow_scaler_model_path=predict_section.mlflow_scaler_model_path,
            # for development (debug)
            input_valid_path=dump_section.input_valid_path,
            output_valid_path=dump_section.output_valid_path,
        )

        return PredictionConfig(**settings)

In [9]:
import joblib
import pandas as pd
from mlflow import MlflowClient
from mlflow import pyfunc
from mlflow.artifacts import download_artifacts

---

**Debug**: Step-by-step walkthrough of running a prediction in the notebook with MLflow.

In [10]:
# Build the configuration manager, then the prediction settings used by the
# debug cells below.
config = ConfigurationManager()
predict_config = config.get_prediction_config()

[2024-07-22 23:16:50,680: INFO: common: yaml file: config/config.yaml loaded successfully]
[2024-07-22 23:16:50,686: INFO: common: yaml file: metrics/params.yaml loaded successfully]
[2024-07-22 23:16:50,689: INFO: common: created directory at: artifacts]
[2024-07-22 23:16:50,691: INFO: common: created directory at: artifacts/predict]


Select the deployed model from MLflow.

In [11]:
# Connect to the MLflow tracking server given by the prediction settings.
client = MlflowClient(tracking_uri=predict_config.mlflow_tracking_uri)
# Look up the registered model version that the deploy alias points to.
selected_model = client.get_model_version_by_alias(
    predict_config.mlflow_model_name, 
    predict_config.mlflow_deploy_model_alias
)

# Artifact location of the selected model version (shown as the cell output).
selected_model.source

'mlflow-artifacts:/1/d0ed298a35ee4ca3acd3cc76defdbf20/artifacts/models'

In [12]:
# Load the selected model version through the generic pyfunc interface,
# using its artifact location as the model URI.
loaded_model = pyfunc.load_model(model_uri=selected_model.source)
loaded_model

Downloading artifacts:   0%|          | 0/6 [00:00<?, ?it/s]

mlflow.pyfunc.loaded_model:
  artifact_path: models
  flavor: mlflow.sklearn
  run_id: d0ed298a35ee4ca3acd3cc76defdbf20

Get the model `run_id`.

In [13]:
# Run id of the selected model version; used below to fetch the scaler artifact.
selected_run_id = selected_model.run_id
selected_run_id

'd0ed298a35ee4ca3acd3cc76defdbf20'

Download the scaler (one of the MLflow artifacts) from MLflow.

In [14]:
# Download the scaler artifact of the selected run into the prediction
# directory; the returned local path is shown as the cell output.
download_artifacts(
    run_id=selected_run_id,
    artifact_path=predict_config.mlflow_scaler_model_path,
    dst_path=predict_config.root_dir
)

Downloading artifacts:   0%|          | 0/1 [00:00<?, ?it/s]

'/home/adhitizki/playground/pacmann/mlops_credit_card/mlops-credit-card/artifacts/predict/scaler/scaler.pkl'

Load the downloaded scaler.

In [15]:
# Rebuild the local scaler path as <download directory>/<artifact path>,
# then load the scaler from it with joblib.
root_dir = predict_config.root_dir
mlflow_scaler_model_path = predict_config.mlflow_scaler_model_path
scaler_model_path = f"{root_dir}/{mlflow_scaler_model_path}"
scaler = joblib.load(scaler_model_path)
scaler

Load the validation data that is used as input for prediction and evaluation.

In [16]:
# Load the dumped validation inputs (X_valid) and outputs (y_valid).
X_valid = joblib.load(predict_config.input_valid_path)
y_valid = joblib.load(predict_config.output_valid_path)

In [17]:
X_valid.head()

Unnamed: 0,V1,V2,V3,V4,V5,V6,V7,V8,V9,V10,...,V20,V21,V22,V23,V24,V25,V26,V27,V28,Amount
52335,0.026959,-0.126982,1.618991,-0.441741,0.300002,0.127898,0.716459,-0.154763,0.032103,0.485643,...,-0.204385,-0.04827,0.303284,-0.133856,1.117723,-0.541355,-1.410231,-0.060327,0.256329,21851.03
39501,0.142067,-0.565915,0.495495,-2.204124,1.565482,2.393946,0.556355,-0.111966,-0.004773,1.158851,...,0.226744,-0.239844,-0.611218,0.136396,1.412551,-1.185462,-1.528993,-0.809289,-1.373998,11450.21
13640,-0.058849,-1.478973,-0.35257,0.267406,-0.385502,0.440511,1.146811,-0.17712,0.202304,0.164229,...,1.827847,0.246913,0.070856,0.276167,0.120955,0.18071,-1.208093,-0.412153,0.07931,8966.13
29892,-0.688415,-0.341557,-0.106558,0.621967,0.30515,-0.306676,-0.686923,-0.013694,-0.355037,-0.431664,...,-0.192917,0.211144,0.068983,-0.816323,-0.37716,-0.838943,-0.673803,0.730304,-0.898668,1690.6
15317,-0.056187,0.337458,-0.374867,0.792098,2.436512,-1.11816,1.305309,-0.236867,-0.660626,-0.129328,...,-0.236873,-0.15106,-0.307905,-0.288629,-0.631406,1.622052,0.939539,-0.360283,0.344625,520.0


In [18]:
X_valid.shape

(8530, 29)

The validation data serves here as a stand-in for the body of the HTTP prediction request.

Scale the validation data before using it as model input.

In [21]:
import pandas as pd

# Scale the validation inputs with the downloaded scaler, then wrap the result
# in a DataFrame with the original column names. The original row index is not
# passed on, so the scaled frame gets a fresh 0..n-1 index.
X_valid_scaled = scaler.transform(X_valid)
X_valid_scaled = pd.DataFrame(X_valid_scaled, columns=X_valid.columns)
X_valid_scaled

Unnamed: 0,V1,V2,V3,V4,V5,V6,V7,V8,V9,V10,...,V20,V21,V22,V23,V24,V25,V26,V27,V28,Amount
0,0.020553,-0.129301,1.622804,-0.439500,0.298113,0.130156,0.772397,-0.158384,0.030275,0.487501,...,-0.214454,-0.043760,0.307043,-0.136438,1.115266,-0.544553,-1.409808,-0.059934,0.266003,1.417051
1,0.135963,-0.579896,0.494869,-2.210092,1.575276,2.426775,0.599597,-0.114667,-0.006793,1.167780,...,0.233632,-0.242117,-0.626751,0.143079,1.409530,-1.192386,-1.529034,-0.821011,-1.425621,-0.082478
2,-0.065480,-1.517216,-0.356547,0.272951,-0.393719,0.446987,1.236874,-0.181222,0.201362,0.162710,...,1.897710,0.261876,0.069711,0.287642,0.120405,0.181690,-1.206880,-0.417451,0.082328,-0.440617
3,-0.696697,-0.349577,-0.109563,0.629163,0.303309,-0.310280,-0.742269,-0.014281,-0.358882,-0.439442,...,-0.202535,0.224840,0.067799,-0.842302,-0.376757,-0.843862,-0.670502,0.743486,-0.932420,-1.489560
4,-0.062811,0.347481,-0.378932,0.800087,2.454347,-1.132711,1.407941,-0.242254,-0.666063,-0.133931,...,-0.248220,-0.150189,-0.317039,-0.296518,-0.630516,1.631371,0.949145,-0.364742,0.357619,-1.658330
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
8525,-0.290591,-0.136918,0.961237,-0.348507,0.648767,1.170936,0.789691,-0.275294,1.227750,1.332420,...,-0.408032,-0.262666,-0.827707,0.116553,-0.040842,-1.483498,-3.339004,-1.762353,0.015028,0.040356
8526,0.961035,-0.384725,0.668023,-0.188264,0.096557,0.006574,0.486883,-0.164420,0.425674,0.697179,...,-0.379336,-0.065387,0.121889,-0.106932,1.099647,0.906153,-0.852997,-0.239360,-0.075052,-1.616976
8527,-0.531269,-1.140320,0.316623,-0.535626,0.083928,0.424143,0.612504,-0.182671,0.076004,0.616164,...,-1.785607,-0.326947,-0.556260,-0.773452,-0.678318,-0.144476,-1.172008,-0.202200,-0.268051,-1.203331
8528,0.677616,-0.455874,0.196999,-0.328725,0.648574,0.721269,0.434870,-0.218825,0.999664,0.435809,...,-0.394042,-0.006148,0.119545,-0.204982,-0.638003,-0.146175,-0.576446,-0.226210,-0.473516,-1.474582


Make prediction.

In [22]:
# Predict the scaled validation inputs with the loaded model.
y_predict = loaded_model.predict(X_valid_scaled)



In [23]:
len(y_predict)

8530

In [24]:
y_predict[:10]

array([0, 0, 1, 1, 1, 0, 0, 0, 1, 1])

In [25]:
from sklearn.metrics import classification_report
# Compare the predictions with the validation outputs; print() is needed here
# because the report is a preformatted multi-line string.
print(classification_report(y_valid, y_predict))

              precision    recall  f1-score   support

           0       0.95      0.97      0.96      4273
           1       0.97      0.95      0.96      4257

    accuracy                           0.96      8530
   macro avg       0.96      0.96      0.96      8530
weighted avg       0.96      0.96      0.96      8530



---

### Make Prediction

This code will be applied in `src/MLProject/components/predict.py`.

In [26]:
from MLProject import logger

class Predict:
    """Predict input data with the model deployed in the MLflow registry."""

    def __init__(self, config: "PredictionConfig"):
        self.config = config

    def run(self, data: "pd.DataFrame") -> list:
        """scale the input data and predict it with the deployed MLflow model
        
        Args:
            data (pd.DataFrame): input data to predict; its column names are
                reused for the scaled frame that is passed to the model

        Raises:
            client_error: error when access mlflow to get deployed model
            download_error: error when download scaler from mlflow artifact
            load_error: error when loading the scaler or transforming the data
            predict_error: error when loading the model or predicting
        
        Returns:
            y_predict: list type
        """
        try:
            logger.info("Set MLflow Client.")
            client = MlflowClient(tracking_uri=self.config.mlflow_tracking_uri)
            
            logger.info("Select the deployed model from MLflow.")
            selected_model = client.get_model_version_by_alias(
                self.config.mlflow_model_name, 
                self.config.mlflow_deploy_model_alias
            )
            
            logger.info("Get the deployed model run id.")
            selected_run_id = selected_model.run_id
        except Exception as client_error:
            logger.error(client_error)
            # bare raise: re-raise the active exception without adding a
            # redundant traceback entry for this line
            raise
        
        try:
            logger.info("Downloading scaler from MLflow's artifacts.")
            # download_artifacts returns the local location of the artifact,
            # so use it directly instead of rebuilding the path by hand
            scaler_model_path = Path(download_artifacts(
                run_id=selected_run_id,
                artifact_path=self.config.mlflow_scaler_model_path,
                dst_path=self.config.root_dir
            ))
        except Exception as download_error:
            logger.error(download_error)
            raise
        
        try:
            logger.info("Load the scaler model.")
            scaler = joblib.load(scaler_model_path)
            
            logger.info("Transform the data.")
            X_test_scaled = scaler.transform(data)
            X_test_scaled = pd.DataFrame(X_test_scaled, columns=data.columns)

        except Exception as load_error:
            logger.error(load_error)
            raise
        
        try:
            # model loading and prediction get the same log-then-re-raise
            # handling as the steps above
            logger.info("Predict the data.")
            loaded_model = pyfunc.load_model(model_uri=selected_model.source)
            y_predict = loaded_model.predict(X_test_scaled).tolist()
        except Exception as predict_error:
            logger.error(predict_error)
            raise
        
        return y_predict

### Predict the Data

**Debug**: test the predict object and its method.

In [27]:
# Rebuild the configuration manager and the prediction settings for the
# component test below.
config = ConfigurationManager()
predict_config = config.get_prediction_config()

[2024-07-22 23:16:52,748: INFO: common: yaml file: config/config.yaml loaded successfully]
[2024-07-22 23:16:52,752: INFO: common: yaml file: metrics/params.yaml loaded successfully]
[2024-07-22 23:16:52,755: INFO: common: created directory at: artifacts]
[2024-07-22 23:16:52,757: INFO: common: created directory at: artifacts/predict]


In [28]:
# Load the dumped validation inputs to use as the prediction input.
data = joblib.load(predict_config.input_valid_path)

This code will be applied in `app.py`.

In [29]:
try:
    # build the prediction settings, then run the prediction component
    config = ConfigurationManager()
    predict_config = config.get_prediction_config()
    predict = Predict(config=predict_config)
    result = predict.run(data)
except Exception as e:
    # log the failure, then re-raise the active exception; a bare `raise`
    # avoids adding a redundant traceback entry for this line
    logger.error(e)
    raise

[2024-07-22 23:16:52,912: INFO: common: yaml file: config/config.yaml loaded successfully]
[2024-07-22 23:16:52,916: INFO: common: yaml file: metrics/params.yaml loaded successfully]
[2024-07-22 23:16:52,919: INFO: common: created directory at: artifacts]
[2024-07-22 23:16:52,921: INFO: common: created directory at: artifacts/predict]
[2024-07-22 23:16:52,923: INFO: 713703144: Set MLflow Client.]
[2024-07-22 23:16:52,927: INFO: 713703144: Select the deployed model from MLflow.]
[2024-07-22 23:16:52,951: INFO: 713703144: Get the deployed model run id.]
[2024-07-22 23:16:52,954: INFO: 713703144: Downloading scaler from MLflow's artifacts.]


Downloading artifacts:   0%|          | 0/1 [00:00<?, ?it/s]

[2024-07-22 23:16:53,055: INFO: 713703144: Load the scaler model.]
[2024-07-22 23:16:53,059: INFO: 713703144: Transform the data.]
[2024-07-22 23:16:53,066: INFO: 713703144: Predict the data.]


Downloading artifacts:   0%|          | 0/6 [00:00<?, ?it/s]



In [30]:
len(result)

8530

In [31]:
result[:10]

[0, 0, 1, 1, 1, 0, 0, 0, 1, 1]