In [1]:
import os

In [2]:
%pwd

'/home/adhitizki/playground/pacmann/mlops_credit_card/mlops-credit-card/notebooks'

In [3]:
# Move from the `notebooks/` folder up to the project root so that relative
# paths used by later cells resolve from there.
# The guard keeps this cell idempotent: once the kernel is already at the
# project root, re-running the cell no longer climbs another directory level.
if os.path.basename(os.getcwd()) == "notebooks":
    os.chdir("../")

In [4]:
# with open('.env') as f:
#     os.environ.update(
#         line.strip().split('=') for line in f
# )

In [5]:
%pwd

'/home/adhitizki/playground/pacmann/mlops_credit_card/mlops-credit-card'

### Unit Testing Config

This code will be applied in `src/MLProject/entity/config_entity.py`.

In [6]:
from dataclasses import dataclass
from pathlib import Path

@dataclass(frozen=True)
class UnitTestConfig:
    """Immutable settings used by the unit-testing stage.

    Attributes:
        root_dir: Working directory of the unit-testing stage; artifacts
            downloaded from MLflow are written below it.
        mlflow_tracking_uri: URI handed to ``MlflowClient`` to reach the
            tracking server.
        mlflow_model_name: Name of the registered model to look up.
        mlflow_deploy_model_alias: Alias that selects the deployed version
            of the registered model.
        mlflow_input_example_path: Artifact path of the input example,
            relative to the model's run.
        app_endpoint: URL the prediction requests are POSTed to.

    NOTE(review): the two ``Path`` fields are filled straight from the YAML
    config in this notebook, so they may hold plain strings at runtime --
    TODO confirm.
    """
    root_dir: Path
    mlflow_tracking_uri: str
    mlflow_model_name: str
    mlflow_deploy_model_alias: str
    mlflow_input_example_path: Path
    app_endpoint: str
    

### Unit Testing Config Manager

This code will be applied in `src/MLProject/config/configurations.py`.

In [7]:
from MLProject.constants import CONFIG_FILE_PATH, PARAMS_FILE_PATH
from MLProject.utils.common import read_yaml, create_directories

In [8]:
class ConfigurationManager:
    """Read the project YAML files and build per-stage config entities."""

    def __init__(self, config_filepath=CONFIG_FILE_PATH, params_filepath=PARAMS_FILE_PATH):
        """Load both YAML files and make sure the artifacts root exists.

        Args:
            config_filepath: location of the main configuration YAML
            params_filepath: location of the parameters YAML
        """
        self.config = read_yaml(config_filepath)
        self.params = read_yaml(params_filepath)

        artifacts_root = self.config.artifacts_root
        create_directories([artifacts_root])

    def get_unit_test_config(self) -> UnitTestConfig:
        """Assemble the unit-testing settings.

        Values come from the ``predict`` and ``unit_test`` sections of the
        config file and from the ``MLFLOW_TRACKING_URI``,
        ``MLFLOW_DEPLOY_MODEL_ALIAS`` and ``APP_ENDPOINT`` environment
        variables. The unit-test root directory is created as a side effect.

        Returns:
            config: UnitTestConfig type

        Raises:
            KeyError: when one of the environment variables is not set
        """
        predict_section = self.config.predict
        test_section = self.config.unit_test

        create_directories([test_section.root_dir])

        # A dict literal evaluates its values top to bottom, so look-ups
        # happen in the same order as the fields are listed.
        settings = {
            "root_dir": test_section.root_dir,
            "mlflow_tracking_uri": os.environ["MLFLOW_TRACKING_URI"],
            "mlflow_model_name": predict_section.mlflow_model_name,
            "mlflow_deploy_model_alias": os.environ["MLFLOW_DEPLOY_MODEL_ALIAS"],
            "mlflow_input_example_path": test_section.mlflow_input_example_path,
            "app_endpoint": os.environ["APP_ENDPOINT"],
        }
        return UnitTestConfig(**settings)

In [9]:
from mlflow.artifacts import download_artifacts
from mlflow import MlflowClient
from mlflow import pyfunc

---

**Debug**: Walk through the test preparation steps in the notebook with MLflow, such as loading the input example.

In [10]:
# Build the unit-test settings from the YAML config and environment variables.
config = ConfigurationManager()
unit_test_config = config.get_unit_test_config()

[2024-07-25 22:19:54,184: INFO: common: yaml file: config/config.yaml loaded successfully]
[2024-07-25 22:19:54,188: INFO: common: yaml file: metrics/params.yaml loaded successfully]
[2024-07-25 22:19:54,190: INFO: common: created directory at: artifacts]
[2024-07-25 22:19:54,194: INFO: common: created directory at: artifacts/test]


Select the deployed model from MLflow.

In [11]:
# Connect to the MLflow tracking server and look up the version of the
# registered model that carries the deployment alias.
client = MlflowClient(tracking_uri=unit_test_config.mlflow_tracking_uri)
selected_model = client.get_model_version_by_alias(
    unit_test_config.mlflow_model_name, 
    unit_test_config.mlflow_deploy_model_alias
)

# Display the artifact location recorded on that model version.
selected_model.source

'mlflow-artifacts:/1/6ad944bf48144741852c8724502cf1bd/artifacts/models'

In [12]:
# Load the selected version through MLflow's pyfunc interface, using the
# artifact location stored on the model version.
loaded_model = pyfunc.load_model(model_uri=selected_model.source)
loaded_model

Downloading artifacts:   0%|          | 0/6 [00:00<?, ?it/s]

mlflow.pyfunc.loaded_model:
  artifact_path: models
  flavor: mlflow.sklearn
  run_id: 6ad944bf48144741852c8724502cf1bd

Get the model `run_id`.

In [13]:
# The run id identifies the run whose artifacts are downloaded in the next cell.
selected_run_id = selected_model.run_id
selected_run_id

'6ad944bf48144741852c8724502cf1bd'

In [14]:
# Fetch the input example stored with the run into the unit-test folder.
# The value returned by the call (shown as the cell output) is the local
# path of the downloaded file.
download_artifacts(
    run_id=selected_run_id,
    artifact_path=unit_test_config.mlflow_input_example_path,
    dst_path=unit_test_config.root_dir
)

Downloading artifacts:   0%|          | 0/1 [00:00<?, ?it/s]

'/home/adhitizki/playground/pacmann/mlops_credit_card/mlops-credit-card/artifacts/test/models/input_example.json'

In [15]:
import json

# Read the downloaded input example. The `with` block closes the file handle
# as soon as the JSON is parsed instead of leaving it open for the lifetime
# of the kernel; JSON files are read as UTF-8 regardless of the OS locale.
input_example_path = f"{unit_test_config.root_dir}/{unit_test_config.mlflow_input_example_path}"
with open(input_example_path, encoding="utf-8") as input_example_file:
    input_example = json.load(input_example_file)
input_example

{'columns': ['V1',
  'V2',
  'V3',
  'V4',
  'V5',
  'V6',
  'V7',
  'V8',
  'V9',
  'V10',
  'V11',
  'V12',
  'V13',
  'V14',
  'V15',
  'V16',
  'V17',
  'V18',
  'V19',
  'V20',
  'V21',
  'V22',
  'V23',
  'V24',
  'V25',
  'V26',
  'V27',
  'V28',
  'Amount'],
 'data': [[-0.3762728912373728,
   0.721511213900676,
   -1.1664918189345304,
   1.208328018227423,
   -0.9415571315710908,
   -1.160776111977162,
   -1.3437470860433494,
   0.504017059573741,
   -0.9112977745290756,
   -1.752325630637097,
   2.2632745201338755,
   -1.877402024744184,
   0.3475890727488539,
   -1.7592174568663417,
   0.899166548336839,
   -2.0432209761729547,
   -2.024710262577306,
   -1.9387079993047176,
   2.052308816229901,
   1.22227277694122,
   0.6116023868234789,
   0.3834441097976565,
   0.391722779514344,
   -0.8507592098367073,
   -1.97148701338222,
   0.6735683641958345,
   2.803530452054044,
   1.7886324831048497,
   10910.77],
  [-2.0191851118412427,
   2.1379569329428443,
   -2.205429744481869

We reuse the MLflow input example as the request body, so the payload already matches the MLflow input example format.

In [16]:
# The input example is sent unchanged as the request payload.
# Note: this binds a second name to the same dict; it is not a copy.
request_body = input_example

request_body

{'columns': ['V1',
  'V2',
  'V3',
  'V4',
  'V5',
  'V6',
  'V7',
  'V8',
  'V9',
  'V10',
  'V11',
  'V12',
  'V13',
  'V14',
  'V15',
  'V16',
  'V17',
  'V18',
  'V19',
  'V20',
  'V21',
  'V22',
  'V23',
  'V24',
  'V25',
  'V26',
  'V27',
  'V28',
  'Amount'],
 'data': [[-0.3762728912373728,
   0.721511213900676,
   -1.1664918189345304,
   1.208328018227423,
   -0.9415571315710908,
   -1.160776111977162,
   -1.3437470860433494,
   0.504017059573741,
   -0.9112977745290756,
   -1.752325630637097,
   2.2632745201338755,
   -1.877402024744184,
   0.3475890727488539,
   -1.7592174568663417,
   0.899166548336839,
   -2.0432209761729547,
   -2.024710262577306,
   -1.9387079993047176,
   2.052308816229901,
   1.22227277694122,
   0.6116023868234789,
   0.3834441097976565,
   0.391722779514344,
   -0.8507592098367073,
   -1.97148701338222,
   0.6735683641958345,
   2.803530452054044,
   1.7886324831048497,
   10910.77],
  [-2.0191851118412427,
   2.1379569329428443,
   -2.205429744481869

Test `app.py` by sending an HTTP request with the MLflow input example as the payload.

In [17]:
import requests

# Send the input example to the running app.
# The timeout keeps the kernel from hanging forever when the app is
# unreachable, and raise_for_status() surfaces HTTP errors instead of
# treating an error body as if it were the predictions.
result = requests.post(
    url=unit_test_config.app_endpoint,
    json=request_body,
    timeout=30,
)
result.raise_for_status()
y_predict = result.json()

In [18]:
# Number of predictions returned by the app.
len(y_predict)

10

In [19]:
# Inspect the raw predictions.
y_predict

[1, 1, 1, 0, 1, 1, 1, 0, 1, 1]

In [20]:
# Check which Python type the decoded JSON response has.
type(y_predict)

list

---

### Unit Testing

This code will be applied in `src/MLProject/components/unit_testing.py`.

In [22]:
import json
import requests

from MLProject import logger

class UnitTesting:
    """Check the prediction app against the deployed model's MLflow input example.

    Typical use: call ``set_request_body()`` once to download and load the
    input example, then call the check methods. Every check method sends its
    own POST request to ``config.app_endpoint``.
    """

    # Seconds to wait for the prediction app before giving up, so an
    # unreachable endpoint fails the check instead of hanging forever.
    REQUEST_TIMEOUT_SECONDS = 30

    def __init__(self, config: "UnitTestConfig"):
        self.config = config
        self.req_body_key = None
        # Filled by set_request_body(); stays None until then.
        self.req_body = None

    def set_request_body(self) -> None:
        """Download the deployed model's input example and store it as the request body.

        The model version is resolved through the configured alias, its run id
        is used to download the input example artifact into
        ``config.root_dir``, and the parsed JSON is stored in ``self.req_body``.

        Raises:
            Exception: re-raised (after logging) when the model version cannot
                be resolved or when the artifact download fails
        """
        try:
            logger.info("Set MLflow Client.")
            client = MlflowClient(tracking_uri=self.config.mlflow_tracking_uri)
            selected_model = client.get_model_version_by_alias(
                self.config.mlflow_model_name,
                self.config.mlflow_deploy_model_alias
            )

            logger.info("Get the deployed model run id.")
            selected_run_id = selected_model.run_id
        except Exception as client_error:
            logger.error(client_error)
            raise

        try:
            logger.info("Downloading input example from MLflow's artifacts.")
            download_artifacts(
                run_id=selected_run_id,
                artifact_path=self.config.mlflow_input_example_path,
                dst_path=self.config.root_dir
            )
        except Exception as download_error:
            logger.error(download_error)
            raise

        logger.info("Open MLflow input example.")
        input_example_path = f"{self.config.root_dir}/{self.config.mlflow_input_example_path}"
        # The context manager closes the handle as soon as the JSON is parsed.
        with open(input_example_path, encoding="utf-8") as input_example_file:
            self.req_body = json.load(input_example_file)

    def _request_prediction(self):
        """POST the stored request body to the app and return the decoded JSON reply."""
        response = requests.post(
            url=self.config.app_endpoint,
            json=self.req_body,
            timeout=self.REQUEST_TIMEOUT_SECONDS,
        )
        # Fail loudly on 4xx/5xx instead of checking an error payload.
        response.raise_for_status()
        return response.json()

    def get_request_body_value(self) -> list:
        """get the rows stored under the ``"data"`` key of the request body

        Returns:
            req_body_value: list type
        """
        logger.info("Get MLflow input example value.")
        return self.req_body["data"]

    def get_output_length(self) -> int:
        """get the number of items in the predict result

        Returns:
            len_result: int type
        """
        logger.info("Get predicted result length.")
        return len(self._request_prediction())

    def is_output_type_list(self) -> bool:
        """check if the predict result is a list

        Returns:
            is_list: bool type
        """
        logger.info("Check is the predicted output is list.")
        return isinstance(self._request_prediction(), list)

    def is_output_type_consistent(self) -> bool:
        """check if every item of the predict result is an integer

        Returns:
            bool type, True when all items are exactly ``int``
            (``bool`` values do not count) or when the result is empty
        """
        logger.info("Check is each predicted output is integer")
        predictions = self._request_prediction()
        return all(type(prediction) is int for prediction in predictions)

### Run Testing

**Debug**: Simulate the unit tests without a testing library.

In [23]:
try:
    # Wire the stage together: settings -> tester -> request body.
    config = ConfigurationManager()
    unit_testing_config = config.get_unit_test_config()
    unit_test = UnitTesting(config=unit_testing_config)
    unit_test.set_request_body()

    # Show every row that will be sent to the app.
    print("Review Contents: ")
    for content in unit_test.get_request_body_value():
        print(content)

    print("\nBegin tests:")
    # One prediction is expected per input row.
    output_length = unit_test.get_output_length()
    expected_length = len(unit_test.get_request_body_value())
    print(f"Is same size: {output_length == expected_length}")
    # Both checks already return booleans, so they are printed directly.
    print(f"Is the output is list: {unit_test.is_output_type_list()}")
    print(f"Is the output consistent: {unit_test.is_output_type_consistent()}")
except Exception as e:
    # Log the failure, then let it propagate so the cell still fails visibly.
    logger.error(e)
    raise

[2024-07-25 22:25:11,137: INFO: common: yaml file: config/config.yaml loaded successfully]
[2024-07-25 22:25:11,142: INFO: common: yaml file: metrics/params.yaml loaded successfully]
[2024-07-25 22:25:11,144: INFO: common: created directory at: artifacts]
[2024-07-25 22:25:11,146: INFO: common: created directory at: artifacts/test]
[2024-07-25 22:25:11,148: INFO: 1142801574: Set MLflow Client.]
[2024-07-25 22:25:11,179: INFO: 1142801574: Get the deployed model run id.]
[2024-07-25 22:25:11,180: INFO: 1142801574: Downloading scaler from MLflow's artifacts.]


Downloading artifacts:   0%|          | 0/1 [00:00<?, ?it/s]

[2024-07-25 22:25:11,284: INFO: 1142801574: Open MLflow input example.]
Review Contents: 
[2024-07-25 22:25:11,286: INFO: 1142801574: Get MLflow input example value.]
[-0.3762728912373728, 0.721511213900676, -1.1664918189345304, 1.208328018227423, -0.9415571315710908, -1.160776111977162, -1.3437470860433494, 0.504017059573741, -0.9112977745290756, -1.752325630637097, 2.2632745201338755, -1.877402024744184, 0.3475890727488539, -1.7592174568663417, 0.899166548336839, -2.0432209761729547, -2.024710262577306, -1.9387079993047176, 2.052308816229901, 1.22227277694122, 0.6116023868234789, 0.3834441097976565, 0.391722779514344, -0.8507592098367073, -1.97148701338222, 0.6735683641958345, 2.803530452054044, 1.7886324831048497, 10910.77]
[-2.0191851118412427, 2.1379569329428443, -2.2054297444818696, 2.3349248387093784, -1.8837497918403807, -1.224022160228724, -2.2485010385262822, 0.5189492126128068, -1.895738371318734, -1.8643662480727563, 2.046498402362263, -1.6596802352069366, 1.109122991538764