* project 目錄為 `End-to-End-Machine-Learning-Pipeline`，以下路徑均以此為根目錄

* `src/mlProject/constants/__init__.py` 下定義了

	```python
	CONFIG_FILE_PATH
	PARAMS_FILE_PATH
	SCHEMA_FILE_PATH
	```

* `src/mlProject/constants/__init__.py` 必須先完成（「5. Update the configuration manager」會用到其中定義的路徑常數）

# 自行實作

In [None]:
!git clone https://github.com/henrykohl/MLOps-Foundation.git

In [None]:
!pip install -q condacolab
import condacolab
condacolab.install() # expect a kernel restart

In [None]:
!conda create -n mlproj python=3.8 -y

In [None]:
%cd MLOps-Foundation/End-to-End-Machine-Learning-Pipeline/

In [None]:
%pwd

In [None]:
!source activate mlproj; pip install -r requirements.txt

* colab 操作時，依序執行以下步驟：
  - 將 `01_data_ingestion.ipynb` 中，自行實作的執行部分複製過來，執行一次。
  - 將 `02_data_validation.ipynb` 中，自行實作的執行部分複製過來，執行一次。
  - 將 `03_data_transformation.ipynb` 中，自行實作的執行部分複製過來，執行一次。
  - 將 `04_model_trainer.ipynb` 中，自行實作的執行部分複製過來，執行一次。

# Lecture Demo

In [1]:
import os
os.chdir("../")
%pwd

'd:\\Bappy\\Live Sessions\\Euron\\MLOPs Masters Batch\\End-to-End-Machine-Learning-Pipeline'

## 4. Update the entity
* project 對應 `src/mlProject/entity/config_entity.py`

In [None]:
from dataclasses import dataclass
from pathlib import Path


@dataclass(frozen=True)
class ModelEvaluationConfig:
    """Immutable settings consumed by the model-evaluation stage.

    According to the notes in this notebook, the path fields are read from
    the ``model_evaluation`` section of config.yaml, ``all_params`` from
    params.yaml, and ``target_column`` from schema.yaml.
    """

    # Working directory of the evaluation stage.
    root_dir: Path
    # CSV file holding the test split.
    test_data_path: Path
    # Serialized model to evaluate.
    model_path: Path
    # Hyper-parameters of the model (a mapping).
    all_params: dict
    # File the computed metrics are written to.
    metric_file_name: Path
    # Name of the label column in the test data.
    target_column: str

## 5. Update the configuration manager 
* project 對應 `src/mlProject/config/configuration.py` 用到
	- '4. entity'： `src/mlProject/entity/config_entity.py` -- 輸出 ModelEvaluationConfig

In [3]:
from mlProject.constants import *
from mlProject.utils.common import read_yaml, create_directories, save_json

In [None]:
class ConfigurationManager:
    """Loads the project's YAML files and builds per-stage config objects."""

    def __init__(
        self,
        config_filepath=CONFIG_FILE_PATH,
        params_filepath=PARAMS_FILE_PATH,
        schema_filepath=SCHEMA_FILE_PATH,
    ):
        """Read the three YAML files and create the artifacts root directory.

        Args:
            config_filepath: Location of the main configuration YAML.
            params_filepath: Location of the hyper-parameter YAML.
            schema_filepath: Location of the data-schema YAML.
        """
        # The results are accessed with attribute syntax below; the original
        # notes describe them as ConfigBox objects.
        self.config = read_yaml(config_filepath)
        self.params = read_yaml(params_filepath)
        self.schema = read_yaml(schema_filepath)

        # Top-level output directory shared by all stages.
        create_directories([self.config.artifacts_root])

    def get_model_evaluation_config(self) -> ModelEvaluationConfig:
        """Assemble the settings for the model-evaluation stage.

        Creates the stage's root directory as a side effect.

        Returns:
            A ``ModelEvaluationConfig`` combining the ``model_evaluation``
            section of the config, the ``ElasticNet`` parameters, and the
            name of the target column from the schema.
        """
        # Resolve every section first so a missing key fails before any
        # directory is created.
        eval_section = self.config.model_evaluation
        model_params = self.params.ElasticNet
        target_spec = self.schema.TARGET_COLUMN

        create_directories([eval_section.root_dir])

        return ModelEvaluationConfig(
            root_dir=eval_section.root_dir,
            test_data_path=eval_section.test_data_path,
            model_path=eval_section.model_path,
            all_params=model_params,  # passed through as loaded, not converted to a plain dict
            metric_file_name=eval_section.metric_file_name,
            target_column=target_spec.name,
        )


## 6. Update the components
* project 對應 `src/mlProject/components/model_evaluation.py` 用到
	- '4. entity'： `src/mlProject/entity/config_entity.py` -- 輸入 ModelEvaluationConfig

In [5]:
import os
import pandas as pd
from sklearn.metrics import mean_squared_error, mean_absolute_error, r2_score
from urllib.parse import urlparse
import numpy as np
import joblib

In [None]:
class ModelEvaluation:
    """Scores a previously trained model on the test split and stores the metrics."""

    def __init__(self, config: ModelEvaluationConfig):
        # Every path and the target column name are read from this object.
        self.config = config

    def eval_metrics(self, actual, pred):
        """Return ``(rmse, mae, r2)`` for predictions ``pred`` against ``actual``."""
        mse = mean_squared_error(actual, pred)
        rmse = np.sqrt(mse)
        mae = mean_absolute_error(actual, pred)
        r2 = r2_score(actual, pred)
        return rmse, mae, r2

    def save_results(self):
        """Evaluate the stored model on the test CSV and write the scores as JSON.

        Reads the test data and the serialized model from the configured
        paths, predicts on the feature columns, and saves the RMSE, MAE and
        R2 scores to ``config.metric_file_name``.
        """
        settings = self.config
        label = settings.target_column

        frame = pd.read_csv(settings.test_data_path)
        estimator = joblib.load(settings.model_path)

        # Features are every column except the target.
        features = frame.drop(columns=[label])
        # Double brackets keep the target as a single-column DataFrame.
        target = frame[[label]]

        predictions = estimator.predict(features)
        rmse, mae, r2 = self.eval_metrics(target, predictions)

        # Persist the metrics locally.
        save_json(
            path=Path(settings.metric_file_name),
            data={"rmse": rmse, "mae": mae, "r2": r2},
        )





## 7. Update the pipeline 

In [None]:
# Run the model-evaluation stage end to end.
# Exceptions propagate unchanged: the previous ``except Exception as e: raise e``
# wrapper only re-raised, so it has been dropped.
config = ConfigurationManager()  # loads the YAML files and creates the artifacts root directory
model_evaluation_config = config.get_model_evaluation_config()  # creates the stage directory and builds the config entity
# The component gets its own name instead of overwriting the config variable.
model_evaluation = ModelEvaluation(config=model_evaluation_config)
model_evaluation.save_results()  # evaluates the model and writes the metrics JSON

[2025-01-12 13:09:29,149: INFO: common: yaml file: config\config.yaml loaded successfully]
[2025-01-12 13:09:29,150: INFO: common: yaml file: params.yaml loaded successfully]
[2025-01-12 13:09:29,151: INFO: common: yaml file: schema.yaml loaded successfully]
[2025-01-12 13:09:29,152: INFO: common: created directory at: artifacts]
[2025-01-12 13:09:29,153: INFO: common: created directory at: artifacts/model_evaluation]
[2025-01-12 13:09:29,189: INFO: common: json file saved at: artifacts\model_evaluation\metrics.json]
