In [1]:
import os

In [2]:
%pwd

'c:\\Users\\frup00090410\\Mlops_project\\research'

In [3]:
# Step up one directory so later relative paths resolve from the parent of the
# directory the kernel started in.
# NOTE: not idempotent - every re-run of this cell moves the working directory
# up another level.
os.chdir("../")

In [4]:
%pwd

'c:\\Users\\frup00090410\\Mlops_project'

In [5]:
from dotenv import load_dotenv


# Pull the key/value pairs defined in .env into the process environment.
load_dotenv()

# MLflow connection settings, looked up from the environment.
# Each name is None when the corresponding variable is not set.
MLFLOW_TRACKING_URI = os.environ.get('MLFLOW_TRACKING_URI')
MLFLOW_TRACKING_USERNAME = os.environ.get('MLFLOW_TRACKING_USERNAME')
MLFLOW_TRACKING_PASSWORD = os.environ.get('MLFLOW_TRACKING_PASSWORD')

In [17]:
from dataclasses import dataclass
from pathlib import Path


@dataclass(frozen=True)
class ModelEvaluationConfig:
    """Immutable settings consumed by the model-evaluation stage."""

    # Directory that contains the saved model file.
    path_model: Path
    # Directory that holds the pickled, preprocessed data splits.
    preprocessed_spilitted_data_path: Path
    # Hyper-parameters recorded alongside the evaluation run.
    model_params: dict
    # MLflow server URI. It is read from the environment, so it is a string
    # (previously mis-annotated as int) and may be None when the variable is unset.
    mlflow_uri: str

In [7]:
from Classifier.constants import *
from Classifier.utils.common import read_yaml, create_directories, write_to_pickle

In [22]:
class ConfigurationManager:
    """Reads the config and params YAML files and builds stage configurations."""

    def __init__(
        self,
        config_filepath=CONFIG_FILE_PATH,
        params_filepath=PARAMS_FILE_PATH,
    ):
        """Load both YAML files and create the artifacts root directory.

        Args:
            config_filepath: location of the main configuration YAML.
            params_filepath: location of the hyper-parameter YAML.
        """
        self.config = read_yaml(config_filepath)
        self.params = read_yaml(params_filepath)

        artifacts_root = self.config.artifacts_root
        create_directories([artifacts_root])

    def get_model_evaluation_config(self) -> ModelEvaluationConfig:
        """Assemble the settings needed to evaluate the trained model.

        Paths are taken from the ``model_training`` section of the config file,
        the MLflow URI from the ``MLFLOW_TRACKING_URI`` module-level name, and
        the hyper-parameters from the params file.

        Returns:
            ModelEvaluationConfig: the populated, immutable configuration.
        """
        training_section = self.config.model_training

        model_dir = Path(training_section.model_path)
        data_dir = Path(training_section.preprocessed_spilitted_data_path)

        hyper_params = {
            "batch_size": self.params.BATCH_SIZE,
            "epochs": self.params.EPOCHS,
            "max_words": self.params.MAX_WORDS,
            "validation_split": self.params.VALIDATION_SPLIT,
            "learning_rate": self.params.LEARNING_RATE,
            "beta_1": self.params.BETA_1,
            "beta_2": self.params.BETA_2,
        }

        return ModelEvaluationConfig(
            path_model=model_dir,
            preprocessed_spilitted_data_path=data_dir,
            mlflow_uri=MLFLOW_TRACKING_URI,
            model_params=hyper_params,
        )

In [57]:
import tensorflow as tf
from pathlib import Path
import mlflow
import mlflow.keras
import tensorflow
from tensorflow.keras.models import load_model
from urllib.parse import urlparse
import pickle
import json

In [64]:
class ModelEvaluation:
    """Evaluates the saved Keras model on the test split and logs the run to MLflow."""

    def __init__(self, config: "ModelEvaluationConfig"):
        """Store the evaluation settings.

        Args:
            config: settings providing ``path_model``,
                ``preprocessed_spilitted_data_path``, ``model_params`` and
                ``mlflow_uri``.
        """
        self.config = config
        # Both are filled in by evaluate_model(); they stay None until then so
        # log_into_mlflow() can detect that it was called too early.
        self.model = None
        self.score = None

    def read_pickle_files(self):
        """Load every ``*.pickle`` file found in the preprocessed-data directory.

        Returns:
            dict: maps each file name (including the ``.pickle`` suffix) to the
            object unpickled from that file.
        """
        data_dir = self.config.preprocessed_spilitted_data_path
        data = {}
        # sorted() makes the load order independent of the filesystem.
        for filename in sorted(os.listdir(data_dir)):
            if not filename.endswith('.pickle'):
                continue
            # SECURITY: pickle.load can execute arbitrary code; only point this
            # at files produced by this project's own pipeline.
            with open(os.path.join(data_dir, filename), 'rb') as f:
                data[filename] = pickle.load(f)
        return data

    def evaluate_model(self, data: dict):
        """Evaluate the saved model on the test split and persist the metrics.

        The model is loaded from ``model.h5`` inside ``config.path_model``. The
        loaded model and the raw evaluation result are kept on ``self.model``
        and ``self.score``; loss and accuracy are also written to
        ``artifacts/model_evaluation/evaluation.json``.

        Args:
            data (dict): must contain the keys ``'X_test_preprocessed.pickle'``
                and ``'y_test_preprocessed.pickle'`` (as returned by
                ``read_pickle_files``).

        Raises:
            KeyError: if either test-split entry is missing from ``data``.
        """
        required_keys = ('X_test_preprocessed.pickle', 'y_test_preprocessed.pickle')
        missing_keys = [key for key in required_keys if key not in data]
        if missing_keys:
            raise KeyError(f"missing test data entries: {missing_keys}")

        # Access test data
        x_test = data['X_test_preprocessed.pickle']
        y_test = data['y_test_preprocessed.pickle']

        # Load model and keep it for the later MLflow logging step
        model = load_model(os.path.join(self.config.path_model, 'model.h5'))
        self.model = model

        # Evaluate model
        evaluation = model.evaluate(
            x_test,
            y_test,
            batch_size=self.config.model_params['batch_size'],
            verbose=1,
        )
        self.score = evaluation

        # Create directory to store evaluation metrics
        output_dir = "artifacts/model_evaluation"
        os.makedirs(output_dir, exist_ok=True)

        # NOTE(review): indexing assumes the model was compiled with exactly one
        # metric (accuracy), so evaluate() returns [loss, accuracy] - confirm.
        # float() keeps the values JSON-serialisable if they are numpy scalars.
        evaluation_dict = {
            'loss': float(evaluation[0]),
            'accuracy': float(evaluation[1]),
        }
        with open(os.path.join(output_dir, 'evaluation.json'), 'w') as f:
            json.dump(evaluation_dict, f)

    def log_into_mlflow(self):
        """Log parameters, metrics and the evaluated model to MLflow.

        The model is additionally registered under the name ``"MLP"`` unless the
        tracking store is file-based.

        Raises:
            RuntimeError: if ``evaluate_model`` has not been run yet, so there is
                no model or score to log.
        """
        model = getattr(self, "model", None)
        score = getattr(self, "score", None)
        if model is None or score is None:
            raise RuntimeError(
                "evaluate_model() must be called before log_into_mlflow()"
            )

        # NOTE(review): only the registry URI is set from the config; the
        # tracking URI is whatever MLflow resolves on its own (for example from
        # the MLFLOW_TRACKING_URI environment variable) - confirm this is intended.
        mlflow.set_registry_uri(self.config.mlflow_uri)
        tracking_url_type_store = urlparse(mlflow.get_tracking_uri()).scheme

        with mlflow.start_run():
            mlflow.log_params(self.config.model_params)
            mlflow.log_metrics(
                {"loss": score[0], "accuracy": score[1]}
            )
            # Model registry does not work with file store
            if tracking_url_type_store != "file":
                # Register the model
                # There are other ways to use the Model Registry, which depends on the use case,
                # please refer to the doc for more information:
                # https://mlflow.org/docs/latest/model-registry.html#api-workflow
                mlflow.keras.log_model(model, "model", registered_model_name="MLP")
            else:
                mlflow.keras.log_model(model, "model")
        






In [65]:
# Build the evaluation settings from the YAML config and params files.
config = ConfigurationManager()
model_evaluation_config = config.get_model_evaluation_config()

# Load the pickled data, evaluate the saved model on it, then log the run to MLflow.
model_evaluation = ModelEvaluation(config=model_evaluation_config)
evaluation_data = model_evaluation.read_pickle_files()
model_evaluation.evaluate_model(evaluation_data)
model_evaluation.log_into_mlflow()

[2023-12-26 17:12:48,251: INFO: common: yaml file: config\config.yaml loaded successfully]
[2023-12-26 17:12:48,257: INFO: common: yaml file: params.yaml loaded successfully]
[2023-12-26 17:12:48,260: INFO: common: created directory at: artifacts]




[2023-12-26 17:12:52,838: INFO: builder_impl: Assets written to: C:\Users\FRUP00~1\AppData\Local\Temp\tmpprbhtleu\model\data\model\assets]


Successfully registered model 'MLP'.
2023/12/26 17:13:18 INFO mlflow.tracking._model_registry.client: Waiting up to 300 seconds for model version to finish creation.                     Model name: MLP, version 1
Created version '1' of model 'MLP'.
