In [1]:
import os
from dotenv import load_dotenv


# Remember the directory the kernel started in and always chdir relative to it.
# A bare relative `os.chdir("../")` is not idempotent: every re-run of this cell
# would climb one more level and break all later relative paths.
NOTEBOOK_DIR = globals().get("NOTEBOOK_DIR", os.getcwd())
os.chdir(os.path.join(NOTEBOOK_DIR, "../"))

In [2]:
# Load variables from a local .env file (python-dotenv) into the process
# environment before reading them below.
load_dotenv()

# MLflow connection settings; each one is None when the variable is not set.
MLFLOW_TRACKING_URI = os.environ.get("MLFLOW_TRACKING_URI")
MLFLOW_TRACKING_USERNAME = os.environ.get("MLFLOW_TRACKING_USERNAME")
MLFLOW_TRACKING_PASSWORD = os.environ.get("MLFLOW_TRACKING_PASSWORD")

In [3]:
from dataclasses import dataclass
from pathlib import Path


@dataclass(frozen=True)
class ModelEvaluationConfig:
    """Immutable settings consumed by the model-evaluation stage."""

    # Directory that is created for this stage and receives the score JSON files.
    root_dir: Path
    # Location of the trained model that is loaded for evaluation.
    model_path: Path
    # Image directory the validation generator reads from.
    data_path: Path
    # Parameters logged to MLflow via `mlflow.log_params`.
    params: dict
    # URI handed to MLflow (`mlflow.set_registry_uri`).
    mlflow_uri: str
    # All but the last element are used as the generator's target size
    # (presumably [height, width, channels] — TODO confirm against params.yaml).
    image_size: list
    # Batch size used by the validation generator.
    batch_size: int

In [4]:
from cnn_classifier.constants import *
from cnn_classifier.utils.common import read_yaml, create_directories, save_json


class ConfigurationManager:
    """Read the project YAML files and build per-stage configuration objects."""

    def __init__(
        self,
        config_file_path: Path = CONFIG_FILE_PATH,
        params_file_path: Path = PARAMS_FILE_PATH,
    ):
        """Load the config and params YAML files and create the artifacts root.

        Args:
            config_file_path: Path of the main configuration YAML file.
            params_file_path: Path of the parameters YAML file.
        """
        self.config = read_yaml(config_file_path)
        self.params = read_yaml(params_file_path)

        create_directories([self.config.artifacts_root])

    def get_model_evaluation_config(self) -> ModelEvaluationConfig:
        """Assemble the configuration for the model-evaluation stage.

        Creates the stage's root directory as a side effect.

        Returns:
            A populated ``ModelEvaluationConfig``.

        Raises:
            FileNotFoundError: If the unzip directory does not exist or contains
                no sub-directory to use as the dataset root.
        """
        cfg = self.config.model_evaluation
        params = self.params.params

        model_path = self.config.model_trainer.trained_model_file_path

        # The dataset root is the first sub-directory of the unzip directory.
        # Close the scandir iterator deterministically, and sort the entries
        # because scandir yields them in arbitrary order.
        unzip_dir = self.config.data_ingestion.unzip_dir
        with os.scandir(unzip_dir) as entries:
            dataset_dirs = sorted(entry.path for entry in entries if entry.is_dir())
        if not dataset_dirs:
            raise FileNotFoundError(
                f"No dataset directory found inside: {unzip_dir}"
            )
        data_path = dataset_dirs[0]

        create_directories([cfg.root_dir])

        model_evaluation_config = ModelEvaluationConfig(
            root_dir=cfg.root_dir,
            model_path=model_path,
            data_path=data_path,
            # NOTE(review): this passes the whole params file, not the inner
            # `params` section read above — confirm which one should be logged.
            params=self.params,
            mlflow_uri=MLFLOW_TRACKING_URI,
            image_size=params.IMAGE_SIZE,
            batch_size=params.BATCH_SIZE,
        )

        return model_evaluation_config

In [5]:
import mlflow
import mlflow.keras
import tensorflow as tf
from urllib.parse import urlparse


class ModelEvaluation:
    """Evaluate the trained model on the validation split and log the run to MLflow."""

    def __init__(self, config: ModelEvaluationConfig):
        """Store the stage configuration.

        Args:
            config: Settings for this evaluation run.
        """
        self.config = config
        # Filled in by evaluation(); kept as None until then so that calling
        # log_into_mlflow() too early can be reported clearly.
        self.model = None
        self.score = None
        self.val_generator = None

    def _val_generator(self):
        """Build the validation data generator and store it on ``self.val_generator``.

        Images are rescaled by 1/255 and the "validation" subset of a 30%
        validation split is used, without shuffling.
        """
        data_generator_kwargs = dict(rescale=1 / 255, validation_split=0.30)
        data_flow_kwargs = dict(
            # Drop the last element of image_size (presumably the channel
            # count — TODO confirm) to get the target size.
            target_size=self.config.image_size[:-1],
            batch_size=self.config.batch_size,
            interpolation="bilinear",
        )

        val_data_generator = tf.keras.preprocessing.image.ImageDataGenerator(
            **data_generator_kwargs
        )

        self.val_generator = val_data_generator.flow_from_directory(
            directory=self.config.data_path,
            subset="validation",
            shuffle=False,
            **data_flow_kwargs,
        )

    @staticmethod
    def load_model(path: Path) -> tf.keras.Model:
        """Load and return the Keras model stored at ``path``."""
        return tf.keras.models.load_model(path)

    def evaluation(self):
        """Load the model, build the validation generator and evaluate.

        The result of ``model.evaluate`` is stored on ``self.score``.
        """
        self.model = self.load_model(self.config.model_path)
        self._val_generator()
        self.score = self.model.evaluate(self.val_generator)

    def save_score(self, path: Path, scores: dict):
        """Write ``scores`` as JSON to ``path``."""
        save_json(path, data=scores)

    def log_into_mlflow(self):
        """Log parameters, metrics and the model to MLflow; save scores locally.

        Raises:
            RuntimeError: If ``evaluation()`` has not been run yet.
        """
        if self.model is None or self.score is None:
            raise RuntimeError(
                "evaluation() must be called before log_into_mlflow()"
            )

        mlflow.set_registry_uri(self.config.mlflow_uri)
        # Apply the configured URI to tracking as well, so the run does not
        # silently depend on the ambient MLFLOW_TRACKING_URI environment variable.
        if self.config.mlflow_uri:
            mlflow.set_tracking_uri(self.config.mlflow_uri)
        tracking_url_type_store = urlparse(mlflow.get_tracking_uri()).scheme

        with mlflow.start_run() as run:
            # Assumes evaluate() returned [loss, accuracy] — TODO confirm
            # against the metrics the model was compiled with.
            scores = {"loss": self.score[0], "accuracy": self.score[1]}
            mlflow.log_params(self.config.params)
            mlflow.log_metrics(scores)

            # Save scores locally as well, keyed by the MLflow run id
            run_id = run.info.run_id
            path = Path(self.config.root_dir) / f"scores_{run_id}.json"
            self.save_score(path, scores)

            # Model registry does not work with file store
            if tracking_url_type_store != "file":
                # Register the model
                # There are other ways to use the Model Registry, which depends on the use case,
                # please refer to the doc for more information:
                # https://mlflow.org/docs/latest/model-registry.html#api-workflow
                mlflow.keras.log_model(
                    self.model, "model", registered_model_name="VGG16Model"
                )
            else:
                mlflow.keras.log_model(self.model, "model")

In [6]:
from cnn_classifier import logger


class ModelEvaluationPipeline:
    """Runs the model-evaluation stage from configuration to MLflow logging."""

    def run_pipeline(self):
        """Build the config, evaluate the model and log the results.

        Any exception is logged and then re-raised to the caller.
        """
        try:
            logger.info("Model evaluation started")
            eval_config = ConfigurationManager().get_model_evaluation_config()
            evaluator = ModelEvaluation(config=eval_config)
            evaluator.evaluation()
            evaluator.log_into_mlflow()
            logger.info("Model evaluation ended")

        except Exception as exc:
            logger.error(f"Model evaluation failed: {exc}")
            raise exc

In [7]:
# Run the evaluation stage: builds the configuration, evaluates the trained
# model and logs the results to MLflow. Errors are logged and re-raised.
model_evaluation_pipeline = ModelEvaluationPipeline()
model_evaluation_pipeline.run_pipeline()

[ 2024-02-28 11:44:27,379 ] 8 309999184 cnn_classifier -  INFO - Model evaluation started
[ 2024-02-28 11:44:27,384 ] 34 common cnn_classifier -  INFO - Loaded YAML file successfully from: config/config.yaml
[ 2024-02-28 11:44:27,386 ] 34 common cnn_classifier -  INFO - Loaded YAML file successfully from: params.yaml
[ 2024-02-28 11:44:27,387 ] 55 common cnn_classifier -  INFO - Created directory at: artifacts
[ 2024-02-28 11:44:27,387 ] 55 common cnn_classifier -  INFO - Created directory at: artifacts/model_evaluation
Found 102 images belonging to 2 classes.
[ 2024-02-28 11:44:43,407 ] 70 common cnn_classifier -  INFO - JSON file saved at: artifacts/model_evaluation/scores_b0eb26a22d964f3eb1e5e42b8933edd0.json




[ 2024-02-28 11:44:44,134 ] 801 builder_impl tensorflow -  INFO - Assets written to: /var/folders/bx/p7m1k3qn7f1cynghy4m89byw0000gn/T/tmp7gv4tcsg/model/data/model/assets


Registered model 'VGG16Model' already exists. Creating a new version of this model...
2024/02/28 11:45:07 INFO mlflow.store.model_registry.abstract_store: Waiting up to 300 seconds for model version to finish creation. Model name: VGG16Model, version 2
Created version '2' of model 'VGG16Model'.


[ 2024-02-28 11:45:08,045 ] 16 309999184 cnn_classifier -  INFO - Model evaluation ended
