In [1]:
import os

In [2]:
%pwd

'f:\\PROJECTS\\imdb 50k NLP\\Sentiment-analysis-mlflow\\research'

In [3]:
os.chdir("../")

In [4]:
%pwd

'f:\\PROJECTS\\imdb 50k NLP\\Sentiment-analysis-mlflow'

In [5]:
os.environ['MLFLOW_TRACKING_URI']= 'https://dagshub.com/chrisaaryan/my-first-repo.mlflow'
os.environ['MLFLOW_TRACKING_USERNAME']= 'chrisaaryan'
os.environ['MLFLOW_TRACKING_PASSWORD']= '97f85b6ca1fc224edc2875d929cf45a081ca85b1'

In [6]:
import tensorflow as tf

In [7]:
from tensorflow.keras.models import Sequential

In [8]:
model= tf.keras.models.load_model("artifacts/model/model.h5")



In [9]:
from dataclasses import dataclass
from pathlib import Path

@dataclass(frozen=True)
class EvaluationConfig:
    path_of_model: Path
    test_data: Path
    all_params: dict
    mlflow_uri: str

In [10]:
from sentimentAnalysis.constants import *
from sentimentAnalysis.utils.common import read_yaml, create_directories

class ConfigurationManager:
    def __init__(self, config_filepath=CONFIG_FILE_PATH, params_filepath=PARAMS_FILE_PATH):
        self.config = read_yaml(config_filepath)
        self.params = read_yaml(params_filepath)
        create_directories([self.config.artifacts_root])

    def get_evaluation_config(self) -> EvaluationConfig:
        eval_config = EvaluationConfig(
            path_of_model=Path(self.params['model_config']['save_model_path']),  # Path where the model is stored
            test_data=Path(self.config['data_preprocessing']['test_data_path']),  # Path to test_data.csv
            mlflow_uri=self.config['mlflow_config']['mlflow_uri'],  # MLflow URI
            all_params=self.params  # Other parameters like max sequence length, etc.
        )
        return eval_config

[nltk_data] Downloading package stopwords to
[nltk_data]     C:\Users\DELL\AppData\Roaming\nltk_data...
[nltk_data]   Package stopwords is already up-to-date!
[nltk_data] Downloading package wordnet to
[nltk_data]     C:\Users\DELL\AppData\Roaming\nltk_data...
[nltk_data]   Package wordnet is already up-to-date!


In [11]:
import mlflow
import mlflow.keras
import pandas as pd
from sklearn.metrics import accuracy_score, precision_score, recall_score, f1_score
import tensorflow as tf
from pathlib import Path
from urllib.parse import urlparse
from sentimentAnalysis.utils.common import save_json

class Evaluation:
    def __init__(self, config: EvaluationConfig):
        self.config = config

    @staticmethod
    def load_model(path: Path) -> tf.keras.Model:
        return tf.keras.models.load_model(path)
    
    def _load_test_data(self):
        # Load test data (assuming 'preprocessed_review' is already tokenized and padded)
        df = pd.read_csv(self.config.test_data)
        X_test = df['preprocessed_review'].values  # Use the preprocessed text
        Y_test = df['sentiment'].values  # Labels (either binary or categorical)
        return X_test, Y_test
    
    def _evaluate_model(self, X_test, Y_test):
        # Use the tokenizer saved during training if applicable
        tokenizer = tf.keras.preprocessing.text.Tokenizer(num_words=self.config.all_params['model_config']['input_dim'])
        # Assume tokenizer has been fit on training data
        X_test_seq = tokenizer.texts_to_sequences(X_test)  # Tokenize test data
        X_test_pad = tf.keras.preprocessing.sequence.pad_sequences(X_test_seq, maxlen=self.config.all_params['model_config']['input_length'])

        # Perform predictions
        predictions = self.model.predict(X_test_pad)
        predictions = (predictions > 0.5).astype(int)  # Assuming binary classification

        # Calculate evaluation metrics
        accuracy = accuracy_score(Y_test, predictions)
        precision = precision_score(Y_test, predictions)
        recall = recall_score(Y_test, predictions)
        f1 = f1_score(Y_test, predictions)

        return {"accuracy": accuracy, "precision": precision, "recall": recall, "f1_score": f1}

    def evaluation(self):
        self.model = self.load_model(self.config.path_of_model)
        X_test, Y_test = self._load_test_data()
        self.metrics = self._evaluate_model(X_test, Y_test)
        self.save_score()

    def save_score(self):
        # Save metrics as JSON
        save_json(path=Path("evaluation_scores.json"), data=self.metrics)

    def log_into_mlflow(self):
        # Set MLflow tracking URI
        mlflow.set_registry_uri(self.config.mlflow_uri)
        tracking_url_type_store = urlparse(mlflow.get_tracking_uri()).scheme

        # Start logging with MLflow
        with mlflow.start_run():
            # Log parameters and metrics
            mlflow.log_params(self.config.all_params)
            mlflow.log_metrics(self.metrics)

            # Log the model to MLflow
            if tracking_url_type_store != "file":
                mlflow.keras.log_model(self.model, "model", registered_model_name="SentimentAnalysisModel2")
            else:
                mlflow.keras.log_model(self.model, "model")

In [12]:
from sentimentAnalysis import logger

try:
    logger.info("Starting model evaluation...")

    # Configuration Manager
    config = ConfigurationManager()
    eval_config = config.get_evaluation_config()

    # Model Evaluation
    evaluation = Evaluation(eval_config)
    evaluation.evaluation()  # Evaluate the model
    evaluation.log_into_mlflow()  # Log metrics and model to MLflow

    logger.info("Model evaluation and logging completed.")

except Exception as e:
    logger.exception(f"Error occurred during model evaluation: {e}")
    raise e



[1m313/313[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m9s[0m 28ms/step


Registered model 'SentimentAnalysisModel2' already exists. Creating a new version of this model...
2024/09/27 12:53:33 INFO mlflow.store.model_registry.abstract_store: Waiting up to 300 seconds for model version to finish creation. Model name: SentimentAnalysisModel2, version 2
Created version '2' of model 'SentimentAnalysisModel2'.
2024/09/27 12:53:34 INFO mlflow.tracking._tracking_service.client: 🏃 View run judicious-ray-483 at: https://dagshub.com/chrisaaryan/my-first-repo.mlflow/#/experiments/0/runs/bb7b3efcf54a4f82850cd9bf11a3862a.
2024/09/27 12:53:34 INFO mlflow.tracking._tracking_service.client: 🧪 View experiment at: https://dagshub.com/chrisaaryan/my-first-repo.mlflow/#/experiments/0.
