In [1]:
import os

In [2]:
%pwd

'd:\\iNeuron_Projects\\machine_learning_01\\research'

In [3]:
os.chdir("../")

In [4]:
%pwd

'd:\\iNeuron_Projects\\machine_learning_01'

### Make data class: 
- initials entity

In [5]:
from dataclasses import dataclass
from pathlib import Path


@dataclass(frozen=True)
class ModelEvaluationConfig:
    root_dir: Path
    test_data_path: Path
    model_path: Path
    all_params: dict
    metric_file_name: Path
    target_column: str

###  update the configuration manager in src config

In [6]:
from MachineLearning_2023.constants import *
from MachineLearning_2023.utils.common import read_yaml, create_directories, save_json

In [7]:
class ConfigurationManager:           
    def __init__(
        self,
        config_filepath = CONFIG_FILE_PATH,
        params_filepath = PARAMS_FILE_PATH,
        schema_filepath = SCHEMA_FILE_PATH):

        self.config = read_yaml(config_filepath)
        self.params = read_yaml(params_filepath)
        self.schema = read_yaml(schema_filepath)

        create_directories([self.config.artifacts_root])


    
    def get_model_evaluation_config(self) -> ModelEvaluationConfig:
        config = self.config.model_evaluation
        params = self.params.ElasticNet
        schema =  self.schema.TARGET_COLUMN

        create_directories([config.root_dir])

        model_evaluation_config = ModelEvaluationConfig(
            root_dir=config.root_dir,
            test_data_path=config.test_data_path,
            model_path = config.model_path,
            all_params=params,
            metric_file_name = config.metric_file_name,
            target_column = schema.name
           
        )

        return model_evaluation_config

### Import libraries 

In [8]:
import os
import pandas as pd
from sklearn.metrics import mean_squared_error, mean_absolute_error, r2_score, mean_squared_log_error, median_absolute_error
from urllib.parse import urlparse
import numpy as np
import joblib

### update the components


In [9]:
class ModelEvaluation:
    def __init__(self, config: ModelEvaluationConfig):
        self.config = config

    def eval_metrics(self, actual, pred):
        rmse = np.sqrt(mean_squared_error(actual, pred))  # means squared error 
        mae = mean_absolute_error(actual, pred)           # means absolute error
        r2 = r2_score(actual, pred)                       # means squared error
        msle = mean_squared_log_error(actual, pred)       # means_squared_log_error
        medae = median_absolute_error(actual, pred)       # means median_absolute_error
        return rmse, mae, r2, msle, medae
    


    def save_results(self):
        test_data = pd.read_csv(self.config.test_data_path)
        model = joblib.load(self.config.model_path)

        test_x = test_data.drop([self.config.target_column], axis=1)
        test_y = test_data[[self.config.target_column]]

        predicted_qualities = model.predict(test_x)

        (rmse, mae, r2, msle, medae) = self.eval_metrics(test_y, predicted_qualities)

        # Saving metrics as local
        scores = {
            "rmse": rmse,
            "mae": mae,
            "r2": r2,
            "msle": msle,
            "medae": medae
        }
        save_json(path=Path(self.config.metric_file_name), data=scores)    # This will save the metrics in the local file

In [10]:
# Import necessary libraries
import os
import pandas as pd
from sklearn.model_selection import train_test_split
from sklearn.linear_model import LogisticRegression
from sklearn.ensemble import ExtraTreesClassifier
from sklearn.metrics import accuracy_score, confusion_matrix

# Load your data
DATA_PATH = 'data/winequality-red.csv'
wines = pd.read_csv(DATA_PATH)

# Assuming 'quality' is your target variable
X = wines.drop('quality', axis=1)
y = wines['quality']

# Split the data into training and testing sets
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)

# Define functions to evaluate models (evaluate_logistic_regression and evaluate_extra_trees_classifier functions)

def evaluate_logistic_regression(X_train, y_train, X_test, y_test):
    model = LogisticRegression()
    model.fit(X_train, y_train)
    y_pred = model.predict(X_test)
    
    accuracy = accuracy_score(y_test, y_pred)
    confusion_mat = confusion_matrix(y_test, y_pred)
    
    return accuracy, confusion_mat

def evaluate_extra_trees_classifier(X_train, y_train, X_test, y_test):
    classifier = ExtraTreesClassifier()
    classifier.fit(X_train, y_train)
    y_pred = classifier.predict(X_test)
    
    accuracy = accuracy_score(y_test, y_pred)
    confusion_mat = confusion_matrix(y_test, y_pred)
    
    return accuracy, confusion_mat


### Update the pipeline

In [12]:
try:
    config = ConfigurationManager()
    model_evaluation_config = config.get_model_evaluation_config()
    model_evaluation = ModelEvaluation(config=model_evaluation_config)
    model_evaluation.save_results()
    
    # Evaluate Logistic Regression
    logistic_accuracy, logistic_confusion_mat = evaluate_logistic_regression(X_train, y_train, X_test, y_test)
    print("Logistic Regression Accuracy:", logistic_accuracy)
    print("Logistic Regression Confusion Matrix:\n", logistic_confusion_mat)

    # Evaluate Extra Trees Classifier
    et_accuracy, et_confusion_mat = evaluate_extra_trees_classifier(X_train, y_train, X_test, y_test)
    print("Extra Trees Classifier Accuracy:", et_accuracy)
    print("Extra Trees Classifier Confusion Matrix:\n", et_confusion_mat)

except Exception as e:
    raise e

[2024-02-08 20:20:55,592: INFO: common: yaml file: config\config.yaml loaded successfully]
[2024-02-08 20:20:55,602: INFO: common: yaml file: params.yaml loaded successfully]
[2024-02-08 20:20:55,613: INFO: common: yaml file: schema.yaml loaded successfully]
[2024-02-08 20:20:55,618: INFO: common: created directory at: artifacts]
[2024-02-08 20:20:55,620: INFO: common: created directory at: artifacts/model_evaluation]
[2024-02-08 20:20:55,664: INFO: common: json file saved at: artifacts\model_evaluation\metrics.json]
Logistic Regression Accuracy: 0.55
Logistic Regression Confusion Matrix:
 [[ 0  0  1  0  0  0]
 [ 0  0  9  1  0  0]
 [ 0  0 94 36  0  0]
 [ 0  0 49 81  2  0]
 [ 0  0  4 37  1  0]
 [ 0  0  0  5  0  0]]


STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.

Increase the number of iterations (max_iter) or scale the data as shown in:
    https://scikit-learn.org/stable/modules/preprocessing.html
Please also refer to the documentation for alternative solver options:
    https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
  n_iter_i = _check_optimize_result(


Extra Trees Classifier Accuracy: 0.69375
Extra Trees Classifier Confusion Matrix:
 [[ 0  0  1  0  0  0]
 [ 0  0  8  2  0  0]
 [ 0  0 99 31  0  0]
 [ 0  0 28 99  5  0]
 [ 0  0  0 17 24  1]
 [ 0  0  0  1  4  0]]
