In [1]:
import pandas as pd 
import numpy as np
from pathlib import Path
from dataclasses import dataclass
import os

In [2]:
os.chdir("../")
os.listdir()

['.git',
 '.github',
 '.gitignore',
 'artifacts',
 'configs',
 'dvc.yaml',
 'env',
 'init_setup.sh',
 'LICENSE',
 'logs',
 'params.yaml',
 'pyproject.toml',
 'README.md',
 'requirements.txt',
 'requirements_dev.txt',
 'research',
 'setup.cfg',
 'setup.py',
 'src',
 'template.py',
 'tests',
 'tox.ini']

In [3]:
import sys
from saleStorePredictor import logging
from typing import List
from saleStorePredictor.entity.artifact_entity import DataTransformationArtifact, ModelTrainerArtifact
from saleStorePredictor.entity.config_entity import ModelTrainerConfig
from saleStorePredictor.utils import load_numpy_array_data,save_bin,load_bin
from saleStorePredictor.entity.model_factory import MetricInfoArtifact, ModelFactory,GridSearchedBestModel
from saleStorePredictor.entity.model_factory import evaluate_regression_model



class SaleStorePredictorModel:
    def __init__(self, preprocessing_object, trained_model_object):
        """
        TrainedModel constructor
        preprocessing_object: preprocessing_object
        trained_model_object: trained_model_object
        """
        self.preprocessing_object = preprocessing_object
        self.trained_model_object = trained_model_object

    def predict(self, X):
        """
        function accepts raw inputs and then transformed raw input using preprocessing_object
        which gurantees that the inputs are in the same format as the training data
        At last it perform prediction on transformed features
        """
        transformed_feature = self.preprocessing_object.transform(X)
        return self.trained_model_object.predict(transformed_feature)

    def __repr__(self):
        return f"{type(self.trained_model_object).__name__}()"

    def __str__(self):
        return f"{type(self.trained_model_object).__name__}()"




class ModelTrainer:

    def __init__(self, model_trainer_config:ModelTrainerConfig, data_transformation_artifact: DataTransformationArtifact):
        try:
            logging.info(f"{'>>' * 30}Model trainer log started.{'<<' * 30} ")
            self.model_trainer_config = model_trainer_config
            self.data_transformation_artifact = data_transformation_artifact
        except Exception as e:
            raise e

    def initiate_model_trainer(self)->ModelTrainerArtifact:
        try:
            logging.info(f"Loading transformed training dataset")
            transformed_train_file_path = (self.data_transformation_artifact.transformed_train_file_path)
            train_array = load_numpy_array_data(file_path=transformed_train_file_path)

            logging.info(f"Loading transformed testing dataset")
            transformed_test_file_path = (self.data_transformation_artifact.transformed_test_file_path)
            test_array = load_numpy_array_data(file_path=transformed_test_file_path)

            logging.info(f"Splitting training and testing input and target feature")
            x_train,y_train,x_test,y_test = train_array[:,:-1],train_array[:,-1],test_array[:,:-1],test_array[:,-1]
            
            logging.info(f"Extracting model config file path")
            model_config_file_path = self.model_trainer_config.model_config_file_path

            logging.info(f"Initializing model factory class using above model config file: {model_config_file_path}")
            model_factory = ModelFactory(model_config_path=model_config_file_path)
            
            
            base_accuracy = self.model_trainer_config.base_accuracy
            logging.info(f"Expected accuracy: {base_accuracy}")

            logging.info(f"Initiating operation model selecttion")
            best_model = model_factory.get_best_model(X=x_train,y=y_train,base_accuracy=base_accuracy)
            
            logging.info(f"Best model found on training dataset: {best_model}")
            
            logging.info(f"Extracting trained model list.")
            grid_searched_best_model_list:List[GridSearchedBestModel]=model_factory.grid_searched_best_model_list
            
            model_list = [model.best_model for model in grid_searched_best_model_list ]
            logging.info(f"Evaluation all trained model on training and testing dataset both")
            metric_info:MetricInfoArtifact = evaluate_regression_model(model_list=model_list,X_train=x_train,y_train=y_train,X_test=x_test,y_test=y_test,base_accuracy=base_accuracy)

            logging.info(f"Best found model on both training and testing dataset.")
            
            preprocessing_obj=  load_bin(file_path=self.data_transformation_artifact.preprocessed_object_file_path)
            model_object = metric_info.model_object


            trained_model_file_path=self.model_trainer_config.trained_model_file_path
            housing_model = SaleStorePredictorModel(preprocessing_object=preprocessing_obj,trained_model_object=model_object)
            logging.info(f"Saving model at path: {trained_model_file_path}")
            save_bin(file_path=trained_model_file_path,obj=housing_model)


            model_trainer_artifact=  ModelTrainerArtifact(is_trained=True,message="Model Trained successfully",
            trained_model_file_path=trained_model_file_path,
            train_rmse=metric_info.train_rmse,
            test_rmse=metric_info.test_rmse,
            train_accuracy=metric_info.train_accuracy,
            test_accuracy=metric_info.test_accuracy,
            model_accuracy=metric_info.model_accuracy
            
            )

            logging.info(f"Model Trainer Artifact: {model_trainer_artifact}")
            return model_trainer_artifact
        except Exception as e:
            raise  e

    def __del__(self):
        logging.info(f"{'>>' * 30}Model trainer log completed.{'<<' * 30} ")

In [4]:
from saleStorePredictor.config import ConfigurationManager
config = ConfigurationManager()
model_trainer_config = config.get_data_model_trainer_config()
transformation_artifact = config.get_data_transformation_artifact()

model_traner = ModelTrainer(model_trainer_config, transformation_artifact)
model_traner.initiate_model_trainer()

[2022-09-24 21:38:12,280: INFO: common]: yaml file: configs\config.yaml loaded successfully
[2022-09-24 21:38:12,282: INFO: common]: yaml file: params.yaml loaded successfully
[2022-09-24 21:38:12,283: INFO: common]: created directory at: artifacts
[2022-09-24 21:38:12,284: INFO: 2506256659]: >>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>Model trainer log started.<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<< 
[2022-09-24 21:38:12,285: INFO: 2506256659]: Loading transformed training dataset
[2022-09-24 21:38:12,294: INFO: 2506256659]: Loading transformed testing dataset
[2022-09-24 21:38:12,297: INFO: 2506256659]: Splitting training and testing input and target feature
[2022-09-24 21:38:12,298: INFO: 2506256659]: Extracting model config file path
[2022-09-24 21:38:12,299: INFO: 2506256659]: Initializing model factory class using above model config file: configs\model.yaml
[2022-09-24 21:38:12,303: INFO: model_factory]: ModelFactory.read_params: {'grid_search

ValueError: 
All the 10 fits failed.
It is very likely that your model is misconfigured.
You can try to debug the error by setting error_score='raise'.

Below are more details about the failures:
--------------------------------------------------------------------------------
2 fits failed with the following error:
Traceback (most recent call last):
  File "d:\FSDS\DS internship\stores_sales_prediction\env\lib\site-packages\sklearn\model_selection\_validation.py", line 686, in _fit_and_score
    estimator.fit(X_train, y_train, **fit_params)
  File "d:\FSDS\DS internship\stores_sales_prediction\env\lib\site-packages\sklearn\linear_model\_base.py", line 684, in fit
    X, y = self._validate_data(
  File "d:\FSDS\DS internship\stores_sales_prediction\env\lib\site-packages\sklearn\base.py", line 596, in _validate_data
    X, y = check_X_y(X, y, **check_params)
  File "d:\FSDS\DS internship\stores_sales_prediction\env\lib\site-packages\sklearn\utils\validation.py", line 1074, in check_X_y
    X = check_array(
  File "d:\FSDS\DS internship\stores_sales_prediction\env\lib\site-packages\sklearn\utils\validation.py", line 856, in check_array
    array = np.asarray(array, order=order, dtype=dtype)
ValueError: could not convert string to float: 'DRO47'

--------------------------------------------------------------------------------
8 fits failed with the following error:
Traceback (most recent call last):
  File "d:\FSDS\DS internship\stores_sales_prediction\env\lib\site-packages\sklearn\model_selection\_validation.py", line 686, in _fit_and_score
    estimator.fit(X_train, y_train, **fit_params)
  File "d:\FSDS\DS internship\stores_sales_prediction\env\lib\site-packages\sklearn\linear_model\_base.py", line 684, in fit
    X, y = self._validate_data(
  File "d:\FSDS\DS internship\stores_sales_prediction\env\lib\site-packages\sklearn\base.py", line 596, in _validate_data
    X, y = check_X_y(X, y, **check_params)
  File "d:\FSDS\DS internship\stores_sales_prediction\env\lib\site-packages\sklearn\utils\validation.py", line 1074, in check_X_y
    X = check_array(
  File "d:\FSDS\DS internship\stores_sales_prediction\env\lib\site-packages\sklearn\utils\validation.py", line 856, in check_array
    array = np.asarray(array, order=order, dtype=dtype)
ValueError: could not convert string to float: 'NCM55'
