In [1]:
import os 
import sys
import pickle

In [2]:
%pwd

'c:\\Pred_maintainance-Project\\Notebook'

In [3]:
# The kernel starts in <project>/Notebook, while the `src` package imported
# below lives in the project root one level up. Guard the move so re-running
# this cell does not keep climbing the directory tree.
if not os.path.isdir("src"):
    os.chdir("../")

In [4]:
%pwd

'c:\\Pred_maintainance-Project'

In [5]:
from src.exception import CustomException
from src.logger import logging
import pandas as pd
import numpy as np

In [6]:
from dataclasses import dataclass

In [7]:
from sklearn.linear_model import LinearRegression
from sklearn.svm import SVR
from sklearn.ensemble import RandomForestRegressor
from sklearn.neighbors import KNeighborsRegressor
from sklearn.tree import DecisionTreeRegressor
from sklearn.ensemble import GradientBoostingRegressor

In [8]:
from sklearn.model_selection import GridSearchCV

In [9]:
from sklearn.metrics import mean_squared_error, mean_absolute_error, r2_score

In [11]:
def evaluate_models(X_train, y_train, X_test, y_test, models, param):
    """Tune, fit and score every candidate model; return the test R^2 per model.

    For each entry in ``models`` a 3-fold ``GridSearchCV`` is run over the
    matching grid in ``param``. The best parameters are then applied to the
    estimator object stored in ``models`` and that same object is fitted on
    the training data, so ``models[name]`` is a trained estimator afterwards.

    Args:
        X_train: Training features.
        y_train: Training targets.
        X_test: Held-out features used for scoring.
        y_test: Held-out targets used for scoring.
        models: Mapping of model name -> estimator. The estimators are
            mutated in place (``set_params`` followed by ``fit``).
        param: Mapping of model name -> parameter grid passed to
            ``GridSearchCV``. Needs a key for every name in ``models``.

    Returns:
        dict: Model name -> ``r2_score`` on the test split.

    Raises:
        CustomException: Wraps any exception raised while tuning, fitting or
            scoring (including a name missing from ``param``).
    """
    try:
        report = {}

        for name, model in models.items():
            # Only best_params_ is needed from the search. GridSearchCV works
            # on clones, so its own refit would train an estimator that is
            # never used; refit=False skips that extra fit.
            gs = GridSearchCV(model, param[name], cv=3, refit=False)
            gs.fit(X_train, y_train)

            # Fit the caller's estimator object itself so that it can be
            # used (and pickled) after this function returns.
            model.set_params(**gs.best_params_)
            model.fit(X_train, y_train)

            report[name] = r2_score(y_test, model.predict(X_test))

        return report

    except Exception as e:
        raise CustomException(e, sys) from e

In [12]:
def evaluate_model(X_train, y_train, X_test, y_test, models, param):
    """Tune, fit and score every candidate model, logging each step.

    For each entry in ``models`` a 3-fold ``GridSearchCV`` is run over the
    matching grid in ``param``. The best parameters are applied to the
    estimator object stored in ``models`` and that same object is fitted on
    the training data, so ``models[name]`` is a trained estimator afterwards.

    Args:
        X_train: Training features.
        y_train: Training targets.
        X_test: Held-out features used for scoring.
        y_test: Held-out targets used for scoring.
        models: Mapping of model name -> estimator. The estimators are
            mutated in place (``set_params`` followed by ``fit``).
        param: Mapping of model name -> parameter grid passed to
            ``GridSearchCV``. Needs a key for every name in ``models``.

    Returns:
        dict: Model name -> ``r2_score`` on the test split.

    Raises:
        CustomException: Wraps any exception raised while tuning, fitting or
            scoring (including a name missing from ``param``).
    """
    try:
        report = {}
        logging.info("Creating model report")

        for name, model in models.items():
            logging.info("Model Listd in Dictionary")

            # Only best_params_ is needed from the search. GridSearchCV works
            # on clones, so its own refit would train an estimator that is
            # never used; refit=False skips that extra fit.
            gs = GridSearchCV(model, param[name], cv=3, refit=False)
            gs.fit(X_train, y_train)

            # Fit the caller's estimator object itself so that it can be
            # used (and pickled) after this function returns.
            model.set_params(**gs.best_params_)
            model.fit(X_train, y_train)
            logging.info(f"{model} trained")

            y_test_pred = model.predict(X_test)
            logging.info(f"{model} predicted")

            # The score is R^2 on the held-out split.
            test_model_score = r2_score(y_test, y_test_pred)
            logging.info(f"{model} accuracy score generated")

            report[name] = test_model_score
            logging.info("Report generated")

        return report

    except Exception as e:
        # Failures are logged at ERROR level so they stand out from the
        # step-by-step INFO messages above.
        logging.error("Exception as model training step")
        raise CustomException(e, sys) from e

In [13]:
def save_object(file_path, obj):
    """Pickle ``obj`` to ``file_path``, creating parent directories if needed.

    Args:
        file_path: Destination path of the pickle file. May be a bare
            filename, in which case the file is written to the current
            working directory.
        obj: Any picklable object.

    Raises:
        CustomException: Wraps any error raised while creating directories,
            opening the file or pickling the object.
    """
    try:
        dir_path = os.path.dirname(file_path)

        # dirname() is "" for a bare filename and os.makedirs("") raises
        # FileNotFoundError, so only create directories when there is one.
        if dir_path:
            os.makedirs(dir_path, exist_ok=True)

        with open(file_path, "wb") as file_obj:
            pickle.dump(obj, file_obj)

    except Exception as e:
        raise CustomException(e, sys) from e

In [14]:
@dataclass
class ModelTrainerConfig:
    """Settings for model training: where the chosen model is pickled."""

    # The annotation is what makes this a dataclass field; without it the
    # name is a plain class attribute and cannot be overridden through the
    # constructor, e.g. ModelTrainerConfig(trained_model_file_path=...).
    trained_model_file_path: str = os.path.join("artifacts", "model.pkl")

In [15]:
class ModelTrainer:
    """Trains a fixed set of regressors and pickles the best-scoring one."""

    def __init__(self):
        # Holds the output path of the pickled model.
        self.model_trainer_config=ModelTrainerConfig()

    def initiate_model_training(self, train_array, test_array):
        """Fit all candidate regressors, pick the best by test R^2 and save it.

        Args:
            train_array: 2-D array; every column except the last is a
                feature, the last column is the target.
            test_array: 2-D array with the same column layout, used for
                scoring.

        Side effects:
            Prints the score report and the winner, logs progress, and
            writes the best estimator to
            ``self.model_trainer_config.trained_model_file_path``.

        Raises:
            CustomException: Wraps any exception raised during training,
                selection or saving.
        """
        try:
            # Split features (all but last column) from target (last column).
            X_train, y_train, X_test, y_test = (
                train_array[:, :-1],
                train_array[:, -1],
                test_array[:, :-1],
                test_array[:, -1]
            )

            models = {
                "LinearRegression" : LinearRegression(),
                "SVR" : SVR(),
                "RandomForest" : RandomForestRegressor(),
                "KNN" : KNeighborsRegressor(),
                "DecisionTree" : DecisionTreeRegressor(),
                "GradientBoosting" : GradientBoostingRegressor()
            }

            # One grid per model name. All grids are currently empty, so the
            # grid search only evaluates each estimator's default settings;
            # the commented entries are candidate values to re-enable.
            params = {
                "LinearRegression" : {},

                "SVR" : {
                    # 'epsilon': [0.1, 0.2],
                    # 'kernel': ['linear', 'poly'],
                },

                "RandomForest" : {
                    # 'criterion':['squared_error', 'absolute_error'],
                    # 'max_features':['sqrt','log2'],
                },

                "KNN" : {
                    # 'n_neighbors' : [5, 7],
                    # 'weights' : ['uniform', 'distance'],
                },

                "DecisionTree" : {
                    # 'criterion' : ['absolute_error', 'poisson'],
                    # 'max_features':['sqrt','log2']
                },

                "GradientBoosting" : {
                    # 'loss':['squared_error', 'absolute_error'],
                    # 'criterion':['squared_error', 'friedman_mse'],
                }
            }

            model_report:dict=evaluate_model(X_train, y_train, X_test, y_test, models,params)
            print(model_report)
            print("\n**********")
            logging.info(f"Model Report : {model_report}")

            # Pick the name with the highest score directly; on ties the
            # first name in the report wins, as with the previous
            # value -> index lookup.
            best_model_name = max(model_report, key=model_report.get)
            best_model_score = model_report[best_model_name]
            logging.info("Model score sorted")
            logging.info("Best model name has been found")

            # evaluate_model fits the estimator objects held in `models`,
            # so this is the trained instance.
            best_model = models[best_model_name]
            print(f"Best Model is {best_model_name} with accuracy : {best_model_score}")
            print("\n*****")
            logging.info(f"Best Model is {best_model_name} with accuracy : {best_model_score}")

            # A negative R^2 means the model does worse than always
            # predicting the mean of the target; make that visible instead
            # of silently saving such a model.
            if best_model_score < 0:
                logging.warning(
                    f"Best model {best_model_name} has a negative R2 score "
                    f"({best_model_score}); it is worse than a constant predictor"
                )

            save_object(
                file_path = self.model_trainer_config.trained_model_file_path,
                obj = best_model
            )
            logging.info("Best model saved as pkl file")

        except Exception as e:
            raise CustomException(e, sys) from e

In [16]:
import os


In [17]:
#from src.components.data_ingestion import DataIngestion
#from src.components.data_transformation import DataTransformation

from src.predictive_maintenance.components.data_ingestion import  DataIngestion

In [18]:
from src.predictive_maintenance.components.data_transformation import DataTransformation

In [19]:
# Run the whole pipeline end to end: ingestion -> transformation -> training.
# The names assigned here are kernel globals, so later cells can inspect them.
if __name__ == "__main__":
    
    # Step 1: ingest the data.
    # NOTE(review): presumably returns the train/test datasets or their
    # locations; confirm against DataIngestion.initiate_data_ingestion.
    data_ingestion = DataIngestion()
    train_data, test_data = data_ingestion.initiate_data_ingestion()
    
    # Step 2: transform both splits into arrays. ModelTrainer slices them as
    # 2-D arrays and treats the last column as the target.
    data_transformation = DataTransformation()
    train_array, test_array = data_transformation.initiate_data_transformation(train_data, test_data)
    
    # Step 3: train all candidate models and pickle the best one.
    model_trainer = ModelTrainer()
    model_trainer.initiate_model_training(train_array, test_array)

--- Logging error ---
Traceback (most recent call last):
  File "c:\Pred_maintainance-Project\venv\lib\logging\__init__.py", line 1079, in emit
    msg = self.format(record)
  File "c:\Pred_maintainance-Project\venv\lib\logging\__init__.py", line 923, in format
    return fmt.format(record)
  File "c:\Pred_maintainance-Project\venv\lib\logging\__init__.py", line 659, in format
    record.message = record.getMessage()
  File "c:\Pred_maintainance-Project\venv\lib\logging\__init__.py", line 363, in getMessage
    msg = msg % self.args
TypeError: not all arguments converted during string formatting
Call stack:
  File "c:\Pred_maintainance-Project\venv\lib\runpy.py", line 197, in _run_module_as_main
    return _run_code(code, main_globals, None,
  File "c:\Pred_maintainance-Project\venv\lib\runpy.py", line 87, in _run_code
    exec(code, run_globals)
  File "c:\Pred_maintainance-Project\venv\lib\site-packages\ipykernel_launcher.py", line 17, in <module>
    app.launch_new_instance()
  File

{'LinearRegression': -0.3603099860921559, 'SVR': -0.34739561314811795, 'RandomForest': -0.40301097782407536, 'KNN': -0.3924267773231862, 'DecisionTree': -0.43966333584606043, 'GradientBoosting': -0.3892188007858073}

**********
Best Model is SVR with accuracy : -0.34739561314811795

*****
