In [1]:
import os 
import sys
import pickle

In [2]:
%pwd

'e:\\ML_Projects\\Predictive_Maintenance\\notebooks'

In [3]:
# Step up to the parent directory so the `src.*` imports in the following cells
# resolve. The move is skipped when a `src` directory is already reachable from
# the current working directory, which makes this cell safe to re-run: an
# unconditional relative chdir would climb one level higher on every execution.
if not os.path.isdir("src"):
    os.chdir("../")

In [4]:
%pwd

'e:\\ML_Projects\\Predictive_Maintenance'

In [5]:
from src.exception import CustomException
from src.logger import logging
from dataclasses import dataclass

In [6]:
from sklearn.linear_model import LinearRegression
from sklearn.svm import SVR
from sklearn.ensemble import RandomForestRegressor
from sklearn.neighbors import KNeighborsRegressor
from sklearn.tree import DecisionTreeRegressor
from sklearn.ensemble import GradientBoostingRegressor
from sklearn.model_selection import GridSearchCV
from sklearn.metrics import mean_squared_error, mean_absolute_error, r2_score

In [10]:
def evaluate_model(X_train, y_train, X_test, y_test, models,param):
    """Tune, fit and score every candidate model against the test split.

    For each entry of ``models`` a 3-fold ``GridSearchCV`` is run over the
    matching grid from ``param``. The best parameters found are then set on the
    estimator stored in ``models`` and that estimator is fitted on the training
    data, so the caller's dictionary holds fitted estimators on return.

    Args:
        X_train: Training features.
        y_train: Training targets.
        X_test: Test features.
        y_test: Test targets.
        models: Mapping of model name -> estimator. The estimators are mutated
            in place (parameters set, then fitted).
        param: Mapping of model name -> parameter grid for ``GridSearchCV``.
            A model with no entry (or ``param`` being ``None``) is searched
            over an empty grid, i.e. with the parameters it already has.

    Returns:
        dict: Model name -> R^2 score (``r2_score``) on the test split, in the
        iteration order of ``models``.

    Raises:
        CustomException: Wraps any exception raised while tuning, fitting or
            scoring.
    """
    try:
        report = {}
        logging.info("Creating model report")

        grids = param or {}

        # Iterate name/estimator pairs directly instead of rebuilding
        # list(models.keys()) / list(models.values()) on every pass.
        for name, model in models.items():
            logging.info("Model Listd in Dictionary")

            # A missing grid must not abort the whole comparison with KeyError.
            para = grids.get(name, {})

            gs = GridSearchCV(model, para, cv=3)
            gs.fit(X_train, y_train)

            # The search works on clones of `model`; copy the winning
            # parameters onto the caller's estimator and fit that instance so
            # models[name] is usable (and picklable as a fitted model) later.
            model.set_params(**gs.best_params_)
            model.fit(X_train, y_train)
            logging.info(f"{model} trained")

            y_test_pred = model.predict(X_test)
            logging.info(f"{model} predicted")

            test_model_score = r2_score(y_test, y_test_pred)
            logging.info(f"{model} accuracy score generated")

            report[name] = test_model_score
            logging.info("Report generated")

        return report

    except Exception as e:
        logging.info("Exception as model training step")
        raise CustomException(e, sys)

In [11]:
def save_object(file_path, obj):
    """Pickle ``obj`` to ``file_path``, creating parent directories as needed.

    Args:
        file_path: Destination path. May be a bare file name, in which case the
            file is written to the current working directory.
        obj: Any picklable object.

    Raises:
        CustomException: Wraps any error raised while creating the directory,
            opening the file or pickling the object.
    """
    try:
        dir_path = os.path.dirname(file_path)

        # os.path.dirname("model.pkl") is "", and os.makedirs("") raises
        # FileNotFoundError, so only create a directory when there is one.
        if dir_path:
            os.makedirs(dir_path, exist_ok=True)

        with open(file_path, "wb") as file_obj:
            pickle.dump(obj, file_obj)

    except Exception as e:
        raise CustomException(e, sys)

In [12]:
@dataclass
class ModelTrainerConfig:
    """Settings for the model-training step.

    Attributes:
        trained_model_file_path: Where the selected model is pickled.
            Defaults to ``artifacts/model.pkl`` (joined with ``os.path.join``).
    """

    # The annotation is what makes this a dataclass field; without it the
    # decorator generates an __init__ with no parameters and the path cannot be
    # overridden per instance or shown in repr().
    trained_model_file_path: str = os.path.join("artifacts", "model.pkl")

In [13]:
class ModelTrainer:
    """Compares a fixed set of regressors and persists the best-scoring one."""

    def __init__(self):
        self.model_trainer_config=ModelTrainerConfig()

    def initiate_model_training(self, train_array, test_array):
        """Train all candidate models, pick the best by test score and save it.

        Args:
            train_array: 2-D array whose last column is the target and whose
                remaining columns are the features.
            test_array: 2-D array with the same column layout as
                ``train_array``.

        Returns:
            None. The model report and the winner are printed and logged, and
            the winning estimator is pickled to
            ``self.model_trainer_config.trained_model_file_path``.

        Raises:
            CustomException: Wraps any exception raised during training,
                selection or saving (including an empty model report).
        """
        try:
            # Last column is the target; everything before it is a feature.
            X_train, y_train, X_test, y_test = (
                train_array[:, :-1],
                train_array[:, -1],
                test_array[:, :-1],
                test_array[:, -1]
            )

            models = {
                "LinearRegression" : LinearRegression(),
                "SVR" : SVR(),
                "RandomForest" : RandomForestRegressor(),
                "KNN" : KNeighborsRegressor(),
                "DecisionTree" : DecisionTreeRegressor(),
                "GradientBoosting" : GradientBoostingRegressor()
            }

            # All grids are currently empty, so each model is evaluated with
            # its constructor defaults. Candidate grids are kept as comments
            # for when tuning is switched back on.
            params = {
                "LinearRegression" : {},
                "SVR" : {
                    # 'epsilon': [0.1, 0.2],
                    # 'kernel': ['linear', 'poly'],
                },
                "RandomForest" : {
                    # 'criterion':['squared_error', 'absolute_error'],
                    # 'max_features':['sqrt','log2'],
                },
                "KNN" : {
                    # 'n_neighbors' : [5, 7],
                    # 'weights' : ['uniform', 'distance'],
                },
                "DecisionTree" : {
                    # 'criterion' : ['absolute_error', 'poisson'],
                    # 'max_features':['sqrt','log2']
                },
                "GradientBoosting" : {
                    # 'loss':['squared_error', 'absolute_error'],
                    # 'criterion':['squared_error', 'friedman_mse'],
                }
            }

            model_report:dict=evaluate_model(X_train, y_train, X_test, y_test, models,params)
            print(model_report)
            print("\n**********")
            logging.info(f"Model Report : {model_report}")

            # One pass over (name, score) pairs yields both the winner and its
            # score; no sort and no value -> index -> key reverse lookup.
            # max() keeps the first maximum, so ties resolve to the model
            # listed first, as before.
            best_model_name, best_model_score = max(
                model_report.items(), key=lambda item: item[1]
            )
            logging.info("Model score sorted")
            logging.info("Best model name has been found")

            # evaluate_model fits the estimators stored in `models` in place,
            # so this is the fitted instance.
            best_model = models[best_model_name]
            print(f"Best Model is {best_model_name} with accuracy : {best_model_score}")
            print("\n*****")
            logging.info(f"Best Model is {best_model_name} with accuracy : {best_model_score}")

            save_object(
                file_path = self.model_trainer_config.trained_model_file_path,
                obj = best_model
            )
            logging.info("Best model saved as pkl file")

        except Exception as e:
            raise CustomException(e, sys)

In [14]:
from src.components.data_ingestion import DataIngestion
from src.components.data_transformation import DataTransformation

In [15]:
# End-to-end run of the pipeline: ingestion -> transformation -> model training.
# NOTE(review): inside a notebook kernel __name__ is "__main__", so this guard
# passes and the cell executes whenever it is run.
if __name__ == "__main__":
    
    # Stage 1: ingestion. Returns two values, bound to train_data / test_data.
    # NOTE(review): presumably train/test data or their locations - confirm in DataIngestion.
    data_ingestion = DataIngestion()
    train_data, test_data = data_ingestion.initiate_data_ingestion()
    
    # Stage 2: transformation of the ingested train/test pair into the two
    # arrays consumed by the trainer (which slices them as 2-D arrays with the
    # target in the last column).
    data_transformation = DataTransformation()
    train_array, test_array = data_transformation.initiate_data_transformation(train_data, test_data)
    
    # Stage 3: train all candidate models, print/log the report and pickle the best one.
    model_trainer = ModelTrainer()
    model_trainer.initiate_model_training(train_array, test_array)

  df_train = pd.read_sql_query("select * from train1", mydb)
  df_test = pd.read_sql_query("select * from test1", mydb)
A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  train_df["RUL"][train_df["RUL"] > 103] = 103
A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  test_df["RUL"][test_df["RUL"] > 103] = 103


{'LinearRegression': 0.4438818952015091, 'SVR': 0.6963551904019368, 'RandomForest': 0.6485769436566342, 'KNN': 0.6422849491219733, 'DecisionTree': 0.35693905674273196, 'GradientBoosting': 0.6983174749414809}

**********
Best Model is GradientBoosting with accuracy : 0.6983174749414809

*****
