In [1]:
import os

In [2]:
os.chdir("../../")

In [3]:
%pwd

'/Users/bhikipallai/Desktop/Projects/95Mobiles'

In [4]:
from dataclasses import dataclass
from pathlib import Path

@dataclass(frozen=True)
class CarModelConfig:
    root_dir: Path
    X_train: Path
    X_test: Path
    y_train:Path
    y_test: Path
    model_save: Path
    model_for_train: Path
    fit_intercept: list[bool]
    n_estimators: list [int]
    criterion: list[str]
    bootstrap: list[bool]
    oob_score: list[bool]
    splitter: list[str]


In [5]:
from src.car.constants import *
from src.car.utils.common import read_yaml,create_directory

[2024-10-01 12:53:03,636: INFO]: Logging file start


In [6]:
class ConfigurationManger:
    def __init__(self,config_filepath = CONFIG_FILE_PATH,params_filepath = PARAM_FILE_PATH):
        self.config = read_yaml(config_filepath)
        self.params = read_yaml(params_filepath)

        create_directory([self.config.data_root])
    
    def get_model_trained(self) -> CarModelConfig:
        config = self.config.Model_Building
        params = self.params.grid

        create_directory([config.root_dir])

        model_train = CarModelConfig(
            root_dir=config.root_dir,
            X_train=config.X_train,
            X_test=config.X_test,
            y_train=config.y_train,
            y_test=config.y_test,
            model_save = config.model_save,
            model_for_train = config.model_for_train,
            fit_intercept = params.fit_intercept,
            n_estimators = params.n_estimators,
            criterion = params.criterion,
            bootstrap = params.bootstrap,
            oob_score = params.oob_score,
            splitter = params.splitter
        )

        return model_train

In [7]:
from sklearn.linear_model import LinearRegression
from sklearn.ensemble import RandomForestRegressor
from sklearn.tree import DecisionTreeRegressor
from sklearn.model_selection import GridSearchCV
import pandas as pd
import joblib
import mlflow
import dagshub

In [8]:
class ModelTrain:
    def __init__(self,config:CarModelConfig):
        self.config = config

    def get_model_train(self):
        models = {
            "lr":LinearRegression(),
            "rfr":RandomForestRegressor(),
            "dtr":DecisionTreeRegressor()
        }

        grid_params = {
            "lr":{
                "fit_intercept":self.config.fit_intercept
            },
            "rfr":{
                "n_estimators":self.config.n_estimators,
                "criterion": self.config.criterion,
                "bootstrap" : self.config.bootstrap,
                "oob_score" : self.config.oob_score

            },
            "dtr":{
                "criterion": self.config.criterion,
                "splitter":self.config.splitter
            }
        }

        X_train = pd.read_csv(self.config.X_train)
        y_train = pd.read_csv(self.config.y_train)

        dagshub.init(repo_owner='Vicky7873', repo_name='95Mobiles', mlflow=True)
        mlflow.set_registry_uri("https://dagshub.com/Vicky7873/95Mobiles.mlflow")
        mlflow.set_experiment("Car Model Training")

        compare_score = -float("inf")
        with mlflow.start_run():
            for model_name,model in models.items():
                gdr_train = GridSearchCV(model,param_grid=grid_params[model_name],cv=5)
                gdr_train.fit(X_train,y_train)

                print("Best parameters: ", gdr_train.best_params_)
                print("best score: ", gdr_train.best_score_)
                print("best estimator: ", gdr_train.best_estimator_)

                mlflow.log_metric(f"{model_name}_best_score", gdr_train.best_score_)
                mlflow.log_params({f"{model_name}_best_params": gdr_train.best_params_})

                if gdr_train.best_score_ > compare_score:
                    compare_score = gdr_train.best_score_
                    self.best_model = gdr_train.best_estimator_
                    print("Best Model Type:", self.best_model)
    
    def save_model(self):
        model = self.best_model
        print(type(model).__name__)
        joblib.dump(model,self.config.model_save)
        joblib.dump(model,self.config.model_for_train)
        print(f"Model: {model} was saved to its path")

In [9]:
try:
    config = ConfigurationManger()
    model_config = config.get_model_trained()
    model_train = ModelTrain(config=model_config)
    model_train.get_model_train()
    model_train.save_model()
except Exception as e:
    raise e

[2024-10-01 12:53:04,957: INFO]: yaml: config/car_config.yaml loaded successfully
[2024-10-01 12:53:04,959: INFO]: yaml: params/car_params.yaml loaded successfully
[2024-10-01 12:53:04,959: INFO]: created directory at: data
[2024-10-01 12:53:04,959: INFO]: created directory at: data/car/Model_training
[2024-10-01 12:53:05,309: INFO]: HTTP Request: GET https://dagshub.com/api/v1/user "HTTP/1.1 200 OK"


[2024-10-01 12:53:05,317: INFO]: Accessing as Vicky7873
[2024-10-01 12:53:05,720: INFO]: HTTP Request: GET https://dagshub.com/api/v1/repos/Vicky7873/95Mobiles "HTTP/1.1 200 OK"


[2024-10-01 12:53:05,723: INFO]: Initialized MLflow to track repo "Vicky7873/95Mobiles"


[2024-10-01 12:53:05,725: INFO]: Repository Vicky7873/95Mobiles initialized!
Best parameters:  {'fit_intercept': True}
best score:  0.426135007556604
best estimator:  LinearRegression()
Best Model Type: LinearRegression()


  return fit_method(estimator, *args, **kwargs)
  warn(
  return fit_method(estimator, *args, **kwargs)
  warn(
  return fit_method(estimator, *args, **kwargs)
  warn(
  return fit_method(estimator, *args, **kwargs)
  warn(
  return fit_method(estimator, *args, **kwargs)
  warn(
  return fit_method(estimator, *args, **kwargs)
  return fit_method(estimator, *args, **kwargs)
  return fit_method(estimator, *args, **kwargs)
  return fit_method(estimator, *args, **kwargs)
  return fit_method(estimator, *args, **kwargs)
  return fit_method(estimator, *args, **kwargs)
  return fit_method(estimator, *args, **kwargs)
  return fit_method(estimator, *args, **kwargs)
  return fit_method(estimator, *args, **kwargs)
  warn(
  return fit_method(estimator, *args, **kwargs)
  warn(
  return fit_method(estimator, *args, **kwargs)
  return fit_method(estimator, *args, **kwargs)
  return fit_method(estimator, *args, **kwargs)
  return fit_method(estimator, *args, **kwargs)
  return fit_method(estimator, *

Best parameters:  {'bootstrap': True, 'criterion': 'squared_error', 'n_estimators': 17, 'oob_score': False}
best score:  0.7019375695974093
best estimator:  RandomForestRegressor(n_estimators=17)
Best Model Type: RandomForestRegressor(n_estimators=17)
Best parameters:  {'criterion': 'absolute_error', 'splitter': 'best'}
best score:  0.5347455831589343
best estimator:  DecisionTreeRegressor(criterion='absolute_error')


2024/10/01 12:54:07 INFO mlflow.tracking._tracking_service.client: 🏃 View run magnificent-auk-841 at: https://dagshub.com/Vicky7873/95Mobiles.mlflow/#/experiments/3/runs/2761ab89dcc6450aa60721f302588e3b.
2024/10/01 12:54:07 INFO mlflow.tracking._tracking_service.client: 🧪 View experiment at: https://dagshub.com/Vicky7873/95Mobiles.mlflow/#/experiments/3.


RandomForestRegressor
Model: RandomForestRegressor(n_estimators=17) was saved to its path
