In [1]:
import os

In [2]:
os.chdir("../../")

In [3]:
%pwd

'/Users/bhikipallai/Desktop/Projects/95Mobiles'

In [4]:
from dataclasses import dataclass
from pathlib import Path

@dataclass(frozen=True)
class mobile_Modeltrainconfig:
    root_dir: Path
    X_train: Path
    X_test: Path
    y_train:Path
    y_test: Path
    save_model: Path
    model_for_train: Path

In [5]:
from src.mobiles.constants import *
from src.mobiles.utils.common import read_yaml,create_directory

[2024-10-06 18:38:04,203: INFO]: Logging file start


In [6]:
class ConfigurationManger:
    def __init__(self,config_filepath = CONFIG_FILE_PATH,params_filepath = PARAMS_FILE_PATH) :
        self.config = read_yaml(config_filepath)
        self.params = read_yaml(params_filepath)
        create_directory([self.config.data_root])
    
    def get_model_train(self)->mobile_Modeltrainconfig:
        config = self.config.model_train
        create_directory([config.root_dir])
        model_train = mobile_Modeltrainconfig(
            root_dir=config.root_dir,
            X_train=config.X_train,
            X_test=config.X_test,
            y_train=config.y_train,
            y_test=config.y_test,
            save_model=config.save_model,
            model_for_train=config.model_for_train
        )
        return model_train

In [7]:
from sklearn.linear_model import LinearRegression
from sklearn.ensemble import RandomForestRegressor
from sklearn.tree import DecisionTreeRegressor
from sklearn.model_selection import GridSearchCV
import pandas as pd
import joblib
import dagshub
import mlflow

In [8]:
models = {
    "lr":LinearRegression(),
    "rfr":RandomForestRegressor(),
    "dtr":DecisionTreeRegressor()
}

In [9]:
grid_param = {
    "lr":{
        "fit_intercept":[False,True]
    },
    "rfr":{
        "n_estimators":[15,17,19],
        "criterion": ["squared_error", "absolute_error", "friedman_mse", "poisson"],
        "bootstrap" : [True,False],
        "oob_score" : [True,False]

    },
    "dtr":{
        "criterion": ["squared_error", "absolute_error", "friedman_mse", "poisson"],
        "splitter":["best","random"]
    }
}

In [10]:
class Model_train:
    def __init__(self,config:mobile_Modeltrainconfig ):
        self.config = config
    
    def train_model(self):
        X_train = pd.read_csv(self.config.X_train)
        y_train = pd.read_csv(self.config.y_train)

        best_score = -float("inf")
        self.best_model = None
        best_model_name = None

        dagshub.init(repo_owner='Vicky7873', repo_name='95Mobiles', mlflow=True)
        mlflow.set_registry_uri("https://dagshub.com/Vicky7873/95Mobiles.mlflow")
        mlflow.set_experiment("Mobile model training")
        for model_name,model in models.items():
            models_gdr = GridSearchCV(model,param_grid=grid_param[model_name],cv=5)
            models_gdr.fit(X_train,y_train)
            print("Best parameters: ", models_gdr.best_params_)
            print("best score: ", models_gdr.best_score_)
            print("best estimator: ", models_gdr.best_estimator_)

            mlflow.log_metric(f"{model_name}_best_score",models_gdr.best_score_)
            mlflow.log_params({f"{model_name}_best_params": models_gdr.best_params_})

            if models_gdr.best_score_>best_score:
                best_score = models_gdr.best_score_
                self.best_model = models_gdr.best_estimator_
                best_model_name = model_name


        print("Best Model Type:", best_model_name)
        print("Best Model Score:", best_score)
        print("Best Model:", self.best_model)
    
    def save_model(self):
        joblib.dump(self.best_model, self.config.save_model)
        joblib.dump(self.best_model, self.config.model_for_train)

In [11]:
try:
    config = ConfigurationManger()
    model_con = config.get_model_train()
    model = Model_train(config=model_con)
    model.train_model()
    model.save_model()
except Exception as e:
    raise e

[2024-10-06 18:38:05,910: INFO]: yaml: config/mobiles_config.yaml loaded successfully
[2024-10-06 18:38:05,911: INFO]: yaml: params/mobiles_params.yaml loaded successfully
[2024-10-06 18:38:05,911: INFO]: created directory at: data
[2024-10-06 18:38:05,912: INFO]: created directory at: data/mobile/model_train
[2024-10-06 18:38:06,266: INFO]: HTTP Request: GET https://dagshub.com/api/v1/user "HTTP/1.1 200 OK"


[2024-10-06 18:38:06,274: INFO]: Accessing as Vicky7873
[2024-10-06 18:38:06,680: INFO]: HTTP Request: GET https://dagshub.com/api/v1/repos/Vicky7873/95Mobiles "HTTP/1.1 200 OK"


[2024-10-06 18:38:06,685: INFO]: Initialized MLflow to track repo "Vicky7873/95Mobiles"


[2024-10-06 18:38:06,687: INFO]: Repository Vicky7873/95Mobiles initialized!


2024/10/06 18:38:07 INFO mlflow.tracking.fluent: Experiment with name 'Mobile model training' does not exist. Creating a new experiment.


Best parameters:  {'fit_intercept': True}
best score:  0.745351598566333
best estimator:  LinearRegression()


  return fit_method(estimator, *args, **kwargs)
  return fit_method(estimator, *args, **kwargs)
  return fit_method(estimator, *args, **kwargs)
  return fit_method(estimator, *args, **kwargs)
  return fit_method(estimator, *args, **kwargs)
  return fit_method(estimator, *args, **kwargs)
  return fit_method(estimator, *args, **kwargs)
  return fit_method(estimator, *args, **kwargs)
  return fit_method(estimator, *args, **kwargs)
  return fit_method(estimator, *args, **kwargs)
  return fit_method(estimator, *args, **kwargs)
  return fit_method(estimator, *args, **kwargs)
  return fit_method(estimator, *args, **kwargs)
  return fit_method(estimator, *args, **kwargs)
  return fit_method(estimator, *args, **kwargs)
  return fit_method(estimator, *args, **kwargs)
  return fit_method(estimator, *args, **kwargs)
  return fit_method(estimator, *args, **kwargs)
  return fit_method(estimator, *args, **kwargs)
  return fit_method(estimator, *args, **kwargs)
  return fit_method(estimator, *args, **

Best parameters:  {'bootstrap': True, 'criterion': 'squared_error', 'n_estimators': 15, 'oob_score': False}
best score:  0.8603308761191031
best estimator:  RandomForestRegressor(n_estimators=15)
Best parameters:  {'criterion': 'absolute_error', 'splitter': 'best'}
best score:  0.7852277624991968
best estimator:  DecisionTreeRegressor(criterion='absolute_error')
Best Model Type: rfr
Best Model Score: 0.8603308761191031
Best Model: RandomForestRegressor(n_estimators=15)
