In [1]:
import os

In [2]:
os.chdir('../../')

In [3]:
%pwd

'/Users/bhikipallai/Desktop/Projects/95Mobiles'

In [4]:
from pathlib import Path
from dataclasses import dataclass

@dataclass(frozen=True)
class ModelTrainingConfig:
    root_dir: Path
    X_train: Path
    X_test: Path
    y_train: Path
    y_test: Path
    saved_model: Path
    model_for_train: Path

In [5]:
from src.laptop.utils.common import read_yaml,create_directory
from src.laptop.constants import *

[2024-10-05 20:52:06,993: INFO]: Logging file start


In [6]:
class ConfigurationManger:
    def __init__(self,config_filepath = CONFIG_FILE_PATH,param_filepath = PARAMS_FILE_PATH):
        self.config = read_yaml(config_filepath)
        self.params = read_yaml(param_filepath)
        create_directory([self.config.data_root])

    def get_moel_train(self)->ModelTrainingConfig:
        config = self.config.Model_train
        create_directory([config.root_dir])

        model_trainconfig = ModelTrainingConfig(
            root_dir=config.root_dir,
            X_train=config.X_train,
            X_test=config.X_test,
            y_train=config.y_train,
            y_test=config.y_test,
            saved_model=config.saved_model,
            model_for_train=config.model_for_train
        )

        return model_trainconfig

In [7]:
from sklearn.linear_model import LinearRegression
from sklearn.ensemble import RandomForestRegressor
from sklearn.tree import DecisionTreeRegressor
from sklearn.linear_model import ElasticNet,Ridge,Lasso
import mlflow
import joblib
import pandas as pd
import dagshub
from sklearn.model_selection import GridSearchCV

In [8]:
models = {
    "lr":LinearRegression(),
    "rfr":RandomForestRegressor(),
    "dtr":DecisionTreeRegressor(),
    "enet":ElasticNet(),
    "ridge":Ridge(),
    "lasso":Lasso()
}

In [9]:
grid_params = {
    "lr":{
        "fit_intercept":[False,True]
    },
    "rfr":{
        "n_estimators":[15,17,19],
        "criterion": ["squared_error", "absolute_error", "friedman_mse", "poisson"],
        "bootstrap" : [True,False],
        "oob_score" : [True,False]

    },
    "dtr":{
        "criterion": ["squared_error", "absolute_error", "friedman_mse", "poisson"],
        "splitter":["best","random"]
    },
    "enet":{
        "alpha":[1.0,0.5,1.5,2.0],
        "l1_ratio" :[0.3,0.4,0.5],
        "selection": ["cyclic","random"]
    },
    "ridge":{
        "alpha":[1.0,0.5,1.5,2.0],
        "solver":['auto', 'svd', 'cholesky', 'lsqr', 'sparse_cg', 'sag', 'saga']
    },

    "lasso" : {
        "alpha":[1.0,0.5,1.5,2.0],
    }
}

In [10]:
class Laptop_modeltrain:
    def __init__(self,config:ModelTrainingConfig):
        self.config = config
    
    def model_train(self):
        X_train = pd.read_csv(self.config.X_train)
        X_test = pd.read_csv(self.config.X_test)
        y_train = pd.read_csv(self.config.y_train)
        y_test = pd.read_csv(self.config.y_test)

        dagshub.init(repo_owner='Vicky7873', repo_name='95Mobiles', mlflow=True)
        mlflow.set_registry_uri("https://dagshub.com/Vicky7873/95Mobiles.mlflow")
        mlflow.set_experiment("Laptop model training")

        compare_score = -float("inf")
        with mlflow.start_run():
            for model_name,model in models.items():
                train_gdr = GridSearchCV(model,param_grid=grid_params[model_name],cv=5)
                train_gdr.fit(X_train,y_train)
                print("Best parameters: ", train_gdr.best_params_)
                print("best score: ", train_gdr.best_score_)
                print("best estimator: ", train_gdr.best_estimator_)

                mlflow.log_metric(f"{model_name}_best_score",train_gdr.best_score_)
                mlflow.log_params({f"{model_name}_best_params": train_gdr.best_params_})

                if train_gdr.best_score_>compare_score:
                    compare_score = train_gdr.best_score_
                    self.best_model = train_gdr.best_estimator_
                    print("Best Model Type:", self.best_model)
    

    def model_save(self):
        model = self.best_model
        joblib.dump(model,self.config.saved_model)
        joblib.dump(model, self.config.model_for_train)
        print(f"Model: {model} was saved to its path")


In [11]:
try:
    config=ConfigurationManger()
    model_config = config.get_moel_train()
    lp_model = Laptop_modeltrain(config=model_config)
    lp_model.model_train()
    lp_model.model_save()
except Exception as e:
    raise e


[2024-10-05 20:52:08,561: INFO]: yaml: config/laptop_config.yaml loaded successfully
[2024-10-05 20:52:08,563: INFO]: yaml: params/laptop_params.yaml loaded successfully
[2024-10-05 20:52:08,564: INFO]: created directory at: data
[2024-10-05 20:52:08,565: INFO]: created directory at: data/laptop/model_train
[2024-10-05 20:52:08,924: INFO]: HTTP Request: GET https://dagshub.com/api/v1/user "HTTP/1.1 200 OK"


[2024-10-05 20:52:08,929: INFO]: Accessing as Vicky7873
[2024-10-05 20:52:09,336: INFO]: HTTP Request: GET https://dagshub.com/api/v1/repos/Vicky7873/95Mobiles "HTTP/1.1 200 OK"


[2024-10-05 20:52:09,338: INFO]: Initialized MLflow to track repo "Vicky7873/95Mobiles"


[2024-10-05 20:52:09,340: INFO]: Repository Vicky7873/95Mobiles initialized!
Best parameters:  {'fit_intercept': True}
best score:  0.7362678834370764
best estimator:  LinearRegression()
Best Model Type: LinearRegression()


  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
60 fits failed out of a total of 240.
The score on these train-test partitions for these parameters will be set to nan.
If these failures are not expected, you can try to debug them by setting error_score='raise'.

Below are more details about the failures:
--------------------------------------------------------------------------------
60 fits failed with the following error:
Traceback (most recent call last):
  File "/Applications/anaconda3/envs/95m/lib/python3.12/site-packages/sklearn/model_selection/_validation.py", line 888, in _fit_and_score
    estimator.fit(X_train, y_train, **fit_params)
  File "/Applications/anaconda3/envs/95m/lib/python3.12/site-packages/sklearn/base.py", line 1473, in wrapper
    return fit_method(estimator, *args, **kwargs)
           ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
  File "/Applicati

Best parameters:  {'bootstrap': True, 'criterion': 'poisson', 'n_estimators': 17, 'oob_score': True}
best score:  0.8300642802274133
best estimator:  RandomForestRegressor(criterion='poisson', n_estimators=17, oob_score=True)
Best Model Type: RandomForestRegressor(criterion='poisson', n_estimators=17, oob_score=True)
Best parameters:  {'criterion': 'poisson', 'splitter': 'best'}
best score:  0.6006945268308224
best estimator:  DecisionTreeRegressor(criterion='poisson')
Best parameters:  {'alpha': 0.5, 'l1_ratio': 0.5, 'selection': 'random'}
best score:  0.7322970532939471
best estimator:  ElasticNet(alpha=0.5, selection='random')




Best parameters:  {'alpha': 2.0, 'solver': 'sparse_cg'}
best score:  0.73629452774934
best estimator:  Ridge(alpha=2.0, solver='sparse_cg')
Best parameters:  {'alpha': 2.0}
best score:  0.7362682613102145
best estimator:  Lasso(alpha=2.0)


2024/10/05 20:53:20 INFO mlflow.tracking._tracking_service.client: 🏃 View run victorious-fly-32 at: https://dagshub.com/Vicky7873/95Mobiles.mlflow/#/experiments/5/runs/3d6d33cb0d1040a1ac0e90b61c62a06b.
2024/10/05 20:53:20 INFO mlflow.tracking._tracking_service.client: 🧪 View experiment at: https://dagshub.com/Vicky7873/95Mobiles.mlflow/#/experiments/5.


Model: RandomForestRegressor(criterion='poisson', n_estimators=17, oob_score=True) was saved to its path
