In [1]:
import os

In [2]:
os.chdir('../../')

In [3]:
%pwd

'/Users/bhikipallai/Desktop/Projects/95Mobiles'

In [4]:
from pathlib import Path
from dataclasses import dataclass

@dataclass(frozen=True)
class ModelTrainingConfig:
    root_dir: Path
    X_train: Path
    X_test: Path
    y_train: Path
    y_test: Path
    saved_model: Path
    model_for_train: Path

In [5]:
from src.laptop.utils.common import read_yaml,create_directory
from src.laptop.constants import *

[2024-10-06 00:31:37,724: INFO]: Logging file start


In [6]:
class ConfigurationManger:
    def __init__(self,config_filepath = CONFIG_FILE_PATH,param_filepath = PARAMS_FILE_PATH):
        self.config = read_yaml(config_filepath)
        self.params = read_yaml(param_filepath)
        create_directory([self.config.data_root])

    def get_moel_train(self)->ModelTrainingConfig:
        config = self.config.Model_train
        create_directory([config.root_dir])

        model_trainconfig = ModelTrainingConfig(
            root_dir=config.root_dir,
            X_train=config.X_train,
            X_test=config.X_test,
            y_train=config.y_train,
            y_test=config.y_test,
            saved_model=config.saved_model,
            model_for_train=config.model_for_train
        )

        return model_trainconfig

In [7]:
from sklearn.linear_model import LinearRegression
from sklearn.ensemble import RandomForestRegressor
from sklearn.tree import DecisionTreeRegressor
from sklearn.linear_model import ElasticNet,Ridge,Lasso
import mlflow
import joblib
import pandas as pd
import dagshub
from sklearn.model_selection import GridSearchCV

In [8]:
models = {
    "lr":LinearRegression(),
    "rfr":RandomForestRegressor(),
    "dtr":DecisionTreeRegressor(),
    "enet":ElasticNet(),
    "ridge":Ridge(),
    "lasso":Lasso()
}

In [9]:
grid_params = {
    "lr":{
        "fit_intercept":[False,True]
    },
    "rfr":{
        "n_estimators":[15,17,19],
        "criterion": ["squared_error", "absolute_error", "friedman_mse", "poisson"],
        "bootstrap" : [True,False],
        "oob_score" : [True,False]

    },
    "dtr":{
        "criterion": ["squared_error", "absolute_error", "friedman_mse", "poisson"],
        "splitter":["best","random"]
    },
    "enet":{
        "alpha":[1.0,0.5,1.5,2.0],
        "l1_ratio" :[0.3,0.4,0.5],
        "selection": ["cyclic","random"]
    },
    "ridge":{
        "alpha":[1.0,0.5,1.5,2.0],
        "solver":['auto', 'svd', 'cholesky', 'lsqr', 'sparse_cg', 'sag', 'saga']
    },

    "lasso" : {
        "alpha":[1.0,0.5,1.5,2.0],
    }
}

In [10]:
class Laptop_modeltrain:
    def __init__(self,config:ModelTrainingConfig):
        self.config = config
    
    def model_train(self):
        X_train = pd.read_csv(self.config.X_train)
        print(X_train.head(2))
        X_test = pd.read_csv(self.config.X_test)
        y_train = pd.read_csv(self.config.y_train)
        y_test = pd.read_csv(self.config.y_test)

        dagshub.init(repo_owner='Vicky7873', repo_name='95Mobiles', mlflow=True)
        mlflow.set_registry_uri("https://dagshub.com/Vicky7873/95Mobiles.mlflow")
        mlflow.set_experiment("Laptop model training")

        compare_score = -float("inf")
        with mlflow.start_run():
            for model_name,model in models.items():
                train_gdr = GridSearchCV(model,param_grid=grid_params[model_name],cv=5)
                train_gdr.fit(X_train,y_train)
                print("Best parameters: ", train_gdr.best_params_)
                print("best score: ", train_gdr.best_score_)
                print("best estimator: ", train_gdr.best_estimator_)

                mlflow.log_metric(f"{model_name}_best_score",train_gdr.best_score_)
                mlflow.log_params({f"{model_name}_best_params": train_gdr.best_params_})

                if train_gdr.best_score_>compare_score:
                    compare_score = train_gdr.best_score_
                    self.best_model = train_gdr.best_estimator_
                    print("Best Model Type:", self.best_model)
    

    def model_save(self):
        model = self.best_model
        joblib.dump(model,self.config.saved_model)
        joblib.dump(model, self.config.model_for_train)
        print(f"Model: {model} was saved to its path")


In [11]:
try:
    config=ConfigurationManger()
    model_config = config.get_moel_train()
    lp_model = Laptop_modeltrain(config=model_config)
    lp_model.model_train()
    lp_model.model_save()
except Exception as e:
    raise e


[2024-10-06 00:31:39,130: INFO]: yaml: config/laptop_config.yaml loaded successfully
[2024-10-06 00:31:39,132: INFO]: yaml: params/laptop_params.yaml loaded successfully
[2024-10-06 00:31:39,132: INFO]: created directory at: data
[2024-10-06 00:31:39,133: INFO]: created directory at: data/laptop/model_train
   Company  Product  TypeName    Inches  ScreenResolution  Cpu  Ram  Memory  \
0        7      594         3  0.408772                 8   74    8      26   
1        4      244         0 -1.204407                 7   83    8      16   

   Gpu  OpSys    Weight  
0    1      5 -0.208553  
1   61      5 -0.629467  
[2024-10-06 00:31:39,566: INFO]: HTTP Request: GET https://dagshub.com/api/v1/user "HTTP/1.1 200 OK"


[2024-10-06 00:31:39,571: INFO]: Accessing as Vicky7873
[2024-10-06 00:31:39,970: INFO]: HTTP Request: GET https://dagshub.com/api/v1/repos/Vicky7873/95Mobiles "HTTP/1.1 200 OK"


[2024-10-06 00:31:39,977: INFO]: Initialized MLflow to track repo "Vicky7873/95Mobiles"


[2024-10-06 00:31:39,979: INFO]: Repository Vicky7873/95Mobiles initialized!
Best parameters:  {'fit_intercept': True}
best score:  0.4698399820135103
best estimator:  LinearRegression()
Best Model Type: LinearRegression()


  return fit_method(estimator, *args, **kwargs)
  warn(
  return fit_method(estimator, *args, **kwargs)
  return fit_method(estimator, *args, **kwargs)
  warn(
  return fit_method(estimator, *args, **kwargs)
  warn(
  return fit_method(estimator, *args, **kwargs)
  warn(
  return fit_method(estimator, *args, **kwargs)
  return fit_method(estimator, *args, **kwargs)
  return fit_method(estimator, *args, **kwargs)
  return fit_method(estimator, *args, **kwargs)
  return fit_method(estimator, *args, **kwargs)
  return fit_method(estimator, *args, **kwargs)
  return fit_method(estimator, *args, **kwargs)
  return fit_method(estimator, *args, **kwargs)
  warn(
  return fit_method(estimator, *args, **kwargs)
  return fit_method(estimator, *args, **kwargs)
  return fit_method(estimator, *args, **kwargs)
  return fit_method(estimator, *args, **kwargs)
  return fit_method(estimator, *args, **kwargs)
  return fit_method(estimator, *args, **kwargs)
  return fit_method(estimator, *args, **kwargs)


Best parameters:  {'bootstrap': True, 'criterion': 'absolute_error', 'n_estimators': 15, 'oob_score': False}
best score:  0.8325961701944905
best estimator:  RandomForestRegressor(criterion='absolute_error', n_estimators=15)
Best Model Type: RandomForestRegressor(criterion='absolute_error', n_estimators=15)
Best parameters:  {'criterion': 'absolute_error', 'splitter': 'best'}
best score:  0.7108941833721356
best estimator:  DecisionTreeRegressor(criterion='absolute_error')
Best parameters:  {'alpha': 0.5, 'l1_ratio': 0.5, 'selection': 'cyclic'}
best score:  0.46091535910062886
best estimator:  ElasticNet(alpha=0.5)




Best parameters:  {'alpha': 2.0, 'solver': 'auto'}
best score:  0.46986158228549135
best estimator:  Ridge(alpha=2.0)
Best parameters:  {'alpha': 2.0}
best score:  0.4698406474017106
best estimator:  Lasso(alpha=2.0)


2024/10/06 00:32:22 INFO mlflow.tracking._tracking_service.client: 🏃 View run wistful-stoat-279 at: https://dagshub.com/Vicky7873/95Mobiles.mlflow/#/experiments/5/runs/c6ded7c614a244fab6894ca89e3c1622.
2024/10/06 00:32:22 INFO mlflow.tracking._tracking_service.client: 🧪 View experiment at: https://dagshub.com/Vicky7873/95Mobiles.mlflow/#/experiments/5.


Model: RandomForestRegressor(criterion='absolute_error', n_estimators=15) was saved to its path
