In [4]:
import os
# Step up to the parent directory exactly once per kernel session.
# An unguarded relative chdir climbs one more level every time this cell is
# re-run, which silently breaks every relative path used afterwards; the flag
# below makes re-running the cell a no-op.
# NOTE(review): the parent is presumably the project root that the `src.`
# imports and the relative YAML paths used later rely on — confirm.
if not globals().get("_MOVED_TO_PARENT_DIR", False):
    os.chdir('../')
    _MOVED_TO_PARENT_DIR = True

In [5]:
from dataclasses import dataclass
from pathlib import Path

@dataclass(frozen=True)
class ModelTrainerConfig:
    """Read-only settings for the model-training stage.

    ``frozen=True`` makes instances immutable: assigning to a field after
    construction raises ``dataclasses.FrozenInstanceError``. Instances are
    built by ``ConfigurationManager.get_model_trainer_config`` and consumed
    by ``ModelTrainer``.
    """
    root_dir: Path  # directory the fitted model file is written into
    train_data_path: Path  # CSV read with pandas to fit the model
    test_data_path: Path  # CSV holding the held-out split
    model_name: str  # file name joined onto root_dir when saving the model
    alpha: float  # passed to ElasticNet(alpha=...)
    l1_ratio: float  # passed to ElasticNet(l1_ratio=...)
    target_column: str  # column split off from the features and used as y

In [8]:
from src.mlflow_demo.constants import *
from src.mlflow_demo.utils.common import read_yaml, create_directories

class ConfigurationManager:
    """Loads the project's YAML files and builds per-stage config objects.

    Constructing the manager reads the config, params and schema files with
    ``read_yaml`` and ensures the top-level artifacts directory exists.
    """

    def __init__(self,
                 config_file_path=CONFIG_FILE_PATH,
                 params_file_path=PARAMS_FILE_PATH,
                 schema_file_path=SCHEMA_FILE_PATH) -> None:
        """Read the three YAML files and create the artifacts root.

        Args:
            config_file_path: Location of the main configuration YAML.
            params_file_path: Location of the hyper-parameter YAML.
            schema_file_path: Location of the data-schema YAML.
        """
        self.config = read_yaml(config_file_path)
        self.params = read_yaml(params_file_path)
        self.schema = read_yaml(schema_file_path)

        create_directories([self.config.artifacts_root])

    def get_model_trainer_config(self) -> ModelTrainerConfig:
        """Build the settings object for the model-training stage.

        Creates the trainer's output directory as a side effect.

        Returns:
            A ``ModelTrainerConfig`` populated from the ``model_trainer``
            section of the config file, the ``ElasticNet`` section of the
            params file and the ``TARGET_COLUMN`` entry of the schema file.
        """
        trainer_section = self.config.model_trainer
        elastic_net_params = self.params.ElasticNet

        create_directories([trainer_section.root_dir])

        # The dataclass declares these three fields as Path; wrap the raw
        # YAML values so the objects handed out match that declaration.
        return ModelTrainerConfig(
            root_dir=Path(trainer_section.root_dir),
            train_data_path=Path(trainer_section.train_data_path),
            test_data_path=Path(trainer_section.test_data_path),
            model_name=trainer_section.model_name,
            alpha=elastic_net_params.alpha,
            l1_ratio=elastic_net_params.l1_ratio,
            target_column=self.schema.TARGET_COLUMN.name,
        )
    

In [9]:
import pandas as pd
import joblib
from src.mlflow_demo import logger
from sklearn.linear_model import ElasticNet
import os

class ModelTrainer:
    """Fits an ElasticNet regressor on the training CSV and saves it to disk."""

    def __init__(self, config: ModelTrainerConfig) -> None:
        """Keep the settings that ``train`` reads.

        Args:
            config: Paths, hyper-parameters and target column for training.
        """
        self.config = config

    def train(self) -> None:
        """Fit the model on the training split and persist it with joblib.

        The training CSV is split into features (every column except the
        target) and the target column. An ``ElasticNet`` configured with the
        ``alpha`` and ``l1_ratio`` from the config and a fixed
        ``random_state`` of 42 is fitted, then dumped to
        ``<root_dir>/<model_name>``.

        Only the training split is read here; fitting does not use the test
        split, so it is not loaded.
        """
        target = self.config.target_column
        train = pd.read_csv(self.config.train_data_path)

        train_x = train.drop(columns=[target])
        # y is deliberately a one-column DataFrame rather than a Series.
        # NOTE(review): switching to a Series may change the shape of the
        # fitted estimator's attributes — confirm before changing.
        train_y = train[[target]]

        model = ElasticNet(alpha=self.config.alpha,
                           l1_ratio=self.config.l1_ratio,
                           random_state=42)
        model.fit(train_x, train_y)

        # Save the model
        joblib.dump(model, os.path.join(self.config.root_dir, self.config.model_name))

[2023-07-31 22:09:41,736: INFO: utils: Note: NumExpr detected 12 cores but "NUMEXPR_MAX_THREADS" not set, so enforcing safe limit of 8.]
[2023-07-31 22:09:41,742: INFO: utils: NumExpr defaulting to 8 threads.]


In [11]:
# Run the training stage end to end: load the configuration, build the
# trainer from it, then fit and save the model.
try:
    manager = ConfigurationManager()
    config = manager.get_model_trainer_config()

    process = ModelTrainer(config=config)
    process.train()
except Exception:
    # Nothing is handled here; the error is propagated to the notebook.
    # A bare `raise` is the idiomatic way to re-raise the active exception.
    raise

[2023-07-31 22:12:24,737: INFO: common: Yaml config/config.yml loaded successfully.]
[2023-07-31 22:12:24,741: INFO: common: Yaml params.yml loaded successfully.]
[2023-07-31 22:12:24,744: INFO: common: Yaml schema.yml loaded successfully.]
[2023-07-31 22:12:24,745: INFO: common: created directory: artifacts]
[2023-07-31 22:12:24,747: INFO: common: created directory: artifacts/model_trainer]
