In [1]:
import os

%pwd

'/Users/wilsvenleong/Downloads/learning-materials/mlflow/research'

In [2]:
# Move up one level to the project root so that the relative paths used by the
# rest of the notebook (e.g. config/config.yaml, artifacts/...) resolve.
# NOTE: this cell is not idempotent -- re-running it without restarting the
# kernel moves the working directory up one more level each time.
os.chdir("../")
%pwd

'/Users/wilsvenleong/Downloads/learning-materials/mlflow'

## 1. Update `config.yaml`

```yaml
model_trainer:
  root_dir: artifacts/model_trainer
  train_data_path: artifacts/data_transformation/train.csv
  test_data_path: artifacts/data_transformation/test.csv
  model_name: model.joblib
```

## 3. Update `params.yaml`

```yaml
ALPHA: 0.5
L1_RATIO: 0.7
RANDOM_STATE: 42
```

## 4. Update the Entity

In [3]:
from dataclasses import dataclass
from pathlib import Path


@dataclass(frozen=True)
class ModelTrainerConfig:
    """Immutable bundle of settings for the model-training stage.

    The path and file-name fields come from the ``model_trainer`` section of
    ``config.yaml``, the hyperparameters from ``params.yaml`` and the target
    column from the schema file.
    """

    # Directory the trained model file is written into.
    root_dir: Path
    # CSV file that is read for training.
    train_data_path: Path
    # CSV file with the test split (carried in the config; not read during training).
    test_data_path: Path
    # File name of the serialized model inside ``root_dir``.
    model_name: str
    # ElasticNet ``alpha`` hyperparameter (``ALPHA`` in params.yaml).
    alpha: float
    # ElasticNet ``l1_ratio`` hyperparameter (``L1_RATIO`` in params.yaml).
    l1_ratio: float
    # Seed passed to the estimator (``RANDOM_STATE`` in params.yaml).
    random_state: int
    # Name of the column that holds the prediction target.
    target_column: str

## 5. Update the `ConfigurationManager` in `src/config/configuration.py`

In [4]:
from mlflow_project.constants import *
from mlflow_project.utils.common import read_yaml, create_directories

from mlflow_project import logger


class ConfigurationManager:
    """Loads the project's YAML settings and builds per-stage config objects.

    The config, params and schema files are read once at construction time and
    kept on the instance as ``config``, ``params`` and ``schema``.
    """

    def __init__(
        self,
        config_file_path: Path = CONFIG_FILE_PATH,
        params_file_path: Path = PARAMS_FILE_PATH,
        schema_file_path: Path = SCHEMA_FILE_PATH,
    ):
        """
        Reads the three YAML files and creates the artifacts root directory.

        Args:
            config_file_path (Path): Location of the main configuration file.
            params_file_path (Path): Location of the hyperparameter file.
            schema_file_path (Path): Location of the schema file.
        """
        self.config = read_yaml(config_file_path)
        self.params = read_yaml(params_file_path)
        self.schema = read_yaml(schema_file_path)

        create_directories([self.config.artifacts_root])

    def get_model_trainer_config(self) -> ModelTrainerConfig:
        """
        Creates the model trainer's root directory (with a ``.gitkeep``
        placeholder file) and returns the configuration for model trainer.

        Paths and the model file name are taken from the ``model_trainer``
        section of the config file, the ElasticNet hyperparameters from the
        params file and the target column name from the schema file. Each
        value is explicitly cast to the type declared on ``ModelTrainerConfig``.

        Returns:
            ModelTrainerConfig: Configuration for model trainer.
        """
        model_trainer = self.config.model_trainer
        target = self.schema.target

        create_directories([model_trainer.root_dir])

        # Drop an empty placeholder file into the directory -- presumably so
        # that version control keeps the otherwise empty folder. An existing
        # placeholder is left untouched.
        gitkeep_path = Path(model_trainer.root_dir) / ".gitkeep"
        if not gitkeep_path.exists():
            gitkeep_path.touch()
            logger.info(
                f"Creating file: .gitkeep in directory {model_trainer.root_dir}"
            )

        return ModelTrainerConfig(
            root_dir=Path(model_trainer.root_dir),
            train_data_path=Path(model_trainer.train_data_path),
            test_data_path=Path(model_trainer.test_data_path),
            model_name=str(model_trainer.model_name),
            alpha=float(self.params.ALPHA),
            l1_ratio=float(self.params.L1_RATIO),
            random_state=int(self.params.RANDOM_STATE),
            target_column=str(target.name),
        )

## 6. Update the Components

In [5]:
import os
import pandas as pd
from sklearn.linear_model import ElasticNet
import joblib


class ModelTrainer:
    """Fits an ElasticNet regressor on the training CSV and saves it to disk."""

    def __init__(self, config: ModelTrainerConfig):
        """
        Stores the settings used by ``train``.

        Args:
            config (ModelTrainerConfig): Paths, hyperparameters and target
                column name for the training run.
        """
        self.config = config

    def train(self) -> None:
        """
        Trains the model and persists it.

        Reads the training CSV, separates the target column from the remaining
        columns, fits an ElasticNet with the configured ``alpha``,
        ``l1_ratio`` and ``random_state``, and dumps the fitted estimator with
        joblib to ``<root_dir>/<model_name>``.
        """
        cfg = self.config
        label = cfg.target_column

        frame = pd.read_csv(cfg.train_data_path)
        features = frame.drop([label], axis=1)
        # Double brackets keep the target as a single-column DataFrame.
        labels = frame[[label]]

        regressor = ElasticNet(
            alpha=cfg.alpha,
            l1_ratio=cfg.l1_ratio,
            random_state=cfg.random_state,
        )
        regressor.fit(features, labels)

        model_path = os.path.join(cfg.root_dir, cfg.model_name)
        joblib.dump(regressor, model_path)

## 7. Update the Pipeline

In [6]:
# Run the model-training stage end to end: load the configuration, build the
# trainer from it and fit/save the model.
try:
    config_manager = ConfigurationManager()
    model_trainer_config = config_manager.get_model_trainer_config()
    model_trainer = ModelTrainer(config=model_trainer_config)
    model_trainer.train()
except Exception as e:
    # Record the failure (with traceback) through the project logger, then
    # re-raise the active exception unchanged so the cell still fails visibly.
    logger.exception(e)
    raise

[ 2023-10-25 00:10:29,710 ] 33 common mlflow_project -  INFO - Loaded YAML file successfully from: config/config.yaml
[ 2023-10-25 00:10:29,714 ] 33 common mlflow_project -  INFO - Loaded YAML file successfully from: params.yaml
[ 2023-10-25 00:10:29,717 ] 33 common mlflow_project -  INFO - Loaded YAML file successfully from: schema.yaml
[ 2023-10-25 00:10:29,719 ] 53 common mlflow_project -  INFO - Created directory at: artifacts
[ 2023-10-25 00:10:29,721 ] 53 common mlflow_project -  INFO - Created directory at: artifacts/model_trainer
[ 2023-10-25 00:10:29,723 ] 36 1994247948 mlflow_project -  INFO - Creating file: .gitkeep in directory artifacts/model_trainer
