In [1]:
import os 

In [2]:
# Show the kernel's current working directory before it is changed below.
%pwd

'c:\\Users\\tchok\\OneDrive\\Bureau\\My_github\\clustering-insured-population\\research'

In [3]:
# Move from the notebook folder up to the project root so that relative paths
# used by later cells resolve from there.
# The guard keeps this cell idempotent: running it a second time does not
# climb one more directory above the project root.
if os.path.basename(os.getcwd()) == "research":
    os.chdir(os.pardir)
os.getcwd()

'c:\\Users\\tchok\\OneDrive\\Bureau\\My_github\\clustering-insured-population'

In [4]:
from dataclasses import dataclass
from pathlib import Path

In [5]:
@dataclass(frozen=True) # frozen=True makes the dataclass immutable
class TrainingConfig:
    """Immutable bundle of the paths and parameters used by the training step.

    Instances are built by ``ConfigurationManager.get_training_config`` from
    the YAML configuration and parameter files.
    """

    # Directory of the training step; created before the config is built.
    root_dir: Path
    # Destination file the fitted model is dumped to with joblib.
    trained_model_path: Path
    # File the base model is loaded from with joblib before fitting.
    base_model_path: Path
    # Pickled training data that is unpickled and passed to ``fit``.
    training_data_path: Path
    # Value of ``n_clusters`` read from the ``kmeans`` section of the params file.
    param_n_clusters: int

In [6]:
from insuredSegmenter.constants import * 
from insuredSegmenter.utils.common import read_yaml, create_directories
from sklearn.cluster import KMeans 

In [7]:
class ConfigurationManager:
    """Parse the YAML configuration and parameter files and build step configs.

    Creating an instance reads both files and creates the directory named by
    the ``artifacts_root`` entry of the configuration file.
    """

    def __init__(
        self,
        config_path: Path = CONFIG_FILE_PATH,
        params_path: Path = PARAMS_FILE_PATH,
    ):
        """Load both YAML files and make sure the artifacts root exists.

        Args:
            config_path: Location of the configuration YAML file.
            params_path: Location of the parameters YAML file.
        """
        # read_yaml is given plain strings rather than Path objects.
        self.config = read_yaml(str(config_path))
        self.params = read_yaml(str(params_path))

        # create_directories takes a list of directories to create.
        artifacts_root = self.config.artifacts_root
        create_directories([artifacts_root])

    def get_training_config(self) -> TrainingConfig:
        """Assemble the ``TrainingConfig`` for the model-training step.

        Reads the ``train_model``, ``prepare_base_model`` and
        ``data_transformation`` sections of the configuration and the
        ``kmeans`` section of the parameters, creates the training root
        directory, and wraps every path in ``pathlib.Path``.

        Returns:
            A ``TrainingConfig`` populated from the loaded YAML content.
        """
        # Resolve every section first so a missing one fails before any
        # directory is created.
        train_section = self.config.train_model
        base_model_section = self.config.prepare_base_model
        kmeans_params = self.params.kmeans
        transformed_data_path = self.config.data_transformation.transformed_data_path

        create_directories([train_section.root_dir])

        return TrainingConfig(
            root_dir=Path(train_section.root_dir),
            base_model_path=Path(base_model_section.base_model_path),
            trained_model_path=Path(train_section.trained_model_path),
            training_data_path=Path(transformed_data_path),
            param_n_clusters=kmeans_params.n_clusters,
        )

In [8]:
import joblib
import pandas as pd
import numpy as np
import pickle

In [9]:
class training:
    """Fit the stored base model on the transformed data and persist the result.

    The usual call order is ``get_base_model()`` followed by ``train_model()``.
    ``train_model()`` loads the base model itself when that step was skipped.

    Attributes:
        config: Object exposing ``base_model_path``, ``training_data_path``
            and ``trained_model_path``.
        model: The object returned by ``joblib.load`` for the base model;
            ``None`` until it has been loaded.
    """

    def __init__(self, config: "TrainingConfig"):
        """Store the configuration; no file is touched at construction time."""
        self.config = config
        # Defined up front so train_model() can tell "not loaded yet" apart
        # from a loaded model instead of failing with AttributeError.
        self.model = None

    def _load_object(self, file_path: Path):
        """Unpickle and return the object stored at ``file_path``.

        Args:
            file_path: Path of the pickle file to read.

        Returns:
            Whatever object the file contains.

        Raises:
            Any exception raised by ``open`` or ``pickle.load`` propagates
            unchanged.
        """
        # NOTE(security): pickle.load can execute arbitrary code embedded in
        # the file. Only point this at artifacts produced by a trusted source.
        with open(file_path, "rb") as file:
            return pickle.load(file)

    def get_base_model(self):
        """Load the base model from ``config.base_model_path`` into ``self.model``."""
        self.model = joblib.load(self.config.base_model_path)

    def train_model(self):
        """Fit the model on the training data and save it.

        Loads the base model first if ``get_base_model()`` has not been
        called, unpickles the data at ``config.training_data_path``, calls
        ``fit`` on it and dumps the fitted model to
        ``config.trained_model_path``.
        """
        if self.model is None:
            self.get_base_model()

        # The unpickled object is passed to fit() as-is; no label column is
        # split off from it.
        features = self._load_object(self.config.training_data_path)

        self.model.fit(features)
        self._save_model(path=self.config.trained_model_path)

    def _save_model(self, path: Path):
        """Dump ``self.model`` to ``path`` with joblib.

        Args:
            path: Destination file; missing parent directories are created.
        """
        Path(path).parent.mkdir(parents=True, exist_ok=True)
        joblib.dump(self.model, path)
        


In [10]:
# Run the training stage end to end: build the configuration, load the base
# model, fit it and save the result.
# There is deliberately no try/except wrapper: a handler that only re-raises
# adds nothing, and any failure should stop the cell with its own traceback.
config = ConfigurationManager()
training_config = config.get_training_config()
training_instance = training(config=training_config)
training_instance.get_base_model()
training_instance.train_model()

[2025-05-14 17:56:05,363: INFO: common: YAML file loaded successfully: C:\Users\tchok\OneDrive\Bureau\My_github\clustering-insured-population\config\config.yaml]
[2025-05-14 17:56:05,383: INFO: common: YAML file loaded successfully: C:\Users\tchok\OneDrive\Bureau\My_github\clustering-insured-population\params.yaml]
[2025-05-14 17:56:05,389: INFO: common: created directory at: artifacts]
[2025-05-14 17:56:05,392: INFO: common: created directory at: artifacts/train_model]
