In [1]:
from cmath import log
import importlib
from pyexpat import model
import numpy as np
import yaml
from housing.exception import HousingException
import os
import sys

from collections import namedtuple
from typing import List
from housing.logger import logging
from sklearn.metrics import r2_score,mean_squared_error
GRID_SEARCH_KEY = 'grid_search'
MODULE_KEY = 'module'
CLASS_KEY = 'class'
PARAM_KEY = 'params'
MODEL_SELECTION_KEY = 'model_selection'
SEARCH_PARAM_GRID_KEY = "search_param_grid"


In [100]:
class ModelFactory:
    def __init__(self, model_config_path: str = None,):
        try:
            self.config: dict = ModelFactory.read_params(model_config_path)

            self.grid_search_cv_module: str = self.config[GRID_SEARCH_KEY][MODULE_KEY]
            self.grid_search_class_name: str = self.config[GRID_SEARCH_KEY][CLASS_KEY]
            self.grid_search_property_data: dict = dict(self.config[GRID_SEARCH_KEY][PARAM_KEY])

            self.models_initialization_config: dict = dict(self.config[MODEL_SELECTION_KEY])

            self.initialized_model_list = None
            self.grid_searched_best_model_list = None

        except Exception as e:
            raise HousingException(e, sys) from e
 
    @staticmethod
    def read_params(config_path: str) -> dict:
        try:
            with open(config_path) as yaml_file:
                config:dict = yaml.safe_load(yaml_file)
            return config
        except Exception as e:
            raise HousingException(e, sys) from e       
        
    @staticmethod
    def class_for_name(module_name:str, class_name:str):
        try:
            # load the module, will raise ImportError if module cannot be loaded
            module = importlib.import_module(module_name)
            # get the class, will raise AttributeError if class cannot be found
            logging.info(f"Executing command: from {module} import {class_name}")
            class_ref = getattr(module, class_name)
            return class_ref
        except Exception as e:
            raise HousingException(e, sys) from e
        
    @staticmethod
    def update_property_of_class(instance_ref:object, property_data: dict):
        try:
            if not isinstance(property_data, dict):
                raise Exception("property_data parameter required to dictionary")
            print(property_data)
            for key, value in property_data.items():
                logging.info(f"Executing:$ {str(instance_ref)}.{key}={value}")
                setattr(instance_ref, key, value)
            return instance_ref
        except Exception as e:
            raise HousingException(e, sys) from e    

    

In [101]:
model_config_file_path="F:\MachineLearningProject\Project_ML\config\model.yaml"

In [102]:
factory = ModelFactory(model_config_path=model_config_file_path)

In [11]:
os.path.exists(model_config_file_path)

True

In [30]:
conf = factory.config
conf

{'grid_search': {'class': 'GridSearchCV',
  'module': 'sklearn.model_selection',
  'params': {'cv': 5, 'verbose': 2}},
 'model_selection': {'module_0': {'class': 'LinearRegression',
   'module': 'sklearn.linear_model',
   'params': {'fit_intercept': True},
   'search_param_grid': {'fit_intercept': [True, False]}},
  'module_1': {'class': 'RandomForestRegressor',
   'module': 'sklearn.ensemble',
   'params': {'min_samples_leaf': 3},
   'search_param_grid': {'min_samples_leaf': [4, 6],
    'n_estimators': [100, 150],
    'max_depth': [3, 5, 7, 9]}}}}

In [19]:
conf[GRID_SEARCH_KEY]

{'class': 'GridSearchCV',
 'module': 'sklearn.model_selection',
 'params': {'cv': 5, 'verbose': 2}}

In [20]:
conf[GRID_SEARCH_KEY][MODULE_KEY]

'sklearn.model_selection'

In [21]:
conf[GRID_SEARCH_KEY][PARAM_KEY]

{'cv': 5, 'verbose': 2}

In [25]:
(conf[MODEL_SELECTION_KEY])

{'module_0': {'class': 'LinearRegression',
  'module': 'sklearn.linear_model',
  'params': {'fit_intercept': True},
  'search_param_grid': {'fit_intercept': [True, False]}},
 'module_1': {'class': 'RandomForestRegressor',
  'module': 'sklearn.ensemble',
  'params': {'min_samples_leaf': 3},
  'search_param_grid': {'min_samples_leaf': [4, 6],
   'n_estimators': [100, 150],
   'max_depth': [3, 5, 7, 9]}}}

In [45]:
class_name=conf[GRID_SEARCH_KEY][CLASS_KEY]
class_name

'GridSearchCV'

In [77]:
property_data = factory.grid_search_property_data
property_data

{'cv': 5, 'verbose': 2}

In [None]:
p

In [78]:
setattr(gsv,'cv',5)

In [83]:
lr = getattr(importlib.import_module('sklearn.linear_model'),'LinearRegression')

In [86]:
model = lr()
model.get_params()

{'copy_X': True,
 'fit_intercept': True,
 'n_jobs': None,
 'normalize': 'deprecated',
 'positive': False}

In [87]:
setattr(model,'fit_intercept',False)

In [88]:
model.get_params()

{'copy_X': True,
 'fit_intercept': False,
 'n_jobs': None,
 'normalize': 'deprecated',
 'positive': False}

In [95]:
for i in factory.models_initialization_config.keys():
     print(factory.models_initialization_config[i][PARAM_KEY])

{'fit_intercept': True}
{'min_samples_leaf': 3}


In [97]:
factory.models_initialization_config['module_0'][SEARCH_PARAM_GRID_KEY]

{'fit_intercept': [True, False]}

In [98]:
InitializedModelDetail = namedtuple("InitializedModelDetail",
                                    ["model_serial_number", "model", "param_grid_search", "model_name"])

In [103]:
initialized_model_list = []
for model_serial_number in factory.models_initialization_config.keys():

    model_initialization_config = factory.models_initialization_config[model_serial_number]
    model_obj_ref = ModelFactory.class_for_name(module_name=model_initialization_config[MODULE_KEY],
                                                class_name=model_initialization_config[CLASS_KEY]
                                                )
    model = model_obj_ref()
    
    if PARAM_KEY in model_initialization_config:
        model_obj_property_data = dict(model_initialization_config[PARAM_KEY])
        model = ModelFactory.update_property_of_class(instance_ref=model,
                                                        property_data=model_obj_property_data)

    param_grid_search = model_initialization_config[SEARCH_PARAM_GRID_KEY]
    model_name = f"{model_initialization_config[MODULE_KEY]}.{model_initialization_config[CLASS_KEY]}"

    model_initialization_config = InitializedModelDetail(model_serial_number=model_serial_number,
                                                            model=model,
                                                            param_grid_search=param_grid_search,
                                                            model_name=model_name
                                                            )

    initialized_model_list.append(model_initialization_config)

factory.initialized_model_list = initialized_model_list
print(factory.initialized_model_list)

{'fit_intercept': True}
{'min_samples_leaf': 3}
[InitializedModelDetail(model_serial_number='module_0', model=LinearRegression(), param_grid_search={'fit_intercept': [True, False]}, model_name='sklearn.linear_model.LinearRegression'), InitializedModelDetail(model_serial_number='module_1', model=RandomForestRegressor(min_samples_leaf=3), param_grid_search={'min_samples_leaf': [4, 6], 'n_estimators': [100, 150], 'max_depth': [3, 5, 7, 9]}, model_name='sklearn.ensemble.RandomForestRegressor')]


In [107]:
type(initialized_model_list[0])

__main__.InitializedModelDetail

In [111]:
initialized_model_list[0].param_grid_search

{'fit_intercept': [True, False]}

In [113]:
model_details = initialized_model_list
model_details


[InitializedModelDetail(model_serial_number='module_0', model=LinearRegression(), param_grid_search={'fit_intercept': [True, False]}, model_name='sklearn.linear_model.LinearRegression'),
 InitializedModelDetail(model_serial_number='module_1', model=RandomForestRegressor(min_samples_leaf=3), param_grid_search={'min_samples_leaf': [4, 6], 'n_estimators': [100, 150], 'max_depth': [3, 5, 7, 9]}, model_name='sklearn.ensemble.RandomForestRegressor')]

In [115]:
for model_data in model_details:
    print(model_data.model_serial_number)
    if model_data.model_serial_number == model_serial_number:
        print(model_data)

module_0
module_1
InitializedModelDetail(model_serial_number='module_1', model=RandomForestRegressor(min_samples_leaf=3), param_grid_search={'min_samples_leaf': [4, 6], 'n_estimators': [100, 150], 'max_depth': [3, 5, 7, 9]}, model_name='sklearn.ensemble.RandomForestRegressor')
