In [39]:
# Display the kernel's current working directory.
%pwd

'd:\\ML_Deployment\\Automadata'

In [2]:
import os 
os.chdir('../')
%pwd

'd:\\ML_Deployment\\Automadata'

In [60]:
import numpy as np 
import pandas as pd 
from dataclasses import dataclass
from pathlib import Path
import numpy as np 
import json
from src.logging import logger
from sklearn.svm import SVC
from sklearn.ensemble import RandomForestClassifier
from xgboost import XGBClassifier, DMatrix
from sklearn.model_selection import train_test_split, GridSearchCV
from sklearn.metrics import f1_score, accuracy_score, roc_auc_score
from src.constants import *
from src.utils.common import *
from typing import Dict, Union, List

In [61]:
@dataclass(frozen=True)
class ModelTrainerConfig:
    """Immutable settings consumed by the model-training stage.

    Instances are built by ``ConfigManager.get_model_trainer_config``.
    """

    # Directory the stage writes into ('best_model_info.json', 'model.joblib').
    root_dir: Path
    # File loaded with np.load to obtain the training array.
    train_data_path: Path
    # File loaded with np.load to obtain the test array.
    test_data_path: Path
    # Hyper-parameter grid handed to GridSearchCV as `param_grid`.
    model_param_grid: dict 

class ConfigManager:
    """Reads the project's YAML files and builds per-stage config objects."""

    def __init__(
        self,
        config_filepath=CONFIG_FILE_PATH,
        params_filepath=PARAMS_FILE_PATH,
        schema_filepath=SCHEMA_FILE_PATH
    ):
        """Load the config, params and schema YAML files and create the artifacts root.

        Args:
            config_filepath: Path of the main configuration YAML.
            params_filepath: Path of the parameters YAML.
            schema_filepath: Path of the schema YAML.
        """
        # Loaded in the same order as the attribute names: config, params, schema.
        yaml_sources = (
            ("config", config_filepath),
            ("params", params_filepath),
            ("schema", schema_filepath),
        )
        for attribute, yaml_path in yaml_sources:
            setattr(self, attribute, read_yaml(yaml_path))

        artifacts_root = self.config.artifacts_root
        create_directories([artifacts_root])

    def get_model_trainer_config(self) -> ModelTrainerConfig:
        """Assemble the ``ModelTrainerConfig`` for the training stage.

        Creates the stage's ``root_dir`` as a side effect.

        Returns:
            ModelTrainerConfig: Paths from the ``model_trainer`` config section
            plus the ``search_grid`` entry of the params file (an empty dict
            when that entry is absent).
        """
        trainer_section = self.config.model_trainer
        search_grid = self.params.get('search_grid', {})

        create_directories([trainer_section.root_dir])

        return ModelTrainerConfig(
            root_dir=trainer_section.root_dir,
            train_data_path=trainer_section.train_data_path,
            test_data_path=trainer_section.test_data_path,
            model_param_grid=search_grid,
        )


In [66]:
class ModelTrainer:
    """Tunes an XGBoost classifier with a grid search and persists the winner.

    Attributes:
        config: Paths and hyper-parameter grid for this stage.
        model_info: Summary of the most recent tuning run; filled in by
            ``model_tuning`` with the keys ``'best_params'`` and ``'f1_score'``.
    """

    def __init__(self, config: ModelTrainerConfig):
        self.config = config
        self.model_info = {}

    def read_transformed_data(self):
        """Load the train and test arrays and split each into features/target.

        Both files are read with ``np.load`` and indexed as 2-D arrays: the
        last column is taken as the target, every preceding column as a
        feature.

        Returns:
            tuple: ``(X_train, y_train, X_test, y_test)``.
        """
        train_data = np.load(self.config.train_data_path)
        test_data = np.load(self.config.test_data_path)
        X_train, y_train = train_data[:, :-1], train_data[:, -1]
        X_test, y_test = test_data[:, :-1], test_data[:, -1]
        return X_train, y_train, X_test, y_test

    def model_tuning(self):
        """Grid-search the classifier on the training data and save the result.

        Runs a 5-fold ``GridSearchCV`` scored by F1 over
        ``config.model_param_grid``, then writes two files into
        ``config.root_dir``:

        * ``best_model_info.json`` - the best parameters and the best mean
          cross-validated F1 score.
        * ``model.joblib`` - the best estimator, saved through ``save_bin``.

        The test split is loaded but not used here.
        """
        X_train, y_train, _, _ = self.read_transformed_data()

        xgb_params = {
            'tree_method': 'gpu_hist',
            'gpu_id': 0,
        }
        grid_search = GridSearchCV(
            estimator=XGBClassifier(**xgb_params),
            param_grid=self.config.model_param_grid,
            scoring='f1',
            cv=5,
            n_jobs=-1,
            verbose=1,
        )
        grid_search.fit(X_train, y_train)

        # GridSearchCV refits by default (refit=True): best_estimator_ is
        # already trained on the full training set with the best parameters.
        # Re-creating and re-fitting an identical XGBClassifier by hand cost a
        # second full training run, and would raise a duplicate-keyword
        # TypeError if the grid ever included 'tree_method' or 'gpu_id'.
        best_model = grid_search.best_estimator_

        self.model_info['best_params'] = grid_search.best_params_
        # Stored as a plain Python float so the JSON dump never depends on
        # NumPy scalar handling.
        self.model_info['f1_score'] = float(grid_search.best_score_)

        with open(os.path.join(self.config.root_dir, 'best_model_info.json'), 'w') as f:
            json.dump(self.model_info, f)

        save_bin(best_model, path=os.path.join(self.config.root_dir, 'model.joblib'))
            
        
        
        

In [67]:
# Build the stage configuration, then run the grid search end to end.
# The trainer gets its own name instead of overwriting `model_trainer_config`,
# so the config object stays inspectable after the cell has run.
# Errors are left to propagate with their original traceback.
config = ConfigManager()
model_trainer_config = config.get_model_trainer_config()
model_trainer = ModelTrainer(config=model_trainer_config)
model_trainer.model_tuning()

[2024-01-15 11:43:24,438: INFO: common: yaml file: config\config.yaml loaded successfully]
[2024-01-15 11:43:24,441: INFO: common: yaml file: params\params.yaml loaded successfully]
[2024-01-15 11:43:24,445: INFO: common: yaml file: schema.yaml loaded successfully]
[2024-01-15 11:43:24,446: INFO: common: created directory at: artifacts]
[2024-01-15 11:43:24,446: INFO: common: created directory at: artifacts/model_trainer]
Fitting 5 folds for each of 486 candidates, totalling 2430 fits
