In [1]:
import os

In [2]:
%pwd

'c:\\Users\\frup00090410\\Mlops_project\\research'

In [3]:
# Step up one level from the notebook's folder so that project-relative paths
# (e.g. the config and params YAML files) resolve from the project root.
# Not idempotent: re-running this cell climbs another directory level.
os.chdir(os.pardir)

In [4]:
%pwd

'c:\\Users\\frup00090410\\Mlops_project'

In [5]:
from dataclasses import dataclass
from pathlib import Path


@dataclass(frozen=True)
class ModelTrainingConfig:
    """Immutable bundle of the paths and hyper-parameters used by model training.

    Instances are frozen: assigning to a field after construction raises
    ``dataclasses.FrozenInstanceError``.
    """

    # --- filesystem locations -------------------------------------------
    # Working directory of the model-training stage.
    root_dir: Path
    # Directory that is scanned for the preprocessed ``*.pickle`` splits.
    preprocessed_spilitted_data_path: Path
    # Location under which the trained ``model.h5`` file is written.
    model_path: Path

    # --- fit() settings ---------------------------------------------------
    batch_size: int
    epochs: int
    # Width of the network's input layer (size of one feature vector).
    max_words: int
    # Fraction of the training data held out for validation during fit().
    validation_split: float

    # --- Adam optimizer settings ------------------------------------------
    learning_rate: float
    beta_1: float
    beta_2: float

In [6]:
from Classifier.constants import *
from Classifier.utils.common import read_yaml, create_directories, write_to_pickle

In [7]:
class ConfigurationManager:
    """Loads the project's config and params YAML files and builds stage configs.

    Constructing the manager reads both YAML files and creates the artifacts
    root directory named in the config file.
    """

    def __init__(self, config_filepath=CONFIG_FILE_PATH, params_filepath=PARAMS_FILE_PATH):
        """Read both YAML files and make sure the artifacts root exists.

        Args:
            config_filepath: Path of the YAML file holding paths/locations.
            params_filepath: Path of the YAML file holding hyper-parameters.
        """
        self.config = read_yaml(config_filepath)
        self.params = read_yaml(params_filepath)

        artifacts_root = self.config.artifacts_root
        create_directories([artifacts_root])

    def get_model_training_config(self) -> ModelTrainingConfig:
        """Build the model-training configuration.

        Creates the stage's root directory, then combines the paths from the
        ``model_training`` section of the config file with the upper-case
        hyper-parameters from the params file.

        Returns:
            ModelTrainingConfig: Frozen settings object for the training stage.
        """
        section = self.config.model_training

        create_directories([section.root_dir])

        return ModelTrainingConfig(
            # Paths come from the config file ...
            root_dir=Path(section.root_dir),
            preprocessed_spilitted_data_path=Path(section.preprocessed_spilitted_data_path),
            model_path=Path(section.model_path),
            # ... hyper-parameters come from the params file.
            batch_size=self.params.BATCH_SIZE,
            epochs=self.params.EPOCHS,
            max_words=self.params.MAX_WORDS,
            validation_split=self.params.VALIDATION_SPLIT,
            learning_rate=self.params.LEARNING_RATE,
            beta_1=self.params.BETA_1,
            beta_2=self.params.BETA_2,
        )

In [43]:
import os
import json
import numpy as np
from cnnClassifier import logger
from tensorflow import keras
from tensorflow.keras.layers import Activation, Dense, Dropout
from tensorflow.keras.models import Sequential
from keras import optimizers
import pickle

In [51]:
class ModelTraining:
    """Trains a small dense softmax classifier on pickled data and saves it."""

    # Entries that train_model() needs to find in the dict it is given.
    _REQUIRED_TRAIN_FILES = (
        'X_train_preprocessed.pickle',
        'y_train_preprocessed.pickle',
    )

    def __init__(self, config: "ModelTrainingConfig"):
        """Store the training configuration.

        Args:
            config: Settings object providing the data/model paths and the
                hyper-parameters read by the methods below. The annotation is
                a string so the class can be defined regardless of cell order.
        """
        self.config = config

    def read_pickle_files(self) -> dict:
        """Load every ``*.pickle`` file from the preprocessed-data directory.

        Returns:
            dict: Maps each file name (including the ``.pickle`` suffix) to
            the object unpickled from it. Other files are ignored.

        Raises:
            FileNotFoundError: If the configured directory does not exist.
        """
        data_dir = self.config.preprocessed_spilitted_data_path
        data = {}
        # Sorted so the resulting dict has a deterministic key order.
        for filename in sorted(os.listdir(data_dir)):
            if not filename.endswith('.pickle'):
                continue
            # NOTE(security): unpickling can execute arbitrary code. Only point
            # this at files produced by this project's own preprocessing step.
            with open(os.path.join(data_dir, filename), 'rb') as f:
                data[filename] = pickle.load(f)
        return data

    def train_model(self, data: dict):
        """Build, fit and save the classifier.

        The network is Dense(512) -> ReLU -> Dropout(0.5) -> Dense(num_classes)
        -> softmax, compiled with categorical cross-entropy and Adam, and is
        written to ``<model_path>/model.h5`` after fitting.

        Args:
            data (dict): Mapping as returned by ``read_pickle_files``. Must
                contain ``'X_train_preprocessed.pickle'`` and
                ``'y_train_preprocessed.pickle'``. Test-split entries may be
                present but are not used here.

        Raises:
            KeyError: If a required training entry is missing from ``data``.
        """
        missing = [name for name in self._REQUIRED_TRAIN_FILES if name not in data]
        if missing:
            raise KeyError(f"missing required training data: {missing}")

        x_train = data['X_train_preprocessed.pickle']
        y_train = data['y_train_preprocessed.pickle']

        # One output unit per label column.
        # assumes y_train is a 2-D one-hot array (samples, classes) — TODO confirm
        num_classes = y_train.shape[1]

        model = Sequential()
        model.add(Dense(512, input_shape=(self.config.max_words,)))
        model.add(Activation("relu"))
        model.add(Dropout(0.5))
        model.add(Dense(num_classes))
        model.add(Activation("softmax"))

        # `decay` is intentionally not passed: the previous value was 0.0
        # (no decay), and current Keras optimizers reject the legacy `decay`
        # keyword with a ValueError.
        adam = optimizers.Adam(
            learning_rate=self.config.learning_rate,
            beta_1=self.config.beta_1,
            beta_2=self.config.beta_2,
            epsilon=1e-08,
            amsgrad=False,
        )

        model.compile(loss='categorical_crossentropy', optimizer=adam, metrics=['accuracy'])

        model.fit(
            x_train,
            y_train,
            batch_size=self.config.batch_size,
            epochs=self.config.epochs,
            verbose=1,
            validation_split=self.config.validation_split,
        )

        model.save(os.path.join(self.config.model_path, 'model.h5'))
        
        

In [52]:
# Assemble the training stage from the YAML-backed configuration.
config = ConfigurationManager()
model_training_config = config.get_model_training_config()
model_training = ModelTraining(config=model_training_config)

# Load the pickled splits first, then hand them to the trainer.
pickled_splits = model_training.read_pickle_files()
model_training.train_model(pickled_splits)

[2023-12-26 10:11:05,515: INFO: common: yaml file: config\config.yaml loaded successfully]
[2023-12-26 10:11:05,525: INFO: common: yaml file: params.yaml loaded successfully]
[2023-12-26 10:11:05,528: INFO: common: created directory at: artifacts]
[2023-12-26 10:11:05,533: INFO: common: created directory at: artifacts/model_training]
