In [1]:
import os

In [2]:
%pwd

'd:\\Data Science\\NLP\\SentimentAnalysis\\research'

In [3]:
# Move the working directory one level up so that paths used later in the notebook
# are resolved from the parent folder instead of the notebook's own folder.
# NOTE(review): the target is relative to the *current* working directory, so running
# this cell again without restarting the kernel climbs one more level each time.
os.chdir("../")


In [4]:
%pwd

'd:\\Data Science\\NLP\\SentimentAnalysis'

In [27]:
from dataclasses import dataclass
from pathlib import Path

@dataclass(frozen=True)
class ModelTrainerConfig:
    """Read-only bundle of every setting the model-training step needs.

    Instances are frozen: assigning to a field after construction raises.
    Field order is part of the constructor signature and must not change.
    """

    # --- filesystem locations -------------------------------------------
    root_dir: Path             # directory the saved model path is joined onto
    X_train_data_path: Path    # training features
    y_train_data_path: Path    # training labels
    X_test_data_path: Path     # validation features
    y_test_data_path: Path     # validation labels
    saved_model_path: str      # joined onto root_dir when the model is written

    # --- network architecture -------------------------------------------
    top_words: int             # embedding input_dim is top_words + 1
    input_length: int          # passed to the embedding layer as input_length
    embedding_dim: int         # embedding output_dim
    rnn_state_size: int        # number of LSTM units
    dropout: float             # used for both the LSTM dropout and the Dropout layer

    # --- fitting ---------------------------------------------------------
    epochs: int
    batch_size: int

In [28]:
from SentimentAnalysis.constants import *
from SentimentAnalysis.utils.common import read_yaml
from SentimentAnalysis.utils.common import create_directories

In [29]:
class ConfigurationManager:
    """Reads the project's YAML files and turns them into typed config objects."""

    def __init__(
        self,
        config_file_path=CONFIG_FILE_PATH,
        params_file_path=PARAMS_FILE_PATH,
        schema_filepath=SCHEMA_FILE_PATH,
    ):
        """Load the three YAML files and create the data-store root directory.

        Args:
            config_file_path: Location of the main configuration YAML.
            params_file_path: Location of the hyperparameter YAML.
            schema_filepath: Location of the schema YAML.
        """
        self.config = read_yaml(config_file_path)
        self.params = read_yaml(params_file_path)
        self.schema = read_yaml(schema_filepath)

        create_directories([self.config.dataStore_root])

    def get_model_trainer_config(self) -> ModelTrainerConfig:
        """Assemble the settings for the model-training step.

        Paths come from the ``model_trainer`` section of the configuration file;
        architecture and fitting values come from the ``model_parameters`` and
        ``model_training_parameters`` sections of the parameter file. The
        trainer's root directory is created as a side effect.

        Returns:
            A populated ``ModelTrainerConfig``.
        """
        trainer_paths = self.config.model_trainer
        architecture = self.params.model_parameters
        fit_settings = self.params.model_training_parameters

        create_directories([trainer_paths.root_dir])

        return ModelTrainerConfig(
            # locations
            root_dir=trainer_paths.root_dir,
            X_train_data_path=trainer_paths.X_train_data_path,
            y_train_data_path=trainer_paths.y_train_data_path,
            X_test_data_path=trainer_paths.X_test_data_path,
            y_test_data_path=trainer_paths.y_test_data_path,
            saved_model_path=trainer_paths.saved_model_path,
            # architecture
            top_words=architecture.top_words,
            input_length=architecture.input_length,
            embedding_dim=architecture.embedding_dim,
            rnn_state_size=architecture.rnn_state_size,
            dropout=architecture.dropout,
            # fitting
            epochs=fit_settings.epochs,
            batch_size=fit_settings.batch_size,
        )

In [30]:
import os
import pandas as pd
import numpy as np
from SentimentAnalysis.logging import logger
from pathlib import Path
import tensorflow as tf
import joblib

In [31]:
class ModelTrainer:
    """Builds, fits and saves the LSTM sentiment classifier described by a config."""

    def __init__(self, config: "ModelTrainerConfig"):
        """Store the training configuration.

        Args:
            config: Paths and hyperparameters for this training run.
        """
        self.config = config
        # Take the vocabulary size from the config rather than a hard-coded 10000,
        # so this attribute always agrees with the value the embedding layer uses.
        self.top_words = config.top_words

    def _load_data(self):
        """Read train/test features with ``np.load`` and labels with ``pd.read_csv``."""
        X_train = np.load(self.config.X_train_data_path)
        X_test = np.load(self.config.X_test_data_path)

        y_train = pd.read_csv(self.config.y_train_data_path)
        y_test = pd.read_csv(self.config.y_test_data_path)
        return X_train, X_test, y_train, y_test

    def _build_model(self):
        """Create and compile the Embedding -> LSTM -> Dense(sigmoid) network."""
        # Start from a clean Keras state so repeated runs in one kernel do not
        # accumulate layers/names from earlier models.
        tf.keras.backend.clear_session()

        model = tf.keras.Sequential()
        # Embedding layer input = batch_size * length of each review.
        model.add(tf.keras.layers.Embedding(input_dim=self.config.top_words + 1,
                                            output_dim=self.config.embedding_dim,
                                            input_length=self.config.input_length))
        model.add(tf.keras.layers.BatchNormalization())
        model.add(tf.keras.layers.LSTM(units=self.config.rnn_state_size,
                                       dropout=self.config.dropout))
        model.add(tf.keras.layers.BatchNormalization())
        model.add(tf.keras.layers.Dropout(rate=self.config.dropout))
        # Single sigmoid unit: binary output, paired with binary cross-entropy below.
        model.add(tf.keras.layers.Dense(1, activation='sigmoid'))

        model.compile(optimizer='adam', loss='binary_crossentropy', metrics=['accuracy'])
        return model

    def train(self):
        """Fit the model on the training split and write it to disk.

        The test split is passed to ``fit`` as validation data. The fitted model
        is written to ``root_dir / saved_model_path``.

        Returns:
            The object returned by ``model.fit`` (the training history), so the
            caller can inspect or plot the per-epoch metrics.
        """
        X_train, X_test, y_train, y_test = self._load_data()
        model = self._build_model()

        history = model.fit(X_train, y_train,
                            epochs=self.config.epochs,
                            batch_size=self.config.batch_size,
                            validation_data=(X_test, y_test))

        # NOTE(review): this pickles the Keras model through joblib. Keras documents
        # `model.save(...)` as its persistence route; the call is kept unchanged here
        # because whatever loads this artifact presumably uses joblib.load — confirm
        # before switching formats.
        joblib.dump(model, os.path.join(self.config.root_dir, self.config.saved_model_path))

        return history


In [33]:


# Run the model-training stage: load configuration, then build, fit and save the model.
# Errors propagate unchanged; a handler that only re-raises would add nothing.
config_manager = ConfigurationManager()
model_trainer_config = config_manager.get_model_trainer_config()
# The trainer gets its own name so `model_trainer_config` keeps referring to the config.
model_trainer = ModelTrainer(config=model_trainer_config)
model_trainer.train()

[2023-08-12 14:22:51,126: INFO: common: YAML file loaded successfully: config\config.yaml]
[2023-08-12 14:22:51,129: INFO: common: YAML file loaded successfully: params.yaml]
[2023-08-12 14:22:51,131: INFO: common: YAML file loaded successfully: schema.yaml]
[2023-08-12 14:22:51,132: INFO: common: Created directory at: dataStore]
[2023-08-12 14:22:51,134: INFO: common: Created directory at: dataStore/model_trainer]


Epoch 1/10
Epoch 2/10

KeyboardInterrupt: 