In [1]:
import os

In [2]:
# Show the kernel's current working directory before it is changed below.
%pwd

'c:\\Users\\LENOVO\\1. Projects\\Bodycode-Anomaly-Detection-Project\\research'

In [3]:
# Step up from the notebook folder to the project root so that the relative
# config/artifact paths used by later cells resolve.
# Guarded on the folder name so that re-running this cell (without a kernel
# restart) does not climb one more level each time.
if os.path.basename(os.getcwd()) == "research":
    os.chdir("../")

In [4]:
# Confirm the working directory after the chdir in the previous cell.
%pwd

'c:\\Users\\LENOVO\\1. Projects\\Bodycode-Anomaly-Detection-Project'

In [5]:
from dataclasses import dataclass
from pathlib import Path
import os
import pandas as pd
import joblib
import keras_tuner as kt
from keras.models import Model
from keras.layers import Input, LSTM, Dense, RepeatVector, TimeDistributed
from keras.optimizers import Adam
from keras.callbacks import EarlyStopping
from sklearn.preprocessing import MinMaxScaler

from bodycoteAnomalyDetection.constants import CONFIG_FILE_PATH, PARAMS_FILE_PATH
from bodycoteAnomalyDetection.utils.common import read_yaml, create_directories




  import kerastuner as kt


In [6]:
@dataclass(frozen=True)
class ModelTrainerConfig:
    """Immutable bundle of settings for the model-training stage.

    Instances are built by ``ConfigurationManager.get_model_trainer_config``
    and consumed by ``ModelTrainer``. ``frozen=True`` means fields cannot be
    reassigned after construction.

    NOTE(review): the path fields are annotated as ``Path`` but the
    configuration manager passes the values through exactly as read from the
    YAML file, with no ``Path(...)`` conversion -- confirm the actual runtime
    type before relying on ``Path`` methods.
    """
    # --- Locations (from the ``model_trainer`` section of the config file) ---
    root_dir: Path   # output folder: scaler, tuner directory and saved model go here
    data_path: Path  # CSV file read by ModelTrainer.load_data
    model_ckpt: Path  # not read by ModelTrainer in this notebook
    # --- Hyperparameter search space (candidate values for hp.Choice) ---
    learning_rate_range: list
    batch_size_range: list  # not read by ModelTrainer in this notebook
    epochs_range: list      # not read by ModelTrainer in this notebook
    encoder_units_1_range: list
    encoder_units_2_range: list
    # --- Tuner settings (passed to kt.RandomSearch / tuner.search) ---
    max_trials: int
    executions_per_trial: int
    training_epochs: int
    training_validation_split: float
    training_batch_size: int
    num_trials: int  # passed to tuner.get_best_hyperparameters; the first result is used
    # --- Final fit with the best hyperparameters ---
    early_stopping_patience: int
    restore_best_weights: bool
    final_epochs: int
    final_batch_size: int
    final_validation_split: float

In [7]:
class ConfigurationManager:
    """Read the project's YAML config/params files and build stage configs."""

    def __init__(self, config_filepath=CONFIG_FILE_PATH, params_filepath=PARAMS_FILE_PATH):
        """Load both YAML files and make sure the artifacts root exists.

        Args:
            config_filepath: Location of the config YAML file.
            params_filepath: Location of the params YAML file.
        """
        self.config = read_yaml(config_filepath)
        self.params = read_yaml(params_filepath)
        artifacts_root = self.config.artifacts_root
        create_directories([artifacts_root])

    def get_model_trainer_config(self) -> ModelTrainerConfig:
        """Assemble the ``ModelTrainerConfig`` for the training stage.

        Creates the stage's ``root_dir`` as a side effect. Location fields
        come from the ``model_trainer`` section of the config file; all other
        fields come from the ``TrainingArguments`` section of the params file.

        Returns:
            A populated ``ModelTrainerConfig``.
        """
        trainer_section = self.config.model_trainer
        training_args = self.params.TrainingArguments
        create_directories([trainer_section.root_dir])

        # Field names grouped by the section they are read from; each name is
        # identical on the source object and on ModelTrainerConfig.
        location_fields = ("root_dir", "data_path", "model_ckpt")
        training_fields = (
            "learning_rate_range",
            "batch_size_range",
            "epochs_range",
            "encoder_units_1_range",
            "encoder_units_2_range",
            "max_trials",
            "executions_per_trial",
            "training_epochs",
            "training_validation_split",
            "training_batch_size",
            "num_trials",
            "early_stopping_patience",
            "restore_best_weights",
            "final_epochs",
            "final_batch_size",
            "final_validation_split",
        )

        settings = {name: getattr(trainer_section, name) for name in location_fields}
        for name in training_fields:
            settings[name] = getattr(training_args, name)

        return ModelTrainerConfig(**settings)

In [8]:
class ModelTrainer:
    """Tune, train and persist an LSTM autoencoder on the configured dataset.

    ``train`` runs the whole pipeline: load the CSV, min-max scale it, reshape
    it into single-timestep sequences, search hyperparameters with Keras
    Tuner, refit the best configuration with early stopping, and save the
    model under ``config.root_dir``.

    Attributes:
        config: Settings object providing paths, search ranges and training
            parameters (see ``ModelTrainerConfig``).
        model: The model fitted by the most recent ``train`` call, or ``None``
            if ``train`` has not completed yet.
        input_shape: ``(timesteps, features)`` of one training sample; set by
            ``data_reshaper`` and required by ``create_model``.
    """

    def __init__(self, config: "ModelTrainerConfig"):
        """Store the configuration; no I/O happens here."""
        self.config = config
        self.model = None
        self.input_shape = None

    def load_data(self):
        """Read the CSV at ``config.data_path`` and index it by ``TimeStamp``.

        Returns:
            A DataFrame whose index is the ``TimeStamp`` column.

        Raises:
            KeyError: If the file has no ``TimeStamp`` column.
        """
        df = pd.read_csv(self.config.data_path)
        # Return a new frame rather than mutating in place.
        return df.set_index('TimeStamp')

    def data_scaler(self, df):
        """Min-max scale ``df`` and persist the fitted scaler.

        The scaler is written to ``<root_dir>/scaler_data.joblib``.

        NOTE(review): the scaler is fitted on every row of ``df``, including
        rows that ``train`` later holds out through ``validation_split``.

        Args:
            df: The data to scale; every column is passed to the scaler.

        Returns:
            The scaled values as returned by ``MinMaxScaler.fit_transform``.
        """
        scaler = MinMaxScaler()
        X_scaled = scaler.fit_transform(df)
        scaler_filename = os.path.join(self.config.root_dir, "scaler_data.joblib")
        joblib.dump(scaler, scaler_filename)
        return X_scaled

    def data_reshaper(self, X_scaled):
        """Reshape ``(rows, features)`` into ``(rows, 1, features)``.

        Each row becomes a sequence of length one. Also records
        ``self.input_shape = (1, features)`` for ``create_model``.

        Args:
            X_scaled: Two-dimensional array of scaled values.

        Returns:
            The reshaped three-dimensional array.
        """
        n_rows, n_features = X_scaled.shape[0], X_scaled.shape[1]
        X_reshaped = X_scaled.reshape(n_rows, 1, n_features)
        self.input_shape = X_reshaped.shape[1:]
        return X_reshaped

    def create_model(self, hp):
        """Build and compile the autoencoder for one hyperparameter sample.

        Layout: two encoder LSTM layers, a ``RepeatVector`` back to the input
        sequence length, two decoder LSTM layers mirroring the encoder, and a
        time-distributed ``Dense`` layer restoring the feature count. Compiled
        with Adam and mean-absolute-error loss.

        Args:
            hp: Keras Tuner hyperparameters object; ``Choice`` is called on it
                for ``encoder_units_1``, ``encoder_units_2`` and
                ``learning_rate``.

        Returns:
            The compiled model.

        Raises:
            ValueError: If ``data_reshaper`` has not been called yet, so the
                input shape is unknown.
        """
        input_shape = self.input_shape
        if input_shape is None:
            # Fail early with a clear message instead of an obscure error
            # from indexing None further down.
            raise ValueError(
                "input_shape is not set; call data_reshaper() before building the model."
            )

        encoder_units_1 = hp.Choice('encoder_units_1', self.config.encoder_units_1_range)
        encoder_units_2 = hp.Choice('encoder_units_2', self.config.encoder_units_2_range)
        learning_rate = hp.Choice('learning_rate', self.config.learning_rate_range)

        inputs = Input(shape=input_shape)
        # Encoder: compress the sequence into a single vector.
        x = LSTM(encoder_units_1, activation='relu', return_sequences=True)(inputs)
        x = LSTM(encoder_units_2, activation='relu', return_sequences=False)(x)
        # Repeat the encoding once per input timestep for the decoder.
        x = RepeatVector(input_shape[0])(x)
        # Decoder: mirror of the encoder.
        x = LSTM(encoder_units_2, activation='relu', return_sequences=True)(x)
        x = LSTM(encoder_units_1, activation='relu', return_sequences=True)(x)
        output = TimeDistributed(Dense(input_shape[1]))(x)
        model = Model(inputs=inputs, outputs=output)
        model.compile(optimizer=Adam(learning_rate=learning_rate), loss='mae')
        return model

    def train(self):
        """Run the full pipeline and save the trained model.

        The model is saved to ``<root_dir>/anomaly_detector_model.keras`` and
        is also kept on ``self.model``.

        Returns:
            The object returned by the final ``model.fit`` call.
        """
        df = self.load_data()
        X_scaled = self.data_scaler(df)
        X_train = self.data_reshaper(X_scaled)

        # NOTE(review): no ``overwrite`` argument is passed, so when the
        # directory below already holds a previous search the tuner reloads it
        # instead of starting fresh -- confirm this is intended when the data
        # or the search ranges change.
        tuner = kt.RandomSearch(
            self.create_model,
            objective='val_loss',
            max_trials=self.config.max_trials,
            executions_per_trial=self.config.executions_per_trial,
            directory=os.path.join(self.config.root_dir, 'my_dir'),
            project_name='hparam_tuning'
        )

        tuner.search_space_summary()
        # Autoencoder: the input is also the reconstruction target.
        tuner.search(X_train, X_train,
                     epochs=self.config.training_epochs,
                     validation_split=self.config.training_validation_split,
                     batch_size=self.config.training_batch_size)

        best_hps = tuner.get_best_hyperparameters(num_trials=self.config.num_trials)[0]

        early_stopping = EarlyStopping(monitor='val_loss',
                                       patience=self.config.early_stopping_patience,
                                       restore_best_weights=self.config.restore_best_weights)

        # Rebuild a fresh model from the best hyperparameters and fit it.
        model = tuner.hypermodel.build(best_hps)
        history = model.fit(X_train, X_train,
                            epochs=self.config.final_epochs,
                            batch_size=self.config.final_batch_size,
                            validation_split=self.config.final_validation_split,
                            callbacks=[early_stopping]
                            )

        model.save(os.path.join(self.config.root_dir, "anomaly_detector_model.keras"))

        # Keep the fitted model reachable from the instance.
        self.model = model
        return history


In [9]:
# Run the training stage end to end: load the configuration, build the
# trainer from it, then tune/fit/save the model.
try:
    config_manager = ConfigurationManager()
    model_trainer_config = config_manager.get_model_trainer_config()
    model_trainer = ModelTrainer(config=model_trainer_config)
    model_trainer.train()
except Exception as e:
    # Print a short message, then re-raise so the failure is not swallowed
    # and the full traceback is still shown.
    print("An error occurred:", e)
    raise

[2024-01-04 23:09:47,453: INFO: common: yaml file: config\config.yaml loaded successfully]
[2024-01-04 23:09:47,472: INFO: common: yaml file: params.yaml loaded successfully]
[2024-01-04 23:09:47,472: INFO: common: created directory at: artifacts]
[2024-01-04 23:09:47,480: INFO: common: created directory at: artifacts/model_trainer]
Reloading Tuner from artifacts/model_trainer\my_dir\hparam_tuning\tuner0.json
Search space summary
Default search space size: 3
encoder_units_1 (Choice)
{'default': 32, 'conditions': [], 'values': [32, 64, 128], 'ordered': True}
encoder_units_2 (Choice)
{'default': 16, 'conditions': [], 'values': [16, 32, 64], 'ordered': True}
learning_rate (Choice)
{'default': 0.001, 'conditions': [], 'values': [0.001, 0.01, 0.1], 'ordered': True}

]

]
