In [14]:
import os

In [15]:
%pwd

'd:\\Full Stack Data Science\\Time Series Analysis\\MAJOR PROJECT\\SMDF'

In [3]:
# Step the kernel's working directory up one level; the recorded output below
# shows it ending in the SMDF project folder.
# NOTE(review): not idempotent - every re-run of this cell climbs one more
# level, and the execution counts in this notebook are out of order, so
# confirm the directory with %pwd before running the cells that follow.
cd ..

d:\Full Stack Data Science\Time Series Analysis\MAJOR PROJECT\SMDF


In [4]:
%pwd

'd:\\Full Stack Data Science\\Time Series Analysis\\MAJOR PROJECT\\SMDF'

In [18]:
from dataclasses import dataclass
from pathlib import Path


@dataclass(frozen=True)
class ModelTrainerConfig:
    """Immutable bundle of settings for the model-training stage.

    Instances are built by ``ConfigurationManager.get_model_trainer_config``
    and read by ``HybridModel``. The values are passed through from the
    loaded YAML objects without conversion, so the ``Path`` annotations are
    documentation only.
    NOTE(review): the path-like fields are presumably plain strings at
    runtime - confirm against ``read_yaml``.
    """
    # Directory created for this stage; the fitted scaler is dumped inside it.
    root_dir: Path
    # CSV files read with pandas to obtain the train / test series.
    train_data_path: Path
    test_data_path: Path
    # File name (joined onto root_dir) under which the scaler is saved.
    scaler: str
    # Convolution1D hyper-parameters, used for both convolution layers.
    filters: int
    kernel_size: int
    # Activation shared by the convolution, LSTM and hidden Dense layers.
    activation: str
    # Arguments forwarded to model.compile.
    loss: str
    optimizer: str
    # Arguments forwarded to model.fit (epochs / batch_size).
    epoch: int
    batch_size: int
    # One export location per series column; HybridModel.model_trainer picks
    # one of these by index and passes it to tf.saved_model.save.
    M01AB: Path
    M01AE: Path
    N02BA: Path
    N02BE: Path
    N05B: Path
    N05C: Path
    R03: Path
    R06: Path
  
    


In [19]:
from SMDF.constants import *
from SMDF.utils.common import read_yaml, create_directories

In [20]:
class ConfigurationManager:
    """Load the project's YAML settings and build per-stage config objects."""

    def __init__(
        self,
        config_filepath = CONFIG_FILE_PATH,
        params_filepath = PARAMS_FILE_PATH,
        schema_filepath = SCHEMA_FILE_PATH):
        """Read the three YAML files and create the artifacts root directory.

        Args:
            config_filepath: location of the main config YAML.
            params_filepath: location of the hyper-parameter YAML.
            schema_filepath: location of the schema YAML.
        """
        self.config = read_yaml(config_filepath)
        self.params = read_yaml(params_filepath)
        self.schema = read_yaml(schema_filepath)

        create_directories([self.config.artifacts_root])

    def get_model_trainer_config(self) -> ModelTrainerConfig:
        """Assemble the settings for the model-training stage.

        Creates the stage's ``root_dir`` as a side effect. Paths, the scaler
        file name and the per-series export locations come from the
        ``model_trainer`` section of the config file; the network and
        training hyper-parameters come from the ``hybrid`` section of the
        params file.

        Returns:
            A populated ``ModelTrainerConfig``.
        """
        config = self.config.model_trainer
        params = self.params.hybrid
        # The schema file is not consulted here: the previous lookup of
        # ``self.schema.trainer_column`` was never used, so it only added a
        # needless way for this method to fail.

        create_directories([config.root_dir])

        return ModelTrainerConfig(
            root_dir=config.root_dir,
            train_data_path=config.train_data_path,
            test_data_path=config.test_data_path,
            scaler=config.scaler,
            filters=params.filters,
            kernel_size=params.kernel_size,
            activation=params.activation,
            loss=params.loss,
            optimizer=params.optimizer,
            epoch=params.epoch,
            batch_size=params.batch_size,
            M01AB=config.M01AB,
            M01AE=config.M01AE,
            N02BA=config.N02BA,
            N02BE=config.N02BE,
            N05B=config.N05B,
            N05C=config.N05C,
            R03=config.R03,
            R06=config.R06,
        )

In [23]:
from sklearn.preprocessing import StandardScaler
import pandas as pd
from SMDF.logging import logger
import joblib
import numpy as np

# Deep learning
from tensorflow.keras.optimizers import Adam
from tensorflow.keras.models import Sequential
import tensorflow as tf
from tensorflow.keras.layers import LSTM, Dense, Flatten, Convolution1D, RepeatVector, TimeDistributed
from sklearn.preprocessing import StandardScaler
#from keras.callbacks import EarlyStopping


In [24]:
class HybridModel:
    """Train a Conv1D + LSTM forecaster on one series column at a time.

    Typical flow for a column: ``noramalizing_data`` -> ``train_spliting`` /
    ``test_spliting`` -> ``model_trainer``.
    """

    # Default number of consecutive past observations that form one sample.
    WINDOW_SIZE = 60

    def __init__(self, config: "ModelTrainerConfig" = None):
        """Store the trainer settings.

        Args:
            config: a ``ModelTrainerConfig``. It is required by
                ``noramalizing_data`` and ``model_trainer``; the default only
                keeps the argument optional for existing call sites (it used
                to default to the dataclass itself, which was a mistyped
                annotation and equally unusable).
        """
        self.config = config

    def noramalizing_data(self,column_name):
        """Standardise one column of the train and test CSV files.

        The scaler is fitted on the training column only and then applied to
        the test column. The fitted scaler is dumped to
        ``<root_dir>/<scaler>``.

        NOTE(review): the dump path does not depend on ``column_name``, so
        calling this for several columns overwrites the file and only the
        last column's scaler survives on disk - confirm whether inference
        needs one scaler per series.

        Args:
            column_name: name of the column to load from both CSV files.

        Returns:
            ``(train_data, test_data)`` as 2-D arrays of shape ``(rows, 1)``.
        """
        scaler = StandardScaler()
        train = pd.read_csv(self.config.train_data_path)
        test = pd.read_csv(self.config.test_data_path)

        # Only ``column_name`` is used, so the other columns are simply
        # ignored rather than dropped in place beforehand.
        train_data = scaler.fit_transform(train[column_name].values.reshape(-1,1))
        test_data = scaler.transform(test[column_name].values.reshape(-1,1))

        logger.info(train_data.shape)
        logger.info(test_data.shape)

        joblib.dump(scaler, os.path.join(self.config.root_dir, self.config.scaler))

        return train_data, test_data

    def train_spliting(self, train, window_size=WINDOW_SIZE):
        """Cut the training array into sliding-window samples.

        Each sample is ``window_size`` consecutive rows; its target is the
        last column of the row that immediately follows the window.

        Args:
            train: 2-D array of shape ``(rows, features)``.
            window_size: number of past rows per sample (default 60).

        Returns:
            ``(X_train, y_train)``; ``X_train`` is float32 with shape
            ``(rows - window_size, window_size, features)`` and ``y_train``
            has shape ``(rows - window_size, 1)``.

        Raises:
            ValueError: if ``window_size`` is smaller than 1.
        """
        if window_size < 1:
            raise ValueError("window_size must be at least 1")

        X_train = []
        y_train = []
        for i in range(window_size, train.shape[0]):
            X_train.append(train[i-window_size:i])
            y_train.append(train[i, -1])  # target: last column of the next row
        # Change them to numpy arrays
        X_train, y_train = np.array(X_train).astype('float32'), np.array(y_train).reshape(-1,1)
        logger.info("Success training data fully spliting")
        return X_train,y_train

    def test_spliting(self, train, test, window_size=WINDOW_SIZE):
        """Cut the test array into sliding-window samples.

        The last ``window_size`` rows of ``train`` are prepended to ``test``
        so that every test row, including the first, has a full window of
        history in front of it.

        Args:
            train: 2-D array of shape ``(rows, features)``.
            test: 2-D array with the same number of columns as ``train``.
            window_size: number of past rows per sample (default 60).

        Returns:
            ``(X_test, y_test)``; ``X_test`` is float32 with shape
            ``(len(test), window_size, features)`` and ``y_test`` has shape
            ``(len(test), 1)``.

        Raises:
            ValueError: if ``window_size`` is smaller than 1 or ``train`` has
                fewer than ``window_size`` rows (the slice start would turn
                negative and silently select the wrong rows).
        """
        if window_size < 1:
            raise ValueError("window_size must be at least 1")
        if len(train) < window_size:
            raise ValueError(
                "train needs at least window_size rows to seed the first test window"
            )

        # Concatenate train data to test data
        dataset_total = np.concatenate((train, test), axis = 0)
        # Keep the test rows plus the last window_size rows of train
        inputs = dataset_total[len(dataset_total) - len(test) - window_size:]
        X_test = []
        y_test = []
        for i in range(window_size, window_size+len(test)):
            X_test.append(inputs[i-window_size:i,:])
            y_test.append(inputs[i,-1])  # target: last column of the next row
        # Change them to numpy arrays
        X_test, y_test = np.array(X_test).astype('float32'), np.array(y_test).reshape(-1, 1)
        logger.info(X_test.shape)
        return X_test,y_test

    def model_trainer(self,X_train,y_train,key):
        """Build, fit and export the Conv1D + LSTM network.

        Args:
            X_train: samples of shape ``(n, window, features)``.
            y_train: targets of shape ``(n, steps)``; ``steps`` sets the
                RepeatVector length.
            key: index into the configured export locations, in the order
                M01AB, M01AE, N02BA, N02BE, N05B, N05C, R03, R06.

        Returns:
            The fitted Keras model; it is also written with
            ``tf.saved_model.save`` to the location selected by ``key``.
        """
        columns = [ self.config.M01AB,
            self.config.M01AE,
            self.config.N02BA,
            self.config.N02BE,
            self.config.N05B,
            self.config.N05C,
            self.config.R03,
            self.config.R06]
        model = Sequential()
        # Per-sample shape comes from the array's trailing dimensions; indexing
        # sample 1 (as before) fails when only one sample is available.
        model.add(Convolution1D(filters=self.config.filters, kernel_size=self.config.kernel_size, activation=self.config.activation, input_shape=X_train.shape[1:]))
        model.add(Convolution1D(filters=self.config.filters, kernel_size=self.config.kernel_size, activation=self.config.activation))
        model.add(Flatten())
        model.add(RepeatVector(y_train.shape[1]))
        model.add(LSTM(128, activation=self.config.activation, return_sequences=True))
        model.add(TimeDistributed(Dense(100, activation=self.config.activation)))
        model.add(TimeDistributed(Dense(1)))
        model.compile(loss=self.config.loss, optimizer=self.config.optimizer)
        logger.info("Model Training start")
        model.fit(X_train, y_train,
                  epochs = self.config.epoch,
                  batch_size = self.config.batch_size)
        logger.info("Model Trained Sucessfully")
        model.summary()
        tf.saved_model.save(model, columns[key])
        # f-string so the index is interpolated rather than logged as "{key}".
        logger.info(f" Model {key} save suceessfully!  ")

        return model
        

    

    



In [25]:
# Train one model per series column. The position of a column in this list is
# the ``key`` that HybridModel.model_trainer uses to pick the export location.
columns = ["M01AB",
        "M01AE",
        "N02BA",
        "N02BE",
        "N05B",
        "N05C",
        "R03",
        "R06"]

try:
    config = ConfigurationManager()
    data_config = config.get_model_trainer_config()
    trainer = HybridModel(config=data_config)
    for key,val in enumerate(columns):
        train, test = trainer.noramalizing_data(val)
        X_train,y_train =  trainer.train_spliting(train)
        # The test windows are built here but not used for evaluation in this cell.
        X_test, y_test = trainer.test_spliting(train,test)
        model = trainer.model_trainer(X_train,y_train,key)

except Exception:
    # Record the traceback in the project log, then re-raise with a bare
    # ``raise`` instead of the previous no-op ``raise e``.
    logger.exception("Model training failed")
    raise

[2023-07-01 21:22:25,954: INFO: common: yaml file: config\config.yaml loaded successfully]
[2023-07-01 21:22:25,959: INFO: common: yaml file: params.yaml loaded successfully]
[2023-07-01 21:22:25,965: INFO: common: yaml file: schema.yaml loaded successfully]
[2023-07-01 21:22:25,968: INFO: common: created directory at: artifacts]
[2023-07-01 21:22:25,971: INFO: common: created directory at: artifacts/model_trainer]
[2023-07-01 21:22:26,005: INFO: 1215740052: (1684, 1)]
[2023-07-01 21:22:26,006: INFO: 1215740052: (422, 1)]


[2023-07-01 21:22:26,112: INFO: 1215740052: Success training data fully spliting]
[2023-07-01 21:22:26,114: INFO: 1215740052: (422, 60, 1)]
[2023-07-01 21:22:27,528: INFO: 1215740052: Model Training start]
Epoch 1/150
Epoch 2/150
Epoch 3/150
Epoch 4/150
Epoch 5/150
Epoch 6/150
Epoch 7/150
Epoch 8/150
Epoch 9/150
Epoch 10/150
Epoch 11/150
Epoch 12/150
Epoch 13/150
Epoch 14/150
Epoch 15/150
Epoch 16/150
Epoch 17/150
Epoch 18/150
Epoch 19/150
Epoch 20/150
Epoch 21/150
Epoch 22/150
Epoch 23/150
Epoch 24/150
Epoch 25/150
Epoch 26/150
Epoch 27/150
Epoch 28/150
Epoch 29/150
Epoch 30/150
Epoch 31/150
Epoch 32/150
Epoch 33/150
Epoch 34/150
Epoch 35/150
Epoch 36/150
Epoch 37/150
Epoch 38/150
Epoch 39/150
Epoch 40/150
Epoch 41/150
Epoch 42/150
Epoch 43/150
Epoch 44/150
Epoch 45/150
Epoch 46/150
Epoch 47/150
Epoch 48/150
Epoch 49/150
Epoch 50/150
Epoch 51/150
Epoch 52/150
Epoch 53/150
Epoch 54/150
Epoch 55/150
Epoch 56/150
Epoch 57/150
Epoch 58/150
Epoch 59/150
Epoch 60/150
Epoch 61/150
Epoch 62/1

KeyboardInterrupt: 

In [18]:
import tensorflow as tf