In [1]:
import os 

In [2]:
pwd%

'c:\\Users\\amenm\\OneDrive\\Desktop\\p2m_final\\BTC_PRICE_PREDICTION_MODEL\\research'

In [3]:
# Step up from research/ to the project root exactly once. Guarding on the
# current directory name keeps the cell idempotent: re-running it no longer
# climbs one more level each time.
if os.path.basename(os.getcwd()) == "research":
    os.chdir("../")

In [4]:
pwd%

'c:\\Users\\amenm\\OneDrive\\Desktop\\p2m_final\\BTC_PRICE_PREDICTION_MODEL'

In [5]:
import warnings

# Remote MLflow tracking server (DagsHub) and the account used to reach it.
os.environ["MLFLOW_TRACKING_URI"]="https://dagshub.com/amenallahbenothmen/BTC_PRICE_PREDICTION.mlflow"
os.environ["MLFLOW_TRACKING_USERNAME"]="amenallahbenothmen"

# The access token is a secret and must not live in the notebook: export
# MLFLOW_TRACKING_PASSWORD in the shell (or load it from an untracked .env file)
# before starting the kernel.
# NOTE(review): the token that used to be hard-coded here is in the file history
# and should be revoked on DagsHub.
if not os.environ.get("MLFLOW_TRACKING_PASSWORD"):
    warnings.warn(
        "MLFLOW_TRACKING_PASSWORD is not set; logging to the remote MLflow "
        "server will fail until the access token is provided via the environment.",
        stacklevel=2,
    )

In [18]:
from dataclasses import dataclass
from pathlib import Path 
@dataclass(frozen=True)
class TrainingConfig:
    """Immutable settings for the training stage: file locations plus hyperparameters.

    Field order is part of the interface (positional construction), so new
    fields should only ever be appended.
    """

    # --- filesystem locations -------------------------------------------
    root_dir: Path              # working directory of the training stage
    trained_model_path: Path    # checkpoint file written while fitting
    full_model_path: Path       # base model that training starts from
    training_data: Path         # directory holding the train/val/test .npy arrays
    data_dir: Path              # directory holding train.csv / val.csv / test.csv
    model_dir: Path             # directory created for the exported model
    saved_model_path: Path      # destination of the exported model
    prediction_dir: Path        # directory created for prediction output
    result_path: Path           # file the forecast array is saved to

    # --- hyperparameters ------------------------------------------------
    batch_size: int
    epochs: int
    patience: int               # early-stopping patience, in epochs
    learning_rate: float
    forecast_horizon: int       # number of trailing test predictions to keep

    # --- experiment tracking --------------------------------------------
    all_params: dict            # full parameter set, logged to MLflow as-is
    mlflow_uri: str             # URI handed to MLflow as the registry URI


In [19]:
from src.LSTM_BTC_Prediction.constants  import *
from src.LSTM_BTC_Prediction.utils.common import read_yaml,create_directories,save_json

In [20]:
class ConfigurationManager:
    """Reads the project's YAML configuration and parameters and builds stage configs.

    The objects returned by ``read_yaml`` are accessed by attribute
    (``self.config.training``, ``self.params.EPOCHS``, ...).
    """

    def __init__(
            self,
            config_filepath=CONFIG_FILE_PATH,
            params_filepath=PARAMS_FILE_PATH):
        """Load both YAML files and make sure the artifacts root directory exists.

        Args:
            config_filepath: path of the configuration YAML (paths, directories).
            params_filepath: path of the parameters YAML (hyperparameters).
        """
        self.config = read_yaml(config_filepath)
        self.params = read_yaml(params_filepath)

        create_directories([self.config.artifacts_root])

    def get_training_config(self) -> TrainingConfig:
        """Assemble the ``TrainingConfig`` for the training stage.

        Creates the training root, model and prediction directories as a side
        effect.

        Returns:
            TrainingConfig populated from the ``training``, ``prepare_base_model``
            and ``data_preprocessing`` sections of the config file and from the
            parameters file.
        """
        training = self.config.training
        prepare_base_model = self.config.prepare_base_model
        data_preprocessing = self.config.data_preprocessing
        params = self.params

        create_directories([
            training.root_dir,
            training.model_dir,
            training.prediction_dir,
        ])

        # Honour MLFLOW_TRACKING_URI when it is set so the registry URI cannot
        # drift from the tracking URI; otherwise fall back to the project server.
        mlflow_uri = (
            os.environ.get("MLFLOW_TRACKING_URI")
            or "https://dagshub.com/amenallahbenothmen/BTC_PRICE_PREDICTION.mlflow"
        )

        return TrainingConfig(
            root_dir=training.root_dir,
            trained_model_path=training.trained_model_path,
            full_model_path=prepare_base_model.full_model_path,
            training_data=data_preprocessing.data_final_dir,
            data_dir=data_preprocessing.data_dir,
            model_dir=training.model_dir,
            saved_model_path=training.saved_model_path,
            prediction_dir=training.prediction_dir,
            result_path=training.result_path,
            batch_size=params.BATCH_SIZE,
            epochs=params.EPOCHS,
            patience=params.PATIENCE,
            learning_rate=params.LEARNING_RATE,
            forecast_horizon=params.FORECAST_HORIZON,
            all_params=params,
            mlflow_uri=mlflow_uri,
        )

    # Backward-compatible alias: the method was originally published under this
    # misspelled name and existing callers still use it.
    get_tarining_config = get_training_config
    



In [21]:
import tensorflow as tf
from src.LSTM_BTC_Prediction import logger  
import numpy as np 
from tensorflow.keras.callbacks import ModelCheckpoint, EarlyStopping
import mlflow
import mlflow.keras
from urllib.parse import urlparse
from sklearn.metrics import mean_squared_error
import pandas as pd 
import os 

In [24]:
class Training:
    """Trains the prepared Keras model, scores it and logs the run to MLflow.

    Expected call order: ``get_base_model`` -> ``get_data`` -> ``train`` ->
    ``log_into_mlflow`` / ``save_model_to_dir``. ``train`` needs the attributes
    set by the two loaders, and ``log_into_mlflow`` needs ``self.scores`` set by
    ``train``.
    """

    def __init__(self, config: "TrainingConfig"):
        """Store the training configuration; nothing is loaded yet."""
        self.config = config

    def get_base_model(self):
        """Load the base model from ``config.full_model_path`` into ``self.model``.

        Raises:
            Exception: whatever ``load_model`` raised. The error is logged and
                re-raised, so a missing model stops the run here rather than
                surfacing later as a missing ``self.model`` attribute.
        """
        try:
            self.model = tf.keras.models.load_model(self.config.full_model_path)
            logger.info("Model loaded successfully")
        except Exception as e:
            logger.error(f'Error loading model: {e}')
            raise

    def get_data(self):
        """Load the train/val/test arrays and the matching CSV frames.

        Arrays are read from ``config.training_data`` (``trainX.npy`` ...
        ``testY.npy``) and frames from ``config.data_dir`` (``train.csv``,
        ``val.csv``, ``test.csv``).

        Raises:
            Exception: whatever the failing loader raised, after logging it.
        """
        try:
            array_dir = self.config.training_data
            self.trainX = np.load(os.path.join(array_dir, "trainX.npy"))
            self.trainY = np.load(os.path.join(array_dir, "trainY.npy"))
            self.valX = np.load(os.path.join(array_dir, "valX.npy"))
            self.valY = np.load(os.path.join(array_dir, "valY.npy"))
            self.testX = np.load(os.path.join(array_dir, "testX.npy"))
            self.testY = np.load(os.path.join(array_dir, "testY.npy"))

            frame_dir = self.config.data_dir
            self.df_test = pd.read_csv(os.path.join(frame_dir, "test.csv"))
            self.df_train = pd.read_csv(os.path.join(frame_dir, "train.csv"))
            self.df_val = pd.read_csv(os.path.join(frame_dir, "val.csv"))
            logger.info("DATA loaded successfully")
        except Exception as e:
            logger.error(f"Error loading data: {e}")
            raise

    def calculate_rmse(self, y_true, y_pred):
        """Return the root-mean-square error between ``y_true`` and ``y_pred``.

        When both inputs hold the same number of elements but differ in shape
        (for example ``(N,)`` targets against ``(N, 1)`` predictions), the
        targets are reshaped to the prediction shape first. Without this, NumPy
        would broadcast the pair into an ``(N, N)`` matrix and silently return a
        meaningless value.
        """
        y_true = np.asarray(y_true, dtype=float)
        y_pred = np.asarray(y_pred, dtype=float)
        if y_true.shape != y_pred.shape and y_true.size == y_pred.size:
            y_true = y_true.reshape(y_pred.shape)
        return np.sqrt(np.mean((y_true - y_pred) ** 2))

    @staticmethod
    def _rescale_mse(mse, min_price, max_price):
        """Convert an MSE measured on min-max scaled prices to squared price units.

        Under ``scaled = (price - min) / (max - min)`` an error scales with
        ``(max - min)``, so a squared error scales with ``(max - min) ** 2``.
        The offset ``min`` cancels out and must not be added.
        """
        return float(mse) * float(max_price - min_price) ** 2

    def save_score(self, train_loss, val_loss, test_rmse):
        """Keep the three scores on ``self.scores`` and write them to ``scores.json``."""
        # Plain floats keep the dict JSON-serialisable whatever NumPy scalar
        # type the computations produced.
        self.scores = {
            "train_loss": float(train_loss),
            "val_loss": float(val_loss),
            "test_rmse": float(test_rmse),
        }
        save_json(path=Path("scores.json"), data=self.scores)

    def train(self):
        """Fit the model, save the trailing forecast and record the scores.

        Steps: compile with Adam/MSE, fit with checkpointing and early stopping
        on ``val_loss``, predict on the test set, map predictions and targets
        back to price units, save the last ``config.forecast_horizon``
        predictions, then store train/val loss and test RMSE.

        The rescaling assumes each split's ``close`` column was min-max scaled
        with that split's own minimum and maximum. TODO confirm against the
        preprocessing stage.
        """
        self.model.compile(
            optimizer=tf.keras.optimizers.Adam(learning_rate=self.config.learning_rate),
            loss='mean_squared_error'
        )
        checkpoint = ModelCheckpoint(
            filepath=self.config.trained_model_path,
            monitor='val_loss',
            verbose=1,
            save_best_only=True,
            mode='min'
        )
        earlystopping = EarlyStopping(
            monitor='val_loss',
            patience=self.config.patience,
            restore_best_weights=True
        )

        history = self.model.fit(
            self.trainX,
            self.trainY,
            batch_size=self.config.batch_size,
            epochs=self.config.epochs,
            verbose=1,
            shuffle=False,  # keep chronological order of the time series
            validation_data=(self.valX, self.valY),
            callbacks=[checkpoint, earlystopping]
        )

        min_train, max_train = self.df_train['close'].min(), self.df_train['close'].max()
        min_val, max_val = self.df_val['close'].min(), self.df_val['close'].max()
        min_test, max_test = self.df_test['close'].min(), self.df_test['close'].max()

        # Losses are MSE on scaled data; express them in squared price units.
        train_loss = self._rescale_mse(history.history['loss'][-1], min_train, max_train)
        val_loss = self._rescale_mse(history.history['val_loss'][-1], min_val, max_val)

        # Map scaled predictions and targets back to prices.
        test_predictions = self.model.predict(self.testX)
        test_predictions = test_predictions * (max_test - min_test) + min_test
        actual_price = self.testY * (max_test - min_test) + min_test

        # Only the trailing `forecast_horizon` values are persisted as the forecast.
        prediction = test_predictions.reshape(-1, 1).flatten()[-self.config.forecast_horizon:]
        self.save_prediction(prediction=prediction)

        test_rmse = self.calculate_rmse(actual_price, test_predictions)
        self.save_score(train_loss, val_loss, test_rmse)

    def save_model_to_dir(self):
        """Reload the checkpointed model and save it to ``config.saved_model_path``."""
        model = tf.keras.models.load_model(self.config.trained_model_path)
        model.save(self.config.saved_model_path)

    def save_prediction(self, prediction: np.ndarray):
        """Save the forecast array to ``config.result_path`` with ``np.save``."""
        np.save(self.config.result_path, prediction)

    def log_into_mlflow(self):
        """Log parameters, scores and the model to MLflow.

        The model is also registered as ``LSTM_BTC_PREDECTION`` unless the
        tracking store is a local ``file`` store. Must be called after
        ``train`` because it reads ``self.scores``.
        """
        mlflow.set_registry_uri(self.config.mlflow_uri)
        tracking_url_type_store = urlparse(mlflow.get_tracking_uri()).scheme

        with mlflow.start_run():
            mlflow.log_params(self.config.all_params)
            mlflow.log_metrics(
                {name: self.scores[name] for name in ("train_loss", "val_loss", "test_rmse")}
            )
            if tracking_url_type_store != "file":
                mlflow.keras.log_model(self.model, "model", registered_model_name="LSTM_BTC_PREDECTION")
            else:
                mlflow.keras.log_model(self.model, "model")


In [25]:
# Run the training stage end to end. The order matters: the model and data must
# be loaded before train(), and train() must finish before the scores can be
# logged to MLflow and the checkpoint exported.
try:
    config = ConfigurationManager()
    training_config = config.get_tarining_config()
    training = Training(config=training_config)
    training.get_base_model()
    training.get_data()
    training.train()
    training.log_into_mlflow()
    training.save_model_to_dir()
except Exception:
    # Record the failure with its traceback, then propagate it unchanged.
    logger.exception("Training stage failed")
    raise

[2024-05-10 17:27:21,219: INFO: common: yaml file: config\config.yaml loaded successfully]
[2024-05-10 17:27:21,221: INFO: common: yaml file: params.yaml loaded successfully]
[2024-05-10 17:27:21,223: INFO: common: created directory at: artifacts]
[2024-05-10 17:27:21,223: INFO: common: created directory at: artifacts/training]
[2024-05-10 17:27:21,224: INFO: common: created directory at: model]
[2024-05-10 17:27:21,225: INFO: common: created directory at: prediction]
[2024-05-10 17:27:21,305: INFO: 2426360760: Model loaded successfully]
[2024-05-10 17:27:21,330: INFO: 2426360760: DATA loaded successfully]
67/67 ━━━━━━━━━━━━━━━━━━━━ 0s 119ms/step - loss: 0.5467
Epoch 1: val_loss improved from inf to 0.21577, saving model to artifacts/training/model.keras
67/67 ━━━━━━━━━━━━━━━━━━━━ 12s 139ms/step - loss: 0.5440 - val_loss: 0.2158
19/19 ━━━━━━━━━━━━━━━━━━━━ 2s 78ms/step
[2024-05-10 17:27

Registered model 'LSTM_BTC_PREDECTION' already exists. Creating a new version of this model...
2024/05/10 17:28:04 INFO mlflow.store.model_registry.abstract_store: Waiting up to 300 seconds for model version to finish creation. Model name: LSTM_BTC_PREDECTION, version 6
Created version '6' of model 'LSTM_BTC_PREDECTION'.
