In [1]:
# Installing required packages for Jupyter notebook conversion and formatting
! pip install nbconvert
! pip install nbformat

import re # Importing the 're' module for regular expression operations
import os # Importing 'os' module to interact with the operating system for file handling and directory management
import logging # Importing 'logging' module to set up logging and track the execution of the code
from nbconvert.preprocessors import ExecutePreprocessor # Importing 'ExecutePreprocessor' from 'nbconvert' to execute Jupyter notebooks
import nbformat # Importing 'nbformat' for reading and writing Jupyter notebook files


import warnings
# Suppress all warnings
warnings.filterwarnings("ignore")



### Sanitizing error messages and configuring logging

In [2]:
# Compiled once at definition time rather than on every call.
# The three alternatives are kept separate so that the parameter/intermediate/final
# byte tail applies ONLY to CSI sequences; applying it to every alternative makes a
# two-character escape (or a lone C1 control) swallow the text character after it.
_ANSI_ESCAPE_RE = re.compile(
    r'(?:'
    r'\x1B[@-Z\\-_]'                       # ESC followed by one byte in @-_ other than '['
    r'|[\x80-\x9A\x9C-\x9F]'               # single character in \x80-\x9F other than \x9B
    r'|(?:\x1B\[|\x9B)[0-?]*[ -/]*[@-~]'   # CSI: introducer, parameters, intermediates, final byte
    r')'
)


def sanitize_ansi_escape_sequences(text):
    """
    Return ``text`` with ANSI escape sequences removed.

    Removes CSI sequences (``ESC [`` or ``\\x9B`` followed by parameter bytes,
    intermediate bytes and a final byte, e.g. the colour codes in tracebacks),
    two-character ``ESC`` + byte sequences, and single characters in the
    ``\\x80``-``\\x9F`` range. All other characters are left untouched.

    Parameters:
        text (str): The string to clean.

    Returns:
        str: ``text`` without the matched sequences.
    """
    return _ANSI_ESCAPE_RE.sub('', text)

def configure_logging(notebook_name, log_dir='../Logs'):
    """
    Create (or reconfigure) the info and error file loggers for one notebook.

    Two loggers named ``<notebook_name>_info`` and ``<notebook_name>_error`` are
    set up, each with a single appending file handler:

    * ``<log_dir>/info/<notebook_name>_info.log``   (logger level INFO)
    * ``<log_dir>/error/<notebook_name>_error.log`` (logger level ERROR)

    The function may be called repeatedly for the same notebook: handlers left
    over from an earlier call are removed AND closed, so repeated calls neither
    duplicate log lines nor leak open file handles.

    Parameters:
        notebook_name (str): Name used for the logger names and log file names.
        log_dir (str): Root directory of the log tree. Defaults to '../Logs'.

    Returns:
        tuple: ``(info_logger, error_logger)``.
    """
    # One formatter instance is shared by both handlers
    formatter = logging.Formatter('%(asctime)s - %(name)s - %(levelname)s - %(message)s')

    configured = []
    for kind, level in (('info', logging.INFO), ('error', logging.ERROR)):
        log_path = os.path.join(log_dir, kind, f'{notebook_name}_{kind}.log')

        # Ensuring the target directory exists
        os.makedirs(os.path.dirname(log_path), exist_ok=True)

        logger = logging.getLogger(f'{notebook_name}_{kind}')
        logger.setLevel(level)

        # Detaching AND closing handlers from a previous call; merely clearing the
        # list would leave their log files open for the lifetime of the kernel
        for stale_handler in list(logger.handlers):
            logger.removeHandler(stale_handler)
            stale_handler.close()

        # Explicit UTF-8 so messages with non-ASCII characters are written the same
        # way regardless of the platform's default encoding
        handler = logging.FileHandler(log_path, mode='a', encoding='utf-8')
        handler.setFormatter(formatter)
        logger.addHandler(handler)

        configured.append(logger)

    info_logger, error_logger = configured
    return info_logger, error_logger

### Execution of Initial Load

In [3]:


def execute_notebook(notebook_path, timeout=1600, kernel_name='python3'):
    """
    Execute a Jupyter notebook in place and report whether it succeeded.

    The notebook is read, run with ``ExecutePreprocessor`` (``allow_errors=False``,
    so execution stops on the first error encountered) using the notebook's own
    directory as the working path, and the executed notebook is written back to
    the same file. Any exception raised while reading, executing or writing is
    logged to the notebook's error log with ANSI escape sequences stripped,
    printed, and reported through the return value instead of propagating.

    Parameters:
        notebook_path (str): Path to the ``.ipynb`` file to execute.
        timeout (int): Value passed to ``ExecutePreprocessor(timeout=...)``.
            Defaults to 1600.
        kernel_name (str): Kernel used to run the notebook. Defaults to 'python3'.

    Returns:
        bool: True if the notebook ran and was saved, False otherwise.
    """
    # Extracting notebook name from the provided path
    notebook_name = os.path.splitext(os.path.basename(notebook_path))[0]

    # Configuring logging for both information and error tracking
    info_logger, error_logger = configure_logging(notebook_name)

    # Logging the start of notebook execution
    info_logger.info(f"Starting execution of {notebook_path}")

    try:
        # Reading happens inside the try block so that a missing or unparsable
        # notebook is logged and reported as a failure like any execution error
        with open(notebook_path, 'r', encoding='utf-8') as file:
            nb = nbformat.read(file, as_version=4)

        # Setting up ExecutePreprocessor to run the notebook with defined timeout and error handling
        ep = ExecutePreprocessor(timeout=timeout, kernel_name=kernel_name, allow_errors=False)

        # Running the notebook and processing it within the notebook's own directory
        ep.preprocess(nb, {'metadata': {'path': os.path.dirname(notebook_path)}})

        # Writing the executed notebook back to the file
        with open(notebook_path, 'wt', encoding='utf-8') as file:
            nbformat.write(nb, file)

    except Exception as e:
        # Sanitizing and logging any errors encountered during execution
        sanitized_error = sanitize_ansi_escape_sequences(str(e))
        error_logger.error(f"Failed to execute {notebook_path}: {sanitized_error}")
        print(f"Failed to execute {notebook_path}: {sanitized_error}")
        return False

    # Logging the successful execution of the notebook
    info_logger.info(f"Successfully ran {notebook_path}")
    print(f"Successfully ran {notebook_path}")
    return True

# Initial-load pipeline: the notebooks live in notebook_dir and run in the order listed
notebook_dir = '../Code/'
notebooks = [
    'Data Extraction.ipynb',
    'Data Preprocessing, Feature Selection, Predicted Price.ipynb',
    'Data Transformation.ipynb',
    'Data Initial Loading.ipynb',
    'Initial Load Testing.ipynb'
]

# Running the notebooks one after another; the first failure ends the run
for notebook in notebooks:
    full_path = os.path.join(notebook_dir, notebook)

    # Nothing more to do for a notebook that ran cleanly
    if execute_notebook(full_path):
        continue

    # Recording in the failed notebook's error log why the remaining notebooks are skipped
    _, error_logger = configure_logging(os.path.splitext(os.path.basename(full_path))[0])
    error_logger.error(f"Stopping further execution due to error in {notebook}")
    break


Successfully ran ../Code/Data Extraction.ipynb
Successfully ran ../Code/Data Preprocessing, Feature Selection, Predicted Price.ipynb
Successfully ran ../Code/Data Transformation.ipynb
Successfully ran ../Code/Data Initial Loading.ipynb
Successfully ran ../Code/Initial Load Testing.ipynb


### Execution of Incremental Load

In [4]:

# NOTE: this cell declares execute_notebook again; once run, it shadows the
# definition of the same name from the initial-load cell.
def execute_notebook(notebook_path, timeout=1600, kernel_name='python3'):
    """
    Execute a Jupyter notebook in place and report whether it succeeded.

    The notebook is read, run with ``ExecutePreprocessor`` (``allow_errors=False``,
    so execution stops on the first error encountered) using the notebook's own
    directory as the working path, and the executed notebook is written back to
    the same file. Any exception raised while reading, executing or writing is
    logged to the notebook's error log with ANSI escape sequences stripped,
    printed, and reported through the return value instead of propagating.

    Parameters:
        notebook_path (str): Path to the ``.ipynb`` file to execute.
        timeout (int): Value passed to ``ExecutePreprocessor(timeout=...)``.
            Defaults to 1600.
        kernel_name (str): Kernel used to run the notebook. Defaults to 'python3'.

    Returns:
        bool: True if the notebook ran and was saved, False otherwise.
    """
    # Extracting notebook name from the path
    notebook_name = os.path.splitext(os.path.basename(notebook_path))[0]

    # Configuring loggers for capturing information and errors
    info_logger, error_logger = configure_logging(notebook_name)

    # Logging the start of notebook execution
    info_logger.info(f"Starting execution of {notebook_path}")

    try:
        # Reading happens inside the try block so that a missing or unparsable
        # notebook is logged and reported as a failure like any execution error
        with open(notebook_path, 'r', encoding='utf-8') as file:
            nb = nbformat.read(file, as_version=4)

        # Setting up the ExecutePreprocessor with a defined timeout and no tolerance for errors
        ep = ExecutePreprocessor(timeout=timeout, kernel_name=kernel_name, allow_errors=False)

        # Executing the notebook with its own directory as the working path
        ep.preprocess(nb, {'metadata': {'path': os.path.dirname(notebook_path)}})

        # Writing the executed notebook back to its file
        with open(notebook_path, 'wt', encoding='utf-8') as file:
            nbformat.write(nb, file)

    except Exception as e:
        # Sanitizing and logging errors during execution
        sanitized_error = sanitize_ansi_escape_sequences(str(e))
        error_logger.error(f"Failed to execute {notebook_path}: {sanitized_error}")
        print(f"Failed to execute {notebook_path}: {sanitized_error}")
        return False

    # Logging successful completion of notebook execution
    info_logger.info(f"Successfully ran {notebook_path}")
    print(f"Successfully ran {notebook_path}")
    return True

# Incremental-load pipeline: the notebooks live in notebook_dir and run in the order listed
notebook_dir = '../Code/'
notebooks = [
    'Data Incremental Loading.ipynb',
    'Incremental Load Testing.ipynb'
]

# Running the notebooks one after another; the first failure ends the run
for notebook in notebooks:
    full_path = os.path.join(notebook_dir, notebook)

    # Nothing more to do for a notebook that ran cleanly
    if execute_notebook(full_path):
        continue

    # Recording in the failed notebook's error log why the remaining notebooks are skipped
    _, error_logger = configure_logging(os.path.splitext(os.path.basename(full_path))[0])
    error_logger.error(f"Stopping further execution due to error in {notebook}")
    break


Successfully ran ../Code/Data Incremental Loading.ipynb
Successfully ran ../Code/Incremental Load Testing.ipynb
