In [2]:
import os

In [3]:
# Show the kernel's current working directory before changing it; the recorded
# output shows the notebook starting inside the project's research/ folder.
%pwd

'c:\\Users\\athar\\Projects\\Natural_Language_Processing\\Text-Summarizer-Project\\research'

In [4]:
def _find_project_root(start, marker=os.path.join("config", "config.yaml")):
    """Return the closest directory at or above ``start`` that contains ``marker``.

    Args:
        start: Directory from which the upward search begins.
        marker: Path, relative to the project root, whose presence identifies
            the root. Defaults to config/config.yaml.
            NOTE(review): assumes CONFIG_FILE_PATH points at config/config.yaml
            relative to the project root - confirm against TextSummarizer.constants.

    Returns:
        Absolute path of the first directory (searching upwards) containing ``marker``.

    Raises:
        FileNotFoundError: If the filesystem root is reached without a match.
    """
    current = os.path.abspath(start)
    while not os.path.isfile(os.path.join(current, marker)):
        parent = os.path.dirname(current)
        if parent == current:  # reached the filesystem root; nothing left to search
            raise FileNotFoundError(
                f"Could not locate project root containing {marker!r} at or above {start!r}"
            )
        current = parent
    return current


# Switch to the project root so the relative paths used below resolve on any
# machine. Unlike a hard-coded absolute path this is portable, and unlike a bare
# os.chdir("..") it is idempotent: re-running the cell stays in the root.
os.chdir(_find_project_root(os.getcwd()))

In [5]:
from dataclasses import dataclass
from pathlib import Path

@dataclass(frozen=True)
class DataValidationConfig:
    """Immutable bundle of settings used by the data-validation stage.

    Attributes:
        root_dir: Directory reserved for the validation stage's artifacts.
        STATUS_FILE: Path of the text file the validation outcome is written to.
        ALL_REQUIRED_FILES: Entry names that must be present in the dataset directory.
    """

    # NOTE(review): annotated as Path, but the value appears to be taken verbatim
    # from the YAML config (a plain string) - confirm before relying on Path methods.
    root_dir: Path

    # Upper-case names mirror the keys of the YAML config section.
    STATUS_FILE: str
    ALL_REQUIRED_FILES: list

In [6]:
from TextSummarizer.constants import *
from TextSummarizer.utils.common import read_yaml, create_directories

In [7]:
class ConfigurationManager:
    """
    Single access point for the project's YAML-driven settings.

    Constructing an instance reads the config and params files and ensures the
    top-level artifacts directory exists; the getter methods then hand out a
    typed config object for an individual pipeline stage.
    """

    def __init__(
        self,
        config_filepath=CONFIG_FILE_PATH,
        params_filepath=PARAMS_FILE_PATH
    ):
        """
        Read both YAML files and prepare the artifacts root directory.

        Args:
            config_filepath: Location of the main configuration YAML file.
            params_filepath: Location of the parameters YAML file.
        """
        # Parsed YAML contents are kept on the instance for the getters below.
        self.config = read_yaml(config_filepath)
        self.params = read_yaml(params_filepath)

        # Echo the parsed configuration so it is visible in the cell output.
        print("Loaded Config:", self.config)

        # Every stage writes beneath this directory, so make sure it exists first.
        create_directories([self.config.artifacts_root])

    def get_data_validation_config(self) -> DataValidationConfig:
        """
        Build the settings object for the data-validation stage.

        The stage's root directory is created as a side effect before the
        settings are returned.

        Returns:
            DataValidationConfig: Values copied from the `data_validation`
            section of the loaded configuration.
        """
        validation_section = self.config.data_validation

        # The stage's output directory must exist before anything is written into it.
        create_directories([validation_section.root_dir])

        return DataValidationConfig(
            root_dir=validation_section.root_dir,
            STATUS_FILE=validation_section.STATUS_FILE,
            ALL_REQUIRED_FILES=validation_section.ALL_REQUIRED_FILES,
        )

In [9]:
import os
from TextSummarizer.logging import logger

In [13]:
class DataValidation:
    """Checks that the ingested dataset directory contains every required entry."""

    # The annotation is quoted so this class can be defined without the config
    # class having to be resolved at definition time.
    def __init__(self, config: "DataValidationConfig"):
        """
        Store the validation settings.

        Args:
            config: Object exposing `STATUS_FILE` (path the result is written to)
                and `ALL_REQUIRED_FILES` (names that must be present).
        """
        self.config = config

    def validate_all_files_exists(self, data_dir=None) -> bool:
        """
        Verify that every required entry exists in the dataset directory.

        The outcome is computed once over the whole directory listing, so it does
        not depend on the order in which the operating system returns entries.
        Entries that are present but not required are ignored. The result is
        written to the configured status file exactly once, including when the
        directory is empty.

        Args:
            data_dir: Directory to inspect. Defaults to
                artifacts/data_ingestion/samsum_dataset relative to the current
                working directory.

        Returns:
            bool: True when no required entry is missing, otherwise False.

        Raises:
            OSError: If `data_dir` cannot be listed or the status file cannot be
                written (for example FileNotFoundError when either path is missing).
        """
        if data_dir is None:
            data_dir = os.path.join("artifacts", "data_ingestion", "samsum_dataset")

        present_entries = set(os.listdir(data_dir))
        missing_entries = [
            required
            for required in self.config.ALL_REQUIRED_FILES
            if required not in present_entries
        ]
        validation_status = not missing_entries

        # Single write: the file always reflects the final, overall result.
        with open(self.config.STATUS_FILE, 'w') as f:
            f.write(f"Validation Status: {validation_status}")

        return validation_status

In [14]:
# Run the data-validation stage end to end: load the configuration, build the
# stage's settings object, then check the dataset directory.
# No try/except wrapper: catching an exception only to re-raise it unchanged
# adds nothing, so errors are simply allowed to propagate with their traceback.
config = ConfigurationManager()
data_validation_config = config.get_data_validation_config()
data_validation = DataValidation(config=data_validation_config)
validation_status = data_validation.validate_all_files_exists()

# Last expression of the cell, so the result is displayed as the cell output.
validation_status

[2025-01-28 18:14:43,748: INFO: common: yaml file:config\config.yaml  loaded successfully]
[2025-01-28 18:14:43,752: INFO: common: yaml file:params.yaml  loaded successfully]
Loaded Config: {'artifacts_root': 'artifacts', 'data_ingestion': {'root_dir': 'artifacts/data_ingestion', 'source_URL': 'https://github.com/entbappy/Branching-tutorial/raw/master/summarizer-data.zip', 'local_data_file': 'artifacts/data_ingestion/data.zip', 'unzip_dir': 'artifacts/data_ingestion'}, 'data_validation': {'root_dir': 'artifacts/data_validation', 'STATUS_FILE': 'artifacts/data_validation/status.txt', 'ALL_REQUIRED_FILES': ['train', 'test', 'validation']}}
[2025-01-28 18:14:43,753: INFO: common: created directory at: artifacts]
[2025-01-28 18:14:43,755: INFO: common: created directory at: artifacts/data_validation]
