In [1]:
import os

In [2]:
# Move up to the project root so that relative paths such as
# config/config.yaml resolve. The guard makes the cell idempotent:
# re-running it (or starting the kernel at the project root) no longer
# climbs one directory too far.
if not os.path.exists(os.path.join("config", "config.yaml")):
    os.chdir("../")

In [3]:
%pwd

'c:\\Users\\Maza\\Desktop\\Pinecone_pipeline'

In [4]:
from dataclasses import dataclass
from pathlib import Path

@dataclass(frozen=True)
class EditSummaryConfig:
    """
    Immutable pair of file locations used by the summary-editing step.

    Instances are frozen, so the fields cannot be reassigned after creation.

    Attributes:
        read_json_summary (Path): Location of the JSON summary file to read.
        load_edited_summary (Path): Location the edited JSON summary is saved to.
    """

    # Input: the JSON summary to be edited.
    read_json_summary: Path
    # Output: destination of the edited JSON summary.
    load_edited_summary: Path



In [5]:
from vector_db_pipeline.utils.common import load_json, read_yaml, create_directories,save_json
from pathlib import Path
from vector_db_pipeline.constants import *
from vector_db_pipeline import logger

In [6]:
class ConfigurationManager:
    """
    Read the project's YAML files and build configuration objects from them.
    """

    def __init__(
        self,
        config_filepath = CONFIG_FILE_PATH,
        schema_filepath = SCHEMA_FILE_PATH,
        params_filepath = PARAMS_FILE_PATH,
        models_filepath = MODELS_FILE_PATH,
        prompt_template = PROMPT_FILE_PATH):
        """
        Parse each YAML file with ``read_yaml`` and keep the result on the instance.

        Args:
            config_filepath: Path of the main configuration YAML file.
            schema_filepath: Path of the schema YAML file.
            params_filepath: Path of the parameters YAML file.
            models_filepath: Path of the models YAML file.
            prompt_template: Path of the prompt template YAML file.
        """
        self.config = read_yaml(config_filepath)
        self.schema = read_yaml(schema_filepath)
        self.params = read_yaml(params_filepath)
        self.models = read_yaml(models_filepath)
        self.prompt_template = read_yaml(prompt_template)

    def get_edit_summary_config(self) -> EditSummaryConfig:
        """
        Build the EditSummaryConfig from the ``edited_json_summary`` config section.

        Returns:
            EditSummaryConfig: Paths for reading the JSON summary and for saving
            the edited version.
        """
        section = self.config.edited_json_summary

        # EditSummaryConfig declares both fields as Path; coerce here so the
        # object honours that contract whatever type the YAML loader produced.
        return EditSummaryConfig(
            read_json_summary=Path(section.read_json_summary),
            load_edited_summary=Path(section.load_edited_summary),
        )

In [7]:
class EditSummary:
    """
    Remove empty entries from a JSON summary and save the cleaned result.
    """

    def __init__(self, config: EditSummaryConfig):
        """
        Initialize the EditSummary class with a configuration object.

        Args:
            config (EditSummaryConfig): Configuration object containing paths for reading and saving JSON summaries.
        """
        self.config = config

    def clean_json_summary(self):
        """
        Read a JSON summary file, filter out entries with empty values, and save the edited summary.

        This function performs the following steps:
        1. Load the JSON summary from the path specified in the configuration.
        2. Filter out entries in each file's summary whose value is falsy.
        3. Save the edited summary to the path specified in the configuration.

        Returns:
            dict: The edited summary that was saved, mapping each file to its
            remaining (truthy) entries.

        Raises:
            Exception: Any error raised while filtering is logged and re-raised,
            so nothing is saved when filtering fails.
        """

        summary = load_json(Path(self.config.read_json_summary))

        try:
            # `if value` drops every falsy value (None, "", empty containers,
            # and also 0 / False), not only empty strings.
            edited_summary = {
                file: {key: value for key, value in contents.items() if value}
                for file, contents in summary.items()
            }
        except Exception as e:
            logger.error(f"Error filtering out entries with empty values: {e}")
            # Re-raise: falling through would hit save_json with
            # `edited_summary` unbound and mask the real error.
            raise
        else:
            logger.info("Empty values filtered out")

        save_json(Path(self.config.load_edited_summary), edited_summary)
        return edited_summary



            
    

In [8]:
import time

In [9]:
# Run the edit-summary stage end to end and log how long it took.
# perf_counter() is a monotonic, high-resolution clock meant for measuring
# elapsed time; time.time() follows the wall clock and can jump if the
# system time is adjusted.
start = time.perf_counter()
config = ConfigurationManager()
edit_summary_config = config.get_edit_summary_config()
edit_summary= EditSummary(edit_summary_config)
json_summary = edit_summary.clean_json_summary()
logger.info(f"Editing summary latency: {(time.perf_counter() - start):.4f} seconds")

[2024-05-24 16:06:52,890: INFO: common: yaml file: config\config.yaml loaded successfully:]
[2024-05-24 16:06:52,901: INFO: common: yaml file: schema.yaml loaded successfully:]
[2024-05-24 16:06:52,911: INFO: common: yaml file: params.yaml loaded successfully:]


[2024-05-24 16:06:52,922: INFO: common: yaml file: models.yaml loaded successfully:]
[2024-05-24 16:06:52,937: INFO: common: yaml file: prompt_template.yaml loaded successfully:]
[2024-05-24 16:06:52,949: INFO: common: json file loaded succesfully from: artifacts\json_summary\json_summary.json:]
[2024-05-24 16:06:52,952: INFO: 2176072198: Empty values filtered out:]
[2024-05-24 16:06:52,956: INFO: common: json file saved at: artifacts\json_summary\json_summary_edited.json:]
[2024-05-24 16:06:52,958: INFO: 3292707835: Editing summary latency: 0.0836 seconds:]
